WebSVN - Projects - Blame - Rev 365 - /MissionCockpit/tags/V0.1.0/perl/lib/XML/Simple.pm

Rev	Author	Line No.	Line
365	rain-er	1	# $Id: Simple.pm,v 1.40 2007/08/15 10:36:48 grantm Exp $
		2
		3	package XML::Simple;
		4
		5	=head1 NAME
		6
		7	XML::Simple - Easy API to maintain XML (esp config files)
		8
		9	=head1 SYNOPSIS
		10
		11	use XML::Simple;
		12
		13	my $ref = XMLin([<xml file or string>] [, <options>]);
		14
		15	my $xml = XMLout($hashref [, <options>]);
		16
		17	Or the object oriented way:
		18
		19	require XML::Simple;
		20
		21	my $xs = XML::Simple->new(options);
		22
		23	my $ref = $xs->XMLin([<xml file or string>] [, <options>]);
		24
		25	my $xml = $xs->XMLout($hashref [, <options>]);
		26
		27	(or see L<"SAX SUPPORT"> for 'the SAX way').
		28
		29	To catch common errors:
		30
		31	use XML::Simple qw(:strict);
		32
		33	(see L<"STRICT MODE"> for more details).
		34
		35	=cut
		36
		37	# See after __END__ for more POD documentation
		38
		39
		40	# Load essentials here, other modules loaded on demand later
		41
		42	use strict;
		43	use Carp;
		44	require Exporter;
		45
		46
		47	##############################################################################
		48	# Define some constants
		49	#
		50
		51	use vars qw($VERSION @ISA @EXPORT @EXPORT_OK $PREFERRED_PARSER);
		52
		53	@ISA = qw(Exporter);
		54	@EXPORT = qw(XMLin XMLout);
		55	@EXPORT_OK = qw(xml_in xml_out);
		56	$VERSION = '2.18';
		57	$PREFERRED_PARSER = undef;
		58
		59	my $StrictMode = 0;
		60
		61	my @KnownOptIn = qw(keyattr keeproot forcecontent contentkey noattr
		62	searchpath forcearray cache suppressempty parseropts
		63	grouptags nsexpand datahandler varattr variables
		64	normalisespace normalizespace valueattr);
		65
		66	my @KnownOptOut = qw(keyattr keeproot contentkey noattr
		67	rootname xmldecl outputfile noescape suppressempty
		68	grouptags nsexpand handler noindent attrindent nosort
		69	valueattr numericescape);
		70
		71	my @DefKeyAttr = qw(name key id);
		72	my $DefRootName = qq(opt);
		73	my $DefContentKey = qq(content);
		74	my $DefXmlDecl = qq(<?xml version='1.0' standalone='yes'?>);
		75
		76	my $xmlns_ns = 'http://www.w3.org/2000/xmlns/';
		77	my $bad_def_ns_jcn = '{' . $xmlns_ns . '}'; # LibXML::SAX workaround
		78
		79
		80	##############################################################################
		81	# Globals for use by caching routines
		82	#
		83
		84	my %MemShareCache = ();
		85	my %MemCopyCache = ();
		86
		87
		88	##############################################################################
		89	# Wrapper for Exporter - handles ':strict'
		90	#
		91
		92	sub import {
		93	# Handle the :strict tag
		94
		95	$StrictMode = 1 if grep(/^:strict$/, @_);
		96
		97	# Pass everything else to Exporter.pm
		98
		99	@_ = grep(!/^:strict$/, @_);
		100	goto &Exporter::import;
		101	}
		102
		103
		104	##############################################################################
		105	# Constructor for optional object interface.
		106	#
		107
		108	sub new {
		109	my $class = shift;
		110
		111	if(@_ % 2) {
		112	croak "Default options must be name=>value pairs (odd number supplied)";
		113	}
		114
		115	my %known_opt;
		116	@known_opt{@KnownOptIn, @KnownOptOut} = (undef) x 100;
		117
		118	my %raw_opt = @_;
		119	my %def_opt;
		120	while(my($key, $val) = each %raw_opt) {
		121	my $lkey = lc($key);
		122	$lkey =~ s/_//g;
		123	croak "Unrecognised option: $key" unless(exists($known_opt{$lkey}));
		124	$def_opt{$lkey} = $val;
		125	}
		126	my $self = { def_opt => \%def_opt };
		127
		128	return(bless($self, $class));
		129	}
		130
		131
		132	##############################################################################
		133	# Sub: _get_object()
		134	#
		135	# Helper routine called from XMLin() and XMLout() to create an object if none
		136	# was provided. Note, this routine does mess with the caller's @_ array.
		137	#
		138
		139	sub _get_object {
		140	my $self;
		141	if($_[0] and UNIVERSAL::isa($_[0], 'XML::Simple')) {
		142	$self = shift;
		143	}
		144	else {
		145	$self = XML::Simple->new();
		146	}
		147
		148	return $self;
		149	}
		150
		151
		152	##############################################################################
		153	# Sub/Method: XMLin()
		154	#
		155	# Exported routine for slurping XML into a hashref - see pod for info.
		156	#
		157	# May be called as object method or as a plain function.
		158	#
		159	# Expects one arg for the source XML, optionally followed by a number of
		160	# name => value option pairs.
		161	#
		162
		163	sub XMLin {
		164	my $self = &_get_object; # note, @_ is passed implicitly
		165
		166	my $target = shift;
		167
		168
		169	# Work out whether to parse a string, a file or a filehandle
		170
		171	if(not defined $target) {
		172	return $self->parse_file(undef, @_);
		173	}
		174
		175	elsif($target eq '-') {
		176	local($/) = undef;
		177	$target = <STDIN>;
		178	return $self->parse_string(\$target, @_);
		179	}
		180
		181	elsif(my $type = ref($target)) {
		182	if($type eq 'SCALAR') {
		183	return $self->parse_string($target, @_);
		184	}
		185	else {
		186	return $self->parse_fh($target, @_);
		187	}
		188	}
		189
		190	elsif($target =~ m{<.*?>}s) {
		191	return $self->parse_string(\$target, @_);
		192	}
		193
		194	else {
		195	return $self->parse_file($target, @_);
		196	}
		197	}
		198
		199
		200	##############################################################################
		201	# Sub/Method: parse_file()
		202	#
		203	# Same as XMLin, but only parses from a named file.
		204	#
		205
		206	sub parse_file {
		207	my $self = &_get_object; # note, @_ is passed implicitly
		208
		209	my $filename = shift;
		210
		211	$self->handle_options('in', @_);
		212
		213	$filename = $self->default_config_file if not defined $filename;
		214
		215	$filename = $self->find_xml_file($filename, @{$self->{opt}->{searchpath}});
		216
		217	# Check cache for previous parse
		218
		219	if($self->{opt}->{cache}) {
		220	foreach my $scheme (@{$self->{opt}->{cache}}) {
		221	my $method = 'cache_read_' . $scheme;
		222	my $opt = $self->$method($filename);
		223	return($opt) if($opt);
		224	}
		225	}
		226
		227	my $ref = $self->build_simple_tree($filename, undef);
		228
		229	if($self->{opt}->{cache}) {
		230	my $method = 'cache_write_' . $self->{opt}->{cache}->[0];
		231	$self->$method($ref, $filename);
		232	}
		233
		234	return $ref;
		235	}
		236
		237
		238	##############################################################################
		239	# Sub/Method: parse_fh()
		240	#
		241	# Same as XMLin, but only parses from a filehandle.
		242	#
		243
		244	sub parse_fh {
		245	my $self = &_get_object; # note, @_ is passed implicitly
		246
		247	my $fh = shift;
		248	croak "Can't use " . (defined $fh ? qq{string ("$fh")} : 'undef') .
		249	" as a filehandle" unless ref $fh;
		250
		251	$self->handle_options('in', @_);
		252
		253	return $self->build_simple_tree(undef, $fh);
		254	}
		255
		256
		257	##############################################################################
		258	# Sub/Method: parse_string()
		259	#
		260	# Same as XMLin, but only parses from a string or a reference to a string.
		261	#
		262
		263	sub parse_string {
		264	my $self = &_get_object; # note, @_ is passed implicitly
		265
		266	my $string = shift;
		267
		268	$self->handle_options('in', @_);
		269
		270	return $self->build_simple_tree(undef, ref $string ? $string : \$string);
		271	}
		272
		273
		274	##############################################################################
		275	# Method: default_config_file()
		276	#
		277	# Returns the name of the XML file to parse if no filename (or XML string)
		278	# was provided.
		279	#
		280
		281	sub default_config_file {
		282	my $self = shift;
		283
		284	require File::Basename;
		285
		286	my($basename, $script_dir, $ext) = File::Basename::fileparse($0, '\.[^\.]+');
		287
		288	# Add script directory to searchpath
		289
		290	if($script_dir) {
		291	unshift(@{$self->{opt}->{searchpath}}, $script_dir);
		292	}
		293
		294	return $basename . '.xml';
		295	}
		296
		297
		298	##############################################################################
		299	# Method: build_simple_tree()
		300	#
		301	# Builds a 'tree' data structure as provided by XML::Parser and then
		302	# 'simplifies' it as specified by the various options in effect.
		303	#
		304
		305	sub build_simple_tree {
		306	my $self = shift;
		307
		308	my $tree = $self->build_tree(@_);
		309
		310	return $self->{opt}->{keeproot}
		311	? $self->collapse({}, @$tree)
		312	: $self->collapse(@{$tree->[1]});
		313	}
		314
		315
		316	##############################################################################
		317	# Method: build_tree()
		318	#
		319	# This routine will be called if there is no suitable pre-parsed tree in a
		320	# cache. It parses the XML and returns an XML::Parser 'Tree' style data
		321	# structure (summarised in the comments for the collapse() routine below).
		322	#
		323	# XML::Simple requires the services of another module that knows how to parse
		324	# XML. If XML::SAX is installed, the default SAX parser will be used,
		325	# otherwise XML::Parser will be used.
		326	#
		327	# This routine expects to be passed a filename as argument 1 or a 'string' as
		328	# argument 2. The 'string' might be a string of XML (passed by reference to
		329	# save memory) or it might be a reference to an IO::Handle. (This
		330	# non-intuitive mess results in part from the way XML::Parser works but that's
		331	# really no excuse).
		332	#
		333
		334	sub build_tree {
		335	my $self = shift;
		336	my $filename = shift;
		337	my $string = shift;
		338
		339
		340	my $preferred_parser = $PREFERRED_PARSER;
		341	unless(defined($preferred_parser)) {
		342	$preferred_parser = $ENV{XML_SIMPLE_PREFERRED_PARSER} \|\| '';
		343	}
		344	if($preferred_parser eq 'XML::Parser') {
		345	return($self->build_tree_xml_parser($filename, $string));
		346	}
		347
		348	eval { require XML::SAX; }; # We didn't need it until now
		349	if($@) { # No XML::SAX - fall back to XML::Parser
		350	if($preferred_parser) { # unless a SAX parser was expressly requested
		351	croak "XMLin() could not load XML::SAX";
		352	}
		353	return($self->build_tree_xml_parser($filename, $string));
		354	}
		355
		356	$XML::SAX::ParserPackage = $preferred_parser if($preferred_parser);
		357
		358	my $sp = XML::SAX::ParserFactory->parser(Handler => $self);
		359
		360	$self->{nocollapse} = 1;
		361	my($tree);
		362	if($filename) {
		363	$tree = $sp->parse_uri($filename);
		364	}
		365	else {
		366	if(ref($string) && ref($string) ne 'SCALAR') {
		367	$tree = $sp->parse_file($string);
		368	}
		369	else {
		370	$tree = $sp->parse_string($$string);
		371	}
		372	}
		373
		374	return($tree);
		375	}
		376
		377
		378	##############################################################################
		379	# Method: build_tree_xml_parser()
		380	#
		381	# This routine will be called if XML::SAX is not installed, or if XML::Parser
		382	# was specifically requested. It takes the same arguments as build_tree() and
		383	# returns the same data structure (XML::Parser 'Tree' style).
		384	#
		385
		386	sub build_tree_xml_parser {
		387	my $self = shift;
		388	my $filename = shift;
		389	my $string = shift;
		390
		391
		392	eval {
		393	local($^W) = 0; # Suppress warning from Expat.pm re File::Spec::load()
		394	require XML::Parser; # We didn't need it until now
		395	};
		396	if($@) {
		397	croak "XMLin() requires either XML::SAX or XML::Parser";
		398	}
		399
		400	if($self->{opt}->{nsexpand}) {
		401	carp "'nsexpand' option requires XML::SAX";
		402	}
		403
		404	my $xp = XML::Parser->new(Style => 'Tree', @{$self->{opt}->{parseropts}});
		405	my($tree);
		406	if($filename) {
		407	# $tree = $xp->parsefile($filename); # Changed due to prob w/mod_perl
		408	local(*XML_FILE);
		409	open(XML_FILE, '<', $filename) \|\| croak qq($filename - $!);
		410	$tree = $xp->parse(*XML_FILE);
		411	close(XML_FILE);
		412	}
		413	else {
		414	$tree = $xp->parse($$string);
		415	}
		416
		417	return($tree);
		418	}
		419
		420
		421	##############################################################################
		422	# Method: cache_write_storable()
		423	#
		424	# Wrapper routine for invoking Storable::nstore() to cache a parsed data
		425	# structure.
		426	#
		427
		428	sub cache_write_storable {
		429	my($self, $data, $filename) = @_;
		430
		431	my $cachefile = $self->storable_filename($filename);
		432
		433	require Storable; # We didn't need it until now
		434
		435	if ('VMS' eq $^O) {
		436	Storable::nstore($data, $cachefile);
		437	}
		438	else {
		439	# If the following line fails for you, your Storable.pm is old - upgrade
		440	Storable::lock_nstore($data, $cachefile);
		441	}
		442
		443	}
		444
		445
		446	##############################################################################
		447	# Method: cache_read_storable()
		448	#
		449	# Wrapper routine for invoking Storable::retrieve() to read a cached parsed
		450	# data structure. Only returns cached data if the cache file exists and is
		451	# newer than the source XML file.
		452	#
		453
		454	sub cache_read_storable {
		455	my($self, $filename) = @_;
		456
		457	my $cachefile = $self->storable_filename($filename);
		458
		459	return unless(-r $cachefile);
		460	return unless((stat($cachefile))[9] > (stat($filename))[9]);
		461
		462	require Storable; # We didn't need it until now
		463
		464	if ('VMS' eq $^O) {
		465	return(Storable::retrieve($cachefile));
		466	}
		467	else {
		468	return(Storable::lock_retrieve($cachefile));
		469	}
		470
		471	}
		472
		473
		474	##############################################################################
		475	# Method: storable_filename()
		476	#
		477	# Translates the supplied source XML filename into a filename for the storable
		478	# cached data. A '.stor' suffix is added after stripping an optional '.xml'
		479	# suffix.
		480	#
		481
		482	sub storable_filename {
		483	my($self, $cachefile) = @_;
		484
		485	$cachefile =~ s{(\.xml)?$}{.stor};
		486	return $cachefile;
		487	}
		488
		489
		490	##############################################################################
		491	# Method: cache_write_memshare()
		492	#
		493	# Takes the supplied data structure reference and stores it away in a global
		494	# hash structure.
		495	#
		496
		497	sub cache_write_memshare {
		498	my($self, $data, $filename) = @_;
		499
		500	$MemShareCache{$filename} = [time(), $data];
		501	}
		502
		503
		504	##############################################################################
		505	# Method: cache_read_memshare()
		506	#
		507	# Takes a filename and looks in a global hash for a cached parsed version.
		508	#
		509
		510	sub cache_read_memshare {
		511	my($self, $filename) = @_;
		512
		513	return unless($MemShareCache{$filename});
		514	return unless($MemShareCache{$filename}->[0] > (stat($filename))[9]);
		515
		516	return($MemShareCache{$filename}->[1]);
		517
		518	}
		519
		520
		521	##############################################################################
		522	# Method: cache_write_memcopy()
		523	#
		524	# Takes the supplied data structure and stores a copy of it in a global hash
		525	# structure.
		526	#
		527
		528	sub cache_write_memcopy {
		529	my($self, $data, $filename) = @_;
		530
		531	require Storable; # We didn't need it until now
		532
		533	$MemCopyCache{$filename} = [time(), Storable::dclone($data)];
		534	}
		535
		536
		537	##############################################################################
		538	# Method: cache_read_memcopy()
		539	#
		540	# Takes a filename and looks in a global hash for a cached parsed version.
		541	# Returns a reference to a copy of that data structure.
		542	#
		543
		544	sub cache_read_memcopy {
		545	my($self, $filename) = @_;
		546
		547	return unless($MemCopyCache{$filename});
		548	return unless($MemCopyCache{$filename}->[0] > (stat($filename))[9]);
		549
		550	return(Storable::dclone($MemCopyCache{$filename}->[1]));
		551
		552	}
		553
		554
		555	##############################################################################
		556	# Sub/Method: XMLout()
		557	#
		558	# Exported routine for 'unslurping' a data structure out to XML.
		559	#
		560	# Expects a reference to a data structure and an optional list of option
		561	# name => value pairs.
		562	#
		563
		564	sub XMLout {
		565	my $self = &_get_object; # note, @_ is passed implicitly
		566
		567	croak "XMLout() requires at least one argument" unless(@_);
		568	my $ref = shift;
		569
		570	$self->handle_options('out', @_);
		571
		572
		573	# If namespace expansion is set, XML::NamespaceSupport is required
		574
		575	if($self->{opt}->{nsexpand}) {
		576	require XML::NamespaceSupport;
		577	$self->{nsup} = XML::NamespaceSupport->new();
		578	$self->{ns_prefix} = 'aaa';
		579	}
		580
		581
		582	# Wrap top level arrayref in a hash
		583
		584	if(UNIVERSAL::isa($ref, 'ARRAY')) {
		585	$ref = { anon => $ref };
		586	}
		587
		588
		589	# Extract rootname from top level hash if keeproot enabled
		590
		591	if($self->{opt}->{keeproot}) {
		592	my(@keys) = keys(%$ref);
		593	if(@keys == 1) {
		594	$ref = $ref->{$keys[0]};
		595	$self->{opt}->{rootname} = $keys[0];
		596	}
		597	}
		598
		599	# Ensure there are no top level attributes if we're not adding root elements
		600
		601	elsif($self->{opt}->{rootname} eq '') {
		602	if(UNIVERSAL::isa($ref, 'HASH')) {
		603	my $refsave = $ref;
		604	$ref = {};
		605	foreach (keys(%$refsave)) {
		606	if(ref($refsave->{$_})) {
		607	$ref->{$_} = $refsave->{$_};
		608	}
		609	else {
		610	$ref->{$_} = [ $refsave->{$_} ];
		611	}
		612	}
		613	}
		614	}
		615
		616
		617	# Encode the hashref and write to file if necessary
		618
		619	$self->{_ancestors} = [];
		620	my $xml = $self->value_to_xml($ref, $self->{opt}->{rootname}, '');
		621	delete $self->{_ancestors};
		622
		623	if($self->{opt}->{xmldecl}) {
		624	$xml = $self->{opt}->{xmldecl} . "\n" . $xml;
		625	}
		626
		627	if($self->{opt}->{outputfile}) {
		628	if(ref($self->{opt}->{outputfile})) {
		629	my $fh = $self->{opt}->{outputfile};
		630	if(UNIVERSAL::isa($fh, 'GLOB') and !UNIVERSAL::can($fh, 'print')) {
		631	eval { require IO::Handle; };
		632	croak $@ if $@;
		633	}
		634	return($fh->print($xml));
		635	}
		636	else {
		637	local(*OUT);
		638	open(OUT, '>', "$self->{opt}->{outputfile}") \|\|
		639	croak "open($self->{opt}->{outputfile}): $!";
		640	binmode(OUT, ':utf8') if($] >= 5.008);
		641	print OUT $xml \|\| croak "print: $!";
		642	close(OUT);
		643	}
		644	}
		645	elsif($self->{opt}->{handler}) {
		646	require XML::SAX;
		647	my $sp = XML::SAX::ParserFactory->parser(
		648	Handler => $self->{opt}->{handler}
		649	);
		650	return($sp->parse_string($xml));
		651	}
		652	else {
		653	return($xml);
		654	}
		655	}
		656
		657
		658	##############################################################################
		659	# Method: handle_options()
		660	#
		661	# Helper routine for both XMLin() and XMLout(). Both routines handle their
		662	# first argument and assume all other args are options handled by this routine.
		663	# Saves a hash of options in $self->{opt}.
		664	#
		665	# If default options were passed to the constructor, they will be retrieved
		666	# here and merged with options supplied to the method call.
		667	#
		668	# First argument should be the string 'in' or the string 'out'.
		669	#
		670	# Remaining arguments should be name=>value pairs. Sets up default values
		671	# for options not supplied. Unrecognised options are a fatal error.
		672	#
		673
		674	sub handle_options {
		675	my $self = shift;
		676	my $dirn = shift;
		677
		678
		679	# Determine valid options based on context
		680
		681	my %known_opt;
		682	if($dirn eq 'in') {
		683	@known_opt{@KnownOptIn} = @KnownOptIn;
		684	}
		685	else {
		686	@known_opt{@KnownOptOut} = @KnownOptOut;
		687	}
		688
		689
		690	# Store supplied options in hashref and weed out invalid ones
		691
		692	if(@_ % 2) {
		693	croak "Options must be name=>value pairs (odd number supplied)";
		694	}
		695	my %raw_opt = @_;
		696	my $opt = {};
		697	$self->{opt} = $opt;
		698
		699	while(my($key, $val) = each %raw_opt) {
		700	my $lkey = lc($key);
		701	$lkey =~ s/_//g;
		702	croak "Unrecognised option: $key" unless($known_opt{$lkey});
		703	$opt->{$lkey} = $val;
		704	}
		705
		706
		707	# Merge in options passed to constructor
		708
		709	foreach (keys(%known_opt)) {
		710	unless(exists($opt->{$_})) {
		711	if(exists($self->{def_opt}->{$_})) {
		712	$opt->{$_} = $self->{def_opt}->{$_};
		713	}
		714	}
		715	}
		716
		717
		718	# Set sensible defaults if not supplied
		719
		720	if(exists($opt->{rootname})) {
		721	unless(defined($opt->{rootname})) {
		722	$opt->{rootname} = '';
		723	}
		724	}
		725	else {
		726	$opt->{rootname} = $DefRootName;
		727	}
		728
		729	if($opt->{xmldecl} and $opt->{xmldecl} eq '1') {
		730	$opt->{xmldecl} = $DefXmlDecl;
		731	}
		732
		733	if(exists($opt->{contentkey})) {
		734	if($opt->{contentkey} =~ m{^-(.*)$}) {
		735	$opt->{contentkey} = $1;
		736	$opt->{collapseagain} = 1;
		737	}
		738	}
		739	else {
		740	$opt->{contentkey} = $DefContentKey;
		741	}
		742
		743	unless(exists($opt->{normalisespace})) {
		744	$opt->{normalisespace} = $opt->{normalizespace};
		745	}
		746	$opt->{normalisespace} = 0 unless(defined($opt->{normalisespace}));
		747
		748	# Cleanups for values assumed to be arrays later
		749
		750	if($opt->{searchpath}) {
		751	unless(ref($opt->{searchpath})) {
		752	$opt->{searchpath} = [ $opt->{searchpath} ];
		753	}
		754	}
		755	else {
		756	$opt->{searchpath} = [ ];
		757	}
		758
		759	if($opt->{cache} and !ref($opt->{cache})) {
		760	$opt->{cache} = [ $opt->{cache} ];
		761	}
		762	if($opt->{cache}) {
		763	$_ = lc($_) foreach (@{$opt->{cache}});
		764	foreach my $scheme (@{$opt->{cache}}) {
		765	my $method = 'cache_read_' . $scheme;
		766	croak "Unsupported caching scheme: $scheme"
		767	unless($self->can($method));
		768	}
		769	}
		770
		771	if(exists($opt->{parseropts})) {
		772	if($^W) {
		773	carp "Warning: " .
		774	"'ParserOpts' is deprecated, contact the author if you need it";
		775	}
		776	}
		777	else {
		778	$opt->{parseropts} = [ ];
		779	}
		780
		781
		782	# Special cleanup for {forcearray} which could be regex, arrayref or boolean
		783	# or left to default to 0
		784
		785	if(exists($opt->{forcearray})) {
		786	if(ref($opt->{forcearray}) eq 'Regexp') {
		787	$opt->{forcearray} = [ $opt->{forcearray} ];
		788	}
		789
		790	if(ref($opt->{forcearray}) eq 'ARRAY') {
		791	my @force_list = @{$opt->{forcearray}};
		792	if(@force_list) {
		793	$opt->{forcearray} = {};
		794	foreach my $tag (@force_list) {
		795	if(ref($tag) eq 'Regexp') {
		796	push @{$opt->{forcearray}->{_regex}}, $tag;
		797	}
		798	else {
		799	$opt->{forcearray}->{$tag} = 1;
		800	}
		801	}
		802	}
		803	else {
		804	$opt->{forcearray} = 0;
		805	}
		806	}
		807	else {
		808	$opt->{forcearray} = ( $opt->{forcearray} ? 1 : 0 );
		809	}
		810	}
		811	else {
		812	if($StrictMode and $dirn eq 'in') {
		813	croak "No value specified for 'ForceArray' option in call to XML$dirn()";
		814	}
		815	$opt->{forcearray} = 0;
		816	}
		817
		818
		819	# Special cleanup for {keyattr} which could be arrayref or hashref or left
		820	# to default to arrayref
		821
		822	if(exists($opt->{keyattr})) {
		823	if(ref($opt->{keyattr})) {
		824	if(ref($opt->{keyattr}) eq 'HASH') {
		825
		826	# Make a copy so we can mess with it
		827
		828	$opt->{keyattr} = { %{$opt->{keyattr}} };
		829
		830
		831	# Convert keyattr => { elem => '+attr' }
		832	# to keyattr => { elem => [ 'attr', '+' ] }
		833
		834	foreach my $el (keys(%{$opt->{keyattr}})) {
		835	if($opt->{keyattr}->{$el} =~ /^(\+\|-)?(.*)$/) {
		836	$opt->{keyattr}->{$el} = [ $2, ($1 ? $1 : '') ];
		837	if($StrictMode and $dirn eq 'in') {
		838	next if($opt->{forcearray} == 1);
		839	next if(ref($opt->{forcearray}) eq 'HASH'
		840	and $opt->{forcearray}->{$el});
		841	croak "<$el> set in KeyAttr but not in ForceArray";
		842	}
		843	}
		844	else {
		845	delete($opt->{keyattr}->{$el}); # Never reached (famous last words?)
		846	}
		847	}
		848	}
		849	else {
		850	if(@{$opt->{keyattr}} == 0) {
		851	delete($opt->{keyattr});
		852	}
		853	}
		854	}
		855	else {
		856	$opt->{keyattr} = [ $opt->{keyattr} ];
		857	}
		858	}
		859	else {
		860	if($StrictMode) {
		861	croak "No value specified for 'KeyAttr' option in call to XML$dirn()";
		862	}
		863	$opt->{keyattr} = [ @DefKeyAttr ];
		864	}
		865
		866
		867	# Special cleanup for {valueattr} which could be arrayref or hashref
		868
		869	if(exists($opt->{valueattr})) {
		870	if(ref($opt->{valueattr}) eq 'ARRAY') {
		871	$opt->{valueattrlist} = {};
		872	$opt->{valueattrlist}->{$_} = 1 foreach(@{ delete $opt->{valueattr} });
		873	}
		874	}
		875
		876	# make sure there's nothing weird in {grouptags}
		877
		878	if($opt->{grouptags}) {
		879	croak "Illegal value for 'GroupTags' option - expected a hashref"
		880	unless UNIVERSAL::isa($opt->{grouptags}, 'HASH');
		881
		882	while(my($key, $val) = each %{$opt->{grouptags}}) {
		883	next if $key ne $val;
		884	croak "Bad value in GroupTags: '$key' => '$val'";
		885	}
		886	}
		887
		888
		889	# Check the {variables} option is valid and initialise variables hash
		890
		891	if($opt->{variables} and !UNIVERSAL::isa($opt->{variables}, 'HASH')) {
		892	croak "Illegal value for 'Variables' option - expected a hashref";
		893	}
		894
		895	if($opt->{variables}) {
		896	$self->{_var_values} = { %{$opt->{variables}} };
		897	}
		898	elsif($opt->{varattr}) {
		899	$self->{_var_values} = {};
		900	}
		901
		902	}
		903
		904
		905	##############################################################################
		906	# Method: find_xml_file()
		907	#
		908	# Helper routine for XMLin().
		909	# Takes a filename, and a list of directories, attempts to locate the file in
		910	# the directories listed.
		911	# Returns a full pathname on success; croaks on failure.
		912	#
		913
		914	sub find_xml_file {
		915	my $self = shift;
		916	my $file = shift;
		917	my @search_path = @_;
		918
		919
		920	require File::Basename;
		921	require File::Spec;
		922
		923	my($filename, $filedir) = File::Basename::fileparse($file);
		924
		925	if($filename ne $file) { # Ignore searchpath if dir component
		926	return($file) if(-e $file);
		927	}
		928	else {
		929	my($path);
		930	foreach $path (@search_path) {
		931	my $fullpath = File::Spec->catfile($path, $file);
		932	return($fullpath) if(-e $fullpath);
		933	}
		934	}
		935
		936	# If user did not supply a search path, default to current directory
		937
		938	if(!@search_path) {
		939	return($file) if(-e $file);
		940	croak "File does not exist: $file";
		941	}
		942
		943	croak "Could not find $file in ", join(':', @search_path);
		944	}
		945
		946
		947	##############################################################################
		948	# Method: collapse()
		949	#
		950	# Helper routine for XMLin(). This routine really comprises the 'smarts' (or
		951	# value add) of this module.
		952	#
		953	# Takes the parse tree that XML::Parser produced from the supplied XML and
		954	# recurses through it 'collapsing' unnecessary levels of indirection (nested
		955	# arrays etc) to produce a data structure that is easier to work with.
		956	#
		957	# Elements in the original parser tree are represented as an element name
		958	# followed by an arrayref. The first element of the array is a hashref
		959	# containing the attributes. The rest of the array contains a list of any
		960	# nested elements as name+arrayref pairs:
		961	#
		962	# <element name>, [ { <attribute hashref> }, <element name>, [ ... ], ... ]
		963	#
		964	# The special element name '0' (zero) flags text content.
		965	#
		966	# This routine cuts down the noise by discarding any text content consisting of
		967	# only whitespace and then moves the nested elements into the attribute hash
		968	# using the name of the nested element as the hash key and the collapsed
		969	# version of the nested element as the value. Multiple nested elements with
		970	# the same name will initially be represented as an arrayref, but this may be
		971	# 'folded' into a hashref depending on the value of the keyattr option.
		972	#
		973
		974	sub collapse {
		975	my $self = shift;
		976
		977
		978	# Start with the hash of attributes
		979
		980	my $attr = shift;
		981	if($self->{opt}->{noattr}) { # Discard if 'noattr' set
		982	$attr = {};
		983	}
		984	elsif($self->{opt}->{normalisespace} == 2) {
		985	while(my($key, $value) = each %$attr) {
		986	$attr->{$key} = $self->normalise_space($value)
		987	}
		988	}
		989
		990
		991	# Do variable substitutions
		992
		993	if(my $var = $self->{_var_values}) {
		994	while(my($key, $val) = each(%$attr)) {
		995	$val =~ s{\$\{([\w.]+)\}}{ $self->get_var($1) }ge;
		996	$attr->{$key} = $val;
		997	}
		998	}
		999
		1000
		1001	# Roll up 'value' attributes (but only if no nested elements)
		1002
		1003	if(!@_ and keys %$attr == 1) {
		1004	my($k) = keys %$attr;
		1005	if($self->{opt}->{valueattrlist} and $self->{opt}->{valueattrlist}->{$k}) {
		1006	return $attr->{$k};
		1007	}
		1008	}
		1009
		1010
		1011	# Add any nested elements
		1012
		1013	my($key, $val);
		1014	while(@_) {
		1015	$key = shift;
		1016	$val = shift;
		1017
		1018	if(ref($val)) {
		1019	$val = $self->collapse(@$val);
		1020	next if(!defined($val) and $self->{opt}->{suppressempty});
		1021	}
		1022	elsif($key eq '0') {
		1023	next if($val =~ m{^\s*$}s); # Skip all whitespace content
		1024
		1025	$val = $self->normalise_space($val)
		1026	if($self->{opt}->{normalisespace} == 2);
		1027
		1028	# do variable substitutions
		1029
		1030	if(my $var = $self->{_var_values}) {
		1031	$val =~ s{\$\{(\w+)\}}{ $self->get_var($1) }ge;
		1032	}
		1033
		1034
		1035	# look for variable definitions
		1036
		1037	if(my $var = $self->{opt}->{varattr}) {
		1038	if(exists $attr->{$var}) {
		1039	$self->set_var($attr->{$var}, $val);
		1040	}
		1041	}
		1042
		1043
		1044	# Collapse text content in element with no attributes to a string
		1045
		1046	if(!%$attr and !@_) {
		1047	return($self->{opt}->{forcecontent} ?
		1048	{ $self->{opt}->{contentkey} => $val } : $val
		1049	);
		1050	}
		1051	$key = $self->{opt}->{contentkey};
		1052	}
		1053
		1054
		1055	# Combine duplicate attributes into arrayref if required
		1056
		1057	if(exists($attr->{$key})) {
		1058	if(UNIVERSAL::isa($attr->{$key}, 'ARRAY')) {
		1059	push(@{$attr->{$key}}, $val);
		1060	}
		1061	else {
		1062	$attr->{$key} = [ $attr->{$key}, $val ];
		1063	}
		1064	}
		1065	elsif(defined($val) and UNIVERSAL::isa($val, 'ARRAY')) {
		1066	$attr->{$key} = [ $val ];
		1067	}
		1068	else {
		1069	if( $key ne $self->{opt}->{contentkey}
		1070	and (
		1071	($self->{opt}->{forcearray} == 1)
		1072	or (
		1073	(ref($self->{opt}->{forcearray}) eq 'HASH')
		1074	and (
		1075	$self->{opt}->{forcearray}->{$key}
		1076	or (grep $key =~ $_, @{$self->{opt}->{forcearray}->{_regex}})
		1077	)
		1078	)
		1079	)
		1080	) {
		1081	$attr->{$key} = [ $val ];
		1082	}
		1083	else {
		1084	$attr->{$key} = $val;
		1085	}
		1086	}
		1087
		1088	}
		1089
		1090
		1091	# Turn arrayrefs into hashrefs if key fields present
		1092
		1093	if($self->{opt}->{keyattr}) {
		1094	while(($key,$val) = each %$attr) {
		1095	if(defined($val) and UNIVERSAL::isa($val, 'ARRAY')) {
		1096	$attr->{$key} = $self->array_to_hash($key, $val);
		1097	}
		1098	}
		1099	}
		1100
		1101
		1102	# disintermediate grouped tags
		1103
		1104	if($self->{opt}->{grouptags}) {
		1105	while(my($key, $val) = each(%$attr)) {
		1106	next unless(UNIVERSAL::isa($val, 'HASH') and (keys %$val == 1));
		1107	next unless(exists($self->{opt}->{grouptags}->{$key}));
		1108
		1109	my($child_key, $child_val) = %$val;
		1110
		1111	if($self->{opt}->{grouptags}->{$key} eq $child_key) {
		1112	$attr->{$key}= $child_val;
		1113	}
		1114	}
		1115	}
		1116
		1117
		1118	# Fold hashes containing a single anonymous array up into just the array
		1119
		1120	my $count = scalar keys %$attr;
		1121	if($count == 1
		1122	and exists $attr->{anon}
		1123	and UNIVERSAL::isa($attr->{anon}, 'ARRAY')
		1124	) {
		1125	return($attr->{anon});
		1126	}
		1127
		1128
		1129	# Do the right thing if hash is empty, otherwise just return it
		1130
		1131	if(!%$attr and exists($self->{opt}->{suppressempty})) {
		1132	if(defined($self->{opt}->{suppressempty}) and
		1133	$self->{opt}->{suppressempty} eq '') {
		1134	return('');
		1135	}
		1136	return(undef);
		1137	}
		1138
		1139
		1140	# Roll up named elements with named nested 'value' attributes
		1141
		1142	if($self->{opt}->{valueattr}) {
		1143	while(my($key, $val) = each(%$attr)) {
		1144	next unless($self->{opt}->{valueattr}->{$key});
		1145	next unless(UNIVERSAL::isa($val, 'HASH') and (keys %$val == 1));
		1146	my($k) = keys %$val;
		1147	next unless($k eq $self->{opt}->{valueattr}->{$key});
		1148	$attr->{$key} = $val->{$k};
		1149	}
		1150	}
		1151
		1152	return($attr)
		1153
		1154	}
		1155
		1156
		1157	##############################################################################
		1158	# Method: set_var()
		1159	#
		1160	# Called when a variable definition is encountered in the XML. (A variable
		1161	# definition looks like <element attrname="name">value</element> where attrname
		1162	# matches the varattr setting).
		1163	#
		1164
		1165	sub set_var {
		1166	my($self, $name, $value) = @_;
		1167
		1168	$self->{_var_values}->{$name} = $value;
		1169	}
		1170
		1171
		1172	##############################################################################
		1173	# Method: get_var()
		1174	#
		1175	# Called during variable substitution to get the value for the named variable.
		1176	#
		1177
		1178	sub get_var {
		1179	my($self, $name) = @_;
		1180
		1181	my $value = $self->{_var_values}->{$name};
		1182	return $value if(defined($value));
		1183
		1184	return '${' . $name . '}';
		1185	}
		1186
		1187
		1188	##############################################################################
		1189	# Method: normalise_space()
		1190	#
		1191	# Strips leading and trailing whitespace and collapses sequences of whitespace
		1192	# characters to a single space.
		1193	#
		1194
		1195	sub normalise_space {
		1196	my($self, $text) = @_;
		1197
		1198	$text =~ s/^\s+//s;
		1199	$text =~ s/\s+$//s;
		1200	$text =~ s/\s\s+/ /sg;
		1201
		1202	return $text;
		1203	}
		1204
		1205
		1206	##############################################################################
		1207	# Method: array_to_hash()
		1208	#
		1209	# Helper routine for collapse().
		1210	# Attempts to 'fold' an array of hashes into an hash of hashes. Returns a
		1211	# reference to the hash on success or the original array if folding is
		1212	# not possible. Behaviour is controlled by 'keyattr' option.
		1213	#
		1214
		1215	sub array_to_hash {
		1216	my $self = shift;
		1217	my $name = shift;
		1218	my $arrayref = shift;
		1219
		1220	my $hashref = $self->new_hashref;
		1221
		1222	my($i, $key, $val, $flag);
		1223
		1224
		1225	# Handle keyattr => { .... }
		1226
		1227	if(ref($self->{opt}->{keyattr}) eq 'HASH') {
		1228	return($arrayref) unless(exists($self->{opt}->{keyattr}->{$name}));
		1229	($key, $flag) = @{$self->{opt}->{keyattr}->{$name}};
		1230	for($i = 0; $i < @$arrayref; $i++) {
		1231	if(UNIVERSAL::isa($arrayref->[$i], 'HASH') and
		1232	exists($arrayref->[$i]->{$key})
		1233	) {
		1234	$val = $arrayref->[$i]->{$key};
		1235	if(ref($val)) {
		1236	$self->die_or_warn("<$name> element has non-scalar '$key' key attribute");
		1237	return($arrayref);
		1238	}
		1239	$val = $self->normalise_space($val)
		1240	if($self->{opt}->{normalisespace} == 1);
		1241	$self->die_or_warn("<$name> element has non-unique value in '$key' key attribute: $val")
		1242	if(exists($hashref->{$val}));
		1243	$hashref->{$val} = { %{$arrayref->[$i]} };
		1244	$hashref->{$val}->{"-$key"} = $hashref->{$val}->{$key} if($flag eq '-');
		1245	delete $hashref->{$val}->{$key} unless($flag eq '+');
		1246	}
		1247	else {
		1248	$self->die_or_warn("<$name> element has no '$key' key attribute");
		1249	return($arrayref);
		1250	}
		1251	}
		1252	}
		1253
		1254
		1255	# Or assume keyattr => [ .... ]
		1256
		1257	else {
		1258	my $default_keys =
		1259	join(',', @DefKeyAttr) eq join(',', @{$self->{opt}->{keyattr}});
		1260
		1261	ELEMENT: for($i = 0; $i < @$arrayref; $i++) {
		1262	return($arrayref) unless(UNIVERSAL::isa($arrayref->[$i], 'HASH'));
		1263
		1264	foreach $key (@{$self->{opt}->{keyattr}}) {
		1265	if(defined($arrayref->[$i]->{$key})) {
		1266	$val = $arrayref->[$i]->{$key};
		1267	if(ref($val)) {
		1268	$self->die_or_warn("<$name> element has non-scalar '$key' key attribute")
		1269	if not $default_keys;
		1270	return($arrayref);
		1271	}
		1272	$val = $self->normalise_space($val)
		1273	if($self->{opt}->{normalisespace} == 1);
		1274	$self->die_or_warn("<$name> element has non-unique value in '$key' key attribute: $val")
		1275	if(exists($hashref->{$val}));
		1276	$hashref->{$val} = { %{$arrayref->[$i]} };
		1277	delete $hashref->{$val}->{$key};
		1278	next ELEMENT;
		1279	}
		1280	}
		1281
		1282	return($arrayref); # No keyfield matched
		1283	}
		1284	}
		1285
		1286	# collapse any hashes which now only have a 'content' key
		1287
		1288	if($self->{opt}->{collapseagain}) {
		1289	$hashref = $self->collapse_content($hashref);
		1290	}
		1291
		1292	return($hashref);
		1293	}
		1294
		1295
		1296	##############################################################################
		1297	# Method: die_or_warn()
		1298	#
		1299	# Takes a diagnostic message and does one of three things:
		1300	# 1. dies if strict mode is enabled
		1301	# 2. warns if warnings are enabled but strict mode is not
		1302	# 3. ignores message and resturns silently if neither strict mode nor warnings
		1303	# are enabled
		1304	#
		1305
		1306	sub die_or_warn {
		1307	my $self = shift;
		1308	my $msg = shift;
		1309
		1310	croak $msg if($StrictMode);
		1311	carp "Warning: $msg" if($^W);
		1312	}
		1313
		1314
		1315	##############################################################################
		1316	# Method: new_hashref()
		1317	#
		1318	# This is a hook routine for overriding in a sub-class. Some people believe
		1319	# that using Tie::IxHash here will solve order-loss problems.
		1320	#
		1321
		1322	sub new_hashref {
		1323	my $self = shift;
		1324
		1325	return { @_ };
		1326	}
		1327
		1328
		1329	##############################################################################
		1330	# Method: collapse_content()
		1331	#
		1332	# Helper routine for array_to_hash
		1333	#
		1334	# Arguments expected are:
		1335	# - an XML::Simple object
		1336	# - a hasref
		1337	# the hashref is a former array, turned into a hash by array_to_hash because
		1338	# of the presence of key attributes
		1339	# at this point collapse_content avoids over-complicated structures like
		1340	# dir => { libexecdir => { content => '$exec_prefix/libexec' },
		1341	# localstatedir => { content => '$prefix' },
		1342	# }
		1343	# into
		1344	# dir => { libexecdir => '$exec_prefix/libexec',
		1345	# localstatedir => '$prefix',
		1346	# }
		1347
		1348	sub collapse_content {
		1349	my $self = shift;
		1350	my $hashref = shift;
		1351
		1352	my $contentkey = $self->{opt}->{contentkey};
		1353
		1354	# first go through the values,checking that they are fit to collapse
		1355	foreach my $val (values %$hashref) {
		1356	return $hashref unless ( (ref($val) eq 'HASH')
		1357	and (keys %$val == 1)
		1358	and (exists $val->{$contentkey})
		1359	);
		1360	}
		1361
		1362	# now collapse them
		1363	foreach my $key (keys %$hashref) {
		1364	$hashref->{$key}= $hashref->{$key}->{$contentkey};
		1365	}
		1366
		1367	return $hashref;
		1368	}
		1369
		1370
		1371	##############################################################################
		1372	# Method: value_to_xml()
		1373	#
		1374	# Helper routine for XMLout() - recurses through a data structure building up
		1375	# and returning an XML representation of that structure as a string.
		1376	#
		1377	# Arguments expected are:
		1378	# - the data structure to be encoded (usually a reference)
		1379	# - the XML tag name to use for this item
		1380	# - a string of spaces for use as the current indent level
		1381	#
		1382
		1383	sub value_to_xml {
		1384	my $self = shift;;
		1385
		1386
		1387	# Grab the other arguments
		1388
		1389	my($ref, $name, $indent) = @_;
		1390
		1391	my $named = (defined($name) and $name ne '' ? 1 : 0);
		1392
		1393	my $nl = "\n";
		1394
		1395	my $is_root = $indent eq '' ? 1 : 0; # Warning, dirty hack!
		1396	if($self->{opt}->{noindent}) {
		1397	$indent = '';
		1398	$nl = '';
		1399	}
		1400
		1401
		1402	# Convert to XML
		1403
		1404	if(ref($ref)) {
		1405	croak "circular data structures not supported"
		1406	if(grep($_ == $ref, @{$self->{_ancestors}}));
		1407	push @{$self->{_ancestors}}, $ref;
		1408	}
		1409	else {
		1410	if($named) {
		1411	return(join('',
		1412	$indent, '<', $name, '>',
		1413	($self->{opt}->{noescape} ? $ref : $self->escape_value($ref)),
		1414	'</', $name, ">", $nl
		1415	));
		1416	}
		1417	else {
		1418	return("$ref$nl");
		1419	}
		1420	}
		1421
		1422
		1423	# Unfold hash to array if possible
		1424
		1425	if(UNIVERSAL::isa($ref, 'HASH') # It is a hash
		1426	and keys %$ref # and it's not empty
		1427	and $self->{opt}->{keyattr} # and folding is enabled
		1428	and !$is_root # and its not the root element
		1429	) {
		1430	$ref = $self->hash_to_array($name, $ref);
		1431	}
		1432
		1433
		1434	my @result = ();
		1435	my($key, $value);
		1436
		1437
		1438	# Handle hashrefs
		1439
		1440	if(UNIVERSAL::isa($ref, 'HASH')) {
		1441
		1442	# Reintermediate grouped values if applicable
		1443
		1444	if($self->{opt}->{grouptags}) {
		1445	$ref = $self->copy_hash($ref);
		1446	while(my($key, $val) = each %$ref) {
		1447	if($self->{opt}->{grouptags}->{$key}) {
		1448	$ref->{$key} = { $self->{opt}->{grouptags}->{$key} => $val };
		1449	}
		1450	}
		1451	}
		1452
		1453
		1454	# Scan for namespace declaration attributes
		1455
		1456	my $nsdecls = '';
		1457	my $default_ns_uri;
		1458	if($self->{nsup}) {
		1459	$ref = $self->copy_hash($ref);
		1460	$self->{nsup}->push_context();
		1461
		1462	# Look for default namespace declaration first
		1463
		1464	if(exists($ref->{xmlns})) {
		1465	$self->{nsup}->declare_prefix('', $ref->{xmlns});
		1466	$nsdecls .= qq( xmlns="$ref->{xmlns}");
		1467	delete($ref->{xmlns});
		1468	}
		1469	$default_ns_uri = $self->{nsup}->get_uri('');
		1470
		1471
		1472	# Then check all the other keys
		1473
		1474	foreach my $qname (keys(%$ref)) {
		1475	my($uri, $lname) = $self->{nsup}->parse_jclark_notation($qname);
		1476	if($uri) {
		1477	if($uri eq $xmlns_ns) {
		1478	$self->{nsup}->declare_prefix($lname, $ref->{$qname});
		1479	$nsdecls .= qq( xmlns:$lname="$ref->{$qname}");
		1480	delete($ref->{$qname});
		1481	}
		1482	}
		1483	}
		1484
		1485	# Translate any remaining Clarkian names
		1486
		1487	foreach my $qname (keys(%$ref)) {
		1488	my($uri, $lname) = $self->{nsup}->parse_jclark_notation($qname);
		1489	if($uri) {
		1490	if($default_ns_uri and $uri eq $default_ns_uri) {
		1491	$ref->{$lname} = $ref->{$qname};
		1492	delete($ref->{$qname});
		1493	}
		1494	else {
		1495	my $prefix = $self->{nsup}->get_prefix($uri);
		1496	unless($prefix) {
		1497	# $self->{nsup}->declare_prefix(undef, $uri);
		1498	# $prefix = $self->{nsup}->get_prefix($uri);
		1499	$prefix = $self->{ns_prefix}++;
		1500	$self->{nsup}->declare_prefix($prefix, $uri);
		1501	$nsdecls .= qq( xmlns:$prefix="$uri");
		1502	}
		1503	$ref->{"$prefix:$lname"} = $ref->{$qname};
		1504	delete($ref->{$qname});
		1505	}
		1506	}
		1507	}
		1508	}
		1509
		1510
		1511	my @nested = ();
		1512	my $text_content = undef;
		1513	if($named) {
		1514	push @result, $indent, '<', $name, $nsdecls;
		1515	}
		1516
		1517	if(keys %$ref) {
		1518	my $first_arg = 1;
		1519	foreach my $key ($self->sorted_keys($name, $ref)) {
		1520	my $value = $ref->{$key};
		1521	next if(substr($key, 0, 1) eq '-');
		1522	if(!defined($value)) {
		1523	next if $self->{opt}->{suppressempty};
		1524	unless(exists($self->{opt}->{suppressempty})
		1525	and !defined($self->{opt}->{suppressempty})
		1526	) {
		1527	carp 'Use of uninitialized value' if($^W);
		1528	}
		1529	if($key eq $self->{opt}->{contentkey}) {
		1530	$text_content = '';
		1531	}
		1532	else {
		1533	$value = exists($self->{opt}->{suppressempty}) ? {} : '';
		1534	}
		1535	}
		1536
		1537	if(!ref($value)
		1538	and $self->{opt}->{valueattr}
		1539	and $self->{opt}->{valueattr}->{$key}
		1540	) {
		1541	$value = { $self->{opt}->{valueattr}->{$key} => $value };
		1542	}
		1543
		1544	if(ref($value) or $self->{opt}->{noattr}) {
		1545	push @nested,
		1546	$self->value_to_xml($value, $key, "$indent ");
		1547	}
		1548	else {
		1549	$value = $self->escape_value($value) unless($self->{opt}->{noescape});
		1550	if($key eq $self->{opt}->{contentkey}) {
		1551	$text_content = $value;
		1552	}
		1553	else {
		1554	push @result, "\n$indent " . ' ' x length($name)
		1555	if($self->{opt}->{attrindent} and !$first_arg);
		1556	push @result, ' ', $key, '="', $value , '"';
		1557	$first_arg = 0;
		1558	}
		1559	}
		1560	}
		1561	}
		1562	else {
		1563	$text_content = '';
		1564	}
		1565
		1566	if(@nested or defined($text_content)) {
		1567	if($named) {
		1568	push @result, ">";
		1569	if(defined($text_content)) {
		1570	push @result, $text_content;
		1571	$nested[0] =~ s/^\s+// if(@nested);
		1572	}
		1573	else {
		1574	push @result, $nl;
		1575	}
		1576	if(@nested) {
		1577	push @result, @nested, $indent;
		1578	}
		1579	push @result, '</', $name, ">", $nl;
		1580	}
		1581	else {
		1582	push @result, @nested; # Special case if no root elements
		1583	}
		1584	}
		1585	else {
		1586	push @result, " />", $nl;
		1587	}
		1588	$self->{nsup}->pop_context() if($self->{nsup});
		1589	}
		1590
		1591
		1592	# Handle arrayrefs
		1593
		1594	elsif(UNIVERSAL::isa($ref, 'ARRAY')) {
		1595	foreach $value (@$ref) {
		1596	next if !defined($value) and $self->{opt}->{suppressempty};
		1597	if(!ref($value)) {
		1598	push @result,
		1599	$indent, '<', $name, '>',
		1600	($self->{opt}->{noescape} ? $value : $self->escape_value($value)),
		1601	'</', $name, ">$nl";
		1602	}
		1603	elsif(UNIVERSAL::isa($value, 'HASH')) {
		1604	push @result, $self->value_to_xml($value, $name, $indent);
		1605	}
		1606	else {
		1607	push @result,
		1608	$indent, '<', $name, ">$nl",
		1609	$self->value_to_xml($value, 'anon', "$indent "),
		1610	$indent, '</', $name, ">$nl";
		1611	}
		1612	}
		1613	}
		1614
		1615	else {
		1616	croak "Can't encode a value of type: " . ref($ref);
		1617	}
		1618
		1619
		1620	pop @{$self->{_ancestors}} if(ref($ref));
		1621
		1622	return(join('', @result));
		1623	}
		1624
		1625
		1626	##############################################################################
		1627	# Method: sorted_keys()
		1628	#
		1629	# Returns the keys of the referenced hash sorted into alphabetical order, but
		1630	# with the 'key' key (as in KeyAttr) first, if there is one.
		1631	#
		1632
		1633	sub sorted_keys {
		1634	my($self, $name, $ref) = @_;
		1635
		1636	return keys %$ref if $self->{opt}->{nosort};
		1637
		1638	my %hash = %$ref;
		1639	my $keyattr = $self->{opt}->{keyattr};
		1640
		1641	my @key;
		1642
		1643	if(ref $keyattr eq 'HASH') {
		1644	if(exists $keyattr->{$name} and exists $hash{$keyattr->{$name}->[0]}) {
		1645	push @key, $keyattr->{$name}->[0];
		1646	delete $hash{$keyattr->{$name}->[0]};
		1647	}
		1648	}
		1649	elsif(ref $keyattr eq 'ARRAY') {
		1650	foreach (@{$keyattr}) {
		1651	if(exists $hash{$_}) {
		1652	push @key, $_;
		1653	delete $hash{$_};
		1654	last;
		1655	}
		1656	}
		1657	}
		1658
		1659	return(@key, sort keys %hash);
		1660	}
		1661
		1662	##############################################################################
		1663	# Method: escape_value()
		1664	#
		1665	# Helper routine for automatically escaping values for XMLout().
		1666	# Expects a scalar data value. Returns escaped version.
		1667	#
		1668
		1669	sub escape_value {
		1670	my($self, $data) = @_;
		1671
		1672	return '' unless(defined($data));
		1673
		1674	$data =~ s/&/&/sg;
		1675	$data =~ s/</</sg;
		1676	$data =~ s/>/>/sg;
		1677	$data =~ s/"/"/sg;
		1678
		1679	my $level = $self->{opt}->{numericescape} or return $data;
		1680
		1681	return $self->numeric_escape($data, $level);
		1682	}
		1683
		1684	sub numeric_escape {
		1685	my($self, $data, $level) = @_;
		1686
		1687	use utf8; # required for 5.6
		1688
		1689	if($self->{opt}->{numericescape} eq '2') {
		1690	$data =~ s/([^\x00-\x7F])/'&#' . ord($1) . ';'/gse;
		1691	}
		1692	else {
		1693	$data =~ s/([^\x00-\xFF])/'&#' . ord($1) . ';'/gse;
		1694	}
		1695
		1696	return $data;
		1697	}
		1698
		1699
		1700	##############################################################################
		1701	# Method: hash_to_array()
		1702	#
		1703	# Helper routine for value_to_xml().
		1704	# Attempts to 'unfold' a hash of hashes into an array of hashes. Returns a
		1705	# reference to the array on success or the original hash if unfolding is
		1706	# not possible.
		1707	#
		1708
		1709	sub hash_to_array {
		1710	my $self = shift;
		1711	my $parent = shift;
		1712	my $hashref = shift;
		1713
		1714	my $arrayref = [];
		1715
		1716	my($key, $value);
		1717
		1718	my @keys = $self->{opt}->{nosort} ? keys %$hashref : sort keys %$hashref;
		1719	foreach $key (@keys) {
		1720	$value = $hashref->{$key};
		1721	return($hashref) unless(UNIVERSAL::isa($value, 'HASH'));
		1722
		1723	if(ref($self->{opt}->{keyattr}) eq 'HASH') {
		1724	return($hashref) unless(defined($self->{opt}->{keyattr}->{$parent}));
		1725	push @$arrayref, $self->copy_hash(
		1726	$value, $self->{opt}->{keyattr}->{$parent}->[0] => $key
		1727	);
		1728	}
		1729	else {
		1730	push(@$arrayref, { $self->{opt}->{keyattr}->[0] => $key, %$value });
		1731	}
		1732	}
		1733
		1734	return($arrayref);
		1735	}
		1736
		1737
		1738	##############################################################################
		1739	# Method: copy_hash()
		1740	#
		1741	# Helper routine for hash_to_array(). When unfolding a hash of hashes into
		1742	# an array of hashes, we need to copy the key from the outer hash into the
		1743	# inner hash. This routine makes a copy of the original hash so we don't
		1744	# destroy the original data structure. You might wish to override this
		1745	# method if you're using tied hashes and don't want them to get untied.
		1746	#
		1747
		1748	sub copy_hash {
		1749	my($self, $orig, @extra) = @_;
		1750
		1751	return { @extra, %$orig };
		1752	}
		1753
		1754	##############################################################################
		1755	# Methods required for building trees from SAX events
		1756	##############################################################################
		1757
		1758	sub start_document {
		1759	my $self = shift;
		1760
		1761	$self->handle_options('in') unless($self->{opt});
		1762
		1763	$self->{lists} = [];
		1764	$self->{curlist} = $self->{tree} = [];
		1765	}
		1766
		1767
		1768	sub start_element {
		1769	my $self = shift;
		1770	my $element = shift;
		1771
		1772	my $name = $element->{Name};
		1773	if($self->{opt}->{nsexpand}) {
		1774	$name = $element->{LocalName} \|\| '';
		1775	if($element->{NamespaceURI}) {
		1776	$name = '{' . $element->{NamespaceURI} . '}' . $name;
		1777	}
		1778	}
		1779	my $attributes = {};
		1780	if($element->{Attributes}) { # Might be undef
		1781	foreach my $attr (values %{$element->{Attributes}}) {
		1782	if($self->{opt}->{nsexpand}) {
		1783	my $name = $attr->{LocalName} \|\| '';
		1784	if($attr->{NamespaceURI}) {
		1785	$name = '{' . $attr->{NamespaceURI} . '}' . $name
		1786	}
		1787	$name = 'xmlns' if($name eq $bad_def_ns_jcn);
		1788	$attributes->{$name} = $attr->{Value};
		1789	}
		1790	else {
		1791	$attributes->{$attr->{Name}} = $attr->{Value};
		1792	}
		1793	}
		1794	}
		1795	my $newlist = [ $attributes ];
		1796	push @{ $self->{lists} }, $self->{curlist};
		1797	push @{ $self->{curlist} }, $name => $newlist;
		1798	$self->{curlist} = $newlist;
		1799	}
		1800
		1801
		1802	sub characters {
		1803	my $self = shift;
		1804	my $chars = shift;
		1805
		1806	my $text = $chars->{Data};
		1807	my $clist = $self->{curlist};
		1808	my $pos = $#$clist;
		1809
		1810	if ($pos > 0 and $clist->[$pos - 1] eq '0') {
		1811	$clist->[$pos] .= $text;
		1812	}
		1813	else {
		1814	push @$clist, 0 => $text;
		1815	}
		1816	}
		1817
		1818
		1819	sub end_element {
		1820	my $self = shift;
		1821
		1822	$self->{curlist} = pop @{ $self->{lists} };
		1823	}
		1824
		1825
		1826	sub end_document {
		1827	my $self = shift;
		1828
		1829	delete($self->{curlist});
		1830	delete($self->{lists});
		1831
		1832	my $tree = $self->{tree};
		1833	delete($self->{tree});
		1834
		1835
		1836	# Return tree as-is to XMLin()
		1837
		1838	return($tree) if($self->{nocollapse});
		1839
		1840
		1841	# Or collapse it before returning it to SAX parser class
		1842
		1843	if($self->{opt}->{keeproot}) {
		1844	$tree = $self->collapse({}, @$tree);
		1845	}
		1846	else {
		1847	$tree = $self->collapse(@{$tree->[1]});
		1848	}
		1849
		1850	if($self->{opt}->{datahandler}) {
		1851	return($self->{opt}->{datahandler}->($self, $tree));
		1852	}
		1853
		1854	return($tree);
		1855	}
		1856
		1857	*xml_in = \&XMLin;
		1858	*xml_out = \&XMLout;
		1859
		1860	1;
		1861
		1862	__END__
		1863
		1864	=head1 QUICK START
		1865
		1866	Say you have a script called B<foo> and a file of configuration options
		1867	called B<foo.xml> containing this:
		1868
		1869	<config logdir="/var/log/foo/" debugfile="/tmp/foo.debug">
		1870	<server name="sahara" osname="solaris" osversion="2.6">
		1871	<address>10.0.0.101</address>
		1872	<address>10.0.1.101</address>
		1873	</server>
		1874	<server name="gobi" osname="irix" osversion="6.5">
		1875	<address>10.0.0.102</address>
		1876	</server>
		1877	<server name="kalahari" osname="linux" osversion="2.0.34">
		1878	<address>10.0.0.103</address>
		1879	<address>10.0.1.103</address>
		1880	</server>
		1881	</config>
		1882
		1883	The following lines of code in B<foo>:
		1884
		1885	use XML::Simple;
		1886
		1887	my $config = XMLin();
		1888
		1889	will 'slurp' the configuration options into the hashref $config (because no
		1890	arguments are passed to C<XMLin()> the name and location of the XML file will
		1891	be inferred from name and location of the script). You can dump out the
		1892	contents of the hashref using Data::Dumper:
		1893
		1894	use Data::Dumper;
		1895
		1896	print Dumper($config);
		1897
		1898	which will produce something like this (formatting has been adjusted for
		1899	brevity):
		1900
		1901	{
		1902	'logdir' => '/var/log/foo/',
		1903	'debugfile' => '/tmp/foo.debug',
		1904	'server' => {
		1905	'sahara' => {
		1906	'osversion' => '2.6',
		1907	'osname' => 'solaris',
		1908	'address' => [ '10.0.0.101', '10.0.1.101' ]
		1909	},
		1910	'gobi' => {
		1911	'osversion' => '6.5',
		1912	'osname' => 'irix',
		1913	'address' => '10.0.0.102'
		1914	},
		1915	'kalahari' => {
		1916	'osversion' => '2.0.34',
		1917	'osname' => 'linux',
		1918	'address' => [ '10.0.0.103', '10.0.1.103' ]
		1919	}
		1920	}
		1921	}
		1922
		1923	Your script could then access the name of the log directory like this:
		1924
		1925	print $config->{logdir};
		1926
		1927	similarly, the second address on the server 'kalahari' could be referenced as:
		1928
		1929	print $config->{server}->{kalahari}->{address}->[1];
		1930
		1931	What could be simpler? (Rhetorical).
		1932
		1933	For simple requirements, that's really all there is to it. If you want to
		1934	store your XML in a different directory or file, or pass it in as a string or
		1935	even pass it in via some derivative of an IO::Handle, you'll need to check out
		1936	L<"OPTIONS">. If you want to turn off or tweak the array folding feature (that
		1937	neat little transformation that produced $config->{server}) you'll find options
		1938	for that as well.
		1939
		1940	If you want to generate XML (for example to write a modified version of
		1941	$config back out as XML), check out C<XMLout()>.
		1942
		1943	If your needs are not so simple, this may not be the module for you. In that
		1944	case, you might want to read L<"WHERE TO FROM HERE?">.
		1945
		1946	=head1 DESCRIPTION
		1947
		1948	The XML::Simple module provides a simple API layer on top of an underlying XML
		1949	parsing module (either XML::Parser or one of the SAX2 parser modules). Two
		1950	functions are exported: C<XMLin()> and C<XMLout()>. Note: you can explicity
		1951	request the lower case versions of the function names: C<xml_in()> and
		1952	C<xml_out()>.
		1953
		1954	The simplest approach is to call these two functions directly, but an
		1955	optional object oriented interface (see L<"OPTIONAL OO INTERFACE"> below)
		1956	allows them to be called as methods of an B<XML::Simple> object. The object
		1957	interface can also be used at either end of a SAX pipeline.
		1958
		1959	=head2 XMLin()
		1960
		1961	Parses XML formatted data and returns a reference to a data structure which
		1962	contains the same information in a more readily accessible form. (Skip
		1963	down to L<"EXAMPLES"> below, for more sample code).
		1964
		1965	C<XMLin()> accepts an optional XML specifier followed by zero or more 'name =>
		1966	value' option pairs. The XML specifier can be one of the following:
		1967
		1968	=over 4
		1969
		1970	=item A filename
		1971
		1972	If the filename contains no directory components C<XMLin()> will look for the
		1973	file in each directory in the SearchPath (see L<"OPTIONS"> below) or in the
		1974	current directory if the SearchPath option is not defined. eg:
		1975
		1976	$ref = XMLin('/etc/params.xml');
		1977
		1978	Note, the filename '-' can be used to parse from STDIN.
		1979
		1980	=item undef
		1981
		1982	If there is no XML specifier, C<XMLin()> will check the script directory and
		1983	each of the SearchPath directories for a file with the same name as the script
		1984	but with the extension '.xml'. Note: if you wish to specify options, you
		1985	must specify the value 'undef'. eg:
		1986
		1987	$ref = XMLin(undef, ForceArray => 1);
		1988
		1989	=item A string of XML
		1990
		1991	A string containing XML (recognised by the presence of '<' and '>' characters)
		1992	will be parsed directly. eg:
		1993
		1994	$ref = XMLin('<opt username="bob" password="flurp" />');
		1995
		1996	=item An IO::Handle object
		1997
		1998	An IO::Handle object will be read to EOF and its contents parsed. eg:
		1999
		2000	$fh = IO::File->new('/etc/params.xml');
		2001	$ref = XMLin($fh);
		2002
		2003	=back
		2004
		2005	=head2 XMLout()
		2006
		2007	Takes a data structure (generally a hashref) and returns an XML encoding of
		2008	that structure. If the resulting XML is parsed using C<XMLin()>, it should
		2009	return a data structure equivalent to the original (see caveats below).
		2010
		2011	The C<XMLout()> function can also be used to output the XML as SAX events
		2012	see the C<Handler> option and L<"SAX SUPPORT"> for more details).
		2013
		2014	When translating hashes to XML, hash keys which have a leading '-' will be
		2015	silently skipped. This is the approved method for marking elements of a
		2016	data structure which should be ignored by C<XMLout>. (Note: If these items
		2017	were not skipped the key names would be emitted as element or attribute names
		2018	with a leading '-' which would not be valid XML).
		2019
		2020	=head2 Caveats
		2021
		2022	Some care is required in creating data structures which will be passed to
		2023	C<XMLout()>. Hash keys from the data structure will be encoded as either XML
		2024	element names or attribute names. Therefore, you should use hash key names
		2025	which conform to the relatively strict XML naming rules:
		2026
		2027	Names in XML must begin with a letter. The remaining characters may be
		2028	letters, digits, hyphens (-), underscores (_) or full stops (.). It is also
		2029	allowable to include one colon (:) in an element name but this should only be
		2030	used when working with namespaces (B<XML::Simple> can only usefully work with
		2031	namespaces when teamed with a SAX Parser).
		2032
		2033	You can use other punctuation characters in hash values (just not in hash
		2034	keys) however B<XML::Simple> does not support dumping binary data.
		2035
		2036	If you break these rules, the current implementation of C<XMLout()> will
		2037	simply emit non-compliant XML which will be rejected if you try to read it
		2038	back in. (A later version of B<XML::Simple> might take a more proactive
		2039	approach).
		2040
		2041	Note also that although you can nest hashes and arrays to arbitrary levels,
		2042	circular data structures are not supported and will cause C<XMLout()> to die.
		2043
		2044	If you wish to 'round-trip' arbitrary data structures from Perl to XML and back
		2045	to Perl, then you should probably disable array folding (using the KeyAttr
		2046	option) both with C<XMLout()> and with C<XMLin()>. If you still don't get the
		2047	expected results, you may prefer to use L<XML::Dumper> which is designed for
		2048	exactly that purpose.
		2049
		2050	Refer to L<"WHERE TO FROM HERE?"> if C<XMLout()> is too simple for your needs.
		2051
		2052
		2053	=head1 OPTIONS
		2054
		2055	B<XML::Simple> supports a number of options (in fact as each release of
		2056	B<XML::Simple> adds more options, the module's claim to the name 'Simple'
		2057	becomes increasingly tenuous). If you find yourself repeatedly having to
		2058	specify the same options, you might like to investigate L<"OPTIONAL OO
		2059	INTERFACE"> below.
		2060
		2061	If you can't be bothered reading the documentation, refer to
		2062	L<"STRICT MODE"> to automatically catch common mistakes.
		2063
		2064	Because there are so many options, it's hard for new users to know which ones
		2065	are important, so here are the two you really need to know about:
		2066
		2067	=over 4
		2068
		2069	=item *
		2070
		2071	check out C<ForceArray> because you'll almost certainly want to turn it on
		2072
		2073	=item *
		2074
		2075	make sure you know what the C<KeyAttr> option does and what its default value is
		2076	because it may surprise you otherwise (note in particular that 'KeyAttr'
		2077	affects both C<XMLin> and C<XMLout>)
		2078
		2079	=back
		2080
		2081	The option name headings below have a trailing 'comment' - a hash followed by
		2082	two pieces of metadata:
		2083
		2084	=over 4
		2085
		2086	=item *
		2087
		2088	Options are marked with 'I<in>' if they are recognised by C<XMLin()> and
		2089	'I<out>' if they are recognised by C<XMLout()>.
		2090
		2091	=item *
		2092
		2093	Each option is also flagged to indicate whether it is:
		2094
		2095	'important' - don't use the module until you understand this one
		2096	'handy' - you can skip this on the first time through
		2097	'advanced' - you can skip this on the second time through
		2098	'SAX only' - don't worry about this unless you're using SAX (or
		2099	alternatively if you need this, you also need SAX)
		2100	'seldom used' - you'll probably never use this unless you were the
		2101	person that requested the feature
		2102
		2103	=back
		2104
		2105	The options are listed alphabetically:
		2106
		2107	Note: option names are no longer case sensitive so you can use the mixed case
		2108	versions shown here; all lower case as required by versions 2.03 and earlier;
		2109	or you can add underscores between the words (eg: key_attr).
		2110
		2111
		2112	=head2 AttrIndent => 1 I<# out - handy>
		2113
		2114	When you are using C<XMLout()>, enable this option to have attributes printed
		2115	one-per-line with sensible indentation rather than all on one line.
		2116
		2117	=head2 Cache => [ cache schemes ] I<# in - advanced>
		2118
		2119	Because loading the B<XML::Parser> module and parsing an XML file can consume a
		2120	significant number of CPU cycles, it is often desirable to cache the output of
		2121	C<XMLin()> for later reuse.
		2122
		2123	When parsing from a named file, B<XML::Simple> supports a number of caching
		2124	schemes. The 'Cache' option may be used to specify one or more schemes (using
		2125	an anonymous array). Each scheme will be tried in turn in the hope of finding
		2126	a cached pre-parsed representation of the XML file. If no cached copy is
		2127	found, the file will be parsed and the first cache scheme in the list will be
		2128	used to save a copy of the results. The following cache schemes have been
		2129	implemented:
		2130
		2131	=over 4
		2132
		2133	=item storable
		2134
		2135	Utilises B<Storable.pm> to read/write a cache file with the same name as the
		2136	XML file but with the extension .stor
		2137
		2138	=item memshare
		2139
		2140	When a file is first parsed, a copy of the resulting data structure is retained
		2141	in memory in the B<XML::Simple> module's namespace. Subsequent calls to parse
		2142	the same file will return a reference to this structure. This cached version
		2143	will persist only for the life of the Perl interpreter (which in the case of
		2144	mod_perl for example, may be some significant time).
		2145
		2146	Because each caller receives a reference to the same data structure, a change
		2147	made by one caller will be visible to all. For this reason, the reference
		2148	returned should be treated as read-only.
		2149
		2150	=item memcopy
		2151
		2152	This scheme works identically to 'memshare' (above) except that each caller
		2153	receives a reference to a new data structure which is a copy of the cached
		2154	version. Copying the data structure will add a little processing overhead,
		2155	therefore this scheme should only be used where the caller intends to modify
		2156	the data structure (or wishes to protect itself from others who might). This
		2157	scheme uses B<Storable.pm> to perform the copy.
		2158
		2159	=back
		2160
		2161	Warning! The memory-based caching schemes compare the timestamp on the file to
		2162	the time when it was last parsed. If the file is stored on an NFS filesystem
		2163	(or other network share) and the clock on the file server is not exactly
		2164	synchronised with the clock where your script is run, updates to the source XML
		2165	file may appear to be ignored.
		2166
		2167	=head2 ContentKey => 'keyname' I<# in+out - seldom used>
		2168
		2169	When text content is parsed to a hash value, this option let's you specify a
		2170	name for the hash key to override the default 'content'. So for example:
		2171
		2172	XMLin('<opt one="1">Text</opt>', ContentKey => 'text')
		2173
		2174	will parse to:
		2175
		2176	{ 'one' => 1, 'text' => 'Text' }
		2177
		2178	instead of:
		2179
		2180	{ 'one' => 1, 'content' => 'Text' }
		2181
		2182	C<XMLout()> will also honour the value of this option when converting a hashref
		2183	to XML.
		2184
		2185	You can also prefix your selected key name with a '-' character to have
		2186	C<XMLin()> try a little harder to eliminate unnecessary 'content' keys after
		2187	array folding. For example:
		2188
		2189	XMLin(
		2190	'<opt><item name="one">First</item><item name="two">Second</item></opt>',
		2191	KeyAttr => {item => 'name'},
		2192	ForceArray => [ 'item' ],
		2193	ContentKey => '-content'
		2194	)
		2195
		2196	will parse to:
		2197
		2198	{
		2199	'item' => {
		2200	'one' => 'First'
		2201	'two' => 'Second'
		2202	}
		2203	}
		2204
		2205	rather than this (without the '-'):
		2206
		2207	{
		2208	'item' => {
		2209	'one' => { 'content' => 'First' }
		2210	'two' => { 'content' => 'Second' }
		2211	}
		2212	}
		2213
		2214	=head2 DataHandler => code_ref I<# in - SAX only>
		2215
		2216	When you use an B<XML::Simple> object as a SAX handler, it will return a
		2217	'simple tree' data structure in the same format as C<XMLin()> would return. If
		2218	this option is set (to a subroutine reference), then when the tree is built the
		2219	subroutine will be called and passed two arguments: a reference to the
		2220	B<XML::Simple> object and a reference to the data tree. The return value from
		2221	the subroutine will be returned to the SAX driver. (See L<"SAX SUPPORT"> for
		2222	more details).
		2223
		2224	=head2 ForceArray => 1 I<# in - important>
		2225
		2226	This option should be set to '1' to force nested elements to be represented
		2227	as arrays even when there is only one. Eg, with ForceArray enabled, this
		2228	XML:
		2229
		2230	<opt>
		2231	<name>value</name>
		2232	</opt>
		2233
		2234	would parse to this:
		2235
		2236	{
		2237	'name' => [
		2238	'value'
		2239	]
		2240	}
		2241
		2242	instead of this (the default):
		2243
		2244	{
		2245	'name' => 'value'
		2246	}
		2247
		2248	This option is especially useful if the data structure is likely to be written
		2249	back out as XML and the default behaviour of rolling single nested elements up
		2250	into attributes is not desirable.
		2251
		2252	If you are using the array folding feature, you should almost certainly enable
		2253	this option. If you do not, single nested elements will not be parsed to
		2254	arrays and therefore will not be candidates for folding to a hash. (Given that
		2255	the default value of 'KeyAttr' enables array folding, the default value of this
		2256	option should probably also have been enabled too - sorry).
		2257
		2258	=head2 ForceArray => [ names ] I<# in - important>
		2259
		2260	This alternative (and preferred) form of the 'ForceArray' option allows you to
		2261	specify a list of element names which should always be forced into an array
		2262	representation, rather than the 'all or nothing' approach above.
		2263
		2264	It is also possible (since version 2.05) to include compiled regular
		2265	expressions in the list - any element names which match the pattern will be
		2266	forced to arrays. If the list contains only a single regex, then it is not
		2267	necessary to enclose it in an arrayref. Eg:
		2268
		2269	ForceArray => qr/_list$/
		2270
		2271	=head2 ForceContent => 1 I<# in - seldom used>
		2272
		2273	When C<XMLin()> parses elements which have text content as well as attributes,
		2274	the text content must be represented as a hash value rather than a simple
		2275	scalar. This option allows you to force text content to always parse to
		2276	a hash value even when there are no attributes. So for example:
		2277
		2278	XMLin('<opt><x>text1</x><y a="2">text2</y></opt>', ForceContent => 1)
		2279
		2280	will parse to:
		2281
		2282	{
		2283	'x' => { 'content' => 'text1' },
		2284	'y' => { 'a' => 2, 'content' => 'text2' }
		2285	}
		2286
		2287	instead of:
		2288
		2289	{
		2290	'x' => 'text1',
		2291	'y' => { 'a' => 2, 'content' => 'text2' }
		2292	}
		2293
		2294	=head2 GroupTags => { grouping tag => grouped tag } I<# in+out - handy>
		2295
		2296	You can use this option to eliminate extra levels of indirection in your Perl
		2297	data structure. For example this XML:
		2298
		2299	<opt>
		2300	<searchpath>
		2301	<dir>/usr/bin</dir>
		2302	<dir>/usr/local/bin</dir>
		2303	<dir>/usr/X11/bin</dir>
		2304	</searchpath>
		2305	</opt>
		2306
		2307	Would normally be read into a structure like this:
		2308
		2309	{
		2310	searchpath => {
		2311	dir => [ '/usr/bin', '/usr/local/bin', '/usr/X11/bin' ]
		2312	}
		2313	}
		2314
		2315	But when read in with the appropriate value for 'GroupTags':
		2316
		2317	my $opt = XMLin($xml, GroupTags => { searchpath => 'dir' });
		2318
		2319	It will return this simpler structure:
		2320
		2321	{
		2322	searchpath => [ '/usr/bin', '/usr/local/bin', '/usr/X11/bin' ]
		2323	}
		2324
		2325	The grouping element (C<< <searchpath> >> in the example) must not contain any
		2326	attributes or elements other than the grouped element.
		2327
		2328	You can specify multiple 'grouping element' to 'grouped element' mappings in
		2329	the same hashref. If this option is combined with C<KeyAttr>, the array
		2330	folding will occur first and then the grouped element names will be eliminated.
		2331
		2332	C<XMLout> will also use the grouptag mappings to re-introduce the tags around
		2333	the grouped elements. Beware though that this will occur in all places that
		2334	the 'grouping tag' name occurs - you probably don't want to use the same name
		2335	for elements as well as attributes.
		2336
		2337	=head2 Handler => object_ref I<# out - SAX only>
		2338
		2339	Use the 'Handler' option to have C<XMLout()> generate SAX events rather than
		2340	returning a string of XML. For more details see L<"SAX SUPPORT"> below.
		2341
		2342	Note: the current implementation of this option generates a string of XML
		2343	and uses a SAX parser to translate it into SAX events. The normal encoding
		2344	rules apply here - your data must be UTF8 encoded unless you specify an
		2345	alternative encoding via the 'XMLDecl' option; and by the time the data reaches
		2346	the handler object, it will be in UTF8 form regardless of the encoding you
		2347	supply. A future implementation of this option may generate the events
		2348	directly.
		2349
		2350	=head2 KeepRoot => 1 I<# in+out - handy>
		2351
		2352	In its attempt to return a data structure free of superfluous detail and
		2353	unnecessary levels of indirection, C<XMLin()> normally discards the root
		2354	element name. Setting the 'KeepRoot' option to '1' will cause the root element
		2355	name to be retained. So after executing this code:
		2356
		2357	$config = XMLin('<config tempdir="/tmp" />', KeepRoot => 1)
		2358
		2359	You'll be able to reference the tempdir as
		2360	C<$config-E<gt>{config}-E<gt>{tempdir}> instead of the default
		2361	C<$config-E<gt>{tempdir}>.
		2362
		2363	Similarly, setting the 'KeepRoot' option to '1' will tell C<XMLout()> that the
		2364	data structure already contains a root element name and it is not necessary to
		2365	add another.
		2366
		2367	=head2 KeyAttr => [ list ] I<# in+out - important>
		2368
		2369	This option controls the 'array folding' feature which translates nested
		2370	elements from an array to a hash. It also controls the 'unfolding' of hashes
		2371	to arrays.
		2372
		2373	For example, this XML:
		2374
		2375	<opt>
		2376	<user login="grep" fullname="Gary R Epstein" />
		2377	<user login="stty" fullname="Simon T Tyson" />
		2378	</opt>
		2379
		2380	would, by default, parse to this:
		2381
		2382	{
		2383	'user' => [
		2384	{
		2385	'login' => 'grep',
		2386	'fullname' => 'Gary R Epstein'
		2387	},
		2388	{
		2389	'login' => 'stty',
		2390	'fullname' => 'Simon T Tyson'
		2391	}
		2392	]
		2393	}
		2394
		2395	If the option 'KeyAttr => "login"' were used to specify that the 'login'
		2396	attribute is a key, the same XML would parse to:
		2397
		2398	{
		2399	'user' => {
		2400	'stty' => {
		2401	'fullname' => 'Simon T Tyson'
		2402	},
		2403	'grep' => {
		2404	'fullname' => 'Gary R Epstein'
		2405	}
		2406	}
		2407	}
		2408
		2409	The key attribute names should be supplied in an arrayref if there is more
		2410	than one. C<XMLin()> will attempt to match attribute names in the order
		2411	supplied. C<XMLout()> will use the first attribute name supplied when
		2412	'unfolding' a hash into an array.
		2413
		2414	Note 1: The default value for 'KeyAttr' is ['name', 'key', 'id']. If you do
		2415	not want folding on input or unfolding on output you must setting this option
		2416	to an empty list to disable the feature.
		2417
		2418	Note 2: If you wish to use this option, you should also enable the
		2419	C<ForceArray> option. Without 'ForceArray', a single nested element will be
		2420	rolled up into a scalar rather than an array and therefore will not be folded
		2421	(since only arrays get folded).
		2422
		2423	=head2 KeyAttr => { list } I<# in+out - important>
		2424
		2425	This alternative (and preferred) method of specifiying the key attributes
		2426	allows more fine grained control over which elements are folded and on which
		2427	attributes. For example the option 'KeyAttr => { package => 'id' } will cause
		2428	any package elements to be folded on the 'id' attribute. No other elements
		2429	which have an 'id' attribute will be folded at all.
		2430
		2431	Note: C<XMLin()> will generate a warning (or a fatal error in L<"STRICT MODE">)
		2432	if this syntax is used and an element which does not have the specified key
		2433	attribute is encountered (eg: a 'package' element without an 'id' attribute, to
		2434	use the example above). Warnings will only be generated if B<-w> is in force.
		2435
		2436	Two further variations are made possible by prefixing a '+' or a '-' character
		2437	to the attribute name:
		2438
		2439	The option 'KeyAttr => { user => "+login" }' will cause this XML:
		2440
		2441	<opt>
		2442	<user login="grep" fullname="Gary R Epstein" />
		2443	<user login="stty" fullname="Simon T Tyson" />
		2444	</opt>
		2445
		2446	to parse to this data structure:
		2447
		2448	{
		2449	'user' => {
		2450	'stty' => {
		2451	'fullname' => 'Simon T Tyson',
		2452	'login' => 'stty'
		2453	},
		2454	'grep' => {
		2455	'fullname' => 'Gary R Epstein',
		2456	'login' => 'grep'
		2457	}
		2458	}
		2459	}
		2460
		2461	The '+' indicates that the value of the key attribute should be copied rather
		2462	than moved to the folded hash key.
		2463
		2464	A '-' prefix would produce this result:
		2465
		2466	{
		2467	'user' => {
		2468	'stty' => {
		2469	'fullname' => 'Simon T Tyson',
		2470	'-login' => 'stty'
		2471	},
		2472	'grep' => {
		2473	'fullname' => 'Gary R Epstein',
		2474	'-login' => 'grep'
		2475	}
		2476	}
		2477	}
		2478
		2479	As described earlier, C<XMLout> will ignore hash keys starting with a '-'.
		2480
		2481	=head2 NoAttr => 1 I<# in+out - handy>
		2482
		2483	When used with C<XMLout()>, the generated XML will contain no attributes.
		2484	All hash key/values will be represented as nested elements instead.
		2485
		2486	When used with C<XMLin()>, any attributes in the XML will be ignored.
		2487
		2488	=head2 NoEscape => 1 I<# out - seldom used>
		2489
		2490	By default, C<XMLout()> will translate the characters 'E<lt>', 'E<gt>', '&' and
		2491	'"' to '<', '>', '&' and '&quot' respectively. Use this option to
		2492	suppress escaping (presumably because you've already escaped the data in some
		2493	more sophisticated manner).
		2494
		2495	=head2 NoIndent => 1 I<# out - seldom used>
		2496
		2497	Set this option to 1 to disable C<XMLout()>'s default 'pretty printing' mode.
		2498	With this option enabled, the XML output will all be on one line (unless there
		2499	are newlines in the data) - this may be easier for downstream processing.
		2500
		2501	=head2 NoSort => 1 I<# out - seldom used>
		2502
		2503	Newer versions of XML::Simple sort elements and attributes alphabetically (*),
		2504	by default. Enable this option to suppress the sorting - possibly for
		2505	backwards compatibility.
		2506
		2507	* Actually, sorting is alphabetical but 'key' attribute or element names (as in
		2508	'KeyAttr') sort first. Also, when a hash of hashes is 'unfolded', the elements
		2509	are sorted alphabetically by the value of the key field.
		2510
		2511	=head2 NormaliseSpace => 0 \| 1 \| 2 I<# in - handy>
		2512
		2513	This option controls how whitespace in text content is handled. Recognised
		2514	values for the option are:
		2515
		2516	=over 4
		2517
		2518	=item *
		2519
		2520
		2521	normalisation of whitespace in attribute values which is mandated by the XML
		2522	recommendation)
		2523
		2524	=item *
		2525
		2526	1 = whitespace is normalised in any value used as a hash key (normalising means
		2527	removing leading and trailing whitespace and collapsing sequences of whitespace
		2528	characters to a single space)
		2529
		2530	=item *
		2531
		2532	2 = whitespace is normalised in all text content
		2533
		2534	=back
		2535
		2536	Note: you can spell this option with a 'z' if that is more natural for you.
		2537
		2538	=head2 NSExpand => 1 I<# in+out handy - SAX only>
		2539
		2540	This option controls namespace expansion - the translation of element and
		2541	attribute names of the form 'prefix:name' to '{uri}name'. For example the
		2542	element name 'xsl:template' might be expanded to:
		2543	'{http://www.w3.org/1999/XSL/Transform}template'.
		2544
		2545	By default, C<XMLin()> will return element names and attribute names exactly as
		2546	they appear in the XML. Setting this option to 1 will cause all element and
		2547	attribute names to be expanded to include their namespace prefix.
		2548
		2549	I<Note: You must be using a SAX parser for this option to work (ie: it does not
		2550	work with XML::Parser)>.
		2551
		2552	This option also controls whether C<XMLout()> performs the reverse translation
		2553	from '{uri}name' back to 'prefix:name'. The default is no translation. If
		2554	your data contains expanded names, you should set this option to 1 otherwise
		2555	C<XMLout> will emit XML which is not well formed.
		2556
		2557	I<Note: You must have the XML::NamespaceSupport module installed if you want
		2558	C<XMLout()> to translate URIs back to prefixes>.
		2559
		2560	=head2 NumericEscape => 0 \| 1 \| 2 I<# out - handy>
		2561
		2562	Use this option to have 'high' (non-ASCII) characters in your Perl data
		2563	structure converted to numeric entities (eg: €) in the XML output. Three
		2564	levels are possible:
		2565
		2566
		2567
		2568	1 - only characters above 0xFF are escaped (ie: characters in the 0x80-FF range are not escaped), possibly useful with ISO8859-1 output
		2569
		2570	2 - all characters above 0x7F are escaped (good for plain ASCII output)
		2571
		2572	=head2 OutputFile => <file specifier> I<# out - handy>
		2573
		2574	The default behaviour of C<XMLout()> is to return the XML as a string. If you
		2575	wish to write the XML to a file, simply supply the filename using the
		2576	'OutputFile' option.
		2577
		2578	This option also accepts an IO handle object - especially useful in Perl 5.8.0
		2579	and later for output using an encoding other than UTF-8, eg:
		2580
		2581	open my $fh, '>:encoding(iso-8859-1)', $path or die "open($path): $!";
		2582	XMLout($ref, OutputFile => $fh);
		2583
		2584	Note, XML::Simple does not require that the object you pass in to the
		2585	OutputFile option inherits from L<IO::Handle> - it simply assumes the object
		2586	supports a C<print> method.
		2587
		2588	=head2 ParserOpts => [ XML::Parser Options ] I<# in - don't use this>
		2589
		2590	I<Note: This option is now officially deprecated. If you find it useful, email
		2591	the author with an example of what you use it for. Do not use this option to
		2592	set the ProtocolEncoding, that's just plain wrong - fix the XML>.
		2593
		2594	This option allows you to pass parameters to the constructor of the underlying
		2595	XML::Parser object (which of course assumes you're not using SAX).
		2596
		2597	=head2 RootName => 'string' I<# out - handy>
		2598
		2599	By default, when C<XMLout()> generates XML, the root element will be named
		2600	'opt'. This option allows you to specify an alternative name.
		2601
		2602	Specifying either undef or the empty string for the RootName option will
		2603	produce XML with no root elements. In most cases the resulting XML fragment
		2604	will not be 'well formed' and therefore could not be read back in by C<XMLin()>.
		2605	Nevertheless, the option has been found to be useful in certain circumstances.
		2606
		2607	=head2 SearchPath => [ list ] I<# in - handy>
		2608
		2609	If you pass C<XMLin()> a filename, but the filename include no directory
		2610	component, you can use this option to specify which directories should be
		2611	searched to locate the file. You might use this option to search first in the
		2612	user's home directory, then in a global directory such as /etc.
		2613
		2614	If a filename is provided to C<XMLin()> but SearchPath is not defined, the
		2615	file is assumed to be in the current directory.
		2616
		2617	If the first parameter to C<XMLin()> is undefined, the default SearchPath
		2618	will contain only the directory in which the script itself is located.
		2619	Otherwise the default SearchPath will be empty.
		2620
		2621	=head2 SuppressEmpty => 1 \| '' \| undef I<# in+out - handy>
		2622
		2623	This option controls what C<XMLin()> should do with empty elements (no
		2624	attributes and no content). The default behaviour is to represent them as
		2625	empty hashes. Setting this option to a true value (eg: 1) will cause empty
		2626	elements to be skipped altogether. Setting the option to 'undef' or the empty
		2627	string will cause empty elements to be represented as the undefined value or
		2628	the empty string respectively. The latter two alternatives are a little
		2629	easier to test for in your code than a hash with no keys.
		2630
		2631	The option also controls what C<XMLout()> does with undefined values. Setting
		2632	the option to undef causes undefined values to be output as empty elements
		2633	(rather than empty attributes), it also suppresses the generation of warnings
		2634	about undefined values. Setting the option to a true value (eg: 1) causes
		2635	undefined values to be skipped altogether on output.
		2636
		2637	=head2 ValueAttr => [ names ] I<# in - handy>
		2638
		2639	Use this option to deal elements which always have a single attribute and no
		2640	content. Eg:
		2641
		2642	<opt>
		2643	<colour value="red" />
		2644	<size value="XXL" />
		2645	</opt>
		2646
		2647	Setting C<< ValueAttr => [ 'value' ] >> will cause the above XML to parse to:
		2648
		2649	{
		2650	colour => 'red',
		2651	size => 'XXL'
		2652	}
		2653
		2654	instead of this (the default):
		2655
		2656	{
		2657	colour => { value => 'red' },
		2658	size => { value => 'XXL' }
		2659	}
		2660
		2661	Note: This form of the ValueAttr option is not compatible with C<XMLout()> -
		2662	since the attribute name is discarded at parse time, the original XML cannot be
		2663	reconstructed.
		2664
		2665	=head2 ValueAttr => { element => attribute, ... } I<# in+out - handy>
		2666
		2667	This (preferred) form of the ValueAttr option requires you to specify both
		2668	the element and the attribute names. This is not only safer, it also allows
		2669	the original XML to be reconstructed by C<XMLout()>.
		2670
		2671	Note: You probably don't want to use this option and the NoAttr option at the
		2672	same time.
		2673
		2674	=head2 Variables => { name => value } I<# in - handy>
		2675
		2676	This option allows variables in the XML to be expanded when the file is read.
		2677	(there is no facility for putting the variable names back if you regenerate
		2678	XML using C<XMLout>).
		2679
		2680	A 'variable' is any text of the form C<${name}> which occurs in an attribute
		2681	value or in the text content of an element. If 'name' matches a key in the
		2682	supplied hashref, C<${name}> will be replaced with the corresponding value from
		2683	the hashref. If no matching key is found, the variable will not be replaced.
		2684	Names must match the regex: C<[\w.]+> (ie: only 'word' characters and dots are
		2685	allowed).
		2686
		2687	=head2 VarAttr => 'attr_name' I<# in - handy>
		2688
		2689	In addition to the variables defined using C<Variables>, this option allows
		2690	variables to be defined in the XML. A variable definition consists of an
		2691	element with an attribute called 'attr_name' (the value of the C<VarAttr>
		2692	option). The value of the attribute will be used as the variable name and the
		2693	text content of the element will be used as the value. A variable defined in
		2694	this way will override a variable defined using the C<Variables> option. For
		2695	example:
		2696
		2697	XMLin( '<opt>
		2698	<dir name="prefix">/usr/local/apache</dir>
		2699	<dir name="exec_prefix">${prefix}</dir>
		2700	<dir name="bindir">${exec_prefix}/bin</dir>
		2701	</opt>',
		2702	VarAttr => 'name', ContentKey => '-content'
		2703	);
		2704
		2705	produces the following data structure:
		2706
		2707	{
		2708	dir => {
		2709	prefix => '/usr/local/apache',
		2710	exec_prefix => '/usr/local/apache',
		2711	bindir => '/usr/local/apache/bin',
		2712	}
		2713	}
		2714
		2715	=head2 XMLDecl => 1 or XMLDecl => 'string' I<# out - handy>
		2716
		2717	If you want the output from C<XMLout()> to start with the optional XML
		2718	declaration, simply set the option to '1'. The default XML declaration is:
		2719
		2720	<?xml version='1.0' standalone='yes'?>
		2721
		2722	If you want some other string (for example to declare an encoding value), set
		2723	the value of this option to the complete string you require.
		2724
		2725
		2726	=head1 OPTIONAL OO INTERFACE
		2727
		2728	The procedural interface is both simple and convenient however there are a
		2729	couple of reasons why you might prefer to use the object oriented (OO)
		2730	interface:
		2731
		2732	=over 4
		2733
		2734	=item *
		2735
		2736	to define a set of default values which should be used on all subsequent calls
		2737	to C<XMLin()> or C<XMLout()>
		2738
		2739	=item *
		2740
		2741	to override methods in B<XML::Simple> to provide customised behaviour
		2742
		2743	=back
		2744
		2745	The default values for the options described above are unlikely to suit
		2746	everyone. The OO interface allows you to effectively override B<XML::Simple>'s
		2747	defaults with your preferred values. It works like this:
		2748
		2749	First create an XML::Simple parser object with your preferred defaults:
		2750
		2751	my $xs = XML::Simple->new(ForceArray => 1, KeepRoot => 1);
		2752
		2753	then call C<XMLin()> or C<XMLout()> as a method of that object:
		2754
		2755	my $ref = $xs->XMLin($xml);
		2756	my $xml = $xs->XMLout($ref);
		2757
		2758	You can also specify options when you make the method calls and these values
		2759	will be merged with the values specified when the object was created. Values
		2760	specified in a method call take precedence.
		2761
		2762	Note: when called as methods, the C<XMLin()> and C<XMLout()> routines may be
		2763	called as C<xml_in()> or C<xml_out()>. The method names are aliased so the
		2764	only difference is the aesthetics.
		2765
		2766	=head2 Parsing Methods
		2767
		2768	You can explicitly call one of the following methods rather than rely on the
		2769	C<xml_in()> method automatically determining whether the target to be parsed is
		2770	a string, a file or a filehandle:
		2771
		2772	=over 4
		2773
		2774	=item parse_string(text)
		2775
		2776	Works exactly like the C<xml_in()> method but assumes the first argument is
		2777	a string of XML (or a reference to a scalar containing a string of XML).
		2778
		2779	=item parse_file(filename)
		2780
		2781	Works exactly like the C<xml_in()> method but assumes the first argument is
		2782	the name of a file containing XML.
		2783
		2784	=item parse_fh(file_handle)
		2785
		2786	Works exactly like the C<xml_in()> method but assumes the first argument is
		2787	a filehandle which can be read to get XML.
		2788
		2789	=back
		2790
		2791	=head2 Hook Methods
		2792
		2793	You can make your own class which inherits from XML::Simple and overrides
		2794	certain behaviours. The following methods may provide useful 'hooks' upon
		2795	which to hang your modified behaviour. You may find other undocumented methods
		2796	by examining the source, but those may be subject to change in future releases.
		2797
		2798	=over 4
		2799
		2800	=item handle_options(direction, name => value ...)
		2801
		2802	This method will be called when one of the parsing methods or the C<XMLout()>
		2803	method is called. The initial argument will be a string (either 'in' or 'out')
		2804	and the remaining arguments will be name value pairs.
		2805
		2806	=item default_config_file()
		2807
		2808	Calculates and returns the name of the file which should be parsed if no
		2809	filename is passed to C<XMLin()> (default: C<$0.xml>).
		2810
		2811	=item build_simple_tree(filename, string)
		2812
		2813	Called from C<XMLin()> or any of the parsing methods. Takes either a file name
		2814	as the first argument or C<undef> followed by a 'string' as the second
		2815	argument. Returns a simple tree data structure. You could override this
		2816	method to apply your own transformations before the data structure is returned
		2817	to the caller.
		2818
		2819	=item new_hashref()
		2820
		2821	When the 'simple tree' data structure is being built, this method will be
		2822	called to create any required anonymous hashrefs.
		2823
		2824	=item sorted_keys(name, hashref)
		2825
		2826	Called when C<XMLout()> is translating a hashref to XML. This routine returns
		2827	a list of hash keys in the order that the corresponding attributes/elements
		2828	should appear in the output.
		2829
		2830	=item escape_value(string)
		2831
		2832	Called from C<XMLout()>, takes a string and returns a copy of the string with
		2833	XML character escaping rules applied.
		2834
		2835	=item numeric_escape(string)
		2836
		2837	Called from C<escape_value()>, to handle non-ASCII characters (depending on the
		2838	value of the NumericEscape option).
		2839
		2840	=item copy_hash(hashref, extra_key => value, ...)
		2841
		2842	Called from C<XMLout()>, when 'unfolding' a hash of hashes into an array of
		2843	hashes. You might wish to override this method if you're using tied hashes and
		2844	don't want them to get untied.
		2845
		2846	=back
		2847
		2848	=head2 Cache Methods
		2849
		2850	XML::Simple implements three caching schemes ('storable', 'memshare' and
		2851	'memcopy'). You can implement a custom caching scheme by implementing
		2852	two methods - one for reading from the cache and one for writing to it.
		2853
		2854	For example, you might implement a new 'dbm' scheme that stores cached data
		2855	structures using the L<MLDBM> module. First, you would add a
		2856	C<cache_read_dbm()> method which accepted a filename for use as a lookup key
		2857	and returned a data structure on success, or undef on failure. Then, you would
		2858	implement a C<cache_read_dbm()> method which accepted a data structure and a
		2859	filename.
		2860
		2861	You would use this caching scheme by specifying the option:
		2862
		2863	Cache => [ 'dbm' ]
		2864
		2865	=head1 STRICT MODE
		2866
		2867	If you import the B<XML::Simple> routines like this:
		2868
		2869	use XML::Simple qw(:strict);
		2870
		2871	the following common mistakes will be detected and treated as fatal errors
		2872
		2873	=over 4
		2874
		2875	=item *
		2876
		2877	Failing to explicitly set the C<KeyAttr> option - if you can't be bothered
		2878	reading about this option, turn it off with: KeyAttr => [ ]
		2879
		2880	=item *
		2881
		2882	Failing to explicitly set the C<ForceArray> option - if you can't be bothered
		2883	reading about this option, set it to the safest mode with: ForceArray => 1
		2884
		2885	=item *
		2886
		2887	Setting ForceArray to an array, but failing to list all the elements from the
		2888	KeyAttr hash.
		2889
		2890	=item *
		2891
		2892	Data error - KeyAttr is set to say { part => 'partnum' } but the XML contains
		2893	one or more E<lt>partE<gt> elements without a 'partnum' attribute (or nested
		2894	element). Note: if strict mode is not set but -w is, this condition triggers a
		2895	warning.
		2896
		2897	=item *
		2898
		2899	Data error - as above, but non-unique values are present in the key attribute
		2900	(eg: more than one E<lt>partE<gt> element with the same partnum). This will
		2901	also trigger a warning if strict mode is not enabled.
		2902
		2903	=item *
		2904
		2905	Data error - as above, but value of key attribute (eg: partnum) is not a
		2906	scalar string (due to nested elements etc). This will also trigger a warning
		2907	if strict mode is not enabled.
		2908
		2909	=back
		2910
		2911	=head1 SAX SUPPORT
		2912
		2913	From version 1.08_01, B<XML::Simple> includes support for SAX (the Simple API
		2914	for XML) - specifically SAX2.
		2915
		2916	In a typical SAX application, an XML parser (or SAX 'driver') module generates
		2917	SAX events (start of element, character data, end of element, etc) as it parses
		2918	an XML document and a 'handler' module processes the events to extract the
		2919	required data. This simple model allows for some interesting and powerful
		2920	possibilities:
		2921
		2922	=over 4
		2923
		2924	=item *
		2925
		2926	Applications written to the SAX API can extract data from huge XML documents
		2927	without the memory overheads of a DOM or tree API.
		2928
		2929	=item *
		2930
		2931	The SAX API allows for plug and play interchange of parser modules without
		2932	having to change your code to fit a new module's API. A number of SAX parsers
		2933	are available with capabilities ranging from extreme portability to blazing
		2934	performance.
		2935
		2936	=item *
		2937
		2938	A SAX 'filter' module can implement both a handler interface for receiving
		2939	data and a generator interface for passing modified data on to a downstream
		2940	handler. Filters can be chained together in 'pipelines'.
		2941
		2942	=item *
		2943
		2944	One filter module might split a data stream to direct data to two or more
		2945	downstream handlers.
		2946
		2947	=item *
		2948
		2949	Generating SAX events is not the exclusive preserve of XML parsing modules.
		2950	For example, a module might extract data from a relational database using DBI
		2951	and pass it on to a SAX pipeline for filtering and formatting.
		2952
		2953	=back
		2954
		2955	B<XML::Simple> can operate at either end of a SAX pipeline. For example,
		2956	you can take a data structure in the form of a hashref and pass it into a
		2957	SAX pipeline using the 'Handler' option on C<XMLout()>:
		2958
		2959	use XML::Simple;
		2960	use Some::SAX::Filter;
		2961	use XML::SAX::Writer;
		2962
		2963	my $ref = {
		2964	.... # your data here
		2965	};
		2966
		2967	my $writer = XML::SAX::Writer->new();
		2968	my $filter = Some::SAX::Filter->new(Handler => $writer);
		2969	my $simple = XML::Simple->new(Handler => $filter);
		2970	$simple->XMLout($ref);
		2971
		2972	You can also put B<XML::Simple> at the opposite end of the pipeline to take
		2973	advantage of the simple 'tree' data structure once the relevant data has been
		2974	isolated through filtering:
		2975
		2976	use XML::SAX;
		2977	use Some::SAX::Filter;
		2978	use XML::Simple;
		2979
		2980	my $simple = XML::Simple->new(ForceArray => 1, KeyAttr => ['partnum']);
		2981	my $filter = Some::SAX::Filter->new(Handler => $simple);
		2982	my $parser = XML::SAX::ParserFactory->parser(Handler => $filter);
		2983
		2984	my $ref = $parser->parse_uri('some_huge_file.xml');
		2985
		2986	print $ref->{part}->{'555-1234'};
		2987
		2988	You can build a filter by using an XML::Simple object as a handler and setting
		2989	its DataHandler option to point to a routine which takes the resulting tree,
		2990	modifies it and sends it off as SAX events to a downstream handler:
		2991
		2992	my $writer = XML::SAX::Writer->new();
		2993	my $filter = XML::Simple->new(
		2994	DataHandler => sub {
		2995	my $simple = shift;
		2996	my $data = shift;
		2997
		2998	# Modify $data here
		2999
		3000	$simple->XMLout($data, Handler => $writer);
		3001	}
		3002	);
		3003	my $parser = XML::SAX::ParserFactory->parser(Handler => $filter);
		3004
		3005	$parser->parse_uri($filename);
		3006
		3007	I<Note: In this last example, the 'Handler' option was specified in the call to
		3008	C<XMLout()> but it could also have been specified in the constructor>.
		3009
		3010	=head1 ENVIRONMENT
		3011
		3012	If you don't care which parser module B<XML::Simple> uses then skip this
		3013	section entirely (it looks more complicated than it really is).
		3014
		3015	B<XML::Simple> will default to using a B<SAX> parser if one is available or
		3016	B<XML::Parser> if SAX is not available.
		3017
		3018	You can dictate which parser module is used by setting either the environment
		3019	variable 'XML_SIMPLE_PREFERRED_PARSER' or the package variable
		3020	$XML::Simple::PREFERRED_PARSER to contain the module name. The following rules
		3021	are used:
		3022
		3023	=over 4
		3024
		3025	=item *
		3026
		3027	The package variable takes precedence over the environment variable if both are defined. To force B<XML::Simple> to ignore the environment settings and use
		3028	its default rules, you can set the package variable to an empty string.
		3029
		3030	=item *
		3031
		3032	If the 'preferred parser' is set to the string 'XML::Parser', then
		3033	L<XML::Parser> will be used (or C<XMLin()> will die if L<XML::Parser> is not
		3034	installed).
		3035
		3036	=item *
		3037
		3038	If the 'preferred parser' is set to some other value, then it is assumed to be
		3039	the name of a SAX parser module and is passed to L<XML::SAX::ParserFactory.>
		3040	If L<XML::SAX> is not installed, or the requested parser module is not
		3041	installed, then C<XMLin()> will die.
		3042
		3043	=item *
		3044
		3045	If the 'preferred parser' is not defined at all (the normal default
		3046	state), an attempt will be made to load L<XML::SAX>. If L<XML::SAX> is
		3047	installed, then a parser module will be selected according to
		3048	L<XML::SAX::ParserFactory>'s normal rules (which typically means the last SAX
		3049	parser installed).
		3050
		3051	=item *
		3052
		3053	if the 'preferred parser' is not defined and B<XML::SAX> is not
		3054	installed, then B<XML::Parser> will be used. C<XMLin()> will die if
		3055	L<XML::Parser> is not installed.
		3056
		3057	=back
		3058
		3059	Note: The B<XML::SAX> distribution includes an XML parser written entirely in
		3060	Perl. It is very portable but it is not very fast. You should consider
		3061	installing L<XML::LibXML> or L<XML::SAX::Expat> if they are available for your
		3062	platform.
		3063
		3064	=head1 ERROR HANDLING
		3065
		3066	The XML standard is very clear on the issue of non-compliant documents. An
		3067	error in parsing any single element (for example a missing end tag) must cause
		3068	the whole document to be rejected. B<XML::Simple> will die with an appropriate
		3069	message if it encounters a parsing error.
		3070
		3071	If dying is not appropriate for your application, you should arrange to call
		3072	C<XMLin()> in an eval block and look for errors in $@. eg:
		3073
		3074	my $config = eval { XMLin() };
		3075	PopUpMessage($@) if($@);
		3076
		3077	Note, there is a common misconception that use of B<eval> will significantly
		3078	slow down a script. While that may be true when the code being eval'd is in a
		3079	string, it is not true of code like the sample above.
		3080
		3081	=head1 EXAMPLES
		3082
		3083	When C<XMLin()> reads the following very simple piece of XML:
		3084
		3085	<opt username="testuser" password="frodo"></opt>
		3086
		3087	it returns the following data structure:
		3088
		3089	{
		3090	'username' => 'testuser',
		3091	'password' => 'frodo'
		3092	}
		3093
		3094	The identical result could have been produced with this alternative XML:
		3095
		3096	<opt username="testuser" password="frodo" />
		3097
		3098	Or this (although see 'ForceArray' option for variations):
		3099
		3100	<opt>
		3101	<username>testuser</username>
		3102	<password>frodo</password>
		3103	</opt>
		3104
		3105	Repeated nested elements are represented as anonymous arrays:
		3106
		3107	<opt>
		3108	<person firstname="Joe" lastname="Smith">
		3109	<email>joe@smith.com</email>
		3110	<email>jsmith@yahoo.com</email>
		3111	</person>
		3112	<person firstname="Bob" lastname="Smith">
		3113	<email>bob@smith.com</email>
		3114	</person>
		3115	</opt>
		3116
		3117	{
		3118	'person' => [
		3119	{
		3120	'email' => [
		3121	'joe@smith.com',
		3122	'jsmith@yahoo.com'
		3123	],
		3124	'firstname' => 'Joe',
		3125	'lastname' => 'Smith'
		3126	},
		3127	{
		3128	'email' => 'bob@smith.com',
		3129	'firstname' => 'Bob',
		3130	'lastname' => 'Smith'
		3131	}
		3132	]
		3133	}
		3134
		3135	Nested elements with a recognised key attribute are transformed (folded) from
		3136	an array into a hash keyed on the value of that attribute (see the C<KeyAttr>
		3137	option):
		3138
		3139	<opt>
		3140	<person key="jsmith" firstname="Joe" lastname="Smith" />
		3141	<person key="tsmith" firstname="Tom" lastname="Smith" />
		3142	<person key="jbloggs" firstname="Joe" lastname="Bloggs" />
		3143	</opt>
		3144
		3145	{
		3146	'person' => {
		3147	'jbloggs' => {
		3148	'firstname' => 'Joe',
		3149	'lastname' => 'Bloggs'
		3150	},
		3151	'tsmith' => {
		3152	'firstname' => 'Tom',
		3153	'lastname' => 'Smith'
		3154	},
		3155	'jsmith' => {
		3156	'firstname' => 'Joe',
		3157	'lastname' => 'Smith'
		3158	}
		3159	}
		3160	}
		3161
		3162
		3163	The <anon> tag can be used to form anonymous arrays:
		3164
		3165	<opt>
		3166	<head><anon>Col 1</anon><anon>Col 2</anon><anon>Col 3</anon></head>
		3167	<data><anon>R1C1</anon><anon>R1C2</anon><anon>R1C3</anon></data>
		3168	<data><anon>R2C1</anon><anon>R2C2</anon><anon>R2C3</anon></data>
		3169	<data><anon>R3C1</anon><anon>R3C2</anon><anon>R3C3</anon></data>
		3170	</opt>
		3171
		3172	{
		3173	'head' => [
		3174	[ 'Col 1', 'Col 2', 'Col 3' ]
		3175	],
		3176	'data' => [
		3177	[ 'R1C1', 'R1C2', 'R1C3' ],
		3178	[ 'R2C1', 'R2C2', 'R2C3' ],
		3179	[ 'R3C1', 'R3C2', 'R3C3' ]
		3180	]
		3181	}
		3182
		3183	Anonymous arrays can be nested to arbirtrary levels and as a special case, if
		3184	the surrounding tags for an XML document contain only an anonymous array the
		3185	arrayref will be returned directly rather than the usual hashref:
		3186
		3187	<opt>
		3188	<anon><anon>Col 1</anon><anon>Col 2</anon></anon>
		3189	<anon><anon>R1C1</anon><anon>R1C2</anon></anon>
		3190	<anon><anon>R2C1</anon><anon>R2C2</anon></anon>
		3191	</opt>
		3192
		3193	[
		3194	[ 'Col 1', 'Col 2' ],
		3195	[ 'R1C1', 'R1C2' ],
		3196	[ 'R2C1', 'R2C2' ]
		3197	]
		3198
		3199	Elements which only contain text content will simply be represented as a
		3200	scalar. Where an element has both attributes and text content, the element
		3201	will be represented as a hashref with the text content in the 'content' key
		3202	(see the C<ContentKey> option):
		3203
		3204	<opt>
		3205	<one>first</one>
		3206	<two attr="value">second</two>
		3207	</opt>
		3208
		3209	{
		3210	'one' => 'first',
		3211	'two' => { 'attr' => 'value', 'content' => 'second' }
		3212	}
		3213
		3214	Mixed content (elements which contain both text content and nested elements)
		3215	will be not be represented in a useful way - element order and significant
		3216	whitespace will be lost. If you need to work with mixed content, then
		3217	XML::Simple is not the right tool for your job - check out the next section.
		3218
		3219	=head1 WHERE TO FROM HERE?
		3220
		3221	B<XML::Simple> is able to present a simple API because it makes some
		3222	assumptions on your behalf. These include:
		3223
		3224	=over 4
		3225
		3226	=item *
		3227
		3228	You're not interested in text content consisting only of whitespace
		3229
		3230	=item *
		3231
		3232	You don't mind that when things get slurped into a hash the order is lost
		3233
		3234	=item *
		3235
		3236	You don't want fine-grained control of the formatting of generated XML
		3237
		3238	=item *
		3239
		3240	You would never use a hash key that was not a legal XML element name
		3241
		3242	=item *
		3243
		3244	You don't need help converting between different encodings
		3245
		3246	=back
		3247
		3248	In a serious XML project, you'll probably outgrow these assumptions fairly
		3249	quickly. This section of the document used to offer some advice on chosing a
		3250	more powerful option. That advice has now grown into the 'Perl-XML FAQ'
		3251	document which you can find at: L<http://perl-xml.sourceforge.net/faq/>
		3252
		3253	The advice in the FAQ boils down to a quick explanation of tree versus
		3254	event based parsers and then recommends:
		3255
		3256	For event based parsing, use SAX (do not set out to write any new code for
		3257	XML::Parser's handler API - it is obselete).
		3258
		3259	For tree-based parsing, you could choose between the 'Perlish' approach of
		3260	L<XML::Twig> and more standards based DOM implementations - preferably one with
		3261	XPath support.
		3262
		3263
		3264	=head1 SEE ALSO
		3265
		3266	B<XML::Simple> requires either L<XML::Parser> or L<XML::SAX>.
		3267
		3268	To generate documents with namespaces, L<XML::NamespaceSupport> is required.
		3269
		3270	The optional caching functions require L<Storable>.
		3271
		3272	Answers to Frequently Asked Questions about XML::Simple are bundled with this
		3273	distribution as: L<XML::Simple::FAQ>
		3274
		3275	=head1 COPYRIGHT
		3276
		3277	Copyright 1999-2004 Grant McLean E<lt>grantm@cpan.orgE<gt>
		3278
		3279	This library is free software; you can redistribute it and/or modify it
		3280	under the same terms as Perl itself.
		3281
		3282	=cut
		3283
		3284

Subversion Repositories Projects

(root)/MissionCockpit/tags/V0.1.0/perl/lib/XML/Simple.pm - Rev 365