Subversion Repositories Projects

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
598 rain-er 1
# $Id: Simple.pm,v 1.40 2007/08/15 10:36:48 grantm Exp $
2
 
3
package XML::Simple;
4
 
5
=head1 NAME
6
 
7
XML::Simple - Easy API to maintain XML (esp config files)
8
 
9
=head1 SYNOPSIS
10
 
11
    use XML::Simple;
12
 
13
    my $ref = XMLin([<xml file or string>] [, <options>]);
14
 
15
    my $xml = XMLout($hashref [, <options>]);
16
 
17
Or the object oriented way:
18
 
19
    require XML::Simple;
20
 
21
    my $xs = XML::Simple->new(options);
22
 
23
    my $ref = $xs->XMLin([<xml file or string>] [, <options>]);
24
 
25
    my $xml = $xs->XMLout($hashref [, <options>]);
26
 
27
(or see L<"SAX SUPPORT"> for 'the SAX way').
28
 
29
To catch common errors:
30
 
31
    use XML::Simple qw(:strict);
32
 
33
(see L<"STRICT MODE"> for more details).
34
 
35
=cut
36
 
37
# See after __END__ for more POD documentation
38
 
39
 
40
# Load essentials here, other modules loaded on demand later
41
 
42
use strict;
43
use Carp;
44
require Exporter;
45
 
46
 
47
##############################################################################
48
# Define some constants
49
#
50
 
51
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK $PREFERRED_PARSER);
52
 
53
@ISA               = qw(Exporter);
54
@EXPORT            = qw(XMLin XMLout);
55
@EXPORT_OK         = qw(xml_in xml_out);
56
$VERSION           = '2.18';
57
$PREFERRED_PARSER  = undef;
58
 
59
my $StrictMode     = 0;
60
 
61
my @KnownOptIn     = qw(keyattr keeproot forcecontent contentkey noattr
62
                        searchpath forcearray cache suppressempty parseropts
63
                        grouptags nsexpand datahandler varattr variables
64
                        normalisespace normalizespace valueattr);
65
 
66
my @KnownOptOut    = qw(keyattr keeproot contentkey noattr
67
                        rootname xmldecl outputfile noescape suppressempty
68
                        grouptags nsexpand handler noindent attrindent nosort
69
                        valueattr numericescape);
70
 
71
my @DefKeyAttr     = qw(name key id);
72
my $DefRootName    = qq(opt);
73
my $DefContentKey  = qq(content);
74
my $DefXmlDecl     = qq(<?xml version='1.0' standalone='yes'?>);
75
 
76
my $xmlns_ns       = 'http://www.w3.org/2000/xmlns/';
77
my $bad_def_ns_jcn = '{' . $xmlns_ns . '}';     # LibXML::SAX workaround
78
 
79
 
80
##############################################################################
81
# Globals for use by caching routines
82
#
83
 
84
my %MemShareCache  = ();
85
my %MemCopyCache   = ();
86
 
87
 
88
##############################################################################
89
# Wrapper for Exporter - handles ':strict'
90
#
91
 
92
sub import {
93
  # Handle the :strict tag
94
 
95
  $StrictMode = 1 if grep(/^:strict$/, @_);
96
 
97
  # Pass everything else to Exporter.pm
98
 
99
  @_ = grep(!/^:strict$/, @_);
100
  goto &Exporter::import;
101
}
102
 
103
 
104
##############################################################################
105
# Constructor for optional object interface.
106
#
107
 
108
sub new {
109
  my $class = shift;
110
 
111
  if(@_ % 2) {
112
    croak "Default options must be name=>value pairs (odd number supplied)";
113
  }
114
 
115
  my %known_opt;
116
  @known_opt{@KnownOptIn, @KnownOptOut} = (undef) x 100;
117
 
118
  my %raw_opt = @_;
119
  my %def_opt;
120
  while(my($key, $val) = each %raw_opt) {
121
    my $lkey = lc($key);
122
    $lkey =~ s/_//g;
123
    croak "Unrecognised option: $key" unless(exists($known_opt{$lkey}));
124
    $def_opt{$lkey} = $val;
125
  }
126
  my $self = { def_opt => \%def_opt };
127
 
128
  return(bless($self, $class));
129
}
130
 
131
 
132
##############################################################################
133
# Sub: _get_object()
134
#
135
# Helper routine called from XMLin() and XMLout() to create an object if none
136
# was provided.  Note, this routine does mess with the caller's @_ array.
137
#
138
 
139
sub _get_object {
140
  my $self;
141
  if($_[0]  and  UNIVERSAL::isa($_[0], 'XML::Simple')) {
142
    $self = shift;
143
  }
144
  else {
145
    $self = XML::Simple->new();
146
  }
147
 
148
  return $self;
149
}
150
 
151
 
152
##############################################################################
153
# Sub/Method: XMLin()
154
#
155
# Exported routine for slurping XML into a hashref - see pod for info.
156
#
157
# May be called as object method or as a plain function.
158
#
159
# Expects one arg for the source XML, optionally followed by a number of
160
# name => value option pairs.
161
#
162
 
163
sub XMLin {
164
  my $self = &_get_object;      # note, @_ is passed implicitly
165
 
166
  my $target = shift;
167
 
168
 
169
  # Work out whether to parse a string, a file or a filehandle
170
 
171
  if(not defined $target) {
172
    return $self->parse_file(undef, @_);
173
  }
174
 
175
  elsif($target eq '-') {
176
    local($/) = undef;
177
    $target = <STDIN>;
178
    return $self->parse_string(\$target, @_);
179
  }
180
 
181
  elsif(my $type = ref($target)) {
182
    if($type eq 'SCALAR') {
183
      return $self->parse_string($target, @_);
184
    }
185
    else {
186
      return $self->parse_fh($target, @_);
187
    }
188
  }
189
 
190
  elsif($target =~ m{<.*?>}s) {
191
    return $self->parse_string(\$target, @_);
192
  }
193
 
194
  else {
195
    return $self->parse_file($target, @_);
196
  }
197
}
198
 
199
 
200
##############################################################################
201
# Sub/Method: parse_file()
202
#
203
# Same as XMLin, but only parses from a named file.
204
#
205
 
206
sub parse_file {
207
  my $self = &_get_object;      # note, @_ is passed implicitly
208
 
209
  my $filename = shift;
210
 
211
  $self->handle_options('in', @_);
212
 
213
  $filename = $self->default_config_file if not defined $filename;
214
 
215
  $filename = $self->find_xml_file($filename, @{$self->{opt}->{searchpath}});
216
 
217
  # Check cache for previous parse
218
 
219
  if($self->{opt}->{cache}) {
220
    foreach my $scheme (@{$self->{opt}->{cache}}) {
221
      my $method = 'cache_read_' . $scheme;
222
      my $opt = $self->$method($filename);
223
      return($opt) if($opt);
224
    }
225
  }
226
 
227
  my $ref = $self->build_simple_tree($filename, undef);
228
 
229
  if($self->{opt}->{cache}) {
230
    my $method = 'cache_write_' . $self->{opt}->{cache}->[0];
231
    $self->$method($ref, $filename);
232
  }
233
 
234
  return $ref;
235
}
236
 
237
 
238
##############################################################################
239
# Sub/Method: parse_fh()
240
#
241
# Same as XMLin, but only parses from a filehandle.
242
#
243
 
244
sub parse_fh {
245
  my $self = &_get_object;      # note, @_ is passed implicitly
246
 
247
  my $fh = shift;
248
  croak "Can't use " . (defined $fh ? qq{string ("$fh")} : 'undef') .
249
        " as a filehandle" unless ref $fh;
250
 
251
  $self->handle_options('in', @_);
252
 
253
  return $self->build_simple_tree(undef, $fh);
254
}
255
 
256
 
257
##############################################################################
258
# Sub/Method: parse_string()
259
#
260
# Same as XMLin, but only parses from a string or a reference to a string.
261
#
262
 
263
sub parse_string {
264
  my $self = &_get_object;      # note, @_ is passed implicitly
265
 
266
  my $string = shift;
267
 
268
  $self->handle_options('in', @_);
269
 
270
  return $self->build_simple_tree(undef, ref $string ? $string : \$string);
271
}
272
 
273
 
274
##############################################################################
275
# Method: default_config_file()
276
#
277
# Returns the name of the XML file to parse if no filename (or XML string) 
278
# was provided.
279
#
280
 
281
sub default_config_file {
282
  my $self = shift;
283
 
284
  require File::Basename;
285
 
286
  my($basename, $script_dir, $ext) = File::Basename::fileparse($0, '\.[^\.]+');
287
 
288
  # Add script directory to searchpath
289
 
290
  if($script_dir) {
291
    unshift(@{$self->{opt}->{searchpath}}, $script_dir);
292
  }
293
 
294
  return $basename . '.xml';
295
}
296
 
297
 
298
##############################################################################
299
# Method: build_simple_tree()
300
#
301
# Builds a 'tree' data structure as provided by XML::Parser and then 
302
# 'simplifies' it as specified by the various options in effect.
303
#
304
 
305
sub build_simple_tree {
306
  my $self = shift;
307
 
308
  my $tree = $self->build_tree(@_);
309
 
310
  return $self->{opt}->{keeproot}
311
         ? $self->collapse({}, @$tree)
312
         : $self->collapse(@{$tree->[1]});
313
}
314
 
315
 
316
##############################################################################
317
# Method: build_tree()
318
#
319
# This routine will be called if there is no suitable pre-parsed tree in a
320
# cache.  It parses the XML and returns an XML::Parser 'Tree' style data
321
# structure (summarised in the comments for the collapse() routine below).
322
#
323
# XML::Simple requires the services of another module that knows how to parse
324
# XML.  If XML::SAX is installed, the default SAX parser will be used,
325
# otherwise XML::Parser will be used.
326
#
327
# This routine expects to be passed a filename as argument 1 or a 'string' as
328
# argument 2.  The 'string' might be a string of XML (passed by reference to
329
# save memory) or it might be a reference to an IO::Handle.  (This
330
# non-intuitive mess results in part from the way XML::Parser works but that's
331
# really no excuse).
332
#
333
 
334
sub build_tree {
335
  my $self     = shift;
336
  my $filename = shift;
337
  my $string   = shift;
338
 
339
 
340
  my $preferred_parser = $PREFERRED_PARSER;
341
  unless(defined($preferred_parser)) {
342
    $preferred_parser = $ENV{XML_SIMPLE_PREFERRED_PARSER} || '';
343
  }
344
  if($preferred_parser eq 'XML::Parser') {
345
    return($self->build_tree_xml_parser($filename, $string));
346
  }
347
 
348
  eval { require XML::SAX; };      # We didn't need it until now
349
  if($@) {                         # No XML::SAX - fall back to XML::Parser
350
    if($preferred_parser) {        # unless a SAX parser was expressly requested
351
      croak "XMLin() could not load XML::SAX";
352
    }
353
    return($self->build_tree_xml_parser($filename, $string));
354
  }
355
 
356
  $XML::SAX::ParserPackage = $preferred_parser if($preferred_parser);
357
 
358
  my $sp = XML::SAX::ParserFactory->parser(Handler => $self);
359
 
360
  $self->{nocollapse} = 1;
361
  my($tree);
362
  if($filename) {
363
    $tree = $sp->parse_uri($filename);
364
  }
365
  else {
366
    if(ref($string) && ref($string) ne 'SCALAR') {
367
      $tree = $sp->parse_file($string);
368
    }
369
    else {
370
      $tree = $sp->parse_string($$string);
371
    }
372
  }
373
 
374
  return($tree);
375
}
376
 
377
 
378
##############################################################################
379
# Method: build_tree_xml_parser()
380
#
381
# This routine will be called if XML::SAX is not installed, or if XML::Parser
382
# was specifically requested.  It takes the same arguments as build_tree() and
383
# returns the same data structure (XML::Parser 'Tree' style).
384
#
385
 
386
sub build_tree_xml_parser {
387
  my $self     = shift;
388
  my $filename = shift;
389
  my $string   = shift;
390
 
391
 
392
  eval {
393
    local($^W) = 0;      # Suppress warning from Expat.pm re File::Spec::load()
394
    require XML::Parser; # We didn't need it until now
395
  };
396
  if($@) {
397
    croak "XMLin() requires either XML::SAX or XML::Parser";
398
  }
399
 
400
  if($self->{opt}->{nsexpand}) {
401
    carp "'nsexpand' option requires XML::SAX";
402
  }
403
 
404
  my $xp = XML::Parser->new(Style => 'Tree', @{$self->{opt}->{parseropts}});
405
  my($tree);
406
  if($filename) {
407
    # $tree = $xp->parsefile($filename);  # Changed due to prob w/mod_perl
408
    local(*XML_FILE);
409
    open(XML_FILE, '<', $filename) || croak qq($filename - $!);
410
    $tree = $xp->parse(*XML_FILE);
411
    close(XML_FILE);
412
  }
413
  else {
414
    $tree = $xp->parse($$string);
415
  }
416
 
417
  return($tree);
418
}
419
 
420
 
421
##############################################################################
422
# Method: cache_write_storable()
423
#
424
# Wrapper routine for invoking Storable::nstore() to cache a parsed data
425
# structure.
426
#
427
 
428
sub cache_write_storable {
429
  my($self, $data, $filename) = @_;
430
 
431
  my $cachefile = $self->storable_filename($filename);
432
 
433
  require Storable;           # We didn't need it until now
434
 
435
  if ('VMS' eq $^O) {
436
    Storable::nstore($data, $cachefile);
437
  }
438
  else {
439
    # If the following line fails for you, your Storable.pm is old - upgrade
440
    Storable::lock_nstore($data, $cachefile);
441
  }
442
 
443
}
444
 
445
 
446
##############################################################################
447
# Method: cache_read_storable()
448
#
449
# Wrapper routine for invoking Storable::retrieve() to read a cached parsed
450
# data structure.  Only returns cached data if the cache file exists and is
451
# newer than the source XML file.
452
#
453
 
454
sub cache_read_storable {
455
  my($self, $filename) = @_;
456
 
457
  my $cachefile = $self->storable_filename($filename);
458
 
459
  return unless(-r $cachefile);
460
  return unless((stat($cachefile))[9] > (stat($filename))[9]);
461
 
462
  require Storable;           # We didn't need it until now
463
 
464
  if ('VMS' eq $^O) {
465
    return(Storable::retrieve($cachefile));
466
  }
467
  else {
468
    return(Storable::lock_retrieve($cachefile));
469
  }
470
 
471
}
472
 
473
 
474
##############################################################################
475
# Method: storable_filename()
476
#
477
# Translates the supplied source XML filename into a filename for the storable
478
# cached data.  A '.stor' suffix is added after stripping an optional '.xml'
479
# suffix.
480
#
481
 
482
sub storable_filename {
483
  my($self, $cachefile) = @_;
484
 
485
  $cachefile =~ s{(\.xml)?$}{.stor};
486
  return $cachefile;
487
}
488
 
489
 
490
##############################################################################
491
# Method: cache_write_memshare()
492
#
493
# Takes the supplied data structure reference and stores it away in a global
494
# hash structure.
495
#
496
 
497
sub cache_write_memshare {
498
  my($self, $data, $filename) = @_;
499
 
500
  $MemShareCache{$filename} = [time(), $data];
501
}
502
 
503
 
504
##############################################################################
505
# Method: cache_read_memshare()
506
#
507
# Takes a filename and looks in a global hash for a cached parsed version.
508
#
509
 
510
sub cache_read_memshare {
511
  my($self, $filename) = @_;
512
 
513
  return unless($MemShareCache{$filename});
514
  return unless($MemShareCache{$filename}->[0] > (stat($filename))[9]);
515
 
516
  return($MemShareCache{$filename}->[1]);
517
 
518
}
519
 
520
 
521
##############################################################################
522
# Method: cache_write_memcopy()
523
#
524
# Takes the supplied data structure and stores a copy of it in a global hash
525
# structure.
526
#
527
 
528
sub cache_write_memcopy {
529
  my($self, $data, $filename) = @_;
530
 
531
  require Storable;           # We didn't need it until now
532
 
533
  $MemCopyCache{$filename} = [time(), Storable::dclone($data)];
534
}
535
 
536
 
537
##############################################################################
538
# Method: cache_read_memcopy()
539
#
540
# Takes a filename and looks in a global hash for a cached parsed version.
541
# Returns a reference to a copy of that data structure.
542
#
543
 
544
sub cache_read_memcopy {
545
  my($self, $filename) = @_;
546
 
547
  return unless($MemCopyCache{$filename});
548
  return unless($MemCopyCache{$filename}->[0] > (stat($filename))[9]);
549
 
550
  return(Storable::dclone($MemCopyCache{$filename}->[1]));
551
 
552
}
553
 
554
 
555
##############################################################################
556
# Sub/Method: XMLout()
557
#
558
# Exported routine for 'unslurping' a data structure out to XML.
559
#
560
# Expects a reference to a data structure and an optional list of option
561
# name => value pairs.
562
#
563
 
564
sub XMLout {
565
  my $self = &_get_object;      # note, @_ is passed implicitly
566
 
567
  croak "XMLout() requires at least one argument" unless(@_);
568
  my $ref = shift;
569
 
570
  $self->handle_options('out', @_);
571
 
572
 
573
  # If namespace expansion is set, XML::NamespaceSupport is required
574
 
575
  if($self->{opt}->{nsexpand}) {
576
    require XML::NamespaceSupport;
577
    $self->{nsup} = XML::NamespaceSupport->new();
578
    $self->{ns_prefix} = 'aaa';
579
  }
580
 
581
 
582
  # Wrap top level arrayref in a hash
583
 
584
  if(UNIVERSAL::isa($ref, 'ARRAY')) {
585
    $ref = { anon => $ref };
586
  }
587
 
588
 
589
  # Extract rootname from top level hash if keeproot enabled
590
 
591
  if($self->{opt}->{keeproot}) {
592
    my(@keys) = keys(%$ref);
593
    if(@keys == 1) {
594
      $ref = $ref->{$keys[0]};
595
      $self->{opt}->{rootname} = $keys[0];
596
    }
597
  }
598
 
599
  # Ensure there are no top level attributes if we're not adding root elements
600
 
601
  elsif($self->{opt}->{rootname} eq '') {
602
    if(UNIVERSAL::isa($ref, 'HASH')) {
603
      my $refsave = $ref;
604
      $ref = {};
605
      foreach (keys(%$refsave)) {
606
        if(ref($refsave->{$_})) {
607
          $ref->{$_} = $refsave->{$_};
608
        }
609
        else {
610
          $ref->{$_} = [ $refsave->{$_} ];
611
        }
612
      }
613
    }
614
  }
615
 
616
 
617
  # Encode the hashref and write to file if necessary
618
 
619
  $self->{_ancestors} = [];
620
  my $xml = $self->value_to_xml($ref, $self->{opt}->{rootname}, '');
621
  delete $self->{_ancestors};
622
 
623
  if($self->{opt}->{xmldecl}) {
624
    $xml = $self->{opt}->{xmldecl} . "\n" . $xml;
625
  }
626
 
627
  if($self->{opt}->{outputfile}) {
628
    if(ref($self->{opt}->{outputfile})) {
629
      my $fh = $self->{opt}->{outputfile};
630
      if(UNIVERSAL::isa($fh, 'GLOB') and !UNIVERSAL::can($fh, 'print')) {
631
        eval { require IO::Handle; };
632
        croak $@ if $@;
633
      }
634
      return($fh->print($xml));
635
    }
636
    else {
637
      local(*OUT);
638
      open(OUT, '>', "$self->{opt}->{outputfile}") ||
639
        croak "open($self->{opt}->{outputfile}): $!";
640
      binmode(OUT, ':utf8') if($] >= 5.008);
641
      print OUT $xml || croak "print: $!";
642
      close(OUT);
643
    }
644
  }
645
  elsif($self->{opt}->{handler}) {
646
    require XML::SAX;
647
    my $sp = XML::SAX::ParserFactory->parser(
648
               Handler => $self->{opt}->{handler}
649
             );
650
    return($sp->parse_string($xml));
651
  }
652
  else {
653
    return($xml);
654
  }
655
}
656
 
657
 
658
##############################################################################
659
# Method: handle_options()
660
#
661
# Helper routine for both XMLin() and XMLout().  Both routines handle their
662
# first argument and assume all other args are options handled by this routine.
663
# Saves a hash of options in $self->{opt}.
664
#
665
# If default options were passed to the constructor, they will be retrieved
666
# here and merged with options supplied to the method call.
667
#
668
# First argument should be the string 'in' or the string 'out'.
669
#
670
# Remaining arguments should be name=>value pairs.  Sets up default values
671
# for options not supplied.  Unrecognised options are a fatal error.
672
#
673
 
674
sub handle_options  {
675
  my $self = shift;
676
  my $dirn = shift;
677
 
678
 
679
  # Determine valid options based on context
680
 
681
  my %known_opt;
682
  if($dirn eq 'in') {
683
    @known_opt{@KnownOptIn} = @KnownOptIn;
684
  }
685
  else {
686
    @known_opt{@KnownOptOut} = @KnownOptOut;
687
  }
688
 
689
 
690
  # Store supplied options in hashref and weed out invalid ones
691
 
692
  if(@_ % 2) {
693
    croak "Options must be name=>value pairs (odd number supplied)";
694
  }
695
  my %raw_opt  = @_;
696
  my $opt      = {};
697
  $self->{opt} = $opt;
698
 
699
  while(my($key, $val) = each %raw_opt) {
700
    my $lkey = lc($key);
701
    $lkey =~ s/_//g;
702
    croak "Unrecognised option: $key" unless($known_opt{$lkey});
703
    $opt->{$lkey} = $val;
704
  }
705
 
706
 
707
  # Merge in options passed to constructor
708
 
709
  foreach (keys(%known_opt)) {
710
    unless(exists($opt->{$_})) {
711
      if(exists($self->{def_opt}->{$_})) {
712
        $opt->{$_} = $self->{def_opt}->{$_};
713
      }
714
    }
715
  }
716
 
717
 
718
  # Set sensible defaults if not supplied
719
 
720
  if(exists($opt->{rootname})) {
721
    unless(defined($opt->{rootname})) {
722
      $opt->{rootname} = '';
723
    }
724
  }
725
  else {
726
    $opt->{rootname} = $DefRootName;
727
  }
728
 
729
  if($opt->{xmldecl}  and  $opt->{xmldecl} eq '1') {
730
    $opt->{xmldecl} = $DefXmlDecl;
731
  }
732
 
733
  if(exists($opt->{contentkey})) {
734
    if($opt->{contentkey} =~ m{^-(.*)$}) {
735
      $opt->{contentkey} = $1;
736
      $opt->{collapseagain} = 1;
737
    }
738
  }
739
  else {
740
    $opt->{contentkey} = $DefContentKey;
741
  }
742
 
743
  unless(exists($opt->{normalisespace})) {
744
    $opt->{normalisespace} = $opt->{normalizespace};
745
  }
746
  $opt->{normalisespace} = 0 unless(defined($opt->{normalisespace}));
747
 
748
  # Cleanups for values assumed to be arrays later
749
 
750
  if($opt->{searchpath}) {
751
    unless(ref($opt->{searchpath})) {
752
      $opt->{searchpath} = [ $opt->{searchpath} ];
753
    }
754
  }
755
  else  {
756
    $opt->{searchpath} = [ ];
757
  }
758
 
759
  if($opt->{cache}  and !ref($opt->{cache})) {
760
    $opt->{cache} = [ $opt->{cache} ];
761
  }
762
  if($opt->{cache}) {
763
    $_ = lc($_) foreach (@{$opt->{cache}});
764
    foreach my $scheme (@{$opt->{cache}}) {
765
      my $method = 'cache_read_' . $scheme;
766
      croak "Unsupported caching scheme: $scheme"
767
        unless($self->can($method));
768
    }
769
  }
770
 
771
  if(exists($opt->{parseropts})) {
772
    if($^W) {
773
      carp "Warning: " .
774
           "'ParserOpts' is deprecated, contact the author if you need it";
775
    }
776
  }
777
  else {
778
    $opt->{parseropts} = [ ];
779
  }
780
 
781
 
782
  # Special cleanup for {forcearray} which could be regex, arrayref or boolean
783
  # or left to default to 0
784
 
785
  if(exists($opt->{forcearray})) {
786
    if(ref($opt->{forcearray}) eq 'Regexp') {
787
      $opt->{forcearray} = [ $opt->{forcearray} ];
788
    }
789
 
790
    if(ref($opt->{forcearray}) eq 'ARRAY') {
791
      my @force_list = @{$opt->{forcearray}};
792
      if(@force_list) {
793
        $opt->{forcearray} = {};
794
        foreach my $tag (@force_list) {
795
          if(ref($tag) eq 'Regexp') {
796
            push @{$opt->{forcearray}->{_regex}}, $tag;
797
          }
798
          else {
799
            $opt->{forcearray}->{$tag} = 1;
800
          }
801
        }
802
      }
803
      else {
804
        $opt->{forcearray} = 0;
805
      }
806
    }
807
    else {
808
      $opt->{forcearray} = ( $opt->{forcearray} ? 1 : 0 );
809
    }
810
  }
811
  else {
812
    if($StrictMode  and  $dirn eq 'in') {
813
      croak "No value specified for 'ForceArray' option in call to XML$dirn()";
814
    }
815
    $opt->{forcearray} = 0;
816
  }
817
 
818
 
819
  # Special cleanup for {keyattr} which could be arrayref or hashref or left
820
  # to default to arrayref
821
 
822
  if(exists($opt->{keyattr}))  {
823
    if(ref($opt->{keyattr})) {
824
      if(ref($opt->{keyattr}) eq 'HASH') {
825
 
826
        # Make a copy so we can mess with it
827
 
828
        $opt->{keyattr} = { %{$opt->{keyattr}} };
829
 
830
 
831
        # Convert keyattr => { elem => '+attr' }
832
        # to keyattr => { elem => [ 'attr', '+' ] } 
833
 
834
        foreach my $el (keys(%{$opt->{keyattr}})) {
835
          if($opt->{keyattr}->{$el} =~ /^(\+|-)?(.*)$/) {
836
            $opt->{keyattr}->{$el} = [ $2, ($1 ? $1 : '') ];
837
            if($StrictMode  and  $dirn eq 'in') {
838
              next if($opt->{forcearray} == 1);
839
              next if(ref($opt->{forcearray}) eq 'HASH'
840
                      and $opt->{forcearray}->{$el});
841
              croak "<$el> set in KeyAttr but not in ForceArray";
842
            }
843
          }
844
          else {
845
            delete($opt->{keyattr}->{$el}); # Never reached (famous last words?)
846
          }
847
        }
848
      }
849
      else {
850
        if(@{$opt->{keyattr}} == 0) {
851
          delete($opt->{keyattr});
852
        }
853
      }
854
    }
855
    else {
856
      $opt->{keyattr} = [ $opt->{keyattr} ];
857
    }
858
  }
859
  else  {
860
    if($StrictMode) {
861
      croak "No value specified for 'KeyAttr' option in call to XML$dirn()";
862
    }
863
    $opt->{keyattr} = [ @DefKeyAttr ];
864
  }
865
 
866
 
867
  # Special cleanup for {valueattr} which could be arrayref or hashref
868
 
869
  if(exists($opt->{valueattr})) {
870
    if(ref($opt->{valueattr}) eq 'ARRAY') {
871
      $opt->{valueattrlist} = {};
872
      $opt->{valueattrlist}->{$_} = 1 foreach(@{ delete $opt->{valueattr} });
873
    }
874
  }
875
 
876
  # make sure there's nothing weird in {grouptags}
877
 
878
  if($opt->{grouptags}) {
879
    croak "Illegal value for 'GroupTags' option - expected a hashref"
880
      unless UNIVERSAL::isa($opt->{grouptags}, 'HASH');
881
 
882
    while(my($key, $val) = each %{$opt->{grouptags}}) {
883
      next if $key ne $val;
884
      croak "Bad value in GroupTags: '$key' => '$val'";
885
    }
886
  }
887
 
888
 
889
  # Check the {variables} option is valid and initialise variables hash
890
 
891
  if($opt->{variables} and !UNIVERSAL::isa($opt->{variables}, 'HASH')) {
892
    croak "Illegal value for 'Variables' option - expected a hashref";
893
  }
894
 
895
  if($opt->{variables}) {
896
    $self->{_var_values} = { %{$opt->{variables}} };
897
  }
898
  elsif($opt->{varattr}) {
899
    $self->{_var_values} = {};
900
  }
901
 
902
}
903
 
904
 
905
##############################################################################
906
# Method: find_xml_file()
907
#
908
# Helper routine for XMLin().
909
# Takes a filename, and a list of directories, attempts to locate the file in
910
# the directories listed.
911
# Returns a full pathname on success; croaks on failure.
912
#
913
 
914
sub find_xml_file  {
915
  my $self = shift;
916
  my $file = shift;
917
  my @search_path = @_;
918
 
919
 
920
  require File::Basename;
921
  require File::Spec;
922
 
923
  my($filename, $filedir) = File::Basename::fileparse($file);
924
 
925
  if($filename ne $file) {        # Ignore searchpath if dir component
926
    return($file) if(-e $file);
927
  }
928
  else {
929
    my($path);
930
    foreach $path (@search_path)  {
931
      my $fullpath = File::Spec->catfile($path, $file);
932
      return($fullpath) if(-e $fullpath);
933
    }
934
  }
935
 
936
  # If user did not supply a search path, default to current directory
937
 
938
  if(!@search_path) {
939
    return($file) if(-e $file);
940
    croak "File does not exist: $file";
941
  }
942
 
943
  croak "Could not find $file in ", join(':', @search_path);
944
}
945
 
946
 
947
##############################################################################
948
# Method: collapse()
949
#
950
# Helper routine for XMLin().  This routine really comprises the 'smarts' (or
951
# value add) of this module.
952
#
953
# Takes the parse tree that XML::Parser produced from the supplied XML and
954
# recurses through it 'collapsing' unnecessary levels of indirection (nested
955
# arrays etc) to produce a data structure that is easier to work with.
956
#
957
# Elements in the original parser tree are represented as an element name
958
# followed by an arrayref.  The first element of the array is a hashref
959
# containing the attributes.  The rest of the array contains a list of any
960
# nested elements as name+arrayref pairs:
961
#
962
#  <element name>, [ { <attribute hashref> }, <element name>, [ ... ], ... ]
963
#
964
# The special element name '0' (zero) flags text content.
965
#
966
# This routine cuts down the noise by discarding any text content consisting of
967
# only whitespace and then moves the nested elements into the attribute hash
968
# using the name of the nested element as the hash key and the collapsed
969
# version of the nested element as the value.  Multiple nested elements with
970
# the same name will initially be represented as an arrayref, but this may be
971
# 'folded' into a hashref depending on the value of the keyattr option.
972
#
973
 
974
sub collapse {
975
  my $self = shift;
976
 
977
 
978
  # Start with the hash of attributes
979
 
980
  my $attr  = shift;
981
  if($self->{opt}->{noattr}) {                    # Discard if 'noattr' set
982
    $attr = {};
983
  }
984
  elsif($self->{opt}->{normalisespace} == 2) {
985
    while(my($key, $value) = each %$attr) {
986
      $attr->{$key} = $self->normalise_space($value)
987
    }
988
  }
989
 
990
 
991
  # Do variable substitutions
992
 
993
  if(my $var = $self->{_var_values}) {
994
    while(my($key, $val) = each(%$attr)) {
995
      $val =~ s{\$\{([\w.]+)\}}{ $self->get_var($1) }ge;
996
      $attr->{$key} = $val;
997
    }
998
  }
999
 
1000
 
1001
  # Roll up 'value' attributes (but only if no nested elements)
1002
 
1003
  if(!@_  and  keys %$attr == 1) {
1004
    my($k) = keys %$attr;
1005
    if($self->{opt}->{valueattrlist}  and $self->{opt}->{valueattrlist}->{$k}) {
1006
      return $attr->{$k};
1007
    }
1008
  }
1009
 
1010
 
1011
  # Add any nested elements
1012
 
1013
  my($key, $val);
1014
  while(@_) {
1015
    $key = shift;
1016
    $val = shift;
1017
 
1018
    if(ref($val)) {
1019
      $val = $self->collapse(@$val);
1020
      next if(!defined($val)  and  $self->{opt}->{suppressempty});
1021
    }
1022
    elsif($key eq '0') {
1023
      next if($val =~ m{^\s*$}s);  # Skip all whitespace content
1024
 
1025
      $val = $self->normalise_space($val)
1026
        if($self->{opt}->{normalisespace} == 2);
1027
 
1028
      # do variable substitutions
1029
 
1030
      if(my $var = $self->{_var_values}) {
1031
        $val =~ s{\$\{(\w+)\}}{ $self->get_var($1) }ge;
1032
      }
1033
 
1034
 
1035
      # look for variable definitions
1036
 
1037
      if(my $var = $self->{opt}->{varattr}) {
1038
        if(exists $attr->{$var}) {
1039
          $self->set_var($attr->{$var}, $val);
1040
        }
1041
      }
1042
 
1043
 
1044
      # Collapse text content in element with no attributes to a string
1045
 
1046
      if(!%$attr  and  !@_) {
1047
        return($self->{opt}->{forcecontent} ?
1048
          { $self->{opt}->{contentkey} => $val } : $val
1049
        );
1050
      }
1051
      $key = $self->{opt}->{contentkey};
1052
    }
1053
 
1054
 
1055
    # Combine duplicate attributes into arrayref if required
1056
 
1057
    if(exists($attr->{$key})) {
1058
      if(UNIVERSAL::isa($attr->{$key}, 'ARRAY')) {
1059
        push(@{$attr->{$key}}, $val);
1060
      }
1061
      else {
1062
        $attr->{$key} = [ $attr->{$key}, $val ];
1063
      }
1064
    }
1065
    elsif(defined($val)  and  UNIVERSAL::isa($val, 'ARRAY')) {
1066
      $attr->{$key} = [ $val ];
1067
    }
1068
    else {
1069
      if( $key ne $self->{opt}->{contentkey}
1070
          and (
1071
            ($self->{opt}->{forcearray} == 1)
1072
            or (
1073
              (ref($self->{opt}->{forcearray}) eq 'HASH')
1074
              and (
1075
                $self->{opt}->{forcearray}->{$key}
1076
                or (grep $key =~ $_, @{$self->{opt}->{forcearray}->{_regex}})
1077
              )
1078
            )
1079
          )
1080
        ) {
1081
        $attr->{$key} = [ $val ];
1082
      }
1083
      else {
1084
        $attr->{$key} = $val;
1085
      }
1086
    }
1087
 
1088
  }
1089
 
1090
 
1091
  # Turn arrayrefs into hashrefs if key fields present
1092
 
1093
  if($self->{opt}->{keyattr}) {
1094
    while(($key,$val) = each %$attr) {
1095
      if(defined($val)  and  UNIVERSAL::isa($val, 'ARRAY')) {
1096
        $attr->{$key} = $self->array_to_hash($key, $val);
1097
      }
1098
    }
1099
  }
1100
 
1101
 
1102
  # disintermediate grouped tags
1103
 
1104
  if($self->{opt}->{grouptags}) {
1105
    while(my($key, $val) = each(%$attr)) {
1106
      next unless(UNIVERSAL::isa($val, 'HASH') and (keys %$val == 1));
1107
      next unless(exists($self->{opt}->{grouptags}->{$key}));
1108
 
1109
      my($child_key, $child_val) =  %$val;
1110
 
1111
      if($self->{opt}->{grouptags}->{$key} eq $child_key) {
1112
        $attr->{$key}= $child_val;
1113
      }
1114
    }
1115
  }
1116
 
1117
 
1118
  # Fold hashes containing a single anonymous array up into just the array
1119
 
1120
  my $count = scalar keys %$attr;
1121
  if($count == 1
1122
     and  exists $attr->{anon}  
1123
     and  UNIVERSAL::isa($attr->{anon}, 'ARRAY')
1124
  ) {
1125
    return($attr->{anon});
1126
  }
1127
 
1128
 
1129
  # Do the right thing if hash is empty, otherwise just return it
1130
 
1131
  if(!%$attr  and  exists($self->{opt}->{suppressempty})) {
1132
    if(defined($self->{opt}->{suppressempty})  and
1133
       $self->{opt}->{suppressempty} eq '') {
1134
      return('');
1135
    }
1136
    return(undef);
1137
  }
1138
 
1139
 
1140
  # Roll up named elements with named nested 'value' attributes
1141
 
1142
  if($self->{opt}->{valueattr}) {
1143
    while(my($key, $val) = each(%$attr)) {
1144
      next unless($self->{opt}->{valueattr}->{$key});
1145
      next unless(UNIVERSAL::isa($val, 'HASH') and (keys %$val == 1));
1146
      my($k) = keys %$val;
1147
      next unless($k eq $self->{opt}->{valueattr}->{$key});
1148
      $attr->{$key} = $val->{$k};
1149
    }
1150
  }
1151
 
1152
  return($attr)
1153
 
1154
}
1155
 
1156
 
1157
##############################################################################
1158
# Method: set_var()
1159
#
1160
# Called when a variable definition is encountered in the XML.  (A variable
1161
# definition looks like <element attrname="name">value</element> where attrname
1162
# matches the varattr setting).
1163
#
1164
 
1165
sub set_var {
1166
  my($self, $name, $value) = @_;
1167
 
1168
  $self->{_var_values}->{$name} = $value;
1169
}
1170
 
1171
 
1172
##############################################################################
1173
# Method: get_var()
1174
#
1175
# Called during variable substitution to get the value for the named variable.
1176
#
1177
 
1178
sub get_var {
1179
  my($self, $name) = @_;
1180
 
1181
  my $value = $self->{_var_values}->{$name};
1182
  return $value if(defined($value));
1183
 
1184
  return '${' . $name . '}';
1185
}
1186
 
1187
 
1188
##############################################################################
1189
# Method: normalise_space()
1190
#
1191
# Strips leading and trailing whitespace and collapses sequences of whitespace
1192
# characters to a single space.
1193
#
1194
 
1195
sub normalise_space {
1196
  my($self, $text) = @_;
1197
 
1198
  $text =~ s/^\s+//s;
1199
  $text =~ s/\s+$//s;
1200
  $text =~ s/\s\s+/ /sg;
1201
 
1202
  return $text;
1203
}
1204
 
1205
 
1206
##############################################################################
1207
# Method: array_to_hash()
1208
#
1209
# Helper routine for collapse().
1210
# Attempts to 'fold' an array of hashes into an hash of hashes.  Returns a
1211
# reference to the hash on success or the original array if folding is
1212
# not possible.  Behaviour is controlled by 'keyattr' option.
1213
#
1214
 
1215
sub array_to_hash {
1216
  my $self     = shift;
1217
  my $name     = shift;
1218
  my $arrayref = shift;
1219
 
1220
  my $hashref  = $self->new_hashref;
1221
 
1222
  my($i, $key, $val, $flag);
1223
 
1224
 
1225
  # Handle keyattr => { .... }
1226
 
1227
  if(ref($self->{opt}->{keyattr}) eq 'HASH') {
1228
    return($arrayref) unless(exists($self->{opt}->{keyattr}->{$name}));
1229
    ($key, $flag) = @{$self->{opt}->{keyattr}->{$name}};
1230
    for($i = 0; $i < @$arrayref; $i++)  {
1231
      if(UNIVERSAL::isa($arrayref->[$i], 'HASH') and
1232
         exists($arrayref->[$i]->{$key})
1233
      ) {
1234
        $val = $arrayref->[$i]->{$key};
1235
        if(ref($val)) {
1236
          $self->die_or_warn("<$name> element has non-scalar '$key' key attribute");
1237
          return($arrayref);
1238
        }
1239
        $val = $self->normalise_space($val)
1240
          if($self->{opt}->{normalisespace} == 1);
1241
        $self->die_or_warn("<$name> element has non-unique value in '$key' key attribute: $val")
1242
          if(exists($hashref->{$val}));
1243
        $hashref->{$val} = { %{$arrayref->[$i]} };
1244
        $hashref->{$val}->{"-$key"} = $hashref->{$val}->{$key} if($flag eq '-');
1245
        delete $hashref->{$val}->{$key} unless($flag eq '+');
1246
      }
1247
      else {
1248
        $self->die_or_warn("<$name> element has no '$key' key attribute");
1249
        return($arrayref);
1250
      }
1251
    }
1252
  }
1253
 
1254
 
1255
  # Or assume keyattr => [ .... ]
1256
 
1257
  else {
1258
    my $default_keys =
1259
      join(',', @DefKeyAttr) eq join(',', @{$self->{opt}->{keyattr}});
1260
 
1261
    ELEMENT: for($i = 0; $i < @$arrayref; $i++)  {
1262
      return($arrayref) unless(UNIVERSAL::isa($arrayref->[$i], 'HASH'));
1263
 
1264
      foreach $key (@{$self->{opt}->{keyattr}}) {
1265
        if(defined($arrayref->[$i]->{$key}))  {
1266
          $val = $arrayref->[$i]->{$key};
1267
          if(ref($val)) {
1268
            $self->die_or_warn("<$name> element has non-scalar '$key' key attribute")
1269
              if not $default_keys;
1270
            return($arrayref);
1271
          }
1272
          $val = $self->normalise_space($val)
1273
            if($self->{opt}->{normalisespace} == 1);
1274
          $self->die_or_warn("<$name> element has non-unique value in '$key' key attribute: $val")
1275
            if(exists($hashref->{$val}));
1276
          $hashref->{$val} = { %{$arrayref->[$i]} };
1277
          delete $hashref->{$val}->{$key};
1278
          next ELEMENT;
1279
        }
1280
      }
1281
 
1282
      return($arrayref);    # No keyfield matched
1283
    }
1284
  }
1285
 
1286
  # collapse any hashes which now only have a 'content' key
1287
 
1288
  if($self->{opt}->{collapseagain}) {
1289
    $hashref = $self->collapse_content($hashref);
1290
  }
1291
 
1292
  return($hashref);
1293
}
1294
 
1295
 
1296
##############################################################################
1297
# Method: die_or_warn()
1298
#
1299
# Takes a diagnostic message and does one of three things:
1300
# 1. dies if strict mode is enabled
1301
# 2. warns if warnings are enabled but strict mode is not
1302
# 3. ignores message and resturns silently if neither strict mode nor warnings
1303
#    are enabled
1304
# 
1305
 
1306
sub die_or_warn {
1307
  my $self = shift;
1308
  my $msg  = shift;
1309
 
1310
  croak $msg if($StrictMode);
1311
  carp "Warning: $msg" if($^W);
1312
}
1313
 
1314
 
1315
##############################################################################
1316
# Method: new_hashref()
1317
#
1318
# This is a hook routine for overriding in a sub-class.  Some people believe
1319
# that using Tie::IxHash here will solve order-loss problems.
1320
# 
1321
 
1322
sub new_hashref {
1323
  my $self = shift;
1324
 
1325
  return { @_ };
1326
}
1327
 
1328
 
1329
##############################################################################
1330
# Method: collapse_content()
1331
#
1332
# Helper routine for array_to_hash
1333
# 
1334
# Arguments expected are:
1335
# - an XML::Simple object
1336
# - a hasref
1337
# the hashref is a former array, turned into a hash by array_to_hash because
1338
# of the presence of key attributes
1339
# at this point collapse_content avoids over-complicated structures like
1340
# dir => { libexecdir    => { content => '$exec_prefix/libexec' },
1341
#          localstatedir => { content => '$prefix' },
1342
#        }
1343
# into
1344
# dir => { libexecdir    => '$exec_prefix/libexec',
1345
#          localstatedir => '$prefix',
1346
#        }
1347
 
1348
sub collapse_content {
1349
  my $self       = shift;
1350
  my $hashref    = shift;
1351
 
1352
  my $contentkey = $self->{opt}->{contentkey};
1353
 
1354
  # first go through the values,checking that they are fit to collapse
1355
  foreach my $val (values %$hashref) {
1356
    return $hashref unless (     (ref($val) eq 'HASH')
1357
                             and (keys %$val == 1)
1358
                             and (exists $val->{$contentkey})
1359
                           );
1360
  }
1361
 
1362
  # now collapse them
1363
  foreach my $key (keys %$hashref) {
1364
    $hashref->{$key}=  $hashref->{$key}->{$contentkey};
1365
  }
1366
 
1367
  return $hashref;
1368
}
1369
 
1370
 
1371
##############################################################################
1372
# Method: value_to_xml()
1373
#
1374
# Helper routine for XMLout() - recurses through a data structure building up
1375
# and returning an XML representation of that structure as a string.
1376
# 
1377
# Arguments expected are:
1378
# - the data structure to be encoded (usually a reference)
1379
# - the XML tag name to use for this item
1380
# - a string of spaces for use as the current indent level
1381
#
1382
 
1383
sub value_to_xml {
1384
  my $self = shift;;
1385
 
1386
 
1387
  # Grab the other arguments
1388
 
1389
  my($ref, $name, $indent) = @_;
1390
 
1391
  my $named = (defined($name) and $name ne '' ? 1 : 0);
1392
 
1393
  my $nl = "\n";
1394
 
1395
  my $is_root = $indent eq '' ? 1 : 0;   # Warning, dirty hack!
1396
  if($self->{opt}->{noindent}) {
1397
    $indent = '';
1398
    $nl     = '';
1399
  }
1400
 
1401
 
1402
  # Convert to XML
1403
 
1404
  if(ref($ref)) {
1405
    croak "circular data structures not supported"
1406
      if(grep($_ == $ref, @{$self->{_ancestors}}));
1407
    push @{$self->{_ancestors}}, $ref;
1408
  }
1409
  else {
1410
    if($named) {
1411
      return(join('',
1412
              $indent, '<', $name, '>',
1413
              ($self->{opt}->{noescape} ? $ref : $self->escape_value($ref)),
1414
              '</', $name, ">", $nl
1415
            ));
1416
    }
1417
    else {
1418
      return("$ref$nl");
1419
    }
1420
  }
1421
 
1422
 
1423
  # Unfold hash to array if possible
1424
 
1425
  if(UNIVERSAL::isa($ref, 'HASH')      # It is a hash
1426
     and keys %$ref                    # and it's not empty
1427
     and $self->{opt}->{keyattr}       # and folding is enabled
1428
     and !$is_root                     # and its not the root element
1429
  ) {
1430
    $ref = $self->hash_to_array($name, $ref);
1431
  }
1432
 
1433
 
1434
  my @result = ();
1435
  my($key, $value);
1436
 
1437
 
1438
  # Handle hashrefs
1439
 
1440
  if(UNIVERSAL::isa($ref, 'HASH')) {
1441
 
1442
    # Reintermediate grouped values if applicable
1443
 
1444
    if($self->{opt}->{grouptags}) {
1445
      $ref = $self->copy_hash($ref);
1446
      while(my($key, $val) = each %$ref) {
1447
        if($self->{opt}->{grouptags}->{$key}) {
1448
          $ref->{$key} = { $self->{opt}->{grouptags}->{$key} => $val };
1449
        }
1450
      }
1451
    }
1452
 
1453
 
1454
    # Scan for namespace declaration attributes
1455
 
1456
    my $nsdecls = '';
1457
    my $default_ns_uri;
1458
    if($self->{nsup}) {
1459
      $ref = $self->copy_hash($ref);
1460
      $self->{nsup}->push_context();
1461
 
1462
      # Look for default namespace declaration first
1463
 
1464
      if(exists($ref->{xmlns})) {
1465
        $self->{nsup}->declare_prefix('', $ref->{xmlns});
1466
        $nsdecls .= qq( xmlns="$ref->{xmlns}");
1467
        delete($ref->{xmlns});
1468
      }
1469
      $default_ns_uri = $self->{nsup}->get_uri('');
1470
 
1471
 
1472
      # Then check all the other keys
1473
 
1474
      foreach my $qname (keys(%$ref)) {
1475
        my($uri, $lname) = $self->{nsup}->parse_jclark_notation($qname);
1476
        if($uri) {
1477
          if($uri eq $xmlns_ns) {
1478
            $self->{nsup}->declare_prefix($lname, $ref->{$qname});
1479
            $nsdecls .= qq( xmlns:$lname="$ref->{$qname}");
1480
            delete($ref->{$qname});
1481
          }
1482
        }
1483
      }
1484
 
1485
      # Translate any remaining Clarkian names
1486
 
1487
      foreach my $qname (keys(%$ref)) {
1488
        my($uri, $lname) = $self->{nsup}->parse_jclark_notation($qname);
1489
        if($uri) {
1490
          if($default_ns_uri  and  $uri eq $default_ns_uri) {
1491
            $ref->{$lname} = $ref->{$qname};
1492
            delete($ref->{$qname});
1493
          }
1494
          else {
1495
            my $prefix = $self->{nsup}->get_prefix($uri);
1496
            unless($prefix) {
1497
              # $self->{nsup}->declare_prefix(undef, $uri);
1498
              # $prefix = $self->{nsup}->get_prefix($uri);
1499
              $prefix = $self->{ns_prefix}++;
1500
              $self->{nsup}->declare_prefix($prefix, $uri);
1501
              $nsdecls .= qq( xmlns:$prefix="$uri");
1502
            }
1503
            $ref->{"$prefix:$lname"} = $ref->{$qname};
1504
            delete($ref->{$qname});
1505
          }
1506
        }
1507
      }
1508
    }
1509
 
1510
 
1511
    my @nested = ();
1512
    my $text_content = undef;
1513
    if($named) {
1514
      push @result, $indent, '<', $name, $nsdecls;
1515
    }
1516
 
1517
    if(keys %$ref) {
1518
      my $first_arg = 1;
1519
      foreach my $key ($self->sorted_keys($name, $ref)) {
1520
        my $value = $ref->{$key};
1521
        next if(substr($key, 0, 1) eq '-');
1522
        if(!defined($value)) {
1523
          next if $self->{opt}->{suppressempty};
1524
          unless(exists($self->{opt}->{suppressempty})
1525
             and !defined($self->{opt}->{suppressempty})
1526
          ) {
1527
            carp 'Use of uninitialized value' if($^W);
1528
          }
1529
          if($key eq $self->{opt}->{contentkey}) {
1530
            $text_content = '';
1531
          }
1532
          else {
1533
            $value = exists($self->{opt}->{suppressempty}) ? {} : '';
1534
          }
1535
        }
1536
 
1537
        if(!ref($value)  
1538
           and $self->{opt}->{valueattr}
1539
           and $self->{opt}->{valueattr}->{$key}
1540
        ) {
1541
          $value = { $self->{opt}->{valueattr}->{$key} => $value };
1542
        }
1543
 
1544
        if(ref($value)  or  $self->{opt}->{noattr}) {
1545
          push @nested,
1546
            $self->value_to_xml($value, $key, "$indent  ");
1547
        }
1548
        else {
1549
          $value = $self->escape_value($value) unless($self->{opt}->{noescape});
1550
          if($key eq $self->{opt}->{contentkey}) {
1551
            $text_content = $value;
1552
          }
1553
          else {
1554
            push @result, "\n$indent " . ' ' x length($name)
1555
              if($self->{opt}->{attrindent}  and  !$first_arg);
1556
            push @result, ' ', $key, '="', $value , '"';
1557
            $first_arg = 0;
1558
          }
1559
        }
1560
      }
1561
    }
1562
    else {
1563
      $text_content = '';
1564
    }
1565
 
1566
    if(@nested  or  defined($text_content)) {
1567
      if($named) {
1568
        push @result, ">";
1569
        if(defined($text_content)) {
1570
          push @result, $text_content;
1571
          $nested[0] =~ s/^\s+// if(@nested);
1572
        }
1573
        else {
1574
          push @result, $nl;
1575
        }
1576
        if(@nested) {
1577
          push @result, @nested, $indent;
1578
        }
1579
        push @result, '</', $name, ">", $nl;
1580
      }
1581
      else {
1582
        push @result, @nested;             # Special case if no root elements
1583
      }
1584
    }
1585
    else {
1586
      push @result, " />", $nl;
1587
    }
1588
    $self->{nsup}->pop_context() if($self->{nsup});
1589
  }
1590
 
1591
 
1592
  # Handle arrayrefs
1593
 
1594
  elsif(UNIVERSAL::isa($ref, 'ARRAY')) {
1595
    foreach $value (@$ref) {
1596
      next if !defined($value) and $self->{opt}->{suppressempty};
1597
      if(!ref($value)) {
1598
        push @result,
1599
             $indent, '<', $name, '>',
1600
             ($self->{opt}->{noescape} ? $value : $self->escape_value($value)),
1601
             '</', $name, ">$nl";
1602
      }
1603
      elsif(UNIVERSAL::isa($value, 'HASH')) {
1604
        push @result, $self->value_to_xml($value, $name, $indent);
1605
      }
1606
      else {
1607
        push @result,
1608
               $indent, '<', $name, ">$nl",
1609
               $self->value_to_xml($value, 'anon', "$indent  "),
1610
               $indent, '</', $name, ">$nl";
1611
      }
1612
    }
1613
  }
1614
 
1615
  else {
1616
    croak "Can't encode a value of type: " . ref($ref);
1617
  }
1618
 
1619
 
1620
  pop @{$self->{_ancestors}} if(ref($ref));
1621
 
1622
  return(join('', @result));
1623
}
1624
 
1625
 
1626
##############################################################################
1627
# Method: sorted_keys()
1628
#
1629
# Returns the keys of the referenced hash sorted into alphabetical order, but
1630
# with the 'key' key (as in KeyAttr) first, if there is one.
1631
#
1632
 
1633
sub sorted_keys {
1634
  my($self, $name, $ref) = @_;
1635
 
1636
  return keys %$ref if $self->{opt}->{nosort};
1637
 
1638
  my %hash = %$ref;
1639
  my $keyattr = $self->{opt}->{keyattr};
1640
 
1641
  my @key;
1642
 
1643
  if(ref $keyattr eq 'HASH') {
1644
    if(exists $keyattr->{$name} and exists $hash{$keyattr->{$name}->[0]}) {
1645
      push @key, $keyattr->{$name}->[0];
1646
      delete $hash{$keyattr->{$name}->[0]};
1647
    }
1648
  }
1649
  elsif(ref $keyattr eq 'ARRAY') {
1650
    foreach (@{$keyattr}) {
1651
      if(exists $hash{$_}) {
1652
        push @key, $_;
1653
        delete $hash{$_};
1654
        last;
1655
      }
1656
    }
1657
  }
1658
 
1659
  return(@key, sort keys %hash);
1660
}
1661
 
1662
##############################################################################
1663
# Method: escape_value()
1664
#
1665
# Helper routine for automatically escaping values for XMLout().
1666
# Expects a scalar data value.  Returns escaped version.
1667
#
1668
 
1669
sub escape_value {
1670
  my($self, $data) = @_;
1671
 
1672
  return '' unless(defined($data));
1673
 
1674
  $data =~ s/&/&amp;/sg;
1675
  $data =~ s/</&lt;/sg;
1676
  $data =~ s/>/&gt;/sg;
1677
  $data =~ s/"/&quot;/sg;
1678
 
1679
  my $level = $self->{opt}->{numericescape} or return $data;
1680
 
1681
  return $self->numeric_escape($data, $level);
1682
}
1683
 
1684
sub numeric_escape {
1685
  my($self, $data, $level) = @_;
1686
 
1687
  use utf8; # required for 5.6
1688
 
1689
  if($self->{opt}->{numericescape} eq '2') {
1690
    $data =~ s/([^\x00-\x7F])/'&#' . ord($1) . ';'/gse;
1691
  }
1692
  else {
1693
    $data =~ s/([^\x00-\xFF])/'&#' . ord($1) . ';'/gse;
1694
  }
1695
 
1696
  return $data;
1697
}
1698
 
1699
 
1700
##############################################################################
1701
# Method: hash_to_array()
1702
#
1703
# Helper routine for value_to_xml().
1704
# Attempts to 'unfold' a hash of hashes into an array of hashes.  Returns a
1705
# reference to the array on success or the original hash if unfolding is
1706
# not possible.
1707
#
1708
 
1709
sub hash_to_array {
1710
  my $self    = shift;
1711
  my $parent  = shift;
1712
  my $hashref = shift;
1713
 
1714
  my $arrayref = [];
1715
 
1716
  my($key, $value);
1717
 
1718
  my @keys = $self->{opt}->{nosort} ? keys %$hashref : sort keys %$hashref;
1719
  foreach $key (@keys) {
1720
    $value = $hashref->{$key};
1721
    return($hashref) unless(UNIVERSAL::isa($value, 'HASH'));
1722
 
1723
    if(ref($self->{opt}->{keyattr}) eq 'HASH') {
1724
      return($hashref) unless(defined($self->{opt}->{keyattr}->{$parent}));
1725
      push @$arrayref, $self->copy_hash(
1726
        $value, $self->{opt}->{keyattr}->{$parent}->[0] => $key
1727
      );
1728
    }
1729
    else {
1730
      push(@$arrayref, { $self->{opt}->{keyattr}->[0] => $key, %$value });
1731
    }
1732
  }
1733
 
1734
  return($arrayref);
1735
}
1736
 
1737
 
1738
##############################################################################
1739
# Method: copy_hash()
1740
#
1741
# Helper routine for hash_to_array().  When unfolding a hash of hashes into
1742
# an array of hashes, we need to copy the key from the outer hash into the
1743
# inner hash.  This routine makes a copy of the original hash so we don't
1744
# destroy the original data structure.  You might wish to override this
1745
# method if you're using tied hashes and don't want them to get untied.
1746
#
1747
 
1748
sub copy_hash {
1749
  my($self, $orig, @extra) = @_;
1750
 
1751
  return { @extra, %$orig };
1752
}
1753
 
1754
##############################################################################
1755
# Methods required for building trees from SAX events
1756
##############################################################################
1757
 
1758
sub start_document {
1759
  my $self = shift;
1760
 
1761
  $self->handle_options('in') unless($self->{opt});
1762
 
1763
  $self->{lists} = [];
1764
  $self->{curlist} = $self->{tree} = [];
1765
}
1766
 
1767
 
1768
sub start_element {
1769
  my $self    = shift;
1770
  my $element = shift;
1771
 
1772
  my $name = $element->{Name};
1773
  if($self->{opt}->{nsexpand}) {
1774
    $name = $element->{LocalName} || '';
1775
    if($element->{NamespaceURI}) {
1776
      $name = '{' . $element->{NamespaceURI} . '}' . $name;
1777
    }
1778
  }
1779
  my $attributes = {};
1780
  if($element->{Attributes}) {  # Might be undef
1781
    foreach my $attr (values %{$element->{Attributes}}) {
1782
      if($self->{opt}->{nsexpand}) {
1783
        my $name = $attr->{LocalName} || '';
1784
        if($attr->{NamespaceURI}) {
1785
          $name = '{' . $attr->{NamespaceURI} . '}' . $name
1786
        }
1787
        $name = 'xmlns' if($name eq $bad_def_ns_jcn);
1788
        $attributes->{$name} = $attr->{Value};
1789
      }
1790
      else {
1791
        $attributes->{$attr->{Name}} = $attr->{Value};
1792
      }
1793
    }
1794
  }
1795
  my $newlist = [ $attributes ];
1796
  push @{ $self->{lists} }, $self->{curlist};
1797
  push @{ $self->{curlist} }, $name => $newlist;
1798
  $self->{curlist} = $newlist;
1799
}
1800
 
1801
 
1802
sub characters {
1803
  my $self  = shift;
1804
  my $chars = shift;
1805
 
1806
  my $text  = $chars->{Data};
1807
  my $clist = $self->{curlist};
1808
  my $pos = $#$clist;
1809
 
1810
  if ($pos > 0 and $clist->[$pos - 1] eq '0') {
1811
    $clist->[$pos] .= $text;
1812
  }
1813
  else {
1814
    push @$clist, 0 => $text;
1815
  }
1816
}
1817
 
1818
 
1819
sub end_element {
1820
  my $self    = shift;
1821
 
1822
  $self->{curlist} = pop @{ $self->{lists} };
1823
}
1824
 
1825
 
1826
sub end_document {
1827
  my $self = shift;
1828
 
1829
  delete($self->{curlist});
1830
  delete($self->{lists});
1831
 
1832
  my $tree = $self->{tree};
1833
  delete($self->{tree});
1834
 
1835
 
1836
  # Return tree as-is to XMLin()
1837
 
1838
  return($tree) if($self->{nocollapse});
1839
 
1840
 
1841
  # Or collapse it before returning it to SAX parser class
1842
 
1843
  if($self->{opt}->{keeproot}) {
1844
    $tree = $self->collapse({}, @$tree);
1845
  }
1846
  else {
1847
    $tree = $self->collapse(@{$tree->[1]});
1848
  }
1849
 
1850
  if($self->{opt}->{datahandler}) {
1851
    return($self->{opt}->{datahandler}->($self, $tree));
1852
  }
1853
 
1854
  return($tree);
1855
}
1856
 
1857
*xml_in  = \&XMLin;
1858
*xml_out = \&XMLout;
1859
 
1860
1;
1861
 
1862
__END__
1863
 
1864
=head1 QUICK START
1865
 
1866
Say you have a script called B<foo> and a file of configuration options
1867
called B<foo.xml> containing this:
1868
 
1869
  <config logdir="/var/log/foo/" debugfile="/tmp/foo.debug">
1870
    <server name="sahara" osname="solaris" osversion="2.6">
1871
      <address>10.0.0.101</address>
1872
      <address>10.0.1.101</address>
1873
    </server>
1874
    <server name="gobi" osname="irix" osversion="6.5">
1875
      <address>10.0.0.102</address>
1876
    </server>
1877
    <server name="kalahari" osname="linux" osversion="2.0.34">
1878
      <address>10.0.0.103</address>
1879
      <address>10.0.1.103</address>
1880
    </server>
1881
  </config>
1882
 
1883
The following lines of code in B<foo>:
1884
 
1885
  use XML::Simple;
1886
 
1887
  my $config = XMLin();
1888
 
1889
will 'slurp' the configuration options into the hashref $config (because no
1890
arguments are passed to C<XMLin()> the name and location of the XML file will
1891
be inferred from name and location of the script).  You can dump out the
1892
contents of the hashref using Data::Dumper:
1893
 
1894
  use Data::Dumper;
1895
 
1896
  print Dumper($config);
1897
 
1898
which will produce something like this (formatting has been adjusted for
1899
brevity):
1900
 
1901
  {
1902
      'logdir'        => '/var/log/foo/',
1903
      'debugfile'     => '/tmp/foo.debug',
1904
      'server'        => {
1905
          'sahara'        => {
1906
              'osversion'     => '2.6',
1907
              'osname'        => 'solaris',
1908
              'address'       => [ '10.0.0.101', '10.0.1.101' ]
1909
          },
1910
          'gobi'          => {
1911
              'osversion'     => '6.5',
1912
              'osname'        => 'irix',
1913
              'address'       => '10.0.0.102'
1914
          },
1915
          'kalahari'      => {
1916
              'osversion'     => '2.0.34',
1917
              'osname'        => 'linux',
1918
              'address'       => [ '10.0.0.103', '10.0.1.103' ]
1919
          }
1920
      }
1921
  }
1922
 
1923
Your script could then access the name of the log directory like this:
1924
 
1925
  print $config->{logdir};
1926
 
1927
similarly, the second address on the server 'kalahari' could be referenced as:
1928
 
1929
  print $config->{server}->{kalahari}->{address}->[1];
1930
 
1931
What could be simpler?  (Rhetorical).
1932
 
1933
For simple requirements, that's really all there is to it.  If you want to
1934
store your XML in a different directory or file, or pass it in as a string or
1935
even pass it in via some derivative of an IO::Handle, you'll need to check out
1936
L<"OPTIONS">.  If you want to turn off or tweak the array folding feature (that
1937
neat little transformation that produced $config->{server}) you'll find options
1938
for that as well.
1939
 
1940
If you want to generate XML (for example to write a modified version of
1941
$config back out as XML), check out C<XMLout()>.
1942
 
1943
If your needs are not so simple, this may not be the module for you.  In that
1944
case, you might want to read L<"WHERE TO FROM HERE?">.
1945
 
1946
=head1 DESCRIPTION
1947
 
1948
The XML::Simple module provides a simple API layer on top of an underlying XML
1949
parsing module (either XML::Parser or one of the SAX2 parser modules).  Two
1950
functions are exported: C<XMLin()> and C<XMLout()>.  Note: you can explicity
1951
request the lower case versions of the function names: C<xml_in()> and
1952
C<xml_out()>.
1953
 
1954
The simplest approach is to call these two functions directly, but an
1955
optional object oriented interface (see L<"OPTIONAL OO INTERFACE"> below)
1956
allows them to be called as methods of an B<XML::Simple> object.  The object
1957
interface can also be used at either end of a SAX pipeline.
1958
 
1959
=head2 XMLin()
1960
 
1961
Parses XML formatted data and returns a reference to a data structure which
1962
contains the same information in a more readily accessible form.  (Skip
1963
down to L<"EXAMPLES"> below, for more sample code).
1964
 
1965
C<XMLin()> accepts an optional XML specifier followed by zero or more 'name =>
1966
value' option pairs.  The XML specifier can be one of the following:
1967
 
1968
=over 4
1969
 
1970
=item A filename
1971
 
1972
If the filename contains no directory components C<XMLin()> will look for the
1973
file in each directory in the SearchPath (see L<"OPTIONS"> below) or in the
1974
current directory if the SearchPath option is not defined.  eg:
1975
 
1976
  $ref = XMLin('/etc/params.xml');
1977
 
1978
Note, the filename '-' can be used to parse from STDIN.
1979
 
1980
=item undef
1981
 
1982
If there is no XML specifier, C<XMLin()> will check the script directory and
1983
each of the SearchPath directories for a file with the same name as the script
1984
but with the extension '.xml'.  Note: if you wish to specify options, you
1985
must specify the value 'undef'.  eg:
1986
 
1987
  $ref = XMLin(undef, ForceArray => 1);
1988
 
1989
=item A string of XML
1990
 
1991
A string containing XML (recognised by the presence of '<' and '>' characters)
1992
will be parsed directly.  eg:
1993
 
1994
  $ref = XMLin('<opt username="bob" password="flurp" />');
1995
 
1996
=item An IO::Handle object
1997
 
1998
An IO::Handle object will be read to EOF and its contents parsed. eg:
1999
 
2000
  $fh = IO::File->new('/etc/params.xml');
2001
  $ref = XMLin($fh);
2002
 
2003
=back
2004
 
2005
=head2 XMLout()
2006
 
2007
Takes a data structure (generally a hashref) and returns an XML encoding of
2008
that structure.  If the resulting XML is parsed using C<XMLin()>, it should
2009
return a data structure equivalent to the original (see caveats below).
2010
 
2011
The C<XMLout()> function can also be used to output the XML as SAX events
2012
see the C<Handler> option and L<"SAX SUPPORT"> for more details).
2013
 
2014
When translating hashes to XML, hash keys which have a leading '-' will be
2015
silently skipped.  This is the approved method for marking elements of a
2016
data structure which should be ignored by C<XMLout>.  (Note: If these items
2017
were not skipped the key names would be emitted as element or attribute names
2018
with a leading '-' which would not be valid XML).
2019
 
2020
=head2 Caveats
2021
 
2022
Some care is required in creating data structures which will be passed to
2023
C<XMLout()>.  Hash keys from the data structure will be encoded as either XML
2024
element names or attribute names.  Therefore, you should use hash key names
2025
which conform to the relatively strict XML naming rules:
2026
 
2027
Names in XML must begin with a letter.  The remaining characters may be
2028
letters, digits, hyphens (-), underscores (_) or full stops (.).  It is also
2029
allowable to include one colon (:) in an element name but this should only be
2030
used when working with namespaces (B<XML::Simple> can only usefully work with
2031
namespaces when teamed with a SAX Parser).
2032
 
2033
You can use other punctuation characters in hash values (just not in hash
2034
keys) however B<XML::Simple> does not support dumping binary data.
2035
 
2036
If you break these rules, the current implementation of C<XMLout()> will
2037
simply emit non-compliant XML which will be rejected if you try to read it
2038
back in.  (A later version of B<XML::Simple> might take a more proactive
2039
approach).
2040
 
2041
Note also that although you can nest hashes and arrays to arbitrary levels,
2042
circular data structures are not supported and will cause C<XMLout()> to die.
2043
 
2044
If you wish to 'round-trip' arbitrary data structures from Perl to XML and back
2045
to Perl, then you should probably disable array folding (using the KeyAttr
2046
option) both with C<XMLout()> and with C<XMLin()>.  If you still don't get the
2047
expected results, you may prefer to use L<XML::Dumper> which is designed for
2048
exactly that purpose.
2049
 
2050
Refer to L<"WHERE TO FROM HERE?"> if C<XMLout()> is too simple for your needs.
2051
 
2052
 
2053
=head1 OPTIONS
2054
 
2055
B<XML::Simple> supports a number of options (in fact as each release of
2056
B<XML::Simple> adds more options, the module's claim to the name 'Simple'
2057
becomes increasingly tenuous).  If you find yourself repeatedly having to
2058
specify the same options, you might like to investigate L<"OPTIONAL OO
2059
INTERFACE"> below.
2060
 
2061
If you can't be bothered reading the documentation, refer to
2062
L<"STRICT MODE"> to automatically catch common mistakes.
2063
 
2064
Because there are so many options, it's hard for new users to know which ones
2065
are important, so here are the two you really need to know about:
2066
 
2067
=over 4
2068
 
2069
=item *
2070
 
2071
check out C<ForceArray> because you'll almost certainly want to turn it on
2072
 
2073
=item *
2074
 
2075
make sure you know what the C<KeyAttr> option does and what its default value is
2076
because it may surprise you otherwise (note in particular that 'KeyAttr'
2077
affects both C<XMLin> and C<XMLout>)
2078
 
2079
=back
2080
 
2081
The option name headings below have a trailing 'comment' - a hash followed by
2082
two pieces of metadata:
2083
 
2084
=over 4
2085
 
2086
=item *
2087
 
2088
Options are marked with 'I<in>' if they are recognised by C<XMLin()> and
2089
'I<out>' if they are recognised by C<XMLout()>.
2090
 
2091
=item *
2092
 
2093
Each option is also flagged to indicate whether it is:
2094
 
2095
 'important'   - don't use the module until you understand this one
2096
 'handy'       - you can skip this on the first time through
2097
 'advanced'    - you can skip this on the second time through
2098
 'SAX only'    - don't worry about this unless you're using SAX (or
2099
                 alternatively if you need this, you also need SAX)
2100
 'seldom used' - you'll probably never use this unless you were the
2101
                 person that requested the feature
2102
 
2103
=back
2104
 
2105
The options are listed alphabetically:
2106
 
2107
Note: option names are no longer case sensitive so you can use the mixed case
2108
versions shown here; all lower case as required by versions 2.03 and earlier;
2109
or you can add underscores between the words (eg: key_attr).
2110
 
2111
 
2112
=head2 AttrIndent => 1 I<# out - handy>
2113
 
2114
When you are using C<XMLout()>, enable this option to have attributes printed
2115
one-per-line with sensible indentation rather than all on one line.
2116
 
2117
=head2 Cache => [ cache schemes ] I<# in - advanced>
2118
 
2119
Because loading the B<XML::Parser> module and parsing an XML file can consume a
2120
significant number of CPU cycles, it is often desirable to cache the output of
2121
C<XMLin()> for later reuse.
2122
 
2123
When parsing from a named file, B<XML::Simple> supports a number of caching
2124
schemes.  The 'Cache' option may be used to specify one or more schemes (using
2125
an anonymous array).  Each scheme will be tried in turn in the hope of finding
2126
a cached pre-parsed representation of the XML file.  If no cached copy is
2127
found, the file will be parsed and the first cache scheme in the list will be
2128
used to save a copy of the results.  The following cache schemes have been
2129
implemented:
2130
 
2131
=over 4
2132
 
2133
=item storable
2134
 
2135
Utilises B<Storable.pm> to read/write a cache file with the same name as the
2136
XML file but with the extension .stor
2137
 
2138
=item memshare
2139
 
2140
When a file is first parsed, a copy of the resulting data structure is retained
2141
in memory in the B<XML::Simple> module's namespace.  Subsequent calls to parse
2142
the same file will return a reference to this structure.  This cached version
2143
will persist only for the life of the Perl interpreter (which in the case of
2144
mod_perl for example, may be some significant time).
2145
 
2146
Because each caller receives a reference to the same data structure, a change
2147
made by one caller will be visible to all.  For this reason, the reference
2148
returned should be treated as read-only.
2149
 
2150
=item memcopy
2151
 
2152
This scheme works identically to 'memshare' (above) except that each caller
2153
receives a reference to a new data structure which is a copy of the cached
2154
version.  Copying the data structure will add a little processing overhead,
2155
therefore this scheme should only be used where the caller intends to modify
2156
the data structure (or wishes to protect itself from others who might).  This
2157
scheme uses B<Storable.pm> to perform the copy.
2158
 
2159
=back
2160
 
2161
Warning! The memory-based caching schemes compare the timestamp on the file to
2162
the time when it was last parsed.  If the file is stored on an NFS filesystem
2163
(or other network share) and the clock on the file server is not exactly
2164
synchronised with the clock where your script is run, updates to the source XML
2165
file may appear to be ignored.
2166
 
2167
=head2 ContentKey => 'keyname' I<# in+out - seldom used>
2168
 
2169
When text content is parsed to a hash value, this option let's you specify a
2170
name for the hash key to override the default 'content'.  So for example:
2171
 
2172
  XMLin('<opt one="1">Text</opt>', ContentKey => 'text')
2173
 
2174
will parse to:
2175
 
2176
  { 'one' => 1, 'text' => 'Text' }
2177
 
2178
instead of:
2179
 
2180
  { 'one' => 1, 'content' => 'Text' }
2181
 
2182
C<XMLout()> will also honour the value of this option when converting a hashref
2183
to XML.
2184
 
2185
You can also prefix your selected key name with a '-' character to have
2186
C<XMLin()> try a little harder to eliminate unnecessary 'content' keys after
2187
array folding.  For example:
2188
 
2189
  XMLin(
2190
    '<opt><item name="one">First</item><item name="two">Second</item></opt>',
2191
    KeyAttr => {item => 'name'},
2192
    ForceArray => [ 'item' ],
2193
    ContentKey => '-content'
2194
  )
2195
 
2196
will parse to:
2197
 
2198
  {
2199
    'item' => {
2200
      'one' =>  'First'
2201
      'two' =>  'Second'
2202
    }
2203
  }
2204
 
2205
rather than this (without the '-'):
2206
 
2207
  {
2208
    'item' => {
2209
      'one' => { 'content' => 'First' }
2210
      'two' => { 'content' => 'Second' }
2211
    }
2212
  }
2213
 
2214
=head2 DataHandler => code_ref I<# in - SAX only>
2215
 
2216
When you use an B<XML::Simple> object as a SAX handler, it will return a
2217
'simple tree' data structure in the same format as C<XMLin()> would return.  If
2218
this option is set (to a subroutine reference), then when the tree is built the
2219
subroutine will be called and passed two arguments: a reference to the
2220
B<XML::Simple> object and a reference to the data tree.  The return value from
2221
the subroutine will be returned to the SAX driver.  (See L<"SAX SUPPORT"> for
2222
more details).
2223
 
2224
=head2 ForceArray => 1 I<# in - important>
2225
 
2226
This option should be set to '1' to force nested elements to be represented
2227
as arrays even when there is only one.  Eg, with ForceArray enabled, this
2228
XML:
2229
 
2230
    <opt>
2231
      <name>value</name>
2232
    </opt>
2233
 
2234
would parse to this:
2235
 
2236
    {
2237
      'name' => [
2238
                  'value'
2239
                ]
2240
    }
2241
 
2242
instead of this (the default):
2243
 
2244
    {
2245
      'name' => 'value'
2246
    }
2247
 
2248
This option is especially useful if the data structure is likely to be written
2249
back out as XML and the default behaviour of rolling single nested elements up
2250
into attributes is not desirable.
2251
 
2252
If you are using the array folding feature, you should almost certainly enable
2253
this option.  If you do not, single nested elements will not be parsed to
2254
arrays and therefore will not be candidates for folding to a hash.  (Given that
2255
the default value of 'KeyAttr' enables array folding, the default value of this
2256
option should probably also have been enabled too - sorry).
2257
 
2258
=head2 ForceArray => [ names ] I<# in - important>
2259
 
2260
This alternative (and preferred) form of the 'ForceArray' option allows you to
2261
specify a list of element names which should always be forced into an array
2262
representation, rather than the 'all or nothing' approach above.
2263
 
2264
It is also possible (since version 2.05) to include compiled regular
2265
expressions in the list - any element names which match the pattern will be
2266
forced to arrays.  If the list contains only a single regex, then it is not
2267
necessary to enclose it in an arrayref.  Eg:
2268
 
2269
  ForceArray => qr/_list$/
2270
 
2271
=head2 ForceContent => 1 I<# in - seldom used>
2272
 
2273
When C<XMLin()> parses elements which have text content as well as attributes,
2274
the text content must be represented as a hash value rather than a simple
2275
scalar.  This option allows you to force text content to always parse to
2276
a hash value even when there are no attributes.  So for example:
2277
 
2278
  XMLin('<opt><x>text1</x><y a="2">text2</y></opt>', ForceContent => 1)
2279
 
2280
will parse to:
2281
 
2282
  {
2283
    'x' => {           'content' => 'text1' },
2284
    'y' => { 'a' => 2, 'content' => 'text2' }
2285
  }
2286
 
2287
instead of:
2288
 
2289
  {
2290
    'x' => 'text1',
2291
    'y' => { 'a' => 2, 'content' => 'text2' }
2292
  }
2293
 
2294
=head2 GroupTags => { grouping tag => grouped tag } I<# in+out - handy>
2295
 
2296
You can use this option to eliminate extra levels of indirection in your Perl
2297
data structure.  For example this XML:
2298
 
2299
  <opt>
2300
   <searchpath>
2301
     <dir>/usr/bin</dir>
2302
     <dir>/usr/local/bin</dir>
2303
     <dir>/usr/X11/bin</dir>
2304
   </searchpath>
2305
 </opt>
2306
 
2307
Would normally be read into a structure like this:
2308
 
2309
  {
2310
    searchpath => {
2311
                    dir => [ '/usr/bin', '/usr/local/bin', '/usr/X11/bin' ]
2312
                  }
2313
  }
2314
 
2315
But when read in with the appropriate value for 'GroupTags':
2316
 
2317
  my $opt = XMLin($xml, GroupTags => { searchpath => 'dir' });
2318
 
2319
It will return this simpler structure:
2320
 
2321
  {
2322
    searchpath => [ '/usr/bin', '/usr/local/bin', '/usr/X11/bin' ]
2323
  }
2324
 
2325
The grouping element (C<< <searchpath> >> in the example) must not contain any
2326
attributes or elements other than the grouped element.
2327
 
2328
You can specify multiple 'grouping element' to 'grouped element' mappings in
2329
the same hashref.  If this option is combined with C<KeyAttr>, the array
2330
folding will occur first and then the grouped element names will be eliminated.
2331
 
2332
C<XMLout> will also use the grouptag mappings to re-introduce the tags around
2333
the grouped elements.  Beware though that this will occur in all places that
2334
the 'grouping tag' name occurs - you probably don't want to use the same name
2335
for elements as well as attributes.
2336
 
2337
=head2 Handler => object_ref I<# out - SAX only>
2338
 
2339
Use the 'Handler' option to have C<XMLout()> generate SAX events rather than
2340
returning a string of XML.  For more details see L<"SAX SUPPORT"> below.
2341
 
2342
Note: the current implementation of this option generates a string of XML
2343
and uses a SAX parser to translate it into SAX events.  The normal encoding
2344
rules apply here - your data must be UTF8 encoded unless you specify an
2345
alternative encoding via the 'XMLDecl' option; and by the time the data reaches
2346
the handler object, it will be in UTF8 form regardless of the encoding you
2347
supply.  A future implementation of this option may generate the events
2348
directly.
2349
 
2350
=head2 KeepRoot => 1 I<# in+out - handy>
2351
 
2352
In its attempt to return a data structure free of superfluous detail and
2353
unnecessary levels of indirection, C<XMLin()> normally discards the root
2354
element name.  Setting the 'KeepRoot' option to '1' will cause the root element
2355
name to be retained.  So after executing this code:
2356
 
2357
  $config = XMLin('<config tempdir="/tmp" />', KeepRoot => 1)
2358
 
2359
You'll be able to reference the tempdir as
2360
C<$config-E<gt>{config}-E<gt>{tempdir}> instead of the default
2361
C<$config-E<gt>{tempdir}>.
2362
 
2363
Similarly, setting the 'KeepRoot' option to '1' will tell C<XMLout()> that the
2364
data structure already contains a root element name and it is not necessary to
2365
add another.
2366
 
2367
=head2 KeyAttr => [ list ] I<# in+out - important>
2368
 
2369
This option controls the 'array folding' feature which translates nested
2370
elements from an array to a hash.  It also controls the 'unfolding' of hashes
2371
to arrays.
2372
 
2373
For example, this XML:
2374
 
2375
    <opt>
2376
      <user login="grep" fullname="Gary R Epstein" />
2377
      <user login="stty" fullname="Simon T Tyson" />
2378
    </opt>
2379
 
2380
would, by default, parse to this:
2381
 
2382
    {
2383
      'user' => [
2384
                  {
2385
                    'login' => 'grep',
2386
                    'fullname' => 'Gary R Epstein'
2387
                  },
2388
                  {
2389
                    'login' => 'stty',
2390
                    'fullname' => 'Simon T Tyson'
2391
                  }
2392
                ]
2393
    }
2394
 
2395
If the option 'KeyAttr => "login"' were used to specify that the 'login'
2396
attribute is a key, the same XML would parse to:
2397
 
2398
    {
2399
      'user' => {
2400
                  'stty' => {
2401
                              'fullname' => 'Simon T Tyson'
2402
                            },
2403
                  'grep' => {
2404
                              'fullname' => 'Gary R Epstein'
2405
                            }
2406
                }
2407
    }
2408
 
2409
The key attribute names should be supplied in an arrayref if there is more
2410
than one.  C<XMLin()> will attempt to match attribute names in the order
2411
supplied.  C<XMLout()> will use the first attribute name supplied when
2412
'unfolding' a hash into an array.
2413
 
2414
Note 1: The default value for 'KeyAttr' is ['name', 'key', 'id'].  If you do
2415
not want folding on input or unfolding on output you must setting this option
2416
to an empty list to disable the feature.
2417
 
2418
Note 2: If you wish to use this option, you should also enable the
2419
C<ForceArray> option.  Without 'ForceArray', a single nested element will be
2420
rolled up into a scalar rather than an array and therefore will not be folded
2421
(since only arrays get folded).
2422
 
2423
=head2 KeyAttr => { list } I<# in+out - important>
2424
 
2425
This alternative (and preferred) method of specifiying the key attributes
2426
allows more fine grained control over which elements are folded and on which
2427
attributes.  For example the option 'KeyAttr => { package => 'id' } will cause
2428
any package elements to be folded on the 'id' attribute.  No other elements
2429
which have an 'id' attribute will be folded at all.
2430
 
2431
Note: C<XMLin()> will generate a warning (or a fatal error in L<"STRICT MODE">)
2432
if this syntax is used and an element which does not have the specified key
2433
attribute is encountered (eg: a 'package' element without an 'id' attribute, to
2434
use the example above).  Warnings will only be generated if B<-w> is in force.
2435
 
2436
Two further variations are made possible by prefixing a '+' or a '-' character
2437
to the attribute name:
2438
 
2439
The option 'KeyAttr => { user => "+login" }' will cause this XML:
2440
 
2441
    <opt>
2442
      <user login="grep" fullname="Gary R Epstein" />
2443
      <user login="stty" fullname="Simon T Tyson" />
2444
    </opt>
2445
 
2446
to parse to this data structure:
2447
 
2448
    {
2449
      'user' => {
2450
                  'stty' => {
2451
                              'fullname' => 'Simon T Tyson',
2452
                              'login'    => 'stty'
2453
                            },
2454
                  'grep' => {
2455
                              'fullname' => 'Gary R Epstein',
2456
                              'login'    => 'grep'
2457
                            }
2458
                }
2459
    }
2460
 
2461
The '+' indicates that the value of the key attribute should be copied rather
2462
than moved to the folded hash key.
2463
 
2464
A '-' prefix would produce this result:
2465
 
2466
    {
2467
      'user' => {
2468
                  'stty' => {
2469
                              'fullname' => 'Simon T Tyson',
2470
                              '-login'    => 'stty'
2471
                            },
2472
                  'grep' => {
2473
                              'fullname' => 'Gary R Epstein',
2474
                              '-login'    => 'grep'
2475
                            }
2476
                }
2477
    }
2478
 
2479
As described earlier, C<XMLout> will ignore hash keys starting with a '-'.
2480
 
2481
=head2 NoAttr => 1 I<# in+out - handy>
2482
 
2483
When used with C<XMLout()>, the generated XML will contain no attributes.
2484
All hash key/values will be represented as nested elements instead.
2485
 
2486
When used with C<XMLin()>, any attributes in the XML will be ignored.
2487
 
2488
=head2 NoEscape => 1 I<# out - seldom used>
2489
 
2490
By default, C<XMLout()> will translate the characters 'E<lt>', 'E<gt>', '&' and
2491
'"' to '&lt;', '&gt;', '&amp;' and '&quot' respectively.  Use this option to
2492
suppress escaping (presumably because you've already escaped the data in some
2493
more sophisticated manner).
2494
 
2495
=head2 NoIndent => 1 I<# out - seldom used>
2496
 
2497
Set this option to 1 to disable C<XMLout()>'s default 'pretty printing' mode.
2498
With this option enabled, the XML output will all be on one line (unless there
2499
are newlines in the data) - this may be easier for downstream processing.
2500
 
2501
=head2 NoSort => 1 I<# out - seldom used>
2502
 
2503
Newer versions of XML::Simple sort elements and attributes alphabetically (*),
2504
by default.  Enable this option to suppress the sorting - possibly for
2505
backwards compatibility.
2506
 
2507
* Actually, sorting is alphabetical but 'key' attribute or element names (as in
2508
'KeyAttr') sort first.  Also, when a hash of hashes is 'unfolded', the elements
2509
are sorted alphabetically by the value of the key field.
2510
 
2511
=head2 NormaliseSpace => 0 | 1 | 2 I<# in - handy>
2512
 
2513
This option controls how whitespace in text content is handled.  Recognised
2514
values for the option are:
2515
 
2516
=over 4
2517
 
2518
=item *
2519
 
2520
 
2521
normalisation of whitespace in attribute values which is mandated by the XML
2522
recommendation)
2523
 
2524
=item *
2525
 
2526
1 = whitespace is normalised in any value used as a hash key (normalising means
2527
removing leading and trailing whitespace and collapsing sequences of whitespace
2528
characters to a single space)
2529
 
2530
=item *
2531
 
2532
2 = whitespace is normalised in all text content
2533
 
2534
=back
2535
 
2536
Note: you can spell this option with a 'z' if that is more natural for you.
2537
 
2538
=head2 NSExpand => 1 I<# in+out handy - SAX only>
2539
 
2540
This option controls namespace expansion - the translation of element and
2541
attribute names of the form 'prefix:name' to '{uri}name'.  For example the
2542
element name 'xsl:template' might be expanded to:
2543
'{http://www.w3.org/1999/XSL/Transform}template'.
2544
 
2545
By default, C<XMLin()> will return element names and attribute names exactly as
2546
they appear in the XML.  Setting this option to 1 will cause all element and
2547
attribute names to be expanded to include their namespace prefix.
2548
 
2549
I<Note: You must be using a SAX parser for this option to work (ie: it does not
2550
work with XML::Parser)>.
2551
 
2552
This option also controls whether C<XMLout()> performs the reverse translation
2553
from '{uri}name' back to 'prefix:name'.  The default is no translation.  If
2554
your data contains expanded names, you should set this option to 1 otherwise
2555
C<XMLout> will emit XML which is not well formed.
2556
 
2557
I<Note: You must have the XML::NamespaceSupport module installed if you want
2558
C<XMLout()> to translate URIs back to prefixes>.
2559
 
2560
=head2 NumericEscape => 0 | 1 | 2 I<# out - handy>
2561
 
2562
Use this option to have 'high' (non-ASCII) characters in your Perl data
2563
structure converted to numeric entities (eg: &#8364;) in the XML output.  Three
2564
levels are possible:
2565
 
2566
 
2567
 
2568
1 - only characters above 0xFF are escaped (ie: characters in the 0x80-FF range are not escaped), possibly useful with ISO8859-1 output
2569
 
2570
2 - all characters above 0x7F are escaped (good for plain ASCII output)
2571
 
2572
=head2 OutputFile => <file specifier> I<# out - handy>
2573
 
2574
The default behaviour of C<XMLout()> is to return the XML as a string.  If you
2575
wish to write the XML to a file, simply supply the filename using the
2576
'OutputFile' option.  
2577
 
2578
This option also accepts an IO handle object - especially useful in Perl 5.8.0
2579
and later for output using an encoding other than UTF-8, eg:
2580
 
2581
  open my $fh, '>:encoding(iso-8859-1)', $path or die "open($path): $!";
2582
  XMLout($ref, OutputFile => $fh);
2583
 
2584
Note, XML::Simple does not require that the object you pass in to the
2585
OutputFile option inherits from L<IO::Handle> - it simply assumes the object
2586
supports a C<print> method.
2587
 
2588
=head2 ParserOpts => [ XML::Parser Options ] I<# in - don't use this>
2589
 
2590
I<Note: This option is now officially deprecated.  If you find it useful, email
2591
the author with an example of what you use it for.  Do not use this option to
2592
set the ProtocolEncoding, that's just plain wrong - fix the XML>.
2593
 
2594
This option allows you to pass parameters to the constructor of the underlying
2595
XML::Parser object (which of course assumes you're not using SAX).
2596
 
2597
=head2 RootName => 'string' I<# out - handy>
2598
 
2599
By default, when C<XMLout()> generates XML, the root element will be named
2600
'opt'.  This option allows you to specify an alternative name.
2601
 
2602
Specifying either undef or the empty string for the RootName option will
2603
produce XML with no root elements.  In most cases the resulting XML fragment
2604
will not be 'well formed' and therefore could not be read back in by C<XMLin()>.
2605
Nevertheless, the option has been found to be useful in certain circumstances.
2606
 
2607
=head2 SearchPath => [ list ] I<# in - handy>
2608
 
2609
If you pass C<XMLin()> a filename, but the filename include no directory
2610
component, you can use this option to specify which directories should be
2611
searched to locate the file.  You might use this option to search first in the
2612
user's home directory, then in a global directory such as /etc.
2613
 
2614
If a filename is provided to C<XMLin()> but SearchPath is not defined, the
2615
file is assumed to be in the current directory.
2616
 
2617
If the first parameter to C<XMLin()> is undefined, the default SearchPath
2618
will contain only the directory in which the script itself is located.
2619
Otherwise the default SearchPath will be empty.  
2620
 
2621
=head2 SuppressEmpty => 1 | '' | undef I<# in+out - handy>
2622
 
2623
This option controls what C<XMLin()> should do with empty elements (no
2624
attributes and no content).  The default behaviour is to represent them as
2625
empty hashes.  Setting this option to a true value (eg: 1) will cause empty
2626
elements to be skipped altogether.  Setting the option to 'undef' or the empty
2627
string will cause empty elements to be represented as the undefined value or
2628
the empty string respectively.  The latter two alternatives are a little
2629
easier to test for in your code than a hash with no keys.
2630
 
2631
The option also controls what C<XMLout()> does with undefined values.  Setting
2632
the option to undef causes undefined values to be output as empty elements
2633
(rather than empty attributes), it also suppresses the generation of warnings
2634
about undefined values.  Setting the option to a true value (eg: 1) causes
2635
undefined values to be skipped altogether on output.
2636
 
2637
=head2 ValueAttr => [ names ] I<# in - handy>
2638
 
2639
Use this option to deal elements which always have a single attribute and no
2640
content.  Eg:
2641
 
2642
  <opt>
2643
    <colour value="red" />
2644
    <size   value="XXL" />
2645
  </opt>
2646
 
2647
Setting C<< ValueAttr => [ 'value' ] >> will cause the above XML to parse to:
2648
 
2649
  {
2650
    colour => 'red',
2651
    size   => 'XXL'
2652
  }
2653
 
2654
instead of this (the default):
2655
 
2656
  {
2657
    colour => { value => 'red' },
2658
    size   => { value => 'XXL' }
2659
  }
2660
 
2661
Note: This form of the ValueAttr option is not compatible with C<XMLout()> -
2662
since the attribute name is discarded at parse time, the original XML cannot be
2663
reconstructed.
2664
 
2665
=head2 ValueAttr => { element => attribute, ... } I<# in+out - handy>
2666
 
2667
This (preferred) form of the ValueAttr option requires you to specify both
2668
the element and the attribute names.  This is not only safer, it also allows
2669
the original XML to be reconstructed by C<XMLout()>.
2670
 
2671
Note: You probably don't want to use this option and the NoAttr option at the
2672
same time.
2673
 
2674
=head2 Variables => { name => value } I<# in - handy>
2675
 
2676
This option allows variables in the XML to be expanded when the file is read.
2677
(there is no facility for putting the variable names back if you regenerate
2678
XML using C<XMLout>).
2679
 
2680
A 'variable' is any text of the form C<${name}> which occurs in an attribute
2681
value or in the text content of an element.  If 'name' matches a key in the
2682
supplied hashref, C<${name}> will be replaced with the corresponding value from
2683
the hashref.  If no matching key is found, the variable will not be replaced.
2684
Names must match the regex: C<[\w.]+> (ie: only 'word' characters and dots are
2685
allowed).
2686
 
2687
=head2 VarAttr => 'attr_name' I<# in - handy>
2688
 
2689
In addition to the variables defined using C<Variables>, this option allows
2690
variables to be defined in the XML.  A variable definition consists of an
2691
element with an attribute called 'attr_name' (the value of the C<VarAttr>
2692
option).  The value of the attribute will be used as the variable name and the
2693
text content of the element will be used as the value.  A variable defined in
2694
this way will override a variable defined using the C<Variables> option.  For
2695
example:
2696
 
2697
  XMLin( '<opt>
2698
            <dir name="prefix">/usr/local/apache</dir>
2699
            <dir name="exec_prefix">${prefix}</dir>
2700
            <dir name="bindir">${exec_prefix}/bin</dir>
2701
          </opt>',
2702
         VarAttr => 'name', ContentKey => '-content'
2703
        );
2704
 
2705
produces the following data structure:
2706
 
2707
  {
2708
    dir => {
2709
             prefix      => '/usr/local/apache',
2710
             exec_prefix => '/usr/local/apache',
2711
             bindir      => '/usr/local/apache/bin',
2712
           }
2713
  }
2714
 
2715
=head2 XMLDecl => 1  or  XMLDecl => 'string'  I<# out - handy>
2716
 
2717
If you want the output from C<XMLout()> to start with the optional XML
2718
declaration, simply set the option to '1'.  The default XML declaration is:
2719
 
2720
        <?xml version='1.0' standalone='yes'?>
2721
 
2722
If you want some other string (for example to declare an encoding value), set
2723
the value of this option to the complete string you require.
2724
 
2725
 
2726
=head1 OPTIONAL OO INTERFACE
2727
 
2728
The procedural interface is both simple and convenient however there are a
2729
couple of reasons why you might prefer to use the object oriented (OO)
2730
interface:
2731
 
2732
=over 4
2733
 
2734
=item *
2735
 
2736
to define a set of default values which should be used on all subsequent calls
2737
to C<XMLin()> or C<XMLout()>
2738
 
2739
=item *
2740
 
2741
to override methods in B<XML::Simple> to provide customised behaviour
2742
 
2743
=back
2744
 
2745
The default values for the options described above are unlikely to suit
2746
everyone.  The OO interface allows you to effectively override B<XML::Simple>'s
2747
defaults with your preferred values.  It works like this:
2748
 
2749
First create an XML::Simple parser object with your preferred defaults:
2750
 
2751
  my $xs = XML::Simple->new(ForceArray => 1, KeepRoot => 1);
2752
 
2753
then call C<XMLin()> or C<XMLout()> as a method of that object:
2754
 
2755
  my $ref = $xs->XMLin($xml);
2756
  my $xml = $xs->XMLout($ref);
2757
 
2758
You can also specify options when you make the method calls and these values
2759
will be merged with the values specified when the object was created.  Values
2760
specified in a method call take precedence.
2761
 
2762
Note: when called as methods, the C<XMLin()> and C<XMLout()> routines may be
2763
called as C<xml_in()> or C<xml_out()>.  The method names are aliased so the
2764
only difference is the aesthetics.
2765
 
2766
=head2 Parsing Methods
2767
 
2768
You can explicitly call one of the following methods rather than rely on the
2769
C<xml_in()> method automatically determining whether the target to be parsed is
2770
a string, a file or a filehandle:
2771
 
2772
=over 4
2773
 
2774
=item parse_string(text)
2775
 
2776
Works exactly like the C<xml_in()> method but assumes the first argument is
2777
a string of XML (or a reference to a scalar containing a string of XML).
2778
 
2779
=item parse_file(filename)
2780
 
2781
Works exactly like the C<xml_in()> method but assumes the first argument is
2782
the name of a file containing XML.
2783
 
2784
=item parse_fh(file_handle)
2785
 
2786
Works exactly like the C<xml_in()> method but assumes the first argument is
2787
a filehandle which can be read to get XML.
2788
 
2789
=back
2790
 
2791
=head2 Hook Methods
2792
 
2793
You can make your own class which inherits from XML::Simple and overrides
2794
certain behaviours.  The following methods may provide useful 'hooks' upon
2795
which to hang your modified behaviour.  You may find other undocumented methods
2796
by examining the source, but those may be subject to change in future releases.
2797
 
2798
=over 4
2799
 
2800
=item handle_options(direction, name => value ...)
2801
 
2802
This method will be called when one of the parsing methods or the C<XMLout()>
2803
method is called.  The initial argument will be a string (either 'in' or 'out')
2804
and the remaining arguments will be name value pairs.
2805
 
2806
=item default_config_file()
2807
 
2808
Calculates and returns the name of the file which should be parsed if no
2809
filename is passed to C<XMLin()> (default: C<$0.xml>).
2810
 
2811
=item build_simple_tree(filename, string)
2812
 
2813
Called from C<XMLin()> or any of the parsing methods.  Takes either a file name
2814
as the first argument or C<undef> followed by a 'string' as the second
2815
argument.  Returns a simple tree data structure.  You could override this
2816
method to apply your own transformations before the data structure is returned
2817
to the caller.
2818
 
2819
=item new_hashref()
2820
 
2821
When the 'simple tree' data structure is being built, this method will be
2822
called to create any required anonymous hashrefs.
2823
 
2824
=item sorted_keys(name, hashref)
2825
 
2826
Called when C<XMLout()> is translating a hashref to XML.  This routine returns
2827
a list of hash keys in the order that the corresponding attributes/elements
2828
should appear in the output.
2829
 
2830
=item escape_value(string)
2831
 
2832
Called from C<XMLout()>, takes a string and returns a copy of the string with
2833
XML character escaping rules applied.
2834
 
2835
=item numeric_escape(string)
2836
 
2837
Called from C<escape_value()>, to handle non-ASCII characters (depending on the
2838
value of the NumericEscape option).
2839
 
2840
=item copy_hash(hashref, extra_key => value, ...)
2841
 
2842
Called from C<XMLout()>, when 'unfolding' a hash of hashes into an array of
2843
hashes.  You might wish to override this method if you're using tied hashes and
2844
don't want them to get untied.
2845
 
2846
=back
2847
 
2848
=head2 Cache Methods
2849
 
2850
XML::Simple implements three caching schemes ('storable', 'memshare' and
2851
'memcopy').  You can implement a custom caching scheme by implementing
2852
two methods - one for reading from the cache and one for writing to it.
2853
 
2854
For example, you might implement a new 'dbm' scheme that stores cached data
2855
structures using the L<MLDBM> module.  First, you would add a
2856
C<cache_read_dbm()> method which accepted a filename for use as a lookup key
2857
and returned a data structure on success, or undef on failure.  Then, you would
2858
implement a C<cache_read_dbm()> method which accepted a data structure and a
2859
filename.
2860
 
2861
You would use this caching scheme by specifying the option:
2862
 
2863
  Cache => [ 'dbm' ]
2864
 
2865
=head1 STRICT MODE
2866
 
2867
If you import the B<XML::Simple> routines like this:
2868
 
2869
  use XML::Simple qw(:strict);
2870
 
2871
the following common mistakes will be detected and treated as fatal errors
2872
 
2873
=over 4
2874
 
2875
=item *
2876
 
2877
Failing to explicitly set the C<KeyAttr> option - if you can't be bothered
2878
reading about this option, turn it off with: KeyAttr => [ ]
2879
 
2880
=item *
2881
 
2882
Failing to explicitly set the C<ForceArray> option - if you can't be bothered
2883
reading about this option, set it to the safest mode with: ForceArray => 1
2884
 
2885
=item *
2886
 
2887
Setting ForceArray to an array, but failing to list all the elements from the
2888
KeyAttr hash.
2889
 
2890
=item *
2891
 
2892
Data error - KeyAttr is set to say { part => 'partnum' } but the XML contains
2893
one or more E<lt>partE<gt> elements without a 'partnum' attribute (or nested
2894
element).  Note: if strict mode is not set but -w is, this condition triggers a
2895
warning.
2896
 
2897
=item *
2898
 
2899
Data error - as above, but non-unique values are present in the key attribute
2900
(eg: more than one E<lt>partE<gt> element with the same partnum).  This will
2901
also trigger a warning if strict mode is not enabled.
2902
 
2903
=item *
2904
 
2905
Data error - as above, but value of key attribute (eg: partnum) is not a
2906
scalar string (due to nested elements etc).  This will also trigger a warning
2907
if strict mode is not enabled.
2908
 
2909
=back
2910
 
2911
=head1 SAX SUPPORT
2912
 
2913
From version 1.08_01, B<XML::Simple> includes support for SAX (the Simple API
2914
for XML) - specifically SAX2.
2915
 
2916
In a typical SAX application, an XML parser (or SAX 'driver') module generates
2917
SAX events (start of element, character data, end of element, etc) as it parses
2918
an XML document and a 'handler' module processes the events to extract the
2919
required data.  This simple model allows for some interesting and powerful
2920
possibilities:
2921
 
2922
=over 4
2923
 
2924
=item *
2925
 
2926
Applications written to the SAX API can extract data from huge XML documents
2927
without the memory overheads of a DOM or tree API.
2928
 
2929
=item *
2930
 
2931
The SAX API allows for plug and play interchange of parser modules without
2932
having to change your code to fit a new module's API.  A number of SAX parsers
2933
are available with capabilities ranging from extreme portability to blazing
2934
performance.
2935
 
2936
=item *
2937
 
2938
A SAX 'filter' module can implement both a handler interface for receiving
2939
data and a generator interface for passing modified data on to a downstream
2940
handler.  Filters can be chained together in 'pipelines'.
2941
 
2942
=item *
2943
 
2944
One filter module might split a data stream to direct data to two or more
2945
downstream handlers.
2946
 
2947
=item *
2948
 
2949
Generating SAX events is not the exclusive preserve of XML parsing modules.
2950
For example, a module might extract data from a relational database using DBI
2951
and pass it on to a SAX pipeline for filtering and formatting.
2952
 
2953
=back
2954
 
2955
B<XML::Simple> can operate at either end of a SAX pipeline.  For example,
2956
you can take a data structure in the form of a hashref and pass it into a
2957
SAX pipeline using the 'Handler' option on C<XMLout()>:
2958
 
2959
  use XML::Simple;
2960
  use Some::SAX::Filter;
2961
  use XML::SAX::Writer;
2962
 
2963
  my $ref = {
2964
               ....   # your data here
2965
            };
2966
 
2967
  my $writer = XML::SAX::Writer->new();
2968
  my $filter = Some::SAX::Filter->new(Handler => $writer);
2969
  my $simple = XML::Simple->new(Handler => $filter);
2970
  $simple->XMLout($ref);
2971
 
2972
You can also put B<XML::Simple> at the opposite end of the pipeline to take
2973
advantage of the simple 'tree' data structure once the relevant data has been
2974
isolated through filtering:
2975
 
2976
  use XML::SAX;
2977
  use Some::SAX::Filter;
2978
  use XML::Simple;
2979
 
2980
  my $simple = XML::Simple->new(ForceArray => 1, KeyAttr => ['partnum']);
2981
  my $filter = Some::SAX::Filter->new(Handler => $simple);
2982
  my $parser = XML::SAX::ParserFactory->parser(Handler => $filter);
2983
 
2984
  my $ref = $parser->parse_uri('some_huge_file.xml');
2985
 
2986
  print $ref->{part}->{'555-1234'};
2987
 
2988
You can build a filter by using an XML::Simple object as a handler and setting
2989
its DataHandler option to point to a routine which takes the resulting tree,
2990
modifies it and sends it off as SAX events to a downstream handler:
2991
 
2992
  my $writer = XML::SAX::Writer->new();
2993
  my $filter = XML::Simple->new(
2994
                 DataHandler => sub {
2995
                                  my $simple = shift;
2996
                                  my $data = shift;
2997
 
2998
                                  # Modify $data here
2999
 
3000
                                  $simple->XMLout($data, Handler => $writer);
3001
                                }
3002
               );
3003
  my $parser = XML::SAX::ParserFactory->parser(Handler => $filter);
3004
 
3005
  $parser->parse_uri($filename);
3006
 
3007
I<Note: In this last example, the 'Handler' option was specified in the call to
3008
C<XMLout()> but it could also have been specified in the constructor>.
3009
 
3010
=head1 ENVIRONMENT
3011
 
3012
If you don't care which parser module B<XML::Simple> uses then skip this
3013
section entirely (it looks more complicated than it really is).
3014
 
3015
B<XML::Simple> will default to using a B<SAX> parser if one is available or
3016
B<XML::Parser> if SAX is not available.
3017
 
3018
You can dictate which parser module is used by setting either the environment
3019
variable 'XML_SIMPLE_PREFERRED_PARSER' or the package variable
3020
$XML::Simple::PREFERRED_PARSER to contain the module name.  The following rules
3021
are used:
3022
 
3023
=over 4
3024
 
3025
=item *
3026
 
3027
The package variable takes precedence over the environment variable if both are defined.  To force B<XML::Simple> to ignore the environment settings and use
3028
its default rules, you can set the package variable to an empty string.
3029
 
3030
=item *
3031
 
3032
If the 'preferred parser' is set to the string 'XML::Parser', then
3033
L<XML::Parser> will be used (or C<XMLin()> will die if L<XML::Parser> is not
3034
installed).
3035
 
3036
=item *
3037
 
3038
If the 'preferred parser' is set to some other value, then it is assumed to be
3039
the name of a SAX parser module and is passed to L<XML::SAX::ParserFactory.>
3040
If L<XML::SAX> is not installed, or the requested parser module is not
3041
installed, then C<XMLin()> will die.
3042
 
3043
=item *
3044
 
3045
If the 'preferred parser' is not defined at all (the normal default
3046
state), an attempt will be made to load L<XML::SAX>.  If L<XML::SAX> is
3047
installed, then a parser module will be selected according to
3048
L<XML::SAX::ParserFactory>'s normal rules (which typically means the last SAX
3049
parser installed).
3050
 
3051
=item *
3052
 
3053
if the 'preferred parser' is not defined and B<XML::SAX> is not
3054
installed, then B<XML::Parser> will be used.  C<XMLin()> will die if
3055
L<XML::Parser> is not installed.
3056
 
3057
=back
3058
 
3059
Note: The B<XML::SAX> distribution includes an XML parser written entirely in
3060
Perl.  It is very portable but it is not very fast.  You should consider
3061
installing L<XML::LibXML> or L<XML::SAX::Expat> if they are available for your
3062
platform.
3063
 
3064
=head1 ERROR HANDLING
3065
 
3066
The XML standard is very clear on the issue of non-compliant documents.  An
3067
error in parsing any single element (for example a missing end tag) must cause
3068
the whole document to be rejected.  B<XML::Simple> will die with an appropriate
3069
message if it encounters a parsing error.
3070
 
3071
If dying is not appropriate for your application, you should arrange to call
3072
C<XMLin()> in an eval block and look for errors in $@.  eg:
3073
 
3074
    my $config = eval { XMLin() };
3075
    PopUpMessage($@) if($@);
3076
 
3077
Note, there is a common misconception that use of B<eval> will significantly
3078
slow down a script.  While that may be true when the code being eval'd is in a
3079
string, it is not true of code like the sample above.
3080
 
3081
=head1 EXAMPLES
3082
 
3083
When C<XMLin()> reads the following very simple piece of XML:
3084
 
3085
    <opt username="testuser" password="frodo"></opt>
3086
 
3087
it returns the following data structure:
3088
 
3089
    {
3090
      'username' => 'testuser',
3091
      'password' => 'frodo'
3092
    }
3093
 
3094
The identical result could have been produced with this alternative XML:
3095
 
3096
    <opt username="testuser" password="frodo" />
3097
 
3098
Or this (although see 'ForceArray' option for variations):
3099
 
3100
    <opt>
3101
      <username>testuser</username>
3102
      <password>frodo</password>
3103
    </opt>
3104
 
3105
Repeated nested elements are represented as anonymous arrays:
3106
 
3107
    <opt>
3108
      <person firstname="Joe" lastname="Smith">
3109
        <email>joe@smith.com</email>
3110
        <email>jsmith@yahoo.com</email>
3111
      </person>
3112
      <person firstname="Bob" lastname="Smith">
3113
        <email>bob@smith.com</email>
3114
      </person>
3115
    </opt>
3116
 
3117
    {
3118
      'person' => [
3119
                    {
3120
                      'email' => [
3121
                                   'joe@smith.com',
3122
                                   'jsmith@yahoo.com'
3123
                                 ],
3124
                      'firstname' => 'Joe',
3125
                      'lastname' => 'Smith'
3126
                    },
3127
                    {
3128
                      'email' => 'bob@smith.com',
3129
                      'firstname' => 'Bob',
3130
                      'lastname' => 'Smith'
3131
                    }
3132
                  ]
3133
    }
3134
 
3135
Nested elements with a recognised key attribute are transformed (folded) from
3136
an array into a hash keyed on the value of that attribute (see the C<KeyAttr>
3137
option):
3138
 
3139
    <opt>
3140
      <person key="jsmith" firstname="Joe" lastname="Smith" />
3141
      <person key="tsmith" firstname="Tom" lastname="Smith" />
3142
      <person key="jbloggs" firstname="Joe" lastname="Bloggs" />
3143
    </opt>
3144
 
3145
    {
3146
      'person' => {
3147
                    'jbloggs' => {
3148
                                   'firstname' => 'Joe',
3149
                                   'lastname' => 'Bloggs'
3150
                                 },
3151
                    'tsmith' => {
3152
                                  'firstname' => 'Tom',
3153
                                  'lastname' => 'Smith'
3154
                                },
3155
                    'jsmith' => {
3156
                                  'firstname' => 'Joe',
3157
                                  'lastname' => 'Smith'
3158
                                }
3159
                  }
3160
    }
3161
 
3162
 
3163
The <anon> tag can be used to form anonymous arrays:
3164
 
3165
    <opt>
3166
      <head><anon>Col 1</anon><anon>Col 2</anon><anon>Col 3</anon></head>
3167
      <data><anon>R1C1</anon><anon>R1C2</anon><anon>R1C3</anon></data>
3168
      <data><anon>R2C1</anon><anon>R2C2</anon><anon>R2C3</anon></data>
3169
      <data><anon>R3C1</anon><anon>R3C2</anon><anon>R3C3</anon></data>
3170
    </opt>
3171
 
3172
    {
3173
      'head' => [
3174
                  [ 'Col 1', 'Col 2', 'Col 3' ]
3175
                ],
3176
      'data' => [
3177
                  [ 'R1C1', 'R1C2', 'R1C3' ],
3178
                  [ 'R2C1', 'R2C2', 'R2C3' ],
3179
                  [ 'R3C1', 'R3C2', 'R3C3' ]
3180
                ]
3181
    }
3182
 
3183
Anonymous arrays can be nested to arbirtrary levels and as a special case, if
3184
the surrounding tags for an XML document contain only an anonymous array the
3185
arrayref will be returned directly rather than the usual hashref:
3186
 
3187
    <opt>
3188
      <anon><anon>Col 1</anon><anon>Col 2</anon></anon>
3189
      <anon><anon>R1C1</anon><anon>R1C2</anon></anon>
3190
      <anon><anon>R2C1</anon><anon>R2C2</anon></anon>
3191
    </opt>
3192
 
3193
    [
3194
      [ 'Col 1', 'Col 2' ],
3195
      [ 'R1C1', 'R1C2' ],
3196
      [ 'R2C1', 'R2C2' ]
3197
    ]
3198
 
3199
Elements which only contain text content will simply be represented as a
3200
scalar.  Where an element has both attributes and text content, the element
3201
will be represented as a hashref with the text content in the 'content' key
3202
(see the C<ContentKey> option):
3203
 
3204
  <opt>
3205
    <one>first</one>
3206
    <two attr="value">second</two>
3207
  </opt>
3208
 
3209
  {
3210
    'one' => 'first',
3211
    'two' => { 'attr' => 'value', 'content' => 'second' }
3212
  }
3213
 
3214
Mixed content (elements which contain both text content and nested elements)
3215
will be not be represented in a useful way - element order and significant
3216
whitespace will be lost.  If you need to work with mixed content, then
3217
XML::Simple is not the right tool for your job - check out the next section.
3218
 
3219
=head1 WHERE TO FROM HERE?
3220
 
3221
B<XML::Simple> is able to present a simple API because it makes some
3222
assumptions on your behalf.  These include:
3223
 
3224
=over 4
3225
 
3226
=item *
3227
 
3228
You're not interested in text content consisting only of whitespace
3229
 
3230
=item *
3231
 
3232
You don't mind that when things get slurped into a hash the order is lost
3233
 
3234
=item *
3235
 
3236
You don't want fine-grained control of the formatting of generated XML
3237
 
3238
=item *
3239
 
3240
You would never use a hash key that was not a legal XML element name
3241
 
3242
=item *
3243
 
3244
You don't need help converting between different encodings
3245
 
3246
=back
3247
 
3248
In a serious XML project, you'll probably outgrow these assumptions fairly
3249
quickly.  This section of the document used to offer some advice on chosing a
3250
more powerful option.  That advice has now grown into the 'Perl-XML FAQ'
3251
document which you can find at: L<http://perl-xml.sourceforge.net/faq/>
3252
 
3253
The advice in the FAQ boils down to a quick explanation of tree versus
3254
event based parsers and then recommends:
3255
 
3256
For event based parsing, use SAX (do not set out to write any new code for
3257
XML::Parser's handler API - it is obselete).
3258
 
3259
For tree-based parsing, you could choose between the 'Perlish' approach of
3260
L<XML::Twig> and more standards based DOM implementations - preferably one with
3261
XPath support.
3262
 
3263
 
3264
=head1 SEE ALSO
3265
 
3266
B<XML::Simple> requires either L<XML::Parser> or L<XML::SAX>.
3267
 
3268
To generate documents with namespaces, L<XML::NamespaceSupport> is required.
3269
 
3270
The optional caching functions require L<Storable>.
3271
 
3272
Answers to Frequently Asked Questions about XML::Simple are bundled with this
3273
distribution as: L<XML::Simple::FAQ>
3274
 
3275
=head1 COPYRIGHT
3276
 
3277
Copyright 1999-2004 Grant McLean E<lt>grantm@cpan.orgE<gt>
3278
 
3279
This library is free software; you can redistribute it and/or modify it
3280
under the same terms as Perl itself.
3281
 
3282
=cut
3283
 
3284