XML, the Perl Way

File Coverage

File: blib/lib/XML/Twig.pm
Coverage: 93.1%

line stmt bran cond sub pod time code
1
262
262
262
262
691847
249
13395
use strict;
2
256
256
256
256
852
228
13403
use warnings; # > perl 5.5
3
4 # This is created in the caller's space
5 # I realize (now!) that it's not clean, but it's been there for 10+ years...
6 BEGIN
7
230
6
230
6
1995
64
{ sub ::PCDATA { '#PCDATA' } ## no critic (Subroutines::ProhibitNestedSubs);
8
5
5
34
  sub ::CDATA { '#CDATA' } ## no critic (Subroutines::ProhibitNestedSubs);
9 }
10
11
230
230
226
226
57901
6374
6460
use UNIVERSAL();
12
13 ## if a sub returns a scalar, it better not bloody disappear in list context
14 ## no critic (Subroutines::ProhibitExplicitReturnUndef);
15
16 my $perl_version;
17 my $parser_version;
18
19 ######################################################################
20 package XML::Twig;
21 ######################################################################
22
23 require 5.004;
24
25
226
226
219
219
49922
5157
666
use utf8; # > perl 5.5
26
27
219
219
211
211
4660
3916
7758
use vars qw($VERSION @ISA %valid_option);
28
29
211
211
211
211
556
3017
7566
use Carp;
30
211
211
211
211
557
3037
1323
use File::Spec;
31
211
211
208
208
2727
3040
10516
use File::Basename;
32
33 *isa= *UNIVERSAL::isa;
34
35 # flag, set to true if the weaken sub is available
36
208
208
206
206
591
2555
75658
use vars qw( $weakrefs);
37
38 # flag set to true if the version of expat seems to be 1.95.2, which has annoying bugs
39 # wrt doctype handling. This is global for performance reasons.
40 my $expat_1_95_2=0;
41
42 # a slight non-xml mod: # is allowed as a first character
43 my $REG_TAG_FIRST_LETTER;
44 #$REG_TAG_FIRST_LETTER= q{(?:[^\W\d]|[:#_])}; # < perl 5.6 - does not work for leading non-ascii letters
45 $REG_TAG_FIRST_LETTER= q{(?:[[:alpha:]:#_])}; # >= perl 5.6
46
47 my $REG_TAG_LETTER= q{(?:[\w_.-]*)};
48
49 # a simple name (no colon)
50 my $REG_NAME_TOKEN= qq{(?:$REG_TAG_FIRST_LETTER$REG_TAG_LETTER*)};
51
52 # a tag name, possibly including namespace
53 my $REG_NAME= qq{(?:(?:$REG_NAME_TOKEN:)?$REG_NAME_TOKEN)};
54
55 # tag name (leading # allowed)
56 # first line is for perl 5.005, second line for modern perl, that accept character classes
57 my $REG_TAG_NAME=$REG_NAME;
58
59 # name or wildcard (* or '') (leading # allowed)
60 my $REG_NAME_W = qq{(?:$REG_NAME|[*])};
61
62 # class and ids are deliberatly permissive
63 my $REG_NTOKEN_FIRST_LETTER;
64 #$REG_NTOKEN_FIRST_LETTER= q{(?:[^\W\d]|[:_])}; # < perl 5.6 - does not work for leading non-ascii letters
65 $REG_NTOKEN_FIRST_LETTER= q{(?:[[:alpha:]:_])}; # >= perl 5.6
66
67 my $REG_NTOKEN_LETTER= q{(?:[\w_:.-]*)};
68
69 my $REG_NTOKEN= qq{(?:$REG_NTOKEN_FIRST_LETTER$REG_NTOKEN_LETTER*)};
70 my $REG_CLASS = $REG_NTOKEN;
71 my $REG_ID = $REG_NTOKEN;
72
73 # allow <tag> #<tag> (private elt) * <tag>.<class> *.<class> <tag>#<id> *#<id>
74 my $REG_TAG_PART= qq{(?:$REG_NAME_W(?:[.]$REG_CLASS|[#]$REG_ID)?|[.]$REG_CLASS)};
75
76 my $REG_REGEXP = q{(?:/(?:[^\\/]|\\.)*/[eimsox]*)}; # regexp
77 my $REG_MATCH = q{[!=]~}; # match (or not)
78 my $REG_STRING = q{(?:"(?:[^\\"]|\\.)*"|'(?:[^\\']|\\.)*')}; # string (simple or double quoted)
79 my $REG_NUMBER = q{(?:\d+(?:\.\d*)?|\.\d+)}; # number
80 my $REG_VALUE = qq{(?:$REG_STRING|$REG_NUMBER)}; # value
81 my $REG_OP = q{==|!=|>|<|>=|<=|eq|ne|lt|gt|le|ge|=}; # op
82 my $REG_FUNCTION = q{(?:string|text)\(\s*\)};
83 my $REG_STRING_ARG = qq{(?:string|text)\\(\\s*$REG_NAME_W\\s*\\)};
84 my $REG_COMP = q{(?:>=|<=|!=|<|>|=)};
85
86 my $REG_TAG_IN_PREDICATE= $REG_NAME_W . q{(?=\s*(?i:and\b|or\b|\]|$))};
87
88 # keys in the context stack, chosen not to interfere with att names, even private (#-prefixed) ones
89 my $ST_TAG = '##tag';
90 my $ST_ELT = '##elt';
91 my $ST_NS = '##ns' ;
92
93 # used in the handler trigger code
94 my $REG_NAKED_PREDICATE= qq{((?:"[^"]*"|'[^']*'|$REG_STRING_ARG|$REG_FUNCTION|\@$REG_NAME_W|$REG_MATCH\\s*$REG_REGEXP|[\\s\\d><=!()+.-]|(?i:and)|(?i:or)|$REG_TAG_IN_PREDICATE)*)};
95 my $REG_PREDICATE= qq{\\[$REG_NAKED_PREDICATE\\]};
96
97 # not all axis, only supported ones (in get_xpath)
98 my @supported_axis= ( 'ancestor', 'ancestor-or-self', 'child', 'descendant', 'descendant-or-self',
99                       'following', 'following-sibling', 'parent', 'preceding', 'preceding-sibling', 'self'
100                     );
101 my $REG_AXIS = "(?:" . join( '|', @supported_axis) .")";
102
103 # only used in the "xpath"engine (for get_xpath/findnodes) for now
104 my $REG_PREDICATE_ALT = qr{\[(?:(?:string\(\s*\)|\@$REG_TAG_NAME)\s*$REG_MATCH\s*$REG_REGEXP\s*|[^\]]*)\]};
105
106 # used to convert XPath tests on strings to the perl equivalent
107 my %PERL_ALPHA_TEST= ( '=' => ' eq ', '!=' => ' ne ', '>' => ' gt ', '>=' => ' ge ', '<' => ' lt ', '<=' => ' le ');
108
109 my( $FB_HTMLCREF, $FB_XMLCREF);
110
111 my $NO_WARNINGS= $perl_version >= 5.006 ? 'no warnings' : 'local $^W=0';
112
113 # default namespaces, both ways
114 my %DEFAULT_NS= ( xml => "http://www.w3.org/XML/1998/namespace",
115                   xmlns => "http://www.w3.org/2000/xmlns/",
116                 );
117 my %DEFAULT_URI2NS= map { $DEFAULT_NS{$_} => $_ } keys %DEFAULT_NS;
118
119 # constants
120 my( $PCDATA, $CDATA, $PI, $COMMENT, $ENT, $ELT, $TEXT, $ASIS, $EMPTY, $BUFSIZE);
121
122 # used when an HTML doc only has a PUBLIC declaration, to generate the SYSTEM one
123 # this should really be done by HTML::TreeBuilder, but as of HTML::TreeBuilder 4.2 it isn't
124 # the various declarations are taken from http://en.wikipedia.org/wiki/Document_Type_Declaration
125 my %HTML_DECL= ( "-//W3C//DTD HTML 4.0 Transitional//EN" => "http://www.w3.org/TR/REC-html40/loose.dtd",
126                  "-//W3C//DTD HTML 4.01//EN" => "http://www.w3.org/TR/html4/strict.dtd",
127                  "-//W3C//DTD HTML 4.01 Transitional//EN" => "http://www.w3.org/TR/html4/loose.dtd",
128                  "-//W3C//DTD HTML 4.01 Frameset//EN" => "http://www.w3.org/TR/html4/frameset.dtd",
129                  "-//W3C//DTD XHTML 1.0 Strict//EN" => "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd",
130                  "-//W3C//DTD XHTML 1.0 Transitional//EN" => "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd",
131                  "-//W3C//DTD XHTML 1.0 Frameset//EN" => "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd",
132                  "-//W3C//DTD XHTML 1.1//EN" => "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd",
133                  "-//W3C//DTD XHTML Basic 1.0//EN" => "http://www.w3.org/TR/xhtml-basic/xhtml-basic10.dtd",
134                  "-//W3C//DTD XHTML Basic 1.1//EN" => "http://www.w3.org/TR/xhtml-basic/xhtml-basic11.dtd",
135                  "-//WAPFORUM//DTD XHTML Mobile 1.0//EN" => "http://www.wapforum.org/DTD/xhtml-mobile10.dtd",
136                  "-//WAPFORUM//DTD XHTML Mobile 1.1//EN" => "http://www.openmobilealliance.org/tech/DTD/xhtml-mobile11.dtd",
137                  "-//WAPFORUM//DTD XHTML Mobile 1.2//EN" => "http://www.openmobilealliance.org/tech/DTD/xhtml-mobile12.dtd",
138                  "-//W3C//DTD XHTML+RDFa 1.0//EN" => "http://www.w3.org/MarkUp/DTD/xhtml-rdfa-1.dtd",
139                );
140
141 my $DEFAULT_HTML_TYPE= "-//W3C//DTD HTML 4.0 Transitional//EN";
142
143 my $SEP= qr/\s*(?:$|\|)/;
144
145 BEGIN
146 {
147
204
203
61105
$VERSION = '3.47';
148
149
206
206
204
204
59459
927726
1319
use XML::Parser;
150
204
2183
my $needVersion = '2.23';
151
203
431
($parser_version= $XML::Parser::VERSION)=~ s{_\d+}{}; # remove _<n> from version so numeric tests do not warn
152
203
100
1233
croak "need at least XML::Parser version $needVersion" unless $parser_version >= $needVersion;
153
154
202
2003
($perl_version= $])=~ s{_\d+}{};
155
156
201
50
482
if( $perl_version >= 5.008)
157
201
187
187
187
201
9063
47386
1046721
36400
  { eval "use Encode qw( :all)";
158
201
1968
    $FB_XMLCREF = 0x0400; # Encode::FB_XMLCREF;
159
201
251
    $FB_HTMLCREF = 0x0200; # Encode::FB_HTMLCREF;
160   }
161
162 # test whether we can use weak references
163 # set local empty signal handler to trap error messages
164
201
201
186
2121
{ local $SIG{__DIE__};
165
194
50
0
33
6517
  if( eval( 'require Scalar::Util') && defined( \&Scalar::Util::weaken))
166
194
194
6328
993
    { import Scalar::Util( 'weaken'); $weakrefs= 1; }
167   elsif( eval( 'require WeakRef'))
168
4
4
13
3
    { import WeakRef; $weakrefs= 1; }
169   else
170
4
446
    { $weakrefs= 0; }
171 }
172
173
191
864
import XML::Twig::Elt;
174
191
749
import XML::Twig::Entity;
175
191
1141
import XML::Twig::Entity_list;
176
177 # used to store the gi's
178 # should be set for each twig really, at least when there are several
179 # the init ensures that special gi's are always the same
180
181 # constants: element types
182
191
351
$PCDATA = '#PCDATA';
183
191
175
$CDATA = '#CDATA';
184
191
595
$PI = '#PI';
185
191
173
$COMMENT = '#COMMENT';
186
191
19432
$ENT = '#ENT';
187
188 # element classes
189
191
6289
$ELT = '#ELT';
190
190
4099
$TEXT = '#TEXT';
191
192 # element properties
193
190
2217
$ASIS = '#ASIS';
194
190
1192
$EMPTY = '#EMPTY';
195
196 # used in parseurl to set the buffer size to the same size as in XML::Parser::Expat
197
190
173
$BUFSIZE = 32768;
198
199
200 # gi => index
201
190
1481
%XML::Twig::gi2index=( '', 0, $PCDATA => 1, $CDATA => 2, $PI => 3, $COMMENT => 4, $ENT => 5);
202 # list of gi's
203
190
989
@XML::Twig::index2gi=( '', $PCDATA, $CDATA, $PI, $COMMENT, $ENT);
204
205 # gi's under this value are special
206
190
606
$XML::Twig::SPECIAL_GI= @XML::Twig::index2gi;
207
208
190
477
%XML::Twig::base_ent= ( '>' => '&gt;', '<' => '&lt;', '&' => '&amp;', "'" => '&apos;', '"' => '&quot;',);
209
190
562
566
1876
foreach my $c ( "\n", "\r", "\t") { $XML::Twig::base_ent{$c}= sprintf( "&#x%02x;", ord( $c)); }
210
211 # now set some aliases
212
188
335
*find_nodes = *get_xpath; # same as XML::XPath
213
188
278
*findnodes = *get_xpath; # same as XML::LibXML
214
188
174
*getElementsByTagName = *descendants;
215
188
162
*descendants_or_self = *descendants; # valid in XML::Twig, not in XML::Twig::Elt
216
188
590
*find_by_tag_name = *descendants;
217
187
159
*getElementById = *elt_id;
218
187
564
*getEltById = *elt_id;
219
187
148
*toString = *sprint;
220
187
134891
*create_accessors = *att_accessors;
221
222 }
223
224 @ISA = qw(XML::Parser);
225
226 # fake gi's used in twig_handlers and start_tag_handlers
227 my $ALL = '_all_'; # the associated function is always called
228 my $DEFAULT= '_default_'; # the function is called if no other handler has been
229
230 # some defaults
231 my $COMMENTS_DEFAULT= 'keep';
232 my $PI_DEFAULT = 'keep';
233
234
235 # handlers used in regular mode
236 my %twig_handlers=( Start => \&_twig_start,
237                     End => \&_twig_end,
238                     Char => \&_twig_char,
239                     Entity => \&_twig_entity,
240                     XMLDecl => \&_twig_xmldecl,
241                     Doctype => \&_twig_doctype,
242                     Element => \&_twig_element,
243                     Attlist => \&_twig_attlist,
244                     CdataStart => \&_twig_cdatastart,
245                     CdataEnd => \&_twig_cdataend,
246                     Proc => \&_twig_pi,
247                     Comment => \&_twig_comment,
248                     Default => \&_twig_default,
249                     ExternEnt => \&_twig_extern_ent,
250       );
251
252 # handlers used when twig_roots is used and we are outside of the roots
253 my %twig_handlers_roots=
254   ( Start => \&_twig_start_check_roots,
255     End => \&_twig_end_check_roots,
256     Doctype => \&_twig_doctype,
257     Char => undef, Entity => undef, XMLDecl => \&_twig_xmldecl,
258     Element => undef, Attlist => undef, CdataStart => undef,
259     CdataEnd => undef, Proc => undef, Comment => undef,
260     Proc => \&_twig_pi_check_roots,
261     Default => sub {}, # hack needed for XML::Parser 2.27
262     ExternEnt => \&_twig_extern_ent,
263   );
264
265 # handlers used when twig_roots and print_outside_roots are used and we are
266 # outside of the roots
267 my %twig_handlers_roots_print_2_30=
268   ( Start => \&_twig_start_check_roots,
269     End => \&_twig_end_check_roots,
270     Char => \&_twig_print,
271     Entity => \&_twig_print_entity,
272     ExternEnt => \&_twig_print_entity,
273     DoctypeFin => \&_twig_doctype_fin_print,
274     XMLDecl => sub { _twig_xmldecl( @_); _twig_print( @_); },
275     Doctype => \&_twig_print_doctype, # because recognized_string is broken here
276     # Element => \&_twig_print, Attlist => \&_twig_print,
277     CdataStart => \&_twig_print, CdataEnd => \&_twig_print,
278     Proc => \&_twig_pi_check_roots, Comment => \&_twig_print,
279     Default => \&_twig_print_check_doctype,
280     ExternEnt => \&_twig_extern_ent,
281   );
282
283 # handlers used when twig_roots, print_outside_roots and keep_encoding are used
284 # and we are outside of the roots
285 my %twig_handlers_roots_print_original_2_30=
286   ( Start => \&_twig_start_check_roots,
287     End => \&_twig_end_check_roots,
288     Char => \&_twig_print_original,
289     # I have no idea why I should not be using this handler!
290     Entity => \&_twig_print_entity,
291     ExternEnt => \&_twig_print_entity,
292     DoctypeFin => \&_twig_doctype_fin_print,
293     XMLDecl => sub { _twig_xmldecl( @_); _twig_print_original( @_) },
294     Doctype => \&_twig_print_original_doctype, # because original_string is broken here
295     Element => \&_twig_print_original, Attlist => \&_twig_print_original,
296     CdataStart => \&_twig_print_original, CdataEnd => \&_twig_print_original,
297     Proc => \&_twig_pi_check_roots, Comment => \&_twig_print_original,
298     Default => \&_twig_print_original_check_doctype,
299   );
300
301 # handlers used when twig_roots and print_outside_roots are used and we are
302 # outside of the roots
303 my %twig_handlers_roots_print_2_27=
304   ( Start => \&_twig_start_check_roots,
305     End => \&_twig_end_check_roots,
306     Char => \&_twig_print,
307     # if the Entity handler is set then it prints the entity declaration
308     # before the entire internal subset (including the declaration!) is output
309     Entity => sub {},
310     XMLDecl => \&_twig_print, Doctype => \&_twig_print,
311     CdataStart => \&_twig_print, CdataEnd => \&_twig_print,
312     Proc => \&_twig_pi_check_roots, Comment => \&_twig_print,
313     Default => \&_twig_print,
314     ExternEnt => \&_twig_extern_ent,
315   );
316
317 # handlers used when twig_roots, print_outside_roots and keep_encoding are used
318 # and we are outside of the roots
319 my %twig_handlers_roots_print_original_2_27=
320   ( Start => \&_twig_start_check_roots,
321     End => \&_twig_end_check_roots,
322     Char => \&_twig_print_original,
323     # for some reason original_string is wrong here
324     # this can be a problem if the doctype includes non ascii characters
325     XMLDecl => \&_twig_print, Doctype => \&_twig_print,
326     # if the Entity handler is set then it prints the entity declaration
327     # before the entire internal subset (including the declaration!) is output
328     Entity => sub {},
329     #Element => undef, Attlist => undef,
330     CdataStart => \&_twig_print_original, CdataEnd => \&_twig_print_original,
331     Proc => \&_twig_pi_check_roots, Comment => \&_twig_print_original,
332     Default => \&_twig_print, # _twig_print_original does not work
333     ExternEnt => \&_twig_extern_ent,
334   );
335
336
337 my %twig_handlers_roots_print= $parser_version > 2.27
338                                ? %twig_handlers_roots_print_2_30
339                                : %twig_handlers_roots_print_2_27;
340 my %twig_handlers_roots_print_original= $parser_version > 2.27
341                                ? %twig_handlers_roots_print_original_2_30
342                                : %twig_handlers_roots_print_original_2_27;
343
344
345 # handlers used when the finish_print method has been called
346 my %twig_handlers_finish_print=
347   ( Start => \&_twig_print,
348     End => \&_twig_print, Char => \&_twig_print,
349     Entity => \&_twig_print, XMLDecl => \&_twig_print,
350     Doctype => \&_twig_print, Element => \&_twig_print,
351     Attlist => \&_twig_print, CdataStart => \&_twig_print,
352     CdataEnd => \&_twig_print, Proc => \&_twig_print,
353     Comment => \&_twig_print, Default => \&_twig_print,
354     ExternEnt => \&_twig_extern_ent,
355   );
356
357 # handlers used when the finish_print method has been called and the keep_encoding
358 # option is used
359 my %twig_handlers_finish_print_original=
360   ( Start => \&_twig_print_original, End => \&_twig_print_end_original,
361     Char => \&_twig_print_original, Entity => \&_twig_print_original,
362     XMLDecl => \&_twig_print_original, Doctype => \&_twig_print_original,
363     Element => \&_twig_print_original, Attlist => \&_twig_print_original,
364     CdataStart => \&_twig_print_original, CdataEnd => \&_twig_print_original,
365     Proc => \&_twig_print_original, Comment => \&_twig_print_original,
366     Default => \&_twig_print_original,
367   );
368
369 # handlers used within ignored elements
370 my %twig_handlers_ignore=
371   ( Start => \&_twig_ignore_start,
372     End => \&_twig_ignore_end,
373     Char => undef, Entity => undef, XMLDecl => undef,
374     Doctype => undef, Element => undef, Attlist => undef,
375     CdataStart => undef, CdataEnd => undef, Proc => undef,
376     Comment => undef, Default => undef,
377     ExternEnt => undef,
378   );
379
380
381 # those handlers are only used if the entities are NOT to be expanded
382 my %twig_noexpand_handlers= ( ExternEnt => undef, Default => \&_twig_default );
383
384 my @saved_default_handler;
385
386 my $ID= 'id'; # default value, set by the Id argument
387 my $css_sel=0; # set through the css_sel option to allow .class selectors in triggers
388
389 # all allowed options
390 %valid_option=
391     ( # XML::Twig options
392       TwigHandlers => 1, Id => 1,
393       TwigRoots => 1, TwigPrintOutsideRoots => 1,
394       StartTagHandlers => 1, EndTagHandlers => 1,
395       ForceEndTagHandlersUsage => 1,
396       DoNotChainHandlers => 1,
397       IgnoreElts => 1,
398       Index => 1,
399       AttAccessors => 1,
400       EltAccessors => 1,
401       FieldAccessors => 1,
402       CharHandler => 1,
403       TopDownHandlers => 1,
404       KeepEncoding => 1, DoNotEscapeAmpInAtts => 1,
405       ParseStartTag => 1, KeepAttsOrder => 1,
406       LoadDTD => 1, DTDHandler => 1,
407       DoNotOutputDTD => 1, NoProlog => 1,
408       ExpandExternalEnts => 1,
409       DiscardSpaces => 1, KeepSpaces => 1, DiscardAllSpaces => 1,
410       DiscardSpacesIn => 1, KeepSpacesIn => 1,
411       PrettyPrint => 1, EmptyTags => 1,
412       EscapeGt => 1,
413       Quote => 1,
414       Comments => 1, Pi => 1,
415       OutputFilter => 1, InputFilter => 1,
416       OutputTextFilter => 1,
417       OutputEncoding => 1,
418       RemoveCdata => 1,
419       EltClass => 1,
420       MapXmlns => 1, KeepOriginalPrefix => 1,
421       SkipMissingEnts => 1,
422       # XML::Parser options
423       ErrorContext => 1, ProtocolEncoding => 1,
424       Namespaces => 1, NoExpand => 1,
425       Stream_Delimiter => 1, ParseParamEnt => 1,
426       NoLWP => 1, Non_Expat_Options => 1,
427       Xmlns => 1, CssSel => 1,
428       UseTidy => 1, TidyOptions => 1,
429       OutputHtmlDoctype => 1,
430     );
431
432 my $active_twig; # last active twig,for XML::Twig::s
433
434 # predefined input and output filters
435
187
187
187
201
629
174
1040717
use vars qw( %filter);
436 %filter= ( html => \&html_encode,
437            safe => \&safe_encode,
438            safe_hex => \&safe_encode_hex,
439          );
440
441
442 # trigger types (used to sort them)
443 my ($LEVEL_TRIGGER, $REGEXP_TRIGGER, $XPATH_TRIGGER)=(1..3);
444
445 sub new
446
3137
1
11024513
  { my ($class, %args) = @_;
447
3137
2337
    my $handlers;
448
449     # change all nice_perlish_names into nicePerlishNames
450
3137
5126
    %args= _normalize_args( %args);
451
452     # check options
453
3137
100
5556
    unless( $args{MoreOptions})
454
3136
4931
      { foreach my $arg (keys %args)
455
5448
100
8753
        { carp "invalid option $arg" unless $valid_option{$arg}; }
456       }
457
458     # a twig is really an XML::Parser
459     # my $self= XML::Parser->new(%args);
460
3137
2749
    my $self;
461
3137
8831
    $self= XML::Parser->new(%args);
462
463
3137
71193
    bless $self, $class;
464
465
3137
4962
    $self->{_twig_context_stack}= [];
466
467     # allow tag.class selectors in handler triggers
468
3137
100
7852
    $css_sel= $args{CssSel} || 0;
469
470
471
3137
100
4729
    if( exists $args{TwigHandlers})
472
230
243
      { $handlers= $args{TwigHandlers};
473
230
415
        $self->setTwigHandlers( $handlers);
474
223
307
        delete $args{TwigHandlers};
475       }
476
477     # take care of twig-specific arguments
478
3130
100
4234
    if( exists $args{StartTagHandlers})
479
30
65
      { $self->setStartTagHandlers( $args{StartTagHandlers});
480
30
47
        delete $args{StartTagHandlers};
481       }
482
483
3130
100
4070
    if( exists $args{DoNotChainHandlers})
484
1
2
      { $self->{twig_do_not_chain_handlers}= $args{DoNotChainHandlers}; }
485
486
3130
100
4104
    if( exists $args{IgnoreElts})
487       { # change array to hash so you can write ignore_elts => [ qw(foo bar baz)]
488
11
1
2
1
100
32
2
5
2
        if( isa( $args{IgnoreElts}, 'ARRAY')) { $args{IgnoreElts}= { map { $_ => 1 } @{$args{IgnoreElts}} }; }
489
11
27
        $self->setIgnoreEltsHandlers( $args{IgnoreElts});
490
11
13
        delete $args{IgnoreElts};
491       }
492
493
3130
100
3969
    if( exists $args{Index})
494
2
2
      { my $index= $args{Index};
495         # we really want a hash name => path, we turn an array into a hash if necessary
496
2
100
4
        if( ref( $index) eq 'ARRAY')
497
1
2
1
4
          { my %index= map { $_ => $_ } @$index;
498
1
2
            $index= \%index;
499           }
500
2
5
        while( my( $name, $exp)= each %$index)
501
3
4
4
4
4
12
3
7
8
          { $self->setTwigHandler( $exp, sub { push @{$_[0]->{_twig_index}->{$name}}, $_; 1; }); }
502       }
503
504
3130
100
8078
    $self->{twig_elt_class}= $args{EltClass} || 'XML::Twig::Elt';
505
3130
75
100
100
4702
117
    if( defined( $args{EltClass}) && $args{EltClass} ne 'XML::Twig::Elt') { $self->{twig_alt_elt_class}=1; }
506
3130
76
100
3981
126
    if( exists( $args{EltClass})) { delete $args{EltClass}; }
507
508
3130
100
3646
    if( exists( $args{MapXmlns}))
509
22
24
      { $self->{twig_map_xmlns}= $args{MapXmlns};
510
22
23
        $self->{Namespaces}=1;
511
22
25
        delete $args{MapXmlns};
512       }
513
514
3130
100
4187
    if( exists( $args{KeepOriginalPrefix}))
515
4
5
      { $self->{twig_keep_original_prefix}= $args{KeepOriginalPrefix};
516
4
4
        delete $args{KeepOriginalPrefix};
517       }
518
519
3130
2932
    $self->{twig_dtd_handler}= $args{DTDHandler};
520
3130
2651
    delete $args{DTDHandler};
521
522
3130
100
3563
    if( $args{ExpandExternalEnts})
523
5
11
      { $self->set_expand_external_entities( 1);
524
5
6
        $self->{twig_expand_external_ents}= $args{ExpandExternalEnts};
525
5
9
        $self->{twig_read_external_dtd}= 1; # implied by ExpandExternalEnts
526
5
100
13
        if( $args{ExpandExternalEnts} == -1)
527
2
3
          { $self->{twig_extern_ent_nofail}= 1;
528
2
12
            $self->setHandlers( ExternEnt => \&_twig_extern_ent_nofail);
529           }
530
5
47
        delete $args{LoadDTD};
531
5
6
        delete $args{ExpandExternalEnts};
532       }
533     else
534
3125
4577
      { $self->set_expand_external_entities( 0); }
535
536
3130
50
0
5999
    if( !$args{NoLWP} && ! _use( 'URI') && ! _use( 'URI::File') && ! _use( 'LWP'))
537
0
0
      { $self->{twig_ext_ent_handler}= \&XML::Parser::initial_ext_ent_handler }
538     else
539
3130
4210
      { $self->{twig_ext_ent_handler}= \&XML::Parser::file_ext_ent_handler }
540
541
3130
100
3947
    if( $args{DoNotEscapeAmpInAtts})
542
1
1
      { $self->set_do_not_escape_amp_in_atts( 1);
543
1
1
        $self->{twig_do_not_escape_amp_in_atts}=1;
544       }
545     else
546
3129
4210
      { $self->set_do_not_escape_amp_in_atts( 0);
547
3129
2725
        $self->{twig_do_not_escape_amp_in_atts}=0;
548       }
549
550     # deal with TwigRoots argument, a hash of elements for which
551     # subtrees will be built (and associated handlers)
552
553
3130
100
3910
    if( $args{TwigRoots})
554
101
207
      { $self->setTwigRoots( $args{TwigRoots});
555
99
133
        delete $args{TwigRoots};
556       }
557
558
3128
100
3781
    if( $args{EndTagHandlers})
559
15
100
100
51
      { unless ($self->{twig_roots} || $args{ForceEndTagHandlersUsage})
560
1
153
          { croak "you should not use EndTagHandlers without TwigRoots\n",
561                   "if you want to use it anyway, normally because you have ",
562                   "a start_tag_handlers that calls 'ignore' and you want to ",
563                   "call an ent_tag_handlers at the end of the element, then ",
564                   "pass 'force_end_tag_handlers_usage => 1' as an argument ",
565                   "to new";
566           }
567
568
14
26
        $self->setEndTagHandlers( $args{EndTagHandlers});
569
14
164
        delete $args{EndTagHandlers};
570       }
571
572
3127
100
3709
    if( $args{TwigPrintOutsideRoots})
573
63
100
314
      { croak "cannot use twig_print_outside_roots without twig_roots"
574           unless( $self->{twig_roots});
575         # if the arg is a filehandle then store it
576
62
100
99
        if( _is_fh( $args{TwigPrintOutsideRoots}) )
577
59
102
          { $self->{twig_output_fh}= $args{TwigPrintOutsideRoots}; }
578
62
93
        $self->{twig_default_print}= $args{TwigPrintOutsideRoots};
579       }
580
581     # space policy
582
3126
100
3492
    if( $args{KeepSpaces})
583
49
100
195
      { croak "cannot use both keep_spaces and discard_spaces" if( $args{DiscardSpaces});
584
48
100
170
        croak "cannot use both keep_spaces and discard_all_spaces" if( $args{DiscardAllSpaces});
585
47
100
184
        croak "cannot use both keep_spaces and keep_spaces_in" if( $args{KeepSpacesIn});
586
46
50
        $self->{twig_keep_spaces}=1;
587
46
58
        delete $args{KeepSpaces};
588       }
589
3123
100
3487
    if( $args{DiscardSpaces})
590       {
591
5
100
106
        croak "cannot use both discard_spaces and keep_spaces_in" if( $args{KeepSpacesIn});
592
4
100
97
        croak "cannot use both discard_spaces and discard_all_spaces" if( $args{DiscardAllSpaces});
593
3
100
98
        croak "cannot use both discard_spaces and discard_spaces_in" if( $args{DiscardSpacesIn});
594
2
2
        $self->{twig_discard_spaces}=1;
595
2
2
        delete $args{DiscardSpaces};
596       }
597
3120
100
3442
    if( $args{KeepSpacesIn})
598
9
100
111
      { croak "cannot use both keep_spaces_in and discard_spaces_in" if( $args{DiscardSpacesIn});
599
8
100
107
        croak "cannot use both keep_spaces_in and discard_all_spaces" if( $args{DiscardAllSpaces});
600
7
7
        $self->{twig_discard_spaces}=1;
601
7
10
        $self->{twig_keep_spaces_in}={};
602
7
7
7
17
        my @tags= @{$args{KeepSpacesIn}};
603
7
9
10
15
        foreach my $tag (@tags) { $self->{twig_keep_spaces_in}->{$tag}=1; }
604
7
14
        delete $args{KeepSpacesIn};
605       }
606
607
3118
100
3416
    if( $args{DiscardAllSpaces})
608       {
609
2
100
100
        croak "cannot use both discard_all_spaces and discard_spaces_in" if( $args{DiscardSpacesIn});
610
1
1
        $self->{twig_discard_all_spaces}=1;
611
1
2
        delete $args{DiscardAllSpaces};
612       }
613
614
3117
100
3285
    if( $args{DiscardSpacesIn})
615
4
6
      { $self->{twig_keep_spaces}=1;
616
4
6
        $self->{twig_discard_spaces_in}={};
617
4
4
2
9
        my @tags= @{$args{DiscardSpacesIn}};
618
4
6
5
9
        foreach my $tag (@tags) { $self->{twig_discard_spaces_in}->{$tag}=1; }
619
4
8
        delete $args{DiscardSpacesIn};
620       }
621     # discard spaces by default
622
3117
100
4746
    $self->{twig_discard_spaces}= 1 unless( $self->{twig_keep_spaces});
623
624
3117
67
5619
    $args{Comments}||= $COMMENTS_DEFAULT;
625
3117
4
100
100
100
5956
7
    if( $args{Comments} eq 'drop') { $self->{twig_keep_comments}= 0; }
626
2225
2082
    elsif( $args{Comments} eq 'keep') { $self->{twig_keep_comments}= 1; }
627
887
701
    elsif( $args{Comments} eq 'process') { $self->{twig_process_comments}= 1; }
628
1
82
    else { croak "wrong value for comments argument: '$args{Comments}' (should be 'drop', 'keep' or 'process')"; }
629
3116
3508
    delete $args{Comments};
630
631
3116
67
5188
    $args{Pi}||= $PI_DEFAULT;
632
3116
3
100
100
100
4829
4
    if( $args{Pi} eq 'drop') { $self->{twig_keep_pi}= 0; }
633
2226
1990
    elsif( $args{Pi} eq 'keep') { $self->{twig_keep_pi}= 1; }
634
886
739
    elsif( $args{Pi} eq 'process') { $self->{twig_process_pi}= 1; }
635
1
81
    else { croak "wrong value for pi argument: '$args{Pi}' (should be 'drop', 'keep' or 'process')"; }
636
3115
2493
    delete $args{Pi};
637
638
3115
100
2996
    if( $args{KeepEncoding})
639       {
640         # set it in XML::Twig::Elt so print functions know what to do
641
1020
1171
        $self->set_keep_encoding( 1);
642
1020
100
2330
        $self->{parse_start_tag}= $args{ParseStartTag} || \&_parse_start_tag;
643
1020
100
1323
        delete $args{ParseStartTag} if defined( $args{ParseStartTag}) ;
644
1020
849
        delete $args{KeepEncoding};
645       }
646     else
647
2095
2701
      { $self->set_keep_encoding( 0);
648
2095
100
2016
        if( $args{ParseStartTag})
649
2
5
          { $self->{parse_start_tag}= $args{ParseStartTag}; }
650         else
651
2093
1687
          { delete $self->{parse_start_tag}; }
652
2095
1454
        delete $args{ParseStartTag};
653       }
654
655
3115
100
2961
    if( $args{OutputFilter})
656
5
8
      { $self->set_output_filter( $args{OutputFilter});
657
5
6
        delete $args{OutputFilter};
658       }
659     else
660
3110
3612
      { $self->set_output_filter( 0); }
661
662
3115
100
3515
    if( $args{RemoveCdata})
663
1
1
      { $self->set_remove_cdata( $args{RemoveCdata});
664
1
1
        delete $args{RemoveCdata};
665       }
666     else
667
3114
3630
      { $self->set_remove_cdata( 0); }
668
669
3115
100
3061
    if( $args{OutputTextFilter})
670
5
8
      { $self->set_output_text_filter( $args{OutputTextFilter});
671
5
5
        delete $args{OutputTextFilter};
672       }
673     else
674
3110
3508
      { $self->set_output_text_filter( 0); }
675
676
3115
100
3406
    if( exists $args{KeepAttsOrder})
677
12
19
      { $self->{keep_atts_order}= $args{KeepAttsOrder};
678
12
100
15
        if( _use( 'Tie::IxHash'))
679
11
22
          { $self->set_keep_atts_order( $self->{keep_atts_order}); }
680         else
681
1
86
          { croak "Tie::IxHash not available, option keep_atts_order not allowed"; }
682       }
683     else
684
3103
3301
      { $self->set_keep_atts_order( 0); }
685
686
687
3114
72
100
3339
113
    if( $args{PrettyPrint}) { $self->set_pretty_print( $args{PrettyPrint}); }
688
3114
1
100
3126
2
    if( $args{EscapeGt}) { $self->escape_gt( $args{EscapeGt}); }
689
3114
12
100
3086
18
    if( $args{EmptyTags}) { $self->set_empty_tag_style( $args{EmptyTags}) }
690
691
3114
1
1
100
3470
5
1
    if( exists $args{Id}) { $ID= $args{Id}; delete $args{ID}; }
692
3114
4
4
100
3093
11
5
    if( $args{NoProlog}) { $self->{no_prolog}= 1; delete $args{NoProlog}; }
693
3114
3
3
100
3154
7
5
    if( $args{DoNotOutputDTD}) { $self->{no_dtd_output}= 1; delete $args{DoNotOutputDTD}; }
694
3114
9
9
100
3226
13
9
    if( $args{LoadDTD}) { $self->{twig_read_external_dtd}= 1; delete $args{LoadDTD}; }
695
3114
1
1
100
2944
3
1
    if( $args{CharHandler}) { $self->setCharHandler( $args{CharHandler}); delete $args{CharHandler}; }
696
697
3114
3
1
100
3078
6
1
    if( $args{InputFilter}) { $self->set_input_filter( $args{InputFilter}); delete $args{InputFilter}; }
698
3112
3
3
100
3032
76
36
    if( $args{NoExpand}) { $self->setHandlers( %twig_noexpand_handlers); $self->{twig_no_expand}=1; }
699
3112
2
2
100
3435
4
2
    if( my $output_encoding= $args{OutputEncoding}) { $self->set_output_encoding( $output_encoding); delete $args{OutputFilter}; }
700
701
3112
4
4
100
4432
3
3
    if( my $tdh= $args{TopDownHandlers}) { $self->{twig_tdh}=1; delete $args{TopDownHandlers}; }
702
703
3112
1
100
3411
2
    if( my $acc_a= $args{AttAccessors}) { $self->att_accessors( @$acc_a); }
704
3112
4
100
100
3105
14
    if( my $acc_e= $args{EltAccessors}) { $self->elt_accessors( isa( $acc_e, 'ARRAY') ? @$acc_e : $acc_e); }
705
3112
2
100
100
3304
9
    if( my $acc_f= $args{FieldAccessors}) { $self->field_accessors( isa( $acc_f, 'ARRAY') ? @$acc_f : $acc_f); }
706
707
3112
24
100
3037
238
    if( $args{UseTidy}) { $self->{use_tidy}= 1; }
708
3112
50
7240
    $self->{tidy_options}= $args{TidyOptions} || {};
709
710
3112
1
100
3397
1
    if( $args{OutputHtmlDoctype}) { $self->{html_doctype}= 1; }
711
712
3112
100
6777
    $self->set_quote( $args{Quote} || 'double');
713
714     # set handlers
715
3112
100
3014
    if( $self->{twig_roots})
716
99
100
139
      { if( $self->{twig_default_print})
717
62
100
84
          { if( $self->{twig_keep_encoding})
718
36
255
              { $self->setHandlers( %twig_handlers_roots_print_original); }
719             else
720
26
119
              { $self->setHandlers( %twig_handlers_roots_print); }
721           }
722         else
723
37
156
          { $self->setHandlers( %twig_handlers_roots); }
724       }
725     else
726
3013
10392
      { $self->setHandlers( %twig_handlers); }
727
728     # XML::Parser::Expat does not like these handler to be set. So in order to
729     # use the various sets of handlers on XML::Parser or XML::Parser::Expat
730     # objects when needed, these ones have to be set only once, here, at
731     # XML::Parser level
732
3112
133706
    $self->setHandlers( Init => \&_twig_init, Final => \&_twig_final);
733
734
3112
37252
    $self->{twig_entity_list}= XML::Twig::Entity_list->new;
735
736
3112
3243
    $self->{twig_id}= $ID;
737
3112
2838
    $self->{twig_stored_spaces}='';
738
739
3112
2440
    $self->{twig_autoflush}= 1; # auto flush by default
740
741
3112
2482
    $self->{twig}= $self;
742
3112
3090
100
3914
5164
    if( $weakrefs) { weaken( $self->{twig}); }
743
744
3112
6152
    return $self;
745   }
746
747 sub parse
748   {
749
3282
1
845397
    my $t= shift;
750     # if called as a class method, calls nparse, which creates the twig then parses it
751
3282
179
100
67
9080
387
    if( !ref( $t) || !isa( $t, 'XML::Twig')) { return $t->nparse( @_); }
752
753     # requires 5.006 at least (or the ${^UNICODE} causes a problem) # > perl 5.5
754     # trap underlying bug in IO::Handle (see RT #17500) # > perl 5.5
755     # croak if perl 5.8+, -CD (or PERL_UNICODE set to D) and parsing a pipe # > perl 5.5
756
3103
50
17
8790
    if( $perl_version>=5.008 && ${^UNICODE} && (${^UNICODE} & 24) && isa( $_[0], 'GLOB') && -p $_[0] ) # > perl 5.5
757
1
162
      { croak "cannot parse the output of a pipe when perl is set to use the UTF8 perlIO layer\n" # > perl 5.5
758               . "set the environment variable PERL_UNICODE or use the -C option (see perldoc perlrun)\n" # > perl 5.5
759               . "not to include 'D'"; # > perl 5.5
760       } # > perl 5.5
761
3102
3102
2600
7305
    $t= eval { $t->SUPER::parse( @_); };
762
763
3102
100
75
11150070
    if( !$t
764         && $@=~m{(syntax error at line 1, column 0, byte 0|not well-formed \(invalid token\) at line 1, column 1, byte 1)}
765         && -f $_[0]
766       )
767
1
84
      { croak "you seem to have used the parse method on a filename ($_[0]), you probably want parsefile instead"; }
768
3101
3715
    return _checked_parse_result( $t, $@);
769   }
770
771 sub parsefile
772
82
82
1
154
  { my $t= shift;
773
82
1
100
100
716
3
    if( -f $_[0] && ! -s $_[0]) { return _checked_parse_result( undef, "empty file '$_[0]'"); }
774
81
81
93
400
    $t= eval { $t->SUPER::parsefile( @_); };
775
81
1599
    return _checked_parse_result( $t, $@);
776   }
777
778 sub _checked_parse_result
779
3183
2748
  { my( $t, $returned)= @_;
780
3183
100
3735
    if( !$t)
781
28
100
67
165
      { if( isa( $returned, 'XML::Twig') && $returned->{twig_finish_now})
782
6
4
          { $t= $returned;
783
6
5
            delete $t->{twig_finish_now};
784
6
8
            return $t->_twig_final;
785           }
786         else
787
22
53
          { _croak( $returned, 0); }
788       }
789
790
3155
2184
    $active_twig= $t;
791
3155
4095
    return $t;
792   }
793
794
2
2
1
7
sub active_twig { return $active_twig; }
795
796 sub finish_now
797
6
6
1
9
  { my $t= shift;
798
6
6
    $t->{twig_finish_now}=1;
799
6
26
    die $t;
800   }
801
802
803
3
3
1
7
sub parsefile_inplace { shift->_parse_inplace( parsefile => @_); }
804
6
6
1
17
sub parsefile_html_inplace { shift->_parse_inplace( parsefile_html => @_); }
805
806 sub _parse_inplace
807
9
9
19
  { my( $t, $method, $file, $suffix)= @_;
808
9
50
14
    _use( 'File::Temp') || croak "need File::Temp to use inplace methods\n";
809
9
13
    _use( 'File::Basename');
810
811
812
9
433
    my $tmpdir= dirname( $file);
813
9
41
    my( $tmpfh, $tmpfile)= File::Temp::tempfile( DIR => $tmpdir);
814
9
2032
    my $original_fh= select $tmpfh;
815
816
9
50
33
47
    unless( $t->{twig_keep_encoding} || $perl_version < 5.006)
817
9
50
104241
      { if( grep /useperlio=define/, `$^X -V`) # we can only use binmode :utf8 if perl was compiled with useperlio
818
9
66
          { binmode( $tmpfh, ":utf8" ); }
819       }
820
821
9
160
    $t->$method( $file);
822
823
9
28
    select $original_fh;
824
9
3139
    close $tmpfh;
825
9
69
    my $mode= (stat( $file))[2] & oct(7777);
826
9
50
79
    chmod $mode, $tmpfile or croak "cannot change temp file mode to $mode: $!";
827
828
9
100
18
    if( $suffix)
829
6
10
      { my $backup;
830
6
3
100
38
29
        if( $suffix=~ m{\*}) { ($backup = $suffix) =~ s/\*/$file/g; }
831
3
12
        else { $backup= $file . $suffix; }
832
833
6
50
164
        rename( $file, $backup) or croak "cannot backup initial file ($file) to $backup: $!";
834       }
835
9
50
339
    rename( $tmpfile, $file) or croak "cannot rename temp file ($tmpfile) to initial file ($file): $!";
836
837
9
62
    return $t;
838   }
839
840
841 sub parseurl
842
11
11
1
8591
  { my $t= shift;
843
11
27
    $t->_parseurl( 0, @_);
844   }
845
846 sub safe_parseurl
847
9
9
1
17
  { my $t= shift;
848
9
34
    $t->_parseurl( 1, @_);
849   }
850
851 sub safe_parsefile_html
852
2
2
1
2
  { my $t= shift;
853
2
2
3
4
    eval { $t->parsefile_html( @_); };
854
2
50
8
    return $@ ? $t->_reset_twig_after_error : $t;
855   }
856
857 sub safe_parseurl_html
858
2
2
1
5
  { my $t= shift;
859
2
50
3
    _use( 'LWP::Simple') or croak "missing LWP::Simple";
860
2
2
4
5
    eval { $t->parse_html( LWP::Simple::get( shift()), @_); } ;
861
2
50
10
    return $@ ? $t->_reset_twig_after_error : $t;
862   }
863
864 sub parseurl_html
865
1
1
1
2
  { my $t= shift;
866
1
50
2
    _use( 'LWP::Simple') or croak "missing LWP::Simple";
867
1
4
    $t->parse_html( LWP::Simple::get( shift()), @_);
868   }
869
870
871 # uses eval to catch the parser's death
872 sub safe_parse_html
873
8
8
1
23
  { my $t= shift;
874
8
8
10
35
    eval { $t->parse_html( @_); } ;
875
8
50
72
    return $@ ? $t->_reset_twig_after_error : $t;
876   }
877
878 sub parsefile_html
879
10
10
1
37
  { my $t= shift;
880
10
26
    my $file= shift;
881
10
100
41
    my $indent= $t->{ErrorContext} ? 1 : 0;
882
10
123
    $t->set_empty_tag_style( 'html');
883
10
100
36
    my $html2xml= $t->{use_tidy} ? \&_tidy_html : \&_html2xml;
884
10
100
50
54
    my $options= $t->{use_tidy} ? $t->{tidy_options} || {} : { indent => $indent, html_doctype => $t->{html_doctype} };
885
10
39
    $t->parse( $html2xml->( _slurp( $file), $options), @_);
886
10
23
    return $t;
887   }
888
889 sub parse_html
890
56
56
1
65661
  { my $t= shift;
891
56
100
100
164
    my $options= ref $_[0] && ref $_[0] eq 'HASH' ? shift() : {};
892
56
100
102
    my $use_tidy= exists $options->{use_tidy} ? $options->{use_tidy} : $t->{use_tidy};
893
56
243
    my $content= shift;
894
56
50
87
    my $indent= $t->{ErrorContext} ? 1 : 0;
895
56
90
    $t->set_empty_tag_style( 'html');
896
56
100
87
    my $html2xml= $use_tidy ? \&_tidy_html : \&_html2xml;
897
56
100
50
139
    my $conv_options= $use_tidy ? $t->{tidy_options} || {} : { indent => $indent, html_doctype => $t->{html_doctype} };
898
56
100
4408
    $t->parse( $html2xml->( isa( $content, 'GLOB') ? _slurp_fh( $content) : $content, $conv_options), @_);
899
55
169
    return $t;
900   }
901
902 sub xparse
903
2007
1
1446
  { my $t= shift;
904
2007
1448
    my $to_parse= $_[0];
905
2007
2
100
100
100
100
100
9424
4
    if( isa( $to_parse, 'GLOB')) { $t->parse( @_); }
906
1987
100
4668
    elsif( $to_parse=~ m{^\s*<}) { $to_parse=~ m{<html}i ? $t->_parse_as_xml_or_html( @_)
907                                                                      : $t->parse( @_);
908                                              }
909
2
100
3
    elsif( $to_parse=~ m{^\w+://.*\.html?$}) { _use( 'LWP::Simple') or croak "missing LWP::Simple";
910
1
3
                                               $t->_parse_as_xml_or_html( LWP::Simple::get( shift()), @_);
911                                              }
912
5
100
7
    elsif( $to_parse=~ m{^\w+://}) { _use( 'LWP::Simple') or croak "missing LWP::Simple";
913
4
10
                                               my $doc= LWP::Simple::get( shift);
914
4
1
100
492823
6
                                               if( ! defined $doc) { $doc=''; }
915
4
22
                                               my $xml_parse_ok= $t->safe_parse( $doc, @_);
916
4
100
6
                                               if( $xml_parse_ok)
917
1
3
                                                 { return $xml_parse_ok; }
918                                                else
919
3
4
                                                 { my $diag= $@;
920
3
100
10
                                                   if( $doc=~ m{<html}i)
921
2
3
                                                     { $t->parse_html( $doc, @_); }
922                                                     else
923
1
94
                                                      { croak $diag; }
924                                                  }
925                                              }
926
2
3
    elsif( $to_parse=~ m{\.html?$}) { my $content= _slurp( shift);
927
2
4
                                               $t->_parse_as_xml_or_html( $content, @_);
928                                              }
929
9
29
    else { $t->parsefile( @_); }
930   }
931
932 sub _parse_as_xml_or_html
933
11
11
113823
  { my $t= shift;
934
11
100
20
    if( _is_well_formed_xml( $_[0]))
935
3
5
      { $t->parse( @_) }
936     else
937
8
50
18
      { my $html2xml= $t->{use_tidy} ? \&_tidy_html : \&_html2xml;
938
8
50
0
24
        my $options= $t->{use_tidy} ? $t->{tidy_options} || {} : { indent => 0, html_doctype => $t->{html_doctype} };
939
8
16
        my $html= $html2xml->( $_[0], $options, @_);
940
8
50
12
        if( _is_well_formed_xml( $html))
941
8
17
          { $t->parse( $html); }
942         else
943
0
0
          { croak $@; } # can't really test this because HTML::Parser or HTML::Tidy may change how they deal with bas HTML between versions
944       }
945   }
946
947 { my $parser;
948   sub _is_well_formed_xml
949
19
67
19
43
    { $parser ||= XML::Parser->new;
950
19
19
114
43
      eval { $parser->parse( $_[0]); };
951
19
100
2776
      return $@ ? 0 : 1;
952     }
953 }
954
955 sub nparse
956
2008
1
443408
  { my $class= shift;
957
2008
1525
    my $to_parse= pop;
958
2008
2881
    $class->new( @_)->xparse( $to_parse);
959   }
960
961
1
1
1
2
sub nparse_pp { shift()->nparse( pretty_print => 'indented', @_); }
962
4
4
1
62
sub nparse_e { shift()->nparse( error_context => 1, @_); }
963
1
1
1
2
sub nparse_ppe { shift()->nparse( pretty_print => 'indented', error_context => 1, @_); }
964
965
966 sub _html2xml
967
49
49
233
  { my( $html, $options)= @_;
968
49
100
71
    _use( 'HTML::TreeBuilder', '3.13') or croak "cannot parse HTML: missing HTML::TreeBuilder v >= 3.13\n";
969
48
199
    my $tree= HTML::TreeBuilder->new;
970
48
7106
    $tree->ignore_ignorable_whitespace( 0);
971
48
289
    $tree->ignore_unknown( 0);
972
48
215
    $tree->no_space_compacting( 1);
973
48
209
    $tree->store_comments( 1);
974
48
200
    $tree->store_pis(1);
975
48
880
    $tree->parse( $html);
976
48
1675799
    $tree->eof;
977
978
48
809
    my $xml='';
979
48
100
67
126
    if( $options->{html_doctype} && exists $tree->{_decl} )
980
1
2
      { my $decl= $tree->{_decl}->as_XML;
981
982         # first try to fix declarations that are missing the SYSTEM part
983
1
1
33
123
5
        $decl =~ s{^\s*<!DOCTYPE \s+ ((?i)html) \s+ PUBLIC \s+ "([^"]*)" \s* >}
984
1
4
                  { my $system= $HTML_DECL{$2} || $HTML_DECL{$DEFAULT_HTML_TYPE};
985                     qq{<!DOCTYPE $1 PUBLIC "$2" "$system">}
986
987                   }xe;
988
989         # then check that the declaration looks OK (so it parses), if not remove it,
990         # better to parse without the declaration than to die stupidly
991
1
50
33
7
        if( $decl =~ m{<!DOCTYPE \s+ (?i:HTML) (\s+ PUBLIC \s+ "[^"]*" \s+ (SYSTEM \s+)? "[^"]*")? \s*>}x # PUBLIC then SYSTEM
992             || $decl =~ m{<!DOCTYPE \s+ (?i:HTML) \s+ SYSTEM \s+ "[^"]*" \s*>}x # just SYSTEM
993           )
994
1
1
          { $xml= $decl; }
995       }
996
997
48
158
    $xml.= _as_XML( $tree);
998
999
1000
48
127
    _fix_xml( $tree, \$xml);
1001
1002
48
1
100
89
9
    if( $options->{indent}) { _indent_xhtml( \$xml); }
1003
48
124
    $tree->delete;
1004
48
99242
    $xml=~ s{\s+$}{}s; # trim end
1005
48
980
    return $xml;
1006   }
1007
1008 sub _tidy_html
1009
25
25
90
  { my( $html, $options)= @_;
1010
25
50
42
   _use( 'HTML::Tidy') or croak "cannot cleanup HTML using HTML::Tidy (required by the use_tidy option): $@\n"; ;
1011
25
258
    my $TIDY_DEFAULTS= { output_xhtml => 1, # duh!
1012                          tidy_mark => 0, # do not add the "generated by tidy" comment
1013                          numeric_entities => 1,
1014                          char_encoding => 'utf8',
1015                          bare => 1,
1016                          clean => 1,
1017                          doctype => 'transitional',
1018                          fix_backslash => 1,
1019                          merge_divs => 0,
1020                          merge_spans => 0,
1021                          sort_attributes => 'alpha',
1022                          indent => 0,
1023                          wrap => 0,
1024                          break_before_br => 0,
1025                        };
1026
25
50
53
    $options ||= {};
1027
25
158
    my $tidy_options= { %$TIDY_DEFAULTS, %$options};
1028
25
139
    my $tidy = HTML::Tidy->new( $tidy_options);
1029
25
2754
    $tidy->ignore( type => 1, type => 2 ); # 1 is TIDY_WARNING, 2 is TIDY_ERROR, not clean
1030
25
406
    my $xml= $tidy->clean( $html );
1031
25
124838
    return $xml;
1032   }
1033
1034
1035 { my %xml_parser_encoding;
1036   sub _fix_xml
1037
49
49
718
    { my( $tree, $xml)= @_; # $xml is a ref to the xml string
1038
1039
49
46
      my $max_tries=5;
1040
49
36
      my $add_decl;
1041
1042
49
67
69
      while( ! _check_xml( $xml) && $max_tries--)
1043         {
1044           # a couple of fixes for weird HTML::TreeBuilder errors
1045
3
100
50
50
50
27
          if( $@=~ m{^\s*xml (or text )?declaration not at start of (external )?entity}i)
1046
1
5
            { $$xml=~ s{<\?xml.*?\?>}{}g;
1047               #warn " fixed xml declaration in the wrong place\n";
1048             }
1049           elsif( $@=~ m{undefined entity})
1050
0
0
0
            { $$xml=~ s{&(amp;)?Amp;}{&amp;}g if $HTML::TreeBuilder::VERSION < 4.00;
1051
0
0
0
0
0
              if( _use( 'HTML::Entities::Numbered')) { $$xml=name2hex_xml( $$xml); }
1052
0
0
0
0
0
0
0
0
0
              $$xml=~ s{&(\w+);}{ my $ent= $1; if( $ent !~ m{^(amp|lt|gt|apos|quote)$}) { "&amp;$ent;" } }eg;
1053             }
1054           elsif( $@=~ m{&Amp; used in html})
1055             # if $Amp; is used instead of &amp; then HTML::TreeBuilder's as_xml is tripped (old version)
1056
0
0
0
            { $$xml=~ s{&(amp;)?Amp;}{&amp;}g if $HTML::TreeBuilder::VERSION < 4.00;
1057             }
1058           elsif( $@=~ m{^\s*not well-formed \(invalid token\)})
1059
2
100
5
            { if( $HTML::TreeBuilder::VERSION < 4.00)
1060
1
5
                { $$xml=~ s{&(amp;)?Amp;}{&amp;}g;
1061
1
11
                  $$xml=~ s{(<[^>]* )(\d+=)"}{$1a$2"}g; # <table 1> comes out as <table 1="1">, "fix the attribute
1062                 }
1063
2
4
              my $q= '<img "="&#34;" '; # extracted so vim doesn't get confused
1064
2
2
50
3
6
              if( _use( 'HTML::Entities::Numbered')) { $$xml=name2hex_xml( $$xml); }
1065
2
50
32
              if( $$xml=~ m{$q})
1066
0
0
                { $$xml=~ s{$q}{<img }g; # happens with <img src="foo.png"" ...
1067                 }
1068               else
1069
2
4
                { my $encoding= _encoding_from_meta( $tree);
1070
2
2
50
6
4
                  unless( keys %xml_parser_encoding) { %xml_parser_encoding= _xml_parser_encodings(); }
1071
1072
2
50
6
                  if( ! $add_decl)
1073
2
100
50
50
33
33
9
                    { if( $xml_parser_encoding{$encoding})
1074
1
1
                        { $add_decl=1; }
1075                       elsif( $encoding eq 'euc-jp' && $xml_parser_encoding{'x-euc-jp-jisx0221'})
1076
0
0
0
0
                        { $encoding="x-euc-jp-jisx0221"; $add_decl=1;}
1077                       elsif( $encoding eq 'shift-jis' && $xml_parser_encoding{'x-sjis-jisx0221'})
1078
0
0
0
0
                        { $encoding="x-sjis-jisx0221"; $add_decl=1;}
1079
1080
2
100
3
                      if( $add_decl)
1081
1
6
                        { $$xml=~ s{^(<\?xml.*?\?>)?}{<?xml version="1.0" encoding="$encoding"?>}s;
1082                           #warn " added decl (encoding $encoding)\n";
1083                         }
1084                       else
1085
1
2
                        { $$xml=~ s{^(<\?xml.*?\?>)?}{}s;
1086                           #warn " converting to utf8 from $encoding\n";
1087
1
2
                          $$xml= _to_utf8( $encoding, $$xml);
1088                         }
1089                     }
1090                   else
1091
0
0
                    { $$xml=~ s{^(<\?xml.*?\?>)?}{}s;
1092                       #warn " converting to utf8 from $encoding\n";
1093
0
0
                      $$xml= _to_utf8( $encoding, $$xml);
1094                     }
1095                 }
1096             }
1097         }
1098
1099       # some versions of HTML::TreeBuilder escape CDATA sections
1100
49
0
243
0
      $$xml=~ s{(&lt;!\[CDATA\[.*?\]\]&gt;)}{_unescape_cdata( $1)}eg;
1101
1102   }
1103
1104   sub _xml_parser_encodings
1105
2
2
4
    { my @encodings=( 'iso-8859-1'); # this one is included by default, there is no map for it in @INC
1106
2
2
      foreach my $inc (@INC)
1107
18
42
557
1288
        { push @encodings, map { basename( $_, '.enc') } glob( File::Spec->catdir( $inc => XML => Parser => Encodings => '*.enc')); }
1108
2
44
5
53
      return map { $_ => 1 } @encodings;
1109     }
1110 }
1111
1112
1113 sub _unescape_cdata
1114
1
1
220
  { my( $cdata)= @_;
1115
1
4
    $cdata=~s{&lt;}{<}g;
1116
1
2
    $cdata=~s{&gt;}{>}g;
1117
1
3
    $cdata=~s{&amp;}{&}g;
1118
1
2
    return $cdata;
1119   }
1120
1121 sub _as_XML {
1122
1123     # fork of HTML::Element::as_XML, which is a little too buggy and inconsistent between versions for my liking
1124
48
48
51
    my ($elt) = @_;
1125
48
42
    my $xml= '';
1126
48
84
    my $empty_element_map = $elt->_empty_element_map;
1127
1128
48
103
    my ( $tag, $node, $start ); # per-iteration scratch
1129     $elt->traverse(
1130         sub {
1131
36543
301506
            ( $node, $start ) = @_;
1132
36543
100
100
42774
            if ( ref $node )
1133               { # it's an element
1134
24447
18781
                $tag = $node->{'_tag'};
1135
24447
100
20021
                if ($start)
1136                   { # on the way in
1137
12237
43554
17250
64274
                    foreach my $att ( grep { ! m{^(_|/$)} } keys %$node )
1138                        { # fix attribute names instead of dying
1139
6034
4161
                         my $new_att= $att;
1140
6034
4
100
7708
4
                         if( $att=~ m{^\d}) { $new_att= "a$att"; }
1141
6034
6122
                         $new_att=~ s{[^\w\d:_-]}{}g;
1142
6034
50
5723
                         $new_att ||= 'a';
1143
6034
6
100
9727
12
                         if( $new_att ne $att) { $node->{$new_att}= delete $node->{$att}; }
1144                        }
1145
1146
12237
27
50
100
67
20239
99
                    if ( $empty_element_map->{$tag} and !@{ $node->{'_content'} || []} )
1147
27
51
                      { $xml.= $node->starttag_XML( undef, 1 ); }
1148                     else
1149
12210
18161
                      { $xml.= $node->starttag_XML(undef); }
1150                   }
1151                 else
1152                  { # on the way out
1153
12210
0
0
50
33
18862
0
                   unless ( $empty_element_map->{$tag} and !@{ $node->{'_content'} || [] } )
1154
12210
16487
                    { $xml.= $node->endtag_XML();
1155                     } # otherwise it will have been an <... /> tag.
1156                   }
1157               }
1158             elsif( $node=~ /<!\[CDATA\[/) # the content includes CDATA
1159
2
10
              { foreach my $chunk (split /(<!\[CDATA\[.*?\]\]>)/s, $node) # chunks are CDATA sections or normal text
1160
5
100
16
                  { $xml.= $chunk =~ m{<!\[CDATA\[} ? $chunk : _xml_escape( $chunk); }
1161               }
1162             else # it's just text
1163
12094
10833
              { $xml .= _xml_escape($node); }
1164
36543
250776
            1; # keep traversing
1165         }
1166
48
301
    );
1167
48
1114
  return $xml;
1168 }
1169
1170 sub _xml_escape
1171
12097
7956
  { my( $html)= @_;
1172
12097
6266
    $html =~ s{&(?! # An ampersand that isn't followed by...
1173                   ( \#[0-9]+; | # A hash mark, digits and semicolon, or
1174                     \#x[0-9a-fA-F]+; | # A hash mark, "x", hex digits and semicolon, or
1175                     [\w]+; # A valid unicode entity name and semicolon
1176                   )
1177                 )
1178               }
1179               {&amp;}gx if 0; # Needs to be escaped to amp
1180
1181
12097
8550
    $html=~ s{&}{&amp;}g;
1182
1183     # in old versions of HTML::TreeBuilder &amp; can come out as &Amp;
1184
12097
0
50
33
26576
0
    if( $HTML::TreeBuilder::VERSION && $HTML::TreeBuilder::VERSION <= 3.23) { $html=~ s{&Amp;}{&amp;}g; }
1185
1186     # simple character escapes
1187
12097
7379
    $html =~ s/</&lt;/g;
1188
12097
7002
    $html =~ s/>/&gt;/g;
1189
12097
6512
    $html =~ s/"/&quot;/g;
1190
12097
8809
    $html =~ s/'/&apos;/g;
1191
1192
12097
14273
    return $html;
1193   }
1194
1195
1196
1197
1198 sub _check_xml
1199
52
52
49
  { my( $xml)= @_; # $xml is a ref to the xml string
1200
52
52
49
193
    my $ok= eval { XML::Parser->new->parse( $$xml); };
1201     #if( $ok) { warn " parse OK\n"; }
1202
52
16080
    return $ok;
1203   }
1204
1205 sub _encoding_from_meta
1206
2
2
2
  { my( $tree)= @_;
1207
2
3
    my $enc="iso-8859-1";
1208
2
9
    my @meta= $tree->find( 'meta');
1209
2
75
    foreach my $meta (@meta)
1210
1
50
20
13
      { if( $meta->{'http-equiv'} && ($meta->{'http-equiv'} =~ m{^\s*content-type\s*}i)
1211             && $meta->{content} && ($meta->{content} =~ m{^\s*text/html\s*;\s*charset\s*=\s*(\S*)\s*}i)
1212           )
1213
1
2
          { $enc= lc $1;
1214             #warn " encoding from meta tag is '$enc'\n";
1215
1
1
            last;
1216           }
1217       }
1218
2
3
    return $enc;
1219   }
1220
1221 { sub _to_utf8
1222
2
2
8
    { my( $encoding, $string)= @_;
1223
2
8
      local $SIG{__DIE__};
1224
2
50
0
0
0
5
      if( _use( 'Encode'))
1225
2
10
        { Encode::from_to( $string, $encoding => 'utf8', 0x0400); } # 0x0400 is Encode::FB_XMLCREF
1226       elsif( _use( 'Text::Iconv'))
1227
0
0
0
0
        { my $converter = eval { Text::Iconv->new( $encoding => "utf8") };
1228
0
0
0
0
0
          if( $converter) { $string= $converter->convert( $string); }
1229         }
1230       elsif( _use( 'Unicode::Map8') && _use( 'Unicode::String'))
1231
0
0
        { my $map= Unicode::Map8->new( $encoding);
1232
0
0
          $string= $map->tou( $string)->utf8;
1233         }
1234
2
212
      $string=~ s{[\x00-\x08\x0B\x0C\x0E-\x1F]}{}g; # get rid of control chars, portable in 5.6
1235
2
9
    return $string;
1236   }
1237 }
1238
1239
1240 sub _indent_xhtml
1241
2
2
30
  { my( $xhtml)= @_; # $xhtml is a ref
1242
2
78
7
132
    my %block_tag= map { $_ => 1 } qw( html
1243                                          head
1244                                            meta title link script base
1245                                          body
1246                                            h1 h2 h3 h4 h5 h6
1247                                            p br address blockquote pre
1248                                            ol ul li dd dl dt
1249                                            table tr td th tbody tfoot thead col colgroup caption
1250                                            div frame frameset hr
1251                                      );
1252
1253
2
11
    my $level=0;
1254
2
27
2
100
100
100
100
67
100
24
177
2
    $$xhtml=~ s{( (?:<!(?:--.*?-->|[CDATA[.*?]]>)) # ignore comments and CDATA sections
1255
2
8
                  | <(\w+)((?:\s+\w+\s*=\s*(?:"[^"]*"|'[^']*'))*\s*/>) # empty tag
1256                   | <(\w+) # start tag
1257
11
17
                  |</(\w+) # end tag
1258
11
50
26
                )
1259
11
100
26
               }
1260
11
59
               { if( $2 && $block_tag{$2}) { my $indent= " " x $level;
1261                                                  "\n$indent<$2$3";
1262
11
11
10
45
                                               }
1263
3
12
                 elsif( $4 && $block_tag{$4}) { my $indent= " " x $level;
1264                                                  $level++ unless( $4=~ m{/>});
1265                                                  my $nl= $4 eq 'html' ? '' : "\n";
1266                                                  "$nl$indent<$4";
1267                                                }
1268                  elsif( $5 && $block_tag{$5}) { $level--; "</$5"; }
1269                  else { $1; }
1270                }xesg;
1271   }
1272
1273
1274 sub add_stylesheet
1275
2
2
1
4
  { my( $t, $type, $href)= @_;
1276
2
4
2
7
    my %text_type= map { $_ => 1 } qw( xsl css);
1277
2
3
    my $ss= $t->{twig_elt_class}->new( $PI);
1278
2
100
3
    if( $text_type{$type})
1279
1
3
      { $ss->_set_pi( 'xml-stylesheet', qq{type="text/$type" href="$href"}); }
1280     else
1281
1
119
      { croak "unsupported style sheet type '$type'"; }
1282
1283
1
2
    $t->_add_cpi_outside_of_root( leading_cpi => $ss);
1284
1
2
    return $t;
1285   }
1286
1287 { my %used; # module => 1 if require ok, 0 otherwise
1288   my %disallowed; # for testing, refuses to _use modules in this hash
1289
1290   sub _disallow_use ## no critic (Subroutines::ProhibitNestedSubs);
1291
6
6
146
    { my( @modules)= @_;
1292
6
23
      $disallowed{$_}= 1 foreach (@modules);
1293     }
1294
1295   sub _allow_use ## no critic (Subroutines::ProhibitNestedSubs);
1296
4
4
6
    { my( @modules)= @_;
1297
4
11
      $disallowed{$_}= 0 foreach (@modules);
1298     }
1299
1300   sub _use ## no critic (Subroutines::ProhibitNestedSubs);
1301
3411
79823
    { my( $module, $version)= @_;
1302
3411
100
6813
      $version ||= 0;
1303
3411
7
100
4215
325
      if( $disallowed{$module}) { return 0; }
1304
3404
3145
100
4698
6116
      if( $used{$module}) { return 1; }
1305
259
256
256
100
13819
1563705
2223
      if( eval "require $module") { import $module; $used{$module}= 1;
1306
256
100
521
                                    if( $version)
1307                                       {
1308                                         ## no critic (TestingAndDebugging::ProhibitNoStrict);
1309
187
187
187
194
681
157
1248673
                                        no strict 'refs';
1310
12
12
11
100
11
72
28
                                        if( ${"${module}::VERSION"} >= $version ) { return 1; }
1311
1
14
                                        else { return 0; }
1312                                       }
1313                                     else
1314
244
832
                                      { return 1; }
1315                                   }
1316
3
3
7
8
      else { $used{$module}= 0; return 0; }
1317     }
1318 }
1319
1320 # used to solve the [n] predicates while avoiding getting the entire list
1321 # needs a prototype to accept passing bare blocks
1322 sub _first_n(&$@) ## nocritic (Subroutines::ProhibitSubroutinePrototypes);
1323
106
106
81
  { my $coderef= shift;
1324
106
66
    my $n= shift;
1325
106
57
    my $i=0;
1326
106
100
100
109
    if( $n > 0)
1327
99
121
35
35
100
100
101
1518
24
457
      { foreach (@_) { if( &$coderef) { $i++; return $_ if( $i == $n); } } }
1328     elsif( $n < 0)
1329
6
13
11
11
100
100
6
105
8
62
      { foreach (reverse @_) { if( &$coderef) { $i--; return $_ if( $i == $n); } } }
1330     else
1331
1
85
      { croak "illegal position number 0"; }
1332
71
1085
    return undef;
1333   }
1334
1335 sub _slurp_uri
1336
11
11
2344
  { my( $uri, $base)= @_;
1337
11
1
1
100
60
6
4
    if( $uri=~ m{^\w+://}) { _use( 'LWP::Simple'); return LWP::Simple::get( $uri); }
1338
10
20
    else { return _slurp( _based_filename( $uri, $base)); }
1339   }
1340
1341 sub _based_filename
1342
27
27
93
  { my( $filename, $base)= @_;
1343     # cf. XML/Parser.pm's file_ext_ent_handler
1344
27
100
67
67
    if (defined($base) and not ($filename =~ m{^(?:[\\/]|\w+:)}))
1345
3
3
          { my $newpath = $base;
1346
3
9
            $newpath =~ s{[^\\/:]*$}{$filename};
1347
3
3
            $filename = $newpath;
1348           }
1349
27
175
    return $filename;
1350   }
1351
1352 sub _slurp
1353
23
23
59
  { my( $filename)= @_;
1354
23
25
    my $to_slurp;
1355
23
100
1303
    open( $to_slurp, "<$filename") or croak "cannot open '$filename': $!";
1356
19
92
    local $/= undef;
1357
19
223
    my $content= <$to_slurp>;
1358
19
76
    close $to_slurp;
1359
19
152
    return $content;
1360   }
1361
1362 sub _slurp_fh
1363
2
2
3
  { my( $fh)= @_;
1364
2
6
    local $/= undef;
1365
2
34
    my $content= <$fh>;
1366
2
8
    return $content;
1367   }
1368
1369 # I should really add extra options to allow better configuration of the
1370 # LWP::UserAgent object
1371 # this method forks (except on VMS!)
1372 # - the child gets the data and copies it to the pipe,
1373 # - the parent reads the stream and sends it to XML::Parser
1374 # the data is cut it chunks the size of the XML::Parser::Expat buffer
1375 # the method returns the twig and the status
1376 sub _parseurl
1377
20
20
38
  { my( $t, $safe, $url, $agent)= @_;
1378
20
50
46
    _use( 'LWP') || croak "LWP not available, needed to use parseurl methods";
1379
20
50
114
    if( $^O ne 'VMS')
1380
20
50
251
      { pipe( README, WRITEME) or croak "cannot create connected pipes: $!";
1381
20
100
8079
        if( my $pid= fork)
1382           { # parent code: parse the incoming file
1383
15
270
            close WRITEME; # no need to write
1384
15
100
387
            my $result= $safe ? $t->safe_parse( \*README) : $t->parse( \*README);
1385
15
147
            close README;
1386
15
100
94
            return $@ ? 0 : $t;
1387           }
1388         else
1389          { # child
1390
5
230
            close README; # no need to read
1391
5
223
            local $|=1;
1392
5
67
409
            $agent ||= LWP::UserAgent->new;
1393
5
2070
            my $request = HTTP::Request->new( GET => $url);
1394             # _pass_url_content is called with chunks of data the same size as
1395             # the XML::Parser buffer
1396             my $response = $agent->request( $request,
1397
5
4
4
21626
287003
                             sub { _pass_url_content( \*WRITEME, @_); }, $BUFSIZE);
1398
5
100
17789
            $response->is_success or croak "$url ", $response->message;
1399
4
79
            close WRITEME;
1400
4
318
            CORE::exit(); # CORE is there for mod_perl (which redefines exit)
1401           }
1402       }
1403     else
1404       { # VMS branch (hard to test!)
1405
0
0
        local $|=1;
1406
0
0
0
        $agent ||= LWP::UserAgent->new;
1407
0
0
        my $request = HTTP::Request->new( GET => $url);
1408
0
0
        my $response = $agent->request( $request);
1409
0
0
0
        $response->is_success or croak "$url ", $response->message;
1410
0
0
0
        my $result= $safe ? $t->safe_parse($response->content) : $t->parse($response->content);
1411
0
0
0
        return $@ ? 0 : $t;
1412      }
1413
1414   }
1415
1416 # get the (hopefully!) XML data from the URL and
1417 sub _pass_url_content
1418
4
4
12
  { my( $fh, $data, $response, $protocol)= @_;
1419
4
4
4
35
    print {$fh} $data;
1420   }
1421
1422 sub add_options
1423
1
1
1
1
23
3
  { my %args= map { $_, 1 } @_;
1424
1
8
    %args= _normalize_args( %args);
1425
1
1
3
2
    foreach (keys %args) { $valid_option{$_}++; }
1426   }
1427
1428
27
27
60
sub _pretty_print_styles { return XML::Twig::Elt::_pretty_print_styles(); }
1429
1430 sub _twig_store_internal_dtd
1431  {
1432    # warn " in _twig_store_internal_dtd...\n"; # DEBUG handler
1433
161
161
134
    my( $p, $string)= @_;
1434
161
101
    my $t= $p->{twig};
1435
161
3
100
189
7
    if( $t->{twig_keep_encoding}) { $string= $p->original_string(); }
1436
161
147
    $t->{twig_doctype}->{internal} .= $string;
1437
161
535
    return;
1438   }
1439
1440 sub _twig_stop_storing_internal_dtd
1441    { # warn " in _twig_stop_storing_internal_dtd...\n"; # DEBUG handler
1442
110
110
101
    my $p= shift;
1443
110
50
33
342
    if( @saved_default_handler && defined $saved_default_handler[1])
1444
110
224
      { $p->setHandlers( @saved_default_handler); }
1445     else
1446       {
1447
0
0
        $p->setHandlers( Default => undef);
1448       }
1449
110
1798
    $p->{twig}->{twig_doctype}->{internal}=~ s{^\s*\[}{};
1450
110
178
    $p->{twig}->{twig_doctype}->{internal}=~ s{\]\s*$}{};
1451
110
365
    return;
1452   }
1453
1454 sub _twig_doctype_fin_print
1455   { # warn " in _twig_doctype_fin_print...\n"; # DEBUG handler
1456
16
16
20
    my( $p)= shift;
1457
16
6
100
67
45
7
    if( $p->{twig}->{twig_doctype}->{has_internal} && !$expat_1_95_2) { print ' ]>'; }
1458
16
47
    return;
1459   }
1460
1461
1462 sub _normalize_args
1463
8311
5193
  { my %normalized_args;
1464
8311
11813
    while( my $key= shift )
1465
5514
7679
6934
11029
      { $key= join '', map { ucfirst } split /_/, $key;
1466         #$key= "Twig".$key unless( substr( $key, 0, 4) eq 'Twig');
1467
5514
10348
        $normalized_args{$key}= shift ;
1468       }
1469
8311
14873
    return %normalized_args;
1470   }
1471
1472
62
62
50
100
67
62
96
272
sub _is_fh { return unless $_[0]; return $_[0] if( isa( $_[0], 'GLOB') || isa( $_[0], 'IO::Scalar')); }
1473
1474 sub _set_handler
1475
757
757
766
  { my( $handlers, $whole_path, $handler)= @_;
1476
1477
757
6131
    my $H_SPECIAL = qr{($ALL|$DEFAULT|$COMMENT|$TEXT)};
1478
757
3366
    my $H_PI = qr{(\?|$PI)\s*(([^\s]*)\s*)};
1479
757
1068
    my $H_LEVEL = qr{level \s* \( \s* ([0-9]+) \s* \)}x;
1480
757
1083
    my $H_REGEXP = qr{\(\?([\^xism]*)(-[\^xism]*)?:(.*)\)}x;
1481
757
17966
    my $H_XPATH = qr{(/?/?$REG_TAG_PART? \s* ($REG_PREDICATE\s*)?)+}x;
1482
1483
757
606
    my $prev_handler;
1484
1485
757
609
    my $cpath= $whole_path;
1486     #warn "\$cpath: '$cpath\n";
1487
757
100
31315
    while( $cpath && $cpath=~ s{^\s*($H_SPECIAL|$H_PI|$H_LEVEL|$H_REGEXP|$H_XPATH)\s*($|\|)}{})
1488
770
1647
      { my $path= $1;
1489         #warn "\$cpath: '$cpath' - $path: '$path'\n";
1490
770
67
3471
        $prev_handler ||= $handlers->{handlers}->{string}->{$path} || undef; # $prev_handler gets the first found handler
1491
1492
770
100
100
1017
           _set_special_handler ( $handlers, $path, $handler, $prev_handler)
1493         || _set_pi_handler ( $handlers, $path, $handler, $prev_handler)
1494         || _set_level_handler ( $handlers, $path, $handler, $prev_handler)
1495         || _set_regexp_handler ( $handlers, $path, $handler, $prev_handler)
1496         || _set_xpath_handler ( $handlers, $path, $handler, $prev_handler)
1497         || croak "unrecognized expression in handler: '$whole_path'";
1498
1499         # this both takes care of the simple (gi) handlers and store
1500         # the handler code reference for other handlers
1501
764
2512
        $handlers->{handlers}->{string}->{$path}= $handler;
1502       }
1503
1504
751
2
100
870
201
    if( $cpath) { croak "unrecognized expression in handler: '$whole_path'"; }
1505
1506
749
2694
    return $prev_handler;
1507   }
1508
1509
1510 sub _set_special_handler
1511
770
770
828
  { my( $handlers, $path, $handler, $prev_handler)= @_;
1512
770
100
2708
    if( $path =~ m{^\s*($ALL|$DEFAULT|$COMMENT|$TEXT)\s*$}io )
1513
47
92
      { $handlers->{handlers}->{$1}= $handler;
1514
47
138
        return 1;
1515       }
1516     else
1517
723
1816
      { return 0; }
1518   }
1519
1520 sub _set_xpath_handler
1521
665
665
620
  { my( $handlers, $path, $handler, $prev_handler)= @_;
1522
665
100
733
    if( my $handler_data= _parse_xpath_handler( $path, $handler))
1523
659
935
      { _add_handler( $handlers, $handler_data, $path, $prev_handler);
1524
659
2149
        return 1;
1525       }
1526     else
1527
4
425
      { return 0; }
1528   }
1529
1530 sub _add_handler
1531
674
674
794
  { my( $handlers, $handler_data, $path, $prev_handler)= @_;
1532
1533
674
657
    my $tag= $handler_data->{tag};
1534
674
156
100
1189
281
    my @handlers= $handlers->{xpath_handler}->{$tag} ? @{$handlers->{xpath_handler}->{$tag}} : ();
1535
1536
674
64
176
100
813
84
311
    if( $prev_handler) { @handlers= grep { $_->{path} ne $path } @handlers; }
1537
1538
674
100
1201
    push @handlers, $handler_data if( $handler_data->{handler});
1539
1540
674
100
962
    if( @handlers > 1)
1541
138
686
100
100
100
100
100
100
100
75
50
50
100
100
100
283
6797
      { @handlers= sort { (($b->{score}->{type} || 0) <=> ($a->{score}->{type} || 0))
1542                           || (($b->{score}->{anchored} || 0) <=> ($a->{score}->{anchored} || 0))
1543                           || (($b->{score}->{steps} || 0) <=> ($a->{score}->{steps} || 0))
1544                           || (($b->{score}->{predicates} || 0) <=> ($a->{score}->{predicates} || 0))
1545                           || (($b->{score}->{tests} || 0) <=> ($a->{score}->{tests} || 0))
1546                           || ($a->{path} cmp $b->{path})
1547                         } @handlers;
1548       }
1549
1550
674
1359
    $handlers->{xpath_handler}->{$tag}= \@handlers;
1551   }
1552
1553 sub _set_pi_handler
1554
723
723
704
  { my( $handlers, $path, $handler, $prev_handler)= @_;
1555     # PI conditions ( '?target' => \&handler or '?' => \&handler
1556     # or '#PItarget' => \&handler or '#PI' => \&handler)
1557
723
100
3330
    if( $path=~ /^\s*(?:\?|$PI)\s*(?:([^\s]*)\s*)$/)
1558
43
100
116
      { my $target= $1 || '';
1559         # update the path_handlers count, knowing that
1560         # either the previous or the new handler can be undef
1561
43
84
        $handlers->{pi_handlers}->{$1}= $handler;
1562
43
131
        return 1;
1563       }
1564     else
1565
680
1960
      { return 0;
1566       }
1567   }
1568
1569 sub _set_level_handler
1570
680
680
619
  { my( $handlers, $path, $handler, $prev_handler)= @_;
1571
680
100
831
    if( $path =~ m{^ \s* level \s* \( \s* ([0-9]+) \s* \) \s* $}ox )
1572
7
12
      { my $level= $1;
1573
7
31
31
67
31
28
25
171
        my $sub= sub { my( $stack)= @_; return( ($stack->[-1]->{$ST_TAG} !~ m{^#}) && (scalar @$stack == $level + 1) ) };
1574
7
26
        my $handler_data= { tag=> '*', score => { type => $LEVEL_TRIGGER}, trigger => $sub,
1575                              path => $path, handler => $handler, test_on_text => 0
1576                            };
1577
7
12
        _add_handler( $handlers, $handler_data, $path, $prev_handler);
1578
7
19
        return 1;
1579       }
1580     else
1581
673
1692
      { return 0; }
1582   }
1583
1584 sub _set_regexp_handler
1585
673
673
637
  { my( $handlers, $path, $handler, $prev_handler)= @_;
1586     # if the expression was a regexp it is now a string (it was stringified when it became a hash key)
1587
673
100
758
    if( $path=~ m{^\(\?([\^xism]*)(?:-[\^xism]*)?:(.*)\)$})
1588
8
98
      { my $regexp= qr/(?$1:$2)/; # convert it back into a regexp
1589
8
36
36
36
23
23
161
        my $sub= sub { my( $stack)= @_; return( $stack->[-1]->{$ST_TAG} =~ $regexp ) };
1590
8
32
        my $handler_data= { tag=> '*', score => { type => $REGEXP_TRIGGER} , trigger => $sub,
1591                              path => $path, handler => $handler, test_on_text => 0
1592                            };
1593
8
11
        _add_handler( $handlers, $handler_data, $path, $prev_handler);
1594
8
21
        return 1;
1595       }
1596     else
1597
665
1572
      { return 0; }
1598   }
1599
1600 my $DEBUG_HANDLER= 0; # 0 or 1 (output the handler checking code) or 2 (super verbose)
1601 my $handler_string; # store the handler itself
1602
2
2
47
sub _set_debug_handler { $DEBUG_HANDLER= shift; }
1603
6
0
6
50
6
8
0
10
sub _warn_debug_handler { if( $DEBUG_HANDLER < 3) { warn @_; } else { $handler_string .= join( '', @_); } }
1604
1
1
1
1
5
1
2
sub _return_debug_handler { my $string= $handler_string; $handler_string=''; return $string; }
1605
1606 sub _parse_xpath_handler
1607
665
665
561
  { my( $xpath, $handler)= @_;
1608
665
517
    my $xpath_original= $xpath;
1609
1610
1611
665
1
100
856
2
    if( $DEBUG_HANDLER >=1) { _warn_debug_handler( "\n\nparsing path '$xpath'\n"); }
1612
1613
665
586
    my $path_to_check= $xpath;
1614
665
15189
    $path_to_check=~ s{/?/?$REG_TAG_PART?\s*(?:$REG_PREDICATE\s*)?}{}g;
1615
665
0
50
67
1240
0
    if( $DEBUG_HANDLER && $path_to_check=~ /\S/) { _warn_debug_handler( "left: $path_to_check\n"); }
1616
665
50
893
    return if( $path_to_check=~ /\S/);
1617
1618
665
2091
    (my $xpath_to_display= $xpath)=~ s{(["{}'\[\]\@\$])}{\\$1}g;
1619
1620
665
448
    my @xpath_steps;
1621
665
441
    my $last_token_is_sep;
1622
1623
665
19116
    while( $xpath=~ s{^\s*
1624                        ( (//?) # separator
1625                         | (?:$REG_TAG_PART\s*(?:$REG_PREDICATE\s*)?) # tag name and optional predicate
1626                         | (?:$REG_PREDICATE) # just a predicate
1627                        )
1628                      }
1629                      {}x
1630          )
1631       { # check that we have alternating separators and steps
1632
857
100
1689
        if( $2) # found a separator
1633
114
1
100
132
3
          { if( $last_token_is_sep) { return 0; } # 2 separators in a row
1634
113
85
            $last_token_is_sep= 1;
1635           }
1636         else
1637
743
2
100
100
1285
5
          { if( defined( $last_token_is_sep) && !$last_token_is_sep) { return 0; } # 2 steps in a row
1638
741
589
            $last_token_is_sep= 0;
1639           }
1640
1641
854
4522
        push @xpath_steps, $1;
1642       }
1643
662
1
100
817
2
    if( $last_token_is_sep) { return 0; } # expression cannot end with a separator
1644
1645
661
453
    my $i=-1;
1646
1647
661
67
1464
    my $perlfunc= _join_n( $NO_WARNINGS . ';',
1648                            q|my( $stack)= @_; |,
1649                            q|my @current_elts= (scalar @$stack); |,
1650                            q|my @new_current_elts; |,
1651                            q|my $elt; |,
1652                            ($DEBUG_HANDLER >= 1) && (qq#warn q{checking path '$xpath_to_display'\n};#),
1653                          );
1654
1655
1656
661
535
    my $last_tag='';
1657
661
100
1068
    my $anchored= $xpath_original=~ m{^\s*/(?!/)} ? 1 : 0;
1658
661
1236
    my $score={ type => $XPATH_TRIGGER, anchored => $anchored };
1659
661
821
    my $flag= { test_on_text => 0 };
1660
661
546
    my $sep='/'; # '/' or '//'
1661
661
1173
    while( my $xpath_step= pop @xpath_steps)
1662
738
8016
      { my( $tag, $predicate)= $xpath_step =~ m{^($REG_TAG_PART)?(?:\[(.*)\])?\s*$};
1663
738
1039
        $score->{steps}++;
1664
738
100
907
        $tag||='*';
1665
1666
738
100
893
        my $warn_empty_stack= $DEBUG_HANDLER >= 2 ? qq{warn "return with empty stack\\n";} : '';
1667
1668
738
100
865
        if( $predicate)
1669
222
1
100
285
2
          { if( $DEBUG_HANDLER >= 2) { _warn_debug_handler( "predicate is: '$predicate'\n"); }
1670             # changes $predicate (from an XPath expression to a Perl one)
1671
222
1
100
1039
125
            if( $predicate=~ m{^\s*$REG_NUMBER\s*$}) { croak "position selector [$predicate] not supported on twig_handlers"; }
1672
221
281
            _parse_predicate_in_handler( $predicate, $flag, $score);
1673
221
1
100
409
3
            if( $DEBUG_HANDLER >= 2) { _warn_debug_handler( "predicate becomes: '$predicate'\n"); }
1674           }
1675
1676
737
820
       my $tag_cond= _tag_cond( $tag);
1677
737
100
838
       my $cond= join( " && ", grep { $_ } $tag_cond, $predicate) || 1;
1678
1679
737
8
8
100
100
67
1084
19
12
       if( $css_sel && $tag=~ m{\.}) { $tag=~s{\.[^.]*$}{}; $tag ||='*'; }
1680
737
679
       $tag=~ s{(.)#.+$}{$1};
1681
1682
737
67
1377
       $last_tag ||= $tag;
1683
1684
737
100
50
842
       if( $sep eq '/')
1685          {
1686
736
840
           $perlfunc .= sprintf( _join_n( q#foreach my $current_elt (@current_elts) #,
1687                                            q# { next if( !$current_elt); #,
1688                                            q# $current_elt--; #,
1689                                            q# $elt= $stack->[$current_elt]; #,
1690                                            q# if( %s) { push @new_current_elts, $current_elt;} #,
1691                                            q# } #,
1692                                         ),
1693                                  $cond
1694                                );
1695          }
1696        elsif( $sep eq '//')
1697          {
1698
1
2
           $perlfunc .= sprintf( _join_n( q#foreach my $current_elt (@current_elts) #,
1699                                            q# { next if( !$current_elt); #,
1700                                            q# $current_elt--; #,
1701                                            q# my $candidate= $current_elt; #,
1702                                            q# while( $candidate >=0) #,
1703                                            q# { $elt= $stack->[$candidate]; #,
1704                                            q# if( %s) { push @new_current_elts, $candidate;} #,
1705                                            q# $candidate--; #,
1706                                            q# } #,
1707                                            q# } #,
1708                                         ),
1709                                  $cond
1710                                );
1711          }
1712
737
100
1042
       my $warn= $DEBUG_HANDLER >= 2 ? _join_n( qq#warn qq%fail at cond '$cond'%;#) : '';
1713
737
758
       $perlfunc .= sprintf( _join_n( q#unless( @new_current_elts) { %s return 0; } #,
1714                                       q#@current_elts= @new_current_elts; #,
1715                                       q#@new_current_elts=(); #,
1716                                     ),
1717                              $warn
1718                            );
1719
1720
737
1746
        $sep= pop @xpath_steps;
1721      }
1722
1723
660
100
787
    if( $anchored) # there should be a better way, but this works
1724       {
1725
31
50
44
       my $warn= $DEBUG_HANDLER >= 2 ? _join_n( qq#warn qq{fail, stack not empty};#) : '';
1726
31
31
       $perlfunc .= sprintf( _join_n( q#if( ! grep { $_ == 0 } @current_elts) { %s return 0;}#), $warn);
1727       }
1728
1729
660
100
743
    $perlfunc.= qq{warn "handler for '$xpath_to_display' triggered\\n";\n} if( $DEBUG_HANDLER >=2);
1730
660
725
    $perlfunc.= qq{return q{$xpath_original};\n};
1731
660
100
724
    _warn_debug_handler( "\nperlfunc:\n$perlfunc\n") if( $DEBUG_HANDLER>=1);
1732
660
18
18
18
16
16
16
21
21
21
19
19
19
16
16
16
14
14
14
16
16
16
17
15
15
13
13
13
12
12
12
5
5
5
6
6
6
7
7
7
7
7
7
5
5
5
5
5
5
4
4
4
4
4
4
7
7
7
3
3
3
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
1
1
1
18
16
21
19
16
14
16
17
13
12
5
6
7
7
5
5
5
5
7
3
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
4
4
4
4
4
4
4
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
2
2
2
2
2
2
2
34049
61
18
1780
48
18
1510
65
22
1976
58
19
1802
52
15
1741
48
15
1540
51
15
1545
346
1863
1611
88
55
1507
35
51
1343
16
47
456
17
76
542
25
6
775
26
8
764
17
5
535
15
5
467
12
6
476
13
6
495
22
9
515
10
4
383
15
3
361
10
3
330
9
3
322
8
3
442
10
2
322
11
4
350
10
3
419
8
3
404
9
4
324
10
3
436
10
2
337
7
3
272
7
2
277
9
2
261
11
2
382
8
2
293
9
2
323
9
3
363
9
3
369
8
2
236
13
4
341
12
4
322
11
4
288
12
4
314
12
5
330
13
5
263
12
5
239
4
1
155
3
1
106
3
1
161
4
1
120
4
1
108
3
2
107
3
1
113
3
1
117
4
1
116
3
2
113
3
1
111
3
1
116
3
5
115
3
1
122
3
1
124
3
1
119
3
2
118
3
1
110
3
1
109
4
1
128
4
1
122
4
1
102
3
2
124
3
1
130
3
1
125
3
1
127
4
1
126
3
1
125
4
1
179
3
1
109
3
1
117
3
1
102
3
2
133
3
1
124
3
1
173
3
1
107
4
1
134
8
2
121
7
2
77
8
2
104
8
2
161
6
2
139
3
1
105
    my $s= eval "sub { $perlfunc }";
1733
660
100
1302
      if( $@)
1734
1
134
        { croak "wrong handler condition '$xpath' ($@);" }
1735
1736
659
100
948
      _warn_debug_handler( "last tag: '$last_tag', test_on_text: '$flag->{test_on_text}'\n") if( $DEBUG_HANDLER >=1);
1737
659
4
100
815
8
      _warn_debug_handler( "score: ", join( ' ', map { "$_: $score->{$_}" } sort keys %$score), "\n") if( $DEBUG_HANDLER >=1);
1738
659
4392
      return { tag=> $last_tag, score => $score, trigger => $s, path => $xpath_original, handler => $handler, test_on_text => $flag->{test_on_text} };
1739     }
1740
1741
2167
7018
sub _join_n { return join( "\n", @_, ''); }
1742
1743 # the "tag" part can be <tag>, <tag>.<class> or <tag>#<id> (where tag can be *, or start with # for hidden tags)
1744 sub _tag_cond
1745
737
737
667
  { my( $full_tag)= @_;
1746
1747
737
418
    my( $tag, $class, $id);
1748
737
100
894
    if( $full_tag=~ m{^(.+)#(.+)$})
1749
4
8
      { ($tag, $id)= ($1, $2); } # <tag>#<id>
1750     else
1751
733
100
1351
      { ( $tag, $class)= $css_sel ? $full_tag=~ m{^(.*?)(?:\.([^.]*))?$} : ($full_tag, undef); }
1752
1753
737
100
100
2405
    my $tag_cond = $tag && $tag ne '*' ? qq#(\$elt->{'$ST_TAG'} eq "$tag")# : '';
1754
737
100
772
    my $id_cond = defined $id ? qq#(\$elt->{id} eq "$id")# : '';
1755
737
100
721
    my $class_cond = defined $class ? qq#(\$elt->{class}=~ m{(^| )$class( |\$)})# : '';
1756
1757
737
2211
713
2540
    my $full_cond= join( ' && ', grep { $_ } ( $tag_cond, $class_cond, $id_cond));
1758
1759
737
1153
    return $full_cond;
1760   }
1761
1762 # input: the predicate ($_[0]) which will be changed in place
1763 # flags, a hashref with various flags (like test_on_text)
1764 # the score
1765 sub _parse_predicate_in_handler
1766
221
221
332
  { my( $flag, $score)= @_[1..2];
1767
221
585
5291
1991
    $_[0]=~ s{( ($REG_STRING) # strings
1768                  |\@($REG_TAG_NAME)(?=\s*(?:[><=!]|!~|=~)) # @att (followed by a comparison operator)
1769                  |\@($REG_TAG_NAME) # @att (not followed by a comparison operator)
1770
585
527
                 |=~|!~ # matching operators
1771                  |([><]=?|=|!=)(?=\s*[\d+-]) # test before a number
1772                  |([><]=?|=|!=) # test, other cases
1773
585
44
100
80
2895
55
                 |($REG_FUNCTION) # no arg functions
1774                  # this bit is a mess, but it is the only solution with this half-baked parser
1775
585
139
100
100
100
100
100
100
100
100
100
50
100
33
100
67
67
67
1833
346
                 |(string\(\s*$REG_NAME\s*\)\s*$REG_MATCH\s*$REG_REGEXP) # string( child)=~ /regexp/
1776
1
4
                 |(string\(\s*$REG_NAME\s*\)\s*$REG_COMP\s*$REG_STRING) # string( child) = "value" (or other test)
1777
161
100
709
                 |(string\(\s*$REG_NAME\s*\)\s*$REG_COMP\s*$REG_NUMBER) # string( child) = nb (or other test)
1778                  |(and|or)
1779                 # |($REG_NAME(?=\s*(and|or|$))) # nested tag name (needs to be after all other unquoted strings)
1780                  |($REG_TAG_IN_PREDICATE) # nested tag name (needs to be after all other unquoted strings)
1781
1782               )}
1783              { my( $token, $str, $att, $bare_att, $num_test, $alpha_test, $func, $str_regexp, $str_test_alpha, $str_test_num, $and_or, $tag)
1784
2
10
               = ( $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12);
1785
1786                $score->{predicates}++;
1787
1788                # store tests on text (they are not always allowed)
1789
2
11
               if( $func || $str_regexp || $str_test_num || $str_test_alpha ) { $flag->{test_on_text}= 1; }
1790
1791
2
5
               if( defined $str) { $token }
1792
2
2
               elsif( $tag) { qq{(\$elt->{'$ST_ELT'} && \$elt->{'$ST_ELT'}->has_child( '$tag'))} }
1793
2
4
               elsif( $att) { $att=~ m{^#} ? qq{ (\$elt->{'$ST_ELT'} && \$elt->{'$ST_ELT'}->{att}->{'$att'})}
1794
2
2
                                                     : qq{\$elt->{'$att'}}
1795
2
10
                                      }
1796                                         # for some reason Devel::Cover flags the following lines as not tested. They are though.
1797
0
0
0
               elsif( $bare_att) { $bare_att=~ m{^#} ? qq{(\$elt->{'$ST_ELT'} && defined(\$elt->{'$ST_ELT'}->{att}->{'$bare_att'}))}
1798
0
0
                                                          : qq{defined( \$elt->{'$bare_att'})}
1799                                       }
1800
12
12
100
14
54
               elsif( $num_test && ($num_test eq '=') ) { "==" } # others tests are unchanged
1801
60
911
               elsif( $alpha_test) { $PERL_ALPHA_TEST{$alpha_test} }
1802                elsif( $func && $func=~ m{^string})
1803                                       { "\$elt->{'$ST_ELT'}->text"; }
1804                elsif( $str_regexp && $str_regexp =~ m{string\(\s*($REG_TAG_NAME)\s*\)\s*($REG_MATCH)\s*($REG_REGEXP)})
1805                                       { "defined( _first_n { \$_->text $2 $3 } 1, \$elt->{'$ST_ELT'}->_children( '$1'))"; }
1806                elsif( $str_test_alpha && $str_test_alpha =~ m{string\(\s*($REG_TAG_NAME)\s*\)\s*($REG_COMP)\s*($REG_STRING)})
1807                                       { my( $tag, $op, $str)= ($1, $2, $3);
1808                                         $str=~ s{(?<=.)'(?=.)}{\\'}g; # escape a quote within the string
1809                                         $str=~ s{^"}{'};
1810                                         $str=~ s{"$}{'};
1811                                         "defined( _first_n { \$_->text $PERL_ALPHA_TEST{$op} $str } 1, \$elt->{'$ST_ELT'}->children( '$tag'))"; }
1812                elsif( $str_test_num && $str_test_num =~ m{string\(\s*($REG_TAG_NAME)\s*\)\s*($REG_COMP)\s*($REG_NUMBER)})
1813                                       { my $test= ($2 eq '=') ? '==' : $2;
1814                                         "defined( _first_n { \$_->text $test $3 } 1, \$elt->{'$ST_ELT'}->children( '$1'))";
1815                                       }
1816                elsif( $and_or) { $score->{tests}++; $and_or eq 'and' ? '&&' : '||' ; }
1817                else { $token; }
1818              }gexs;
1819   }
1820
1821
1822 sub setCharHandler
1823
1
1
1
1
  { my( $t, $handler)= @_;
1824
1
2
    $t->{twig_char_handler}= $handler;
1825   }
1826
1827
1828 sub _reset_handlers
1829
423
423
681
  { my $handlers= shift;
1830
423
508
    delete $handlers->{handlers};
1831
423
329
    delete $handlers->{path_handlers};
1832
423
351
    delete $handlers->{subpath_handlers};
1833
423
50
595
    $handlers->{attcond_handlers_exp}=[] if( $handlers->{attcond_handlers});
1834
423
539
    delete $handlers->{attcond_handlers};
1835   }
1836
1837 sub _set_handlers
1838
423
100
423
651
  { my $handlers= shift || return;
1839
422
394
    my $set_handlers= {};
1840
422
422
363
892
    foreach my $path (keys %{$handlers})
1841
613
926
      { _set_handler( $set_handlers, $path, $handlers->{$path}); }
1842
1843
415
2077
    return $set_handlers;
1844   }
1845
1846
1847 sub setTwigHandler
1848
64
64
1
2918
  { my( $t, $path, $handler)= @_;
1849
64
100
251
    $t->{twig_handlers} ||={};
1850
64
112
    return _set_handler( $t->{twig_handlers}, $path, $handler);
1851   }
1852
1853 sub setTwigHandlers
1854
263
263
1
507
  { my( $t, $handlers)= @_;
1855
263
100
660
    my $previous_handlers= $t->{twig_handlers} || undef;
1856
263
668
    _reset_handlers( $t->{twig_handlers});
1857
263
433
    $t->{twig_handlers}= _set_handlers( $handlers);
1858
256
471
    return $previous_handlers;
1859   }
1860
1861 sub setStartTagHandler
1862
3
3
1
8
  { my( $t, $path, $handler)= @_;
1863
3
50
5
    $t->{twig_starttag_handlers}||={};
1864
3
4
    return _set_handler( $t->{twig_starttag_handlers}, $path, $handler);
1865   }
1866
1867 sub setStartTagHandlers
1868
34
34
1
293
  { my( $t, $handlers)= @_;
1869
34
100
101
    my $previous_handlers= $t->{twig_starttag_handlers} || undef;
1870
34
77
    _reset_handlers( $t->{twig_starttag_handlers});
1871
34
56
    $t->{twig_starttag_handlers}= _set_handlers( $handlers);
1872
34
60
    return $previous_handlers;
1873    }
1874
1875 sub setIgnoreEltsHandler
1876
2
2
1
499
  { my( $t, $path, $action)= @_;
1877
2
100
12
    $t->{twig_ignore_elts_handlers}||={};
1878
2
5
    return _set_handler( $t->{twig_ignore_elts_handlers}, $path, $action );
1879   }
1880
1881 sub setIgnoreEltsHandlers
1882
11
11
1
14
  { my( $t, $handlers)= @_;
1883
11
9
    my $previous_handlers= $t->{twig_ignore_elts_handlers};
1884
11
35
    _reset_handlers( $t->{twig_ignore_elts_handlers});
1885
11
20
    $t->{twig_ignore_elts_handlers}= _set_handlers( $handlers);
1886
11
12
    return $previous_handlers;
1887    }
1888
1889 sub setEndTagHandler
1890
2
2
1
48
  { my( $t, $path, $handler)= @_;
1891
2
100
10
    $t->{twig_endtag_handlers}||={};
1892
2
5
    return _set_handler( $t->{twig_endtag_handlers}, $path,$handler);
1893   }
1894
1895 sub setEndTagHandlers
1896
14
14
1
12
  { my( $t, $handlers)= @_;
1897
14
16
    my $previous_handlers= $t->{twig_endtag_handlers};
1898
14
30
    _reset_handlers( $t->{twig_endtag_handlers});
1899
14
21
    $t->{twig_endtag_handlers}= _set_handlers( $handlers);
1900
14
13
    return $previous_handlers;
1901    }
1902
1903 # a little more complex: set the twig_handlers only if a code ref is given
1904 sub setTwigRoots
1905
101
101
1
96
  { my( $t, $handlers)= @_;
1906
101
100
    my $previous_roots= $t->{twig_roots};
1907
101
231
    _reset_handlers($t->{twig_roots});
1908
101
151
    $t->{twig_roots}= _set_handlers( $handlers);
1909
1910
101
178
    _check_illegal_twig_roots_handlers( $t->{twig_roots});
1911
1912
99
99
71
145
    foreach my $path (keys %{$handlers})
1913
98
100
315
      { $t->{twig_handlers}||= {};
1914
98
100
67
450
        _set_handler( $t->{twig_handlers}, $path, $handlers->{$path})
1915           if( ref($handlers->{$path}) && isa( $handlers->{$path}, 'CODE'));
1916       }
1917
99
115
    return $previous_roots;
1918   }
1919
1920 sub _check_illegal_twig_roots_handlers
1921
101
101
91
  { my( $handlers)= @_;
1922
101
101
84
229
    foreach my $tag_handlers (values %{$handlers->{xpath_handler}})
1923
82
101
      { foreach my $handler_data (@$tag_handlers)
1924
82
100
231
          { if( my $type= $handler_data->{test_on_text})
1925
2
204
              { croak "string() condition not supported on twig_roots option"; }
1926           }
1927       }
1928
99
91
    return;
1929   }
1930
1931
1932 # just store the reference to the expat object in the twig
1933 sub _twig_init
1934    { # warn " in _twig_init...\n"; # DEBUG handler
1935
1936
3102
423328
    my $p= shift;
1937
3102
3357
    my $t=$p->{twig};
1938
1939
3102
1
100
4244
104
    if( $t->{twig_parsing} ) { croak "cannot reuse a twig that is already parsing"; }
1940
3101
2725
    $t->{twig_parsing}=1;
1941
1942
3101
3048
    $t->{twig_parser}= $p;
1943
3101
3078
100
3498
5174
    if( $weakrefs) { weaken( $t->{twig_parser}); }
1944
1945     # in case they had been created by a previous parse
1946
3101
2504
    delete $t->{twig_dtd};
1947
3101
2339
    delete $t->{twig_doctype};
1948
3101
2059
    delete $t->{twig_xmldecl};
1949
3101
2539
    delete $t->{twig_root};
1950
1951     # if needed set the output filehandle
1952
3101
4135
    $t->_set_fh_to_twig_output_fh();
1953
3101
3708
    return;
1954   }
1955
1956 # uses eval to catch the parser's death
1957 sub safe_parse
1958
13
13
1
92
  { my $t= shift;
1959
13
13
84
138
    eval { $t->parse( @_); } ;
1960
13
100
77
    return $@ ? $t->_reset_twig_after_error : $t;
1961   }
1962
1963 sub safe_parsefile
1964
25
25
1
33
  { my $t= shift;
1965
25
25
27
55
    eval { $t->parsefile( @_); } ;
1966
25
100
73
    return $@ ? $t->_reset_twig_after_error : $t;
1967   }
1968
1969 # restore a twig in a proper state so it can be reused for a new parse
1970 sub _reset_twig
1971
10
10
7
  { my $t= shift;
1972
10
12
    $t->{twig_parsing}= 0;
1973
10
16
    delete $t->{twig_current};
1974
10
13
    delete $t->{extra_data};
1975
10
12
    delete $t->{twig_dtd};
1976
10
9
    delete $t->{twig_in_pcdata};
1977
10
10
    delete $t->{twig_in_cdata};
1978
10
14
    delete $t->{twig_stored_space};
1979
10
31
    delete $t->{twig_entity_list};
1980
10
100
41
    $t->root->delete if( $t->root);
1981
10
28
    delete $t->{twig_root};
1982
10
11
    return $t;
1983   }
1984
1985 sub _reset_twig_after_error
1986
10
10
12
  { my $t= shift;
1987
10
24
    $t->_reset_twig;
1988
10
18
    return undef;
1989   }
1990
1991
1992 sub _add_or_discard_stored_spaces
1993
87121
60829
  { my $t= shift;
1994
1995
87121
62892
    $t->{twig_right_after_root}=0; #XX
1996
1997
87121
100
106393
    my $current= $t->{twig_current} or return; # ugly hack, with ignore on, twig_current can disappear
1998
84026
100
116208
    return unless length $t->{twig_stored_spaces};
1999
8375
8362
    my $current_gi= $XML::Twig::index2gi[$current->{'gi'}];
2000
2001
8375
100
10365
    if( ! $t->{twig_discard_all_spaces})
2002
8374
100
13634
      { if( ! defined( $t->{twig_space_policy}->{$current_gi}))
2003
594
828
          { $t->{twig_space_policy}->{$current_gi}= _space_policy( $t, $current_gi); }
2004
8374
100
100
40966
        if( $t->{twig_space_policy}->{$current_gi} || ($t->{twig_stored_spaces}!~ m{\n}) || $t->{twig_preserve_space})
2005
113
131
          { _insert_pcdata( $t, $t->{twig_stored_spaces} ); }
2006       }
2007
2008
8375
7236
    $t->{twig_stored_spaces}='';
2009
2010
8375
6714
    return;
2011   }
2012
2013 # the default twig handlers, which build the tree
2014 sub _twig_start
2015    { # warn " in _twig_start...\n"; # DEBUG handler
2016
2017     #foreach my $s (@_) { next if ref $s; warn "$s: ", is_utf8( $s) ? "has flag" : "FLAG NOT SET"; } # YYY
2018
2019
43600
1688675
    my ($p, $gi, @att)= @_;
2020
43600
33292
    my $t=$p->{twig};
2021
2022     # empty the stored pcdata (space stored in case they are really part of
2023     # a pcdata element) or stored it if the space policy dictates so
2024     # create a pcdata element with the spaces if need be
2025
43600
41682
    _add_or_discard_stored_spaces( $t);
2026
43600
27768
    my $parent= $t->{twig_current};
2027
2028     # if we were parsing PCDATA then we exit the pcdata
2029
43600
100
46316
    if( $t->{twig_in_pcdata})
2030
18139
11352
      { $t->{twig_in_pcdata}= 0;
2031
18139
14295
        delete $parent->{'twig_current'};
2032
18139
15077
        $parent= $parent->{parent};
2033       }
2034
2035     # if we choose to keep the encoding then we need to parse the tag
2036
43600
100
50
67428
    if( my $func = $t->{parse_start_tag})
2037
1654
2668
      { ($gi, @att)= &$func($p->original_string); }
2038     elsif( $t->{twig_entities_in_attribute})
2039       {
2040
0
0
       ($gi,@att)= _parse_start_tag( $p->recognized_string);
2041
0
0
         $t->{twig_entities_in_attribute}=0;
2042       }
2043
2044     # if we are using an external DTD, we need to fill the default attributes
2045
43600
49
100
51037
59
    if( $t->{twig_read_external_dtd}) { _fill_default_atts( $t, $gi, \@att); }
2046
2047     # filter the input data if need be
2048
43600
100
48990
    if( my $filter= $t->{twig_input_filter})
2049
5
10
      { $gi= $filter->( $gi);
2050
5
4
18
8
        foreach my $att (@att) { $att= $filter->($att); }
2051       }
2052
2053
43600
22410
    my $ns_decl;
2054
43600
100
42909
    if( $t->{twig_map_xmlns})
2055
69
83
      { $ns_decl= _replace_ns( $t, \$gi, \@att); }
2056
2057
43600
63829
    my $elt= $t->{twig_elt_class}->new( $gi);
2058
43600
54639
    $elt->set_atts( @att);
2059
2060     # now we can store the tag and atts
2061
43600
80260
    my $context= { $ST_TAG => $gi, $ST_ELT => $elt, @att};
2062
43600
100
47374
    $context->{$ST_NS}= $ns_decl if $ns_decl;
2063
43600
25523
100
43401
36742
    if( $weakrefs) { weaken( $context->{$ST_ELT}); }
2064
43600
43600
26442
49970
    push @{$t->{_twig_context_stack}}, $context;
2065
2066
43600
100
61504
    delete $parent->{'twig_current'} if( $parent);
2067
43600
30548
    $t->{twig_current}= $elt;
2068
43600
33434
    $elt->{'twig_current'}=1;
2069
2070
43600
100
35955
    if( $parent)
2071
40505
29693
      { my $prev_sibling= $parent->{last_child};
2072
40505
100
40097
        if( $prev_sibling)
2073
37932
32161
          { $prev_sibling->{next_sibling}= $elt;
2074
37932
37932
19924
100
29610
39780
24006
            $elt->{prev_sibling}=$prev_sibling; if( $XML::Twig::weakrefs) { weaken( $elt->{prev_sibling});} ;
2075           }
2076
2077
40505
40505
22451
100
33195
37737
21625
        $elt->{parent}=$parent; if( $XML::Twig::weakrefs) { weaken( $elt->{parent});} ;
2078
40505
2573
100
48028
2271
        unless( $parent->{first_child}) { $parent->{first_child}= $elt; }
2079
40505
40505
40505
22451
100
27549
26892
42096
25789
         $parent->{empty}=0; $parent->{last_child}=$elt; if( $XML::Twig::weakrefs) { weaken( $parent->{last_child});} ;
2080       }
2081     else
2082       { # processing root
2083
3095
3881
        $t->set_root( $elt);
2084         # call dtd handler if need be
2085
3095
50
3918
        $t->{twig_dtd_handler}->($t, $t->{twig_dtd})
2086           if( defined $t->{twig_dtd_handler});
2087
2088         # set this so we can catch external entities
2089         # (the handler was modified during DTD processing)
2090
3095
100
100
4352
        if( $t->{twig_default_print})
2091
62
167
          { $p->setHandlers( Default => \&_twig_print); }
2092         elsif( $t->{twig_roots})
2093
42
127
127
159
221
          { $p->setHandlers( Default => sub { return }); }
2094         else
2095
2991
6364
          { $p->setHandlers( Default => \&_twig_default); }
2096       }
2097
2098
43600
100
113512
    $elt->{empty}= $p->recognized_string=~ m{/\s*>$}s ? 1 : 0;
2099
2100
43600
100
252830
    $elt->{extra_data}= $t->{extra_data} if( $t->{extra_data});
2101
43600
32700
    $t->{extra_data}='';
2102
2103     # if the element is ID-ed then store that info
2104
43600
36540
    my $id= $elt->{'att'}->{$ID};
2105
43600
100
47062
    if( defined $id)
2106
18684
21739
      { $t->{twig_id_list}->{$id}= $elt;
2107
18684
9681
100
19634
11298
        if( $weakrefs) { weaken( $t->{twig_id_list}->{$id}); }
2108       }
2109
2110     # call user handler if need be
2111
43600
100
50639
    if( $t->{twig_starttag_handlers})
2112       { # call all appropriate handlers
2113
156
188
        my @handlers= _handler( $t, $t->{twig_starttag_handlers}, $gi);
2114
2115
156
117
        local $_= $elt;
2116
2117
156
143
        foreach my $handler ( @handlers)
2118
66
100
107
          { $handler->($t, $elt) || last; }
2119         # call _all_ handler if needed
2120
156
100
340
        if( my $all= $t->{twig_starttag_handlers}->{handlers}->{$ALL})
2121
3
6
          { $all->($t, $elt); }
2122       }
2123
2124     # check if the tag is in the list of tags to be ignored
2125
43600
100
47518
    if( $t->{twig_ignore_elts_handlers})
2126
81
105
      { my @handlers= _handler( $t, $t->{twig_ignore_elts_handlers}, $gi);
2127         # only the first handler counts, it contains the action (discard/print/string)
2128
81
24
24
100
98
26
37
        if( @handlers) { my $action= shift @handlers; $t->ignore( $elt, $action); }
2129       }
2130
2131
43600
2
100
100
59475
4
    if( $elt->{'att'}->{'xml:space'} && ( $elt->{'att'}->{'xml:space'} eq 'preserve')) { $t->{twig_preserve_space}++; }
2132
2133
2134
43600
103148
    return;
2135   }
2136
2137 sub _replace_ns
2138
83
83
72
  { my( $t, $gi, $atts)= @_;
2139
83
39
    my $decls;
2140
83
110
    foreach my $new_prefix ( $t->parser->new_ns_prefixes)
2141
37
116
      { my $uri= $t->parser->expand_ns_prefix( $new_prefix);
2142         # replace the prefix if it is mapped
2143
37
189
        $decls->{$new_prefix}= $uri;
2144
37
100
100
105
        if( !$t->{twig_keep_original_prefix} && (my $mapped_prefix= $t->{twig_map_xmlns}->{$uri}))
2145
25
20
          { $new_prefix= $mapped_prefix; }
2146         # now put the namespace declaration back in the element
2147
37
100
40
        if( $new_prefix eq '#default')
2148
3
21
          { push @$atts, "xmlns" => $uri; }
2149         else
2150
34
65
          { push @$atts, "xmlns:$new_prefix" => $uri; }
2151       }
2152
2153
83
100
352
    if( $t->{twig_keep_original_prefix})
2154       { # things become more complex: we need to find the original prefix
2155         # and store both prefixes
2156
13
18
        my $ns_info= $t->_ns_info( $$gi);
2157
13
9
        my $map_att;
2158
13
100
19
        if( $ns_info->{mapped_prefix})
2159
11
15
          { $$gi= "$ns_info->{mapped_prefix}:$$gi";
2160
11
19
            $map_att->{$ns_info->{mapped_prefix}}= $ns_info->{prefix};
2161           }
2162
13
8
        my $att_name=1;
2163
13
14
        foreach( @$atts)
2164
30
100
29
          { if( $att_name)
2165               {
2166
15
14
                my $ns_info= $t->_ns_info( $_);
2167
15
100
18
                if( $ns_info->{mapped_prefix})
2168
4
6
                  { $_= "$ns_info->{mapped_prefix}:$_";
2169
4
6
                    $map_att->{$ns_info->{mapped_prefix}}= $ns_info->{prefix};
2170                   }
2171
15
17
                $att_name=0;
2172               }
2173             else
2174
15
12
              { $att_name=1; }
2175           }
2176
13
100
28
        push @$atts, '#original_gi', $map_att if( $map_att);
2177       }
2178     else
2179
70
80
      { $$gi= $t->_replace_prefix( $$gi);
2180
70
52
        my $att_name=1;
2181
70
71
        foreach( @$atts)
2182
182
91
91
100
152
84
73
          { if( $att_name) { $_= $t->_replace_prefix( $_); $att_name=0; }
2183
91
75
            else { $att_name=1; }
2184           }
2185       }
2186
83
85
    return $decls;
2187   }
2188
2189
2190 # extract prefix, local_name, uri, mapped_prefix from a name
2191 # will only work if called from a start or end tag handler
2192 sub _ns_info
2193
28
28
21
  { my( $t, $name)= @_;
2194
28
23
    my $ns_info={};
2195
28
23
    my $p= $t->parser;
2196
28
43
    $ns_info->{uri}= $p->namespace( $name);
2197
28
100
150
    return $ns_info unless( $ns_info->{uri});
2198
2199
15
16
    $ns_info->{prefix}= _a_proper_ns_prefix( $p, $ns_info->{uri});
2200
15
67
37
    $ns_info->{mapped_prefix}= $t->{twig_map_xmlns}->{$ns_info->{uri}} || $ns_info->{prefix};
2201
2202
15
11
    return $ns_info;
2203   }
2204
2205 sub _a_proper_ns_prefix
2206
22
22
20
  { my( $p, $uri)= @_;
2207
22
32
    foreach my $prefix ($p->current_ns_prefixes)
2208
38
100
280
      { if( $p->expand_ns_prefix( $prefix) eq $uri)
2209
22
112
          { return $prefix; }
2210       }
2211
0
0
    return;
2212   }
2213
2214 # returns the uri bound to a prefix in the original document
2215 # only works in a handler
2216 # can be used to deal with xsi:type attributes
2217 sub original_uri
2218
2
2
1
16
  { my( $t, $prefix)= @_;
2219
2
2
    my $ST_NS = '##ns' ;
2220
2
6
2
100
1
12
3
    foreach my $ns (map { $_->{$ST_NS} if $_->{$ST_NS} } reverse @{$t->{_twig_context_stack}})
2221
6
100
17
      { return $ns->{$prefix} || next; }
2222
0
0
    return;
2223   }
2224
2225
2226 sub _fill_default_atts
2227
49
49
46
  { my( $t, $gi, $atts)= @_;
2228
49
37
    my $dtd= $t->{twig_dtd};
2229
49
51
    my $attlist= $dtd->{att}->{$gi};
2230
49
68
    my %value= @$atts;
2231
49
92
    foreach my $att (keys %$attlist)
2232
66
100
75
170
      { if( !exists( $value{$att})
2233             && exists( $attlist->{$att}->{default})
2234             && ( $attlist->{$att}->{default} ne '#IMPLIED')
2235           )
2236           { # the quotes are included in the default, so we need to remove them
2237
13
22
            my $default_value= substr( $attlist->{$att}->{default}, 1, -1);
2238
13
16
            push @$atts, $att, $default_value;
2239           }
2240       }
2241
49
73
    return;
2242   }
2243
2244
2245 # the default function to parse a start tag (in keep_encoding mode)
2246 # can be overridden with the parse_start_tag method
2247 # only works for 1-byte character sets
2248 sub _parse_start_tag
2249
1682
5826
  { my $string= shift;
2250
1682
921
    my( $gi, @atts);
2251
2252     # get the gi (between < and the first space, / or > character)
2253     #if( $string=~ s{^<\s*([^\s>/]*)[\s>/]*}{}s)
2254
1682
100
14189
    if( $string=~ s{^<\s*($REG_TAG_NAME)\s*[\s>/]}{}s)
2255
1664
2665
      { $gi= $1; }
2256     else
2257
18
1223
      { croak "error parsing tag '$string'"; }
2258
1664
2455
    while( $string=~ s{^([^\s=]*)\s*=\s*(["'])(.*?)\2\s*}{}s)
2259
200
531
      { push @atts, $1, $3; }
2260
1664
3767
    return $gi, @atts;
2261   }
2262
2263 sub set_root
2264
3137
1
2615
  { my( $t, $elt)= @_;
2265
3137
2923
    $t->{twig_root}= $elt;
2266
3137
50
3658
    if( $elt)
2267
3137
2708
      { $elt->{twig}= $t;
2268
3137
3111
100
3404
3973
        if( $weakrefs) { weaken( $elt->{twig}); }
2269       }
2270
3137
2329
    return $t;
2271   }
2272
2273 sub _twig_end
2274    { # warn " in _twig_end...\n"; # DEBUG handler
2275
43486
33715
    my ($p, $gi) = @_;
2276
2277
43486
31401
    my $t=$p->{twig};
2278
2279
43486
50
67
80840
    if( $t->{twig_in_pcdata} && (my $text_handler= $t->{TwigHandlers}->{$TEXT}) )
2280
0
0
0
0
0
      { local $_= $t->{twig_current}; $text_handler->( $t, $_) if $_;
2281       }
2282
2283
43486
69
100
44753
83
    if( $t->{twig_map_xmlns}) { $gi= $t->_replace_prefix( $gi); }
2284
2285
43486
34325
    _add_or_discard_stored_spaces( $t);
2286
2287     # the new twig_current is the parent
2288
43486
27268
    my $elt= $t->{twig_current};
2289
43486
34855
    delete $elt->{'twig_current'};
2290
2291     # if we were parsing PCDATA then we exit the pcdata too
2292
43486
100
45867
    if( $t->{twig_in_pcdata})
2293       {
2294
20602
12304
        $t->{twig_in_pcdata}= 0;
2295
20602
50
28992
        $elt= $elt->{parent} if($elt->{parent});
2296
20602
14882
        delete $elt->{'twig_current'};
2297       }
2298
2299     # parent is the new current element
2300
43486
27200
    my $parent= $elt->{parent};
2301
43486
30450
    $t->{twig_current}= $parent;
2302
2303
43486
100
41196
    if( $parent)
2304
40415
29684
      { $parent->{'twig_current'}=1;
2305         # twig_to_be_normalized
2306
40415
5
5
100
44164
9
6
        if( $parent->{twig_to_be_normalized}) { $parent->normalize; $parent->{twig_to_be_normalized}=0; }
2307       }
2308
2309
43486
100
47085
    if( $t->{extra_data})
2310
324
454
      { $elt->_set_extra_data_before_end_tag( $t->{extra_data});
2311
324
242
        $t->{extra_data}='';
2312       }
2313
2314
43486
100
44700
    if( $t->{twig_handlers})
2315       { # look for handlers
2316
3773
5123
        my @handlers= _handler( $t, $t->{twig_handlers}, $gi);
2317
2318
3773
100
4091
        if( $t->{twig_tdh})
2319
48
29
29
100
44
17
54
          { if( @handlers) { push @{$t->{twig_handlers_to_trigger}}, [ $elt, \@handlers ]; }
2320
48
100
71
            if( my $all= $t->{twig_handlers}->{handlers}->{$ALL})
2321
12
12
9
20
              { push @{$t->{twig_handlers_to_trigger}}, [ $elt, [$all] ]; }
2322           }
2323         else
2324           {
2325
3725
2691
            local $_= $elt; # so we can use $_ in the handlers
2326
2327
3725
3301
            foreach my $handler ( @handlers)
2328
711
100
1867
              { $handler->($t, $elt) || last; }
2329             # call _all_ handler if needed
2330
3713
4950
            my $all= $t->{twig_handlers}->{handlers}->{$ALL};
2331
3713
100
3720
            if( $all)
2332
283
394
              { $all->($t, $elt); }
2333
3713
883
100
100
8363
1204
            if( @handlers || $all) { $t->{twig_right_after_root}=0; }
2334           }
2335       }
2336
2337     # if twig_roots is set for the element then set appropriate handler
2338
43474
100
100
53155
    if( $t->{twig_root_depth} and ($p->depth == $t->{twig_root_depth}) )
2339
142
100
652
      { if( $t->{twig_default_print})
2340           { # select the proper fh (and store the currently selected one)
2341
79
99
            $t->_set_fh_to_twig_output_fh();
2342
79
0
50
110
0
            if( !$p->depth==1) { $t->{twig_right_after_root}=1; } #XX
2343
79
100
282
            if( $t->{twig_keep_encoding})
2344
60
194
              { $p->setHandlers( %twig_handlers_roots_print_original); }
2345             else
2346
19
64
              { $p->setHandlers( %twig_handlers_roots_print); }
2347           }
2348         else
2349
63
210
          { $p->setHandlers( %twig_handlers_roots); }
2350       }
2351
2352
43474
2
100
100
63319
2
    if( $elt->{'att'}->{'xml:space'} && ( $elt->{'att'}->{'xml:space'} eq 'preserve')) { $t->{twig_preserve_space}--; }
2353
2354
43474
43474
23024
35381
    pop @{$t->{_twig_context_stack}};
2355
43474
101240
    return;
2356   }
2357
2358 sub _trigger_tdh
2359
4
4
4
  { my( $t)= @_;
2360
2361
4
4
50
0
10
    if( @{$t->{twig_handlers_to_trigger}})
2362
4
78
4
1
78
9
      { my @handlers_to_trigger_now= sort { $a->[0]->cmp( $b->[0]) } @{$t->{twig_handlers_to_trigger}};
2363
4
5
        foreach my $elt_handlers (@handlers_to_trigger_now)
2364
41
25
          { my( $handled_elt, $handlers_to_trigger)= @$elt_handlers;
2365
41
26
            foreach my $handler ( @$handlers_to_trigger)
2366
41
41
50
24
38
              { local $_= $handled_elt; $handler->($t, $handled_elt) || last; }
2367           }
2368       }
2369
4
4
    return;
2370   }
2371
2372 # return the list of handler that can be activated for an element
2373 # (either of CODE ref's or 1's for twig_roots)
2374
2375 sub _handler
2376
4645
3760
  { my( $t, $handlers, $gi)= @_;
2377
2378
4645
3275
    my @found_handlers=();
2379
4645
2444
    my $found_handler;
2380
2381
4645
1140
9290
8330
1769
10486
    foreach my $handler ( map { @$_ } grep { $_ } $handlers->{xpath_handler}->{$gi}, $handlers->{xpath_handler}->{'*'})
2382
1493
1094
      { my $trigger= $handler->{trigger};
2383
1493
100
26337
         if( my $found_path= $trigger->( $t->{_twig_context_stack}))
2384
992
815
          { my $found_handler= $handler->{handler};
2385
992
1436
            push @found_handlers, $found_handler;
2386           }
2387       }
2388
2389     # if no handler found call default handler if defined
2390
4645
100
100
11566
    if( !@found_handlers && defined $handlers->{handlers}->{$DEFAULT})
2391
41
49
      { push @found_handlers, $handlers->{handlers}->{$DEFAULT}; }
2392
2393
4645
100
100
7341
    if( @found_handlers and $t->{twig_do_not_chain_handlers})
2394
1
1
      { @found_handlers= ($found_handlers[0]); }
2395
2396
4645
5666
    return @found_handlers; # empty if no handler found
2397
2398   }
2399
2400
2401 sub _replace_prefix
2402
234
234
169
  { my( $t, $name)= @_;
2403
234
201
    my $p= $t->parser;
2404
234
359
    my $uri= $p->namespace( $name);
2405     # try to get the namespace from default if none is found (for attributes)
2406     # this should probably be an option
2407
234
98
100
67
1312
135
    if( !$uri and( $name!~/^xml/)) { $uri= $p->expand_ns_prefix( '#default'); }
2408
234
100
508
    if( $uri)
2409
112
100
67
217
      { if (my $mapped_prefix= $t->{twig_map_xmlns}->{$uri} || $DEFAULT_URI2NS{$uri})
2410
105
166
          { return "$mapped_prefix:$name"; }
2411         else
2412
7
10
          { my $prefix= _a_proper_ns_prefix( $p, $uri);
2413
7
1
100
13
1
            if( $prefix eq '#default') { $prefix=''; }
2414
7
100
16
            return $prefix ? "$prefix:$name" : $name;
2415           }
2416       }
2417     else
2418
122
231
      { return $name; }
2419   }
2420
2421
2422 sub _twig_char
2423    { # warn " in _twig_char...\n"; # DEBUG handler
2424
2425
55618
44898
    my ($p, $string)= @_;
2426
55618
41272
    my $t=$p->{twig};
2427
2428
55618
100
62518
    if( $t->{twig_keep_encoding})
2429
7407
100
5683
      { if( !$t->{twig_in_cdata})
2430
1309
1913
          { $string= $p->original_string(); }
2431         else
2432           {
2433
187
187
187
191
749
203
739
            use bytes; # > perl 5.5
2434
6098
100
4486
            if( length( $string) < 1024)
2435
6058
6846
              { $string= $p->original_string(); }
2436             else
2437               { #warn "dodgy case";
2438                 # TODO original_string does not hold the entire string, but $string is wrong
2439                 # I believe due to a bug in XML::Parser
2440                 # for now, we use the original string, even if it means that it's been converted to utf8
2441               }
2442           }
2443       }
2444
2445
55618
2
100
72011
4
    if( $t->{twig_input_filter}) { $string= $t->{twig_input_filter}->( $string); }
2446
55618
2
100
55632
3
    if( $t->{twig_char_handler}) { $string= $t->{twig_char_handler}->( $string); }
2447
2448
55618
36156
    my $elt= $t->{twig_current};
2449
2450
55618
100
100
70396
    if( $t->{twig_in_cdata})
2451       { # text is the continuation of a previously created cdata
2452
6156
5153
        $elt->{cdata}.= $t->{twig_stored_spaces} . $string;
2453       }
2454     elsif( $t->{twig_in_pcdata})
2455       { # text is the continuation of a previously created pcdata
2456
314
100
370
        if( $t->{extra_data})
2457
75
139
          { $elt->_push_extra_data_in_pcdata( $t->{extra_data}, length( $elt->{pcdata}));
2458
75
70
            $t->{extra_data}='';
2459           }
2460
314
266
        $elt->{pcdata}.= $string;
2461       }
2462     else
2463       {
2464         # text is just space, which might be discarded later
2465
49148
100
80653
        if( $string=~/\A\s*\Z/s)
2466           {
2467
10362
100
9799
            if( $t->{extra_data})
2468               { # we got extra data (comment, pi), lets add the spaces to it
2469
34
35
                $t->{extra_data} .= $string;
2470               }
2471             else
2472               { # no extra data, just store the spaces
2473
10328
11534
                $t->{twig_stored_spaces}.= $string;
2474               }
2475           }
2476         else
2477
38786
52616
          { my $new_elt= _insert_pcdata( $t, $t->{twig_stored_spaces}.$string);
2478
38786
36402
            delete $elt->{'twig_current'};
2479
38786
27994
            $new_elt->{'twig_current'}=1;
2480
38786
24620
            $t->{twig_current}= $new_elt;
2481
38786
25802
            $t->{twig_in_pcdata}=1;
2482
38786
100
51802
            if( $t->{extra_data})
2483
23
36
              { $new_elt->_push_extra_data_in_pcdata( $t->{extra_data}, 0);
2484
23
24
                $t->{extra_data}='';
2485               }
2486           }
2487       }
2488
55618
126368
    return;
2489   }
2490
2491 sub _twig_cdatastart
2492    { # warn " in _twig_cdatastart...\n"; # DEBUG handler
2493
2494
103
103
74
    my $p= shift;
2495
103
88
    my $t=$p->{twig};
2496
2497
103
90
    $t->{twig_in_cdata}=1;
2498
103
147
    my $cdata= $t->{twig_elt_class}->new( $CDATA);
2499
103
86
    my $twig_current= $t->{twig_current};
2500
2501
103
100
113
    if( $t->{twig_in_pcdata})
2502       { # create the node as a sibling of the PCDATA
2503
20
20
19
100
20
28
31
        $cdata->{prev_sibling}=$twig_current; if( $XML::Twig::weakrefs) { weaken( $cdata->{prev_sibling});} ;
2504
20
18
        $twig_current->{next_sibling}= $cdata;
2505
20
24
        my $parent= $twig_current->{parent};
2506
20
20
19
100
13
25
27
        $cdata->{parent}=$parent; if( $XML::Twig::weakrefs) { weaken( $cdata->{parent});} ;
2507
20
20
20
19
100
21
17
24
24
         $parent->{empty}=0; $parent->{last_child}=$cdata; if( $XML::Twig::weakrefs) { weaken( $parent->{last_child});} ;
2508
20
21
        $t->{twig_in_pcdata}=0;
2509       }
2510     else
2511       { # we have to create a PCDATA element if we need to store spaces
2512
83
100
67
132
        if( $t->_space_policy($XML::Twig::index2gi[$twig_current->{'gi'}]) && $t->{twig_stored_spaces})
2513
5
6
          { _insert_pcdata( $t, $t->{twig_stored_spaces}); }
2514
83
76
        $t->{twig_stored_spaces}='';
2515
2516         # create the node as a child of the current element
2517
83
83
83
50
69
94
121
        $cdata->{parent}=$twig_current; if( $XML::Twig::weakrefs) { weaken( $cdata->{parent});} ;
2518
83
100
103
        if( my $prev_sibling= $twig_current->{last_child})
2519
8
8
8
50
9
12
9
          { $cdata->{prev_sibling}=$prev_sibling; if( $XML::Twig::weakrefs) { weaken( $cdata->{prev_sibling});} ;
2520
8
8
            $prev_sibling->{next_sibling}= $cdata;
2521           }
2522         else
2523
75
73
          { $twig_current->{first_child}= $cdata; }
2524
83
83
83
83
50
56
74
96
114
         $twig_current->{empty}=0; $twig_current->{last_child}=$cdata; if( $XML::Twig::weakrefs) { weaken( $twig_current->{last_child});} ;
2525
2526       }
2527
2528
103
118
    delete $twig_current->{'twig_current'};
2529
103
68
    $t->{twig_current}= $cdata;
2530
103
74
    $cdata->{'twig_current'}=1;
2531
103
4
4
100
131
6
3
    if( $t->{extra_data}) { $cdata->set_extra_data( $t->{extra_data}); $t->{extra_data}='' };
2532
103
429
    return;
2533   }
2534
2535 sub _twig_cdataend
2536    { # warn " in _twig_cdataend...\n"; # DEBUG handler
2537
2538
103
103
75
    my $p= shift;
2539
103
74
    my $t=$p->{twig};
2540
2541
103
83
    $t->{twig_in_cdata}=0;
2542
2543
103
71
    my $elt= $t->{twig_current};
2544
103
92
    delete $elt->{'twig_current'};
2545
103
94
    my $cdata= $elt->{cdata};
2546
103
113
    $elt->_set_cdata( $cdata);
2547
2548
103
103
65
199
    push @{$t->{_twig_context_stack}}, { $ST_TAG => $CDATA };
2549
2550
103
100
151
    if( $t->{twig_handlers})
2551       { # look for handlers
2552
19
28
        my @handlers= _handler( $t, $t->{twig_handlers}, $CDATA);
2553
19
15
        local $_= $elt; # so we can use $_ in the handlers
2554
19
4
100
33
6
        foreach my $handler ( @handlers) { $handler->($t, $elt) || last; }
2555       }
2556
2557
103
103
52
96
    pop @{$t->{_twig_context_stack}};
2558
2559
103
130
    $elt= $elt->{parent};
2560
103
82
    $t->{twig_current}= $elt;
2561
103
82
    $elt->{'twig_current'}=1;
2562
2563
103
98
    $t->{twig_long_cdata}=0;
2564
103
197
    return;
2565   }
2566
2567 sub _pi_elt_handlers
2568
1525
1072
  { my( $t, $pi)= @_;
2569
1525
100
3177
    my $pi_handlers= $t->{twig_handlers}->{pi_handlers} || return;
2570
66
101
    foreach my $handler ( $pi_handlers->{$pi->{target}}, $pi_handlers->{''})
2571
132
66
66
50
100
239
47
101
      { if( $handler) { local $_= $pi; $handler->( $t, $pi) || last; } }
2572   }
2573
2574 sub _pi_text_handler
2575
1506
1123
  { my( $t, $target, $data)= @_;
2576
1506
100
2290
    if( my $handler= $t->{twig_handlers}->{pi_handlers}->{$target})
2577
1
3
      { return $handler->( $t, $target, $data); }
2578
1505
100
1620
    if( my $handler= $t->{twig_handlers}->{pi_handlers}->{''})
2579
1
3
      { return $handler->( $t, $target, $data); }
2580
1504
100
100
4942
    return defined( $data) && $data ne '' ? "<?$target $data?>" : "<?$target?>" ;
2581   }
2582
2583 sub _comment_elt_handler
2584
1755
1125
  { my( $t, $comment)= @_;
2585
1755
100
4068
    if( my $handler= $t->{twig_handlers}->{handlers}->{$COMMENT})
2586
6
6
5
10
      { local $_= $comment; $handler->($t, $comment); }
2587   }
2588
2589 sub _comment_text_handler
2590
1892
1269
  { my( $t, $comment)= @_;
2591
1892
100
3046
    if( my $handler= $t->{twig_handlers}->{handlers}->{$COMMENT})
2592
4
10
      { $comment= $handler->($t, $comment);
2593
4
2
100
100
22
3
        if( !defined $comment || $comment eq '') { return ''; }
2594       }
2595
1890
2999
    return "<!--$comment-->";
2596   }
2597
2598
2599
2600 sub _twig_comment
2601    { # warn " in _twig_comment...\n"; # DEBUG handler
2602
2603
3653
16883
    my( $p, $comment_text)= @_;
2604
3653
2788
    my $t=$p->{twig};
2605
2606
3653
1751
100
4334
2595
    if( $t->{twig_keep_encoding}) { $comment_text= substr( $p->original_string(), 4, -3); }
2607
2608
3653
12427
    $t->_twig_pi_comment( $p, $COMMENT, $t->{twig_keep_comments}, $t->{twig_process_comments},
2609                           '_set_comment', '_comment_elt_handler', '_comment_text_handler', $comment_text
2610                         );
2611
3653
9919
    return;
2612   }
2613
2614 sub _twig_pi
2615    { # warn " in _twig_pi...\n"; # DEBUG handler
2616
2617
3038
12526
    my( $p, $target, $data)= @_;
2618
3038
2430
    my $t=$p->{twig};
2619
2620
3038
100
3539
    if( $t->{twig_keep_encoding})
2621
1518
2351
      { my $pi_text= substr( $p->original_string(), 2, -2);
2622
1518
6841
        ($target, $data)= split( /\s+/, $pi_text, 2);
2623       }
2624
2625
3038
5763
    $t->_twig_pi_comment( $p, $PI, $t->{twig_keep_pi}, $t->{twig_process_pi},
2626                           '_set_pi', '_pi_elt_handlers', '_pi_text_handler', $target, $data
2627                         );
2628
3038
8531
    return;
2629   }
2630
2631 sub _twig_pi_comment
2632
6691
12309
  { my( $t, $p, $type, $keep, $process, $set, $elt_handler, $text_handler, @parser_args)= @_;
2633
2634
6691
100
7295
    if( $t->{twig_input_filter})
2635
2
3
2
7
          { foreach my $arg (@parser_args) { $arg= $t->{twig_input_filter}->( $arg); } }
2636
2637     # if pi/comments are to be kept then we piggyback them to the current element
2638
6691
100
100
7645
    if( $keep)
2639       { # first add spaces
2640
3398
100
3854
        if( $t->{twig_stored_spaces})
2641
22
26
              { $t->{extra_data}.= $t->{twig_stored_spaces};
2642
22
23
                $t->{twig_stored_spaces}= '';
2643               }
2644
2645
3398
4810
        my $extra_data= $t->$text_handler( @parser_args);
2646
3398
4849
        $t->{extra_data}.= $extra_data;
2647
2648       }
2649     elsif( $process)
2650       {
2651
3280
2267
        my $twig_current= $t->{twig_current}; # defined unless we are outside of the root
2652
2653
3280
4861
        my $elt= $t->{twig_elt_class}->new( $type);
2654
3280
4396
        $elt->$set( @parser_args);
2655
3280
100
3887
        if( $t->{extra_data})
2656
676
755
          { $elt->set_extra_data( $t->{extra_data});
2657
676
458
            $t->{extra_data}='';
2658           }
2659
2660
3280
100
100
100
2900
        unless( $t->root)
2661
1451
1278
          { $t->_add_cpi_outside_of_root( leading_cpi => $elt);
2662           }
2663         elsif( $t->{twig_in_pcdata})
2664           { # create the node as a sibling of the PCDATA
2665
7
13
            $elt->paste_after( $twig_current);
2666
7
4
            $t->{twig_in_pcdata}=0;
2667           }
2668         elsif( $twig_current)
2669           { # we have to create a PCDATA element if we need to store spaces
2670
376
100
100
572
            if( $t->_space_policy($XML::Twig::index2gi[$twig_current->{'gi'}]) && $t->{twig_stored_spaces})
2671
1
2
              { _insert_pcdata( $t, $t->{twig_stored_spaces}); }
2672
376
363
            $t->{twig_stored_spaces}='';
2673             # create the node as a child of the current element
2674
376
419
            $elt->paste_last_child( $twig_current);
2675           }
2676         else
2677
1446
1437
          { $t->_add_cpi_outside_of_root( trailing_cpi => $elt); }
2678
2679
3280
100
3052
        if( $twig_current)
2680
383
396
          { delete $twig_current->{'twig_current'};
2681
383
256
            my $parent= $elt->{parent};
2682
383
271
            $t->{twig_current}= $parent;
2683
383
330
            $parent->{'twig_current'}=1;
2684           }
2685
2686
3280
4082
        $t->$elt_handler( $elt);
2687       }
2688
2689   }
2690
2691
2692 # add a comment or pi before the first element
2693 sub _add_cpi_outside_of_root
2694
2902
2106
  { my($t, $type, $elt)= @_; # $type is 'leading_cpi' or 'trailing_cpi'
2695
2902
67
4782
    $t->{$type} ||= $t->{twig_elt_class}->new( '#CPI');
2696     # create the node as a child of the current element
2697
2902
3299
    $elt->paste_last_child( $t->{$type});
2698
2902
1743
    return $t;
2699   }
2700
2701 sub _twig_final
2702    { # warn " in _twig_final...\n"; # DEBUG handler
2703
2704
3084
21097
    my $p= shift;
2705
3084
100
9197
    my $t= $p->isa( 'XML::Twig') ? $p : $p->{twig};
2706
2707     # store trailing data
2708
3084
579
579
100
3966
983
464
    if( $t->{extra_data}) { $t->{trailing_cpi_text} = $t->{extra_data}; $t->{extra_data}=''; }
2709
3084
100
8195
    $t->{trailing_spaces}= $t->{twig_stored_spaces} || '';
2710
3084
3084
2190
3749
    my $s= $t->{twig_stored_spaces}; $s=~s{\n}{\\n}g;
2711
3084
287
100
3539
346
    if( $t->{twig_stored_spaces}) { my $s= $t->{twig_stored_spaces}; }
2712
2713     # restore the selected filehandle if needed
2714
3084
3535
    $t->_set_fh_to_selected_fh();
2715
2716
3084
100
3741
    $t->_trigger_tdh if( $t->{twig_tdh});
2717
2718
3084
100
3262
    select $t->{twig_original_selected_fh} if($t->{twig_original_selected_fh}); # probably dodgy
2719
2720
3084
100
3869
    if( exists $t->{twig_autoflush_data})
2721
36
39
      { my @args;
2722
36
100
78
        push @args, $t->{twig_autoflush_data}->{fh} if( $t->{twig_autoflush_data}->{fh});
2723
36
1
100
73
2
        push @args, @{$t->{twig_autoflush_data}->{args}} if( $t->{twig_autoflush_data}->{args});
2724
36
59
        $t->flush( @args);
2725
36
59
        delete $t->{twig_autoflush_data};
2726
36
50
76
        $t->root->delete if $t->root;
2727       }
2728
2729     # tries to clean-up (probably not very well at the moment)
2730     #undef $p->{twig};
2731
3084
3046
    undef $t->{twig_parser};
2732
3084
2972
    delete $t->{twig_parsing};
2733
3084
3084
2263
5072
    @{$t}{ qw( twig_parser twig_parsing _twig_context_stack twig_current) }=();
2734
2735
3084
5042
    return $t;
2736   }
2737
2738 sub _insert_pcdata
2739
38905
28236
  { my( $t, $string)= @_;
2740     # create a new PCDATA element
2741
38905
25050
    my $parent= $t->{twig_current}; # always defined
2742
38905
19729
    my $elt;
2743
38905
100
38709
    if( exists $t->{twig_alt_elt_class})
2744
162
235
      { $elt= $t->{twig_elt_class}->new( $PCDATA);
2745
162
157
        $elt->_set_pcdata( $string);
2746       }
2747     else
2748
38743
109928
      { $elt= bless( { gi => $XML::Twig::gi2index{$PCDATA}, pcdata => $string }, 'XML::Twig::Elt'); }
2749
2750
38905
29744
    my $prev_sibling= $parent->{last_child};
2751
38905
100
32449
    if( $prev_sibling)
2752
191
182
      { $prev_sibling->{next_sibling}= $elt;
2753
191
191
185
100
154
244
283
        $elt->{prev_sibling}=$prev_sibling; if( $XML::Twig::weakrefs) { weaken( $elt->{prev_sibling});} ;
2754       }
2755     else
2756
38714
45673
      { $parent->{first_child}= $elt; }
2757
2758
38905
38905
20880
100
29084
39477
22940
    $elt->{parent}=$parent; if( $XML::Twig::weakrefs) { weaken( $elt->{parent});} ;
2759
38905
38905
38905
20880
100
29307
42881
43015
21995
     $parent->{empty}=0; $parent->{last_child}=$elt; if( $XML::Twig::weakrefs) { weaken( $parent->{last_child});} ;
2760
38905
32758
    $t->{twig_stored_spaces}='';
2761
38905
40251
    return $elt;
2762   }
2763
2764 sub _space_policy
2765
1053
939
  { my( $t, $gi)= @_;
2766
1053
654
    my $policy;
2767
1053
100
1408
    $policy=0 if( $t->{twig_discard_spaces});
2768
1053
100
1280
    $policy=1 if( $t->{twig_keep_spaces});
2769
1053
100
100
1655
    $policy=1 if( $t->{twig_keep_spaces_in}
2770                && $t->{twig_keep_spaces_in}->{$gi});
2771
1053
100
100
1528
    $policy=0 if( $t->{twig_discard_spaces_in}
2772                && $t->{twig_discard_spaces_in}->{$gi});
2773
1053
1849
    return $policy;
2774   }
2775
2776
2777 sub _twig_entity
2778    { # warn " in _twig_entity...\n"; # DEBUG handler
2779
76
76
94
    my( $p, $name, $val, $sysid, $pubid, $ndata, $param)= @_;
2780
76
79
    my $t=$p->{twig};
2781
2782     #{ no warnings; my $base= $p->base; warn "_twig_entity called: expand: '$t->{twig_expand_external_ents}', base: '$base', name: '$name', val: '$val', sysid: '$sysid', pubid: '$pubid', ndata: '$ndata', param: '$param'\n";}
2783
2784
76
55
    my $missing_entity=0;
2785
2786
76
100
125
    if( $sysid)
2787
38
100
49
      { if($ndata)
2788
15
14
100
35
16
          { if( ! -f _based_filename( $sysid, $p->base)) { $missing_entity= 1; }
2789           }
2790         else
2791
23
100
41
          { if( $t->{twig_expand_external_ents})
2792
5
5
9
16
              { $val= eval { _slurp_uri( $sysid, $p->base) };
2793
5
100
22
                if( ! defined $val)
2794
3
100
10
                  { if( $t->{twig_extern_ent_nofail})
2795
2
3
                      { $missing_entity= 1; }
2796                     else
2797
1
6
                      { _croak( "cannot load SYSTEM entity '$name' from '$sysid': $@", 3); }
2798                   }
2799               }
2800           }
2801       }
2802
2803
75
200
    my $ent=XML::Twig::Entity->new( $name, $val, $sysid, $pubid, $ndata, $param);
2804
75
16
100
103
34
    if( $missing_entity) { $t->{twig_missing_system_entities}->{$name}= $ent; }
2805
2806
75
107
    my $entity_list= $t->entity_list;
2807
75
75
50
131
107
    if( $entity_list) { $entity_list->add( $ent); }
2808
2809
75
50
110
    if( $parser_version > 2.27)
2810       { # this is really ugly, but with some versions of XML::Parser the value
2811         # of the entity is not properly returned by the default handler
2812
75
105
        my $ent_decl= $ent->text;
2813
75
100
114
        if( $t->{twig_keep_encoding})
2814
3
50
33
18
          { if( defined $ent->{val} && ($ent_decl !~ /["']/))
2815
0
0
              { my $val= $ent->{val};
2816
0
0
0
                $ent_decl .= $val =~ /"/ ? qq{'$val' } : qq{"$val" };
2817               }
2818             # for my solaris box (perl 5.6.1, XML::Parser 2.31, expat?)
2819
3
0
39
0
            $t->{twig_doctype}->{internal}=~ s{<!ENTITY\s+$name\s+$}{substr( $ent_decl, 0, -1)}e;
2820           }
2821
75
50
1014
        $t->{twig_doctype}->{internal} .= $ent_decl
2822           unless( $t->{twig_doctype}->{internal}=~ m{<!ENTITY\s+$name\s+});
2823       }
2824
2825
75
244
    return;
2826   }
2827
2828
2829 sub _twig_extern_ent
2830    { # warn " in _twig_extern_ent...I (", $_[0]->original_string, ")\n"; # DEBUG handler
2831
16
16
22
    my( $p, $base, $sysid, $pubid)= @_;
2832
16
19
    my $t= $p->{twig};
2833
16
100
27
    if( $t->{twig_no_expand})
2834
8
50
16
      { my $ent_name= $t->{twig_keep_encoding} ? $p->original_string : $p->recognized_string;
2835
8
33
        _twig_insert_ent( $t, $ent_name);
2836
8
102
        return '';
2837       }
2838
8
8
12
22
    my $ent_content= eval { $t->{twig_ext_ent_handler}->( $p, $base, $sysid) };
2839
8
100
15939
    if( ! defined $ent_content)
2840       {
2841
2
6
        my $ent_name = $p->recognized_string;
2842
2
12
        my $file = _based_filename( $sysid, $base);
2843
2
6
        my $error_message= "cannot expand $ent_name - cannot load '$file'";
2844
2
1
100
5
14
        if( $t->{twig_extern_ent_nofail}) { return "<!-- $error_message -->"; }
2845
1
1
        else { _croak( $error_message); }
2846       }
2847
6
104
    return $ent_content;
2848   }
2849
2850 # I use this so I can change the $Carp::CarpLevel (which determines how many call frames to skip when reporting an error)
2851 sub _croak
2852
50
50
70
  { my( $message, $level)= @_;
2853
50
100
145
    $Carp::CarpLevel= $level || 0;
2854
50
5819
    croak $message;
2855   }
2856
2857 sub _twig_xmldecl
2858    { # warn " in _twig_xmldecl...\n"; # DEBUG handler
2859
2860
123
123
31700
    my $p= shift;
2861
123
139
    my $t=$p->{twig};
2862
123
50
395
    $t->{twig_xmldecl}||={}; # could have been set by set_output_encoding
2863
123
190
    $t->{twig_xmldecl}->{version}= shift;
2864
123
138
    $t->{twig_xmldecl}->{encoding}= shift;
2865
123
128
    $t->{twig_xmldecl}->{standalone}= shift;
2866
123
455
    return;
2867   }
2868
2869 sub _twig_doctype
2870    { # warn " in _twig_doctype...\n"; # DEBUG handler
2871
111
111
22616
    my( $p, $name, $sysid, $pub, $internal)= @_;
2872
111
134
    my $t=$p->{twig};
2873
111
50
385
    $t->{twig_doctype}||= {}; # create
2874
111
203
    $t->{twig_doctype}->{name}= $name; # always there
2875
111
145
    $t->{twig_doctype}->{sysid}= $sysid; #
2876
111
156
    $t->{twig_doctype}->{pub}= $pub; #
2877
2878     # now let's try to cope with XML::Parser 2.28 and above
2879
111
50
190
    if( $parser_version > 2.27)
2880
111
328
      { @saved_default_handler= $p->setHandlers( Default => \&_twig_store_internal_dtd,
2881                                                  Entity => \&_twig_entity,
2882                                                );
2883
111
2122
      $p->setHandlers( DoctypeFin => \&_twig_stop_storing_internal_dtd);
2884
111
1014
      $t->{twig_doctype}->{internal}='';
2885       }
2886     else
2887       # for XML::Parser before 2.28
2888
0
0
0
      { $internal||='';
2889
0
0
        $internal=~ s{^\s*\[}{};
2890
0
0
        $internal=~ s{]\s*$}{};
2891
0
0
        $t->{twig_doctype}->{internal}=$internal;
2892       }
2893
2894     # now check if we want to get the DTD info
2895
111
100
100
250
    if( $t->{twig_read_external_dtd} && $sysid)
2896       { # let's build a fake document with an internal DTD
2897
5
17
        my $dtd= "<!DOCTYPE $name [" . _slurp_uri( $sysid) . "]><$name/>";
2898
2899
5
14
        $t->save_global_state(); # save the globals (they will be reset by the following new)
2900
5
100
27
        my $t_dtd= XML::Twig->new( load_DTD => 1, ParseParamEnt => 1, error_context => $t->{ErrorContext} || 0); # create a temp twig
2901
5
12
        $t_dtd->parse( $dtd); # parse it
2902
5
9
        $t->{twig_dtd}= $t_dtd->{twig_dtd}; # grab the dtd info
2903         #$t->{twig_dtd_is_external}=1;
2904
5
50
10
        $t->entity_list->_add_list( $t_dtd->entity_list) if( $t_dtd->entity_list); # grab the entity info
2905
5
8
        $t->restore_global_state();
2906       }
2907
111
427
    return;
2908   }
2909
2910 sub _twig_element
2911    { # warn " in _twig_element...\n"; # DEBUG handler
2912
2913
45
45
48
    my( $p, $name, $model)= @_;
2914
45
37
    my $t=$p->{twig};
2915
45
100
100
    $t->{twig_dtd}||= {}; # may create the dtd
2916
45
100
90
    $t->{twig_dtd}->{model}||= {}; # may create the model hash
2917
45
100
89
    $t->{twig_dtd}->{elt_list}||= []; # ordered list of elements
2918
45
45
22
490
    push @{$t->{twig_dtd}->{elt_list}}, $name; # store the elt
2919
45
70
    $t->{twig_dtd}->{model}->{$name}= $model; # store the model
2920
45
50
33
582
    if( ($parser_version > 2.27) && ($t->{twig_doctype}->{internal}=~ m{(^|>)\s*$}) )
2921
45
50
106
      { my $text= $XML::Twig::Elt::keep_encoding ? $p->original_string : $p->recognized_string;
2922
45
50
226
        unless( $text)
2923           { # this version of XML::Parser does not return the text in the *_string method
2924             # we need to rebuild it
2925
45
334
            $text= "<!ELEMENT $name $model>";
2926           }
2927
45
582
        $t->{twig_doctype}->{internal} .= $text;
2928       }
2929
45
119
    return;
2930   }
2931
2932 sub _twig_attlist
2933    { # warn " in _twig_attlist...\n"; # DEBUG handler
2934
2935
37
37
39
    my( $p, $gi, $att, $type, $default, $fixed)= @_;
2936     #warn "in attlist: gi: '$gi', att: '$att', type: '$type', default: '$default', fixed: '$fixed'\n";
2937
37
26
    my $t=$p->{twig};
2938
37
50
45
    $t->{twig_dtd}||= {}; # create dtd if need be
2939
37
100
76
    $t->{twig_dtd}->{$gi}||= {}; # create elt if need be
2940     #$t->{twig_dtd}->{$gi}->{att}||= {}; # create att if need be
2941
37
50
33
889
    if( ($parser_version > 2.27) && ($t->{twig_doctype}->{internal}=~ m{(^|>)\s*$}) )
2942
37
50
65
      { my $text= $XML::Twig::Elt::keep_encoding ? $p->original_string : $p->recognized_string;
2943
37
50
154
        unless( $text)
2944           { # this version of XML::Parser does not return the text in the *_string method
2945             # we need to rebuild it
2946
37
44
            my $att_decl="$att $type";
2947
37
100
42
            $att_decl .= " #FIXED" if( $fixed);
2948
37
50
46
            $att_decl .= " $default" if( defined $default);
2949             # 2 cases: there is already an attlist on that element or not
2950
37
100
55
            if( $t->{twig_dtd}->{att}->{$gi})
2951               { # there is already an attlist, add to it
2952
15
15
236
77
                $t->{twig_doctype}->{internal}=~ s{(<!ATTLIST\s*$gi )(.*?)\n?>}
2953                                                   { "$1$2\n" . ' ' x length( $1) . "$att_decl\n>"}es;
2954               }
2955             else
2956               { # create the attlist
2957
22
47
                 $t->{twig_doctype}->{internal}.= "<!ATTLIST $gi $att_decl>"
2958               }
2959           }
2960       }
2961
37
73
    $t->{twig_dtd}->{att}->{$gi}->{$att}= {} ;
2962
37
57
    $t->{twig_dtd}->{att}->{$gi}->{$att}->{type}= $type;
2963
37
50
78
    $t->{twig_dtd}->{att}->{$gi}->{$att}->{default}= $default if( defined $default);
2964
37
57
    $t->{twig_dtd}->{att}->{$gi}->{$att}->{fixed}= $fixed;
2965
37
97
    return;
2966   }
2967
2968 sub _twig_default
2969    { # warn " in _twig_default...\n"; # DEBUG handler
2970
2971
415
415
4351
    my( $p, $string)= @_;
2972
2973
415
523
    my $t= $p->{twig};
2974
2975     # we need to process the data in 2 cases: entity, or spaces after the closing tag
2976
2977     # after the closing tag (no twig_current and root has been created)
2978
415
287
100
75
2076
340
    if( ! $t->{twig_current} && $t->{twig_root} && $string=~ m{^\s+$}m) { $t->{twig_stored_spaces} .= $string; }
2979
2980     # process only if we have an entity
2981
415
100
2765
    if( $string=~ m{^&([^;]*);$})
2982       { # the entity has to be pure pcdata, or we have a problem
2983
36
50
33
451
        if( ($p->original_string=~ m{^<}) && ($p->original_string=~ m{>$}) )
2984           { # string is a tag, entity is in an attribute
2985
0
0
0
            $t->{twig_entities_in_attribute}=1 if( $t->{twig_do_not_escape_amp_in_atts});
2986           }
2987         else
2988
36
199
          { my $ent;
2989
36
100
39
            if( $t->{twig_keep_encoding})
2990
9
10
              { _twig_char( $p, $string);
2991
9
20
                $ent= substr( $string, 1, -1);
2992               }
2993             else
2994
27
34
              { $ent= _twig_insert_ent( $t, $string);
2995               }
2996
2997
36
130
            return $ent;
2998           }
2999       }
3000   }
3001
3002 sub _twig_insert_ent
3003   {
3004
35
35
34
    my( $t, $string)=@_;
3005
3006
35
31
    my $twig_current= $t->{twig_current};
3007
3008
35
53
    my $ent= $t->{twig_elt_class}->new( $ENT);
3009
35
39
    $ent->{ent}= $string;
3010
3011
35
40
    _add_or_discard_stored_spaces( $t);
3012
3013
35
100
43
    if( $t->{twig_in_pcdata})
3014       { # create the node as a sibling of the #PCDATA
3015
3016
16
16
13
100
17
22
27
        $ent->{prev_sibling}=$twig_current; if( $XML::Twig::weakrefs) { weaken( $ent->{prev_sibling});} ;
3017
16
16
        $twig_current->{next_sibling}= $ent;
3018
16
13
        my $parent= $twig_current->{parent};
3019
16
16
13
100
14
21
17
        $ent->{parent}=$parent; if( $XML::Twig::weakrefs) { weaken( $ent->{parent});} ;
3020
16
16
16
13
100
13
12
25
16
         $parent->{empty}=0; $parent->{last_child}=$ent; if( $XML::Twig::weakrefs) { weaken( $parent->{last_child});} ;
3021         # the twig_current is now the parent
3022
16
18
        delete $twig_current->{'twig_current'};
3023
16
13
        $t->{twig_current}= $parent;
3024         # we left pcdata
3025
16
13
        $t->{twig_in_pcdata}=0;
3026       }
3027     else
3028       { # create the node as a child of the current element
3029
19
19
15
100
21
26
20
        $ent->{parent}=$twig_current; if( $XML::Twig::weakrefs) { weaken( $ent->{parent});} ;
3030
19
100
24
        if( my $prev_sibling= $twig_current->{last_child})
3031
10
10
8
100
11
15
13
          { $ent->{prev_sibling}=$prev_sibling; if( $XML::Twig::weakrefs) { weaken( $ent->{prev_sibling});} ;
3032
10
10
            $prev_sibling->{next_sibling}= $ent;
3033           }
3034         else
3035
9
9
50
14
11
          { if( $twig_current) { $twig_current->{first_child}= $ent; } }
3036
19
19
19
19
15
100
50
20
17
22
26
24
        if( $twig_current) { $twig_current->{empty}=0; $twig_current->{last_child}=$ent; if( $XML::Twig::weakrefs) { weaken( $twig_current->{last_child});} ; }
3037       }
3038
3039     # meant to trigger entity handler, does not seem to be activated at this time
3040     #if( my $handler= $t->{twig_handlers}->{gi}->{$ENT})
3041     # { local $_= $ent; $handler->( $t, $ent); }
3042
3043
35
36
    return $ent;
3044   }
3045
3046 sub parser
3047
384
384
1
486
  { return $_[0]->{twig_parser}; }
3048
3049 # returns the declaration text (or a default one)
3050 sub xmldecl
3051
2462
1
1480
  { my $t= shift;
3052
2462
100
100
9273
    return '' unless( $t->{twig_xmldecl} || $t->{output_encoding});
3053
76
70
    my $decl_string;
3054
76
83
    my $decl= $t->{twig_xmldecl};
3055
76
100
110
    if( $decl)
3056
72
85
      { my $version= $decl->{version};
3057
72
72
        $decl_string= q{<?xml};
3058
72
145
        $decl_string .= qq{ version="$version"};
3059
3060         # encoding can either have been set (in $decl->{output_encoding})
3061         # or come from the document (in $decl->{encoding})
3062
72
100
100
185
        if( $t->{output_encoding})
3063
13
11
          { my $encoding= $t->{output_encoding};
3064
13
18
            $decl_string .= qq{ encoding="$encoding"};
3065           }
3066         elsif( $decl->{encoding})
3067
16
13
          { my $encoding= $decl->{encoding};
3068
16
24
            $decl_string .= qq{ encoding="$encoding"};
3069           }
3070
3071
72
100
123
        if( defined( $decl->{standalone}))
3072
12
17
          { $decl_string .= q{ standalone="};
3073
12
100
28
            $decl_string .= $decl->{standalone} ? "yes" : "no";
3074
12
15
            $decl_string .= q{"};
3075           }
3076
3077
72
91
        $decl_string .= "?>\n";
3078       }
3079     else
3080
4
7
      { my $encoding= $t->{output_encoding};
3081
4
11
        $decl_string= qq{<?xml version="1.0" encoding="$encoding"?>};
3082       }
3083
3084
76
201
    my $output_filter= XML::Twig::Elt::output_filter();
3085
76
100
586
    return $output_filter ? $output_filter->( $decl_string) : $decl_string;
3086   }
3087
3088 sub set_doctype
3089
4
4
1
13
  { my( $t, $name, $system, $public, $internal)= @_;
3090
4
100
11
    $t->{twig_doctype}= {} unless defined $t->{twig_doctype};
3091
4
4
    my $doctype= $t->{twig_doctype};
3092
4
50
9
    $doctype->{name} = $name if( defined $name);
3093
4
100
10
    $doctype->{sysid} = $system if( defined $system);
3094
4
100
6
    $doctype->{pub} = $public if( defined $public);
3095
4
100
11
    $doctype->{internal} = $internal if( defined $internal);
3096   }
3097
3098 sub doctype_name
3099
4
4
1
9
  { my $t= shift;
3100
4
100
9
    my $doctype= $t->{twig_doctype} or return '';
3101
3
50
9
    return $doctype->{name} || '';
3102   }
3103
3104 sub system_id
3105
4
4
1
4
  { my $t= shift;
3106
4
100
8
    my $doctype= $t->{twig_doctype} or return '';
3107
3
100
9
    return $doctype->{sysid} || '';
3108   }
3109
3110 sub public_id
3111
4
4
1
6
  { my $t= shift;
3112
4
100
11
    my $doctype= $t->{twig_doctype} or return '';
3113
3
100
11
    return $doctype->{pub} || '';
3114   }
3115
3116 sub internal_subset
3117
4
4
1
4
  { my $t= shift;
3118
4
100
7
    my $doctype= $t->{twig_doctype} or return '';
3119
3
100
10
    return $doctype->{internal} || '';
3120   }
3121
3122 # return the dtd object
3123 sub dtd
3124
7
7
1
89
  { my $t= shift;
3125
7
35
    return $t->{twig_dtd};
3126   }
3127
3128 # return an element model, or the list of element models
3129 sub model
3130
3
3
1
54
  { my $t= shift;
3131
3
5
    my $elt= shift;
3132
3
100
10
    return $t->dtd->{model}->{$elt} if( $elt);
3133
1
1
2
3
    return (sort keys %{$t->dtd->{model}});
3134   }
3135
3136
3137 # return the entity_list object
3138 sub entity_list
3139
165
165
1
305
  { my $t= shift;
3140
165
345
    return $t->{twig_entity_list};
3141   }
3142
3143 # return the list of entity names
3144 sub entity_names
3145
6
6
1
132
  { my $t= shift;
3146
6
12
    return $t->entity_list->entity_names;
3147   }
3148
3149 # return the entity object
3150 sub entity
3151
14
14
1
47
  { my $t= shift;
3152
14
16
    my $entity_name= shift;
3153
14
18
    return $t->entity_list->ent( $entity_name);
3154   }
3155
3156
3157 sub print_prolog
3158
911
911
1
522
  { my $t= shift;
3159
911
50
50
33
5349
    my $fh= isa( $_[0], 'GLOB') || isa( $_[0], 'IO::Scalar') ? shift : $t->{twig_output_fh} || select() || \*STDOUT;
3160     ## no critic (TestingAndDebugging::ProhibitNoStrict);
3161
187
187
187
191
688199
216
326971
    no strict 'refs';
3162
911
911
977
1259
    print {$fh} $t->prolog( @_);
3163   }
3164
3165 sub prolog
3166
2464
1
1635
  { my $t= shift;
3167
2464
4
100
2889
10
    if( $t->{no_prolog}){ return ''; }
3168
3169
2460
100
50
5154
    return $t->{no_prolog} ? ''
3170            : defined $t->{no_dtd_output} ? $t->xmldecl
3171            : $t->xmldecl . $t->doctype( @_);
3172   }
3173
3174 sub doctype
3175
2462
1
2218
  { my $t= shift;
3176
2462
2371
    my %args= _normalize_args( @_);
3177
2462
100
4710
    my $update_dtd = $args{UpdateDTD} || '';
3178
2462
1489
    my $doctype_text='';
3179
3180
2462
1904
    my $doctype= $t->{twig_doctype};
3181
3182
2462
100
2538
    if( $doctype)
3183
64
50
162
      { $doctype_text .= qq{<!DOCTYPE $doctype->{name}} if( $doctype->{name});
3184
64
100
120
        $doctype_text .= qq{ PUBLIC "$doctype->{pub}"} if( $doctype->{pub});
3185
64
100
100
259
        $doctype_text .= qq{ SYSTEM} if( $doctype->{sysid} && !$doctype->{pub});
3186
64
100
145
        $doctype_text .= qq{ "$doctype->{sysid}"} if( $doctype->{sysid});
3187       }
3188
3189
2462
100
100
3656
    if( $update_dtd)
3190
10
100
100
67
32
      { if( $doctype)
3191
7
2
12
2
          { my $internal=$doctype->{internal};
3192             # awful hack, but at least it works a little better that what was there before
3193
7
100
18
            if( $internal)
3194               { # remove entity declarations (they will be re-generated from the updated entity list)
3195
5
11
11
11
11
709
36
12
1243
                $internal=~ s{<! \s* ENTITY \s+ $REG_TAG_NAME \s+ ( ("[^"]*"|'[^']*') \s* | SYSTEM [^>]*) >\s*}{}xg;
3196
5
29541
                $internal=~ s{^\n}{};
3197               }
3198
7
50
100
17
            $internal .= $t->entity_list->text ||'' if( $t->entity_list);
3199
7
4
100
17
13
            if( $internal) { $doctype_text .= "[\n$internal]>\n"; }
3200           }
3201         elsif( !$t->{'twig_dtd'} && keys %{$t->entity_list})
3202
2
2
          { $doctype_text .= "<!DOCTYPE " . $t->root->gi . " [\n" . $t->entity_list->text . "\n]>";;}
3203         else
3204
1
2
          { $doctype_text= $t->{twig_dtd};
3205
1
2
            $doctype_text .= $t->dtd_text;
3206           }
3207       }
3208     elsif( $doctype)
3209
57
100
110
      { if( my $internal= $doctype->{internal})
3210           { # add opening and closing brackets if not already there
3211             # plus some spaces and newlines for a nice formating
3212             # I test it here because I can't remember which version of
3213             # XML::Parser need it or not, nor guess which one will in the
3214             # future, so this about the best I can do
3215
25
127
            $internal=~ s{^\s*(\[\s*)?}{ [\n};
3216
25
972
            $internal=~ s{\s*(\]\s*(>\s*)?)?\s*$}{\n]>\n};
3217
25
37
            $doctype_text .= $internal;
3218           }
3219       }
3220
3221
2462
100
2217
    if( $doctype_text)
3222       {
3223         # terrible hack, as I can't figure out in which case the darn prolog
3224         # should get an extra > (depends on XML::Parser and expat versions)
3225
67
1607
        $doctype_text=~ s/(>\s*)*$/>\n/; # if($doctype_text);
3226
3227
67
95
        my $output_filter= XML::Twig::Elt::output_filter();
3228
67
50
294
        return $output_filter ? $output_filter->( $doctype_text) : $doctype_text;
3229       }
3230     else
3231
2395
6798
      { return $doctype_text; }
3232   }
3233
3234 sub _leading_cpi
3235
2463
1596
  { my $t= shift;
3236
2463
100
6050
    my $leading_cpi= $t->{leading_cpi} || return '';
3237
777
901
    return $leading_cpi->sprint( 1);
3238   }
3239
3240 sub _trailing_cpi
3241
2454
1567
  { my $t= shift;
3242
2454
100
4717
    my $trailing_cpi= $t->{trailing_cpi} || return '';
3243
772
804
    return $trailing_cpi->sprint( 1);
3244   }
3245
3246 sub _trailing_cpi_text
3247
2454
1473
  { my $t= shift;
3248
2454
100
6876
    return $t->{trailing_cpi_text} || '';
3249   }
3250
3251 sub print_to_file
3252
3
3
1
5
  { my( $t, $filename)= (shift, shift);
3253
3
4
    my $out_fh;
3254 # open( $out_fh, ">$filename") or _croak( "cannot create file $filename: $!"); # < perl 5.8
3255
3
50
14
    my $mode= $t->{twig_keep_encoding} ? '>' : '>:utf8'; # >= perl 5.8
3256
3
100
79
    open( $out_fh, $mode, $filename) or _croak( "cannot create file $filename: $!"); # >= perl 5.8
3257
2
6
    $t->print( $out_fh, @_);
3258
2
69
    close $out_fh;
3259
2
15
    return $t;
3260   }
3261
3262 # probably only works on *nix (at least the chmod bit)
3263 # first print to a temporary file, then rename that file to the desired file name, then change permissions
3264 # to the original file permissions (or to the current umask)
3265 sub safe_print_to_file
3266
2
2
1
5
  { my( $t, $filename)= (shift, shift);
3267
2
50
14
    my $perm= -f $filename ? (stat $filename)[2] & 07777 : ~umask() ;
3268
2
50
3
    XML::Twig::_use( 'File::Temp') || croak "need File::Temp to use safe_print_to_file\n";
3269
2
72
    my $tmpdir= dirname( $filename);
3270
2
7
    my( $fh, $tmpfilename) = File::Temp::tempfile( DIR => $tmpdir);
3271
1
212
    $t->print_to_file( $tmpfilename, @_);
3272
1
50
0
23
    rename( $tmpfilename, $filename) or unlink $tmpfilename && _croak( "cannot move temporary file to $filename: $!");
3273
1
6
    chmod $perm, $filename;
3274
1
12
    return $t;
3275   }
3276
3277
3278 sub print
3279
56
56
1
371
  { my $t= shift;
3280
56
100
67
204
    my $fh= isa( $_[0], 'GLOB') || isa( $_[0], 'IO::Scalar') ? shift : undef;
3281
56
81
    my %args= _normalize_args( @_);
3282
3283
56
100
135
    my $old_select = defined $fh ? select $fh : undef;
3284
56
100
95
    my $old_pretty = defined ($args{PrettyPrint}) ? $t->set_pretty_print( $args{PrettyPrint}) : undef;
3285
56
100
69
    my $old_empty_tag = defined ($args{EmptyTags}) ? $t->set_empty_tag_style( $args{EmptyTags}) : undef;
3286
3287     #if( !$t->{encoding} || lc( $t->{encoding}) eq 'utf-8') { my $out= $fh || \*STDOUT; binmode $out, ':utf8'; }
3288
3289
56
100
67
332
    if( $perl_version > 5.006 && ! $t->{twig_keep_encoding})
3290
22
50
265673
      { if( grep /useperlio=define/, `$^X -V`) # we can only use binmode :utf8 if perl was compiled with useperlio
3291
22
100
327
          { binmode( $fh || \*STDOUT, ":utf8" ); }
3292       }
3293
3294
56
411
     print $t->prolog( %args) . $t->_leading_cpi( %args);
3295
56
297
     $t->{twig_root}->print;
3296
56
67
109
     print $t->_trailing_cpi # trailing comments and pi's (elements, in 'process' mode)
3297          . $t->_trailing_cpi_text # trailing comments and pi's (in 'keep' mode)
3298          . ( ($t->{twig_keep_spaces}||'') && ($t->{trailing_spaces} || ''))
3299          ;
3300
3301
3302
56
100
268
    $t->set_pretty_print( $old_pretty) if( defined $old_pretty);
3303
56
100
95
    $t->set_empty_tag_style( $old_empty_tag) if( defined $old_empty_tag);
3304
56
53
100
84
138
    if( $fh) { select $old_select; }
3305
3306
56
128
    return $t;
3307   }
3308
3309
3310 sub flush
3311
1181
1
864
  { my $t= shift;
3312
3313
1181
50
1461
    $t->_trigger_tdh if $t->{twig_tdh};
3314
3315
1181
100
1320
    return if( $t->{twig_completely_flushed});
3316
3317
1152
100
67
2829
    my $fh= isa( $_[0], 'GLOB') || isa( $_[0], 'IO::Scalar') ? shift : undef;
3318
1152
100
2230
    my $old_select= defined $fh ? select $fh : undef;
3319
1152
100
1211
    my $up_to= ref $_[0] ? shift : undef;
3320
1152
1258
    my %args= _normalize_args( @_);
3321
3322
1152
644
    my $old_pretty;
3323
1152
100
1539
    if( defined $args{PrettyPrint})
3324
6
7
      { $old_pretty= $t->set_pretty_print( $args{PrettyPrint});
3325
6
8
        delete $args{PrettyPrint};
3326       }
3327
3328
1152
586
     my $old_empty_tag_style;
3329
1152
100
1200
     if( $args{EmptyTags})
3330
8
10
      { $old_empty_tag_style= $t->set_empty_tag_style( $args{EmptyTags});
3331
8
6
        delete $args{EmptyTags};
3332       }
3333
3334
3335     # the "real" last element processed, as _twig_end has closed it
3336
1152
580
    my $last_elt;
3337
1152
736
    my $flush_trailing_data=0;
3338
1152
100
100
1538
    if( $up_to)
3339
22
18
      { $last_elt= $up_to; }
3340     elsif( $t->{twig_current})
3341
228
285
      { $last_elt= $t->{twig_current}->_last_child; }
3342     else
3343
902
601
      { $last_elt= $t->{twig_root};
3344
902
496
        $flush_trailing_data=1;
3345
902
1140
        $t->{twig_completely_flushed}=1;
3346       }
3347
3348     # flush the DTD unless it has ready flushed (ie root has been flushed)
3349
1152
804
    my $elt= $t->{twig_root};
3350
1152
100
1263
    unless( $elt->_flushed)
3351       { # store flush info so we can auto-flush later
3352
911
100
1019
        if( $t->{twig_autoflush})
3353
902
1060
          { $t->{twig_autoflush_data}={};
3354
902
100
1576
            $t->{twig_autoflush_data}->{fh} = $fh if( $fh);
3355
902
100
1022
            $t->{twig_autoflush_data}->{args} = \@_ if( @_);
3356           }
3357
911
1124
        $t->print_prolog( %args);
3358
911
9727
        print $t->_leading_cpi;
3359       }
3360
3361
1152
8005
    while( $elt)
3362
1596
867
      { my $next_elt;
3363
1596
100
67
2507
        if( $last_elt && $last_elt->in( $elt))
3364           {
3365
422
100
371
            unless( $elt->_flushed)
3366               { # just output the front tag
3367
107
149
                print $elt->start_tag();
3368
107
325
                $elt->_set_flushed;
3369               }
3370
422
322
            $next_elt= $elt->{first_child};
3371           }
3372         else
3373           { # an element before the last one or the last one,
3374
1174
860
            $next_elt= $elt->{next_sibling};
3375
1174
1259
            $elt->_flush();
3376
1174
1215
            $elt->delete;
3377
1174
100
67
3007
            last if( $last_elt && ($elt == $last_elt));
3378           }
3379
444
599
        $elt= $next_elt;
3380       }
3381
3382
1152
100
2066
    if( $flush_trailing_data)
3383
902
1048
      { print $t->_trailing_cpi # trailing comments and pi's (elements, in 'process' mode)
3384             , $t->_trailing_cpi_text # trailing comments and pi's (in 'keep' mode)
3385       }
3386
3387
1152
100
9376
    select $old_select if( defined $old_select);
3388
1152
100
1301
    $t->set_pretty_print( $old_pretty) if( defined $old_pretty);
3389
1152
100
1240
    $t->set_empty_tag_style( $old_empty_tag_style) if( defined $old_empty_tag_style);
3390
3391
1152
100
1431
    if( my $ids= $t->{twig_id_list})
3392
2
5
      { while( my ($id, $elt)= each %$ids)
3393
7
100
13
          { if( ! defined $elt)
3394
2
4
             { delete $t->{twig_id_list}->{$id} }
3395           }
3396       }
3397
3398
1152
3174
    return $t;
3399   }
3400
3401
3402 # flushes up to an element
3403 # this method just reorders the arguments and calls flush
3404 sub flush_up_to
3405
22
22
1
27
  { my $t= shift;
3406
22
19
    my $up_to= shift;
3407
22
100
67
120
    if( isa( $_[0], 'GLOB') || isa( $_[0], 'IO::Scalar'))
3408
11
9
      { my $fh= shift;
3409
11
19
        $t->flush( $fh, $up_to, @_);
3410       }
3411     else
3412
11
28
      { $t->flush( $up_to, @_); }
3413
3414
22
89
    return $t;
3415   }
3416
3417
3418 # same as print except the entire document text is returned as a string
3419 sub sprint
3420
1496
1
2516
  { my $t= shift;
3421
1496
1805
    my %args= _normalize_args( @_);
3422
3423
1496
976
    my $old_pretty;
3424
1496
100
1883
    if( defined $args{PrettyPrint})
3425
8
23
      { $old_pretty= $t->set_pretty_print( $args{PrettyPrint});
3426
8
15
        delete $args{PrettyPrint};
3427       }
3428
3429
1496
1229
     my $old_empty_tag_style;
3430
1496
100
1774
     if( defined $args{EmptyTags})
3431
12
30
      { $old_empty_tag_style= $t->set_empty_tag_style( $args{EmptyTags});
3432
12
18
        delete $args{EmptyTags};
3433       }
3434
3435
1496
50
1911
    my $string= $t->prolog( %args) # xml declaration and doctype
3436                 . $t->_leading_cpi( %args) # leading comments and pi's in 'process' mode
3437                 . ( ($t->{twig_root} && $t->{twig_root}->sprint) || '')
3438                 . $t->_trailing_cpi # trailing comments and pi's (elements, in 'process' mode)
3439                 . $t->_trailing_cpi_text # trailing comments and pi's (in 'keep' mode)
3440                 ;
3441
1496
4
100
100
2932
5
    if( $t->{twig_keep_spaces} && $t->{trailing_spaces}) { $string .= $t->{trailing_spaces}; }
3442
3443
1496
100
1820
    $t->set_pretty_print( $old_pretty) if( defined $old_pretty);
3444
1496
100
1540
    $t->set_empty_tag_style( $old_empty_tag_style) if( defined $old_empty_tag_style);
3445
3446
1496
4118
    return $string;
3447   }
3448
3449
3450 # this method discards useless elements in a tree
3451 # it does the same thing as a flush except it does not print it
3452 # the second argument is an element, the last purged element
3453 # (this argument is usually set through the purge_up_to method)
3454 sub purge
3455
10
10
1
13
  { my $t= shift;
3456
10
7
    my $up_to= shift;
3457
3458
10
50
19
    $t->_trigger_tdh if $t->{twig_tdh};
3459
3460     # the "real" last element processed, as _twig_end has closed it
3461
10
9
    my $last_elt;
3462
10
100
100
15
    if( $up_to)
3463
7
5
      { $last_elt= $up_to; }
3464     elsif( $t->{twig_current})
3465
2
3
      { $last_elt= $t->{twig_current}->_last_child; }
3466     else
3467
1
2
      { $last_elt= $t->{twig_root}; }
3468
3469
10
14
    my $elt= $t->{twig_root};
3470
3471
10
15
    while( $elt)
3472
29
22
      { my $next_elt;
3473
29
100
67
50
        if( $last_elt && $last_elt->in( $elt))
3474
13
15
          { $elt->_set_flushed;
3475
13
9
            $next_elt= $elt->{first_child};
3476           }
3477         else
3478           { # an element before the last one or the last one,
3479
16
10
            $next_elt= $elt->{next_sibling};
3480
16
19
            $elt->delete;
3481
16
100
67
44
            last if( $last_elt && ($elt == $last_elt) );
3482           }
3483
19
32
        $elt= $next_elt;
3484       }
3485
3486
10
100
18
    if( my $ids= $t->{twig_id_list})
3487
2
8
4
100
5
13
7
      { while( my ($id, $elt)= each %$ids) { if( ! defined $elt) { delete $t->{twig_id_list}->{$id} } } }
3488
3489
10
40
    return $t;
3490   }
3491
3492 # flushes up to an element. This method just calls purge
3493 sub purge_up_to
3494
7
7
1
6
  { my $t= shift;
3495
7
11
    return $t->purge( @_);
3496   }
3497
3498 sub root
3499
5179
1
12263
  { return $_[0]->{twig_root}; }
3500
3501 sub normalize
3502
1
1
1
3
  { return $_[0]->root->normalize; }
3503
3504
3505 # create accessor methods on attribute names
3506 { my %accessor; # memorize accessor names so re-creating them won't trigger an error
3507 sub att_accessors
3508   {
3509
6
6
1
44
    my $twig_or_class= shift;
3510
6
100
11
    my $elt_class= ref $twig_or_class ? $twig_or_class->{twig_elt_class}
3511                                       : 'XML::Twig::Elt'
3512                                       ;
3513     ## no critic (TestingAndDebugging::ProhibitNoStrict);
3514
187
187
187
191
647
163
36164
    no strict 'refs';
3515
6
9
    foreach my $att (@_)
3516
8
100
100
44
      { _croak( "attempt to redefine existing method $att using att_accessors")
3517           if( $elt_class->can( $att) && !$accessor{$att});
3518
3519
7
100
11
        if( !$accessor{$att})
3520
5
9
          { *{"$elt_class\::$att"}=
3521                 sub
3522                     :lvalue # > perl 5.5
3523
15
15
22
                  { my $elt= shift;
3524
15
2
100
19
2
                    if( @_) { $elt->{att}->{$att}= $_[0]; }
3525
15
34
                    $elt->{att}->{$att};
3526
5
11
                  };
3527
5
10
            $accessor{$att}=1;
3528           }
3529       }
3530
5
5
    return $twig_or_class;
3531   }
3532 }
3533
3534 { my %accessor; # memorize accessor names so re-creating them won't trigger an error
3535 sub elt_accessors
3536   {
3537
8
8
1
13
    my $twig_or_class= shift;
3538
8
50
16
    my $elt_class= ref $twig_or_class ? $twig_or_class->{twig_elt_class}
3539                                       : 'XML::Twig::Elt'
3540                                       ;
3541
3542     # if arg is a hash ref, it's exp => name, otherwise it's a list of tags
3543
3
9
9
18
    my %exp_to_alias= ref( $_[0]) && isa( $_[0], 'HASH') ? %{$_[0]}
3544
8
100
67
30
                                                         : map { $_ => $_ } @_;
3545     ## no critic (TestingAndDebugging::ProhibitNoStrict);
3546
187
187
187
191
594
160
35994
    no strict 'refs';
3547
8
22
    while( my( $alias, $exp)= each %exp_to_alias )
3548
16
100
100
80
      { if( $elt_class->can( $alias) && !$accessor{$alias})
3549
1
2
          { _croak( "attempt to redefine existing method $alias using elt_accessors"); }
3550
3551
15
100
25
        if( !$accessor{$alias})
3552
9
21
          { *{"$elt_class\::$alias"}=
3553                 sub
3554
18
18
29
                  { my $elt= shift;
3555
18
100
48
                    return wantarray ? $elt->children( $exp) : $elt->first_child( $exp);
3556
9
18
                  };
3557
9
23
            $accessor{$alias}=1;
3558           }
3559       }
3560
7
11
    return $twig_or_class;
3561   }
3562 }
3563
3564 { my %accessor; # memorize accessor names so re-creating them won't trigger an error
3565 sub field_accessors
3566   {
3567
4
4
1
29
    my $twig_or_class= shift;
3568
4
50
8
    my $elt_class= ref $twig_or_class ? $twig_or_class->{twig_elt_class}
3569                                       : 'XML::Twig::Elt'
3570                                       ;
3571
1
4
3
8
    my %exp_to_alias= ref( $_[0]) && isa( $_[0], 'HASH') ? %{$_[0]}
3572
4
100
67
14
                                                         : map { $_ => $_ } @_;
3573
3574     ## no critic (TestingAndDebugging::ProhibitNoStrict);
3575
187
187
187
190
588
165
248677
    no strict 'refs';
3576
4
11
    while( my( $alias, $exp)= each %exp_to_alias )
3577
5
100
100
31
      { if( $elt_class->can( $alias) && !$accessor{$alias})
3578
1
2
          { _croak( "attempt to redefine existing method $exp using field_accessors"); }
3579
4
100
7
        if( !$accessor{$alias})
3580
3
74
          { *{"$elt_class\::$alias"}=
3581                 sub
3582
4
4
9
                  { my $elt= shift;
3583
4
15
                    $elt->field( $exp)
3584
3
11
                  };
3585
3
10
            $accessor{$alias}=1;
3586           }
3587       }
3588
3
5
    return $twig_or_class;
3589   }
3590 }
3591
3592 sub first_elt
3593
122
122
1
352
  { my( $t, $cond)= @_;
3594
122
100
155
    my $root= $t->root || return undef;
3595
121
100
192
    return $root if( $root->passes( $cond));
3596
116
223
    return $root->next_elt( $cond);
3597   }
3598
3599 sub last_elt
3600
8
8
1
62
  { my( $t, $cond)= @_;
3601
8
100
10
    my $root= $t->root || return undef;
3602
7
15
    return $root->last_descendant( $cond);
3603   }
3604
3605 sub next_n_elt
3606
4
4
1
9
  { my( $t, $offset, $cond)= @_;
3607
4
100
6
    $offset -- if( $t->root->matches( $cond) );
3608
4
6
    return $t->root->next_n_elt( $offset, $cond);
3609   }
3610
3611 sub get_xpath
3612
155
155
1
2177
  { my $twig= shift;
3613
155
100
546
    if( isa( $_[0], 'ARRAY'))
3614
1
2
      { my $elt_array= shift;
3615
1
2
2
5
        return _unique_elts( map { $_->get_xpath( @_) } @$elt_array);
3616       }
3617     else
3618
154
180
      { return $twig->root->get_xpath( @_); }
3619   }
3620
3621 # get a list of elts and return a sorted list of unique elts
3622 sub _unique_elts
3623
195
203
195
356
229
  { my @sorted= sort { $a ->cmp( $b) } @_;
3624
195
126
    my @unique;
3625
195
291
    while( my $current= shift @sorted)
3626
313
100
100
885
      { push @unique, $current unless( @unique && ($unique[-1] == $current)); }
3627
195
470
    return @unique;
3628   }
3629
3630 sub findvalue
3631
16
16
1
260
  { my $twig= shift;
3632
16
100
61
    if( isa( $_[0], 'ARRAY'))
3633
1
2
      { my $elt_array= shift;
3634
1
2
2
5
        return join( '', map { $_->findvalue( @_) } @$elt_array);
3635       }
3636     else
3637
15
22
      { return $twig->root->findvalue( @_); }
3638   }
3639
3640 sub findvalues
3641
3
3
1
9
  { my $twig= shift;
3642
3
100
13
    if( isa( $_[0], 'ARRAY'))
3643
1
1
      { my $elt_array= shift;
3644
1
2
1
5
        return map { $_->findvalues( @_) } @$elt_array;
3645       }
3646     else
3647
2
4
      { return $twig->root->findvalues( @_); }
3648   }
3649
3650 sub set_id_seed
3651
2
2
1
7
  { my $t= shift;
3652
2
4
    XML::Twig::Elt->set_id_seed( @_);
3653
2
3
    return $t;
3654   }
3655
3656 # return an array ref to an index, or undef
3657 sub index
3658
10
10
1
11
  { my( $twig, $name, $index)= @_;
3659
10
100
25
    return defined( $index) ? $twig->{_twig_index}->{$name}->[$index] : $twig->{_twig_index}->{$name};
3660   }
3661
3662 # return a list with just the root
3663 # if a condition is given then return an empty list unless the root matches
3664 sub children
3665
82
82
1
89
  { my( $t, $cond)= @_;
3666
82
92
    my $root= $t->root;
3667
82
100
100
174
    unless( $cond && !($root->passes( $cond)) )
3668
79
862
      { return ($root); }
3669     else
3670
3
45
      { return (); }
3671   }
3672
3673
1
1
3
sub _children { return ($_[0]->root); }
3674
3675 # weird, but here for completude
3676 # used to solve (non-sensical) /doc[1] XPath queries
3677 sub child
3678
2
2
1
3
  { my $t= shift;
3679
2
1
    my $nb= shift;
3680
2
4
    return ($t->children( @_))[$nb];
3681   }
3682
3683 sub descendants
3684
95
95
1
152
  { my( $t, $cond)= @_;
3685
95
113
    my $root= $t->root;
3686
95
100
131
    if( $root->passes( $cond) )
3687
44
58
      { return ($root, $root->descendants( $cond)); }
3688     else
3689
51
88
      { return ( $root->descendants( $cond)); }
3690   }
3691
3692
3
3
3
1
40
7
sub simplify { my $t= shift; $t->root->simplify( @_); }
3693
18
18
18
1
55
23
sub subs_text { my $t= shift; $t->root->subs_text( @_); }
3694
18
18
18
1
12
20
sub trim { my $t= shift; $t->root->trim( @_); }
3695
3696
3697 sub set_keep_encoding
3698
3151
1
2488
  { my( $t, $keep)= @_;
3699
3151
3424
    $t->{twig_keep_encoding}= $keep;
3700
3151
3325
    $t->{NoExpand}= $keep;
3701
3151
3810
    return XML::Twig::Elt::set_keep_encoding( $keep);
3702    }
3703
3704 sub set_expand_external_entities
3705
3131
1
3872
  { return XML::Twig::Elt::set_expand_external_entities( @_); }
3706
3707 sub escape_gt
3708
2
2
2
2
1
2
4
3
  { my $t= shift; $t->{twig_escape_gt}= 1; return XML::Twig::Elt::escape_gt( @_); }
3709
3710 sub do_not_escape_gt
3711
1
1
1
1
1
2
1
2
  { my $t= shift; $t->{twig_escape_gt}= 0; return XML::Twig::Elt::do_not_escape_gt( @_); }
3712
3713 sub elt_id
3714
70
70
1
511
  { return $_[0]->{twig_id_list}->{$_[1]}; }
3715
3716 # change it in ALL twigs at the moment
3717 sub change_gi
3718
4
4
1
10
  { my( $twig, $old_gi, $new_gi)= @_;
3719
4
49
    my $index;
3720
4
100
13
    return unless($index= $XML::Twig::gi2index{$old_gi});
3721
2
3
    $XML::Twig::index2gi[$index]= $new_gi;
3722
2
3
    delete $XML::Twig::gi2index{$old_gi};
3723
2
3
    $XML::Twig::gi2index{$new_gi}= $index;
3724
2
1
    return $twig;
3725   }
3726
3727
3728 # builds the DTD from the stored (possibly updated) data
3729 sub dtd_text
3730
5
5
1
9
  { my $t= shift;
3731
5
7
    my $dtd= $t->{twig_dtd};
3732
5
100
14
    my $doctype= $t->{twig_doctype} or return '';
3733
4
10
    my $string= "<!DOCTYPE ".$doctype->{name};
3734
3735
4
7
    $string .= " [\n";
3736
3737
4
4
7
10
    foreach my $gi (@{$dtd->{elt_list}})
3738
5
19
      { $string.= "<!ELEMENT $gi ".$dtd->{model}->{$gi}.">\n" ;
3739
5
100
58
        if( $dtd->{att}->{$gi})
3740
2
3
          { my $attlist= $dtd->{att}->{$gi};
3741
2
3
            $string.= "<!ATTLIST $gi\n";
3742
2
2
2
4
            foreach my $att ( sort keys %{$attlist})
3743               {
3744
4
100
7
                if( $attlist->{$att}->{fixed})
3745
1
3
                  { $string.= " $att $attlist->{$att}->{type} #FIXED $attlist->{$att}->{default}"; }
3746                 else
3747
3
24
                  { $string.= " $att $attlist->{$att}->{type} $attlist->{$att}->{default}"; }
3748
4
5
                $string.= "\n";
3749               }
3750
2
5
            $string.= ">\n";
3751           }
3752       }
3753
4
50
9
    $string.= $t->entity_list->text if( $t->entity_list);
3754
4
6
    $string.= "\n]>\n";
3755
4
13
    return $string;
3756   }
3757
3758 # prints the DTD from the stored (possibly updated) data
3759 sub dtd_print
3760
3
3
1
11
  { my $t= shift;
3761
3
100
67
17
    my $fh= isa( $_[0], 'GLOB') || isa( $_[0], 'IO::Scalar') ? shift : undef;
3762
3
2
100
6
6
    if( $fh) { print $fh $t->dtd_text; }
3763
1
1
    else { print $t->dtd_text; }
3764
3
27
    return $t;
3765   }
3766
3767 # build the subs that call directly expat
3768 BEGIN
3769
187
190
821
  { my @expat_methods= qw( depth in_element within_element context
3770                            current_line current_column current_byte
3771                            recognized_string original_string
3772                            xpcroak xpcarp
3773                            base current_element element_index
3774                            xml_escape
3775                            position_in_context);
3776
187
275
    foreach my $method (@expat_methods)
3777       {
3778         ## no critic (TestingAndDebugging::ProhibitNoStrict);
3779
187
187
187
190
640
176
11955
        no strict 'refs';
3780
2992
34
34
280949
1067
        *{$method}= sub { my $t= shift;
3781
34
100
80
                          _croak( "calling $method after parsing is finished") unless( $t->{twig_parsing});
3782
18
51
                          return $t->{twig_parser}->$method(@_);
3783
2992
4751
                        };
3784       }
3785   }
3786
3787 sub path
3788
19
19
1
32
  { my( $t, $gi)= @_;
3789
19
100
23
    if( $t->{twig_map_xmlns})
3790
2
4
5
10
      { return "/" . join( "/", map { $t->_replace_prefix( $_)} ($t->{twig_parser}->context, $gi)); }
3791     else
3792
17
29
      { return "/" . join( "/", ($t->{twig_parser}->context, $gi)); }
3793   }
3794
3795 sub finish
3796
1
1
1
3
  { my $t= shift;
3797
1
3
    return $t->{twig_parser}->finish;
3798   }
3799
3800 # just finish the parse by printing the rest of the document
3801 sub finish_print
3802
6
6
1
26
  { my( $t, $fh)= @_;
3803
6
5
    my $old_fh;
3804
6
100
50
12
    unless( defined $fh)
3805
2
3
      { $t->_set_fh_to_twig_output_fh(); }
3806     elsif( defined $fh)
3807
4
13
      { $old_fh= select $fh;
3808
4
50
10
        $t->{twig_original_selected_fh}= $old_fh if( $old_fh);
3809       }
3810
3811
6
7
    my $p=$t->{twig_parser};
3812
6
100
7
    if( $t->{twig_keep_encoding})
3813
1
4
      { $p->setHandlers( %twig_handlers_finish_print); }
3814     else
3815
5
19
      { $p->setHandlers( %twig_handlers_finish_print_original); }
3816
6
353
    return $t;
3817   }
3818
3819
3115
1
3338
sub set_remove_cdata { return XML::Twig::Elt::set_remove_cdata( @_); }
3820
3821
19
19
1
33
sub output_filter { return XML::Twig::Elt::output_filter( @_); }
3822
3138
1
3427
sub set_output_filter { return XML::Twig::Elt::set_output_filter( @_); }
3823
3824
2
2
1
6
sub output_text_filter { return XML::Twig::Elt::output_text_filter( @_); }
3825
3124
1
3082
sub set_output_text_filter { return XML::Twig::Elt::set_output_text_filter( @_); }
3826
3827 sub set_input_filter
3828
4
4
1
4
  { my( $t, $input_filter)= @_;
3829
4
5
    my $old_filter= $t->{twig_input_filter};
3830
4
100
50
50
67
21
      if( !$input_filter || isa( $input_filter, 'CODE') )
3831
2
4
        { $t->{twig_input_filter}= $input_filter; }
3832       elsif( $input_filter eq 'latin1')
3833
0
0
        { $t->{twig_input_filter}= latin1(); }
3834       elsif( $filter{$input_filter})
3835
0
0
        { $t->{twig_input_filter}= $filter{$input_filter}; }
3836       else
3837
2
3
        { _croak( "invalid input filter: $input_filter"); }
3838
3839
2
2
      return $old_filter;
3840     }
3841
3842 sub set_empty_tag_style
3843
131
131
1
272
  { return XML::Twig::Elt::set_empty_tag_style( @_); }
3844
3845 sub set_pretty_print
3846
121
121
1
18122
  { return XML::Twig::Elt::set_pretty_print( @_); }
3847
3848 sub set_quote
3849
3115
1
3059
  { return XML::Twig::Elt::set_quote( @_); }
3850
3851 sub set_indent
3852
4
4
1
9
  { return XML::Twig::Elt::set_indent( @_); }
3853
3854 sub set_keep_atts_order
3855
3115
3115
1
1915
3140
  { shift; return XML::Twig::Elt::set_keep_atts_order( @_); }
3856
3857 sub keep_atts_order
3858
2
2
1
4
  { return XML::Twig::Elt::keep_atts_order( @_); }
3859
3860 sub set_do_not_escape_amp_in_atts
3861
3130
1
3466
  { return XML::Twig::Elt::set_do_not_escape_amp_in_atts( @_); }
3862
3863 # save and restore package globals (the ones in XML::Twig::Elt)
3864 # should probably return the XML::Twig object itself, but instead
3865 # returns the state (as a hashref) for backward compatibility
3866 sub save_global_state
3867
10
10
1
3528
  { my $t= shift;
3868
10
20
    return $t->{twig_saved_state}= XML::Twig::Elt::global_state();
3869   }
3870
3871 sub restore_global_state
3872
16
16
1
63
  { my $t= shift;
3873
16
34
    XML::Twig::Elt::set_global_state( $t->{twig_saved_state});
3874   }
3875
3876 sub global_state
3877
1
1
1
3
  { return XML::Twig::Elt::global_state(); }
3878
3879 sub set_global_state
3880
1
1
1
3
  { return XML::Twig::Elt::set_global_state( $_[1]); }
3881
3882 sub dispose
3883
3
3
1
2
  { my $t= shift;
3884
3
4
    $t->DESTROY;
3885
3
4
    return;
3886   }
3887
3888 sub DESTROY
3889
2965
5092
  { my $t= shift;
3890
2965
100
67
8445
    if( $t->{twig_root} && isa( $t->{twig_root}, 'XML::Twig::Elt'))
3891
1994
2694
      { $t->{twig_root}->delete }
3892
3893     # added to break circular references
3894
2965
2711
    undef $t->{twig};
3895
2965
100
3385
    undef $t->{twig_root}->{twig} if( $t->{twig_root});
3896
2965
2094
    undef $t->{twig_parser};
3897
3898
2965
22762
    undef %$t;# prevents memory leaks (especially when using mod_perl)
3899
2965
17067
    undef $t;
3900   }
3901
3902
3903 #
3904 # non standard handlers
3905 #
3906
3907 # kludge: expat 1.95.2 calls both Default AND Doctype handlers
3908 # so if the default handler finds '<!DOCTYPE' then it must
3909 # unset itself (_twig_print_doctype will reset it)
3910 sub _twig_print_check_doctype
3911    { # warn " in _twig_print_check_doctype...\n"; # DEBUG handler
3912
3913
15
15
9
    my $p= shift;
3914
15
26
    my $string= $p->recognized_string();
3915
15
50
60
    if( $string eq '<!DOCTYPE')
3916       {
3917
0
0
        $p->setHandlers( Default => undef);
3918
0
0
        $p->setHandlers( Entity => undef);
3919
0
0
        $expat_1_95_2=1;
3920       }
3921     else
3922
15
16
      { print $string; }
3923
3924
15
83
    return;
3925   }
3926
3927
3928 sub _twig_print
3929    { # warn " in _twig_print...\n"; # DEBUG handler
3930
489
489
472
    my $p= shift;
3931
489
50
25
742
    if( $expat_1_95_2 && ($p->recognized_string eq '[') && !$p->{twig}->{expat_1_95_2_seen_bracket})
3932       { # otherwise the opening square bracket of the doctype gets printed twice
3933
0
0
        $p->{twig}->{expat_1_95_2_seen_bracket}=1;
3934       }
3935     else
3936
489
50
476
      { if( $p->{twig}->{twig_right_after_root})
3937
0
0
0
0
0
          { my $s= $p->recognized_string(); print $s if $s=~ m{\S}; }
3938         else
3939
489
646
          { print $p->recognized_string(); }
3940       }
3941
489
2377
    return;
3942   }
3943 # recognized_string does not seem to work for entities, go figure!
3944 # so this handler is used to print them anyway
3945 sub _twig_print_entity
3946    { # warn " in _twig_print_entity...\n"; # DEBUG handler
3947
6
6
42
    my $p= shift;
3948
6
21
    XML::Twig::Entity->new( @_)->print;
3949   }
3950
3951 # kludge: expat 1.95.2 calls both Default AND Doctype handlers
3952 # so if the default handler finds '<!DOCTYPE' then it must
3953 # unset itself (_twig_print_doctype will reset it)
3954 sub _twig_print_original_check_doctype
3955    { # warn " in _twig_print_original_check_doctype...\n"; # DEBUG handler
3956
3957
21
21
23
    my $p= shift;
3958
21
44
    my $string= $p->original_string();
3959
21
50
92
    if( $string eq '<!DOCTYPE')
3960
0
0
      { $p->setHandlers( Default => undef);
3961
0
0
        $p->setHandlers( Entity => undef);
3962
0
0
        $expat_1_95_2=1;
3963       }
3964     else
3965
21
27
      { print $string; }
3966
3967
21
198
    return;
3968   }
3969
3970 sub _twig_print_original
3971    { # warn " in _twig_print_original...\n"; # DEBUG handler
3972
435
435
301
    my $p= shift;
3973
435
663
    print $p->original_string();
3974
435
2093
    return;
3975   }
3976
3977
3978 sub _twig_print_original_doctype
3979    { # warn " in _twig_print_original_doctype...\n"; # DEBUG handler
3980
3981
5
5
108
    my( $p, $name, $sysid, $pubid, $internal)= @_;
3982
5
50
13
    if( $name)
3983       { # with recent versions of XML::Parser original_string does not work,
3984         # hence we need to rebuild the doctype declaration
3985
5
5
        my $doctype='';
3986
5
50
15
        $doctype .= qq{<!DOCTYPE $name} if( $name);
3987
5
50
10
        $doctype .= qq{ PUBLIC "$pubid"} if( $pubid);
3988
5
100
67
15
        $doctype .= qq{ SYSTEM} if( $sysid && !$pubid);
3989
5
100
9
        $doctype .= qq{ "$sysid"} if( $sysid);
3990
5
100
67
18
        $doctype .= ' [' if( $internal && !$expat_1_95_2) ;
3991
5
100
67
21
        $doctype .= qq{>} unless( $internal || $expat_1_95_2);
3992
5
18
        $p->{twig}->{twig_doctype}->{has_internal}=$internal;
3993
5
17
        print $doctype;
3994       }
3995
5
16
    $p->setHandlers( Default => \&_twig_print_original);
3996
5
73
    return;
3997   }
3998
3999 sub _twig_print_doctype
4000    { # warn " in _twig_print_doctype...\n"; # DEBUG handler
4001
11
11
99
    my( $p, $name, $sysid, $pubid, $internal)= @_;
4002
11
50
19
    if( $name)
4003       { # with recent versions of XML::Parser original_string does not work,
4004         # hence we need to rebuild the doctype declaration
4005
11
9
        my $doctype='';
4006
11
50
24
        $doctype .= qq{<!DOCTYPE $name} if( $name);
4007
11
50
17
        $doctype .= qq{ PUBLIC "$pubid"} if( $pubid);
4008
11
100
67
31
        $doctype .= qq{ SYSTEM} if( $sysid && !$pubid);
4009
11
100
26
        $doctype .= qq{ "$sysid"} if( $sysid);
4010
11
100
15
        $doctype .= ' [' if( $internal) ;
4011
11
100
67
33
        $doctype .= qq{>} unless( $internal || $expat_1_95_2);
4012
11
29
        $p->{twig}->{twig_doctype}->{has_internal}=$internal;
4013
11
23
        print $doctype;
4014       }
4015
11
28
    $p->setHandlers( Default => \&_twig_print);
4016
11
127
    return;
4017   }
4018
4019
4020 sub _twig_print_original_default
4021    { # warn " in _twig_print_original_default...\n"; # DEBUG handler
4022
2
2
110
    my $p= shift;
4023
2
7
    print $p->original_string();
4024
2
16
    return;
4025   }
4026
4027 # account for the case where the element is empty
4028 sub _twig_print_end_original
4029    { # warn " in _twig_print_end_original...\n"; # DEBUG handler
4030
10
10
89
    my $p= shift;
4031
10
18
    print $p->original_string();
4032
10
82
    return;
4033   }
4034
4035 sub _twig_start_check_roots
4036    { # warn " in _twig_start_check_roots...\n"; # DEBUG handler
4037
524
524
73185
    my $p= shift;
4038
524
375
    my $gi= shift;
4039
4040
524
400
    my $t= $p->{twig};
4041
4042
524
50
1208
    my $fh= $t->{twig_output_fh} || select() || \*STDOUT;
4043
4044
524
321
    my $ns_decl;
4045
524
100
804
    unless( $p->depth == 0)
4046
420
14
100
1556
22
      { if( $t->{twig_map_xmlns}) { $ns_decl= _replace_ns( $t, \$gi, \@_); }
4047       }
4048
4049
524
1307
    my $context= { $ST_TAG => $gi, @_};
4050
524
50
584
    $context->{$ST_NS}= $ns_decl if $ns_decl;
4051
524
524
314
508
    push @{$t->{_twig_context_stack}}, $context;
4052
524
551
    my %att= @_;
4053
4054
524
100
584
    if( _handler( $t, $t->{twig_roots}, $gi))
4055
144
492
      { $p->setHandlers( %twig_handlers); # restore regular handlers
4056
144
9051
        $t->{twig_root_depth}= $p->depth;
4057
144
144
452
144
        pop @{$t->{_twig_context_stack}}; # will be pushed back in _twig_start
4058
144
186
        _twig_start( $p, $gi, @_);
4059
144
529
        return;
4060       }
4061
4062     # $tag will always be true if it needs to be printed (the tag string is never empty)
4063
380
100
100
786
    my $tag= $t->{twig_default_print} ? $t->{twig_keep_encoding} ? $p->original_string
4064                                                                  : $p->recognized_string
4065                                       : '';
4066
4067
380
100
100
1146
    if( $p->depth == 0)
4068       {
4069         ## no critic (TestingAndDebugging::ProhibitNoStrict);
4070
187
187
187
188
619
176
18405
        no strict 'refs';
4071
102
62
100
412
238
        print {$fh} $tag if( $tag);
4072
102
102
96
117
        pop @{$t->{_twig_context_stack}}; # will be pushed back in _twig_start
4073
102
156
        _twig_start( $p, $gi, @_);
4074
102
165
        $t->root->_set_flushed; # or the root start tag gets output the first time we flush
4075       }
4076     elsif( $t->{twig_starttag_handlers})
4077       { # look for start tag handlers
4078
4079
38
140
        my @handlers= _handler( $t, $t->{twig_starttag_handlers}, $gi);
4080
38
24
        my $last_handler_res;
4081
38
36
        foreach my $handler ( @handlers)
4082
13
30
          { $last_handler_res= $handler->($t, $gi, %att);
4083
13
50
77
            last unless $last_handler_res;
4084           }
4085         ## no critic (TestingAndDebugging::ProhibitNoStrict);
4086
187
187
187
188
561
154
6156
        no strict 'refs';
4087
38
27
50
25
98
44
        print {$fh} $tag if( $tag && (!@handlers || $last_handler_res));
4088       }
4089     else
4090       {
4091         ## no critic (TestingAndDebugging::ProhibitNoStrict);
4092
187
187
187
187
494
161
28858
        no strict 'refs';
4093
240
121
100
819
136
        print {$fh} $tag if( $tag);
4094       }
4095
380
1054
    return;
4096   }
4097
4098 sub _twig_end_check_roots
4099    { # warn " in _twig_end_check_roots...\n"; # DEBUG handler
4100
4101
380
380
446
    my( $p, $gi, %att)= @_;
4102
380
303
    my $t= $p->{twig};
4103     # $tag can be empty (<elt/>), hence the undef and the tests for defined
4104
380
100
100
687
    my $tag= $t->{twig_default_print} ? $t->{twig_keep_encoding} ? $p->original_string
4105                                                                  : $p->recognized_string
4106                                       : undef;
4107
380
50
1373
    my $fh= $t->{twig_output_fh} || select() || \*STDOUT;
4108
4109
380
100
460
    if( $t->{twig_endtag_handlers})
4110       { # look for end tag handlers
4111
40
51
        my @handlers= _handler( $t, $t->{twig_endtag_handlers}, $gi);
4112
40
24
        my $last_handler_res=1;
4113
40
40
        foreach my $handler ( @handlers)
4114
13
100
26
          { $last_handler_res= $handler->($t, $gi) || last; }
4115         #if( ! $last_handler_res)
4116         # { pop @{$t->{_twig_context_stack}}; warn "tested";
4117         # return;
4118         # }
4119       }
4120     {
4121       ## no critic (TestingAndDebugging::ProhibitNoStrict);
4122
187
187
187
380
187
574
160
34846
281
      no strict 'refs';
4123
380
210
100
427
240
      print {$fh} $tag if( defined $tag);
4124     }
4125
380
100
610
    if( $p->depth == 0)
4126       {
4127
102
419
        _twig_end( $p, $gi);
4128
102
145
        $t->root->{end_tag_flushed}=1;
4129       }
4130
4131
380
380
919
323
    pop @{$t->{_twig_context_stack}};
4132
380
906
    return;
4133   }
4134
4135 sub _twig_pi_check_roots
4136    { # warn " in _twig_pi_check_roots...\n"; # DEBUG handler
4137
78
78
114
    my( $p, $target, $data)= @_;
4138
78
70
    my $t= $p->{twig};
4139
78
100
100
194
    my $pi= $t->{twig_default_print} ? $t->{twig_keep_encoding} ? $p->original_string
4140                                                                 : $p->recognized_string
4141                                     : undef;
4142
78
50
338
    my $fh= $t->{twig_output_fh} || select() || \*STDOUT;
4143
4144
78
100
67
201
    if( my $handler= $t->{twig_handlers}->{pi_handlers}->{$target}
4145                      || $t->{twig_handlers}->{pi_handlers}->{''}
4146       )
4147       { # if handler is called on pi, then it needs to be processed as a regular node
4148
60
102
        my @flags= qw( twig_process_pi twig_keep_pi);
4149
60
60
43
90
        my @save= @{$t}{@flags}; # save pi related flags
4150
60
60
48
66
        @{$t}{@flags}= (1, 0); # override them, pi needs to be processed
4151
60
76
        _twig_pi( @_); # call handler on the pi
4152
60
60
54
112
        @{$t}{@flags}= @save;; # restore flag
4153       }
4154     else
4155       {
4156         ## no critic (TestingAndDebugging::ProhibitNoStrict);
4157
187
187
187
187
571
165
363718
        no strict 'refs';
4158
18
18
50
23
23
        print {$fh} $pi if( defined( $pi));
4159       }
4160
78
190
    return;
4161   }
4162
4163
4164 sub _output_ignored
4165
32
32
25
  { my( $t, $p)= @_;
4166
32
23
    my $action= $t->{twig_ignore_action};
4167
4168
32
100
35
    my $get_string= $t->{twig_keep_encoding} ? 'original_string' : 'recognized_string';
4169
4170
32
8
100
29
14
    if( $action eq 'print' ) { print $p->$get_string; }
4171     else
4172
24
14
      { my $string_ref;
4173
24
100
50
33
43
        if( $action eq 'string')
4174
12
10
          { $string_ref= \$t->{twig_buffered_string}; }
4175         elsif( ref( $action) && ref( $action) eq 'SCALAR')
4176
12
8
          { $string_ref= $action; }
4177         else
4178
0
0
          { _croak( "wrong ignore action: $action"); }
4179
4180
24
43
        $$string_ref .= $p->$get_string;
4181       }
4182   }
4183
4184
4185
4186 sub _twig_ignore_start
4187    { # warn " in _twig_ignore_start...\n"; # DEBUG handler
4188
4189
55
55
95
    my( $p, $gi)= @_;
4190
55
42
    my $t= $p->{twig};
4191
55
40
    $t->{twig_ignore_level}++;
4192
55
38
    my $action= $t->{twig_ignore_action};
4193
4194
55
100
94
    $t->_output_ignored( $p) unless $action eq 'discard';
4195
55
107
    return;
4196   }
4197
4198 sub _twig_ignore_end
4199    { # warn " in _twig_ignore_end...\n"; # DEBUG handler
4200
4201
128
128
154
    my( $p, $gi)= @_;
4202
128
82
    my $t= $p->{twig};
4203
4204
128
98
    my $action= $t->{twig_ignore_action};
4205
128
100
137
    $t->_output_ignored( $p) unless $action eq 'discard';
4206
4207
128
162
    $t->{twig_ignore_level}--;
4208
4209
128
100
144
    if( ! $t->{twig_ignore_level})
4210       {
4211
57
41
        $t->{twig_current} = $t->{twig_ignore_elt};
4212
57
91
        $t->{twig_current}->set_twig_current;
4213
4214
57
72
        $t->{twig_ignore_elt}->cut; # there could possibly be a memory leak here (delete would avoid it,
4215                                      # but could also delete elements that should not be deleted)
4216
4217         # restore the saved stack to the current level
4218
57
57
24
154
        splice( @{$t->{_twig_context_stack}}, $p->depth+ 1 );
4219         #warn "stack: ", _dump_stack( $t->{_twig_context_stack}), "\n";
4220
4221
57
57
206
119
        $p->setHandlers( @{$t->{twig_saved_handlers}});
4222         # test for handlers
4223
57
100
3714
        if( $t->{twig_endtag_handlers})
4224           { # look for end tag handlers
4225
14
19
            my @handlers= _handler( $t, $t->{twig_endtag_handlers}, $gi);
4226
14
11
            my $last_handler_res=1;
4227
14
11
            foreach my $handler ( @handlers)
4228
15
100
28
              { $last_handler_res= $handler->($t, $gi) || last; }
4229           }
4230
57
57
82
58
        pop @{$t->{_twig_context_stack}};
4231       };
4232
128
312
    return;
4233   }
4234
4235 #sub _dump_stack { my( $stack)= @_; return join( ":", map { $_->{$ST_TAG} } @$stack); }
4236
4237 sub ignore
4238
60
60
1
63
  { my( $t, $elt, $action)= @_;
4239
60
42
    my $current= $t->{twig_current};
4240
4241
60
6
100
50
740
6
    if( ! ($elt && ref( $elt) && isa( $elt, 'XML::Twig::Elt'))) { $elt= $current; }
4242
4243     #warn "ignore: current = ", $current->tag, ", elt = ", $elt->tag, ")\n";
4244
4245     # we need the ($elt == $current->{last_child}) test because the current element is set to the
4246     # parent _before_ handlers are called (and I can't figure out how to fix this)
4247
60
100
83
153
    unless( ($elt == $current) || ($current->{last_child} && ($elt == $current->{last_child})) || $current->in( $elt))
4248
2
3
      { _croak( "element to be ignored must be ancestor of current element"); }
4249
4250
58
100
101
    $t->{twig_ignore_level}= $current == $elt ? 1 : $t->_level_in_stack( $current) - $t->_level_in_stack($elt) + 1;
4251     #warn "twig_ignore_level: $t->{twig_ignore_level} (current: ", $current->tag, ", elt: ", $elt->tag, ")\n";
4252
58
189
    $t->{twig_ignore_elt} = $elt; # save it, so we can delete it later
4253
4254
58
100
159
    $action ||= 'discard';
4255
58
100
60
238
    if( !($action eq 'print' || $action eq 'string' || ( ref( $action) && ref( $action) eq 'SCALAR')))
4256
49
41
      { $action= 'discard'; }
4257
4258
58
61
    $t->{twig_ignore_action}= $action;
4259
4260
58
43
    my $p= $t->{twig_parser};
4261
58
225
    my @saved_handlers= $p->setHandlers( %twig_handlers_ignore); # set handlers
4262
4263
58
100
3555
    my $get_string= $t->{twig_keep_encoding} ? 'original_string' : 'recognized_string';
4264
4265
58
46
    my $default_handler;
4266
4267
58
100
93
    if( $action ne 'discard')
4268
9
100
10
      { if( $action eq 'print')
4269
2
4
4
10
8
          { $p->setHandlers( Default => sub { print $_[0]->$get_string; }); }
4270         else
4271
7
6
          { my $string_ref;
4272
7
100
50
33
20
            if( $action eq 'string')
4273
4
2
100
8
5
              { if( ! exists $t->{twig_buffered_string}) { $t->{twig_buffered_string}=''; }
4274
4
5
                $string_ref= \$t->{twig_buffered_string};
4275               }
4276             elsif( ref( $action) && ref( $action) eq 'SCALAR')
4277
3
3
              { $string_ref= $action; }
4278
4279
7
14
14
29
41
            $p->setHandlers( Default => sub { $$string_ref .= $_[0]->$get_string; });
4280           }
4281
9
86
        $t->_output_ignored( $p, $action);
4282       }
4283
4284
4285
58
252
    $t->{twig_saved_handlers}= \@saved_handlers; # save current handlers
4286   }
4287
4288 sub _level_in_stack
4289
30
30
25
  { my( $t, $elt)= @_;
4290
30
18
    my $level=1;
4291
30
30
18
36
    foreach my $elt_in_stack ( @{$t->{_twig_context_stack}} )
4292
83
30
100
67
201
54
      { if( $elt_in_stack->{$ST_ELT} && ($elt == $elt_in_stack->{$ST_ELT})) { return $level }
4293
53
36
        $level++;
4294       }
4295   }
4296
4297
4298
4299 # select $t->{twig_output_fh} and store the current selected fh
4300 sub _set_fh_to_twig_output_fh
4301
3182
2080
  { my $t= shift;
4302
3182
2477
    my $output_fh= $t->{twig_output_fh};
4303
3182
100
100
5264
    if( $output_fh && !$t->{twig_output_fh_selected})
4304       { # there is an output fh
4305
59
150
        $t->{twig_selected_fh}= select(); # store the currently selected fh
4306
59
71
        $t->{twig_output_fh_selected}=1;
4307
59
90
        select $output_fh; # select the output fh for the twig
4308       }
4309   }
4310
4311 # select the fh that was stored in $t->{twig_selected_fh}
4312 # (before $t->{twig_output_fh} was selected)
4313 sub _set_fh_to_selected_fh
4314
3084
1948
  { my $t= shift;
4315
3084
100
4264
    return unless( $t->{twig_output_fh});
4316
59
66
    my $selected_fh= $t->{twig_selected_fh};
4317
59
50
    $t->{twig_output_fh_selected}=0;
4318
59
119
    select $selected_fh;
4319
59
65
    return;
4320   }
4321
4322
4323 sub encoding
4324
8
100
8
1
34
  { return $_[0]->{twig_xmldecl}->{encoding} if( $_[0]->{twig_xmldecl}); }
4325
4326 sub set_encoding
4327
5
5
1
11
  { my( $t, $encoding)= @_;
4328
5
100
25
    $t->{twig_xmldecl} ||={};
4329
5
100
10
    $t->set_xml_version( "1.0") unless( $t->xml_version);
4330
5
7
    $t->{twig_xmldecl}->{encoding}= $encoding;
4331
5
7
    return $t;
4332   }
4333
4334 sub output_encoding
4335
2
2
1
7
  { return $_[0]->{output_encoding}; }
4336
4337 sub set_output_encoding
4338
18
18
1
79
  { my( $t, $encoding)= @_;
4339
18
100
26
    my $output_filter= $t->output_filter || '';
4340
4341
18
100
71
147
    if( ($encoding && $encoding !~ m{^utf-?8$}i) || $t->{twig_keep_encoding} || $output_filter)
4342
16
100
45
      { $t->set_output_filter( _encoding_filter( $encoding || '')); }
4343
4344
18
86
    $t->{output_encoding}= $encoding;
4345
18
32
    return $t;
4346   }
4347
4348 sub xml_version
4349
12
100
12
1
53
  { return $_[0]->{twig_xmldecl}->{version} if( $_[0]->{twig_xmldecl}); }
4350
4351 sub set_xml_version
4352
5
5
1
9
  { my( $t, $version)= @_;
4353
5
100
11
    $t->{twig_xmldecl} ||={};
4354
5
8
    $t->{twig_xmldecl}->{version}= $version;
4355
5
7
    return $t;
4356   }
4357
4358 sub standalone
4359
6
100
6
1
24
  { return $_[0]->{twig_xmldecl}->{standalone} if( $_[0]->{twig_xmldecl}); }
4360
4361 sub set_standalone
4362
3
3
1
9
  { my( $t, $standalone)= @_;
4363
3
100
9
    $t->{twig_xmldecl} ||={};
4364
3
100
5
    $t->set_xml_version( "1.0") unless( $t->xml_version);
4365
3
4
    $t->{twig_xmldecl}->{standalone}= $standalone;
4366
3
5
    return $t;
4367   }
4368
4369
4370 # SAX methods
4371
4372 sub toSAX1
4373
2
100
2
1
16
  { _croak( "cannot use toSAX1 while parsing (use flush_toSAX1)") if (defined $_[0]->{twig_parser});
4374
1
4
    shift(@_)->_toSAX(@_, \&XML::Twig::Elt::_start_tag_data_SAX1,
4375                           \&XML::Twig::Elt::_end_tag_data_SAX1
4376              );
4377   }
4378
4379 sub toSAX2
4380
5
100
5
1
280
  { _croak( "cannot use toSAX2 while parsing (use flush_toSAX2)") if (defined $_[0]->{twig_parser});
4381
4
9
    shift(@_)->_toSAX(@_, \&XML::Twig::Elt::_start_tag_data_SAX2,
4382                           \&XML::Twig::Elt::_end_tag_data_SAX2
4383              );
4384   }
4385
4386
4387 sub _toSAX
4388
5
5
8
  { my( $t, $handler, $start_tag_data, $end_tag_data) = @_;
4389
4390
5
100
25
    if( my $start_document = $handler->can( 'start_document'))
4391
4
12
      { $start_document->( $handler); }
4392
4393
5
750
    $t->_prolog_toSAX( $handler);
4394
4395
5
5
50
57
6
    if( $t->root) { $t->root->_toSAX( $handler, $start_tag_data, $end_tag_data) ; }
4396
5
50
45
    if( my $end_document = $handler->can( 'end_document'))
4397
5
11
      { $end_document->( $handler); }
4398   }
4399
4400
4401 sub flush_toSAX1
4402
3
3
1
9
  { shift(@_)->_flush_toSAX(@_, \&XML::Twig::Elt::_start_tag_data_SAX1,
4403                                \&XML::Twig::Elt::_end_tag_data_SAX1
4404              );
4405   }
4406
4407 sub flush_toSAX2
4408
3
3
1
16
  { shift(@_)->_flush_toSAX(@_, \&XML::Twig::Elt::_start_tag_data_SAX2,
4409                                \&XML::Twig::Elt::_end_tag_data_SAX2
4410              );
4411   }
4412
4413 sub _flush_toSAX
4414
6
6
7
  { my( $t, $handler, $start_tag_data, $end_tag_data)= @_;
4415
4416     # the "real" last element processed, as _twig_end has closed it
4417
6
4
    my $last_elt;
4418
6
100
7
    if( $t->{twig_current})
4419
4
8
      { $last_elt= $t->{twig_current}->_last_child; }
4420     else
4421
2
2
      { $last_elt= $t->{twig_root}; }
4422
4423
6
5
    my $elt= $t->{twig_root};
4424
6
100
10
    unless( $elt->_flushed)
4425       { # init unless already done (ie root has been flushed)
4426
2
50
7
        if( my $start_document = $handler->can( 'start_document'))
4427
2
7
          { $start_document->( $handler); }
4428         # flush the DTD
4429
2
394
        $t->_prolog_toSAX( $handler)
4430       }
4431
4432
6
7
    while( $elt)
4433
18
8
      { my $next_elt;
4434
18
100
67
33
        if( $last_elt && $last_elt->in( $elt))
4435           {
4436
6
100
6
            unless( $elt->_flushed)
4437               { # just output the front tag
4438
4
50
10
                if( my $start_element = $handler->can( 'start_element'))
4439
4
50
7
                 { if( my $tag_data= $start_tag_data->( $elt))
4440
4
11
                     { $start_element->( $handler, $tag_data); }
4441                  }
4442
4
100
                $elt->_set_flushed;
4443               }
4444
6
5
            $next_elt= $elt->{first_child};
4445           }
4446         else
4447           { # an element before the last one or the last one,
4448
12
10
            $next_elt= $elt->{next_sibling};
4449
12
13
            $elt->_toSAX( $handler, $start_tag_data, $end_tag_data);
4450
12
60
            $elt->delete;
4451
12
100
67
31
            last if( $last_elt && ($elt == $last_elt));
4452           }
4453
12
29
        $elt= $next_elt;
4454       }
4455
6
100
18
    if( !$t->{twig_parsing})
4456
2
50
7
      { if( my $end_document = $handler->can( 'end_document'))
4457
2
6
          { $end_document->( $handler); }
4458       }
4459   }
4460
4461
4462 sub _prolog_toSAX
4463
7
7
11
  { my( $t, $handler)= @_;
4464
7
9
    $t->_xmldecl_toSAX( $handler);
4465
7
87
    $t->_DTD_toSAX( $handler);
4466   }
4467
4468 sub _xmldecl_toSAX
4469
7
7
5
  { my( $t, $handler)= @_;
4470
7
7
    my $decl= $t->{twig_xmldecl};
4471
7
19
    my $data= { Version => $decl->{version},
4472                 Encoding => $decl->{encoding},
4473                 Standalone => $decl->{standalone},
4474           };
4475
7
100
29
    if( my $xml_decl= $handler->can( 'xml_decl'))
4476
4
8
      { $xml_decl->( $handler, $data); }
4477   }
4478
4479 sub _DTD_toSAX
4480
7
7
6
  { my( $t, $handler)= @_;
4481
7
7
    my $doctype= $t->{twig_doctype};
4482
7
100
11
    return unless( $doctype);
4483
3
7
    my $data= { Name => $doctype->{name},
4484                 PublicId => $doctype->{pub},
4485                 SystemId => $doctype->{sysid},
4486               };
4487
4488
3
100
13
    if( my $start_dtd= $handler->can( 'start_dtd'))
4489
2
5
      { $start_dtd->( $handler, $data); }
4490
4491     # I should call code to export the internal subset here
4492
4493
3
100
50
    if( my $end_dtd= $handler->can( 'end_dtd'))
4494
2
5
      { $end_dtd->( $handler); }
4495   }
4496
4497 # input/output filters
4498
4499 sub latin1
4500
2
2
1
6
  { local $SIG{__DIE__};
4501
2
50
0
0
0
6
    if( _use( 'Encode'))
4502
2
4
      { return encode_convert( 'ISO-8859-15'); }
4503     elsif( _use( 'Text::Iconv'))
4504
0
0
      { return iconv_convert( 'ISO-8859-15'); }
4505     elsif( _use( 'Unicode::Map8') && _use( 'Unicode::String'))
4506
0
0
      { return unicode_convert( 'ISO-8859-15'); }
4507     else
4508
0
0
      { return \&regexp2latin1; }
4509   }
4510
4511 sub _encoding_filter
4512   {
4513
16
16
16
9
46
      { local $SIG{__DIE__};
4514
16
33
42
        my $encoding= $_[1] || $_[0];
4515
16
50
0
0
0
24
        if( _use( 'Encode'))
4516
16
22
          { my $sub= encode_convert( $encoding);
4517
16
53
            return $sub;
4518           }
4519         elsif( _use( 'Text::Iconv'))
4520
0
0
          { return iconv_convert( $encoding); }
4521         elsif( _use( 'Unicode::Map8') && _use( 'Unicode::String'))
4522
0
0
          { return unicode_convert( $encoding); }
4523         }
4524
0
0
    _croak( "Encode, Text::Iconv or Unicode::Map8 and Unicode::String need to be installed in order to use encoding options");
4525   }
4526
4527 # shamelessly lifted from XML::TyePYX (works only with XML::Parse 2.27)
4528 sub regexp2latin1
4529
1
1
1
7
  { my $text=shift;
4530
1
0
4
0
    $text=~s{([\xc0-\xc3])(.)}{ my $hi = ord($1);
4531
0
0
                                my $lo = ord($2);
4532
0
0
                                chr((($hi & 0x03) <<6) | ($lo & 0x3F))
4533                               }ge;
4534
1
2
    return $text;
4535   }
4536
4537
4538 sub html_encode
4539
5
50
5
1
6
  { _use( 'HTML::Entities') or croak "cannot use html_encode: missing HTML::Entities";
4540
5
16
    return HTML::Entities::encode_entities($_[0] );
4541   }
4542
4543 sub safe_encode
4544
19
19
1
16
  { my $str= shift;
4545
19
50
21
      if( $perl_version < 5.008)
4546         { # the no utf8 makes the regexp work in 5.6
4547
187
187
187
187
636
153
742
          no utf8; # = perl 5.6
4548
0
0
0
0
          $str =~ s{([\xC0-\xDF].|[\xE0-\xEF]..|[\xF0-\xFF]...)}
4549                    {_XmlUtf8Decode($1)}egs;
4550         }
4551       else
4552
19
30
        { $str= encode( ascii => $str, $FB_HTMLCREF); }
4553
19
278
      return $str;
4554   }
4555
4556 sub safe_encode_hex
4557
19
19
1
13
  { my $str= shift;
4558
19
50
18
      if( $perl_version < 5.008)
4559         { # the no utf8 makes the regexp work in 5.6
4560
187
187
187
187
19617
212
409
          no utf8; # = perl 5.6
4561
0
0
0
0
          $str =~ s{([\xC0-\xDF].|[\xE0-\xEF]..|[\xF0-\xFF]...)}
4562                    {_XmlUtf8Decode($1, 1)}egs;
4563         }
4564       else
4565
19
27
        { $str= encode( ascii => $str, $FB_XMLCREF); }
4566
19
251
      return $str;
4567   }
4568
4569 # this one shamelessly lifted from XML::DOM
4570 # does NOT work on 5.8.0
4571 sub _XmlUtf8Decode
4572
4
4
15
  { my ($str, $hex) = @_;
4573
4
4
    my $len = length ($str);
4574
4
2
    my $n;
4575
4576
4
50
0
0
0
4
    if ($len == 2)
4577
4
15
      { my @n = unpack "C2", $str;
4578
4
5
        $n = (($n[0] & 0x3f) << 6) + ($n[1] & 0x3f);
4579       }
4580     elsif ($len == 3)
4581
0
0
      { my @n = unpack "C3", $str;
4582
0
0
        $n = (($n[0] & 0x1f) << 12) + (($n[1] & 0x3f) << 6) + ($n[2] & 0x3f);
4583       }
4584     elsif ($len == 4)
4585
0
0
      { my @n = unpack "C4", $str;
4586
0
0
        $n = (($n[0] & 0x0f) << 18) + (($n[1] & 0x3f) << 12)
4587            + (($n[2] & 0x3f) << 6) + ($n[3] & 0x3f);
4588       }
4589     elsif ($len == 1) # just to be complete...
4590
0
0
      { $n = ord ($str); }
4591     else
4592
0
0
      { croak "bad value [$str] for _XmlUtf8Decode"; }
4593
4594
4
100
7
    my $char= $hex ? sprintf ("&#x%x;", $n) : "&#$n;";
4595
4
10
    return $char;
4596   }
4597
4598
4599 sub unicode_convert
4600
1
50
1
1
857185
  { my $enc= $_[1] ? $_[1] : $_[0]; # so the method can be called on the twig or directly
4601
1
50
4
    _use( 'Unicode::Map8') or croak "Unicode::Map8 not available, needed for encoding filter: $!";
4602
1
50
2
    _use( 'Unicode::String') or croak "Unicode::String not available, needed for encoding filter: $!";
4603
1
28
    import Unicode::String qw(utf8);
4604
1
1
1
1
1
50
1
1
47
106
3
1
116
    my $sub= eval qq{ { $NO_WARNINGS;
4605                         my \$cnv;
4606                         BEGIN { \$cnv= Unicode::Map8->new(\$enc)
4607                                      or croak "Can't create converter to \$enc";
4608                               }
4609                         sub { return \$cnv->to8 (utf8(\$_[0])->ucs2); }
4610                       }
4611                     };
4612
1
0
50
3
0
    unless( $sub) { croak $@; }
4613
1
6
    return $sub;
4614   }
4615
4616 sub iconv_convert
4617
4
50
4
1
225002
  { my $enc= $_[1] ? $_[1] : $_[0]; # so the method can be called on the twig or directly
4618
4
50
10
    _use( 'Text::Iconv') or croak "Text::Iconv not available, needed for encoding filter: $!";
4619
4
1
1
1
1
1
1
1
1
50
50
1
1
1
1
169
3
1
106
3
1
117
4
1
    my $sub= eval qq{ { $NO_WARNINGS;
4620                         my \$cnv;
4621                         BEGIN { \$cnv = Text::Iconv->new( 'utf8', \$enc)
4622                                      or croak "Can't create iconv converter to \$enc";
4623                               }
4624                         sub { return \$cnv->convert( \$_[0]); }
4625                       }
4626                     };
4627
4
100
9
    unless( $sub)
4628
1
50
3
      { if( $@=~ m{^Unsupported conversion: Invalid argument})
4629
0
0
          { croak "Unsupported encoding: $enc"; }
4630         else
4631
1
94
          { croak $@; }
4632       }
4633
4634
3
14
    return $sub;
4635   }
4636
4637 sub encode_convert
4638
18
50
18
1
27
  { my $enc= $_[1] ? $_[1] : $_[0]; # so the method can be called on the twig or directly
4639
18
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1051
4
1
108
4
1
101
3
1
116
4
1
103
3
2
106
2
2
112
3
2
102
3
1
109
3
2
109
3
0
102
112
3
1
117
3
1
3
1
116
3
1
114
3
1
117
3
2
117
    my $sub= eval qq{sub { $NO_WARNINGS; return encode( "$enc", \$_[0]); } };
4640
18
50
33
    croak "can't create Encode-based filter: $@" unless( $sub);
4641
18
28
    return $sub;
4642   }
4643
4644
4645 # XML::XPath compatibility
4646
136
136
1
89050
sub getRootNode { return $_[0]; }
4647
17
17
1
103
sub getParentNode { return undef; }
4648
184
184
100
184
1
8726
446
sub getChildNodes { my @children= ($_[0]->root); return wantarray ? @children : \@children; }
4649
4650
3
3
76269
sub _weakrefs { return $weakrefs; }
4651
13
13
100
100
13
294
39
sub _set_weakrefs { $weakrefs=shift() || 0; XML::Twig::Elt::set_destroy()if ! $weakrefs; } # for testing purposes
4652
4653 sub _dump
4654
6
6
12
  { my $t= shift;
4655
6
7
    my $dump='';
4656
4657
6
5
    $dump="document\n"; # should dump twig level data here
4658
6
5
100
12
7
    if( $t->root) { $dump .= $t->root->_dump( @_); }
4659
4660
6
12
    return $dump;
4661
4662   }
4663
4664
4665 1;
4666
4667 ######################################################################
4668 package XML::Twig::Entity_list;
4669 ######################################################################
4670
4671 *isa= *UNIVERSAL::isa;
4672
4673 sub new
4674
3114
2541
  { my $class = shift;
4675
3114
5362
    my $self={ entities => {}, updated => 0};
4676
4677
3114
4488
    bless $self, $class;
4678
3114
4149
    return $self;
4679
4680   }
4681
4682 sub add_new_ent
4683
5
5
5
  { my $ent_list= shift;
4684
5
10
    my $ent= XML::Twig::Entity->new( @_);
4685
5
8
    $ent_list->add( $ent);
4686
5
7
    return $ent_list;
4687   }
4688
4689 sub _add_list
4690
5
5
6
  { my( $ent_list, $to_add)= @_;
4691
5
6
    my $ents_to_add= $to_add->{entities};
4692
5
100
67
18
    return $ent_list unless( $ents_to_add && %$ents_to_add);
4693
3
3
8
8
    @{$ent_list->{entities}}{keys %$ents_to_add}= values %$ents_to_add;
4694
3
4
    $ent_list->{updated}=1;
4695
3
4
    return $ent_list;
4696   }
4697
4698 sub add
4699
82
82
80
  { my( $ent_list, $ent)= @_;
4700
82
156
    $ent_list->{entities}->{$ent->{name}}= $ent;
4701
82
72
    $ent_list->{updated}=1;
4702
82
78
    return $ent_list;
4703   }
4704
4705 sub ent
4706
16
16
20
  { my( $ent_list, $ent_name)= @_;
4707
16
112
    return $ent_list->{entities}->{$ent_name};
4708   }
4709
4710 # can be called with an entity or with an entity name
4711 sub delete
4712
4
4
6
  { my $ent_list= shift;
4713
4
100
16
    if( isa( ref $_[0], 'XML::Twig::Entity'))
4714       { # the second arg is an entity
4715
1
2
        my $ent= shift;
4716
1
3
        delete $ent_list->{entities}->{$ent->{name}};
4717       }
4718     else
4719       { # the second arg was not entity, must be a string then
4720
3
5
        my $name= shift;
4721
3
5
        delete $ent_list->{entities}->{$name};
4722       }
4723
4
7
    $ent_list->{updated}=1;
4724
4
6
    return $ent_list;
4725   }
4726
4727 sub print
4728
2
2
3
  { my ($ent_list, $fh)= @_;
4729
2
50
6
    my $old_select= defined $fh ? select $fh : undef;
4730
4731
2
2
2
12
    foreach my $ent_name ( sort keys %{$ent_list->{entities}})
4732
4
18
      { my $ent= $ent_list->{entities}->{$ent_name};
4733         # we have to test what the entity is or un-defined entities can creep in
4734
4
4
50
11
5
        if( isa( $ent, 'XML::Twig::Entity')) { $ent->print(); }
4735       }
4736
2
50
16
    select $old_select if( defined $old_select);
4737
2
3
    return $ent_list;
4738   }
4739
4740 sub text
4741
16
16
19
  { my ($ent_list)= @_;
4742
16
19
16
14
44
54
    return join "\n", map { $ent_list->{entities}->{$_}->text} sort keys %{$ent_list->{entities}};
4743   }
4744
4745 # return the list of entity names
4746 sub entity_names
4747
6
6
8
  { my $ent_list= shift;
4748
6
6
7
54
    return (sort keys %{$ent_list->{entities}}) ;
4749   }
4750
4751
4752 sub list
4753
5
5
9
  { my ($ent_list)= @_;
4754
5
10
5
6
26
14
    return map { $ent_list->{entities}->{$_} } sort keys %{$ent_list->{entities}};
4755   }
4756
4757 1;
4758
4759 ######################################################################
4760 package XML::Twig::Entity;
4761 ######################################################################
4762
4763 #*isa= *UNIVERSAL::isa;
4764
4765 sub new
4766
88
88
170
  { my( $class, $name, $val, $sysid, $pubid, $ndata, $param)= @_;
4767
88
33
213
    $class= ref( $class) || $class;
4768
4769
88
88
    my $self={};
4770
4771
88
137
    $self->{name} = $name;
4772
88
100
160
    $self->{val} = $val if( defined $val );
4773
88
100
241
    $self->{sysid} = $sysid if( defined $sysid);
4774
88
100
112
    $self->{pubid} = $pubid if( defined $pubid);
4775
88
100
108
    $self->{ndata} = $ndata if( defined $ndata);
4776
88
100
112
    $self->{param} = $param if( defined $param);
4777
4778
88
133
    bless $self, $class;
4779
88
120
    return $self;
4780   }
4781
4782
4783
8
8
115
sub name { return $_[0]->{name}; }
4784
2
2
7
sub val { return $_[0]->{val}; }
4785
3
100
3
14
sub sysid { return defined( $_[0]->{sysid}) ? $_[0]->{sysid} : ''; }
4786
2
50
2
9
sub pubid { return defined( $_[0]->{pubid}) ? $_[0]->{pubid} : ''; }
4787
2
100
2
9
sub ndata { return defined( $_[0]->{ndata}) ? $_[0]->{ndata} : ''; }
4788
2
100
2
13
sub param { return defined( $_[0]->{param}) ? $_[0]->{param} : ''; }
4789
4790
4791 sub print
4792
14
14
188
  { my ($ent, $fh)= @_;
4793
14
21
    my $text= $ent->text;
4794
14
0
50
27
0
    if( $fh) { print $fh $text . "\n"; }
4795
14
62
    else { print $text . "\n"; }
4796   }
4797
4798 sub sprint
4799
3
3
3
  { my ($ent)= @_;
4800
3
3
    return $ent->text;
4801   }
4802
4803 sub text
4804
119
119
115
  { my ($ent)= @_;
4805     #warn "text called: '", $ent->_dump, "'\n";
4806
119
100
189
    return '' if( !$ent->{name});
4807
117
90
    my @tokens;
4808
117
125
    push @tokens, '<!ENTITY';
4809
4810
117
100
165
    push @tokens, '%' if( $ent->{param});
4811
117
117
    push @tokens, $ent->{name};
4812
4813
117
100
50
75
442
    if( defined $ent->{val} && !defined( $ent->{sysid}) && !defined($ent->{pubid}) )
4814
58
86
      { push @tokens, _quoted_val( $ent->{val});
4815       }
4816     elsif( defined $ent->{sysid})
4817
59
100
88
      { push @tokens, 'PUBLIC', _quoted_val( $ent->{pubid}) if( $ent->{pubid});
4818
59
100
105
        push @tokens, 'SYSTEM' unless( $ent->{pubid});
4819
59
88
        push @tokens, _quoted_val( $ent->{sysid});
4820
59
100
130
        push @tokens, 'NDATA', $ent->{ndata} if( $ent->{ndata});
4821       }
4822
117
402
    return join( ' ', @tokens) . '>';
4823   }
4824
4825 sub _quoted_val
4826
121
50
121
261
  { my $q= $_[0]=~ m{"} ? q{'} : q{"};
4827
121
238
    return qq{$q$_[0]$q};
4828   }
4829
4830 sub _dump
4831
1
1
2
2
1
1
4
6
3
  { my( $ent)= @_; return join( " - ", map { "$_ => '$ent->{$_}'" } grep { defined $ent->{$_} } sort keys %$ent); }
4832
4833 1;
4834
4835 ######################################################################
4836 package XML::Twig::Elt;
4837 ######################################################################
4838
4839
187
187
187
187
327966
1105
71838
use Carp;
4840 *isa= *UNIVERSAL::isa;
4841
4842 my $CDATA_START = "<![CDATA[";
4843 my $CDATA_END = "]]>";
4844 my $PI_START = "<?";
4845 my $PI_END = "?>";
4846 my $COMMENT_START = "<!--";
4847 my $COMMENT_END = "-->";
4848
4849 my $XMLNS_URI = 'http://www.w3.org/2000/xmlns/';
4850
4851
4852 BEGIN
4853   { # set some aliases for methods
4854
187
187
370
    *tag = *gi;
4855
187
179
    *name = *gi;
4856
187
176
    *set_tag = *set_gi;
4857
187
150
    *set_name = *set_gi;
4858
187
152
    *find_nodes = *get_xpath; # as in XML::DOM
4859
187
160
    *findnodes = *get_xpath; # as in XML::LibXML
4860
187
160
    *field = *first_child_text;
4861
187
156
    *trimmed_field = *first_child_trimmed_text;
4862
187
155
    *is_field = *contains_only_text;
4863
187
163
    *is = *passes;
4864
187
148
    *matches = *passes;
4865
187
155
    *has_child = *first_child;
4866
187
159
    *has_children = *first_child;
4867
187
148
    *all_children_pass = *all_children_are;
4868
187
160
    *all_children_match= *all_children_are;
4869
187
150
    *getElementsByTagName= *descendants;
4870
187
145
    *find_by_tag_name= *descendants_or_self;
4871
187
153
    *unwrap = *erase;
4872
187
152
    *inner_xml = *xml_string;
4873
187
163
    *outer_xml = *sprint;
4874
187
177
    *add_class = *add_to_class;
4875
4876
187
168
    *first_child_is = *first_child_matches;
4877
187
164
    *last_child_is = *last_child_matches;
4878
187
159
    *next_sibling_is = *next_sibling_matches;
4879
187
178
    *prev_sibling_is = *prev_sibling_matches;
4880
187
161
    *next_elt_is = *next_elt_matches;
4881
187
168
    *prev_elt_is = *prev_elt_matches;
4882
187
160
    *parent_is = *parent_matches;
4883
187
172
    *child_is = *child_matches;
4884
187
158
    *inherited_att = *inherit_att;
4885
4886
187
172
    *sort_children_by_value= *sort_children_on_value;
4887
4888
187
188
    *has_atts= *att_nb;
4889
4890     # imports from XML::Twig
4891
187
242
    *_is_fh= *XML::Twig::_is_fh;
4892
4893     # XML::XPath compatibility
4894
187
164
    *string_value = *text;
4895
187
165
    *toString = *sprint;
4896
187
155
    *getName = *gi;
4897
187
155
    *getRootNode = *twig;
4898
187
169
    *getNextSibling = *_next_sibling;
4899
187
156
    *getPreviousSibling = *_prev_sibling;
4900
187
148
    *isElementNode = *is_elt;
4901
187
157
    *isTextNode = *is_text;
4902
187
154
    *isPI = *is_pi;
4903
187
164
    *isPINode = *is_pi;
4904
187
153
    *isProcessingInstructionNode= *is_pi;
4905
187
154
    *isComment = *is_comment;
4906
187
159
    *isCommentNode = *is_comment;
4907
187
158
    *getTarget = *target;
4908
187
162
    *getFirstChild = *_first_child;
4909
187
162
    *getLastChild = *_last_child;
4910
4911     # try using weak references
4912     # test whether we can use weak references
4913
187
187
171
540
    { local $SIG{__DIE__};
4914
187
50
0
33
8537
      if( eval 'require Scalar::Util' && defined( &Scalar::Util::weaken) )
4915
187
1342222
        { import Scalar::Util qw(weaken); }
4916       elsif( eval 'require WeakRef')
4917
0
0
        { import WeakRef; }
4918     }
4919 }
4920
4921
4922 # can be called as XML::Twig::Elt->new( [[$gi, $atts, [@content]])
4923 # - gi is an optional gi given to the element
4924 # - $atts is a hashref to attributes for the element
4925 # - @content is an optional list of text and elements that will
4926 # be inserted under the element
4927 sub new
4928
49253
4632834
  { my $class= shift;
4929
49253
67
90909
    $class= ref $class || $class;
4930
49253
41632
    my $elt = {};
4931
49253
63941
    bless ($elt, $class);
4932
4933
49253
100
50473
    return $elt unless @_;
4934
4935
49249
1
100
100
149507
3
    if( @_ == 1 && $_[0]=~ m{^\s*<}) { return $class->parse( @_); }
4936
4937     # if a gi is passed then use it
4938
49248
34562
    my $gi= shift;
4939
49248
100
92449
    $elt->{gi}=$XML::Twig::gi2index{$gi} or $elt->set_gi( $gi);
4940
4941
4942
49248
100
52132
    my $atts= ref $_[0] eq 'HASH' ? shift : undef;
4943
4944
49248
100
100
55962
    if( $atts && defined $atts->{$CDATA})
4945
4
4
      { delete $atts->{$CDATA};
4946
4947
4
9
        my $cdata= $class->new( $CDATA => @_);
4948
3
5
        return $class->new( $gi, $atts, $cdata);
4949       }
4950
4951
49244
100
100
100
100
100
107600
    if( $gi eq $PCDATA)
4952
352
151
1
100
412
308
78
      { if( grep { ref $_ } @_) { croak "element $PCDATA can only be created from text"; }
4953
351
602
        $elt->_set_pcdata( join( '', @_));
4954       }
4955     elsif( $gi eq $ENT)
4956
53
54
      { $elt->{ent}= shift; }
4957     elsif( $gi eq $CDATA)
4958
112
9
1
100
169
19
105
      { if( grep { ref $_ } @_) { croak "element $CDATA can only be created from text"; }
4959
111
201
        $elt->_set_cdata( join( '', @_));
4960       }
4961     elsif( $gi eq $COMMENT)
4962
1761
4
1
100
2071
10
77
      { if( grep { ref $_ } @_) { croak "element $COMMENT can only be created from text"; }
4963
1760
2403
        $elt->_set_comment( join( '', @_));
4964       }
4965     elsif( $gi eq $PI)
4966
1552
3
1
100
1863
4
80
      { if( grep { ref $_ } @_) { croak "element $PI can only be created from text"; }
4967
1551
2217
        $elt->_set_pi( shift, join( '', @_));
4968       }
4969     else
4970       { # the rest of the arguments are the content of the element
4971
45414
100
37427
        if( @_)
4972
135
197
          { $elt->set_content( @_); }
4973         else
4974
45279
40994
          { $elt->{empty}= 1; }
4975       }
4976
4977
49240
100
47567
    if( $atts)
4978       { # the attribute hash can be used to pass the asis status
4979
106
2
2
100
148
6
2
        if( defined $atts->{$ASIS}) { $elt->set_asis( $atts->{$ASIS} ); delete $atts->{$ASIS}; }
4980
106
2
2
100
574
3
9
        if( defined $atts->{$EMPTY}) { $elt->{empty}= $atts->{$EMPTY}; delete $atts->{$EMPTY}; }
4981
106
55
100
142
69
        if( keys %$atts) { $elt->set_atts( $atts); }
4982
106
100
129
        $elt->_set_id( $atts->{$ID}) if( $atts->{$ID});
4983       }
4984
4985
49240
49062
    return $elt;
4986   }
4987
4988 # optimized version of $elt->new( PCDATA, $text);
4989 sub _new_pcdata
4990
166
166
118
  { my $class= $_[0];
4991
166
33
242
    $class= ref $class || $class;
4992
166
146
    my $elt = {};
4993
166
225
    bless $elt, $class;
4994
166
50
308
    $elt->{gi}=$XML::Twig::gi2index{$PCDATA} or $elt->set_gi( $PCDATA);
4995
166
243
    $elt->_set_pcdata( $_[1]);
4996
166
148
    return $elt;
4997   }
4998
4999 # this function creates an XM:::Twig::Elt from a string
5000 # it is quite clumsy at the moment, as it just creates a
5001 # new twig then returns its root
5002 # there might also be memory leaks there
5003 # additional arguments are passed to new XML::Twig
5004 sub parse
5005
28
28
529
  { my $class= shift;
5006
28
4
100
56
4
    if( ref( $class)) { $class= ref( $class); }
5007
28
33
    my $string= shift;
5008
28
44
    my %args= @_;
5009
28
78
    my $t= XML::Twig->new(%args);
5010
28
42
    $t->parse( $string);
5011
28
43
    my $elt= $t->root;
5012     # clean-up the node
5013
28
35
    delete $elt->{twig}; # get rid of the twig data
5014
28
27
    delete $elt->{twig_current}; # better get rid of this too
5015
28
7
100
41
10
    if( $t->{twig_id_list}) { $elt->{twig_id_list}= $t->{twig_id_list}; }
5016
28
36
    $elt->cut;
5017
28
26
    undef $t->{twig_root};
5018
28
51
    return $elt;
5019   }
5020
5021 sub set_inner_xml
5022
2
2
2
  { my( $elt, $xml, @args)= @_;
5023
2
4
    my $new_elt= $elt->parse( "<dummy>$xml</dummy>", @args);
5024
2
2
    $elt->cut_children;
5025
2
2
    $new_elt->paste_first_child( $elt);
5026
2
3
    $new_elt->erase;
5027
2
8
    return $elt;
5028   }
5029
5030 sub set_outer_xml
5031
2
2
3
  { my( $elt, $xml, @args)= @_;
5032
2
5
    my $new_elt= $elt->parse( "<dummy>$xml</dummy>", @args);
5033
2
2
    $elt->cut_children;
5034
2
3
    $new_elt->replace( $elt);
5035
2
4
    $new_elt->erase;
5036
2
4
    return $new_elt;
5037   }
5038
5039
5040 sub set_inner_html
5041
8
8
104
  { my( $elt, $html)= @_;
5042
8
15
    my $t= XML::Twig->new->parse_html( "<html>$html</html>");
5043
8
10
    my $new_elt= $t->root;
5044
8
100
100
11
    if( $elt->tag eq 'head')
5045
2
4
      { $new_elt->first_child( 'head')->unwrap;
5046
2
5
        $new_elt->first_child( 'body')->cut;
5047       }
5048     elsif( $elt->tag ne 'html')
5049
4
6
      { $new_elt->first_child( 'head')->cut;
5050
4
12
        $new_elt->first_child( 'body')->unwrap;
5051       }
5052
8
18
    $new_elt->cut;
5053
8
10
    $elt->cut_children;
5054
8
12
    $new_elt->paste_first_child( $elt);
5055
8
9
    $new_elt->erase;
5056
8
15
    return $elt;
5057   }
5058
5059 sub set_gi
5060
966
966
881
  { my ($elt, $gi)= @_;
5061
966
100
1396
    unless( defined $XML::Twig::gi2index{$gi})
5062       { # new gi, create entries in %gi2index and @index2gi
5063
960
1166
        push @XML::Twig::index2gi, $gi;
5064
960
1487
        $XML::Twig::gi2index{$gi}= $#XML::Twig::index2gi;
5065       }
5066
966
1073
    $elt->{gi}= $XML::Twig::gi2index{$gi};
5067
966
833
    return $elt;
5068   }
5069
5070
8283
31251
sub gi { return $XML::Twig::index2gi[$_[0]->{gi}]; }
5071
5072 sub local_name
5073
114
114
776
  { my $elt= shift;
5074
114
128
    return _local_name( $XML::Twig::index2gi[$elt->{'gi'}]);
5075   }
5076
5077 sub ns_prefix
5078
86
86
60
  { my $elt= shift;
5079
86
88
    return _ns_prefix( $XML::Twig::index2gi[$elt->{'gi'}]);
5080   }
5081
5082 # namespace prefix for any qname (can be used for elements or attributes)
5083 sub _ns_prefix
5084
139
139
84
  { my $qname= shift;
5085
139
100
279
    if( $qname=~ m{^([^:]*):})
5086
61
137
      { return $1; }
5087     else
5088
78
112
      { return( ''); } # should it be '' ?
5089   }
5090
5091 # local name for any qname (can be used for elements or attributes)
5092 sub _local_name
5093
127
127
120
  { my $qname= shift;
5094
127
241
    (my $local= $qname)=~ s{^[^:]*:}{};
5095
127
195
    return $local;
5096   }
5097
5098 #sub get_namespace
5099 sub namespace ## no critic (Subroutines::ProhibitNestedSubs);
5100
81
81
65
  { my $elt= shift;
5101
81
100
111
    my $prefix= defined $_[0] ? shift() : $elt->ns_prefix;
5102
81
100
96
    my $ns_att= $prefix ? "xmlns:$prefix" : "xmlns";
5103
81
100
179
    my $expanded= $DEFAULT_NS{$prefix} || $elt->_inherit_att_through_cut( $ns_att) || '';
5104
81
153
    return $expanded;
5105   }
5106
5107 sub declare_missing_ns ## no critic (Subroutines::ProhibitNestedSubs);
5108
1
1
2
  { my $root= shift;
5109
1
1
    my %missing_prefix;
5110
1
12
    my $map= $root->_current_ns_prefix_map;
5111
5112
1
2
    foreach my $prefix (keys %$map)
5113
1
50
2
      { my $prefix_att= $prefix eq '#default' ? 'xmlns' : "xmlns:$prefix";
5114
1
50
3
        if( ! $root->{'att'}->{$prefix_att})
5115
1
2
          { $root->set_att( $prefix_att => $map->{$prefix}); }
5116       }
5117
1
3
    return $root;
5118   }
5119
5120 sub _current_ns_prefix_map
5121
1
1
1
  { my( $elt)= shift;
5122
1
1
    my $map;
5123
1
2
    while( $elt)
5124
2
2
      { foreach my $att ($elt->att_names)
5125
1
50
50
5
          { my $prefix= $att eq 'xmlns' ? '#default'
5126                       : $att=~ m{^xmlns:(.*)$} ? $1
5127                       : next
5128                       ;
5129
1
1
50
3
2
            if( ! exists $map->{$prefix}) { $map->{$prefix}= $elt->{'att'}->{$att}; }
5130           }
5131
2
33
9
        $elt= $elt->{parent} || ($elt->{former} && $elt->{former}->{parent});
5132       }
5133
1
1
    return $map;
5134   }
5135
5136 sub set_ns_decl
5137
5
5
3
  { my( $elt, $uri, $prefix)= @_;
5138
5
100
9
    my $ns_att= $prefix ? "xmlns:$prefix" : 'xmlns';
5139
5
6
    $elt->set_att( $ns_att => $uri);
5140
5
8
    return $elt;
5141   }
5142
5143 sub set_ns_as_default
5144
1
1
2
  { my( $root, $uri)= @_;
5145
1
0
    my @ns_decl_to_remove;
5146
1
3
    foreach my $elt ($root->descendants_or_self)
5147
7
100
67
7
      { if( $elt->_ns_prefix && $elt->namespace eq $uri)
5148
2
3
          { $elt->set_tag( $elt->local_name); }
5149         # store any namespace declaration for that uri
5150
7
1
50
8
7
        foreach my $ns_decl (grep { $_=~ m{xmlns(:|$)} && $elt->{'att'}->{$_} eq $uri } $elt->att_names)
5151
1
3
          { push @ns_decl_to_remove, [$elt, $ns_decl]; }
5152       }
5153
1
2
    $root->set_ns_decl( $uri);
5154     # now remove the ns declarations (if done earlier then descendants of an element with the ns declaration
5155     # are not considered being in the namespace
5156
1
1
    foreach my $ns_decl_to_remove ( @ns_decl_to_remove)
5157
1
1
      { my( $elt, $ns_decl)= @$ns_decl_to_remove;
5158
1
1
        $elt->del_att( $ns_decl);
5159       }
5160
5161
1
3
    return $root;
5162   }
5163
5164
5165
5166 # return #ELT for an element and #PCDATA... for others
5167 sub get_type
5168
6
6
10
  { my $gi_nb= $_[0]->{gi}; # the number, not the string
5169
6
100
13
    return $ELT if( $gi_nb >= $XML::Twig::SPECIAL_GI);
5170
5
9
    return $_[0]->gi;
5171   }
5172
5173 # return the gi if it's a "real" element, 0 otherwise
5174 sub is_elt
5175
4947
100
17992
  { if( $_[0]->{gi} >= $XML::Twig::SPECIAL_GI)
5176
4663
5016
     { return $_[0]->gi; }
5177     else
5178
284
663
      { return 0; }
5179   }
5180
5181
5182 sub is_pcdata
5183
46
46
38
  { my $elt= shift;
5184
46
252
    return (exists $elt->{'pcdata'});
5185   }
5186
5187 sub is_cdata
5188
1
1
3
  { my $elt= shift;
5189
1
2
    return (exists $elt->{'cdata'});
5190   }
5191
5192 sub is_pi
5193
9
9
7
  { my $elt= shift;
5194
9
19
    return (exists $elt->{'target'});
5195   }
5196
5197 sub is_comment
5198
5
5
5
  { my $elt= shift;
5199
5
11
    return (exists $elt->{'comment'});
5200   }
5201
5202 sub is_ent
5203
1
1
1
  { my $elt= shift;
5204
1
33
4
    return (exists $elt->{ent} || $elt->{ent_name});
5205   }
5206
5207
5208 sub is_text
5209
2293
1570
  { my $elt= shift;
5210
2293
100
10875
    return (exists( $elt->{'pcdata'}) || (exists $elt->{'cdata'}));
5211   }
5212
5213 sub is_empty
5214
11
100
11
48
  { return $_[0]->{empty} || 0; }
5215
5216 sub set_empty
5217
7
7
100
7
24
11
  { $_[0]->{empty}= defined( $_[1]) ? $_[1] : 1; return $_[0]; }
5218
5219 sub set_not_empty
5220
2
2
100
2
9
4
  { delete $_[0]->{empty} if( $_[0]->{'empty'}); return $_[0]; }
5221
5222
5223 sub set_asis
5224
20
20
21
  { my $elt=shift;
5225
5226
20
28
    foreach my $descendant ($elt, $elt->_descendants )
5227
27
29
      { $descendant->{asis}= 1;
5228
27
100
52
        if( (exists $descendant->{'cdata'}))
5229
2
50
3
          { $descendant->{gi}=$XML::Twig::gi2index{$PCDATA} or $descendant->set_gi( $PCDATA);
5230
2
3
            $descendant->_set_pcdata( $descendant->{cdata});
5231           }
5232
5233       }
5234
20
28
    return $elt;
5235   }
5236
5237 sub set_not_asis
5238
1
1
2
  { my $elt=shift;
5239
1
2
    foreach my $descendant ($elt, $elt->descendants)
5240
2
50
4
      { delete $descendant->{asis} if $descendant->{asis};}
5241
1
2
    return $elt;
5242   }
5243
5244 sub is_asis
5245
89
89
123
  { return $_[0]->{asis}; }
5246
5247 sub closed
5248
17
17
60
  { my $elt= shift;
5249
17
100
39
    my $t= $elt->twig || return;
5250
16
25
    my $curr_elt= $t->{twig_current};
5251
16
100
29
    return 1 unless( $curr_elt);
5252
14
32
    return $curr_elt->in( $elt);
5253   }
5254
5255 sub set_pcdata
5256
46
46
49
  { my( $elt, $pcdata)= @_;
5257
5258
46
100
63
    if( $elt->{extra_data_in_pcdata})
5259
11
14
      { _try_moving_extra_data( $elt, $pcdata);
5260       }
5261
46
222
    $elt->{pcdata}= $pcdata;
5262
46
41
    return $elt;
5263   }
5264
5265
51
51
82
sub _extra_data_in_pcdata { return $_[0]->{extra_data_in_pcdata}; }
5266
12
12
12
13
11
sub _set_extra_data_in_pcdata { $_[0]->{extra_data_in_pcdata}= $_[1]; return $_[0]; }
5267
3
3
3
6
9
sub _del_extra_data_in_pcdata { delete $_[0]->{extra_data_in_pcdata}; return $_[0]; }
5268 sub _unshift_extra_data_in_pcdata
5269
14
14
7
    { my $e= shift;
5270
14
100
29
      $e->{extra_data_in_pcdata}||=[];
5271
14
14
10
34
      unshift @{$e->{extra_data_in_pcdata}}, { text => shift(), offset => shift() };
5272     }
5273 sub _push_extra_data_in_pcdata
5274
116
116
78
  { my $e= shift;
5275
116
100
258
    $e->{extra_data_in_pcdata}||=[];
5276
116
116
65
277
    push @{$e->{extra_data_in_pcdata}}, { text => shift(), offset => shift() };
5277   }
5278
5279
10
100
10
44
sub _extra_data_before_end_tag { return $_[0]->{extra_data_before_end_tag} || ''; }
5280
332
332
332
487
236
sub _set_extra_data_before_end_tag { $_[0]->{extra_data_before_end_tag}= $_[1]; return $_[0]}
5281
2
2
2
3
3
sub _del_extra_data_before_end_tag { delete $_[0]->{extra_data_before_end_tag}; return $_[0]}
5282 sub _prefix_extra_data_before_end_tag
5283
9
9
7
  { my( $elt, $data)= @_;
5284
9
100
103
    if($elt->{extra_data_before_end_tag})
5285
2
4
      { $elt->{extra_data_before_end_tag}= $data . $elt->{extra_data_before_end_tag}; }
5286     else
5287
7
11
      { $elt->{extra_data_before_end_tag}= $data; }
5288
9
10
    return $elt;
5289   }
5290
5291 # internal, in cases where we know there is no extra_data (inlined anyway!)
5292
681
681
977
sub _set_pcdata { $_[0]->{pcdata}= $_[1]; }
5293
5294 # try to figure out if we can keep the extra_data around
5295 sub _try_moving_extra_data
5296
11
11
9
  { my( $elt, $modified)=@_;
5297
11
12
    my $initial= $elt->{pcdata};
5298
11
7
    my $cpis= $elt->{extra_data_in_pcdata};
5299
5300
11
100
100
40
    if( (my $offset= index( $modified, $initial)) != -1)
5301       { # text has been added
5302
1
1
2
2
        foreach (@$cpis) { $_->{offset}+= $offset; }
5303       }
5304     elsif( ($offset= index( $initial, $modified)) != -1)
5305       { # text has been cut
5306
4
3
        my $len= length( $modified);
5307
4
4
4
6
        foreach my $cpi (@$cpis) { $cpi->{offset} -= $offset; }
5308
4
4
100
5
15
        $elt->_set_extra_data_in_pcdata( [ grep { $_->{offset} >= 0 && $_->{offset} < $len } @$cpis ]);
5309       }
5310     else
5311
6
100
100
10
      { _match_extra_data_words( $elt, $initial, $modified)
5312         || _match_extra_data_chars( $elt, $initial, $modified)
5313         || $elt->_del_extra_data_in_pcdata;
5314       }
5315   }
5316
5317 sub _match_extra_data_words
5318
6
6
8
  { my( $elt, $initial, $modified)= @_;
5319
6
82
    my @initial= split /\b/, $initial;
5320
6
35867
    my @modified= split /\b/, $modified;
5321
5322
6
18
    return _match_extra_data( $elt, length( $initial), \@initial, \@modified);
5323   }
5324
5325 sub _match_extra_data_chars
5326
4
4
6
  { my( $elt, $initial, $modified)= @_;
5327
4
10
    my @initial= split //, $initial;
5328
4
12
    my @modified= split //, $modified;
5329
5330
4
8
    return _match_extra_data( $elt, length( $initial), \@initial, \@modified);
5331   }
5332
5333 sub _match_extra_data
5334
10
10
8
  { my( $elt, $length, $initial, $modified)= @_;
5335
5336
10
17
    my $cpis= $elt->{extra_data_in_pcdata};
5337
5338
10
100
18
    if( @$initial <= @$modified)
5339       {
5340
5
7
        my( $ok, $positions, $offsets)= _pos_offset( $initial, $modified);
5341
5
100
9
        if( $ok)
5342
2
3
          { my $offset=0;
5343
2
3
            my $pos= shift @$positions;
5344
2
3
            foreach my $cpi (@$cpis)
5345
2
3
              { while( $cpi->{offset} >= $pos)
5346
2
1
                  { $offset= shift @$offsets;
5347
2
33
6
                    $pos= shift @$positions || $length +1;
5348                   }
5349
2
3
                $cpi->{offset} += $offset;
5350               }
5351
2
7
            return 1;
5352           }
5353       }
5354     else
5355
5
8
      { my( $ok, $positions, $offsets)= _pos_offset( $modified, $initial);
5356
5
100
9
        if( $ok)
5357           { #print STDERR "pos: ", join( ':', @$positions), "\n",
5358             # "offset: ", join( ':', @$offsets), "\n";
5359
1
1
            my $offset=0;
5360
1
1
            my $pos= shift @$positions;
5361
1
1
            my $prev_pos= 0;
5362
5363
1
1
            foreach my $cpi (@$cpis)
5364
1
2
              { while( $cpi->{offset} >= $pos)
5365
1
1
                  { $offset= shift @$offsets;
5366
1
1
                    $prev_pos= $pos;
5367
1
33
3
                    $pos= shift @$positions || $length +1;
5368                   }
5369
1
2
                $cpi->{offset} -= $offset;
5370
1
1
50
2
2
                if( $cpi->{offset} < $prev_pos) { delete $cpi->{text}; }
5371               }
5372
1
1
1
3
            $elt->_set_extra_data_in_pcdata( [ grep { exists $_->{text} } @$cpis ]);
5373
1
4
            return 1;
5374           }
5375       }
5376
7
25
    return 0;
5377   }
5378
5379
5380 sub _pos_offset
5381
10
10
8
  { my( $short, $long)= @_;
5382
10
6
    my( @pos, @offset);
5383
10
7
    my( $s_length, $l_length)=(0,0);
5384
10
15
    while (@$short)
5385
22
19
      { my $s_word= shift @$short;
5386
22
13
        my $l_word= shift @$long;
5387
22
100
26
        if( $s_word ne $l_word)
5388
10
100
26
          { while( @$long && $s_word ne $l_word)
5389
31
22
              { $l_length += length( $l_word);
5390
31
60
                $l_word= shift @$long;
5391               }
5392
10
7
100
100
24
13
            if( !@$long && $s_word ne $l_word) { return 0; }
5393
3
4
            push @pos, $s_length;
5394
3
2
            push @offset, $l_length - $s_length;
5395           }
5396
15
10
        my $length= length( $s_word);
5397
15
10
        $s_length += $length;
5398
15
16
        $l_length += $length;
5399       }
5400
3
4
    return( 1, \@pos, \@offset);
5401   }
5402
5403 sub append_pcdata
5404
1
1
2
  { $_[0]->{'pcdata'}.= $_[1];
5405
1
1
    return $_[0];
5406   }
5407
5408
13
13
47
sub pcdata { return $_[0]->{pcdata}; }
5409
5410
5411 sub append_extra_data
5412
1
1
3
  { $_[0]->{extra_data}.= $_[1];
5413
1
1
     return $_[0];
5414   }
5415
5416 sub set_extra_data
5417
695
695
739
  { $_[0]->{extra_data}= $_[1];
5418
695
446
    return $_[0];
5419   }
5420
124
100
124
363
sub extra_data { return $_[0]->{extra_data} || ''; }
5421
5422 sub set_target
5423
3103
2113
  { my( $elt, $target)= @_;
5424
3103
2554
    $elt->{target}= $target;
5425
3103
2149
    return $elt;
5426   }
5427
1
1
5
sub target { return $_[0]->{target}; }
5428
5429 sub set_data
5430
5
5
12
  { $_[0]->{'data'}= $_[1];
5431
5
10
    return $_[0];
5432   }
5433
64
64
152
sub data { return $_[0]->{data}; }
5434
5435 sub set_pi
5436
22
22
16
  { my $elt= shift;
5437
22
100
34
    unless( $elt->{gi} == $XML::Twig::gi2index{$PI})
5438
1
1
      { $elt->cut_children;
5439
1
50
2
        $elt->{gi}=$XML::Twig::gi2index{$PI} or $elt->set_gi( $PI);
5440       }
5441
22
23
    return $elt->_set_pi( @_);
5442   }
5443
5444 sub _set_pi
5445
3100
3862
  { $_[0]->set_target( $_[1]);
5446
3100
2791
    $_[0]->{data}= $_[2];
5447
3100
2762
    return $_[0];
5448   }
5449
5450
1488
1502
sub pi_string { my $string= $PI_START . $_[0]->{target};
5451
1488
1030
                my $data= $_[0]->{data};
5452
1488
1487
100
67
3112
1111
                if( defined( $data) && $data ne '') { $string .= " $data"; }
5453
1488
1013
                $string .= $PI_END ;
5454
1488
1768
                return $string;
5455               }
5456
5457 sub set_comment
5458
7
7
11
  { my $elt= shift;
5459
7
100
15
    unless( $elt->{gi} == $XML::Twig::gi2index{$COMMENT})
5460
1
1
      { $elt->cut_children;
5461
1
50
3
        $elt->{gi}=$XML::Twig::gi2index{$COMMENT} or $elt->set_gi( $COMMENT);
5462       }
5463
7
11
    return $elt->_set_comment( @_);
5464   }
5465
5466
3523
3523
3522
2285
sub _set_comment { $_[0]->{comment}= $_[1]; return $_[0]; }
5467
3
3
15
sub comment { return $_[0]->{comment}; }
5468
1767
1811
sub comment_string { return $COMMENT_START . _comment_escaped_string( $_[0]->{comment}) . $COMMENT_END; }
5469 # comments cannot start or end with
5470 sub _comment_escaped_string
5471
1767
1264
  { my( $c)= @_;
5472
1767
1450
    $c=~ s{^-}{ -};
5473
1767
1265
    $c=~ s{-$}{- };
5474
1767
1040
    $c=~ s{--}{- -}g;
5475
1767
3076
    return $c;
5476   }
5477
5478
1
1
1
2
2
sub set_ent { $_[0]->{ent}= $_[1]; return $_[0]; }
5479
2
2
7
sub ent { return $_[0]->{ent}; }
5480
12
12
50
sub ent_name { return substr( $_[0]->{ent}, 1, -1);}
5481
5482 sub set_cdata
5483
2
2
8
  { my $elt= shift;
5484
2
100
7
    unless( $elt->{gi} == $XML::Twig::gi2index{$CDATA})
5485
1
2
      { $elt->cut_children;
5486
1
5
        $elt->insert_new_elt( first_child => $CDATA, @_);
5487
1
2
        return $elt;
5488       }
5489
1
2
    return $elt->_set_cdata( @_);
5490   }
5491
5492 sub _set_cdata
5493
230
230
255
  { $_[0]->{cdata}= $_[1];
5494
230
175
    return $_[0];
5495   }
5496
5497 sub append_cdata
5498
1
1
2
  { $_[0]->{cdata}.= $_[1];
5499
1
2
    return $_[0];
5500   }
5501
47
47
130
sub cdata { return $_[0]->{cdata}; }
5502
5503
5504 sub contains_only_text
5505
42
42
57
  { my $elt= shift;
5506
42
100
29
    return 0 unless $elt->is_elt;
5507
24
21
    foreach my $child ($elt->_children)
5508
25
100
18
      { return 0 if $child->is_elt; }
5509
15
17
    return $elt;
5510   }
5511
5512 sub contains_only
5513
5
5
8
  { my( $elt, $exp)= @_;
5514
5
5
5
5
5
7
7
5
4
6
6
6
9
8
13
10
    my @children= do { my $elt= $elt; my @children=(); my $child= $elt->{first_child}; while( $child) { push @children, $child; $child= $child->{next_sibling}; } @children; };
5515
5
8
    foreach my $child (@children)
5516
7
100
10
      { return 0 unless $child->is( $exp); }
5517
3
100
16
    return @children || 1;
5518   }
5519
5520 sub contains_a_single
5521
202
202
191
  { my( $elt, $exp)= @_;
5522
202
100
839
    my $child= $elt->{first_child} or return 0;
5523
57
100
67
    return 0 unless $child->passes( $exp);
5524
53
100
77
    return 0 if( $child->{next_sibling});
5525
50
143
    return $child;
5526   }
5527
5528
5529 sub root
5530
29906
16682
  { my $elt= shift;
5531
29906
19650
37511
20935
    while( $elt->{parent}) { $elt= $elt->{parent}; }
5532
29906
25367
    return $elt;
5533   }
5534
5535 sub _root_through_cut
5536
2
2
3
  { my $elt= shift;
5537
2
5
67
50
13
20
    while( $elt->{parent} || ($elt->{former} && $elt->{former}->{parent})) { $elt= $elt->{parent} || ($elt->{former} && $elt->{former}->{parent}); }
5538
2
4
    return $elt;
5539   }
5540
5541 sub twig
5542
29884
22338
  { my $elt= shift;
5543
29884
28204
    my $root= $elt->root;
5544
29884
53628
    return $root->{twig};
5545   }
5546
5547 sub _twig_through_cut
5548
1
1
1
  { my $elt= shift;
5549
1
2
    my $root= $elt->_root_through_cut;
5550
1
2
    return $root->{twig};
5551   }
5552
5553
5554 # used for navigation
5555 # returns undef or the element, depending on whether $elt passes $cond
5556 # $cond can be
5557 # - empty: the element passes the condition
5558 # - ELT ('#ELT'): the element passes the condition if it is a "real" element
5559 # - TEXT ('#TEXT'): the element passes if it is a CDATA or PCDATA element
5560 # - a string with an XPath condition (only a subset of XPath is actually
5561 # supported).
5562 # - a regexp: the element passes if its gi matches the regexp
5563 # - a code ref: the element passes if the code, applied on the element,
5564 # returns true
5565
5566 my %cond_cache; # expression => coderef
5567
5568
1
1
149
sub reset_cond_cache { %cond_cache=(); }
5569
5570 {
5571    sub _install_cond
5572
399
399
338
    { my $cond= shift;
5573
399
270
      my $test;
5574
399
288
      my $init='';
5575
5576
399
279
      my $original_cond= $cond;
5577
5578
399
100
715
      my $not= ($cond=~ s{^\s*!}{}) ? '!' : '';
5579
5580
399
1
100
560
2
      if( ref $cond eq 'CODE') { return $cond; }
5581
5582
398
100
414
      if( ref $cond eq 'Regexp')
5583
3
6
        { $test = qq{(\$_[0]->gi=~ /$cond/)}; }
5584       else
5585
395
700
        { my @tests;
5586
395
487
          while( $cond)
5587             {
5588               # the condition is a string
5589
400
4
4
4
100
100
100
100
100
100
100
4
16641
13
4
287
              if( $cond=~ s{$ELT$SEP}{})
5590
5
13
                { push @tests, qq{\$_[0]->is_elt}; }
5591               elsif( $cond=~ s{$TEXT$SEP}{})
5592
14
35
                { push @tests, qq{\$_[0]->is_text}; }
5593               elsif( $cond=~ s{^\s*($REG_TAG_PART)$SEP}{})
5594
189
25733
                { push @tests, _gi_test( $1); }
5595               elsif( $cond=~ s{^\s*($REG_REGEXP)$SEP}{})
5596                 { # /regexp/
5597
2
7
                  push @tests, qq{ \$_[0]->gi=~ $1 };
5598                 }
5599               elsif( $cond=~ s{^\s*($REG_TAG_PART)?\s* # $1
5600                                \[\s*(-?)\s*(\d+)\s*\] # [$2]
5601                                $SEP}{}xo
5602                    )
5603
48
104
                { my( $gi, $neg, $index)= ($1, $2, $3);
5604
48
100
51
                  my $siblings= $neg ? q{$_[0]->_next_siblings} : q{$_[0]->_prev_siblings};
5605
48
100
100
111
                  if( $gi && ($gi ne '*'))
5606                     #{ $test= qq{((\$_[0]->gi eq "$gi") && (scalar( grep { \$_->gi eq "$gi" } $siblings) + 1 == $index))}; }
5607
31
30
                    { push @tests, _and( _gi_test( $gi), qq{ (scalar( grep { \$_->gi eq "$gi" } $siblings) + 1 == $index)}); }
5608                   else
5609
17
47
                    { push @tests, qq{(scalar( $siblings) + 1 == $index)}; }
5610                 }
5611               elsif( $cond=~ s{^\s*($REG_TAG_PART?)\s*($REG_PREDICATE)$SEP}{})
5612
124
290
                { my( $gi, $predicate)= ( $1, $2);
5613
124
215
                  push @tests, _and( _gi_test( $gi), _parse_predicate_in_step( $predicate));
5614                 }
5615               elsif( $cond=~ s{^\s*($REG_NAKED_PREDICATE)$SEP}{})
5616
14
19
                { push @tests, _parse_predicate_in_step( $1); }
5617               else
5618
4
322
                { croak "wrong navigation condition '$original_cond' ($@)"; }
5619             }
5620
391
8
100
741
14
           $test= @tests > 1 ? '(' . join( '||', map { "($_)" } @tests) . ')' : $tests[0];
5621         }
5622
5623       #warn "init: '$init' - test: '$test'\n";
5624
5625
394
691
      my $sub= qq{sub { $NO_WARNINGS; $init; return $not($test) ? \$_[0] : undef; } };
5626
394
103
103
103
37
37
37
41
41
41
51
51
51
36
36
36
39
39
39
32
32
32
24
24
24
4
4
4
4
4
4
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
2
2
2
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
103
37
41
51
36
39
32
24
4
4
1
1
1
1
1
1
1
2
1
1
1
1
1
1
1
1
1
1
1
1
1
1
2
2
2
2
1
1
1
1
1
1
19693
362
98
8344
125
39
1951
134
44
3002
557
51
3999
119
40
2972
133
41
3615
106
37
2765
87
195
1894
13
77
296
13
5
481
4
1
108
4
1
107
4
1
137
4
0
104
4
1
107
3
1
106
3
1
147
7
2
120
3
1
140
4
1
132
3
1
104
3
1
117
3
1
114
3
1
112
3
1
105
4
1
106
3
1
129
4
1
127
3
1
127
3
1
128
3
2
121
4
1
105
7
2
120
8
3
140
8
3
159
6
2
133
6
2
149
6
2
145
3
1
112
3
1
111
3
1
112
3
2
111
3
1
105
      my $s= eval $sub;
5627       #warn "cond: $cond\n$sub\n";
5628
394
100
688
      if( $@)
5629
5
405
        { croak "wrong navigation condition '$original_cond' ($@);" }
5630
389
1006
      return $s;
5631     }
5632
5633   sub _gi_test
5634
344
344
483
    { my( $full_gi)= @_;
5635
5636       # optimize if the gi exists, including the case where the gi includes a dot
5637
344
394
      my $index= $XML::Twig::gi2index{$full_gi};
5638
344
264
100
471
925
      if( $index) { return qq{\$_[0]->{gi} == $index}; }
5639
5640
80
285
      my( $gi, $class, $id)= $full_gi=~ m{^(.*?)(?:[.]([^.]*)|[#](.*))?$};
5641
5642
80
75
      my $gi_test='';
5643
80
100
100
218
      if( $gi && $gi ne '*' )
5644         { # 2 options, depending on whether the gi exists in gi2index
5645           # start optimization
5646
21
24
          my $index= $XML::Twig::gi2index{$gi};
5647
21
100
29
          if( $index)
5648             { # the gi exists, use its index as a faster shortcut
5649
7
11
              $gi_test = qq{\$_[0]->{gi} == $index};
5650             }
5651           else
5652           # end optimization
5653             { # it does not exist (but might be created later), compare the strings
5654
14
25
              $gi_test = qq{ \$_[0]->gi eq "$gi"};
5655             }
5656         }
5657       else
5658
59
90
        { $gi_test= 1; }
5659
5660
80
135
      my $class_test='';
5661       #warn "class: '$class'";
5662
80
100
103
      if( $class)
5663
13
18
        { $class_test = qq{ defined( \$_[0]->{att}->{class}) && \$_[0]->{att}->{class}=~ m{\\b$class\\b} }; }
5664
5665
80
52
      my $id_test='';
5666       #warn "id: '$id'";
5667
80
100
98
      if( $id)
5668
4
5
        { $id_test = qq{ defined( \$_[0]->{att}->{$ID}) && \$_[0]->{att}->{$ID} eq '$id' }; }
5669
5670
5671       #warn "gi_test: '$gi_test' - class_test: '$class_test' returning ", _and( $gi_test, $class_test);
5672
80
103
      return _and( $gi_test, $class_test, $id_test);
5673   }
5674
5675
5676   # input: the original predicate
5677   sub _parse_predicate_in_step
5678
138
138
135
    { my $cond= shift;
5679
138
466
      my %PERL_ALPHA_TEST= ( '=' => ' eq ', '!=' => ' ne ', '>' => ' gt ', '>=' => ' ge ', '<' => ' lt ', '<=' => ' le ');
5680
5681
138
287
      $cond=~ s{^\s*\[\s*}{};
5682
138
245
      $cond=~ s{\s*\]\s*$}{};
5683
138
381
5974
1137
      $cond=~ s{( ($REG_STRING|$REG_REGEXP) # strings or regexps
5684                    |\@($REG_TAG_NAME)(?=\s*(?:[><=!]|!~|=~)) # @att (followed by a comparison operator)
5685                    |\@($REG_TAG_NAME) # @att (not followed by a comparison operator)
5686
381
100
100
100
100
100
100
100
100
100
50
100
33
100
67
67
67
2428
197
                   |=~|!~ # matching operators
5687
93
339
                   |([><]=?|=|!=)(?=\s*[\d+-]) # test before a number
5688
16
45
                   |([><]=?|=|!=) # test, other cases
5689
1
5
                   |($REG_FUNCTION) # no arg functions
5690
79
265
                   # this bit is a mess, but it is the only solution with this half-baked parser
5691                    |((?:string|text)\(\s*$REG_TAG_NAME\s*\)\s*$REG_MATCH\s*$REG_REGEXP) # string( child) =~ /regexp/
5692
18
76
                   |((?:string|text)\(\s*$REG_TAG_NAME\s*\)\s*!?=\s*$REG_VALUE) # string( child) = "value" (or !=)
5693                    |((?:string|text)\(\s*$REG_TAG_NAME\s*\)\s*[<>]=?\s*$REG_VALUE) # string( child) > "value"
5694
9
17
32
71
                   |(and|or)
5695                 )}
5696                { my( $token, $string, $att, $bare_att, $num_test, $alpha_test, $func, $string_regexp, $string_eq, $string_test, $and_or)
5697                  = ( $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11);
5698
5699
18
100
112
                 if( defined $string) { $token }
5700
30
126
                 elsif( $att) { "( \$_[0]->{att} && exists( \$_[0]->{att}->{'$att'}) && \$_[0]->{att}->{'$att'})"; }
5701                  elsif( $bare_att) { "(\$_[0]->{att} && defined( \$_[0]->{att}->{'$bare_att'}))"; }
5702                  elsif( $num_test && ($num_test eq '=') ) { "==" } # others tests are unchanged
5703                  elsif( $alpha_test) { $PERL_ALPHA_TEST{$alpha_test} }
5704                  elsif( $func && $func=~ m{^(?:string|text)})
5705                                         { "\$_[0]->text"; }
5706                  elsif( $string_regexp && $string_regexp =~ m{(?:string|text)\(\s*($REG_TAG_NAME)\s*\)\s*($REG_MATCH)\s*($REG_REGEXP)})
5707                                         { "(XML::Twig::_first_n { (\$_->gi eq '$1') && (\$_->text $2 $3) } 1, \$_[0]->_children)"; }
5708                  elsif( $string_eq && $string_eq =~ m{(?:string|text)\(\s*($REG_TAG_NAME)\s*\)\s*(!?=)\s*($REG_VALUE)})
5709                                         {"(XML::Twig::_first_n { (\$_->gi eq '$1') && (\$_->text $PERL_ALPHA_TEST{$2} $3) } 1, \$_[0]->_children)"; }
5710                  elsif( $string_test && $string_test =~ m{(?:string|text)\(\s*($REG_TAG_NAME)\s*\)\s*([<>]=?)\s*($REG_VALUE)})
5711                                         { "(XML::Twig::_first_n { (\$_->gi eq '$1') && (\$_->text $2 $3) } 1, \$_[0]->_children)"; }
5712                  elsif( $and_or) { $and_or eq 'and' ? '&&' : '||' ; }
5713                  else { $token; }
5714                }gexs;
5715
138
552
      return "($cond)";
5716     }
5717
5718
5719   sub _op
5720
82
82
102
    { my $op= shift;
5721
82
78
100
100
96
63
      if( $op eq '=') { $op= 'eq'; }
5722
3
2
      elsif( $op eq '!=') { $op= 'ne'; }
5723
82
187
      return $op;
5724     }
5725
5726   sub passes
5727
4008
2460
    { my( $elt, $cond)= @_;
5728
4008
100
7170
      return $elt unless $cond;
5729
679
67
1640
      my $sub= ($cond_cache{$cond} ||= _install_cond( $cond));
5730
679
11133
      return $sub->( $elt);
5731     }
5732 }
5733
5734 sub set_parent
5735
1
1
2
  { $_[0]->{parent}= $_[1];
5736
1
1
50
2
2
    if( $XML::Twig::weakrefs) { weaken( $_[0]->{parent}); }
5737   }
5738
5739 sub parent
5740
46
46
91
  { my $elt= shift;
5741
46
100
115
    my $cond= shift || return $elt->{parent};
5742
26
39
100
16
103
    do { $elt= $elt->{parent} || return; } until ( $elt->passes( $cond));
5743
22
41
    return $elt;
5744   }
5745
5746 sub set_first_child
5747
50
50
59
  { $_[0]->{'first_child'}= $_[1];
5748   }
5749
5750 sub first_child
5751
668
668
1782
  { my $elt= shift;
5752
668
100
1164
    my $cond= shift || return $elt->{first_child};
5753
428
453
    my $child= $elt->{first_child};
5754
428
67
896
    my $test_cond= ($cond_cache{$cond} ||= _install_cond( $cond));
5755
419
100
7929
    while( $child && !$test_cond->( $child))
5756
596
7861
       { $child= $child->{next_sibling}; }
5757
419
22712
    return $child;
5758   }
5759
5760
20
20
111
sub _first_child { return $_[0]->{first_child}; }
5761
273
273
545
sub _last_child { return $_[0]->{last_child}; }
5762
44
44
634
sub _next_sibling { return $_[0]->{next_sibling}; }
5763
76
76
1133
sub _prev_sibling { return $_[0]->{prev_sibling}; }
5764
115
115
296
sub _parent { return $_[0]->{parent}; }
5765
76
76
76
296
76
76
55
33
105
313
1092
sub _next_siblings { my $elt= shift; my @siblings; while( $elt= $elt->{next_sibling}) { push @siblings, $elt; } return @siblings; }
5766
93
93
93
169
93
93
66
54
117
188
1336
sub _prev_siblings { my $elt= shift; my @siblings; while( $elt= $elt->{prev_sibling}) { push @siblings, $elt; } return @siblings; }
5767
5768 # sets a field
5769 # arguments $record, $cond, @content
5770 sub set_field
5771
3
3
6
  { my $record = shift;
5772
3
3
    my $cond = shift;
5773
3
5
    my $child= $record->first_child( $cond);
5774
3
100
5
    if( $child)
5775
1
2
      { $child->set_content( @_); }
5776     else
5777
2
100
89
      { if( $cond=~ m{^\s*($REG_TAG_NAME)})
5778
1
2
          { my $gi= $1;
5779
1
2
            $child= $record->insert_new_elt( last_child => $gi, @_);
5780           }
5781         else
5782
1
79
          { croak "can't create a field name from $cond"; }
5783       }
5784
2
2
    return $child;
5785   }
5786
5787 sub set_last_child
5788
36
36
32
  { $_[0]->{'last_child'}= $_[1];
5789
36
36
50
43
51
    if( $XML::Twig::weakrefs) { weaken( $_[0]->{'last_child'}); }
5790   }
5791
5792 sub last_child
5793
60
60
96
  { my $elt= shift;
5794
60
100
528
    my $cond= shift || return $elt->{last_child};
5795
26
67
57
    my $test_cond= ($cond_cache{$cond} ||= _install_cond( $cond));
5796
26
37
    my $child= $elt->{last_child};
5797
26
100
438
    while( $child && !$test_cond->( $child) )
5798
24
369
      { $child= $child->{prev_sibling}; }
5799
26
83
    return $child
5800   }
5801
5802
5803 sub set_prev_sibling
5804
1
1
3
  { $_[0]->{'prev_sibling'}= $_[1];
5805
1
1
50
2
2
    if( $XML::Twig::weakrefs) { weaken( $_[0]->{'prev_sibling'}); }
5806   }
5807
5808 sub prev_sibling
5809
99
99
221
  { my $elt= shift;
5810
99
100
183
    my $cond= shift || return $elt->{prev_sibling};
5811
57
67
94
    my $test_cond= ($cond_cache{$cond} ||= _install_cond( $cond));
5812
57
66
    my $sibling= $elt->{prev_sibling};
5813
57
100
582
    while( $sibling && !$test_cond->( $sibling) )
5814
16
141
          { $sibling= $sibling->{prev_sibling}; }
5815
57
97
    return $sibling;
5816   }
5817
5818
1
1
3
sub set_next_sibling { $_[0]->{'next_sibling'}= $_[1]; }
5819
5820 sub next_sibling
5821
618
618
504
  { my $elt= shift;
5822
618
100
1137
    my $cond= shift || return $elt->{next_sibling};
5823
266
33
356
    my $test_cond= ($cond_cache{$cond} ||= _install_cond( $cond));
5824
266
192
    my $sibling= $elt->{next_sibling};
5825
266
100
2640
    while( $sibling && !$test_cond->( $sibling) )
5826
131
1112
          { $sibling= $sibling->{next_sibling}; }
5827
266
425
    return $sibling;
5828   }
5829
5830 # methods dealing with the class attribute, convenient if you work with xhtml
5831
53
53
209
sub class { $_[0]->{att}->{class}; }
5832 # lvalue version of class. separate from class to avoid problem like RT#
5833 sub lclass
5834           :lvalue # > perl 5.5
5835
2
2
6
  { $_[0]->{att}->{class}; }
5836
5837
18
18
18
17
20
sub set_class { my( $elt, $class)= @_; $elt->set_att( class => $class); }
5838
5839 # adds a class to an element
5840 sub add_to_class
5841
6
6
5
  { my( $elt, $new_class)= @_;
5842
6
100
8
    return $elt unless $new_class;
5843
5
5
    my $class= $elt->class;
5844
5
8
50
11
13
    my %class= $class ? map { $_ => 1 } split /\s+/, $class : ();
5845
5
8
    $class{$new_class}= 1;
5846
5
15
    $elt->set_class( join( ' ', sort keys %class));
5847   }
5848
5849 sub remove_class
5850
5
5
6
  { my( $elt, $class_to_remove)= @_;
5851
5
50
8
    return $elt unless $class_to_remove;
5852
5
7
    my $class= $elt->class;
5853
5
7
50
9
13
    my %class= $class ? map { $_ => 1 } split /\s+/, $class : ();
5854
5
6
    delete $class{$class_to_remove};
5855
5
10
    $elt->set_class( join( ' ', sort keys %class));
5856   }
5857
5858
2
2
2
1
3
sub att_to_class { my( $elt, $att)= @_; $elt->set_class( $elt->{'att'}->{$att}); }
5859
2
2
2
3
4
sub add_att_to_class { my( $elt, $att)= @_; $elt->add_to_class( $elt->{'att'}->{$att}); }
5860
1
1
1
1
2
sub move_att_to_class { my( $elt, $att)= @_; $elt->add_to_class( $elt->{'att'}->{$att});
5861
1
2
                        $elt->del_att( $att);
5862                       }
5863
2
2
2
2
3
sub tag_to_class { my( $elt)= @_; $elt->set_class( $elt->tag); }
5864
2
2
2
2
2
sub add_tag_to_class { my( $elt)= @_; $elt->add_to_class( $elt->tag); }
5865
1
1
1
1
1
1
2
sub set_tag_class { my( $elt, $new_tag)= @_; $elt->add_tag_to_class; $elt->set_tag( $new_tag); }
5866
5867 sub tag_to_span
5868
2
2
4
  { my( $elt)= @_;
5869
2
100
67
3
    $elt->set_class( $elt->tag) unless( $elt->tag eq 'span' && $elt->class); # set class to span unless it would mean replacing it with span
5870
2
3
    $elt->set_tag( 'span');
5871   }
5872
5873 sub tag_to_div
5874
2
2
2
  { my( $elt)= @_;
5875
2
100
67
3
    $elt->set_class( $elt->tag) unless( $elt->tag eq 'div' && $elt->class); # set class to div unless it would mean replacing it with div
5876
2
3
    $elt->set_tag( 'div');
5877   }
5878
5879 sub in_class
5880
10
10
9
  { my( $elt, $class)= @_;
5881
10
11
    my $elt_class= $elt->class;
5882
10
50
12
    return unless( defined $elt_class);
5883
10
100
8
    return $elt->class=~ m{(?:^|\s)\Q$class\E(?:\s|$)} ? $elt : 0;
5884   }
5885
5886
5887 # get or set all attributes
5888 # argument can be a hash or a hashref
5889 sub set_atts
5890
43713
30133
  { my $elt= shift;
5891
43713
27788
    my %atts;
5892
43713
100
39606
    tie %atts, 'Tie::IxHash' if( keep_atts_order());
5893
43713
113
100
67
234373
200
    %atts= ( (ref( $_[0] || '') eq 'HASH') || isa( $_[0] || '', 'HASH')) ? %{$_[0]} : @_;
5894
43713
45031
    $elt->{att}= \%atts;
5895
43713
18691
100
52681
22649
    if( exists $atts{$ID}) { $elt->_set_id( $atts{$ID}); }
5896
43713
38503
    return $elt;
5897   }
5898
5899
196
196
284
sub atts { return $_[0]->{att}; }
5900
576
576
576
359
1342
sub att_names { return (sort keys %{$_[0]->{att}}); }
5901
4
4
4
46
8
sub del_atts { $_[0]->{att}={}; return $_[0]; }
5902
5903 # get or set a single attribute (set works for several atts)
5904 sub set_att
5905
155
155
200
  { my $elt= shift;
5906
5907
155
100
50
464
    if( $_[0] && ref( $_[0]) && !$_[1])
5908
1
105
      { croak "improper call to set_att, usage is \$elt->set_att( att1 => 'val1', att2 => 'val2',...)"; }
5909
5910
154
100
215
    unless( $elt->{att})
5911
4
4
      { $elt->{att}={};
5912
4
2
100
5
6
        tie %{$elt->{att}}, 'Tie::IxHash' if( keep_atts_order());
5913       }
5914
5915
154
210
    while(@_)
5916
154
156
      { my( $att, $val)= (shift, shift);
5917
154
221
        $elt->{att}->{$att}= $val;
5918
154
55
100
295
84
        if( $att eq $ID) { $elt->_set_id( $val); }
5919       }
5920
154
183
    return $elt;
5921   }
5922
5923
540
540
1496
sub att { $_[0]->{att}->{$_[1]}; }
5924 # lvalue version of att. separate from class to avoid problem like RT#
5925 sub latt
5926           :lvalue # > perl 5.5
5927
5
5
12
  { $_[0]->{att}->{$_[1]}; }
5928
5929 sub del_att
5930
85
85
62
  { my $elt= shift;
5931
85
85
102
146
    while( @_) { delete $elt->{'att'}->{shift()}; }
5932
85
83
    return $elt;
5933   }
5934
5935
5
5
11
sub att_exists { return exists $_[0]->{att}->{$_[1]}; }
5936
5937 # delete an attribute from all descendants of an element
5938 sub strip_att
5939
13
13
11
  { my( $elt, $att)= @_;
5940
13
23
    $_->del_att( $att) foreach ($elt->descendants_or_self( qq{*[\@$att]}));
5941
13
24
    return $elt;
5942   }
5943
5944 sub change_att_name
5945
5
5
24
  { my( $elt, $old_name, $new_name)= @_;
5946
5
6
    my $value= $elt->{'att'}->{$old_name};
5947
5
100
8
    return $elt unless( defined $value);
5948
4
6
    $elt->del_att( $old_name)
5949         ->set_att( $new_name => $value);
5950
4
4
    return $elt;
5951   }
5952
5953 sub lc_attnames
5954
1
1
1
  { my $elt= shift;
5955
1
1
    foreach my $att ($elt->att_names)
5956
3
3
50
9
4
      { if( $att ne lc $att) { $elt->change_att_name( $att, lc $att); } }
5957
1
2
    return $elt;
5958   }
5959
5960
58
58
72
sub set_twig_current { $_[0]->{twig_current}=1; }
5961
1
1
2
sub del_twig_current { delete $_[0]->{twig_current}; }
5962
5963
5964 # get or set the id attribute
5965 sub set_id
5966
51
51
57
  { my( $elt, $id)= @_;
5967
51
100
82
    $elt->del_id() if( exists $elt->{att}->{$ID});
5968
51
75
    $elt->set_att($ID, $id);
5969
51
47
    $elt->_set_id( $id);
5970
51
37
    return $elt;
5971   }
5972
5973 # only set id, does not update the attribute value
5974 sub _set_id
5975
18798
13893
  { my( $elt, $id)= @_;
5976
18798
67
19613
    my $t= $elt->twig || $elt;
5977
18798
32821
    $t->{twig_id_list}->{$id}= $elt;
5978
18798
9795
100
22116
14185
    if( $XML::Twig::weakrefs) { weaken( $t->{twig_id_list}->{$id}); }
5979
18798
14074
    return $elt;
5980   }
5981
5982
830
830
4180
sub id { return $_[0]->{att}->{$ID}; }
5983
5984 # methods used to add ids to elements that don't have one
5985 BEGIN
5986
187
187
312
{ my $id_nb = "0001";
5987
187
674013
  my $id_seed = "twig_id_";
5988
5989   sub set_id_seed ## no critic (Subroutines::ProhibitNestedSubs);
5990
2
2
2
3
3
    { $id_seed= $_[1]; $id_nb=1; }
5991
5992   sub add_id ## no critic (Subroutines::ProhibitNestedSubs);
5993
40
40
34
    { my $elt= shift;
5994
40
100
53
      if( defined $elt->{'att'}->{$ID})
5995
1
2
        { return $elt->{'att'}->{$ID}; }
5996       else
5997
39
50
33
90
        { my $id= $_[0] && ref( $_[0]) && isa( $_[0], 'CODE') ? $_[0]->( $elt) : $id_seed . $id_nb++;
5998
39
43
          $elt->set_id( $id);
5999
39
29
          return $id;
6000         }
6001     }
6002 }
6003
6004
6005
6006 # delete the id attribute and remove the element from the id list
6007 sub del_id
6008
2515
1515
  { my $elt= shift;
6009
2515
5
100
2831
9
    if( ! exists $elt->{att}->{$ID}) { return $elt };
6010
2510
2014
    my $id= $elt->{att}->{$ID};
6011
6012
2510
1992
    delete $elt->{att}->{$ID};
6013
6014
2510
100
2499
    my $t= shift || $elt->twig;
6015
2510
2
100
2324
5
    unless( $t) { return $elt; }
6016
2508
2508
50
4344
2303
    if( exists $t->{twig_id_list}->{$id}) { delete $t->{twig_id_list}->{$id}; }
6017
6018
2508
1816
    return $elt;
6019   }
6020
6021 # return the list of children
6022 sub children
6023
205
205
203
  { my $elt= shift;
6024
205
141
    my @children;
6025
205
304
    my $child= $elt->first_child( @_);
6026
205
268
    while( $child)
6027
546
431
      { push @children, $child;
6028
546
650
        $child= $child->next_sibling( @_);
6029       }
6030
205
1102
    return @children;
6031   }
6032
6033 sub _children
6034
20441
11312
  { my $elt= shift;
6035
20441
12745
    my @children=();
6036
20441
13097
    my $child= $elt->{first_child};
6037
20441
20379
    while( $child)
6038
18358
10951
      { push @children, $child;
6039
18358
21122
        $child= $child->{next_sibling};
6040       }
6041
20441
27612
    return @children;
6042   }
6043
6044 sub children_copy
6045
1
1
2
  { my $elt= shift;
6046
1
2
    my @children;
6047
1
3
    my $child= $elt->first_child( @_);
6048
1
3
    while( $child)
6049
2
5
      { push @children, $child->copy;
6050
2
5
        $child= $child->next_sibling( @_);
6051       }
6052
1
3
    return @children;
6053   }
6054
6055
6056 sub children_count
6057
776
776
428
  { my $elt= shift;
6058
776
476
    my $cond= shift;
6059
776
400
    my $count=0;
6060
776
517
    my $child= $elt->{first_child};
6061
776
756
    while( $child)
6062
1139
100
947
      { $count++ if( $child->passes( $cond));
6063
1139
1312
        $child= $child->{next_sibling};
6064       }
6065
776
795
    return $count;
6066   }
6067
6068 sub children_text
6069
3
3
15
  { my $elt= shift;
6070
4
2
4
2
    return wantarray() ? map { $_->text} $elt->children( @_)
6071
3
100
8
                       : join( '', map { $_->text} $elt->children( @_) )
6072                        ;
6073   }
6074
6075 sub children_trimmed_text
6076
2
2
2
  { my $elt= shift;
6077
2
2
3
3
    return wantarray() ? map { $_->trimmed_text} $elt->children( @_)
6078
2
100
6
                       : join( '', map { $_->trimmed_text} $elt->children( @_) )
6079                        ;
6080   }
6081
6082 sub all_children_are
6083
3
3
2
  { my( $parent, $cond)= @_;
6084
3
5
    foreach my $child ($parent->_children)
6085
4
100
5
      { return 0 unless( $child->passes( $cond)); }
6086
1
2
    return $parent;
6087   }
6088
6089
6090 sub ancestors
6091
1109
645
  { my( $elt, $cond)= @_;
6092
1109
595
    my @ancestors;
6093
1109
1550
    while( $elt->{parent})
6094
2128
1224
      { $elt= $elt->{parent};
6095
2128
100
1733
        push @ancestors, $elt if( $elt->passes( $cond));
6096       }
6097
1109
1277
    return @ancestors;
6098   }
6099
6100 sub ancestors_or_self
6101
12
12
12
  { my( $elt, $cond)= @_;
6102
12
9
    my @ancestors;
6103
12
20
    while( $elt)
6104
38
100
40
      { push @ancestors, $elt if( $elt->passes( $cond));
6105
38
49
        $elt= $elt->{parent};
6106       }
6107
12
50
    return @ancestors;
6108   }
6109
6110
6111 sub _ancestors
6112
2
2
2
  { my( $elt, $include_self)= @_;
6113
2
100
3
    my @ancestors= $include_self ? ($elt) : ();
6114
2
4
4
6
    while( $elt= $elt->{parent}) { push @ancestors, $elt; }
6115
2
4
    return @ancestors;
6116   }
6117
6118
6119 sub inherit_att
6120
5571
3263
  { my $elt= shift;
6121
5571
3559
    my $att= shift;
6122
5571
4
5162
7
    my %tags= map { ($_, 1) } @_;
6123
6124     do
6125
5571
7775
100
50
3044
15639
      { if( (defined $elt->{'att'}->{$att})
6126            && ( !%tags || $tags{$XML::Twig::index2gi[$elt->{'gi'}]})
6127           )
6128
10
22
          { return $elt->{'att'}->{$att}; }
6129       } while( $elt= $elt->{parent});
6130
5561
13163
    return undef;
6131   }
6132
6133 sub _inherit_att_through_cut
6134
76
76
50
  { my $elt= shift;
6135
76
50
    my $att= shift;
6136
76
1
85
5
    my %tags= map { ($_, 1) } @_;
6137
6138     do
6139
76
156
100
67
50
60
478
      { if( (defined $elt->{'att'}->{$att})
6140            && ( !%tags || $tags{$XML::Twig::index2gi[$elt->{'gi'}]})
6141           )
6142
47
129
          { return $elt->{'att'}->{$att}; }
6143       } while( $elt= $elt->{parent} || ($elt->{former} && $elt->{former}->{parent}));
6144
29
77
    return undef;
6145   }
6146
6147
6148 sub current_ns_prefixes
6149
3
3
5
  { my $elt= shift;
6150
3
4
    my %prefix;
6151
3
100
7
    $prefix{''}=1 if( $elt->namespace( ''));
6152
3
8
    while( $elt)
6153
6
3
11
17
9
35
      { my @ns= grep { !m{^xml} } map { m{^([^:]+):} } ($XML::Twig::index2gi[$elt->{'gi'}], $elt->att_names);
6154
6
14
        $prefix{$_}=1 foreach (@ns);
6155
6
15
        $elt= $elt->{parent};
6156       }
6157
6158
3
17
    return (sort keys %prefix);
6159   }
6160
6161 # kinda counter-intuitive actually:
6162 # the next element is found by looking for the next open tag after from the
6163 # current one, which is the first child, if it exists, or the next sibling
6164 # or the first next sibling of an ancestor
6165 # optional arguments are:
6166 # - $subtree_root: a reference to an element, when the next element is not
6167 # within $subtree_root anymore then next_elt returns undef
6168 # - $cond: a condition, next_elt returns the next element matching the condition
6169
6170 sub next_elt
6171
494
494
413
  { my $elt= shift;
6172
494
312
    my $subtree_root= 0;
6173
494
100
67
1272
    $subtree_root= shift if( ref( $_[0]) && isa( $_[0], 'XML::Twig::Elt'));
6174
494
339
    my $cond= shift;
6175
494
278
    my $next_elt;
6176
6177
494
247
    my $ind; # optimization
6178
494
273
    my $test_cond;
6179
494
100
595
    if( $cond) # optimization
6180
477
100
765
      { unless( defined( $ind= $XML::Twig::gi2index{$cond}) ) # optimization
6181
340
67
475
          { $test_cond= ($cond_cache{$cond} ||= _install_cond( $cond)); } # optimization
6182       } # optimization
6183
6184     do
6185
494
802
100
100
100
60
67
360
1188
      { if( $next_elt= $elt->{first_child})
6186           { # simplest case: the elt has a child
6187           }
6188          elsif( $next_elt= $elt->{next_sibling})
6189           { # no child but a next sibling (just check we stay within the subtree)
6190
6191             # case where elt is subtree_root, is empty and has a sibling
6192
110
100
100
245
            return undef if( $subtree_root && ($elt == $subtree_root));
6193
6194           }
6195         else
6196           { # case where the element has no child and no next sibling:
6197             # get the first next sibling of an ancestor, checking subtree_root
6198
6199             # case where elt is subtree_root, is empty and has no sibling
6200
223
100
100
518
            return undef if( $subtree_root && ($elt == $subtree_root));
6201
6202
221
187
            $next_elt= $elt->{parent};
6203
6204
221
276
            until( $next_elt->{next_sibling})
6205
191
100
100
522
              { return undef if( $subtree_root && ($subtree_root == $next_elt));
6206
86
100
189
                $next_elt= $next_elt->{parent} || return undef;
6207               }
6208
105
100
100
338
            return undef if( $subtree_root && ($subtree_root == $next_elt));
6209
101
83
            $next_elt= $next_elt->{next_sibling};
6210           }
6211
677
8810
      $elt= $next_elt; # just in case we need to loop
6212     } until( ! defined $elt
6213              || ! defined $cond
6214          || (defined $ind && ($elt->{gi} eq $ind)) # optimization
6215          || (defined $test_cond && ($test_cond->( $elt)))
6216                );
6217
6218
369
1683
      return $elt;
6219       }
6220
6221 # return the next_elt within the element
6222 # just call next_elt with the element as first and second argument
6223
20
20
26
sub first_descendant { return $_[0]->next_elt( @_); }
6224
6225 # get the last descendant, # then return the element found or call prev_elt with the condition
6226 sub last_descendant
6227
38
38
32
  { my( $elt, $cond)= @_;
6228
38
37
    my $last_descendant= $elt->_last_descendant;
6229
38
100
100
61
    if( !$cond || $last_descendant->matches( $cond))
6230
23
26
      { return $last_descendant; }
6231     else
6232
15
19
      { return $last_descendant->prev_elt( $elt, $cond); }
6233   }
6234
6235 # no argument allowed here, just go down the last_child recursively
6236 sub _last_descendant
6237
41
41
22
  { my $elt= shift;
6238
41
71
61
88
    while( my $child= $elt->{last_child}) { $elt= $child; }
6239
41
41
    return $elt;
6240   }
6241
6242 # counter-intuitive too:
6243 # the previous element is found by looking
6244 # for the first open tag backwards from the current one
6245 # it's the last descendant of the previous sibling
6246 # if it exists, otherwise it's simply the parent
6247 sub prev_elt
6248
37
37
51
  { my $elt= shift;
6249
37
29
    my $subtree_root= 0;
6250
37
100
67
67
143
    if( defined $_[0] and (ref( $_[0]) && isa( $_[0], 'XML::Twig::Elt')))
6251
17
11
      { $subtree_root= shift ;
6252
17
100
33
        return undef if( $elt == $subtree_root);
6253       }
6254
35
31
    my $cond= shift;
6255     # get prev elt
6256
35
21
    my $prev_elt;
6257     do
6258
35
105
100
26
132
      { return undef if( $elt == $subtree_root);
6259
102
100
124
        if( $prev_elt= $elt->{prev_sibling})
6260
37
52
          { while( $prev_elt->{last_child})
6261
22
35
              { $prev_elt= $prev_elt->{last_child}; }
6262           }
6263         else
6264
65
100
106
          { $prev_elt= $elt->{parent} || return undef; }
6265
95
135
        $elt= $prev_elt; # in case we need to loop
6266       } until( $elt->passes( $cond));
6267
6268
25
58
    return $elt;
6269   }
6270
6271 sub _following_elt
6272
7
7
3
  { my( $elt)= @_;
6273
7
100
23
    while( $elt && !$elt->{next_sibling})
6274
12
27
      { $elt= $elt->{parent}; }
6275
7
100
14
    return $elt ? $elt->{next_sibling} : undef;
6276   }
6277
6278 sub following_elt
6279
7
7
7
  { my( $elt, $cond)= @_;
6280
7
100
10
    $elt= $elt->_following_elt || return undef;
6281
6
100
67
13
    return $elt if( !$cond || $elt->matches( $cond));
6282
2
3
    return $elt->next_elt( $cond);
6283   }
6284
6285 sub following_elts
6286
6
6
6
  { my( $elt, $cond)= @_;
6287
6
4
100
8
3
    if( !$cond) { undef $cond; }
6288
6
8
    my $following= $elt->following_elt( $cond);
6289
6
100
7
    if( $following)
6290
5
5
      { my @followings= $following;
6291
5
6
        while( $following= $following->next_elt( $cond))
6292
6
7
          { push @followings, $following; }
6293
5
34
        return( @followings);
6294       }
6295     else
6296
1
1
      { return (); }
6297   }
6298
6299 sub _preceding_elt
6300
7
7
5
  { my( $elt)= @_;
6301
7
100
20
    while( $elt && !$elt->{prev_sibling})
6302
11
19
      { $elt= $elt->{parent}; }
6303
7
100
15
    return $elt ? $elt->{prev_sibling}->_last_descendant : undef;
6304   }
6305
6306 sub preceding_elt
6307
7
7
7
  { my( $elt, $cond)= @_;
6308
7
100
7
    $elt= $elt->_preceding_elt || return undef;
6309
3
100
67
8
    return $elt if( !$cond || $elt->matches( $cond));
6310
2
3
    return $elt->prev_elt( $cond);
6311   }
6312
6313 sub preceding_elts
6314
7
7
7
  { my( $elt, $cond)= @_;
6315
7
4
100
8
2
    if( !$cond) { undef $cond; }
6316
7
9
    my $preceding= $elt->preceding_elt( $cond);
6317
7
100
6
    if( $preceding)
6318
3
4
      { my @precedings= $preceding;
6319
3
3
        while( $preceding= $preceding->prev_elt( $cond))
6320
4
4
          { push @precedings, $preceding; }
6321
3
29
        return( @precedings);
6322       }
6323     else
6324
4
40
      { return (); }
6325   }
6326
6327 # used in get_xpath
6328 sub _self
6329
6
6
5
  { my( $elt, $cond)= @_;
6330
6
100
30
    return $cond ? $elt->matches( $cond) : $elt;
6331   }
6332
6333 sub next_n_elt
6334
7
7
7
  { my $elt= shift;
6335
7
100
20
    my $offset= shift || return undef;
6336
5
10
    foreach (1..$offset)
6337
7
100
36
      { $elt= $elt->next_elt( @_) || return undef; }
6338
3
7
    return $elt;
6339   }
6340
6341 # checks whether $elt is included in $ancestor, returns 1 in that case
6342 sub in
6343
3222
2132
  { my ($elt, $ancestor)= @_;
6344
3222
100
67
7990
    if( ref( $ancestor) && isa( $ancestor, 'XML::Twig::Elt'))
6345       { # element
6346
3220
3823
100
3873
6765
        while( $elt= $elt->{parent}) { return $elt if( $elt == $ancestor); }
6347       }
6348     else
6349       { # condition
6350
2
2
100
6
5
        while( $elt= $elt->{parent}) { return $elt if( $elt->matches( $ancestor)); }
6351       }
6352
2456
3339
    return 0;
6353   }
6354
6355 sub first_child_text
6356
21
21
28
  { my $elt= shift;
6357
21
100
32
    my $dest=$elt->first_child(@_) or return '';
6358
19
39
    return $dest->text;
6359   }
6360
6361 sub fields
6362
1
1
1
  { my $elt= shift;
6363
1
3
2
3
    return map { $elt->field( $_) } @_;
6364   }
6365
6366 sub first_child_trimmed_text
6367
4
4
6
  { my $elt= shift;
6368
4
100
6
    my $dest=$elt->first_child(@_) or return '';
6369
1
2
    return $dest->trimmed_text;
6370   }
6371
6372 sub first_child_matches
6373
3
3
3
  { my $elt= shift;
6374
3
100
10
    my $dest= $elt->{first_child} or return undef;
6375
2
3
    return $dest->passes( @_);
6376   }
6377
6378 sub last_child_text
6379
3
3
7
  { my $elt= shift;
6380
3
100
5
    my $dest=$elt->last_child(@_) or return '';
6381
2
4
    return $dest->text;
6382   }
6383
6384 sub last_child_trimmed_text
6385
2
2
2
  { my $elt= shift;
6386
2
100
5
    my $dest=$elt->last_child(@_) or return '';
6387
1
3
    return $dest->trimmed_text;
6388   }
6389
6390 sub last_child_matches
6391
6
6
7
  { my $elt= shift;
6392
6
100
13
    my $dest= $elt->{last_child} or return undef;
6393
5
6
    return $dest->passes( @_);
6394   }
6395
6396 sub child_text
6397
2
2
3
  { my $elt= shift;
6398
2
100
5
    my $dest=$elt->child(@_) or return '';
6399
1
3
    return $dest->text;
6400   }
6401
6402 sub child_trimmed_text
6403
2
2
11
  { my $elt= shift;
6404
2
100
4
    my $dest=$elt->child(@_) or return '';
6405
1
2
    return $dest->trimmed_text;
6406   }
6407
6408 sub child_matches
6409
2
2
4
  { my $elt= shift;
6410
2
3
    my $nb= shift;
6411
2
100
4
    my $dest= $elt->child( $nb) or return undef;
6412
1
2
    return $dest->passes( @_);
6413   }
6414
6415 sub prev_sibling_text
6416
2
2
4
  { my $elt= shift;
6417
2
100
3
    my $dest=$elt->_prev_sibling(@_) or return '';
6418
1
2
    return $dest->text;
6419   }
6420
6421 sub prev_sibling_trimmed_text
6422
2
2
3
  { my $elt= shift;
6423
2
100
4
    my $dest=$elt->_prev_sibling(@_) or return '';
6424
1
3
    return $dest->trimmed_text;
6425   }
6426
6427 sub prev_sibling_matches
6428
3
3
6
  { my $elt= shift;
6429
3
100
9
    my $dest= $elt->{prev_sibling} or return undef;
6430
2
5
    return $dest->passes( @_);
6431   }
6432
6433 sub next_sibling_text
6434
3
3
15
  { my $elt= shift;
6435
3
100
6
    my $dest=$elt->next_sibling(@_) or return '';
6436
2
5
    return $dest->text;
6437   }
6438
6439 sub next_sibling_trimmed_text
6440
2
2
2
  { my $elt= shift;
6441
2
100
5
    my $dest=$elt->next_sibling(@_) or return '';
6442
1
2
    return $dest->trimmed_text;
6443   }
6444
6445 sub next_sibling_matches
6446
2
2
3
  { my $elt= shift;
6447
2
100
8
    my $dest= $elt->{next_sibling} or return undef;
6448
1
3
    return $dest->passes( @_);
6449   }
6450
6451 sub prev_elt_text
6452
2
2
3
  { my $elt= shift;
6453
2
100
8
    my $dest=$elt->prev_elt(@_) or return '';
6454
1
2
    return $dest->text;
6455   }
6456
6457 sub prev_elt_trimmed_text
6458
2
2
4
  { my $elt= shift;
6459
2
100
5
    my $dest=$elt->prev_elt(@_) or return '';
6460
1
2
    return $dest->trimmed_text;
6461   }
6462
6463 sub prev_elt_matches
6464
3
3
4
  { my $elt= shift;
6465
3
100
5
    my $dest= $elt->prev_elt or return undef;
6466
2
3
    return $dest->passes( @_);
6467   }
6468
6469 sub next_elt_text
6470
2
2
2
  { my $elt= shift;
6471
2
100
3
    my $dest=$elt->next_elt(@_) or return '';
6472
1
2
    return $dest->text;
6473   }
6474
6475 sub next_elt_trimmed_text
6476
2
2
4
  { my $elt= shift;
6477
2
100
6
    my $dest=$elt->next_elt(@_) or return '';
6478
1
1
    return $dest->trimmed_text;
6479   }
6480
6481 sub next_elt_matches
6482
3
3
5
  { my $elt= shift;
6483
3
100
6
    my $dest= $elt->next_elt or return undef;
6484
2
3
    return $dest->passes( @_);
6485   }
6486
6487 sub parent_text
6488
4
4
7
  { my $elt= shift;
6489
4
100
9
    my $dest=$elt->parent(@_) or return '';
6490
3
8
    return $dest->text;
6491   }
6492
6493 sub parent_trimmed_text
6494
2
2
3
  { my $elt= shift;
6495
2
100
4
    my $dest=$elt->parent(@_) or return '';
6496
1
2
    return $dest->trimmed_text;
6497   }
6498
6499 sub parent_matches
6500
8
8
9
  { my $elt= shift;
6501
8
100
19
    my $dest= $elt->{parent} or return undef;
6502
6
9
    return $dest->passes( @_);
6503   }
6504
6505 sub is_first_child
6506
9
9
14
  { my $elt= shift;
6507
9
100
20
    my $parent= $elt->{parent} or return 0;
6508
8
100
13
    my $first_child= $parent->first_child( @_) or return 0;
6509
7
100
23
    return ($first_child == $elt) ? $elt : 0;
6510   }
6511
6512 sub is_last_child
6513
9
9
25
  { my $elt= shift;
6514
9
100
19
    my $parent= $elt->{parent} or return 0;
6515
8
100
15
    my $last_child= $parent->last_child( @_) or return 0;
6516
7
100
31
    return ($last_child == $elt) ? $elt : 0;
6517   }
6518
6519 # returns the depth level of the element
6520 # if 2 parameter are used then counts the 2cd element name in the
6521 # ancestors list
6522 sub level
6523
726
726
526
  { my( $elt, $cond)= @_;
6524
726
393
    my $level=0;
6525
726
50
807
    my $name=shift || '';
6526
726
973
100
100
875
1998
    while( $elt= $elt->{parent}) { $level++ if( !$cond || $elt->matches( $cond)); }
6527
726
762
    return $level;
6528   }
6529
6530 # checks whether $elt has an ancestor that satisfies $cond, returns the ancestor
6531 sub in_context
6532
18
18
23
  { my ($elt, $cond, $level)= @_;
6533
18
100
31
    $level= -1 unless( $level) ; # $level-- will never hit 0
6534
6535
18
28
    while( $level)
6536
32
100
70
      { $elt= $elt->{parent} or return 0;
6537
24
8
100
34
60
        if( $elt->matches( $cond)) { return $elt; }
6538
16
78
        $level--;
6539       }
6540
2
6
    return 0;
6541   }
6542
6543 sub _descendants
6544
47
47
119
  { my( $subtree_root, $include_self)= @_;
6545
47
100
58
    my @descendants= $include_self ? ($subtree_root) : ();
6546
6547
47
24
    my $elt= $subtree_root;
6548
47
33
    my $next_elt;
6549
6550
47
18
    MAIN: while( 1)
6551
125
100
100
155
      { if( $next_elt= $elt->{first_child})
6552           { # simplest case: the elt has a child
6553           }
6554         elsif( $next_elt= $elt->{next_sibling})
6555           { # no child but a next sibling (just check we stay within the subtree)
6556
6557             # case where elt is subtree_root, is empty and has a sibling
6558
9
100
17
            last MAIN if( $elt == $subtree_root);
6559           }
6560         else
6561           { # case where the element has no child and no next sibling:
6562             # get the first next sibling of an ancestor, checking subtree_root
6563
6564             # case where elt is subtree_root, is empty and has no sibling
6565
57
100
71
            last MAIN if( $elt == $subtree_root);
6566
6567             # backtrack until we find a parent with a next sibling
6568
40
50
45
            $next_elt= $elt->{parent} || last;
6569
40
42
            until( $next_elt->{next_sibling})
6570
37
100
69
              { last MAIN if( $subtree_root == $next_elt);
6571
19
50
35
                $next_elt= $next_elt->{parent} || last MAIN;
6572               }
6573
22
100
24
            last MAIN if( $subtree_root == $next_elt);
6574
16
12
            $next_elt= $next_elt->{next_sibling};
6575           }
6576
78
50
79
        $elt= $next_elt || last MAIN;
6577
78
41
        push @descendants, $elt;
6578       }
6579
47
76
    return @descendants;
6580   }
6581
6582
6583 sub descendants
6584
266
266
240
  { my( $subtree_root, $cond)= @_;
6585
266
211
    my @descendants=();
6586
266
250
    my $elt= $subtree_root;
6587
6588     # this branch is pure optimization for speed: if $cond is a gi replace it
6589     # by the index of the gi and loop here
6590     # start optimization
6591
266
160
    my $ind;
6592
266
100
100
631
    if( !$cond || ( defined ( $ind= $XML::Twig::gi2index{$cond})) )
6593       {
6594
160
101
        my $next_elt;
6595
6596
160
105
        while( 1)
6597
1794
100
100
2073
          { if( $next_elt= $elt->{first_child})
6598                 { # simplest case: the elt has a child
6599                 }
6600              elsif( $next_elt= $elt->{next_sibling})
6601               { # no child but a next sibling (just check we stay within the subtree)
6602
6603                 # case where elt is subtree_root, is empty and has a sibling
6604
234
50
33
530
                last if( $subtree_root && ($elt == $subtree_root));
6605               }
6606             else
6607               { # case where the element has no child and no next sibling:
6608                 # get the first next sibling of an ancestor, checking subtree_root
6609
6610                 # case where elt is subtree_root, is empty and has no sibling
6611
626
100
67
1321
                last if( $subtree_root && ($elt == $subtree_root));
6612
6613                 # backtrack until we find a parent with a next sibling
6614
623
50
794
                $next_elt= $elt->{parent} || last undef;
6615
623
666
                until( $next_elt->{next_sibling})
6616
461
100
67
983
                  { last if( $subtree_root && ($subtree_root == $next_elt));
6617
311
50
555
                    $next_elt= $next_elt->{parent} || last;
6618                   }
6619
623
100
67
1633
                last if( $subtree_root && ($subtree_root == $next_elt));
6620
466
338
                $next_elt= $next_elt->{next_sibling};
6621               }
6622
1634
50
1535
            $elt= $next_elt || last;
6623
1634
100
100
3042
            push @descendants, $elt if( !$cond || ($elt->{gi} eq $ind));
6624           }
6625       }
6626     else
6627     # end optimization
6628       { # branch for a complex condition: use the regular (slow but simple) way
6629
106
149
        while( $elt= $elt->next_elt( $subtree_root, $cond))
6630
202
275
          { push @descendants, $elt; }
6631       }
6632
266
1771
    return @descendants;
6633   }
6634
6635
6636 sub descendants_or_self
6637
49
49
47
  { my( $elt, $cond)= @_;
6638
49
100
65
    my @descendants= $elt->passes( $cond) ? ($elt) : ();
6639
49
84
    push @descendants, $elt->descendants( $cond);
6640
49
124
    return @descendants;
6641   }
6642
6643 sub sibling
6644
9
9
16
  { my $elt= shift;
6645
9
8
    my $nb= shift;
6646
9
100
100
21
    if( $nb > 0)
6647
4
9
      { foreach( 1..$nb)
6648
4
100
12
          { $elt= $elt->next_sibling( @_) or return undef; }
6649       }
6650     elsif( $nb < 0)
6651
3
5
      { foreach( 1..(-$nb))
6652
3
100
8
          { $elt= $elt->prev_sibling( @_) or return undef; }
6653       }
6654     else # $nb == 0
6655
2
4
      { return $elt->passes( $_[0]); }
6656
4
8
    return $elt;
6657   }
6658
6659 sub sibling_text
6660
3
3
7
  { my $elt= sibling( @_);
6661
3
100
12
    return $elt ? $elt->text : undef;
6662   }
6663
6664
6665 sub child
6666
43
43
40
  { my $elt= shift;
6667
43
32
    my $nb= shift;
6668
43
100
48
    if( $nb >= 0)
6669
27
100
39
      { $elt= $elt->first_child( @_) or return undef;
6670
20
30
        foreach( 1..$nb)
6671
20
100
22
          { $elt= $elt->next_sibling( @_) or return undef; }
6672       }
6673     else
6674
16
100
21
      { $elt= $elt->last_child( @_) or return undef;
6675
13
19
        foreach( 2..(-$nb))
6676
16
100
20
          { $elt= $elt->prev_sibling( @_) or return undef; }
6677       }
6678
31
359
    return $elt;
6679   }
6680
6681 sub prev_siblings
6682
24
24
17
  { my $elt= shift;
6683
24
19
    my @siblings=();
6684
24
24
    while( $elt= $elt->prev_sibling( @_))
6685
12
14
      { unshift @siblings, $elt; }
6686
24
73
    return @siblings;
6687   }
6688
6689 sub siblings
6690
9
9
12
  { my $elt= shift;
6691
9
24
13
43
    return grep { $_ ne $elt } $elt->{parent}->children( @_);
6692   }
6693
6694 sub pos
6695
17
17
26
  { my $elt= shift;
6696
17
100
100
29
    return 0 if ($_[0] && !$elt->matches( @_));
6697
9
7
    my $pos=1;
6698
9
11
    $pos++ while( $elt= $elt->prev_sibling( @_));
6699
9
8
    return $pos;
6700   }
6701
6702
6703 sub next_siblings
6704
6
6
6
  { my $elt= shift;
6705
6
6
    my @siblings=();
6706
6
8
    while( $elt= $elt->next_sibling( @_))
6707
8
10
      { push @siblings, $elt; }
6708
6
42
    return @siblings;
6709   }
6710
6711
6712 # used by get_xpath: parses the xpath expression and generates a sub that performs the
6713 # search
6714 { my %axis2method;
6715
187
187
639425
  BEGIN { %axis2method= ( child => 'children',
6716                           descendant => 'descendants',
6717                          'descendant-or-self' => 'descendants_or_self',
6718                           parent => 'parent_is',
6719                           ancestor => 'ancestors',
6720                          'ancestor-or-self' => 'ancestors_or_self',
6721                          'following-sibling' => 'next_siblings',
6722                          'preceding-sibling' => 'prev_siblings',
6723                           following => 'following_elts',
6724                           preceding => 'preceding_elts',
6725                           self => '_self',
6726                         );
6727         }
6728
6729   sub _install_xpath
6730
189
189
180
    { my( $xpath_exp, $type)= @_;
6731
189
174
      my $original_exp= $xpath_exp;
6732
189
147
      my $sub= 'my $elt= shift; my @results;';
6733
6734       # grab the root if expression starts with a /
6735
189
100
100
601
      if( $xpath_exp=~ s{^/}{})
6736
163
169
        { $sub .= '@results= ($elt->twig) || croak "cannot use an XPath query starting with a / on a node not attached to a whole twig";'; }
6737       elsif( $xpath_exp=~ s{^\./}{})
6738
18
25
        { $sub .= '@results= ($elt);'; }
6739       else
6740
8
7
        { $sub .= '@results= ($elt);'; }
6741
6742
6743      #warn "xpath_exp= '$xpath_exp'\n";
6744
6745
189
100
4661
      while( $xpath_exp &&
6746              $xpath_exp=~s{^\s*(/?)
6747                             # the xxx=~/regexp/ is a pain as it includes /
6748                             (\s*(?:(?:($REG_AXIS)::)?(\*|$REG_TAG_PART|\.\.|\.)\s*)?($REG_PREDICATE_ALT*)
6749                             )
6750                             (/|$)}{}xo)
6751
6752
312
12784
        { my( $wildcard, $sub_exp, $axis, $gi, $predicates)= ($1, $2, $3, $4, $5);
6753
312
50
67
469
           if( $axis && ! $gi)
6754
0
0
                { _croak_and_doublecheck_xpath( $original_exp, "error in xpath expression $original_exp"); }
6755
6756           # grab a parent
6757
312
100
100
520
          if( $sub_exp eq '..')
6758
4
50
9
            { _croak_and_doublecheck_xpath( $original_exp, "error in xpath expression $original_exp") if( $wildcard);
6759
4
32
              $sub .= '@results= map { $_->{parent}} @results;';
6760             }
6761           # test the element itself
6762           elsif( $sub_exp=~ m{^\.(.*)$}s)
6763
1
3
            { $sub .= "\@results= grep { \$_->matches( q{$1}) } \@results;" }
6764           # grab children
6765           else
6766             {
6767
307
100
343
              if( !$axis)
6768
274
100
315
                { $axis= $wildcard ? 'descendant' : 'child'; }
6769
307
81
100
100
731
67
              if( !$gi or $gi eq '*') { $gi=''; }
6770
307
172
              my $function;
6771
6772               # "special" predicates, that return just one element
6773
307
100
100
100
100
1143
              if( $predicates && ($predicates =~ m{^\s*\[\s*((-\s*)?\d+)\s*\]\s*$}))
6774                 { # [<nb>]
6775
30
36
                  my $offset= $1;
6776
30
100
51
                  $offset-- if( $offset > 0);
6777
30
50
50
75
                  $function= $axis eq 'descendant' ? "next_n_elt( $offset, '$gi')"
6778                            : $axis eq 'child' ? "child( $offset, '$gi')"
6779                            : _croak_and_doublecheck_xpath( $original_exp, "error [$1] not supported along axis '$axis'")
6780                            ;
6781
30
138
                  $sub .= "\@results= grep { \$_ } map { \$_->$function } \@results;"
6782                 }
6783               elsif( $predicates && ($predicates =~ m{^\s*\[\s*last\s*\(\s*\)\s*\]\s*$}) )
6784                 { # last()
6785
3
50
4
                  _croak_and_doublecheck_xpath( $original_exp, "error in xpath expression $original_exp, usage of // and last() not supported") if( $wildcard);
6786
3
8
                   $sub .= "\@results= map { \$_->last_child( '$gi') } \@results;";
6787                 }
6788               else
6789                 { # follow the axis
6790                   #warn "axis: '$axis' - method: '$axis2method{$axis}' - gi: '$gi'\n";
6791
6792
274
408
                  my $follow_axis= " \$_->$axis2method{$axis}( '$gi')";
6793
274
190
                  my $step= $follow_axis;
6794
6795                   # now filter using the predicate
6796
274
1623
                  while( $predicates=~ s{^\s*($REG_PREDICATE_ALT)\s*}{}o)
6797
143
182
                    { my $pred= $1;
6798
143
197
                      $pred=~ s{^\s*\[\s*}{};
6799
143
244
                      $pred=~ s{\s*\]\s*$}{};
6800
143
102
                      my $test="";
6801
143
82
                      my $pos;
6802
143
100
266
                      if( $pred=~ m{^(-?\s*\d+)$})
6803
18
12
                        { my $pos= $1;
6804
18
100
49
                          if( $step=~ m{^\s*grep(.*) (\$_->\w+\(\s*'[^']*'\s*\))})
6805
16
46
                            { $step= "XML::Twig::_first_n $1 $pos, $2"; }
6806                           else
6807
2
2
50
6
1
                            { if( $pos > 0) { $pos--; }
6808
2
23
                              $step= "($step)[$pos]";
6809                             }
6810                           #warn "number predicate '$pos' - generated step '$step'\n";
6811                         }
6812                       else
6813
125
92
                        { my $syntax_error=0;
6814                           do
6815
125
165
100
100
100
89
883
                            { if( $pred =~ s{^string\(\s*\)\s*=\s*($REG_STRING)\s*}{}o) # string()="string" pred
6816
7
16
                                { $test .= "\$_->text eq $1"; }
6817                               elsif( $pred =~ s{^string\(\s*\)\s*!=\s*($REG_STRING)\s*}{}o) # string()!="string" pred
6818
1
4
                                { $test .= "\$_->text ne $1"; }
6819
165
100
100
100
100
100
100
100
100
100
100
100
3026
                              if( $pred =~ s{^string\(\s*\)\s*=\s*($REG_NUMBER)\s*}{}o) # string()=<number> pred
6820
1
7
                                { $test .= "\$_->text eq $1"; }
6821                               elsif( $pred =~ s{^string\(\s*\)\s*!=\s*($REG_NUMBER)\s*}{}o) # string()!=<number> pred
6822
1
7
                                { $test .= "\$_->text ne $1"; }
6823                               elsif( $pred =~ s{^string\(\s*\)\s*(>|<|>=|<=)\s*($REG_NUMBER)\s*}{}o) # string()!=<number> pred
6824
4
25
                                { $test .= "\$_->text $1 $2"; }
6825
6826                              elsif( $pred =~ s{^string\(\s*\)\s*($REG_MATCH)\s*($REG_REGEXP)\s*}{}o) # string()=~/regex/ pred
6827
10
17
                                { my( $match, $regexp)= ($1, $2);
6828
10
36
                                  $test .= "\$_->text $match $regexp";
6829                                 }
6830                               elsif( $pred =~ s{^string\(\s*\)\s*}{}o) # string() pred
6831
1
5
                                { $test .= "\$_->text"; }
6832                              elsif( $pred=~ s{^@($REG_TAG_NAME)\s*($REG_OP)\s*($REG_STRING|$REG_NUMBER)}{}o) # @att="val" pred
6833
82
94
                                { my( $att, $oper, $val)= ($1, _op( $2), $3);
6834
82
316
                                  $test .= qq{((defined \$_->{'att'}->{"$att"}) && (\$_->{'att'}->{"$att"} $oper $val))};
6835                                 }
6836                              elsif( $pred =~ s{^@($REG_TAG_NAME)\s*($REG_MATCH)\s*($REG_REGEXP)\s*}{}o) # @att=~/regex/ pred XXX
6837
12
18
                                { my( $att, $match, $regexp)= ($1, $2, $3);
6838
12
43
                                  $test .= qq{((defined \$_->{'att'}->{"$att"}) && (\$_->{'att'}->{"$att"} $match $regexp))};;
6839                                 }
6840                              elsif( $pred=~ s{^@($REG_TAG_NAME)\s*}{}o) # @att pred
6841
17
64
                                { $test .= qq{(defined \$_->{'att'}->{"$1"})}; }
6842                              elsif( $pred=~ s{^\s*(?:not|!)\s*@($REG_TAG_NAME)\s*}{}o) # not @att pred
6843
6
22
                                { $test .= qq{((\$_->is_elt) && (not defined \$_->{'att'}->{"$1"}))}; }
6844                               elsif( $pred=~ s{^\s*([()])}{}) # ( or ) (just add to the test)
6845
2
6
                                { $test .= qq{$1}; }
6846                               elsif( $pred=~ s{^\s*(and|or)\s*}{})
6847
19
76
                                { $test .= lc " $1 "; }
6848                               else
6849
10
32
                                { $syntax_error=1; }
6850
6851                              } while( !$syntax_error && $pred);
6852
125
100
136
                           _croak_and_doublecheck_xpath( $original_exp, "error in xpath expression $original_exp at $pred") if( $pred);
6853
123
291
                           $step= " grep { $test } $step ";
6854                         }
6855                     }
6856                   #warn "step: '$step'";
6857
272
1201
                  $sub .= "\@results= grep { \$_ } map { $step } \@results;";
6858                 }
6859             }
6860         }
6861
6862
187
100
211
      if( $xpath_exp)
6863
2
6
        { _croak_and_doublecheck_xpath( $original_exp, "error in xpath expression $original_exp around $xpath_exp"); }
6864
6865
185
140
      $sub .= q{return XML::Twig::_unique_elts( @results); };
6866       #warn "generated: '$sub'\n";
6867
185
4
4
4
7
7
7
7
7
7
9
9
9
7
7
7
6
6
6
3
3
3
3
3
3
3
3
3
3
3
3
2
2
2
2
2
2
4
7
7
9
7
6
3
3
3
3
2
2
9229
13
6
520
21
8
548
23
8
638
27
9
659
25
5
674
19
7
659
10
4
489
13
4
463
13
4
450
12
5
445
9
3
348
9
4
353
      my $s= eval "sub { $NO_WARNINGS; $sub }";
6868
185
50
342
      if( $@)
6869
0
0
        { _croak_and_doublecheck_xpath( $original_exp, "error in xpath expression $original_exp ($@);") }
6870
185
572
      return( $s);
6871     }
6872 }
6873
6874 sub _croak_and_doublecheck_xpath
6875
4
4
6
  { my $xpath_expression= shift;
6876
4
6
    my $mess= join( "\n", @_);
6877
4
100
100
15
    if( $XML::Twig::XPath::VERSION || 0)
6878
1
3
      { my $check_twig= XML::Twig::XPath->new;
6879
1
1
50
2
2
        if( eval { $check_twig->{twig_xp}->_parse( $xpath_expression) })
6880
1
855
          { $mess .= "\nthe expression is a valid XPath statement, and you are using XML::Twig::XPath, but"
6881                    . "\nyou are using either 'find_nodes' or 'get_xpath' where the method you likely wanted"
6882                    . "\nto use is 'findnodes', which is the only one that uses the full XPath engine\n";
6883           }
6884       }
6885
4
482
    croak $mess;
6886   }
6887
6888
6889
6890 { # extremely elaborate caching mechanism
6891   my %xpath; # xpath_expression => subroutine_code;
6892   sub get_xpath
6893
198
198
231
    { my( $elt, $xpath_exp, $offset)= @_;
6894
198
100
586
      my $sub= ($xpath{$xpath_exp} ||= _install_xpath( $xpath_exp));
6895
194
100
3358
      return $sub->( $elt) unless( defined $offset);
6896
13
250
      my @res= $sub->( $elt);
6897
13
31
      return $res[$offset];
6898     }
6899 }
6900
6901
6902 sub findvalues
6903
4
4
3
  { my $elt= shift;
6904
4
6
10
11
    return map { $_->text } $elt->get_xpath( @_);
6905   }
6906
6907 sub findvalue
6908
17
17
9
  { my $elt= shift;
6909
17
17
23
23
    return join '', map { $_->text } $elt->get_xpath( @_);
6910   }
6911
6912
6913 # XML::XPath compatibility
6914
1
1
3
sub getElementById { return $_[0]->twig->elt_id( $_[1]); }
6915
1682
1682
1682
1682
1682
1790
1790
1682
1682
100
86064
1118
1114
1124
1786
1096
1975
1675
2910
sub getChildNodes { my @children= do { my $elt= $_[0]; my @children=(); my $child= $elt->{first_child}; while( $child) { push @children, $child; $child= $child->{next_sibling}; } @children; }; return wantarray ? @children : \@children; }
6916
6917
3105
4327
sub _flushed { return $_[0]->{flushed}; }
6918
227
227
405
sub _set_flushed { $_[0]->{flushed}=1; }
6919
1
1
3
sub _del_flushed { delete $_[0]->{flushed}; }
6920
6921 sub cut
6922
3694
2407
  { my $elt= shift;
6923
3694
2113
    my( $parent, $prev_sibling, $next_sibling);
6924
3694
2762
    $parent= $elt->{parent};
6925
3694
100
7789
    my $a= $elt->{'att'}->{'a'} || 'na';
6926
3694
100
100
6364
    if( ! $parent && $elt->is_elt)
6927       { # are we cutting the root?
6928
3046
2221
        my $t= $elt->{twig};
6929
3046
100
100
6325
        if( $t && ! $t->{twig_parsing})
6930
2871
2956
          { delete $t->{twig_root};
6931
2871
2314
            delete $elt->{twig};
6932
2871
2745
            return $elt;
6933           } # cutt`ing the root
6934         else
6935
175
181
          { return; } # cutting an orphan, returning $elt would break backward compatibility
6936       }
6937
6938     # save the old links, that'll make it easier for some loops
6939
648
668
    foreach my $link ( qw(parent prev_sibling next_sibling) )
6940
1944
2638
      { $elt->{former}->{$link}= $elt->{$link};
6941
1944
1938
100
1963
2872
         if( $XML::Twig::weakrefs) { weaken( $elt->{former}->{$link}); }
6942       }
6943
6944     # if we cut the current element then its parent becomes the current elt
6945
648
100
1331
    if( $elt->{twig_current})
6946
57
41
      { my $twig_current= $elt->{parent};
6947
57
72
        $elt->twig->{twig_current}= $twig_current;
6948
57
60
        $twig_current->{'twig_current'}=1;
6949
57
69
        delete $elt->{'twig_current'};
6950       }
6951
6952
648
100
100
2132
    if( $parent->{first_child} && $parent->{first_child} == $elt)
6953
478
453
      { $parent->{first_child}= $elt->{next_sibling};
6954         # cutting can make the parent empty
6955
478
376
100
647
326
        if( ! $parent->{first_child}) { $parent->{empty}= 1; }
6956       }
6957
6958
648
100
100
1700
    if( $parent->{last_child} && $parent->{last_child} == $elt)
6959
453
453
453
452
100
315
403
499
479
      { $parent->{empty}=0; $parent->{last_child}=$elt->{prev_sibling}; if( $XML::Twig::weakrefs) { weaken( $parent->{last_child});} ;
6960       }
6961
6962
648
100
802
    if( $prev_sibling= $elt->{prev_sibling})
6963
168
161
      { $prev_sibling->{next_sibling}= $elt->{next_sibling}; }
6964
648
100
761
    if( $next_sibling= $elt->{next_sibling})
6965
193
193
192
100
181
221
221
      { $next_sibling->{prev_sibling}=$elt->{prev_sibling}; if( $XML::Twig::weakrefs) { weaken( $next_sibling->{prev_sibling});} ; }
6966
6967
6968
648
648
646
100
546
625
595
    $elt->{parent}=undef; if( $XML::Twig::weakrefs) { weaken( $elt->{parent});} ;
6969
648
648
646
100
515
645
683
    $elt->{prev_sibling}=undef; if( $XML::Twig::weakrefs) { weaken( $elt->{prev_sibling});} ;
6970
648
552
    $elt->{next_sibling}= undef;
6971
6972     # merge 2 (now) consecutive text nodes if they are of the same type
6973     # (type can be PCDATA or CDATA)
6974
648
100
100
1154
    if( $prev_sibling && $next_sibling && $prev_sibling->is_text && ( $XML::Twig::index2gi[$prev_sibling->{'gi'}] eq $XML::Twig::index2gi[$next_sibling->{'gi'}]))
6975
9
15
      { $prev_sibling->merge_text( $next_sibling); }
6976
6977
648
647
    return $elt;
6978   }
6979
6980
6981
4
4
9
sub former_next_sibling { return $_[0]->{former}->{next_sibling}; }
6982
3
3
6
sub former_prev_sibling { return $_[0]->{former}->{prev_sibling}; }
6983
3
3
10
sub former_parent { return $_[0]->{former}->{parent}; }
6984
6985 sub cut_children
6986
25
25
29
  { my( $elt, $exp)= @_;
6987
25
43
    my @children= $elt->children( $exp);
6988
25
45
33
47
    foreach (@children) { $_->cut; }
6989
25
23
100
37
19
    if( ! $elt->has_children) { $elt->{empty}= 1; }
6990
25
72
    return @children;
6991   }
6992
6993 sub cut_descendants
6994
3
3
5
  { my( $elt, $exp)= @_;
6995
3
12
    my @descendants= $elt->descendants( $exp);
6996
3
4
4
5
    foreach ($elt->descendants( $exp)) { $_->cut; }
6997
3
1
100
5
1
    if( ! $elt->has_children) { $elt->{empty}= 1; }
6998
3
12
    return @descendants;
6999   }
7000
7001
7002
7003 sub erase
7004
93
93
111
  { my $elt= shift;
7005     #you cannot erase the current element
7006
93
100
125
    if( $elt->{twig_current})
7007
1
73
      { croak "trying to erase an element before it has been completely parsed"; }
7008
92
100
112
    unless( $elt->{parent})
7009       { # trying to erase the root (of a twig or of a cut/new element)
7010
6
6
6
6
6
7
7
6
6
6
7
6
12
7
11
9
        my @children= do { my $elt= $elt; my @children=(); my $child= $elt->{first_child}; while( $child) { push @children, $child; $child= $child->{next_sibling}; } @children; };
7011
6
100
15
        unless( @children == 1)
7012
1
78
          { croak "can only erase an element with no parent if it has a single child"; }
7013
5
8
        $elt->_move_extra_data_after_erase;
7014
5
5
        my $child= shift @children;
7015
5
5
4
100
196
10
6
        $child->{parent}=undef; if( $XML::Twig::weakrefs) { weaken( $child->{parent});} ;
7016
5
7
        my $twig= $elt->twig;
7017
5
8
        $twig->set_root( $child);
7018       }
7019     else
7020       { # normal case
7021
86
102
        $elt->_move_extra_data_after_erase;
7022
86
86
86
86
86
106
106
86
50
55
67
59
98
63
128
103
        my @children= do { my $elt= $elt; my @children=(); my $child= $elt->{first_child}; while( $child) { push @children, $child; $child= $child->{next_sibling}; } @children; };
7023
86
100
98
        if( @children)
7024           { # elt has children, move them up
7025
7026
71
57
            my $first_child= $elt->{first_child};
7027
71
50
            my $prev_sibling=$elt->{prev_sibling};
7028
71
100
67
            if( $prev_sibling)
7029               { # connect first child to previous sibling
7030
21
21
20
100
19
28
21
                $first_child->{prev_sibling}=$prev_sibling; if( $XML::Twig::weakrefs) { weaken( $first_child->{prev_sibling});} ;
7031
21
18
                $prev_sibling->{next_sibling}= $first_child;
7032               }
7033             else
7034               { # elt was the first child
7035
50
75
                $elt->{parent}->set_first_child( $first_child);
7036               }
7037
7038
71
48
            my $last_child= $elt->{last_child};
7039
71
50
            my $next_sibling= $elt->{next_sibling};
7040
71
100
68
            if( $next_sibling)
7041               { # connect last child to next sibling
7042
35
33
                $last_child->{next_sibling}= $next_sibling;
7043
35
35
34
100
28
43
46
                $next_sibling->{prev_sibling}=$last_child; if( $XML::Twig::weakrefs) { weaken( $next_sibling->{prev_sibling});} ;
7044               }
7045             else
7046               { # elt was the last child
7047
36
45
                $elt->{parent}->set_last_child( $last_child);
7048               }
7049             # update parent for all siblings
7050
71
84
            foreach my $child (@children)
7051
106
106
104
100
90
104
159
              { $child->{parent}=$elt->{parent}; if( $XML::Twig::weakrefs) { weaken( $child->{parent});} ; }
7052
7053             # merge consecutive text elements if need be
7054
71
100
100
125
            if( $prev_sibling && $prev_sibling->is_text && ($XML::Twig::index2gi[$first_child->{'gi'}] eq $XML::Twig::index2gi[$prev_sibling->{'gi'}]) )
7055
12
17
              { $prev_sibling->merge_text( $first_child); }
7056
71
100
100
123
            if( $next_sibling && $next_sibling->is_text && ($XML::Twig::index2gi[$last_child->{'gi'}] eq $XML::Twig::index2gi[$next_sibling->{'gi'}]) )
7057
10
11
              { $last_child->merge_text( $next_sibling); }
7058
7059             # if parsing and have now a PCDATA text, mark so we can normalize later on if need be
7060
71
9
100
67
124
11
            if( $elt->{parent}->{twig_current} && $elt->{last_child}->is_text) { $elt->{parent}->{twig_to_be_normalized}=1; }
7061
7062             # elt is not referenced any more, so it will be DESTROYed
7063             # so we'd better break the links to its children ## FIX
7064
71
53
            undef $elt->{first_child};
7065
71
48
            undef $elt->{last_child};
7066
71
44
            undef $elt->{parent};
7067
71
46
            undef $elt->{next_sibling};
7068
71
85
            undef $elt->{prev_sibling};
7069
7070           }
7071           { # elt had no child, delete it
7072
86
86
64
100
             $elt->delete;
7073           }
7074
7075       }
7076
91
97
    return $elt;
7077
7078   }
7079
7080 sub _move_extra_data_after_erase
7081
91
91
69
  { my( $elt)= @_;
7082     # extra_data
7083
91
100
125
    if( my $extra_data= $elt->{extra_data})
7084
20
100
29
      { my $target= $elt->{first_child} || $elt->{next_sibling};
7085
20
100
16
        if( $target)
7086           {
7087
14
100
50
12
            if( $target->is( $ELT))
7088
7
100
10
              { $target->set_extra_data( $extra_data . ($target->extra_data || '')); }
7089             elsif( $target->is( $TEXT))
7090
7
9
              { $target->_unshift_extra_data_in_pcdata( $extra_data, 0); } # TO CHECK
7091           }
7092         else
7093
6
14
          { my $parent= $elt->{parent}; # always exists or the erase cannot be performed
7094
6
8
            $parent->_prefix_extra_data_before_end_tag( $extra_data);
7095           }
7096       }
7097
7098      # extra_data_before_end_tag
7099
91
100
112
    if( my $extra_data= $elt->{extra_data_before_end_tag})
7100
13
100
50
17
      { if( my $target= $elt->{next_sibling})
7101
10
100
50
11
          { if( $target->is( $ELT))
7102
3
100
3
              { $target->set_extra_data( $extra_data . ($target->extra_data || '')); }
7103             elsif( $target->is( $TEXT))
7104               {
7105
7
8
                $target->_unshift_extra_data_in_pcdata( $extra_data, 0);
7106              }
7107           }
7108         elsif( my $parent= $elt->{parent})
7109
3
3
          { $parent->_prefix_extra_data_before_end_tag( $extra_data); }
7110        }
7111
7112
91
75
    return $elt;
7113
7114   }
7115 BEGIN
7116
187
187
410826
  { my %method= ( before => \&paste_before,
7117                   after => \&paste_after,
7118                   first_child => \&paste_first_child,
7119                   last_child => \&paste_last_child,
7120                   within => \&paste_within,
7121         );
7122
7123     # paste elt somewhere around ref
7124     # pos can be first_child (default), last_child, before, after or within
7125     sub paste ## no critic (Subroutines::ProhibitNestedSubs);
7126
616
616
553
      { my $elt= shift;
7127
616
100
748
        if( $elt->{parent})
7128
1
80
          { croak "cannot paste an element that belongs to a tree"; }
7129
615
340
        my $pos;
7130
615
328
        my $ref;
7131
615
100
596
        if( ref $_[0])
7132
25
21
          { $pos= 'first_child';
7133
25
100
119
            croak "wrong argument order in paste, should be $_[1] first" if($_[1]);
7134           }
7135         else
7136
590
445
          { $pos= shift; }
7137
7138
614
100
740
        if( my $method= $method{$pos})
7139           {
7140
613
100
100
1930
            unless( ref( $_[0]) && isa( $_[0], 'XML::Twig::Elt'))
7141
4
100
100
8
              { if( ! defined( $_[0]))
7142
1
69
                  { croak "missing target in paste"; }
7143                 elsif( ! ref( $_[0]))
7144
1
71
                  { croak "wrong target type in paste (not a reference), should be XML::Twig::Elt or a subclass"; }
7145                 else
7146
2
2
                  { my $ref= ref $_[0];
7147
2
152
                    croak "wrong target type in paste: '$ref', should be XML::Twig::Elt or a subclass";
7148                   }
7149               }
7150
609
377
            $ref= $_[0];
7151             # check here so error message lists the caller file/line
7152
609
100
100
1563
            if( !$ref->{parent} && ($pos=~ m{^(before|after)$}) && !(exists $elt->{'target'}) && !(exists $elt->{'comment'}))
7153
6
482
              { croak "cannot paste $1 root"; }
7154
603
642
            $elt->$method( @_);
7155           }
7156         else
7157
1
78
          { croak "tried to paste in wrong position '$pos', allowed positions " .
7158               " are 'first_child', 'last_child', 'before', 'after' and " .
7159               "'within'";
7160           }
7161
603
100
67
900
        if( (my $ids= $elt->{twig_id_list}) && (my $t= $ref->twig) )
7162
22
50
39
          { $t->{twig_id_list}||={};
7163
22
55
            foreach my $id (keys %$ids)
7164
24
45
              { $t->{twig_id_list}->{$id}= $ids->{$id};
7165
24
24
50
32
66
                if( $XML::Twig::weakrefs) { weaken( $t->{twig_id_list}->{$id}); }
7166               }
7167           }
7168
603
1823
        return $elt;
7169       }
7170
7171
7172     sub paste_before
7173
16
16
94
      { my( $elt, $ref)= @_;
7174
16
16
        my( $parent, $prev_sibling, $next_sibling );
7175
7176         # trying to paste before an orphan (root or detached wlt)
7177
16
100
34
        unless( $ref->{parent})
7178
5
100
15
          { if( my $t= $ref->twig)
7179
4
100
100
17
              { if( (exists $elt->{'comment'}) || (exists $elt->{'target'})) # we can still do this
7180
2
2
4
2
                  { $t->_add_cpi_outside_of_root( leading_cpi => $elt); return; }
7181                 else
7182
2
152
                  { croak "cannot paste before root"; }
7183               }
7184             else
7185
1
251
              { croak "cannot paste before an orphan element"; }
7186           }
7187
11
12
        $parent= $ref->{parent};
7188
11
12
        $prev_sibling= $ref->{prev_sibling};
7189
11
11
        $next_sibling= $ref;
7190
7191
11
11
11
50
12
19
21
        $elt->{parent}=$parent; if( $XML::Twig::weakrefs) { weaken( $elt->{parent});} ;
7192
11
6
100
25
8
        if( $parent->{first_child} == $ref) { $parent->{first_child}= $elt; }
7193
7194
11
5
100
21
10
        if( $prev_sibling) { $prev_sibling->{next_sibling}= $elt; }
7195
11
11
11
50
12
15
17
        $elt->{prev_sibling}=$prev_sibling; if( $XML::Twig::weakrefs) { weaken( $elt->{prev_sibling});} ;
7196
7197
11
11
11
50
12
16
21
        $next_sibling->{prev_sibling}=$elt; if( $XML::Twig::weakrefs) { weaken( $next_sibling->{prev_sibling});} ;
7198
11
15
        $elt->{next_sibling}= $ref;
7199
11
14
        return $elt;
7200       }
7201
7202      sub paste_after
7203
283
283
303
      { my( $elt, $ref)= @_;
7204
283
150
        my( $parent, $prev_sibling, $next_sibling );
7205
7206         # trying to paste after an orphan (root or detached wlt)
7207
283
100
312
        unless( $ref->{parent})
7208
5
100
8
            { if( my $t= $ref->twig)
7209
4
100
100
17
                { if( (exists $elt->{'comment'}) || (exists $elt->{'target'})) # we can still do this
7210
2
2
3
2
                    { $t->_add_cpi_outside_of_root( trailing_cpi => $elt); return; }
7211                   else
7212
2
202
                    { croak "cannot paste after root"; }
7213                 }
7214               else
7215
1
129
                { croak "cannot paste after an orphan element"; }
7216             }
7217
278
184
        $parent= $ref->{parent};
7218
278
157
        $prev_sibling= $ref;
7219
278
260
        $next_sibling= $ref->{next_sibling};
7220
7221
278
278
278
50
228
305
348
        $elt->{parent}=$parent; if( $XML::Twig::weakrefs) { weaken( $elt->{parent});} ;
7222
278
170
170
170
170
50
100
411
126
123
180
223
        if( $parent->{last_child}== $ref) { $parent->{empty}=0; $parent->{last_child}=$elt; if( $XML::Twig::weakrefs) { weaken( $parent->{last_child});} ; }
7223
7224
278
239
        $prev_sibling->{next_sibling}= $elt;
7225
278
278
278
50
218
278
359
        $elt->{prev_sibling}=$prev_sibling; if( $XML::Twig::weakrefs) { weaken( $elt->{prev_sibling});} ;
7226
7227
278
108
108
108
50
100
277
90
111
276
        if( $next_sibling) { $next_sibling->{prev_sibling}=$elt; if( $XML::Twig::weakrefs) { weaken( $next_sibling->{prev_sibling});} ; }
7228
278
305
        $elt->{next_sibling}= $next_sibling;
7229
278
260
        return $elt;
7230
7231       }
7232
7233     sub paste_first_child
7234
70
70
64
      { my( $elt, $ref)= @_;
7235
70
41
        my( $parent, $prev_sibling, $next_sibling );
7236
70
55
        $parent= $ref;
7237
70
58
        $next_sibling= $ref->{first_child};
7238
7239
70
70
69
100
60
89
106
        $elt->{parent}=$parent; if( $XML::Twig::weakrefs) { weaken( $elt->{parent});} ;
7240
70
60
        $parent->{first_child}= $elt;
7241
70
39
39
39
39
50
100
164
35
35
66
60
        unless( $parent->{last_child}) { $parent->{empty}=0; $parent->{last_child}=$elt; if( $XML::Twig::weakrefs) { weaken( $parent->{last_child});} ; }
7242
7243
70
70
69
100
71
81
76
        $elt->{prev_sibling}=undef; if( $XML::Twig::weakrefs) { weaken( $elt->{prev_sibling});} ;
7244
7245
70
31
31
30
100
100
91
26
39
46
        if( $next_sibling) { $next_sibling->{prev_sibling}=$elt; if( $XML::Twig::weakrefs) { weaken( $next_sibling->{prev_sibling});} ; }
7246
70
63
        $elt->{next_sibling}= $next_sibling;
7247
70
66
        return $elt;
7248       }
7249
7250     sub paste_last_child
7251
3537
2423
      { my( $elt, $ref)= @_;
7252
3537
1927
        my( $parent, $prev_sibling, $next_sibling );
7253
3537
2041
        $parent= $ref;
7254
3537
2313
        $prev_sibling= $ref->{last_child};
7255
7256
3537
3537
3531
100
2757
3366
4501
        $elt->{parent}=$parent; if( $XML::Twig::weakrefs) { weaken( $elt->{parent});} ;
7257
3537
3537
3537
3531
100
2333
2652
3135
3983
         $parent->{empty}=0; $parent->{last_child}=$elt; if( $XML::Twig::weakrefs) { weaken( $parent->{last_child});} ;
7258
3537
2060
100
3780
1922
        unless( $parent->{first_child}) { $parent->{first_child}= $elt; }
7259
7260
3537
3537
3531
100
2539
3245
2832
        $elt->{prev_sibling}=$prev_sibling; if( $XML::Twig::weakrefs) { weaken( $elt->{prev_sibling});} ;
7261
3537
1477
100
3184
1018
        if( $prev_sibling) { $prev_sibling->{next_sibling}= $elt; }
7262
7263
3537
2672
        $elt->{next_sibling}= undef;
7264
3537
2556
        return $elt;
7265       }
7266
7267     sub paste_within
7268
2
2
4
      { my( $elt, $ref, $offset)= @_;
7269
2
100
2
        my $text= $ref->is_text ? $ref : $ref->next_elt( $TEXT, $ref);
7270
2
3
        my $new= $text->split_at( $offset);
7271
2
5
        $elt->paste_before( $new);
7272
2
2
        return $elt;
7273       }
7274   }
7275
7276 # load an element into a structure similar to XML::Simple's
7277 sub simplify
7278
122
122
87
  { my $elt= shift;
7279
7280     # normalize option names
7281
122
183
    my %options= @_;
7282
122
118
276
141
    %options= map { my ($key, $val)= ($_, $options{$_});
7283
118
399
                       $key=~ s{(\w)([A-Z])}{$1_\L$2}g;
7284
118
349
                       $key => $val
7285                      } keys %options;
7286
7287     # check options
7288
122
336
    my @allowed_options= qw( keyattr forcearray noattr content_key
7289                              var var_regexp variables var_attr
7290                              group_tags forcecontent
7291                              normalise_space normalize_space
7292                    );
7293
122
1464
128
1539
    my %allowed_options= map { $_ => 1 } @allowed_options;
7294
122
277
    foreach my $option (keys %options)
7295
118
50
246
      { carp "invalid option $option\n" unless( $allowed_options{$option}); }
7296
7297
122
100
447
    $options{normalise_space} ||= $options{normalize_space} || 0;
7298
7299
122
100
279
    $options{content_key} ||= 'content';
7300
122
100
202
    if( $options{content_key}=~ m{^-})
7301       { # need to remove the - and to activate extra folding
7302
5
8
        $options{content_key}=~ s{^-}{};
7303
5
5
        $options{extra_folding}= 1;
7304       }
7305     else
7306
117
118
      { $options{extra_folding}= 0; }
7307
7308
122
100
214
    $options{forcearray} ||=0;
7309
122
100
207
    if( isa( $options{forcearray}, 'ARRAY'))
7310
5
5
5
3
10
7
      { my %forcearray_tags= map { $_ => 1 } @{$options{forcearray}};
7311
5
8
        $options{forcearray_tags}= \%forcearray_tags;
7312
5
6
        $options{forcearray}= 0;
7313       }
7314
7315
122
100
310
    $options{keyattr} ||= ['name', 'key', 'id'];
7316
122
100
50
194
    if( ref $options{keyattr} eq 'ARRAY')
7317
107
107
62
148
      { foreach my $keyattr (@{$options{keyattr}})
7318
296
658
          { my( $prefix, $att)= ($keyattr=~ m{^([+-])?(.*)});
7319
296
50
563
            $prefix ||= '';
7320
296
372
            $options{key_for_all}->{$att}= 1;
7321
296
50
462
            $options{remove_key_for_all}->{$att}=1 unless( $prefix eq '+');
7322
296
50
428
            $options{prefix_key_for_all}->{$att}=1 if( $prefix eq '-');
7323           }
7324       }
7325     elsif( ref $options{keyattr} eq 'HASH')
7326
15
30
14
58
      { while( my( $elt, $keyattr)= each %{$options{keyattr}})
7327
15
49
         { my( $prefix, $att)= ($keyattr=~ m{^([+-])?(.*)});
7328
15
100
25
           $prefix ||='';
7329
15
28
           $options{key_for_elt}->{$elt}= $att;
7330
15
100
24
           $options{remove_key_for_elt}->{"$elt#$att"}=1 unless( $prefix);
7331
15
100
33
           $options{prefix_key_for_elt}->{"$elt#$att"}=1 if( $prefix eq '-');
7332          }
7333       }
7334
7335
7336
122
67
334
    $options{var}||= $options{var_attr}; # for compat with XML::Simple
7337
122
11
100
124
16
    if( $options{var}) { $options{var_values}= {}; }
7338
111
106
    else { $options{var}=''; }
7339
7340
122
100
142
    if( $options{variables})
7341
7
100
15
      { $options{var}||= 1;
7342
7
8
        $options{var_values}= $options{variables};
7343       }
7344
7345
122
50
67
194
    if( $options{var_regexp} and !$options{var})
7346
0
0
      { warn "var option not used, var_regexp option ignored\n"; }
7347
122
100
234
    $options{var_regexp} ||= '\$\{?(\w+)\}?';
7348
7349
122
194
    $elt->_simplify( \%options);
7350
7351  }
7352
7353 sub _simplify
7354
764
764
536
  { my( $elt, $options)= @_;
7355
7356
764
410
    my $data;
7357
7358
764
595
    my $gi= $XML::Twig::index2gi[$elt->{'gi'}];
7359
764
764
764
764
764
1118
1118
764
364
438
495
495
780
761
1297
939
    my @children= do { my $elt= $elt; my @children=(); my $child= $elt->{first_child}; while( $child) { push @children, $child; $child= $child->{next_sibling}; } @children; };
7360
764
732
100
67
1688
1512
    my %atts= $options->{noattr} || !$elt->{att} ? () : %{$elt->{att}};
7361
764
636
    my $nb_atts= keys %atts;
7362
764
742
    my $nb_children= $elt->children_count + $nb_atts;
7363
7364
764
452
    my %nb_children;
7365
764
1118
666
1014
    foreach (@children) { $nb_children{$_->tag}++; }
7366
764
576
1028
657
    foreach (keys %atts) { $nb_children{$_}++; }
7367
7368
764
485
    my $arrays; # tag => array where elements are stored
7369
7370
7371     # store children
7372
764
615
    foreach my $child (@children)
7373
1118
100
1066
      { if( $child->is_text)
7374           { # generate with a content key
7375
476
498
            my $text= $elt->_text_with_vars( $options);
7376
476
21
100
650
22
            if( $options->{normalise_space} >= 2) { $text= _normalize_space( $text); }
7377
476
100
75
1439
            if( $options->{force_content}
7378                 || $nb_atts
7379                 || (scalar @children > 1)
7380               )
7381
201
467
              { $data->{$options->{content_key}}= $text; }
7382             else
7383
275
392
              { $data= $text; }
7384           }
7385         else
7386           { # element with sub-elements
7387
642
545
            my $child_gi= $XML::Twig::index2gi[$child->{'gi'}];
7388
7389
642
682
            my $child_data= $child->_simplify( $options);
7390
7391             # first see if we need to simplify further the child data
7392             # simplify because of grouped tags
7393
642
100
935
            if( my $grouped_tag= $options->{group_tags}->{$child_gi})
7394               { # check that the child data is a hash with a single field
7395
9
100
67
42
                unless( (ref( $child_data) eq 'HASH')
7396                         && (keys %$child_data == 1)
7397                         && defined ( my $grouped_child_data= $child_data->{$grouped_tag})
7398                       )
7399
3
261
                  { croak "error in grouped tag $child_gi"; }
7400                 else
7401
6
4
                  { $child_data= $grouped_child_data; }
7402               }
7403             # simplify because of extra folding
7404
639
100
705
            if( $options->{extra_folding})
7405
27
100
100
88
              { if( (ref( $child_data) eq 'HASH')
7406                     && (keys %$child_data == 1)
7407                     && defined( my $content= $child_data->{$options->{content_key}})
7408                   )
7409
8
8
                  { $child_data= $content; }
7410               }
7411
7412
639
100
100
100
710
            if( my $keyatt= $child->_key_attr( $options))
7413               { # simplify element with key
7414
207
197
                my $key= $child->{'att'}->{$keyatt};
7415
207
22
100
260
16
                if( $options->{normalise_space} >= 1) { $key= _normalize_space( $key); }
7416
207
516
                $data->{$child_gi}->{$key}= $child_data;
7417               }
7418             elsif( $options->{forcearray}
7419                    || $options->{forcearray_tags}->{$child_gi}
7420                    || ( $nb_children{$child_gi} > 1)
7421                  )
7422               { # simplify element to store in an array
7423
239
100
458
                $data->{$child_gi} ||= [];
7424
239
239
154
478
                push @{$data->{$child_gi}}, $child_data;
7425               }
7426             else
7427               { # simplify element to store as a hash field
7428
193
360
                $data->{$child_gi}= $child_data;
7429               }
7430           }
7431     }
7432
7433     # store atts
7434     # TODO: deal with att that already have an element by that name
7435
761
1024
    foreach my $att (keys %atts)
7436       { # do not store if the att is a key that needs to be removed
7437
576
100
100
1597
        if( $options->{remove_key_for_all}->{$att}
7438             || $options->{remove_key_for_elt}->{"$gi#$att"}
7439           )
7440
201
210
          { next; }
7441
7442
375
100
529
        my $att_text= $options->{var} ? _replace_vars_in_text( $atts{$att}, $options) : $atts{$att} ;
7443
375
15
100
968
16
        if( $options->{normalise_space} >= 2) { $att_text= _normalize_space( $att_text); }
7444
7445
375
100
67
986
        if( $options->{prefix_key_for_all}->{$att}
7446             || $options->{prefix_key_for_elt}->{"$gi#$att"}
7447           )
7448           { # prefix the att
7449
3
5
            $data->{"-$att"}= $att_text;
7450           }
7451         else
7452           { # normal case
7453
372
588
            $data->{$att}= $att_text;
7454           }
7455       }
7456
7457
761
1665
    return $data;
7458   }
7459
7460 sub _key_attr
7461
639
639
464
  { my( $elt, $options)=@_;
7462
639
100
781
    return if( $options->{noattr});
7463
612
100
50
615
    if( $options->{key_for_all})
7464
531
515
      { foreach my $att ($elt->att_names)
7465
238
100
377
          { if( $options->{key_for_all}->{$att})
7466
198
326
              { return $att; }
7467           }
7468       }
7469     elsif( $options->{key_for_elt})
7470
81
100
131
      { if( my $key_for_elt= $options->{key_for_elt}->{$XML::Twig::index2gi[$elt->{'gi'}]} )
7471
12
100
27
          { return $key_for_elt if( defined( $elt->{'att'}->{$key_for_elt})); }
7472       }
7473
405
1721
    return;
7474   }
7475
7476 sub _text_with_vars
7477
476
476
322
  { my( $elt, $options)= @_;
7478
476
243
    my $text;
7479
476
100
445
    if( $options->{var})
7480
68
81
      { $text= _replace_vars_in_text( $elt->text, $options);
7481
68
87
        $elt->_store_var( $options);
7482       }
7483      else
7484
408
453
      { $text= $elt->text; }
7485
476
522
    return $text;
7486   }
7487
7488
7489 sub _normalize_space
7490
58
58
39
  { my $text= shift;
7491
58
103
    $text=~ s{\s+}{ }sg;
7492
58
72
    $text=~ s{^\s}{};
7493
58
69
    $text=~ s{\s$}{};
7494
58
59
    return $text;
7495   }
7496
7497
7498 sub att_nb
7499
16
100
16
48
  { return 0 unless( my $atts= $_[0]->{att});
7500
14
29
    return scalar keys %$atts;
7501   }
7502
7503 sub has_no_atts
7504
5
100
5
19
  { return 1 unless( my $atts= $_[0]->{att});
7505
3
100
13
    return scalar keys %$atts ? 0 : 1;
7506   }
7507
7508 sub _replace_vars_in_text
7509
117
117
94
  { my( $text, $options)= @_;
7510
7511
117
14
100
353
44
    $text=~ s{($options->{var_regexp})}
7512
8
15
             { if( defined( my $value= $options->{var_values}->{$2}))
7513                  { $value }
7514
6
67
               else
7515
6
33
                 { warn "unknown variable $2\n";
7516                    $1
7517                  }
7518              }gex;
7519
117
148
    return $text;
7520   }
7521
7522 sub _store_var
7523
68
68
43
  { my( $elt, $options)= @_;
7524
68
100
141
    if( defined (my $var_name= $elt->{'att'}->{$options->{var}}))
7525
3
4
       { $options->{var_values}->{$var_name}= $elt->text;
7526        }
7527   }
7528
7529
7530 # split a text element at a given offset
7531 sub split_at
7532
86
86
69
  { my( $elt, $offset)= @_;
7533
86
100
100
92
    my $text_elt= $elt->is_text ? $elt : $elt->first_child( $TEXT) || return '';
7534
85
114
    my $string= $text_elt->text;
7535
85
110
    my $left_string= substr( $string, 0, $offset);
7536
85
70
    my $right_string= substr( $string, $offset);
7537
85
33
304
    $text_elt->{pcdata}= (delete $text_elt->{empty} || 1) && $left_string;
7538
85
128
    my $new_elt= $elt->new( $XML::Twig::index2gi[$elt->{'gi'}], $right_string);
7539
85
103
    $new_elt->paste( after => $elt);
7540
85
83
    return $new_elt;
7541   }
7542
7543
7544 # split an element or its text descendants into several, in place
7545 # all elements (new and untouched) are returned
7546 sub split
7547
12
12
17
  { my $elt= shift;
7548
12
7
    my @text_chunks;
7549
12
9
    my @result;
7550
12
1
100
17
2
    if( $elt->is_text) { @text_chunks= ($elt); }
7551
11
16
    else { @text_chunks= $elt->descendants( $TEXT); }
7552
12
15
    foreach my $text_chunk (@text_chunks)
7553
12
18
      { push @result, $text_chunk->_split( 1, @_); }
7554
12
16
    return @result;
7555   }
7556
7557 # split an element or its text descendants into several, in place
7558 # created elements (those which match the regexp) are returned
7559 sub mark
7560
26
26
44
  { my $elt= shift;
7561
26
17
    my @text_chunks;
7562
26
10
    my @result;
7563
26
1
100
24
2
    if( $elt->is_text) { @text_chunks= ($elt); }
7564
25
33
    else { @text_chunks= $elt->descendants( $TEXT); }
7565
26
30
    foreach my $text_chunk (@text_chunks)
7566
26
35
      { push @result, $text_chunk->_split( 0, @_); }
7567
26
49
    return @result;
7568   }
7569
7570 # split a single text element
7571 # return_all defines what is returned: if it is true
7572 # only returns the elements created by matches in the split regexp
7573 # otherwise all elements (new and untouched) are returned
7574
7575
7576 {
7577
7578   sub _split
7579
38
38
27
    { my $elt= shift;
7580
38
20
      my $return_all= shift;
7581
38
26
      my $regexp= shift;
7582
38
25
      my @tags;
7583
7584
38
42
      while( @_)
7585
41
27
        { my $tag= shift();
7586
41
100
42
          if( ref $_[0])
7587
11
28
            { push @tags, { tag => $tag, atts => shift }; }
7588           else
7589
30
72
            { push @tags, { tag => $tag }; }
7590         }
7591
7592
38
2
100
43
3
      unless( @tags) { @tags= { tag => $elt->{parent}->gi }; }
7593
7594
38
24
      my @result; # the returned list of elements
7595
38
46
      my $text= $elt->text;
7596
38
38
      my $gi= $XML::Twig::index2gi[$elt->{'gi'}];
7597
7598       # 2 uses: if split matches then the first substring reuses $elt
7599       # once a split has occurred then the last match needs to be put in
7600       # a new element
7601
38
20
      my $previous_match= 0;
7602
7603
38
514
      while( my( $pre_match, @matches)= $text=~ /^(.*?)$regexp(.*)$/gcs)
7604
55
58
        { $text= pop @matches;
7605
55
100
55
          if( $previous_match)
7606             { # match, not the first one, create a new text ($gi) element
7607
21
50
31
              _utf8_ify( $pre_match) if( $] < 5.010);
7608
21
25
              $elt= $elt->insert_new_elt( after => $gi, $pre_match);
7609
21
100
28
              push @result, $elt if( $return_all);
7610             }
7611           else
7612             { # first match in $elt, re-use $elt for the first sub-string
7613
34
50
47
              _utf8_ify( $pre_match) if( $] < 5.010);
7614
34
42
              $elt->set_text( $pre_match);
7615
34
22
              $previous_match++; # store the fact that there was a match
7616
34
100
42
              push @result, $elt if( $return_all);
7617             }
7618
7619           # now deal with matches captured in the regexp
7620
55
100
57
          if( @matches)
7621             { # match, with capture
7622
43
27
              my $i=0;
7623
43
36
              foreach my $match (@matches)
7624                 { # create new element, text is the match
7625
53
50
60
                  _utf8_ify( $match) if( $] < 5.010);
7626
53
100
79
                  my $tag = _repl_match( $tags[$i]->{tag}, @matches) || '#PCDATA';
7627
53
50
36
                  my $atts = \%{$tags[$i]->{atts}} || {};
7628
53
20
80
20
                  my %atts= map { _repl_match( $_, @matches) => _repl_match( $atts->{$_}, @matches) } keys %$atts;
7629
53
68
                  $elt= $elt->insert_new_elt( after => $tag, \%atts, $match);
7630
53
45
                  push @result, $elt;
7631
53
298
                  $i= ($i + 1) % @tags;
7632                 }
7633             }
7634           else
7635             { # match, no captures
7636
12
9
              my $tag = $tags[0]->{tag};
7637
12
50
9
              my $atts = \%{$tags[0]->{atts}} || {};
7638
12
13
              $elt= $elt->insert_new_elt( after => $tag, $atts);
7639
12
59
              push @result, $elt;
7640             }
7641         }
7642
38
100
100
81
      if( $previous_match && $text)
7643         { # there was at least 1 match, and there is text left after the match
7644
17
18
          $elt= $elt->insert_new_elt( after => $gi, $text);
7645         }
7646
7647
38
100
45
      push @result, $elt if( $return_all);
7648
7649
38
118
      return @result; # return all elements
7650    }
7651
7652 sub _repl_match
7653
93
93
111
  { my( $val, @matches)= @_;
7654
93
113
    $val=~ s{\$(\d+)}{$matches[$1-1]}g;
7655
93
176
    return $val;
7656   }
7657
7658   # evil hack needed as sometimes
7659   my $encode_is_loaded=0; # so we only load Encode once
7660   sub _utf8_ify
7661     {
7662
1
50
25
1
76
      if( $perl_version >= 5.008 and $perl_version < 5.010 and !_keep_encoding())
7663
0
0
0
0
0
0
0
0
0
        { unless( $encode_is_loaded) { require Encode; import Encode; $encode_is_loaded++; }
7664
0
0
          Encode::_utf8_on( $_[0]); # the flag should be set but is not
7665         }
7666     }
7667
7668
7669 }
7670
7671 { my %replace_sub; # cache for complex expressions (expression => sub)
7672
7673   sub subs_text
7674
28
28
32
    { my( $elt, $regexp, $replace)= @_;
7675
7676
28
14
      my $replacement_string;
7677
28
35
      my $is_string= _is_string( $replace);
7678
7679
28
20
      my @parents;
7680
7681
28
50
      foreach my $text_elt ($elt->descendants_or_self( $TEXT))
7682         {
7683
71
100
69
          if( $is_string)
7684
15
19
            { my $text= $text_elt->text;
7685
15
27
77
30
              $text=~ s{$regexp}{ _replace_var( $replace, $1, $2, $3, $4, $5, $6, $7, $8, $9)}egx;
7686
15
19
              $text_elt->set_text( $text);
7687            }
7688           else
7689             {
7690
187
187
187
187
894
181
1012
              no utf8; # = perl 5.6
7691
56
67
112
              my $replace_sub= ( $replace_sub{$replace} ||= _install_replace_sub( $replace));
7692
56
81
              my $text= $text_elt->text;
7693
56
41
              my $pos=0; # used to skip text that was previously matched
7694
56
32
              my $found_hit;
7695
56
499
              while( my( $pre_match_string, $match_string, @var)= ($text=~ m{(.*?)($regexp)}sg))
7696
52
44
                { $found_hit=1;
7697
52
56
                  my $match_start = length( $pre_match_string);
7698
52
100
90
                  my $match = $match_start ? $text_elt->split_at( $match_start + $pos) : $text_elt;
7699
52
41
                  my $match_length = length( $match_string);
7700
52
72
                  my $post_match = $match->split_at( $match_length);
7701
52
995
                  $replace_sub->( $match, @var);
7702
7703                   # go to next
7704
51
40
                  $text_elt= $post_match;
7705
51
57
                  $text= $post_match->text;
7706
7707
51
51
100
50
100
65
533
                  if( $found_hit) { push @parents, $text_elt->{parent} unless $parents[-1] && $parents[-1]== $text_elt->{parent}; }
7708
7709                 }
7710             }
7711         }
7712
7713
27
29
37
47
      foreach my $parent (@parents) { $parent->normalize; }
7714
7715
27
52
      return $elt;
7716     }
7717
7718
7719   sub _is_string
7720
28
100
28
89
    { return ($_[0]=~ m{&e[ln]t}) ? 0: 1 }
7721
7722   sub _replace_var
7723
27
27
72
    { my( $string, @var)= @_;
7724
27
22
      unshift @var, undef;
7725
27
52
      $string=~ s{\$(\d)}{$var[$1]}g;
7726
27
71
      return $string;
7727     }
7728
7729   sub _install_replace_sub
7730
17
17
19
    { my $replace_exp= shift;
7731
17
76
      my @item= split m{(&e[ln]t\s*\([^)]*\))}, $replace_exp;
7732
17
21
      my $sub= q{ my( $match, @var)= @_; my $new; my $last_inserted=$match;};
7733
17
12
      my( $gi, $exp);
7734
17
20
      foreach my $item (@item)
7735
39
100
67
        { next if ! length $item;
7736
25
100
100
62
          if( $item=~ m{^&elt\s*\(([^)]*)\)})
7737
14
23
            { $exp= $1; }
7738           elsif( $item=~ m{^&ent\s*\(\s*([^\s)]*)\s*\)})
7739
3
7
            { $exp= " '#ENT' => $1"; }
7740           else
7741
8
11
            { $exp= qq{ '#PCDATA' => "$item"}; }
7742
25
15
15
52
23
35
          $exp=~ s{\$(\d)}{my $i= $1-1; "\$var[$i]"}eg; # replace references to matches
7743
25
31
          $sub.= qq{ \$new= \$match->new( $exp); };
7744
25
32
          $sub .= q{ $new->paste( after => $last_inserted); $last_inserted=$new;};
7745         }
7746
17
23
      $sub .= q{ $match->delete; };
7747       #$sub=~ s/;/;\n/g; warn "subs: $sub";
7748
17
4
4
4
1
1
1
1
1
1
1
1
1
4
1
1
1
852
14
4
299
3
1
131
3
1
130
3
1
107
      my $coderef= eval "sub { $NO_WARNINGS; $sub }";
7749
17
0
50
35
0
      if( $@) { croak( "invalid replacement expression $replace_exp: ",$@); }
7750
17
55
      return $coderef;
7751     }
7752
7753   }
7754
7755
7756 sub merge_text
7757
50
50
68
  { my( $e1, $e2)= @_;
7758
50
100
167
    croak "invalid merge: can only merge 2 elements"
7759         unless( isa( $e2, 'XML::Twig::Elt'));
7760
49
100
75
53
    croak "invalid merge: can only merge 2 text elements"
7761         unless( $e1->is_text && $e2->is_text && ($e1->gi eq $e2->gi));
7762
7763
47
71
    my $t1_length= length( $e1->text);
7764
7765
47
63
    $e1->set_text( $e1->text . $e2->text);
7766
7767
47
100
70
    if( my $extra_data_in_pcdata= $e2->_extra_data_in_pcdata)
7768
10
16
8
23
      { foreach my $data (@$extra_data_in_pcdata) { $e1->_push_extra_data_in_pcdata( $data->{text}, $data->{offset} + $t1_length); } }
7769
7770
47
58
    $e2->delete;
7771
7772
47
100
    return $e1;
7773   }
7774
7775 sub merge
7776
5
5
4
  { my( $e1, $e2)= @_;
7777
5
8
    my @e2_children= $e2->_children;
7778
5
50
40
10
    if( $e1->_last_child && $e1->_last_child->is_pcdata
7779         && @e2_children && $e2_children[0]->is_pcdata
7780       )
7781
4
5
      { my $t1_length= length( $e1->_last_child->{pcdata});
7782
4
5
        my $child1= $e1->_last_child;
7783
4
6
        my $child2= shift @e2_children;
7784
4
6
        $child1->{pcdata} .= $child2->{pcdata};
7785
7786
4
8
        my $extra_data= $e1->_extra_data_before_end_tag . $e2->extra_data;
7787
7788
4
100
8
        if( $extra_data)
7789
1
2
          { $e1->_del_extra_data_before_end_tag;
7790
1
2
            $child1->_push_extra_data_in_pcdata( $extra_data, $t1_length);
7791           }
7792
7793
4
100
6
        if( my $extra_data_in_pcdata= $child2->_extra_data_in_pcdata)
7794
1
1
1
2
          { foreach my $data (@$extra_data_in_pcdata) { $child1->_push_extra_data_in_pcdata( $data->{text}, $data->{offset} + $t1_length); } }
7795
7796
4
100
5
        if( my $extra_data_before_end_tag= $e2->_extra_data_before_end_tag)
7797
1
2
          { $e1->_set_extra_data_before_end_tag( $extra_data_before_end_tag); }
7798       }
7799
7800
5
1
8
2
    foreach my $e (@e2_children) { $e->move( last_child => $e1); }
7801
7802
5
8
    $e2->delete;
7803
5
8
    return $e1;
7804   }
7805
7806
7807 # recursively copy an element and returns the copy (can be huge and long)
7808 sub copy
7809
86
86
57
  { my $elt= shift;
7810
86
117
    my $copy= $elt->new( $XML::Twig::index2gi[$elt->{'gi'}]);
7811
7812
86
4
100
101
4
    if( $elt->extra_data) { $copy->set_extra_data( $elt->extra_data); }
7813
86
7
100
114
9
    if( $elt->{extra_data_before_end_tag}) { $copy->_set_extra_data_before_end_tag( $elt->{extra_data_before_end_tag}); }
7814
7815
86
2
100
96
3
    if( $elt->is_asis) { $copy->set_asis; }
7816
7817
86
100
100
100
100
100
228
    if( (exists $elt->{'pcdata'}))
7818
36
33
142
      { $copy->{pcdata}= (delete $copy->{empty} || 1) && $elt->{pcdata};
7819
36
7
100
49
10
        if( $elt->{extra_data_in_pcdata}) { $copy->_set_extra_data_in_pcdata( $elt->{extra_data_in_pcdata}); }
7820       }
7821     elsif( (exists $elt->{'cdata'}))
7822
2
3
      { $copy->_set_cdata( $elt->{cdata});
7823
2
0
50
3
0
        if( $elt->{extra_data_in_pcdata}) { $copy->_set_extra_data_in_pcdata( $elt->{extra_data_in_pcdata}); }
7824       }
7825     elsif( (exists $elt->{'target'}))
7826
1
2
      { $copy->_set_pi( $elt->{target}, $elt->{data}); }
7827     elsif( (exists $elt->{'comment'}))
7828
1
2
      { $copy->_set_comment( $elt->{comment}); }
7829     elsif( (exists $elt->{'ent'}))
7830
1
2
      { $copy->{ent}= $elt->{ent}; }
7831     else
7832
45
45
45
45
45
68
68
45
28
32
39
34
60
51
77
74
      { my @children= do { my $elt= $elt; my @children=(); my $child= $elt->{first_child}; while( $child) { push @children, $child; $child= $child->{next_sibling}; } @children; };
7833
45
100
70
        if( my $atts= $elt->{att})
7834
40
22
          { my %atts;
7835
40
100
40
            tie %atts, 'Tie::IxHash' if (keep_atts_order());
7836
40
40
46
61
            %atts= %{$atts}; # we want to do a real copy of the attributes
7837
40
110
            $copy->set_atts( \%atts);
7838           }
7839
45
70
        foreach my $child (@children)
7840
68
109
          { my $child_copy= $child->copy;
7841
68
74
            $child_copy->paste( 'last_child', $copy);
7842           }
7843       }
7844     # save links to the original location, which can be convenient and is used for namespace resolution
7845
86
80
    foreach my $link ( qw(parent prev_sibling next_sibling) )
7846
258
287
      { $copy->{former}->{$link}= $elt->{$link};
7847
258
258
50
248
321
        if( $XML::Twig::weakrefs) { weaken( $copy->{former}->{$link}); }
7848       }
7849
7850
86
81
    $copy->{empty}= $elt->{'empty'};
7851
7852
86
94
    return $copy;
7853   }
7854
7855
7856 sub delete
7857
3460
2302
  { my $elt= shift;
7858
3460
3872
    $elt->cut;
7859
3460
100
4102
    $elt->DESTROY unless $XML::Twig::weakrefs;
7860
3460
31576
    return undef;
7861   }
7862
7863 sub __destroy
7864
20368
13177
  { my $elt= shift;
7865
20368
100
20635
    return if( $XML::Twig::weakrefs);
7866
20111
100
22617
    my $t= shift || $elt->twig; # optional argument, passed in recursive calls
7867
7868
20111
20111
18070
12077
17548
14685
    foreach( @{[$elt->_children]}) { $_->DESTROY( $t); }
7869
7870     # the id reference needs to be destroyed
7871     # lots of tests to avoid warnings during the cleanup phase
7872
20111
100
80
59309
    $elt->del_id( $t) if( $ID && $t && defined( $elt->{att}) && exists( $elt->{att}->{$ID}));
7873
20111
2
2
6
2
100
19897
2
4
6
3
    if( $elt->{former}) { foreach (keys %{$elt->{former}}) { delete $elt->{former}->{$_}; } delete $elt->{former}; }
7874
20111
40222
14122
33227
    foreach (qw( keys %$elt)) { delete $elt->{$_}; }
7875
20111
21673
    undef $elt;
7876   }
7877
7878 BEGIN
7879
194
187
7
100
194
485
259317
22
{ sub set_destroy { if( $XML::Twig::weakrefs) { undef *DESTROY } else { *DESTROY= *__destroy; } }
7880
187
187
231526
  set_destroy();
7881 }
7882
7883 # ignores the element
7884 sub ignore
7885
30
30
48
  { my $elt= shift;
7886
30
34
    my $t= $elt->twig;
7887
30
46
    $t->ignore( $elt, @_);
7888   }
7889
7890 BEGIN {
7891
187
187
303
  my $pretty = 0;
7892
187
238
  my $quote = '"';
7893
187
180
  my $INDENT = ' ';
7894
187
155
  my $empty_tag_style = 0;
7895
187
192
  my $remove_cdata = 0;
7896
187
177
  my $keep_encoding = 0;
7897
187
171
  my $expand_external_entities = 0;
7898
187
163
  my $keep_atts_order = 0;
7899
187
248
  my $do_not_escape_amp_in_atts = 0;
7900
187
214
  my $WRAP = '80';
7901
187
194
  my $REPLACED_ENTS = qq{&<};
7902
7903
187
337
  my ($NSGMLS, $NICE, $INDENTED, $INDENTEDCT, $INDENTEDC, $WRAPPED, $RECORD1, $RECORD2, $INDENTEDA)= (1..9);
7904
187
935
239
1429
  my %KEEP_TEXT_TAG_ON_ONE_LINE= map { $_ => 1 } ( $INDENTED, $INDENTEDCT, $INDENTEDC, $INDENTEDA, $WRAPPED);
7905
187
561
264
646
  my %WRAPPED = map { $_ => 1 } ( $WRAPPED, $INDENTEDA, $INDENTEDC);
7906
7907
187
995
  my %pretty_print_style=
7908     ( none => 0, # no added \n
7909       nsgmls => $NSGMLS, # nsgmls-style, \n in tags
7910       # below this line styles are UNSAFE (the generated XML can be well-formed but invalid)
7911       nice => $NICE, # \n after open/close tags except when the
7912                                 # element starts with text
7913       indented => $INDENTED, # nice plus idented
7914       indented_close_tag => $INDENTEDCT, # nice plus idented
7915       indented_c => $INDENTEDC, # slightly more compact than indented (closing
7916                                 # tags are on the same line)
7917       wrapped => $WRAPPED, # text is wrapped at column
7918       record_c => $RECORD1, # for record-like data (compact)
7919       record => $RECORD2, # for record-like data (not so compact)
7920       indented_a => $INDENTEDA, # nice, indented, and with attributes on separate
7921                                 # lines as the nsgmls style, as well as wrapped
7922                                 # lines - to make the xml friendly to line-oriented tools
7923       cvs => $INDENTEDA, # alias for indented_a
7924     );
7925
7926
187
203
  my ($HTML, $EXPAND)= (1..2);
7927
187
410
  my %empty_tag_style=
7928     ( normal => 0, # <tag/>
7929       html => $HTML, # <tag />
7930       xhtml => $HTML, # <tag />
7931       expand => $EXPAND, # <tag></tag>
7932     );
7933
7934
187
354
  my %quote_style=
7935     ( double => '"',
7936       single => "'",
7937       # smart => "smart",
7938     );
7939
7940
187
168
  my $xml_space_preserve; # set when an element includes xml:space="preserve"
7941
7942
187
143
  my $output_filter; # filters the entire output (including < and >)
7943
187
161
  my $output_text_filter; # filters only the text part (tag names, attributes, pcdata)
7944
7945
187
177
  my $replaced_ents= $REPLACED_ENTS;
7946
7947
7948   # returns those pesky "global" variables so you can switch between twigs
7949   sub global_state ## no critic (Subroutines::ProhibitNestedSubs);
7950     { return
7951
11
11
97
       { pretty => $pretty,
7952          quote => $quote,
7953          indent => $INDENT,
7954          empty_tag_style => $empty_tag_style,
7955          remove_cdata => $remove_cdata,
7956          keep_encoding => $keep_encoding,
7957          expand_external_entities => $expand_external_entities,
7958          output_filter => $output_filter,
7959          output_text_filter => $output_text_filter,
7960          keep_atts_order => $keep_atts_order,
7961          do_not_escape_amp_in_atts => $do_not_escape_amp_in_atts,
7962          wrap => $WRAP,
7963          replaced_ents => $replaced_ents,
7964         };
7965     }
7966
7967   # restores the global variables
7968   sub set_global_state
7969
23
23
30
    { my $state= shift;
7970
23
28
      $pretty = $state->{pretty};
7971
23
23
      $quote = $state->{quote};
7972
23
28
      $INDENT = $state->{indent};
7973
23
25
      $empty_tag_style = $state->{empty_tag_style};
7974
23
25
      $remove_cdata = $state->{remove_cdata};
7975
23
26
      $keep_encoding = $state->{keep_encoding};
7976
23
21
      $expand_external_entities = $state->{expand_external_entities};
7977
23
21
      $output_filter = $state->{output_filter};
7978
23
37
      $output_text_filter = $state->{output_text_filter};
7979
23
19
      $keep_atts_order = $state->{keep_atts_order};
7980
23
20
      $do_not_escape_amp_in_atts = $state->{do_not_escape_amp_in_atts};
7981
23
21
      $WRAP = $state->{wrap};
7982
23
40
      $replaced_ents = $state->{replaced_ents},
7983     }
7984
7985   # sets global state to defaults
7986   sub init_global_state
7987
6
6
127
    { set_global_state(
7988        { pretty => 0,
7989          quote => '"',
7990          indent => $INDENT,
7991          empty_tag_style => 0,
7992          remove_cdata => 0,
7993          keep_encoding => 0,
7994          expand_external_entities => 0,
7995          output_filter => undef,
7996          output_text_filter => undef,
7997          keep_atts_order => undef,
7998          do_not_escape_amp_in_atts => 0,
7999          wrap => $WRAP,
8000          replaced_ents => $REPLACED_ENTS,
8001         });
8002     }
8003
8004
8005   # set the pretty_print style (in $pretty) and returns the old one
8006   # can be called from outside the package with 2 arguments (elt, style)
8007   # or from inside with only one argument (style)
8008   # the style can be either a string (one of the keys of %pretty_print_style
8009   # or a number (presumably an old value saved)
8010   sub set_pretty_print
8011
2067
100
2519
    { my $style= lc( defined $_[1] ? $_[1] : $_[0]); # so we cover both cases
8012
2067
1379
      my $old_pretty= $pretty;
8013
2067
100
4080
      if( $style=~ /^\d+$/)
8014
1945
100
2724
        { croak "invalid pretty print style $style" unless( $style < keys %pretty_print_style);
8015
1944
1314
          $pretty= $style;
8016         }
8017       else
8018
122
100
304
        { croak "invalid pretty print style '$style'" unless( exists $pretty_print_style{$style});
8019
121
132
          $pretty= $pretty_print_style{$style};
8020         }
8021
2065
100
2392
      if( $WRAPPED{$pretty} )
8022
7
50
10
        { XML::Twig::_use( 'Text::Wrap') or croak( "Text::Wrap not available, cannot use style $style"); }
8023
2065
2859
      return $old_pretty;
8024     }
8025
8026
2
2
4
  sub _pretty_print { return $pretty; }
8027
8028   # set the empty tag style (in $empty_tag_style) and returns the old one
8029   # can be called from outside the package with 2 arguments (elt, style)
8030   # or from inside with only one argument (style)
8031   # the style can be either a string (one of the keys of %empty_tag_style
8032   # or a number (presumably an old value saved)
8033   sub set_empty_tag_style
8034
141
100
141
318
    { my $style= lc( defined $_[1] ? $_[1] : $_[0]); # so we cover both cases
8035
141
138
      my $old_style= $empty_tag_style;
8036
141
100
445
      if( $style=~ /^\d+$/)
8037
25
100
119
        { croak "invalid empty tag style $style"
8038         unless( $style < keys %empty_tag_style);
8039
24
17
        $empty_tag_style= $style;
8040         }
8041       else
8042
116
100
289
        { croak "invalid empty tag style '$style'"
8043             unless( exists $empty_tag_style{$style});
8044
115
163
          $empty_tag_style= $empty_tag_style{$style};
8045         }
8046
139
187
      return $old_style;
8047     }
8048
8049   sub _pretty_print_styles
8050
27
718
100
27
102
872
    { return (sort { $pretty_print_style{$a} <=> $pretty_print_style{$b} || $a cmp $b } keys %pretty_print_style); }
8051
8052   sub set_quote
8053
3115
33
3904
    { my $style= $_[1] || $_[0];
8054
3115
2003
      my $old_quote= $quote;
8055
3115
100
3824
      croak "invalid quote '$style'" unless( exists $quote_style{$style});
8056
3114
2408
      $quote= $quote_style{$style};
8057
3114
2409
      return $old_quote;
8058     }
8059
8060   sub set_remove_cdata
8061
3115
50
3054
    { my $new_value= defined $_[1] ? $_[1] : $_[0];
8062
3115
1935
      my $old_value= $remove_cdata;
8063
3115
1577
      $remove_cdata= $new_value;
8064
3115
2240
      return $old_value;
8065     }
8066
8067
8068   sub set_indent
8069
5
100
5
9
    { my $new_value= defined $_[1] ? $_[1] : $_[0];
8070
5
7
      my $old_value= $INDENT;
8071
5
7
      $INDENT= $new_value;
8072
5
6
      return $old_value;
8073     }
8074
8075   sub set_wrap
8076
9
100
9
117
    { my $new_value= defined $_[1] ? $_[1] : $_[0];
8077
9
5
      my $old_value= $WRAP;
8078
9
5
      $WRAP= $new_value;
8079
9
19
      return $old_value;
8080     }
8081
8082
8083   sub set_keep_encoding
8084
3152
50
3379
    { my $new_value= defined $_[1] ? $_[1] : $_[0];
8085
3152
2016
      my $old_value= $keep_encoding;
8086
3152
1832
      $keep_encoding= $new_value;
8087
3152
2639
      return $old_value;
8088    }
8089
8090   sub set_replaced_ents
8091
1
50
1
3
    { my $new_value= defined $_[1] ? $_[1] : $_[0];
8092
1
1
      my $old_value= $replaced_ents;
8093
1
1
      $replaced_ents= $new_value;
8094
1
2
      return $old_value;
8095    }
8096
8097   sub do_not_escape_gt
8098
1
1
1
    { my $old_value= $replaced_ents;
8099
1
1
      $replaced_ents= q{&<}; # & needs to be first
8100
1
1
      return $old_value;
8101     }
8102
8103   sub escape_gt
8104
2
2
2
    { my $old_value= $replaced_ents;
8105
2
1
      $replaced_ents= qq{&<>}; # & needs to be first
8106
2
2
      return $old_value;
8107     }
8108
8109
4
4
16
  sub _keep_encoding { return $keep_encoding; } # so I can use elsewhere in the module
8110
8111   sub set_do_not_escape_amp_in_atts
8112
3130
50
3459
    { my $new_value= defined $_[1] ? $_[1] : $_[0];
8113
3130
2334
      my $old_value= $do_not_escape_amp_in_atts;
8114
3130
1871
      $do_not_escape_amp_in_atts= $new_value;
8115
3130
2288
      return $old_value;
8116    }
8117
8118
162
162
215
  sub output_filter { return $output_filter; }
8119
2
2
2
  sub output_text_filter { return $output_text_filter; }
8120
8121   sub set_output_filter
8122
3138
50
3197
    { my $new_value= defined $_[1] ? $_[1] : $_[0]; # can be called in object/non-object mode
8123       # if called in object mode with no argument, the filter is undefined
8124
3138
1
100
67
9131
2
      if( isa( $new_value, 'XML::Twig::Elt') || isa( $new_value, 'XML::Twig')) { undef $new_value; }
8125
3138
1855
      my $old_value= $output_filter;
8126
3138
100
100
100
100
4608
      if( !$new_value || isa( $new_value, 'CODE') )
8127
3131
1999
        { $output_filter= $new_value; }
8128       elsif( $new_value eq 'latin1')
8129
1
2
        { $output_filter= XML::Twig::latin1();
8130         }
8131       elsif( $XML::Twig::filter{$new_value})
8132
5
4
        { $output_filter= $XML::Twig::filter{$new_value}; }
8133       else
8134
1
74
        { croak "invalid output filter '$new_value'"; }
8135
8136
3137
2712
      return $old_value;
8137     }
8138
8139   sub set_output_text_filter
8140
3124
100
3107
    { my $new_value= defined $_[1] ? $_[1] : $_[0]; # can be called in object/non-object mode
8141       # if called in object mode with no argument, the filter is undefined
8142
3124
1
100
67
8255
1
      if( isa( $new_value, 'XML::Twig::Elt') || isa( $new_value, 'XML::Twig')) { undef $new_value; }
8143
3124
1840
      my $old_value= $output_text_filter;
8144
3124
100
50
100
100
4091
      if( !$new_value || isa( $new_value, 'CODE') )
8145
3116
1922
        { $output_text_filter= $new_value; }
8146       elsif( $new_value eq 'latin1')
8147
0
0
        { $output_text_filter= XML::Twig::latin1();
8148         }
8149       elsif( $XML::Twig::filter{$new_value})
8150
7
7
        { $output_text_filter= $XML::Twig::filter{$new_value}; }
8151       else
8152
1
75
        { croak "invalid output text filter '$new_value'"; }
8153
8154
3123
2363
      return $old_value;
8155     }
8156
8157   sub set_expand_external_entities
8158
3131
100
3958
    { my $new_value= defined $_[1] ? $_[1] : $_[0];
8159
3131
2107
      my $old_value= $expand_external_entities;
8160
3131
2004
      $expand_external_entities= $new_value;
8161
3131
3231
      return $old_value;
8162     }
8163
8164   sub set_keep_atts_order
8165
3116
50
3102
    { my $new_value= defined $_[1] ? $_[1] : $_[0];
8166
3116
1822
      my $old_value= $keep_atts_order;
8167
3116
1848
      $keep_atts_order= $new_value;
8168
3116
2130
      return $old_value;
8169
8170    }
8171
8172
43759
54976
  sub keep_atts_order { return $keep_atts_order; } # so I can use elsewhere in the module
8173
8174
187
179
  my %html_empty_elt;
8175
187
1870
187
397
573647
  BEGIN { %html_empty_elt= map { $_ => 1} qw( base meta link hr br param img area input col); }
8176
8177   sub start_tag
8178
5251
4063
    { my( $elt, $option)= @_;
8179
8180
8181
5251
100
6034
      return if( $elt->{gi} < $XML::Twig::SPECIAL_GI);
8182
8183
5250
100
8917
      my $extra_data= $elt->{extra_data} || '';
8184
8185
5250
4469
      my $gi= $XML::Twig::index2gi[$elt->{'gi'}];
8186
5250
3471
      my $att= $elt->{att}; # should be $elt->{att}, optimized into a pure hash look-up
8187
8188
5250
50
5371
      my $ns_map= $att ? $att->{'#original_gi'} : '';
8189
5250
11
100
5147
12
      if( $ns_map) { $gi= _restore_original_prefix( $ns_map, $gi); }
8190
5250
4005
      $gi=~ s{^#default:}{}; # remove default prefix
8191
8192
5250
17
100
5405
57
      if( $output_text_filter) { $gi= $output_text_filter->( $gi); }
8193
8194       # get the attribute and their values
8195
5250
100
100
168186
      my $att_sep = $pretty==$NSGMLS ? "\n"
8196                   : $pretty==$INDENTEDA ? "\n" . $INDENT x ($elt->level+1) . ' '
8197                   : ' '
8198                   ;
8199
8200
5250
4774
      my $replace_in_att_value= $replaced_ents . "$quote\t\r\n";
8201
5250
37
100
67
7453
31
      if( $option->{escape_gt} && $replaced_ents !~ m{>}) { $replace_in_att_value.= '>'; }
8202
8203
5250
2692
      my $tag;
8204
5250
1338
57
5193
100
4353
3383
136
9479
      my @att_names= grep { !( $_=~ m{^#(?!default:)} ) } $keep_atts_order ? keys %{$att} : sort keys %{$att};
8205
5250
100
5570
      if( @att_names)
8206
787
1319
100
655
1275
        { my $atts= join $att_sep, map { my $output_att_name= $ns_map ? _restore_original_prefix( $ns_map, $_) : $_;
8207
1319
100
1382
                                          if( $output_text_filter)
8208
4
5
                                            { $output_att_name= $output_text_filter->( $output_att_name); }
8209
1319
2359
                                          $output_att_name . '=' . $quote . _att_xml_string( $att->{$_}, $replace_in_att_value) . $quote
8210
8211                                         }
8212                                         @att_names
8213                                    ;
8214
787
1
100
100
1183
2
           if( $pretty==$INDENTEDA && @att_names == 1) { $att_sep= ' '; }
8215
787
952
           $tag= "<$gi$att_sep$atts";
8216         }
8217       else
8218
4463
3522
        { $tag= "<$gi"; }
8219
8220
5250
100
5634
      $tag .= "\n" if($pretty==$NSGMLS);
8221
8222
8223       # force empty if suitable HTML tag, otherwise use the value from the input tree
8224
5250
100
100
7741
      if( ($empty_tag_style eq $HTML) && !$elt->{first_child} && !$elt->{extra_data_before_end_tag} && $html_empty_elt{$gi})
8225
36
38
        { $elt->{empty}= 1; }
8226
5250
50
100
6054
      my $empty= defined $elt->{empty} ? $elt->{empty}
8227                : $elt->{first_child} ? 0
8228                : 1;
8229
8230
5250
100
100
100
100
100
67
67
100
100
13218
      $tag .= (!$elt->{empty} || $elt->{extra_data_before_end_tag}) ? '>' # element has content
8231             : (($empty_tag_style eq $HTML) && $html_empty_elt{$gi}) ? ' />' # html empty element
8232                                                                                      # cvs-friendly format
8233             : ( $pretty == $INDENTEDA && @att_names > 1) ? "\n" . $INDENT x $elt->level . "/>"
8234             : ( $pretty == $INDENTEDA && @att_names == 1) ? " />"
8235             : $empty_tag_style ? "></" . $XML::Twig::index2gi[$elt->{'gi'}] . ">" # $empty_tag_style is $HTML or $EXPAND
8236             : '/>'
8237             ;
8238
8239
5250
3
100
67
10259
3
      if( ( (substr( $XML::Twig::index2gi[$elt->{'gi'}], 0, 1) eq '#') && (substr( $XML::Twig::index2gi[$elt->{'gi'}], 0, 9) ne '#default:') )) { $tag= ''; }
8240
8241 #warn "TRACE: ", $tag,": ", Encode::is_utf8( $tag) ? "has flag" : "FLAG NOT SET";
8242
8243
5250
4726
50
100
5165
12603
      unless( $pretty) { return defined( $extra_data) ? $extra_data . $tag : $tag; }
8244
8245
524
364
      my $prefix='';
8246
524
285
      my $return=''; # '' or \n is to be printed before the tag
8247
524
309
      my $indent=0; # number of indents before the tag
8248
8249
524
100
100
100
100
1140
      if( $pretty==$RECORD1)
8250
29
24
        { my $level= $elt->level;
8251
29
100
32
          $return= "\n" if( $level < 2);
8252
29
100
32
          $indent= 1 if( $level == 1);
8253         }
8254
8255      elsif( $pretty==$RECORD2)
8256
4
2
        { $return= "\n";
8257
4
5
          $indent= $elt->level;
8258         }
8259
8260       elsif( $pretty==$NICE)
8261
3
3
        { my $parent= $elt->{parent};
8262
3
100
67
8
          unless( !$parent || $parent->{contains_text})
8263
2
1
            { $return= "\n"; }
8264
3
100
80
10
          $elt->{contains_text}= 1 if( ($parent && $parent->{contains_text})
8265                                      || $elt->contains_text);
8266         }
8267
8268       elsif( $KEEP_TEXT_TAG_ON_ONE_LINE{$pretty})
8269
483
466
        { my $parent= $elt->{parent};
8270
483
100
100
1057
          unless( !$parent || $parent->{contains_text})
8271
393
235
            { $return= "\n";
8272
393
437
              $indent= $elt->level;
8273             }
8274
483
100
100
1345
          $elt->{contains_text}= 1 if( ($parent && $parent->{contains_text})
8275                                      || $elt->contains_text);
8276         }
8277
8278
524
100
67
991
      if( $return || $indent)
8279         { # check for elements in which spaces should be kept
8280
410
442
          my $t= $elt->twig;
8281
410
50
500
          return $extra_data . $tag if( $xml_space_preserve);
8282
410
100
67
885
          if( $t && $t->{twig_keep_spaces_in})
8283
3
5
            { foreach my $ancestor ($elt->ancestors)
8284
3
50
8
                { return $extra_data . $tag if( $t->{twig_keep_spaces_in}->{$XML::Twig::index2gi[$ancestor->{'gi'}]}) }
8285             }
8286
8287
410
373
          $prefix= $INDENT x $indent;
8288
410
100
457
          if( $extra_data)
8289
6
31
            { $extra_data=~ s{\s+$}{};
8290
6
11
              $extra_data=~ s{^\s+}{};
8291
6
12
              $extra_data= $prefix . $extra_data . $return;
8292             }
8293         }
8294
8295
8296
524
1270
      return $return . $extra_data . $prefix . $tag;
8297     }
8298
8299   sub end_tag
8300
5200
3300
    { my $elt= shift;
8301
5200
100
25
13270
      return '' if( ($elt->{gi}<$XML::Twig::SPECIAL_GI)
8302                      || ($elt->{'empty'} && !$elt->{extra_data_before_end_tag})
8303                    );
8304
4048
2600
      my $tag= "<";
8305
4048
3869
      my $gi= $XML::Twig::index2gi[$elt->{'gi'}];
8306
8307
4048
11
100
5117
12
      if( my $map= $elt->{'att'}->{'#original_gi'}) { $gi= _restore_original_prefix( $map, $gi); }
8308
4048
2628
      $gi=~ s{^#default:}{}; # remove default prefix
8309
8310
4048
16
100
3786
53
      if( $output_text_filter) { $gi= $output_text_filter->( $XML::Twig::index2gi[$elt->{'gi'}]); }
8311
4048
4544
      $tag .= "/$gi>";
8312
8313
4048
100
8776
      $tag = ($elt->{extra_data_before_end_tag} || '') . $tag;
8314
8315
4048
3
100
67
7136
2
      if( ( (substr( $XML::Twig::index2gi[$elt->{'gi'}], 0, 1) eq '#') && (substr( $XML::Twig::index2gi[$elt->{'gi'}], 0, 9) ne '#default:') )) { $tag= ''; }
8316
8317
4048
100
7525
      return $tag unless $pretty;
8318
8319
420
278
      my $prefix='';
8320
420
237
      my $return=0; # 1 if a \n is to be printed before the tag
8321
420
237
      my $indent=0; # number of indents before the tag
8322
8323
420
100
100
100
100
2036
      if( $pretty==$RECORD1)
8324
29
100
20
        { $return= 1 if( $elt->level == 0);
8325         }
8326
8327      elsif( $pretty==$RECORD2)
8328
4
100
5
        { unless( $elt->contains_text)
8329
2
3
            { $return= 1 ;
8330
2
2
              $indent= $elt->level;
8331             }
8332         }
8333
8334       elsif( $pretty==$NICE)
8335
3
4
        { my $parent= $elt->{parent};
8336
3
50
40
25
19
          if( ( ($parent && !$parent->{contains_text}) || !$parent )
8337             && ( !$elt->{contains_text}
8338              && ($elt->{has_flushed_child} || $elt->{first_child})
8339            )
8340          )
8341
1
1
            { $return= 1; }
8342         }
8343
8344       elsif( $KEEP_TEXT_TAG_ON_ONE_LINE{$pretty})
8345
380
258
        { my $parent= $elt->{parent};
8346
380
100
60
50
6471
          if( ( ($parent && !$parent->{contains_text}) || !$parent )
8347             && ( !$elt->{contains_text}
8348              && ($elt->{has_flushed_child} || $elt->{first_child})
8349            )
8350          )
8351
141
87
            { $return= 1;
8352
141
168
              $indent= $elt->level;
8353             }
8354         }
8355
8356
420
100
67
892
      if( $return || $indent)
8357         { # check for elements in which spaces should be kept
8358
146
193
          my $t= $elt->twig;
8359
146
50
177
          return $tag if( $xml_space_preserve);
8360
146
100
67
383
          if( $t && $t->{twig_keep_spaces_in})
8361
1
1
            { foreach my $ancestor ($elt, $elt->ancestors)
8362
1
50
3
                { return $tag if( $t->{twig_keep_spaces_in}->{$XML::Twig::index2gi[$ancestor->{'gi'}]}) }
8363             }
8364
8365
146
146
100
50
178
197
          if( $return) { $prefix= ($pretty== $INDENTEDCT) ? "\n$INDENT" : "\n"; }
8366
146
151
          $prefix.= $INDENT x $indent;
8367     }
8368
8369       # add a \n at the end of the document (after the root element)
8370
420
100
488
      $tag .= "\n" unless( $elt->{parent});
8371
8372
420
654
      return $prefix . $tag;
8373     }
8374
8375   sub _restore_original_prefix
8376
33
33
24
    { my( $map, $name)= @_;
8377
33
25
      my $prefix= _ns_prefix( $name);
8378
33
100
49
      if( my $original_prefix= $map->{$prefix})
8379
26
100
28
        { if( $original_prefix eq '#default')
8380
8
28
            { $name=~ s{^$prefix:}{}; }
8381           else
8382
18
114
            { $name=~ s{^$prefix(?=:)}{$original_prefix}; }
8383         }
8384
33
43
      return $name;
8385     }
8386
8387   # buffer used to hold the text to print/sprint, to avoid passing it back and forth between methods
8388
187
166
  my @sprint;
8389
8390   # $elt is an element to print
8391   # $fh is an optional filehandle to print to
8392   # $pretty is an optional value, if true a \n is printed after the < of the
8393   # opening tag
8394   sub print
8395
112
112
145
    { my $elt= shift;
8396
8397
112
100
67
530
      my $fh= isa( $_[0], 'GLOB') || isa( $_[0], 'IO::Scalar') ? shift : undef;
8398
112
100
208
      my $old_select= defined $fh ? select $fh : undef;
8399
112
235
      print $elt->sprint( @_);
8400
112
100
508
      select $old_select if( defined $old_select);
8401     }
8402
8403
8404 # those next 2 methods need to be refactored, they are copies of the same methods in XML::Twig
8405 sub print_to_file
8406
2
2
2
  { my( $elt, $filename)= (shift, shift);
8407
2
2
    my $out_fh;
8408 # open( $out_fh, ">$filename") or _croak( "cannot create file $filename: $!"); # < perl 5.8
8409
2
50
6
    my $mode= $keep_encoding ? '>' : '>:utf8'; # >= perl 5.8
8410
2
50
47
    open( $out_fh, $mode, $filename) or _croak( "cannot create file $filename: $!"); # >= perl 5.8
8411
2
8
    $elt->print( $out_fh, @_);
8412
2
82
    close $out_fh;
8413
2
12
    return $elt;
8414   }
8415
8416 # probably only works on *nix (at least the chmod bit)
8417 # first print to a temporary file, then rename that file to the desired file name, then change permissions
8418 # to the original file permissions (or to the current umask)
8419 sub safe_print_to_file
8420
1
1
2
  { my( $elt, $filename)= (shift, shift);
8421
1
50
9
    my $perm= -f $filename ? (stat $filename)[2] & 07777 : ~umask() ;
8422
1
50
2
    XML::Twig::_use( 'File::Temp') || croak "need File::Temp to use safe_print_to_file\n";
8423
1
50
2
    XML::Twig::_use( 'File::Basename') || croak "need File::Basename to use safe_print_to_file\n";
8424
1
107
    my $tmpdir= File::Basename::dirname( $filename);
8425
1
7
    my( $fh, $tmpfilename) = File::Temp::tempfile( DIR => $tmpdir);
8426
1
263
    $elt->print_to_file( $tmpfilename, @_);
8427
1
50
0
16
    rename( $tmpfilename, $filename) or unlink $tmpfilename && _croak( "cannot move temporary file to $filename: $!");
8428
1
4
    chmod $perm, $filename;
8429
1
6
    return $elt;
8430   }
8431
8432
8433   # same as print but does not output the start tag if the element
8434   # is marked as flushed
8435   sub flush
8436
18
18
42
    { my $elt= shift;
8437
18
50
67
84
      my $up_to= $_[0] && isa( $_[0], 'XML::Twig::Elt') ? shift : $elt;
8438
18
75
      $elt->twig->flush_up_to( $up_to, @_);
8439     }
8440   sub purge
8441
4
4
5
    { my $elt= shift;
8442
4
100
67
10
      my $up_to= $_[0] && isa( $_[0], 'XML::Twig::Elt') ? shift : $elt;
8443
4
5
      $elt->twig->purge_up_to( $up_to, @_);
8444     }
8445
8446   sub _flush
8447
2130
1366
    { my $elt= shift;
8448
8449
2130
1132
      my $pretty;
8450
2130
50
33
8432
      my $fh= isa( $_[0], 'GLOB') || isa( $_[0], 'IO::Scalar') ? shift : undef;
8451
2130
50
2200
      my $old_select= defined $fh ? select $fh : undef;
8452
2130
100
2383
      my $old_pretty= defined ($pretty= shift) ? set_pretty_print( $pretty) : undef;
8453
8454
2130
50
50
2300
      $xml_space_preserve= 1 if( ($elt->inherit_att( 'xml:space') || '') eq 'preserve');
8455
8456
2130
2434
      $elt->__flush();
8457
8458
2130
4950
      $xml_space_preserve= 0;
8459
8460
2130
50
2112
      select $old_select if( defined $old_select);
8461
2130
100
2652
      set_pretty_print( $old_pretty) if( defined $old_pretty);
8462     }
8463
8464   sub __flush
8465
2130
1291
    { my $elt= shift;
8466
8467
2130
100
2415
      if( $elt->{gi} >= $XML::Twig::SPECIAL_GI)
8468
1477
50
2842
        { my $preserve= ($elt->{'att'}->{'xml:space'} || '') eq 'preserve';
8469
1477
50
1471
          $xml_space_preserve++ if $preserve;
8470
1477
100
1319
          unless( $elt->_flushed)
8471
1383
1312
            { print $elt->start_tag();
8472             }
8473
8474           # flush the children
8475
1477
1477
1477
1477
1477
956
956
1477
10951
982
1179
1019
1644
730
1180
1589
          my @children= do { my $elt= $elt; my @children=(); my $child= $elt->{first_child}; while( $child) { push @children, $child; $child= $child->{next_sibling}; } @children; };
8476
1477
1565
          foreach my $child (@children)
8477
956
1076
            { $child->_flush( $pretty); }
8478
1477
1477
50
2640
2649
          unless( $elt->{end_tag_flushed}) { print $elt->end_tag; }
8479
1477
50
10525
          $xml_space_preserve-- if $preserve;
8480           # used for pretty printing
8481
1477
575
100
2131
765
          if( my $parent= $elt->{parent}) { $parent->{has_flushed_child}= 1; }
8482         }
8483       else # text or special element
8484
653
413
        { my $text;
8485
653
500
100
100
100
50
0
941
572
          if( (exists $elt->{'pcdata'})) { $text= $elt->pcdata_xml_string;
8486
500
50
682
                                     if( my $parent= $elt->{parent})
8487
500
522
                                       { $parent->{contains_text}= 1; }
8488                                    }
8489
5
12
          elsif( (exists $elt->{'cdata'})) { $text= $elt->cdata_string;
8490
5
50
12
                                     if( my $parent= $elt->{parent})
8491
5
8
                                       { $parent->{contains_text}= 1; }
8492                                    }
8493
2
5
          elsif( (exists $elt->{'target'})) { $text= $elt->pi_string; }
8494
146
169
          elsif( (exists $elt->{'comment'})) { $text= $elt->comment_string; }
8495
0
0
          elsif( (exists $elt->{'ent'})) { $text= $elt->ent_string; }
8496
8497
653
50
1490
          print $output_filter ? $output_filter->( $text) : $text;
8498         }
8499     }
8500
8501
8502   sub xml_text
8503
26
26
33
    { my( $elt, @options)= @_;
8504
8505
26
1
1
100
67
42
4
2
      if( @options && grep { lc( $_) eq 'no_recurse' } @options) { return $elt->xml_text_only; }
8506
8507
25
19
      my $string='';
8508
8509
25
100
100
100
50
43
      if( ($elt->{gi} >= $XML::Twig::SPECIAL_GI) )
8510         { # sprint the children
8511
9
100
16
          my $child= $elt->{first_child} || '';
8512
9
12
          while( $child)
8513
14
23
            { $string.= $child->xml_text;
8514
14
23
            } continue { $child= $child->{next_sibling}; }
8515         }
8516
13
100
21
      elsif( (exists $elt->{'pcdata'})) { $string .= $output_filter ? $output_filter->($elt->pcdata_xml_string)
8517                                                            : $elt->pcdata_xml_string;
8518                                }
8519
1
50
3
      elsif( (exists $elt->{'cdata'})) { $string .= $output_filter ? $output_filter->($elt->cdata_string)
8520                                                            : $elt->cdata_string;
8521                                }
8522
2
3
      elsif( (exists $elt->{'ent'})) { $string .= $elt->ent_string; }
8523
8524
25
56
      return $string;
8525     }
8526
8527   sub xml_text_only
8528
1
5
100
100
1
2
5
    { return join '', map { $_->xml_text if( $_->is_text || (exists $_->{'ent'})) } $_[0]->_children; }
8529
8530   # same as print but except... it does not print but rather returns the string
8531   # if the second parameter is set then only the content is returned, not the
8532   # start and end tags of the element (but the tags of the included elements are
8533   # returned)
8534
8535   sub sprint
8536
3401
2595
    { my $elt= shift;
8537
3401
1916
      my( $old_pretty, $old_empty_tag_style);
8538
8539
3401
100
100
6403
      if( $_[0] && isa( $_[0], 'HASH'))
8540
7
7
8
16
        { my %args= XML::Twig::_normalize_args( %{shift()});
8541
7
4
100
12
7
          if( defined $args{PrettyPrint}) { $old_pretty = set_pretty_print( $args{PrettyPrint}); }
8542
7
3
100
11
4
           if( defined $args{EmptyTags}) { $old_empty_tag_style = set_empty_tag_style( $args{EmptyTags}); }
8543         }
8544
8545
3401
100
100
3821
      $xml_space_preserve= 1 if( ($elt->inherit_att( 'xml:space') || '') eq 'preserve');
8546
8547
3401
4029
      @sprint=();
8548
3401
3901
      $elt->_sprint( @_);
8549
3401
4178
      my $sprint= join( '', @sprint);
8550
3401
23
100
3415
289
      if( $output_filter) { $sprint= $output_filter->( $sprint); }
8551
8552
3401
100
40
8381
      if( ( ($pretty== $WRAPPED) || ($pretty==$INDENTEDC)) && !$xml_space_preserve)
8553
6
9
        { $sprint= _wrap_text( $sprint); }
8554
3401
2242
      $xml_space_preserve= 0;
8555
8556
8557
3401
4
100
3350
5
      if( defined $old_pretty) { set_pretty_print( $old_pretty); }
8558
3401
3
100
3123
3
      if( defined $old_empty_tag_style) { set_empty_tag_style( $old_empty_tag_style); }
8559
8560
3401
8494
      return $sprint;
8561     }
8562
8563   sub _wrap_text
8564
6
6
9
    { my( $string)= @_;
8565
6
6
      my $wrapped;
8566
6
45
      foreach my $line (split /\n/, $string)
8567
22
57
        { my( $initial_indent)= $line=~ m{^(\s*)};
8568
22
52
          my $wrapped_line= Text::Wrap::wrap( '', $initial_indent . $INDENT, $line) . "\n";
8569
8570           # fix glitch with Text::wrap when the first line is long and does not include spaces
8571           # the first line ends up being too short by 2 chars, but we'll have to live with it!
8572
22
71249
          $wrapped_line=~ s{^ +\n }{}s; # this prefix needs to be removed
8573
8574
22
44
          $wrapped .= $wrapped_line;
8575         }
8576
8577
6
12
      return $wrapped;
8578     }
8579
8580
8581   sub _sprint
8582
10272
5939
    { my $elt= shift;
8583
10272
100
15575
      my $no_tag= shift || 0;
8584       # in case there's some comments or PI's piggybacking
8585
8586
10272
100
9843
      if( $elt->{gi} >= $XML::Twig::SPECIAL_GI)
8587         {
8588
5284
100
9822
          my $preserve= ($elt->{'att'}->{'xml:space'} || '') eq 'preserve';
8589
5284
100
5563
          $xml_space_preserve++ if $preserve;
8590
8591
5284
100
7432
          push @sprint, $elt->start_tag unless( $no_tag);
8592
8593           # sprint the children
8594
5284
4158
          my $child= $elt->{first_child};
8595
5284
5634
          while( $child)
8596
6871
6734
            { $child->_sprint;
8597
6871
8339
              $child= $child->{next_sibling};
8598             }
8599
5284
100
7014
          push @sprint, $elt->end_tag unless( $no_tag);
8600
5284
100
6099
          $xml_space_preserve-- if $preserve;
8601         }
8602       else
8603
4988
100
5441
        { push @sprint, $elt->{extra_data} if( $elt->{extra_data}) ;
8604
4988
1779
100
100
100
100
50
7695
1890
          if( (exists $elt->{'pcdata'})) { push @sprint, $elt->pcdata_xml_string; }
8605
57
78
          elsif( (exists $elt->{'cdata'})) { push @sprint, $elt->cdata_string; }
8606
1483
1
100
100
1760
2
          elsif( (exists $elt->{'target'})) { if( ($pretty >= $INDENTED) && !$elt->{parent}->{contains_text}) { push @sprint, "\n" . $INDENT x $elt->level; }
8607
1483
1606
                                     push @sprint, $elt->pi_string;
8608                                    }
8609
1619
1
100
100
1956
2
          elsif( (exists $elt->{'comment'})) { if( ($pretty >= $INDENTED) && !$elt->{parent}->{contains_text}) { push @sprint, "\n" . $INDENT x $elt->level; }
8610
1619
1573
                                     push @sprint, $elt->comment_string;
8611                                    }
8612
50
61
          elsif( (exists $elt->{'ent'})) { push @sprint, $elt->ent_string; }
8613         }
8614
8615
10272
7002
      return;
8616     }
8617
8618   # just a shortcut to $elt->sprint( 1)
8619   sub xml_string
8620
9
9
11
    { my $elt= shift;
8621
9
100
31
      isa( $_[0], 'HASH') ? $elt->sprint( shift(), 1) : $elt->sprint( 1);
8622     }
8623
8624   sub pcdata_xml_string
8625
2293
1446
    { my $elt= shift;
8626
2293
100
2618
      if( defined( my $string= $elt->{pcdata}) )
8627         {
8628
2292
100
2114
          if( ! $elt->{extra_data_in_pcdata})
8629             {
8630
2207
100
75
10095
              $string=~ s/([$replaced_ents])/$XML::Twig::base_ent{$1}/g unless( !$replaced_ents || $keep_encoding || $elt->{asis});
8631
2207
2482
              $string=~ s{\Q]]>}{]]&gt;}g;
8632             }
8633           else
8634
85
108
            { _gen_mark( $string); # used by _(un)?protect_extra_data
8635
85
85
160
109
              foreach my $data (reverse @{$elt->{extra_data_in_pcdata}})
8636
122
163
                { my $substr= substr( $string, $data->{offset});
8637
122
100
67
261
                  if( $keep_encoding || $elt->{asis})
8638
30
43
                    { substr( $string, $data->{offset}, 0, $data->{text}); }
8639                   else
8640
92
113
                    { substr( $string, $data->{offset}, 0, _protect_extra_data( $data->{text})); }
8641                 }
8642
85
100
67
210
              unless( $keep_encoding || $elt->{asis})
8643                 {
8644
65
256
                  $string=~ s{([$replaced_ents])}{$XML::Twig::base_ent{$1}}g ;
8645
65
92
                  $string=~ s{\Q]]>}{]]&gt;}g;
8646
65
68
                  _unprotect_extra_data( $string);
8647                 }
8648             }
8649
2292
100
3921
          return $output_text_filter ? $output_text_filter->( $string) : $string;
8650         }
8651       else
8652
1
4
        { return ''; }
8653     }
8654
8655
187
187
176
167
  { my $mark;
8656
187
899513
    my( %char2ent, %ent2char);
8657     BEGIN
8658
187
187
657
      { %char2ent= ( '<' => 'lt', '&' => 'amp', '>' => 'gt');
8659
187
561
493
213660
        %ent2char= map { $char2ent{$_} => $_ } keys %char2ent;
8660       }
8661
8662     # generate a unique mark (a string) not found in the string,
8663     # used to mark < and & in the extra data
8664     sub _gen_mark
8665
85
85
54
      { $mark="AAAA";
8666
85
175
        $mark++ while( index( $_[0], $mark) > -1);
8667
85
67
        return $mark;
8668       }
8669
8670     sub _protect_extra_data
8671
92
92
78
      { my( $extra_data)= @_;
8672
92
642
        $extra_data=~ s{([<&>])}{:$mark:$char2ent{$1}:}g;
8673
92
268
        return $extra_data;
8674       }
8675
8676     sub _unprotect_extra_data
8677
65
65
656
      { $_[0]=~ s{:$mark:(\w+):}{$ent2char{$1}}g; }
8678
8679   }
8680
8681   sub cdata_string
8682
65
65
77
    { my $cdata= $_[0]->{cdata};
8683
65
1
100
83
3
      unless( defined $cdata) { return ''; }
8684
64
100
75
      if( $remove_cdata)
8685
1
16
        { $cdata=~ s/([$replaced_ents])/$XML::Twig::base_ent{$1}/g; }
8686       else
8687
63
94
        { $cdata= $CDATA_START . $cdata . $CDATA_END; }
8688
64
93
      return $cdata;
8689    }
8690
8691   sub att_xml_string
8692
1
1
1
    { my $elt= shift;
8693
1
2
      my $att= shift;
8694
8695
1
3
      my $replace= $replaced_ents . "$quote\n\r\t";
8696
1
0
0
25
5
0
      if($_[0] && $_[0]->{escape_gt} && ($replace!~ m{>}) ) { $replace .='>'; }
8697
8698
1
50
3
      if( defined (my $string= $elt->{att}->{$att}))
8699
0
0
        { return _att_xml_string( $string, $replace); }
8700       else
8701
1
5
        { return ''; }
8702     }
8703
8704   # escaped xml string for an attribute value
8705   sub _att_xml_string
8706
1319
1258
    { my( $string, $escape)= @_;
8707
1319
1
100
1978
3
      if( !defined( $string)) { return ''; }
8708
1318
100
1073
      if( $keep_encoding)
8709
181
220
        { $string=~ s{$quote}{$XML::Twig::base_ent{$quote}}g;
8710         }
8711       else
8712         {
8713
1137
100
856
          if( $do_not_escape_amp_in_atts)
8714
3
4
            { $escape=~ s{^.}{}; # seems like the most backward compatible way to remove & from the list
8715
3
14
              $string=~ s{([$escape])}{$XML::Twig::base_ent{$1}}g;
8716
3
7
              $string=~ s{&(?!(\w+|#\d+|[xX][0-9a-fA-F]+);)}{&amp;}g; # dodgy: escape & that do not start an entity
8717             }
8718           else
8719
1134
2872
            { $string=~ s{([$escape])}{$XML::Twig::base_ent{$1}}g;
8720
1134
1212
              $string=~ s{\Q]]>}{]]&gt;}g;
8721             }
8722         }
8723
8724
1318
100
3299
      return $output_text_filter ? $output_text_filter->( $string) : $string;
8725     }
8726
8727   sub ent_string
8728
52
52
34
    { my $ent= shift;
8729
52
46
      my $ent_text= $ent->{ent};
8730
52
35
      my( $t, $el, $ent_string);
8731
52
100
40
99
      if( $expand_external_entities
8732           && ($t= $ent->twig)
8733           && ($el= $t->entity_list)
8734           && ($ent_string= $el->{entities}->{$ent->ent_name}->{val})
8735         )
8736
9
15
        { return $ent_string; }
8737        else
8738
43
58
         { return $ent_text; }
8739     }
8740
8741   # returns just the text, no tags, for an element
8742   sub text
8743
3157
4458
    { my( $elt, @options)= @_;
8744
8745
3157
2
2
100
67
3810
8
3
      if( @options && grep { lc( $_) eq 'no_recurse' } @options) { return $elt->text_only; }
8746
8747
3155
1880
      my $string;
8748
8749
3155
1777
100
100
100
100
100
5614
3337
      if( (exists $elt->{'pcdata'})) { return $elt->{pcdata}; }
8750
12
31
      elsif( (exists $elt->{'cdata'})) { return $elt->{cdata}; }
8751
2
3
      elsif( (exists $elt->{'target'})) { return $elt->pi_string;}
8752
2
6
      elsif( (exists $elt->{'comment'})) { return $elt->{comment}; }
8753
2
4
      elsif( (exists $elt->{'ent'})) { return $elt->{ent} ; }
8754
8755
1360
100
1630
      my $child= $elt->{first_child} ||'';
8756
1360
1506
      while( $child)
8757         {
8758
1451
1435
          my $child_text= $child->text;
8759
1451
50
1947
          $string.= defined( $child_text) ? $child_text : '';
8760
1451
1880
        } continue { $child= $child->{next_sibling}; }
8761
8762
1360
26
100
1420
22
      unless( defined $string) { $string=''; }
8763
8764
1360
50
6978
      return $output_text_filter ? $output_text_filter->( $string) : $string;
8765     }
8766
8767   sub text_only
8768
4
12
100
100
4
10
13
    { return join '', map { $_->text if( $_->is_text || (exists $_->{'ent'})) } $_[0]->_children; }
8769
8770   sub trimmed_text
8771
14
14
12
    { my $elt= shift;
8772
14
20
      my $text= $elt->text( @_);
8773
14
57
      $text=~ s{\s+}{ }sg;
8774
14
22
      $text=~ s{^\s*}{};
8775
14
48
      $text=~ s{\s*$}{};
8776
14
42
      return $text;
8777     }
8778
8779   sub trim
8780
18
18
14
    { my( $elt)= @_;
8781
18
19
      my $pcdata= $elt->first_descendant( $TEXT);
8782
18
21
      (my $pcdata_text= $pcdata->text)=~ s{^\s+}{}s;
8783
18
25
      $pcdata->set_text( $pcdata_text);
8784
18
17
      $pcdata= $elt->last_descendant( $TEXT);
8785
18
22
      ($pcdata_text= $pcdata->text)=~ s{\s+$}{};
8786
18
19
      $pcdata->set_text( $pcdata_text);
8787
18
19
      foreach my $pcdata ($elt->descendants( $TEXT))
8788
31
31
        { ($pcdata_text= $pcdata->text)=~ s{\s+}{ }g;
8789
31
37
          $pcdata->set_text( $pcdata_text);
8790         }
8791
18
33
      return $elt;
8792     }
8793
8794
8795   # remove cdata sections (turns them into regular pcdata) in an element
8796   sub remove_cdata
8797
2
2
5
    { my $elt= shift;
8798
2
5
      foreach my $cdata ($elt->descendants_or_self( $CDATA))
8799
2
100
3
        { if( $keep_encoding)
8800
1
2
            { my $data= $cdata->{cdata};
8801
1
6
              $data=~ s{([&<"'])}{$XML::Twig::base_ent{$1}}g;
8802
1
33
6
              $cdata->{pcdata}= (delete $cdata->{empty} || 1) && $data;
8803             }
8804           else
8805
1
33
5
            { $cdata->{pcdata}= (delete $cdata->{empty} || 1) && $cdata->{cdata}; }
8806
2
50
4
          $cdata->{gi}=$XML::Twig::gi2index{$PCDATA} or $cdata->set_gi( $PCDATA);
8807
2
6
          undef $cdata->{cdata};
8808         }
8809     }
8810
8811
2
2
10
sub _is_private { return _is_private_name( $_[0]->gi); }
8812
4
4
15
sub _is_private_name { return $_[0]=~ m{^#(?!default:)}; }
8813
8814
8815 } # end of block containing package globals ($pretty_print, $quotes, keep_encoding...)
8816
8817 # merges consecutive #PCDATAs in am element
8818 sub normalize
8819
35
35
25
  { my( $elt)= @_;
8820
35
51
    my @descendants= $elt->descendants( $PCDATA);
8821
35
63
    while( my $desc= shift @descendants)
8822
149
18
18
100
200
21
53
      { if( ! length $desc->{pcdata}) { $desc->delete; next; }
8823
131
100
527
        while( @descendants && $desc->{next_sibling} && $desc->{next_sibling}== $descendants[0])
8824
15
17
          { my $to_merge= shift @descendants;
8825
15
22
            $desc->merge_text( $to_merge);
8826           }
8827       }
8828
35
57
    return $elt;
8829   }
8830
8831 # SAX export methods
8832 sub toSAX1
8833
2
2
6
  { _toSAX(@_, \&_start_tag_data_SAX1, \&_end_tag_data_SAX1); }
8834
8835 sub toSAX2
8836
2
2
6
  { _toSAX(@_, \&_start_tag_data_SAX2, \&_end_tag_data_SAX2); }
8837
8838 sub _toSAX
8839
72
72
54
  { my( $elt, $handler, $start_tag_data, $end_tag_data)= @_;
8840
72
100
88
    if( $elt->{gi} >= $XML::Twig::SPECIAL_GI)
8841
43
46
      { my $data= $start_tag_data->( $elt);
8842
43
123
        _start_prefix_mapping( $elt, $handler, $data);
8843
43
100
67
135
        if( $data && (my $start_element = $handler->can( 'start_element')))
8844
39
35
100
46
79
          { unless( $elt->_flushed) { $start_element->( $handler, $data); } }
8845
8846
43
1194
        foreach my $child ($elt->_children)
8847
51
304
          { $child->_toSAX( $handler, $start_tag_data, $end_tag_data); }
8848
8849
43
100
67
474
        if( (my $data= $end_tag_data->( $elt)) && (my $end_element = $handler->can( 'end_element')) )
8850
39
73
          { $end_element->( $handler, $data); }
8851
43
1220
        _end_prefix_mapping( $elt, $handler);
8852       }
8853     else # text or special element
8854
29
100
100
100
100
100
67
100
67
126
      { if( (exists $elt->{'pcdata'}) && (my $characters= $handler->can( 'characters')))
8855
20
49
          { $characters->( $handler, { Data => $elt->{pcdata} }); }
8856         elsif( (exists $elt->{'cdata'}))
8857
2
50
8
          { if( my $start_cdata= $handler->can( 'start_cdata'))
8858
2
5
              { $start_cdata->( $handler); }
8859
2
50
80
            if( my $characters= $handler->can( 'characters'))
8860
2
6
              { $characters->( $handler, {Data => $elt->{cdata} }); }
8861
2
50
12
            if( my $end_cdata= $handler->can( 'end_cdata'))
8862
2
5
              { $end_cdata->( $handler); }
8863           }
8864         elsif( ((exists $elt->{'target'})) && (my $pi= $handler->can( 'processing_instruction')))
8865
2
9
          { $pi->( $handler, { Target =>$elt->{target}, Data => $elt->{data} }); }
8866         elsif( ((exists $elt->{'comment'})) && (my $comment= $handler->can( 'comment')))
8867
2
9
          { $comment->( $handler, { Data => $elt->{comment} }); }
8868         elsif( ((exists $elt->{'ent'})))
8869           {
8870
1
50
0
6
            if( my $se= $handler->can( 'skipped_entity'))
8871
1
3
              { $se->( $handler, { Name => $elt->ent_name }); }
8872             elsif( my $characters= $handler->can( 'characters'))
8873
0
0
0
              { if( defined $elt->ent_string)
8874
0
0
                  { $characters->( $handler, {Data => $elt->ent_string}); }
8875                 else
8876
0
0
                  { $characters->( $handler, {Data => $elt->ent_name}); }
8877               }
8878           }
8879
8880       }
8881   }
8882
8883 sub _start_tag_data_SAX1
8884
15
15
9
  { my( $elt)= @_;
8885
15
16
    my $name= $XML::Twig::index2gi[$elt->{'gi'}];
8886
15
100
67
39
    return if( ( (substr( $XML::Twig::index2gi[$elt->{'gi'}], 0, 1) eq '#') && (substr( $XML::Twig::index2gi[$elt->{'gi'}], 0, 9) ne '#default:') ));
8887
13
12
    my $attributes={};
8888
13
8
    my $atts= $elt->{att};
8889
13
27
    while( my( $att, $value)= each %$atts)
8890
4
100
19
      { $attributes->{$att}= $value unless( ( $att=~ m{^#(?!default:)} )); }
8891
13
22
    my $data= { Name => $name, Attributes => $attributes};
8892
13
16
    return $data;
8893   }
8894
8895 sub _end_tag_data_SAX1
8896
13
13
10
  { my( $elt)= @_;
8897
13
100
67
30
    return if( ( (substr( $XML::Twig::index2gi[$elt->{'gi'}], 0, 1) eq '#') && (substr( $XML::Twig::index2gi[$elt->{'gi'}], 0, 9) ne '#default:') ));
8898
11
50
    return { Name => $XML::Twig::index2gi[$elt->{'gi'}] };
8899   }
8900
8901 sub _start_tag_data_SAX2
8902
32
32
24
  { my( $elt)= @_;
8903
32
30
    my $data={};
8904
8905
32
36
    my $name= $XML::Twig::index2gi[$elt->{'gi'}];
8906
32
100
67
72
    return if( ( (substr( $XML::Twig::index2gi[$elt->{'gi'}], 0, 1) eq '#') && (substr( $XML::Twig::index2gi[$elt->{'gi'}], 0, 9) ne '#default:') ));
8907
30
37
    $data->{Name} = $name;
8908
30
30
    $data->{Prefix} = $elt->ns_prefix;
8909
30
36
    $data->{LocalName} = $elt->local_name;
8910
30
28
    $data->{NamespaceURI} = $elt->namespace;
8911
8912     # save a copy of the data so we can re-use it for the end tag
8913
30
87
    my %sax2_data= %$data;
8914
30
35
    $elt->{twig_elt_SAX2_data}= \%sax2_data;
8915
8916     # add the attributes
8917
30
38
    $data->{Attributes}= $elt->_atts_to_SAX2;
8918
8919
30
28
    return $data;
8920   }
8921
8922 sub _atts_to_SAX2
8923
30
30
15
  { my $elt= shift;
8924
30
27
    my $SAX2_atts= {};
8925
30
30
20
49
    foreach my $att (keys %{$elt->{att}})
8926       {
8927
15
100
32
        next if( ( $att=~ m{^#(?!default:)} ));
8928
13
11
        my $SAX2_att={};
8929
13
13
        $SAX2_att->{Name} = $att;
8930
13
13
        $SAX2_att->{Prefix} = _ns_prefix( $att);
8931
13
9
        $SAX2_att->{LocalName} = _local_name( $att);
8932
13
48
        $SAX2_att->{NamespaceURI} = $elt->namespace( $SAX2_att->{Prefix});
8933
13
20
        $SAX2_att->{Value} = $elt->{'att'}->{$att};
8934
13
17
        my $SAX2_att_name= "{$SAX2_att->{NamespaceURI}}$SAX2_att->{LocalName}";
8935
8936
13
26
        $SAX2_atts->{$SAX2_att_name}= $SAX2_att;
8937       }
8938
30
37
    return $SAX2_atts;
8939   }
8940
8941 sub _start_prefix_mapping
8942
43
43
38
  { my( $elt, $handler, $data)= @_;
8943
43
13
100
100
100
118
94
    if( my $start_prefix_mapping= $handler->can( 'start_prefix_mapping')
8944
30
76
        and my @new_prefix_mappings= grep { /^\{[^}]*\}xmlns/ || /^\{$XMLNS_URI\}/ } keys %{$data->{Attributes}}
8945       )
8946
7
7
      { foreach my $prefix (@new_prefix_mappings)
8947
7
15
          { my $prefix_string= $data->{Attributes}->{$prefix}->{LocalName};
8948
7
3
100
8
3
            if( $prefix_string eq 'xmlns') { $prefix_string=''; }
8949
7
14
            my $prefix_data=
8950               { Prefix => $prefix_string,
8951                  NamespaceURI => $data->{Attributes}->{$prefix}->{Value}
8952               };
8953
7
13
            $start_prefix_mapping->( $handler, $prefix_data);
8954
7
100
122
            $elt->{twig_end_prefix_mapping} ||= [];
8955
7
7
5
18
            push @{$elt->{twig_end_prefix_mapping}}, $prefix_string;
8956           }
8957       }
8958   }
8959
8960 sub _end_prefix_mapping
8961
43
43
32
  { my( $elt, $handler)= @_;
8962
43
100
111
    if( my $end_prefix_mapping= $handler->can( 'end_prefix_mapping'))
8963
30
30
15
99
      { foreach my $prefix (@{$elt->{twig_end_prefix_mapping}})
8964
9
31
          { $end_prefix_mapping->( $handler, { Prefix => $prefix} ); }
8965       }
8966   }
8967
8968 sub _end_tag_data_SAX2
8969
30
30
73
  { my( $elt)= @_;
8970
30
100
67
71
    return if( ( (substr( $XML::Twig::index2gi[$elt->{'gi'}], 0, 1) eq '#') && (substr( $XML::Twig::index2gi[$elt->{'gi'}], 0, 9) ne '#default:') ));
8971
28
96
    return $elt->{twig_elt_SAX2_data};
8972   }
8973
8974 sub contains_text
8975
487
487
333
  { my $elt= shift;
8976
487
333
    my $child= $elt->{first_child};
8977
487
518
    while ($child)
8978
472
100
100
504
      { return 1 if( $child->is_text || (exists $child->{'ent'}));
8979
248
332
        $child= $child->{next_sibling};
8980       }
8981
263
507
    return 0;
8982   }
8983
8984 # creates a single pcdata element containing the text as child of the element
8985 # options:
8986 # - force_pcdata: when set to a true value forces the text to be in a #PCDATA
8987 # even if the original element was a #CDATA
8988 sub set_text
8989
209
209
383
  { my( $elt, $string, %option)= @_;
8990
8991
209
100
100
100
368
    if( $XML::Twig::index2gi[$elt->{'gi'}] eq $PCDATA)
8992
157
67
605
      { return $elt->{pcdata}= (delete $elt->{empty} || 1) && $string; }
8993     elsif( $XML::Twig::index2gi[$elt->{'gi'}] eq $CDATA)
8994
12
100
17
      { if( $option{force_pcdata})
8995
2
50
9
          { $elt->{gi}=$XML::Twig::gi2index{$PCDATA} or $elt->set_gi( $PCDATA);
8996
2
5
            $elt->_set_cdata('');
8997
2
33
47
            return $elt->{pcdata}= (delete $elt->{empty} || 1) && $string;
8998           }
8999         else
9000
10
12
          { return $elt->_set_cdata( $string); }
9001       }
9002     elsif( $elt->contains_a_single( $PCDATA) )
9003       { # optimized so we have a slight chance of not loosing embedded comments and pi's
9004
33
61
        $elt->{first_child}->set_pcdata( $string);
9005
33
57
        return $elt;
9006       }
9007
9008
7
7
9
19
    foreach my $child (@{[$elt->_children]})
9009
4
7
      { $child->delete; }
9010
9011
7
29
    my $pcdata= $elt->_new_pcdata( $string);
9012
7
18
    $pcdata->paste( $elt);
9013
9014
7
6
    $elt->{empty}=0;
9015
9016
7
11
    return $elt;
9017   }
9018
9019 # set the content of an element from a list of strings and elements
9020 sub set_content
9021
167
167
141
  { my $elt= shift;
9022
9023
167
100
234
    return $elt unless defined $_[0];
9024
9025     # attributes can be given as a hash (passed by ref)
9026
166
100
206
    if( ref $_[0] eq 'HASH')
9027
2
4
      { my $atts= shift;
9028
2
4
        $elt->del_atts; # usually useless but better safe than sorry
9029
2
5
        $elt->set_atts( $atts);
9030
2
100
7
        return $elt unless defined $_[0];
9031       }
9032
9033     # check next argument for #EMPTY
9034
165
100
100
445
    if( !(ref $_[0]) && ($_[0] eq $EMPTY) )
9035
6
6
4
6
      { $elt->{empty}= 1; return $elt; }
9036
9037     # case where we really want to do a set_text, the element is '#PCDATA'
9038     # or contains a single PCDATA and we only want to add text in it
9039
159
100
100
100
50
394
    if( ($XML::Twig::index2gi[$elt->{'gi'}] eq $PCDATA || $elt->contains_a_single( $PCDATA))
9040         && (@_ == 1) && !( ref $_[0]))
9041
14
21
      { $elt->set_text( $_[0]);
9042
14
16
        return $elt;
9043       }
9044     elsif( ($XML::Twig::index2gi[$elt->{'gi'}] eq $CDATA) && (@_ == 1) && !( ref $_[0]))
9045
1
2
      { $elt->_set_cdata( $_[0]);
9046
1
1
        return $elt;
9047       }
9048
9049     # delete the children
9050
144
144
93
192
    foreach my $child (@{[$elt->_children]})
9051
7
10
      { $child->delete; }
9052
9053
144
144
50
212
139
    if( @_) { $elt->{empty}=0; }
9054
9055
144
142
    foreach my $child (@_)
9056
167
100
67
313
      { if( ref( $child) && isa( $child, 'XML::Twig::Elt'))
9057           { # argument is an element
9058
27
34
            $child->paste( 'last_child', $elt);
9059           }
9060         else
9061           { # argument is a string
9062
140
100
100
244
            if( (my $pcdata= $elt->{last_child}) && $elt->{last_child}->is_pcdata)
9063               { # previous child is also pcdata: just concatenate
9064
4
33
29
                $pcdata->{pcdata}= (delete $pcdata->{empty} || 1) && $pcdata->{pcdata} . $child
9065               }
9066             else
9067               { # previous child is not a string: create a new pcdata element
9068
136
176
                $pcdata= $elt->_new_pcdata( $child);
9069
136
189
                $pcdata->paste( 'last_child', $elt);
9070               }
9071           }
9072       }
9073
9074
9075
144
156
    return $elt;
9076   }
9077
9078 # inserts an element (whose gi is given) as child of the element
9079 # all children of the element are now children of the new element
9080 # returns the new element
9081 sub insert
9082
7
7
16
  { my ($elt, @args)= @_;
9083     # first cut the children
9084
7
7
7
7
7
4
4
7
8
5
8
8
16
5
9
10
    my @children= do { my $elt= $elt; my @children=(); my $child= $elt->{first_child}; while( $child) { push @children, $child; $child= $child->{next_sibling}; } @children; };
9085
7
10
    foreach my $child (@children)
9086
4
13
      { $child->cut; }
9087     # insert elements
9088
7
19
    while( my $gi= shift @args)
9089
7
15
      { my $new_elt= $elt->new( $gi);
9090         # add attributes if needed
9091
7
100
67
23
        if( defined( $args[0]) && ( isa( $args[0], 'HASH')) )
9092
2
5
          { $new_elt->set_atts( shift @args); }
9093         # paste the element
9094
7
12
        $new_elt->paste( $elt);
9095
7
6
        $elt->{empty}=0;
9096
7
15
        $elt= $new_elt;
9097       }
9098     # paste back the children
9099
7
8
    foreach my $child (@children)
9100
4
6
      { $child->paste( 'last_child', $elt); }
9101
7
12
    return $elt;
9102   }
9103
9104 # insert a new element
9105 # $elt->insert_new_element( $opt_position, $gi, $opt_atts_hash, @opt_content);
9106 # the element is created with the same syntax as new
9107 # position is the same as in paste, first_child by default
9108 sub insert_new_elt
9109
133
133
99
  { my $elt= shift;
9110
133
99
    my $position= $_[0];
9111
133
100
100
457
    if( ($position eq 'before') || ($position eq 'after')
9112          || ($position eq 'first_child') || ($position eq 'last_child'))
9113
129
81
      { shift; }
9114     else
9115
4
4
      { $position= 'first_child'; }
9116
9117
133
198
    my $new_elt= $elt->new( @_);
9118
133
142
    $new_elt->paste( $position, $elt);
9119
9120     #if( defined $new_elt->{'att'}->{$ID}) { $new_elt->set_id( $new_elt->{'att'}->{$ID}); }
9121
9122
133
107
    return $new_elt;
9123   }
9124
9125 # wraps an element in elements which gi's are given as arguments
9126 # $elt->wrap_in( 'td', 'tr', 'table') wraps the element as a single
9127 # cell in a table for example
9128 # returns the new element
9129 sub wrap_in
9130
18
18
26
  { my $elt= shift;
9131
18
32
    while( my $gi = shift @_)
9132
18
22
      { my $new_elt = $elt->new( $gi);
9133
18
100
31
        if( $elt->{twig_current})
9134
2
4
          { my $t= $elt->twig;
9135
2
3
            $t->{twig_current}= $new_elt;
9136
2
8
            delete $elt->{'twig_current'};
9137
2
2
            $new_elt->{'twig_current'}=1;
9138           }
9139
9140
18
100
28
        if( my $parent= $elt->{parent})
9141
15
15
15
50
15
22
24
          { $new_elt->{parent}=$parent; if( $XML::Twig::weakrefs) { weaken( $new_elt->{parent});} ;
9142
15
11
100
25
11
            if( $parent->{first_child} == $elt) { $parent->{first_child}= $new_elt; }
9143
15
8
8
8
8
50
100
23
9
8
10
14
             if( $parent->{last_child} == $elt) { $parent->{empty}=0; $parent->{last_child}=$new_elt; if( $XML::Twig::weakrefs) { weaken( $parent->{last_child});} ; }
9144           }
9145         else
9146           { # wrapping the root
9147
3
7
            my $twig= $elt->twig;
9148
3
50
25
15
            if( $twig && $twig->root && ($twig->root eq $elt) )
9149
3
6
              { $twig->set_root( $new_elt);
9150               }
9151           }
9152
9153
18
100
29
        if( my $prev_sibling= $elt->{prev_sibling})
9154
4
4
4
50
4
8
6
          { $new_elt->{prev_sibling}=$prev_sibling; if( $XML::Twig::weakrefs) { weaken( $new_elt->{prev_sibling});} ;
9155
4
4
            $prev_sibling->{next_sibling}= $new_elt;
9156           }
9157
9158
18
100
33
        if( my $next_sibling= $elt->{next_sibling})
9159
7
7
          { $new_elt->{next_sibling}= $next_sibling;
9160
7
7
7
50
7
10
8
            $next_sibling->{prev_sibling}=$new_elt; if( $XML::Twig::weakrefs) { weaken( $next_sibling->{prev_sibling});} ;
9161           }
9162
18
20
        $new_elt->{first_child}= $elt;
9163
18
18
18
17
100
17
19
19
22
         $new_elt->{empty}=0; $new_elt->{last_child}=$elt; if( $XML::Twig::weakrefs) { weaken( $new_elt->{last_child});} ;
9164
9165
18
18
17
100
18
1498
25
        $elt->{parent}=$new_elt; if( $XML::Twig::weakrefs) { weaken( $elt->{parent});} ;
9166
18
18
17
100
22
24
18
        $elt->{prev_sibling}=undef; if( $XML::Twig::weakrefs) { weaken( $elt->{prev_sibling});} ;
9167
18
23
        $elt->{next_sibling}= undef;
9168
9169         # add the attributes if the next argument is a hash ref
9170
18
100
67
57
        if( defined( $_[0]) && (isa( $_[0], 'HASH')) )
9171
10
15
          { $new_elt->set_atts( shift @_); }
9172
9173
18
35
        $elt= $new_elt;
9174       }
9175
9176
18
25
    return $elt;
9177   }
9178
9179 sub replace
9180
14
14
19
  { my( $elt, $ref)= @_;
9181
9182
14
1
100
26
2
    if( $elt->{parent}) { $elt->cut; }
9183
9184
14
100
50
33
25
    if( my $parent= $ref->{parent})
9185
13
13
11
100
14
22
19
      { $elt->{parent}=$parent; if( $XML::Twig::weakrefs) { weaken( $elt->{parent});} ;
9186
13
7
100
29
7
        if( $parent->{first_child} == $ref) { $parent->{first_child}= $elt; }
9187
13
7
7
7
6
100
100
25
8
8
12
8
        if( $parent->{last_child} == $ref) { $parent->{empty}=0; $parent->{last_child}=$elt; if( $XML::Twig::weakrefs) { weaken( $parent->{last_child});} ; }
9188       }
9189     elsif( $ref->twig && $ref == $ref->twig->root)
9190
1
1
      { $ref->twig->set_root( $elt); }
9191
9192
14
100
28
    if( my $prev_sibling= $ref->{prev_sibling})
9193
6
6
5
100
8
10
6
      { $elt->{prev_sibling}=$prev_sibling; if( $XML::Twig::weakrefs) { weaken( $elt->{prev_sibling});} ;
9194
6
6
        $prev_sibling->{next_sibling}= $elt;
9195       }
9196
14
100
24
    if( my $next_sibling= $ref->{next_sibling})
9197
6
7
      { $elt->{next_sibling}= $next_sibling;
9198
6
6
5
100
6
13
9
        $next_sibling->{prev_sibling}=$elt; if( $XML::Twig::weakrefs) { weaken( $next_sibling->{prev_sibling});} ;
9199       }
9200
9201
14
14
11
100
16
18
15
    $ref->{parent}=undef; if( $XML::Twig::weakrefs) { weaken( $ref->{parent});} ;
9202
14
14
11
100
16
21
14
    $ref->{prev_sibling}=undef; if( $XML::Twig::weakrefs) { weaken( $ref->{prev_sibling});} ;
9203
14
11
    $ref->{next_sibling}= undef;
9204
14
24
    return $ref;
9205   }
9206
9207 sub replace_with
9208
2
2
4
  { my $ref= shift;
9209
2
2
    my $elt= shift;
9210
2
4
    $elt->replace( $ref);
9211
2
6
    foreach my $new_elt (reverse @_)
9212
2
5
      { $new_elt->paste( after => $elt); }
9213
2
4
    return $elt;
9214   }
9215
9216
9217 # move an element, same syntax as paste, except the element is first cut
9218 sub move
9219
8
8
11
  { my $elt= shift;
9220
8
11
    $elt->cut;
9221
8
12
    $elt->paste( @_);
9222
8
10
    return $elt;
9223   }
9224
9225
9226 # adds a prefix to an element, creating a pcdata child if needed
9227 sub prefix
9228
23
23
47
  { my ($elt, $prefix, $option)= @_;
9229
23
100
100
77
    my $asis= ($option && ($option eq 'asis')) ? 1 : 0;
9230
23
100
100
50
60
133
    if( (exists $elt->{'pcdata'})
9231         && (($asis && $elt->{asis}) || (!$asis && ! $elt->{asis}))
9232       )
9233
4
33
23
      { $elt->{pcdata}= (delete $elt->{empty} || 1) && $prefix . $elt->{pcdata}; }
9234     elsif( $elt->{first_child} && $elt->{first_child}->is_pcdata
9235         && ( ($asis && $elt->{first_child}->{asis})
9236             || (!$asis && ! $elt->{first_child}->{asis}))
9237          )
9238       {
9239
6
17
        $elt->{first_child}->set_pcdata( $prefix . $elt->{first_child}->pcdata);
9240       }
9241     else
9242
13
23
      { my $new_elt= $elt->_new_pcdata( $prefix);
9243
13
100
28
        my $pos= (exists $elt->{'pcdata'}) ? 'before' : 'first_child';
9244
13
19
        $new_elt->paste( $pos => $elt);
9245
13
5
100
21
8
        if( $asis) { $new_elt->set_asis; }
9246       }
9247
23
32
    return $elt;
9248   }
9249
9250 # adds a suffix to an element, creating a pcdata child if needed
9251 sub suffix
9252
19
19
31
  { my ($elt, $suffix, $option)= @_;
9253
19
100
100
58
    my $asis= ($option && ($option eq 'asis')) ? 1 : 0;
9254
19
100
100
50
60
98
    if( (exists $elt->{'pcdata'})
9255         && (($asis && $elt->{asis}) || (!$asis && ! $elt->{asis}))
9256       )
9257
3
33
15
      { $elt->{pcdata}= (delete $elt->{empty} || 1) && $elt->{pcdata} . $suffix; }
9258     elsif( $elt->{last_child} && $elt->{last_child}->is_pcdata
9259         && ( ($asis && $elt->{last_child}->{asis})
9260             || (!$asis && ! $elt->{last_child}->{asis}))
9261          )
9262
6
14
      { $elt->{last_child}->set_pcdata( $elt->{last_child}->pcdata . $suffix); }
9263     else
9264
10
17
      { my $new_elt= $elt->_new_pcdata( $suffix);
9265
10
100
18
        my $pos= (exists $elt->{'pcdata'}) ? 'after' : 'last_child';
9266
10
14
        $new_elt->paste( $pos => $elt);
9267
10
4
100
14
6
        if( $asis) { $new_elt->set_asis; }
9268       }
9269
19
29
    return $elt;
9270   }
9271
9272 # create a path to an element ('/root/.../gi)
9273 sub path
9274
21
21
92
  { my $elt= shift;
9275
21
21
    my @context= ( $elt, $elt->ancestors);
9276
21
55
18
40
    return "/" . join( "/", reverse map {$_->gi} @context);
9277   }
9278
9279 sub xpath
9280
6
6
46
  { my $elt= shift;
9281
6
4
    my $xpath;
9282
6
8
    foreach my $ancestor (reverse $elt->ancestors_or_self)
9283
18
16
      { my $gi= $XML::Twig::index2gi[$ancestor->{'gi'}];
9284
18
12
        $xpath.= "/$gi";
9285
18
20
        my $index= $ancestor->prev_siblings( $gi) + 1;
9286
18
100
100
43
        unless( ($index == 1) && !$ancestor->next_sibling( $gi))
9287
10
24
          { $xpath.= "[$index]"; }
9288       }
9289
6
14
    return $xpath;
9290   }
9291
9292 # methods used mainly by wrap_children
9293
9294 # return a string with the
9295 # for an element <foo><elt att="val">...</elt><elt2/><elt>...</elt></foo>
9296 # returns '<elt att="val"><elt2><elt>'
9297 sub _stringify_struct
9298
13
13
14
  { my( $elt, %opt)= @_;
9299
13
7
    my $string='';
9300
13
16
    my $pretty_print= set_pretty_print( 'none');
9301
13
18
    foreach my $child ($elt->_children)
9302
37
37
50
43
55
      { $child->add_id; $string .= $child->start_tag( { escape_gt => 1 }) ||''; }
9303
13
16
    set_pretty_print( $pretty_print);
9304
13
19
    return $string;
9305   }
9306
9307 # wrap a series of elements in a new one
9308 sub _wrap_range
9309
9
9
9
  { my $elt= shift;
9310
9
6
    my $gi= shift;
9311
9
50
25
    my $atts= isa( $_[0], 'HASH') ? shift : undef;
9312
9
16
    my $range= shift; # the string with the tags to wrap
9313
9314
9
12
    my $t= $elt->twig;
9315
9316     # get the tags to wrap
9317
9
7
    my @to_wrap;
9318
9
52
    while( $range=~ m{<\w+\s+[^>]*id=("[^"]*"|'[^']*')[^>]*>}g)
9319
14
23
      { push @to_wrap, $t->elt_id( substr( $1, 1, -1)); }
9320
9321
9
50
15
    return '' unless @to_wrap;
9322
9323
9
5
    my $to_wrap= shift @to_wrap;
9324
9
20
    my %atts= %$atts;
9325
9
12
    my $new_elt= $to_wrap->wrap_in( $gi, \%atts);
9326
9
14
    $_->move( last_child => $new_elt) foreach (@to_wrap);
9327
9328
9
45
    return '';
9329   }
9330
9331 # wrap children matching a regexp in a new element
9332 sub wrap_children
9333
13
13
21
  { my( $elt, $regexp, $gi, $atts)= @_;
9334
9335
13
100
32
    $atts ||={};
9336
9337
13
16
    my $elt_as_string= $elt->_stringify_struct; # stringify the elt structure
9338
13
13
49
14
    $regexp=~ s{(<[^>]*>)}{_match_expr( $1)}eg; # in the regexp, replace gi's by the proper regexp
9339
13
9
1
1
1
1
449
21388
3
1
124
    $elt_as_string=~ s{($regexp)}{$elt->_wrap_range( $gi, $atts, $1)}eg; # then do the actual replace
9340
9341
13
38
    return $elt;
9342   }
9343
9344 sub _match_expr
9345
22
22
159
  { my $tag= shift;
9346
22
27
    my( $gi, %atts)= XML::Twig::_parse_start_tag( $tag);
9347
13
69
    return _match_tag( $gi, %atts);
9348   }
9349
9350
9351 sub _match_tag
9352
13
13
16
  { my( $elt, %atts)= @_;
9353
13
14
    my $string= "<$elt\\b";
9354
13
23
    foreach my $key (sort keys %atts)
9355
14
10
      { my $val= qq{\Q$atts{$key}\E};
9356
14
25
        $string.= qq{[^>]*$key=(?:"$val"|'$val')};
9357       }
9358
13
12
    $string.= qq{[^>]*>};
9359
13
35
    return "(?:$string)";
9360   }
9361
9362 sub field_to_att
9363
6
6
11
  { my( $elt, $cond, $att)= @_;
9364
6
67
20
    $att ||= $cond;
9365
6
100
11
    my $child= $elt->first_child( $cond) or return undef;
9366
4
9
    $elt->set_att( $att => $child->text);
9367
4
7
    $child->cut;
9368
4
19
    return $elt;
9369   }
9370
9371 sub att_to_field
9372
5
5
22
  { my( $elt, $att, $tag)= @_;
9373
5
67
13
    $tag ||= $att;
9374
5
10
    my $child= $elt->insert_new_elt( first_child => $tag, $elt->{'att'}->{$att});
9375
5
9
    $elt->del_att( $att);
9376
5
8
    return $elt;
9377   }
9378
9379 # sort children methods
9380
9381 sub sort_children_on_field
9382
2
2
3
  { my $elt = shift;
9383
2
2
    my $field = shift;
9384
2
9
9
8
11
    my $get_key= sub { return $_[0]->field( $field) };
9385
2
5
    return $elt->sort_children( $get_key, @_);
9386   }
9387
9388 sub sort_children_on_att
9389
1
1
1
  { my $elt = shift;
9390
1
2
    my $att = shift;
9391
1
3
3
4
9
    my $get_key= sub { return $_[0]->{'att'}->{$att} };
9392
1
3
    return $elt->sort_children( $get_key, @_);
9393   }
9394
9395 sub sort_children_on_value
9396
1
1
1
  { my $elt = shift;
9397     #my $get_key= eval qq{ sub { $NO_WARNINGS; return \$_[0]->text } };
9398
1
2
    my $get_key= \&text;
9399
1
3
    return $elt->sort_children( $get_key, @_);
9400   }
9401
9402 sub sort_children
9403
5
5
21
  { my( $elt, $get_key, %opt)=@_;
9404
5
100
21
    $opt{order} ||= 'normal';
9405
5
100
12
    $opt{type} ||= 'alpha';
9406
5
100
13
    my( $par_a, $par_b)= ($opt{order} eq 'reverse') ? qw( b a) : qw ( a b) ;
9407
5
100
11
    my $op= ($opt{type} eq 'numeric') ? '<=>' : 'cmp' ;
9408
5
10
    my @children= $elt->cut_children;
9409
5
100
100
13
    if( $opt{type} eq 'numeric')
9410
9
14
9
15
      { @children= map { $_->[1] }
9411
9
9
                    sort { $a->[0] <=> $b->[0] }
9412
2
3
                    map { [ $get_key->( $_), $_] } @children;
9413       }
9414     elsif( $opt{type} eq 'alpha')
9415
6
5
16
7
      { @children= map { $_->[1] }
9416
6
4
                    sort { $a->[0] cmp $b->[0] }
9417
2
2
                    map { [ $get_key->( $_), $_] } @children;
9418       }
9419     else
9420
1
97
      { croak "wrong sort type '$opt{type}', should be either 'alpha' or 'numeric'"; }
9421
9422
4
100
10
    @children= reverse @children if( $opt{order} eq 'reverse');
9423
4
9
    $elt->set_content( @children);
9424   }
9425
9426
9427 # comparison methods
9428
9429 sub before
9430
6
6
28
  { my( $a, $b)=@_;
9431
6
4
2
100
6
4
2
    if( $a->cmp( $b) == -1) { return 1; } else { return 0; }
9432   }
9433
9434 sub after
9435
3
3
13
  { my( $a, $b)=@_;
9436
3
1
2
100
3
1
3
    if( $a->cmp( $b) == 1) { return 1; } else { return 0; }
9437   }
9438
9439 sub lt
9440
2
2
2
  { my( $a, $b)=@_;
9441
2
100
3
    return 1 if( $a->cmp( $b) == -1);
9442
1
2
    return 0;
9443   }
9444
9445 sub le
9446
2
2
4
  { my( $a, $b)=@_;
9447
2
100
3
    return 1 unless( $a->cmp( $b) == 1);
9448
1
2
    return 0;
9449   }
9450
9451 sub gt
9452
2
2
2
  { my( $a, $b)=@_;
9453
2
100
3
    return 1 if( $a->cmp( $b) == 1);
9454
1
3
    return 0;
9455   }
9456
9457 sub ge
9458
2
2
2
  { my( $a, $b)=@_;
9459
2
100
2
    return 1 unless( $a->cmp( $b) == -1);
9460
1
3
    return 0;
9461   }
9462
9463
9464 sub cmp
9465
886
886
544
  { my( $a, $b)=@_;
9466
9467     # easy cases
9468
886
100
1136
    return 0 if( $a == $b);
9469
843
100
858
    return 1 if( $a->in($b)); # a in b => a starts after b
9470
696
100
651
    return -1 if( $b->in($a)); # b in a => a starts before b
9471
9472     # ancestors does not include the element itself
9473
538
573
    my @a_pile= ($a, $a->ancestors);
9474
538
478
    my @b_pile= ($b, $b->ancestors);
9475
9476     # the 2 elements are not in the same twig
9477
538
100
711
    return undef unless( $a_pile[-1] == $b_pile[-1]);
9478
9479     # find the first non common ancestors (they are siblings)
9480
536
365
    my $a_anc= pop @a_pile;
9481
536
328
    my $b_anc= pop @b_pile;
9482
9483
536
1007
    while( $a_anc == $b_anc)
9484
810
490
      { $a_anc= pop @a_pile;
9485
810
920
        $b_anc= pop @b_pile;
9486       }
9487
9488     # from there move left and right and figure out the order
9489
536
411
    my( $a_prev, $a_next, $b_prev, $b_next)= ($a_anc, $a_anc, $b_anc, $b_anc);
9490
536
296
    while()
9491
538
100
882
      { $a_prev= $a_prev->{prev_sibling} || return( -1);
9492
323
100
588
        return 1 if( $a_prev == $b_next);
9493
145
100
227
        $a_next= $a_next->{next_sibling} || return( 1);
9494
114
100
1214
        return -1 if( $a_next == $b_prev);
9495
32
100
58
        $b_prev= $b_prev->{prev_sibling} || return( 1);
9496
25
100
62
        return -1 if( $b_prev == $a_next);
9497
7
100
13
        $b_next= $b_next->{next_sibling} || return( -1);
9498
5
100
13
        return 1 if( $b_next == $a_prev);
9499       }
9500   }
9501
9502 sub _dump
9503
53
53
42
  { my( $elt, $option)= @_;
9504
9505
53
100
64
    my $atts = defined $option->{atts} ? $option->{atts} : 1;
9506
53
100
49
    my $extra = defined $option->{extra} ? $option->{extra} : 0;
9507
53
100
47
    my $short_text = defined $option->{short_text} ? $option->{short_text} : 40;
9508
9509
53
33
    my $sp= '| ';
9510
53
45
    my $indent= $sp x $elt->level;
9511
53
45
    my $indent_sp= ' ' x $elt->level;
9512
9513
53
34
    my $dump='';
9514
53
100
46
    if( $elt->is_elt)
9515       {
9516
27
32
        $dump .= $indent . '|-' . $XML::Twig::index2gi[$elt->{'gi'}];
9517
9518
27
100
100
45
        if( $atts && (my @atts= $elt->att_names) )
9519
3
3
4
12
          { $dump .= ' ' . join( ' ', map { qq{$_="} . $elt->{'att'}->{$_} . qq{"} } @atts); }
9520
9521
27
24
        $dump .= "\n";
9522
27
10
100
31
13
        if( $extra) { $dump .= $elt->_dump_extra_data( $indent, $indent_sp, $short_text); }
9523
27
47
27
27
27
27
47
47
27
22
74
14
21
21
28
29
51
33
        $dump .= join( "", map { $_->_dump( $option) } do { my $elt= $elt; my @children=(); my $child= $elt->{first_child}; while( $child) { push @children, $child; $child= $child->{next_sibling}; } @children; });
9524       }
9525     else
9526       {
9527
26
100
50
100
100
50
37
        if( (exists $elt->{'pcdata'}))
9528
20
25
          { $dump .= "$indent|-PCDATA: '" . _short_text( $elt->{pcdata}, $short_text) . "'\n" }
9529         elsif( (exists $elt->{'ent'}))
9530
0
0
          { $dump .= "$indent|-ENTITY: '" . _short_text( $elt->{ent}, $short_text) . "'\n" }
9531         elsif( (exists $elt->{'cdata'}))
9532
3
9
          { $dump .= "$indent|-CDATA: '" . _short_text( $elt->{cdata}, $short_text) . "'\n" }
9533         elsif( (exists $elt->{'comment'}))
9534
1
2
          { $dump .= "$indent|-COMMENT: '" . _short_text( $elt->comment_string, $short_text) . "'\n" }
9535         elsif( (exists $elt->{'target'}))
9536
2
5
          { $dump .= "$indent|-PI: '" . $elt->{target} . "' - '" . _short_text( $elt->{data}, $short_text) . "'\n" }
9537
26
8
100
32
9
        if( $extra) { $dump .= $elt->_dump_extra_data( $indent, $indent_sp, $short_text); }
9538       }
9539
53
89
    return $dump;
9540   }
9541
9542 sub _dump_extra_data
9543
18
18
15
  { my( $elt, $indent, $indent_sp, $short_text)= @_;
9544
18
11
    my $dump='';
9545
18
100
19
    if( $elt->extra_data)
9546
2
3
      { my $extra_data = $indent . "|-- (cpi before) '" . _short_text( $elt->extra_data, $short_text) . "'";
9547
2
4
        $extra_data=~ s{\n}{$indent_sp}g;
9548
2
2
        $dump .= $extra_data . "\n";
9549       }
9550
18
100
22
    if( $elt->{extra_data_in_pcdata})
9551
1
1
1
1
      { foreach my $data ( @{$elt->{extra_data_in_pcdata}})
9552
2
6
          { my $extra_data = $indent . "|-- (cpi offset $data->{offset}) '" . _short_text( $data->{text}, $short_text) . "'";
9553
2
2
            $extra_data=~ s{\n}{$indent_sp}g;
9554
2
3
            $dump .= $extra_data . "\n";
9555           }
9556       }
9557
18
100
20
    if( $elt->{extra_data_before_end_tag})
9558
1
3
      { my $extra_data = $indent . "|-- (cpi end) '" . _short_text( $elt->{extra_data_before_end_tag}, $short_text) . "'";
9559
1
2
        $extra_data=~ s{\n}{$indent_sp}g;
9560
1
1
        $dump .= $extra_data . "\n";
9561       }
9562
18
24
    return $dump;
9563   }
9564
9565
9566 sub _short_text
9567
32
32
69
  { my( $string, $length)= @_;
9568
32
25
100
100
83
45
    if( !$length || (length( $string) < $length) ) { return $string; }
9569
7
9
    my $l1= (length( $string) -5) /2;
9570
7
6
    my $l2= length( $string) - ($l1 + 5);
9571
7
18
    return substr( $string, 0, $l1) . ' ... ' . substr( $string, -$l2);
9572   }
9573
9574
9575
235
235
297
sub _and { return _join_defined( ' && ', @_); }
9576
235
550
235
243
1173
sub _join_defined { return join( shift(), grep { $_ } @_); }
9577
9578 1;