, "fix the attribute
|
1080
|
|
|
|
|
|
|
} |
|
1081
|
|
|
|
|
|
|
my $q= '
|
|
1082
|
|
|
|
|
|
|
if( _use( 'HTML::Entities::Numbered')) { $$xml=name2hex_xml( $$xml); } |
|
1083
|
|
|
|
|
|
|
if( $$xml=~ m{$q}) |
|
1084
|
|
|
|
|
|
|
{ $$xml=~ s{$q}{
|
|
1085
|
|
|
|
|
|
|
} |
|
1086
|
|
|
|
|
|
|
else |
|
1087
|
|
|
|
|
|
|
{ my $encoding= _encoding_from_meta( $tree); |
|
1088
|
|
|
|
|
|
|
unless( keys %xml_parser_encoding) { %xml_parser_encoding= _xml_parser_encodings(); } |
|
1089
|
|
|
|
|
|
|
|
|
1090
|
|
|
|
|
|
|
if( ! $add_decl) |
|
1091
|
|
|
|
|
|
|
{ if( $xml_parser_encoding{$encoding}) |
|
1092
|
|
|
|
|
|
|
{ $add_decl=1; } |
|
1093
|
|
|
|
|
|
|
elsif( $encoding eq 'euc-jp' && $xml_parser_encoding{'x-euc-jp-jisx0221'}) |
|
1094
|
|
|
|
|
|
|
{ $encoding="x-euc-jp-jisx0221"; $add_decl=1;} |
|
1095
|
|
|
|
|
|
|
elsif( $encoding eq 'shift-jis' && $xml_parser_encoding{'x-sjis-jisx0221'}) |
|
1096
|
|
|
|
|
|
|
{ $encoding="x-sjis-jisx0221"; $add_decl=1;} |
|
1097
|
|
|
|
|
|
|
|
|
1098
|
|
|
|
|
|
|
if( $add_decl) |
|
1099
|
|
|
|
|
|
|
{ $$xml=~ s{^(<\?xml.*?\?>)?}{}s; |
|
1100
|
|
|
|
|
|
|
#warn " added decl (encoding $encoding)\n"; |
|
1101
|
|
|
|
|
|
|
} |
|
1102
|
|
|
|
|
|
|
else |
|
1103
|
|
|
|
|
|
|
{ $$xml=~ s{^(<\?xml.*?\?>)?}{}s; |
|
1104
|
|
|
|
|
|
|
#warn " converting to utf8 from $encoding\n"; |
|
1105
|
|
|
|
|
|
|
$$xml= _to_utf8( $encoding, $$xml); |
|
1106
|
|
|
|
|
|
|
} |
|
1107
|
|
|
|
|
|
|
} |
|
1108
|
|
|
|
|
|
|
else |
|
1109
|
|
|
|
|
|
|
{ $$xml=~ s{^(<\?xml.*?\?>)?}{}s; |
|
1110
|
|
|
|
|
|
|
#warn " converting to utf8 from $encoding\n"; |
|
1111
|
|
|
|
|
|
|
$$xml= _to_utf8( $encoding, $$xml); |
|
1112
|
|
|
|
|
|
|
} |
|
1113
|
|
|
|
|
|
|
} |
|
1114
|
|
|
|
|
|
|
} |
|
1115
|
|
|
|
|
|
|
} |
|
1116
|
|
|
|
|
|
|
|
|
1117
|
|
|
|
|
|
|
# some versions of HTML::TreeBuilder escape CDATA sections |
|
1118
|
|
|
|
|
|
|
$$xml=~ s{(<!\[CDATA\[.*?\]\]>)}{_unescape_cdata( $1)}eg; |
|
1119
|
|
|
|
|
|
|
|
|
1120
|
|
|
|
|
|
|
} |
|
1121
|
|
|
|
|
|
|
|
|
1122
|
|
|
|
|
|
|
# List the encodings XML::Parser can decode natively.
# Scans every @INC directory for XML/Parser/Encodings/*.enc maps and
# returns a flat ( name => 1, ... ) list, meant to be assigned to a hash.
sub _xml_parser_encodings {
    # this one is included by default, there is no map for it in @INC
    my @names = ('iso-8859-1');

    for my $dir (@INC) {
        my $pattern = File::Spec->catdir( $dir, 'XML', 'Parser', 'Encodings', '*.enc' );
        push @names, map { basename( $_, '.enc' ) } glob($pattern);
    }

    return map { ( $_ => 1 ) } @names;
}
|
1128
|
|
|
|
|
|
|
} |
|
1129
|
|
|
|
|
|
|
|
|
1130
|
|
|
|
|
|
|
|
|
1131
|
|
|
|
|
|
|
# Undo the entity escaping some versions of HTML::TreeBuilder apply inside
# CDATA sections.
#   $cdata - the text of one CDATA section (markers included)
# Returns the text with &lt; &gt; and &amp; turned back into < > and &.
sub _unescape_cdata {
    my ($cdata) = @_;

    # &amp; is handled last so that "&amp;lt;" becomes "&lt;", not "<"
    $cdata =~ s{&lt;}{<}g;
    $cdata =~ s{&gt;}{>}g;
    $cdata =~ s{&amp;}{&}g;

    return $cdata;
}
|
1138
|
|
|
|
|
|
|
|
|
1139
|
|
|
|
|
|
|
# Serialize an HTML::Element tree as XML.
# Fork of HTML::Element::as_XML, which is a little too buggy and
# inconsistent between versions for my liking.
#   $elt - root element of the tree to serialize
# Returns the XML string. Attribute names that are not valid XML names are
# repaired in place on the tree instead of dying.
sub _as_XML {
    my ($elt) = @_;
    my $xml = '';
    my $empty_element_map = $elt->_empty_element_map;

    my ( $tag, $node, $start );    # per-iteration scratch
    $elt->traverse(
        sub {
            ( $node, $start ) = @_;
            if ( ref $node ) {
                # it's an element
                $tag = $node->{'_tag'};
                if ($start) {
                    # on the way in
                    foreach my $att ( grep { !m{^(_|/$)} } keys %$node ) {
                        # fix attribute names instead of dying
                        my $new_att = $att;
                        if ( $att =~ m{^\d} ) { $new_att = "a$att"; }
                        $new_att =~ s{[^\w\d:_-]}{}g;
                        $new_att ||= 'a';
                        if ( $new_att ne $att ) { $node->{$new_att} = delete $node->{$att}; }
                    }

                    if ( $empty_element_map->{$tag} && ( !@{ $node->{'_content'} || [] } ) ) {
                        $xml .= $node->starttag_XML( undef, 1 );
                    }
                    else {
                        $xml .= $node->starttag_XML(undef);
                    }
                }
                else {
                    # on the way out
                    unless ( $empty_element_map->{$tag} and !@{ $node->{'_content'} || [] } ) {
                        $xml .= $node->endtag_XML();
                    }    # otherwise it will have been an <... /> tag.
                }
            }
            elsif ( $node =~ /<!\[CDATA\[/ ) {
                # the text includes CDATA: chunks are CDATA sections or normal text,
                # and only the normal text gets escaped
                foreach my $chunk ( split /(<!\[CDATA\[.*?\]\]>)/s, $node ) {
                    $xml .= $chunk =~ m{<!\[CDATA\[} ? $chunk : _xml_escape($chunk);
                }
            }
            else {
                # it's just text
                $xml .= _xml_escape($node);
            }
            1;    # keep traversing
        }
    );
    return $xml;
}
|
1187
|
|
|
|
|
|
|
|
|
1188
|
|
|
|
|
|
|
# Escape a chunk of text for inclusion in XML.
#   $html - raw text
# Returns the text with & < > " ' replaced by the five predefined XML
# entities. Every ampersand is escaped, including ones that already start
# an entity reference (the smarter pattern below is disabled).
sub _xml_escape {
    my ($html) = @_;

    # disabled: would leave existing entity references alone
    $html =~ s{&(?!                     # An ampersand that isn't followed by...
                  (  \#[0-9]+;       |  #   A hash mark, digits and semicolon, or
                    \#x[0-9a-fA-F]+; |  #   A hash mark, "x", hex digits and semicolon, or
                    [\w]+;              #   A valid unicode entity name and semicolon
                  )
                )
              }
              {&amp;}gx if 0;           # Needs to be escaped to amp

    $html =~ s{&}{&amp;}g;

    # in old versions of HTML::TreeBuilder &amp; can come out as &Amp;
    if ( $HTML::TreeBuilder::VERSION && $HTML::TreeBuilder::VERSION <= 3.23 ) {
        $html =~ s{&Amp;}{&amp;}g;
    }

    # simple character escapes
    $html =~ s/</&lt;/g;
    $html =~ s/>/&gt;/g;
    $html =~ s/"/&quot;/g;
    $html =~ s/'/&apos;/g;

    return $html;
}
|
1212
|
|
|
|
|
|
|
|
|
1213
|
|
|
|
|
|
|
|
|
1214
|
|
|
|
|
|
|
|
|
1215
|
|
|
|
|
|
|
|
|
1216
|
|
|
|
|
|
|
# Check that a string is well-formed XML.
#   $xml_ref - reference to the XML string
# Returns whatever XML::Parser's parse returns on success, undef if the
# parse died (the error is swallowed by the eval).
sub _check_xml {
    my ($xml_ref) = @_;
    my $parsed = eval { XML::Parser->new->parse( ${$xml_ref} ) };
    return $parsed;
}
|
1222
|
|
|
|
|
|
|
|
|
1223
|
|
|
|
|
|
|
# Find the document encoding declared in an HTML <meta http-equiv> tag.
#   $tree - object whose find('meta') returns the meta elements (hash based)
# Returns the lower-cased charset of the first matching
# "content-type: text/html; charset=..." meta, or 'iso-8859-1' if none.
sub _encoding_from_meta {
    my ($tree) = @_;

    for my $meta ( $tree->find('meta') ) {
        next unless $meta->{'http-equiv'}
            && $meta->{'http-equiv'} =~ m{^\s*content-type\s*}i;
        next unless $meta->{content};
        next unless $meta->{content} =~ m{^\s*text/html\s*;\s*charset\s*=\s*(\S*)\s*}i;

        # first declaration wins
        return lc $1;
    }

    return "iso-8859-1";
}
|
1238
|
|
|
|
|
|
|
|
|
1239
|
|
|
|
|
|
|
{
    # Convert a string to UTF-8 using the first available converter:
    # Encode, then Text::Iconv, then Unicode::Map8 + Unicode::String.
    #   $encoding - name of the source encoding
    #   $string   - text to convert
    # Returns the converted string with control characters removed. If no
    # converter is available, or the converter does not know the encoding,
    # the string is returned unconverted (control characters still removed).
    sub _to_utf8 {
        my ( $encoding, $string ) = @_;
        local $SIG{__DIE__};

        if ( _use('Encode') ) {
            Encode::from_to( $string, $encoding => 'utf8', 0x0400 );    # 0x0400 is Encode::FB_XMLCREF
        }
        elsif ( _use('Text::Iconv') ) {
            my $converter = eval { Text::Iconv->new( $encoding => "utf8" ) };
            if ($converter) { $string = $converter->convert($string); }
        }
        elsif ( _use('Unicode::Map8') && _use('Unicode::String') ) {
            # new() can fail for an unknown encoding: guard it like the
            # Text::Iconv branch instead of calling a method on undef
            my $map = Unicode::Map8->new($encoding);
            if ($map) { $string = $map->tou($string)->utf8; }
        }

        $string =~ s{[\x00-\x08\x0B\x0C\x0E-\x1F]}{}g;    # get rid of control chars, portable in 5.6
        return $string;
    }
}
|
1256
|
|
|
|
|
|
|
|
|
1257
|
|
|
|
|
|
|
|
|
1258
|
|
|
|
|
|
|
# Indent an XHTML string in place: block-level start tags and empty tags
# are put on their own line, indented by nesting depth.
#   $xhtml - reference to the XHTML string (modified in place)
# Comments and CDATA sections are skipped untouched.
sub _indent_xhtml {
    my ($xhtml) = @_;    # $xhtml is a ref
    my %block_tag = map { $_ => 1 } qw( html
                                        head
                                          meta title link script base
                                        body
                                          h1 h2 h3 h4 h5 h6
                                          p br address blockquote pre
                                          ol ul li dd dl dt
                                          table tr td th tbody tfoot thead col colgroup caption
                                          div frame frameset hr
                                      );

    # NOTE(review): the indent unit is a single space as it appears in this
    # copy of the code; whitespace may have been collapsed - confirm upstream.
    my $level = 0;
    $$xhtml =~ s{( (?:<!(?:--.*?-->|\[CDATA\[.*?\]\]>))                 # ignore comments and CDATA sections
                 | <(\w+)((?:\s+\w+\s*=\s*(?:"[^"]*"|'[^']*'))*\s*/>) # empty tag
                 | <(\w+)                                              # start tag
                 | </(\w+)                                             # end tag
                 )
                }
                { if(    $2 && $block_tag{$2}) { my $indent= " " x $level;
                                                 "\n$indent<$2$3";
                                               }
                  elsif( $4 && $block_tag{$4}) { my $indent= " " x $level;
                                                 $level++ unless( $4=~ m{/>});
                                                 my $nl= $4 eq 'html' ? '' : "\n";
                                                 "$nl$indent<$4";
                                               }
                  elsif( $5 && $block_tag{$5}) { $level--; "</$5"; }
                  else                         { $1; }
                }xesg;
}
|
1290
|
|
|
|
|
|
|
|
|
1291
|
|
|
|
|
|
|
|
|
1292
|
|
|
|
|
|
|
# Add an xml-stylesheet processing instruction before the root element.
#   $t    - the twig
#   $type - 'xsl' or 'css' (anything else croaks)
#   $href - value of the PI's href pseudo-attribute (inserted as is)
# Returns the twig.
sub add_stylesheet {
    my ( $t, $type, $href ) = @_;
    my %text_type = ( xsl => 1, css => 1 );

    my $ss = $t->{twig_elt_class}->new($PI);
    croak "unsupported style sheet type '$type'" unless $text_type{$type};
    $ss->_set_pi( 'xml-stylesheet', qq{type="text/$type" href="$href"} );

    $t->_add_cpi_outside_of_root( leading_cpi => $ss );
    return $t;
}
|
1304
|
|
|
|
|
|
|
|
|
1305
|
|
|
|
|
|
|
{
    my %used;          # module => 1 if require ok, 0 otherwise
    my %disallowed;    # for testing, refuses to _use modules in this hash

    # Make _use refuse the given modules (testing aid).
    sub _disallow_use    ## no critic (Subroutines::ProhibitNestedSubs);
    {
        my (@modules) = @_;
        $disallowed{$_} = 1 foreach (@modules);
    }

    # Lift a previous _disallow_use for the given modules.
    sub _allow_use       ## no critic (Subroutines::ProhibitNestedSubs);
    {
        my (@modules) = @_;
        $disallowed{$_} = 0 foreach (@modules);
    }

    # Load an optional module at run time.
    #   $module  - package name, e.g. 'Text::Iconv'
    #   $version - optional minimum $VERSION
    # Returns 1 if the module is loaded (and recent enough), 0 otherwise.
    # The version is checked on every call, so a cached successful load
    # can no longer hide a version that is too old.
    sub _use             ## no critic (Subroutines::ProhibitNestedSubs);
    {
        my ( $module, $version ) = @_;
        $version ||= 0;
        return 0 if $disallowed{$module};

        if ( !$used{$module} ) {
            # require by file name: no string eval on the module name
            ( my $file = "$module.pm" ) =~ s{::}{/}g;
            if ( !eval { require $file; 1 } ) {
                $used{$module} = 0;
                return 0;
            }
            $module->import;
            $used{$module} = 1;
        }

        return 1 unless $version;

        ## no critic (TestingAndDebugging::ProhibitNoStrict);
        no strict 'refs';
        my $loaded_version = ${"${module}::VERSION"};
        return ( defined $loaded_version && $loaded_version >= $version ) ? 1 : 0;
    }
}
|
1337
|
|
|
|
|
|
|
|
|
1338
|
|
|
|
|
|
|
# Return the n-th element of a list that satisfies a test, stopping as soon
# as it is found (used to solve the [n] predicates without building the
# whole list of matches).
#   block - test, run with $_ set to each candidate
#   $n    - position, 1-based; negative positions count from the end
# Returns the element, or undef if there are fewer than |n| matches;
# croaks for position 0.
# Needs a prototype to accept passing bare blocks.
sub _first_n(&$@)    ## no critic (Subroutines::ProhibitSubroutinePrototypes);
{
    my $coderef = shift;
    my $n       = shift;
    my $matched = 0;

    if ( $n > 0 ) {
        foreach (@_) {
            next unless &$coderef;
            $matched++;
            return $_ if $matched == $n;
        }
    }
    elsif ( $n < 0 ) {
        foreach ( reverse @_ ) {
            next unless &$coderef;
            $matched--;
            return $_ if $matched == $n;
        }
    }
    else {
        croak "illegal position number 0";
    }

    return undef;
}
|
1352
|
|
|
|
|
|
|
|
|
1353
|
|
|
|
|
|
|
# Return the content of a URI or of a local file.
#   $uri  - "scheme://..." (fetched with LWP::Simple) or a file name
#   $base - optional base used to resolve a relative file name
# Croaks if a remote URI is requested and LWP::Simple cannot be loaded.
sub _slurp_uri {
    my ( $uri, $base ) = @_;

    if ( $uri =~ m{^\w+://} ) {
        _use('LWP::Simple') or croak "LWP::Simple not available, needed to get '$uri'";
        return LWP::Simple::get($uri);
    }

    return _slurp( _based_filename( $uri, $base ) );
}
|
1358
|
|
|
|
|
|
|
|
|
1359
|
|
|
|
|
|
|
# Resolve a file name relative to a base path
# (cf. XML/Parser.pm's file_ext_ent_handler).
#   $filename - file name to resolve
#   $base     - base path; its last component is replaced by $filename
# Returns $filename unchanged when there is no base, or when the name
# starts with a slash, a backslash or a "word:" prefix.
sub _based_filename {
    my ( $filename, $base ) = @_;

    return $filename unless defined $base;
    return $filename if $filename =~ m{^(?:[\\/]|\w+:)};

    ( my $resolved = $base ) =~ s{[^\\/:]*$}{$filename};
    return $resolved;
}
|
1369
|
|
|
|
|
|
|
|
|
1370
|
|
|
|
|
|
|
# Read a whole file into a string.
#   $filename - path of the file to read
# Returns the file content; croaks if the file cannot be opened.
sub _slurp {
    my ($filename) = @_;

    # 3-arg open: the name is never interpreted as a mode or a command
    open( my $to_slurp, '<', $filename ) or croak "cannot open '$filename': $!";
    local $/ = undef;
    my $content = <$to_slurp>;
    close $to_slurp;

    return $content;
}
|
1379
|
|
|
|
|
|
|
|
|
1380
|
|
|
|
|
|
|
# Read everything left on an open filehandle.
#   $fh - filehandle to read from (left open)
# Returns the remaining content as one string.
sub _slurp_fh {
    my ($fh) = @_;
    my $content = do { local $/ = undef; <$fh> };
    return $content;
}
|
1386
|
|
|
|
|
|
|
|
|
1387
|
|
|
|
|
|
|
# I should really add extra options to allow better configuration of the
# LWP::UserAgent object
# this method forks (except on VMS!)
#   - the child gets the data and copies it to the pipe,
#   - the parent reads the stream and sends it to XML::Parser
# the data is cut in chunks the size of the XML::Parser::Expat buffer
#
#   $t     - the twig
#   $safe  - true to use safe_parse instead of parse
#   $url   - URL to fetch
#   $agent - optional user agent object (default: a new LWP::UserAgent)
# Returns the twig, or 0 if $@ is set after parsing. Croaks if LWP is not
# available, or if the pipe or the fork fails.
sub _parseurl {
    my ( $t, $safe, $url, $agent ) = @_;
    _use('LWP') || croak "LWP not available, needed to use parseurl methods";

    if ( $^O ne 'VMS' ) {
        # lexical handles: no package-wide README/WRITEME globs to clash on
        pipe( my $reader, my $writer ) or croak "cannot create connected pipes: $!";

        # a failed fork returns undef: it must not be mistaken for the child
        my $pid = fork;
        croak "cannot fork: $!" unless defined $pid;

        if ($pid) {
            # parent code: parse the incoming file
            close $writer;    # no need to write
            if   ($safe) { $t->safe_parse($reader); }
            else         { $t->parse($reader); }
            close $reader;
            return $@ ? 0 : $t;
        }
        else {
            # child
            # NOTE(review): a croak here unwinds in the child process; confirm
            # that no caller-side eval can catch it and keep the child running.
            close $reader;    # no need to read
            local $| = 1;
            $agent ||= LWP::UserAgent->new;
            my $request = HTTP::Request->new( GET => $url );

            # _pass_url_content is called with chunks of data the same size as
            # the XML::Parser buffer
            my $response = $agent->request( $request,
                sub { _pass_url_content( $writer, @_ ); }, $BUFSIZE );
            $response->is_success or croak "$url ", $response->message;
            close $writer;
            CORE::exit();     # CORE is there for mod_perl (which redefines exit)
        }
    }
    else {
        # VMS branch (hard to test!)
        local $| = 1;
        $agent ||= LWP::UserAgent->new;
        my $request  = HTTP::Request->new( GET => $url );
        my $response = $agent->request($request);
        $response->is_success or croak "$url ", $response->message;
        if   ($safe) { $t->safe_parse( $response->content ); }
        else         { $t->parse( $response->content ); }
        return $@ ? 0 : $t;
    }
}
|
1433
|
|
|
|
|
|
|
|
|
1434
|
|
|
|
|
|
|
# get the (hopefully!) XML data from the URL and |
|
1435
|
|
|
|
|
|
|
# Content callback for the user agent request: copy one chunk of the
# response to a filehandle.
#   $fh   - handle to write to
#   $data - chunk of content
# Any further arguments are ignored. Returns the result of print.
sub _pass_url_content {
    my ( $fh, $data ) = @_;
    return print {$fh} $data;
}
|
1439
|
|
|
|
|
|
|
|
|
1440
|
|
|
|
|
|
|
# Register extra option names as valid.
# Each argument is an option name; it is normalized with _normalize_args
# and counted in %valid_option.
sub add_options {
    my %flagged;
    @flagged{@_} = (1) x @_;

    my %normalized = _normalize_args(%flagged);
    $valid_option{$_}++ for keys %normalized;
}
|
1445
|
|
|
|
|
|
|
|
|
1446
|
|
|
|
|
|
|
# Thin wrapper: returns whatever XML::Twig::Elt::_pretty_print_styles returns.
sub _pretty_print_styles {
    return XML::Twig::Elt::_pretty_print_styles();
}
|
1447
|
|
|
|
|
|
|
|
|
1448
|
|
|
|
|
|
|
# Parser handler: accumulate the text of the internal DTD subset.
#   $p      - the parser; $p->{twig} is the twig
#   $string - the piece of text received
# When the twig has twig_keep_encoding set, the parser's original_string is
# stored instead of $string. Returns nothing.
sub _twig_store_internal_dtd {
    # warn " in _twig_store_internal_dtd...\n"; # DEBUG handler
    my ( $p, $string ) = @_;
    my $twig = $p->{twig};

    $string = $p->original_string() if $twig->{twig_keep_encoding};
    $twig->{twig_doctype}->{internal} .= $string;

    return;
}
|
1457
|
|
|
|
|
|
|
|
|
1458
|
|
|
|
|
|
|
# Parser handler: stop collecting the internal DTD subset.
# Restores the saved default handler (or unsets the Default handler if none
# was saved) and strips the enclosing [ ] from the stored subset.
#   $p - the parser; $p->{twig} is the twig
# Returns nothing.
sub _twig_stop_storing_internal_dtd {
    # warn " in _twig_stop_storing_internal_dtd...\n"; # DEBUG handler
    my $p = shift;

    my $has_saved = @saved_default_handler && defined $saved_default_handler[1];
    if   ($has_saved) { $p->setHandlers(@saved_default_handler); }
    else              { $p->setHandlers( Default => undef ); }

    for ( $p->{twig}->{twig_doctype}->{internal} ) {
        s{^\s*\[}{};
        s{\]\s*$}{};
    }

    return;
}
|
1471
|
|
|
|
|
|
|
|
|
1472
|
|
|
|
|
|
|
# Parser handler: print the end of the internal subset (' ]>') when the
# doctype has one, unless $expat_1_95_2 is set.
#   $p - the parser; $p->{twig} is the twig
# Returns nothing.
sub _twig_doctype_fin_print {
    # warn " in _twig_doctype_fin_print...\n"; # DEBUG handler
    my ($p) = shift;

    my $has_internal = $p->{twig}->{twig_doctype}->{has_internal};
    print ' ]>' if $has_internal && !$expat_1_95_2;

    return;
}
|
1478
|
|
|
|
|
|
|
|
|
1479
|
|
|
|
|
|
|
|
|
1480
|
|
|
|
|
|
|
# Normalize option names to CamelCase: 'pretty_print' => 'PrettyPrint'.
# Takes a flat ( name => value, ... ) list and returns the same list with
# normalized names. Iterates on the argument count rather than on the truth
# of the key, so a false key (such as '0') no longer silently drops every
# option that follows it.
sub _normalize_args {
    my %normalized_args;

    while (@_) {
        my $key = shift;
        $key = join '', map { ucfirst } split /_/, $key;
        #$key= "Twig".$key unless( substr( $key, 0, 4) eq 'Twig');
        $normalized_args{$key} = shift;
    }

    return %normalized_args;
}
|
1489
|
|
|
|
|
|
|
|
|
1490
|
|
|
|
|
|
|
# Return the argument if it is a GLOB or an IO::Scalar (according to isa),
# a false value otherwise.
sub _is_fh {
    my $candidate = $_[0];
    return unless $candidate;
    return $candidate if ( isa( $candidate, 'GLOB' ) || isa( $candidate, 'IO::Scalar' ) );
}
|
1491
|
|
|
|
|
|
|
|
|
1492
|
|
|
|
|
|
|
# Install a handler for a trigger expression.
#   $handlers   - handler table (hash ref)
#   $whole_path - one or more trigger expressions separated by '|'
#   $handler    - the handler to install (may be undef)
# Each expression is offered in turn to the special, PI, level, regexp and
# xpath setters; the first one that accepts it wins.
# Returns the handler previously stored for the first expression that had
# one; croaks if any part of $whole_path is not recognized.
sub _set_handler {
    my ( $handlers, $whole_path, $handler ) = @_;

    my $special_re = qr{($ALL|$DEFAULT|$COMMENT|$TEXT)};
    my $pi_re      = qr{(\?|$PI)\s*(([^\s]*)\s*)};
    my $level_re   = qr{level \s* \( \s* ([0-9]+) \s* \)}x;
    my $regexp_re  = qr{\(\?([\^xism]*)(-[\^xism]*)?:(.*)\)}x;
    my $xpath_re   = qr{(/?/?$REG_TAG_PART? \s* ($REG_PREDICATE\s*)?)+}x;

    my @setters = ( \&_set_special_handler, \&_set_pi_handler, \&_set_level_handler,
                    \&_set_regexp_handler,  \&_set_xpath_handler );

    my $prev_handler;
    my $remaining = $whole_path;
    #warn "\$cpath: '$cpath\n";
    while ( $remaining
        && $remaining =~ s{^\s*($special_re|$pi_re|$level_re|$regexp_re|$xpath_re)\s*($|\|)}{} )
    {
        my $path = $1;
        #warn "\$cpath: '$cpath' - $path: '$path'\n";

        # $prev_handler gets the first found handler
        $prev_handler ||= $handlers->{handlers}->{string}->{$path} || undef;

        my $accepted = 0;
        for my $setter (@setters) {
            next unless $setter->( $handlers, $path, $handler, $prev_handler );
            $accepted = 1;
            last;
        }
        croak "unrecognized expression in handler: '$whole_path'" unless $accepted;

        # this both takes care of the simple (gi) handlers and store
        # the handler code reference for other handlers
        $handlers->{handlers}->{string}->{$path} = $handler;
    }

    # anything left over could not be parsed as a trigger
    croak "unrecognized expression in handler: '$whole_path'" if $remaining;

    return $prev_handler;
}
|
1526
|
|
|
|
|
|
|
|
|
1527
|
|
|
|
|
|
|
|
|
1528
|
|
|
|
|
|
|
# Install a handler for one of the special triggers
# ($ALL, $DEFAULT, $COMMENT or $TEXT).
# Returns 1 if $path is such a trigger (the handler is stored under it),
# 0 otherwise.
sub _set_special_handler {
    my ( $handlers, $path, $handler, $prev_handler ) = @_;

    return 0 unless $path =~ m{^\s*($ALL|$DEFAULT|$COMMENT|$TEXT)\s*$}io;

    $handlers->{handlers}->{$1} = $handler;
    return 1;
}
|
1537
|
|
|
|
|
|
|
|
|
1538
|
|
|
|
|
|
|
# Install a handler triggered by an XPath-like expression.
# Returns 1 if _parse_xpath_handler could parse $path (the resulting handler
# data is registered with _add_handler), 0 otherwise.
sub _set_xpath_handler {
    my ( $handlers, $path, $handler, $prev_handler ) = @_;

    my $handler_data = _parse_xpath_handler( $path, $handler );
    return 0 unless $handler_data;

    _add_handler( $handlers, $handler_data, $path, $prev_handler );
    return 1;
}
|
1547
|
|
|
|
|
|
|
|
|
1548
|
|
|
|
|
|
|
# Register handler data in the per-tag list of xpath handlers.
#   $handlers     - handler table; lists live in $handlers->{xpath_handler}{tag}
#   $handler_data - hash ref with at least tag, path, handler and score
#   $path         - trigger expression the data was built from
#   $prev_handler - true if a handler was already set for $path: entries
#                   with the same path are then removed first
# The data is only added if it carries a handler. The list is kept sorted by
# descending score (type, anchored, steps, predicates, tests), then by path.
sub _add_handler {
    my ( $handlers, $handler_data, $path, $prev_handler ) = @_;

    my $tag      = $handler_data->{tag};
    my $existing = $handlers->{xpath_handler}->{$tag};
    my @handlers = $existing ? @{$existing} : ();

    if ($prev_handler) {
        @handlers = grep { $_->{path} ne $path } @handlers;
    }

    if ( $handler_data->{handler} ) {
        push @handlers, $handler_data;
    }

    if ( @handlers > 1 ) {
        my @criteria = qw( type anchored steps predicates tests );
        @handlers = sort {
            my $order = 0;
            for my $criterion (@criteria) {
                $order = ( $b->{score}->{$criterion} || 0 ) <=> ( $a->{score}->{$criterion} || 0 );
                last if $order;
            }
            $order || ( $a->{path} cmp $b->{path} );
        } @handlers;
    }

    $handlers->{xpath_handler}->{$tag} = \@handlers;
}
|
1570
|
|
|
|
|
|
|
|
|
1571
|
|
|
|
|
|
|
# Install a handler for processing instructions.
# PI conditions ( '?target' => \&handler or '?' => \&handler
#              or '#PItarget' => \&handler or '#PI' => \&handler)
# Returns 1 if $path is a PI condition, 0 otherwise. The handler is stored
# in $handlers->{pi_handlers} under the target (the empty string when no
# target is given); either the previous or the new handler can be undef.
sub _set_pi_handler {
    my ( $handlers, $path, $handler, $prev_handler ) = @_;

    if ( $path =~ /^\s*(?:\?|$PI)\s*(?:([^\s]*)\s*)$/ ) {
        # $1 is always defined here (possibly empty) and is used as is:
        # a "|| ''" fallback would wrongly turn the target '0' into ''
        $handlers->{pi_handlers}->{$1} = $handler;
        return 1;
    }

    return 0;
}
|
1586
|
|
|
|
|
|
|
|
|
1587
|
|
|
|
|
|
|
# Install a handler triggered by depth: 'level(n)'.
# Returns 1 if $path is a level expression, 0 otherwise. The trigger fires
# when the stack holds n+1 entries and the tag of the top entry does not
# start with '#'.
sub _set_level_handler {
    my ( $handlers, $path, $handler, $prev_handler ) = @_;

    return 0 unless $path =~ m{^ \s* level \s* \( \s* ([0-9]+) \s* \) \s* $}ox;
    my $level = $1;

    my $trigger = sub {
        my ($stack) = @_;
        return ( ( $stack->[-1]->{$ST_TAG} !~ m{^#} ) && ( scalar @$stack == $level + 1 ) );
    };

    my $handler_data = {
        tag          => '*',
        score        => { type => $LEVEL_TRIGGER },
        trigger      => $trigger,
        path         => $path,
        handler      => $handler,
        test_on_text => 0,
    };
    _add_handler( $handlers, $handler_data, $path, $prev_handler );

    return 1;
}
|
1601
|
|
|
|
|
|
|
|
|
1602
|
|
|
|
|
|
|
# Install a handler triggered by a regexp on the tag name.
# If the expression was a regexp it is now a string (it was stringified
# when it became a hash key), so it is recognized by its "(?flags:...)"
# form and compiled again.
# Returns 1 if $path looks like a stringified regexp, 0 otherwise.
sub _set_regexp_handler {
    my ( $handlers, $path, $handler, $prev_handler ) = @_;

    return 0 unless $path =~ m{^\(\?([\^xism]*)(?:-[\^xism]*)?:(.*)\)$};

    my $regexp  = qr/(?$1:$2)/;    # convert it back into a regexp
    my $trigger = sub {
        my ($stack) = @_;
        return ( $stack->[-1]->{$ST_TAG} =~ $regexp );
    };

    my $handler_data = {
        tag          => '*',
        score        => { type => $REGEXP_TRIGGER },
        trigger      => $trigger,
        path         => $path,
        handler      => $handler,
        test_on_text => 0,
    };
    _add_handler( $handlers, $handler_data, $path, $prev_handler );

    return 1;
}
|
1617
|
|
|
|
|
|
|
|
|
1618
|
|
|
|
|
|
|
my $DEBUG_HANDLER = 0;    # 0 or 1 (output the handler checking code) or 2 (super verbose)
my $handler_string;       # store the handler itself

# Set the debug level for handler parsing.
sub _set_debug_handler {
    $DEBUG_HANDLER = shift;
}

# Emit a debug message: warn below level 3, otherwise append the message to
# the stored handler string.
sub _warn_debug_handler {
    if ( $DEBUG_HANDLER < 3 ) {
        warn @_;
    }
    else {
        $handler_string .= join( '', @_ );
    }
}

# Return the stored handler string and reset it to the empty string.
sub _return_debug_handler {
    my $string = $handler_string;
    $handler_string = '';
    return $string;
}
|
1623
|
|
|
|
|
|
|
|
|
1624
|
|
|
|
|
|
|
sub _parse_xpath_handler |
|
1625
|
|
|
|
|
|
|
{ my( $xpath, $handler)= @_; |
|
1626
|
|
|
|
|
|
|
my $xpath_original= $xpath; |
|
1627
|
|
|
|
|
|
|
|
|
1628
|
|
|
|
|
|
|
|
|
1629
|
|
|
|
|
|
|
if( $DEBUG_HANDLER >=1) { _warn_debug_handler( "\n\nparsing path '$xpath'\n"); } |
|
1630
|
|
|
|
|
|
|
|
|
1631
|
|
|
|
|
|
|
my $path_to_check= $xpath; |
|
1632
|
|
|
|
|
|
|
$path_to_check=~ s{/?/?$REG_TAG_PART?\s*(?:$REG_PREDICATE\s*)?}{}g; |
|
1633
|
|
|
|
|
|
|
if( $DEBUG_HANDLER && $path_to_check=~ /\S/) { _warn_debug_handler( "left: $path_to_check\n"); } |
|
1634
|
|
|
|
|
|
|
return if( $path_to_check=~ /\S/); |
|
1635
|
|
|
|
|
|
|
|
|
1636
|
|
|
|
|
|
|
(my $xpath_to_display= $xpath)=~ s{(["{}'\[\]\@\$])}{\\$1}g; |
|
1637
|
|
|
|
|
|
|
|
|
1638
|
|
|
|
|
|
|
my @xpath_steps; |
|
1639
|
|
|
|
|
|
|
my $last_token_is_sep; |
|
1640
|
|
|
|
|
|
|
|
|
1641
|
|
|
|
|
|
|
while( $xpath=~ s{^\s* |
|
1642
|
|
|
|
|
|
|
( (//?) # separator |
|
1643
|
|
|
|
|
|
|
| (?:$REG_TAG_PART\s*(?:$REG_PREDICATE\s*)?) # tag name and optional predicate |
|
1644
|
|
|
|
|
|
|
| (?:$REG_PREDICATE) # just a predicate |
|
1645
|
|
|
|
|
|
|
) |
|
1646
|
|
|
|
|
|
|
} |
|
1647
|
|
|
|
|
|
|
{}x |
|
1648
|
|
|
|
|
|
|
) |
|
1649
|
|
|
|
|
|
|
{ # check that we have alternating separators and steps |
|
1650
|
|
|
|
|
|
|
if( $2) # found a separator |
|
1651
|
|
|
|
|
|
|
{ if( $last_token_is_sep) { return 0; } # 2 separators in a row |
|
1652
|
|
|
|
|
|
|
$last_token_is_sep= 1; |
|
1653
|
|
|
|
|
|
|
} |
|
1654
|
|
|
|
|
|
|
else |
|
1655
|
|
|
|
|
|
|
{ if( defined( $last_token_is_sep) && !$last_token_is_sep) { return 0; } # 2 steps in a row |
|
1656
|
|
|
|
|
|
|
$last_token_is_sep= 0; |
|
1657
|
|
|
|
|
|
|
} |
|
1658
|
|
|
|
|
|
|
|
|
1659
|
|
|
|
|
|
|
push @xpath_steps, $1; |
|
1660
|
|
|
|
|
|
|
} |
|
1661
|
|
|
|
|
|
|
if( $last_token_is_sep) { return 0; } # expression cannot end with a separator |
|
1662
|
|
|
|
|
|
|
|
|
1663
|
|
|
|
|
|
|
my $i=-1; |
|
1664
|
|
|
|
|
|
|
|
|
1665
|
|
|
|
|
|
|
my $perlfunc= _join_n( $NO_WARNINGS . ';', |
|
1666
|
|
|
|
|
|
|
q|my( $stack)= @_; |, |
|
1667
|
|
|
|
|
|
|
q|my @current_elts= (scalar @$stack); |, |
|
1668
|
|
|
|
|
|
|
q|my @new_current_elts; |, |
|
1669
|
|
|
|
|
|
|
q|my $elt; |, |
|
1670
|
|
|
|
|
|
|
($DEBUG_HANDLER >= 1) && (qq#warn q{checking path '$xpath_to_display'\n};#), |
|
1671
|
|
|
|
|
|
|
); |
|
1672
|
|
|
|
|
|
|
|
|
1673
|
|
|
|
|
|
|
|
|
1674
|
|
|
|
|
|
|
my $last_tag=''; |
|
1675
|
|
|
|
|
|
|
my $anchored= $xpath_original=~ m{^\s*/(?!/)} ? 1 : 0; |
|
1676
|
|
|
|
|
|
|
my $score={ type => $XPATH_TRIGGER, anchored => $anchored }; |
|
1677
|
|
|
|
|
|
|
my $flag= { test_on_text => 0 }; |
|
1678
|
|
|
|
|
|
|
my $sep='/'; # '/' or '//' |
|
1679
|
|
|
|
|
|
|
while( my $xpath_step= pop @xpath_steps) |
|
1680
|
|
|
|
|
|
|
{ my( $tag, $predicate)= $xpath_step =~ m{^($REG_TAG_PART)?(?:\[(.*)\])?\s*$}; |
|
1681
|
|
|
|
|
|
|
$score->{steps}++; |
|
1682
|
|
|
|
|
|
|
$tag||='*'; |
|
1683
|
|
|
|
|
|
|
|
|
1684
|
|
|
|
|
|
|
my $warn_empty_stack= $DEBUG_HANDLER >= 2 ? qq{warn "return with empty stack\\n";} : ''; |
|
1685
|
|
|
|
|
|
|
|
|
1686
|
|
|
|
|
|
|
if( $predicate) |
|
1687
|
|
|
|
|
|
|
{ if( $DEBUG_HANDLER >= 2) { _warn_debug_handler( "predicate is: '$predicate'\n"); } |
|
1688
|
|
|
|
|
|
|
# changes $predicate (from an XPath expression to a Perl one) |
|
1689
|
|
|
|
|
|
|
if( $predicate=~ m{^\s*$REG_NUMBER\s*$}) { croak "position selector [$predicate] not supported on twig_handlers"; } |
|
1690
|
|
|
|
|
|
|
_parse_predicate_in_handler( $predicate, $flag, $score); |
|
1691
|
|
|
|
|
|
|
if( $DEBUG_HANDLER >= 2) { _warn_debug_handler( "predicate becomes: '$predicate'\n"); } |
|
1692
|
|
|
|
|
|
|
} |
|
1693
|
|
|
|
|
|
|
|
|
1694
|
|
|
|
|
|
|
my $tag_cond= _tag_cond( $tag); |
|
1695
|
|
|
|
|
|
|
my $cond= join( " && ", grep { $_ } $tag_cond, $predicate) || 1; |
|
1696
|
|
|
|
|
|
|
|
|
1697
|
|
|
|
|
|
|
if( $css_sel && $tag=~ m{\.}) { $tag=~s{\.[^.]*$}{}; $tag ||='*'; } |
|
1698
|
|
|
|
|
|
|
$tag=~ s{(.)#.+$}{$1}; |
|
1699
|
|
|
|
|
|
|
|
|
1700
|
|
|
|
|
|
|
$last_tag ||= $tag; |
|
1701
|
|
|
|
|
|
|
|
|
1702
|
|
|
|
|
|
|
if( $sep eq '/') |
|
1703
|
|
|
|
|
|
|
{ |
|
1704
|
|
|
|
|
|
|
$perlfunc .= sprintf( _join_n( q#foreach my $current_elt (@current_elts) #, |
|
1705
|
|
|
|
|
|
|
q# { next if( !$current_elt); #, |
|
1706
|
|
|
|
|
|
|
q# $current_elt--; #, |
|
1707
|
|
|
|
|
|
|
q# $elt= $stack->[$current_elt]; #, |
|
1708
|
|
|
|
|
|
|
q# if( %s) { push @new_current_elts, $current_elt;} #, |
|
1709
|
|
|
|
|
|
|
q# } #, |
|
1710
|
|
|
|
|
|
|
), |
|
1711
|
|
|
|
|
|
|
$cond |
|
1712
|
|
|
|
|
|
|
); |
|
1713
|
|
|
|
|
|
|
} |
|
1714
|
|
|
|
|
|
|
elsif( $sep eq '//') |
|
1715
|
|
|
|
|
|
|
{ |
|
1716
|
|
|
|
|
|
|
$perlfunc .= sprintf( _join_n( q#foreach my $current_elt (@current_elts) #, |
|
1717
|
|
|
|
|
|
|
q# { next if( !$current_elt); #, |
|
1718
|
|
|
|
|
|
|
q# $current_elt--; #, |
|
1719
|
|
|
|
|
|
|
q# my $candidate= $current_elt; #, |
|
1720
|
|
|
|
|
|
|
q# while( $candidate >=0) #, |
|
1721
|
|
|
|
|
|
|
q# { $elt= $stack->[$candidate]; #, |
|
1722
|
|
|
|
|
|
|
q# if( %s) { push @new_current_elts, $candidate;} #, |
|
1723
|
|
|
|
|
|
|
q# $candidate--; #, |
|
1724
|
|
|
|
|
|
|
q# } #, |
|
1725
|
|
|
|
|
|
|
q# } #, |
|
1726
|
|
|
|
|
|
|
), |
|
1727
|
|
|
|
|
|
|
$cond |
|
1728
|
|
|
|
|
|
|
); |
|
1729
|
|
|
|
|
|
|
} |
|
1730
|
|
|
|
|
|
|
my $warn= $DEBUG_HANDLER >= 2 ? _join_n( qq#warn qq%fail at cond '$cond'%;#) : ''; |
|
1731
|
|
|
|
|
|
|
$perlfunc .= sprintf( _join_n( q#unless( @new_current_elts) { %s return 0; } #, |
|
1732
|
|
|
|
|
|
|
q#@current_elts= @new_current_elts; #, |
|
1733
|
|
|
|
|
|
|
q#@new_current_elts=(); #, |
|
1734
|
|
|
|
|
|
|
), |
|
1735
|
|
|
|
|
|
|
$warn |
|
1736
|
|
|
|
|
|
|
); |
|
1737
|
|
|
|
|
|
|
|
|
1738
|
|
|
|
|
|
|
$sep= pop @xpath_steps; |
|
1739
|
|
|
|
|
|
|
} |
|
1740
|
|
|
|
|
|
|
|
|
1741
|
|
|
|
|
|
|
if( $anchored) # there should be a better way, but this works |
|
1742
|
|
|
|
|
|
|
{ |
|
1743
|
|
|
|
|
|
|
my $warn= $DEBUG_HANDLER >= 2 ? _join_n( qq#warn qq{fail, stack not empty};#) : ''; |
|
1744
|
|
|
|
|
|
|
$perlfunc .= sprintf( _join_n( q#if( ! grep { $_ == 0 } @current_elts) { %s return 0;}#), $warn); |
|
1745
|
|
|
|
|
|
|
} |
|
1746
|
|
|
|
|
|
|
|
|
1747
|
|
|
|
|
|
|
$perlfunc.= qq{warn "handler for '$xpath_to_display' triggered\\n";\n} if( $DEBUG_HANDLER >=2); |
|
1748
|
|
|
|
|
|
|
$perlfunc.= qq{return q{$xpath_original};\n}; |
|
1749
|
|
|
|
|
|
|
_warn_debug_handler( "\nperlfunc:\n$perlfunc\n") if( $DEBUG_HANDLER>=1); |
|
1750
|
|
|
|
|
|
|
my $s= eval "sub { $perlfunc }"; |
|
1751
|
|
|
|
|
|
|
if( $@) |
|
1752
|
|
|
|
|
|
|
{ croak "wrong handler condition '$xpath' ($@);" } |
|
1753
|
|
|
|
|
|
|
|
|
1754
|
|
|
|
|
|
|
_warn_debug_handler( "last tag: '$last_tag', test_on_text: '$flag->{test_on_text}'\n") if( $DEBUG_HANDLER >=1); |
|
1755
|
|
|
|
|
|
|
_warn_debug_handler( "score: ", join( ' ', map { "$_: $score->{$_}" } sort keys %$score), "\n") if( $DEBUG_HANDLER >=1); |
|
1756
|
|
|
|
|
|
|
return { tag=> $last_tag, score => $score, trigger => $s, path => $xpath_original, handler => $handler, test_on_text => $flag->{test_on_text} }; |
|
1757
|
|
|
|
|
|
|
} |
|
1758
|
|
|
|
|
|
|
|
|
1759
|
|
|
|
|
|
|
# returns its arguments concatenated, each one followed by a newline
# (an empty argument list gives the empty string)
sub _join_n
  { my @lines= @_;
    return join( '', map { "$_\n" } @lines);
  }
|
1760
|
|
|
|
|
|
|
|
|
1761
|
|
|
|
|
|
|
# the "tag" part can be <tag>, <tag>.<class> or <tag>#<id> (where tag can be *, or start with # for hidden tags)
# returns the Perl source of a condition on $elt (the context hash of an element)
# that tests the tag, the class (only when $css_sel is on) and the id;
# the parts are joined by ' && ', the result is '' when there is nothing to test
sub _tag_cond
  { my( $full_tag)= @_;

    my( $tag, $class, $id);
    if( $full_tag=~ m{^(.+)#(.+)$})
      { $tag= $1; $id= $2; }                                          # <tag>#<id>
    elsif( $css_sel)
      { ( $tag, $class)= $full_tag=~ m{^(.*?)(?:\.([^.]*))?$}; }      # <tag> or <tag>.<class>
    else
      { $tag= $full_tag; }                                            # plain <tag>

    # conditions are collected in the order tag, class, id
    my @conds;
    if( $tag && $tag ne '*') { push @conds, qq#(\$elt->{'$ST_TAG'} eq "$tag")#; }
    if( defined $class)      { push @conds, qq#(\$elt->{class}=~ m{(^| )$class( |\$)})#; }
    if( defined $id)         { push @conds, qq#(\$elt->{id} eq "$id")#; }

    return join( ' && ', @conds);
  }
|
1779
|
|
|
|
|
|
|
|
|
1780
|
|
|
|
|
|
|
# input: the predicate ($_[0]) which will be changed in place
#        flags, a hashref with various flags (like test_on_text)
#        the score
# each token of the XPath-like predicate is replaced by the matching piece of
# Perl code (see _predicate_token_to_perl)
sub _parse_predicate_in_handler
  { my( $flag, $score)= @_[1..2];
    $_[0]=~ s{(   ($REG_STRING)                                            # strings
                 |\@($REG_TAG_NAME)(\s* $REG_MATCH \s* $REG_REGEXP)        # @att and regexp
                 |\@($REG_TAG_NAME)(?=\s*(?:[><=!]))                       # @att followed by a comparison operator
                 |\@($REG_TAG_NAME)                                        # @att (not followed by a comparison operator)
                 |=~|!~                                                    # matching operators
                 |([><]=?|=|!=)(?=\s*[\d+-])                               # test before a number
                 |([><]=?|=|!=)                                            # test, other cases
                 |($REG_FUNCTION)                                          # no arg functions
                 # this bit is a mess, but it is the only solution with this half-baked parser
                 |(string\(\s*$REG_NAME\s*\)\s*$REG_MATCH\s*$REG_REGEXP)   # string( child)=~ /regexp/
                 |(string\(\s*$REG_NAME\s*\)\s*$REG_COMP\s*$REG_STRING)    # string( child) = "value" (or other test)
                 |(string\(\s*$REG_NAME\s*\)\s*$REG_COMP\s*$REG_NUMBER)    # string( child) = nb (or other test)
                 |(and|or)
                # |($REG_NAME(?=\s*(and|or|$)))                            # nested tag name (needs to be after all other unquoted strings)
                 |($REG_TAG_IN_PREDICATE)                                  # nested tag name (needs to be after all other unquoted strings)

               )}
             { _predicate_token_to_perl( $flag, $score, $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14) }gexs;
  }

# returns the Perl code that replaces one token of a predicate
# arguments: the flags hashref, the score hashref, then the 14 captures of the
# tokenizing regexp in _parse_predicate_in_handler, in order
# side effects: counts the token in $score->{predicates}, counts and/or in
# $score->{tests}, sets $flag->{test_on_text} for tests that involve text
sub _predicate_token_to_perl
  { my( $flag, $score, $token, $string, $re_att, $re_match, $cmp_att, $lone_att, $num_op, $alpha_op,
        $function, $child_re, $child_alpha, $child_num, $bool_op, $child_tag)= @_;

    $score->{predicates}++;

    # store tests on text (they are not always allowed)
    if( $function || $child_re || $child_num || $child_alpha ) { $flag->{test_on_text}= 1; }

    # strings are left untouched
    if( defined $string) { return $token; }

    if( $child_tag)
      { return qq{(\$elt->{'$ST_ELT'} && \$elt->{'$ST_ELT'}->has_child( '$child_tag'))}; }

    # attributes which name starts with # are looked up in the element, the others in the context hash
    if( $cmp_att)
      { return $cmp_att=~ m{^#} ? qq{ (\$elt->{'$ST_ELT'} && \$elt->{'$ST_ELT'}->{att}->{'$cmp_att'})}
                                : qq{\$elt->{'$cmp_att'}};
      }

    if( $re_att)
      { return $re_att=~ m{^#} ? qq{ (\$elt->{'$ST_ELT'} && \$elt->{'$ST_ELT'}->{att}->{'$re_att'}$re_match)}
                               : qq{\$elt->{'$re_att'}$re_match};
      }

    if( $lone_att)
      { return $lone_att=~ m{^#} ? qq{(\$elt->{'$ST_ELT'} && defined(\$elt->{'$ST_ELT'}->{att}->{'$lone_att'}))}
                                 : qq{defined( \$elt->{'$lone_att'})};
      }

    if( $num_op && ($num_op eq '=') ) { return "=="; } # others tests are unchanged

    if( $alpha_op) { return $PERL_ALPHA_TEST{$alpha_op}; }

    if( $function && $function=~ m{^string})
      { return "\$elt->{'$ST_ELT'}->text"; }

    if( $child_re && $child_re =~ m{string\(\s*($REG_TAG_NAME)\s*\)\s*($REG_MATCH)\s*($REG_REGEXP)})
      { return "defined( _first_n { \$_->text $2 $3 } 1, \$elt->{'$ST_ELT'}->_children( '$1'))"; }

    if( $child_alpha && $child_alpha =~ m{string\(\s*($REG_TAG_NAME)\s*\)\s*($REG_COMP)\s*($REG_STRING)})
      { my( $tag, $op, $str)= ($1, $2, $3);
        $str=~ s{(?<=.)'(?=.)}{\\'}g; # escape a quote within the string
        $str=~ s{^"}{'};
        $str=~ s{"$}{'};
        return "defined( _first_n { \$_->text $PERL_ALPHA_TEST{$op} $str } 1, \$elt->{'$ST_ELT'}->children( '$tag'))";
      }

    if( $child_num && $child_num =~ m{string\(\s*($REG_TAG_NAME)\s*\)\s*($REG_COMP)\s*($REG_NUMBER)})
      { my $test= ($2 eq '=') ? '==' : $2;
        return "defined( _first_n { \$_->text $test $3 } 1, \$elt->{'$ST_ELT'}->children( '$1'))";
      }

    if( $bool_op)
      { $score->{tests}++;
        return $bool_op eq 'and' ? '&&' : '||';
      }

    # anything else is kept as is
    return $token;
  }
|
1842
|
|
|
|
|
|
|
|
|
1843
|
|
|
|
|
|
|
|
|
1844
|
|
|
|
|
|
|
# stores the handler in the twig_char_handler field of the twig
# returns the handler
sub setCharHandler
  { my $t= shift;
    my $handler= shift;
    return $t->{twig_char_handler}= $handler;
  }
|
1848
|
|
|
|
|
|
|
|
|
1849
|
|
|
|
|
|
|
|
|
1850
|
|
|
|
|
|
|
# empties a handler structure: removes the handlers, path_handlers,
# subpath_handlers and attcond_handlers entries
# attcond_handlers_exp is reset to an empty list only if there were attcond_handlers
sub _reset_handlers
  { my( $handlers)= @_;

    foreach my $kind ( qw( handlers path_handlers subpath_handlers))
      { delete $handlers->{$kind}; }

    if( $handlers->{attcond_handlers}) { $handlers->{attcond_handlers_exp}= []; }

    return delete $handlers->{attcond_handlers};
  }
|
1858
|
|
|
|
|
|
|
|
|
1859
|
|
|
|
|
|
|
# builds a new handler structure from a { path => handler } hashref,
# calling _set_handler once for each path
# returns nothing when no hashref is given
sub _set_handlers
  { my( $handlers)= @_;
    return unless( $handlers);

    my %set_handlers;
    foreach my $path (keys %{$handlers})
      { my $handler= $handlers->{$path};
        _set_handler( \%set_handlers, $path, $handler);
      }

    return \%set_handlers;
  }
|
1867
|
|
|
|
|
|
|
|
|
1868
|
|
|
|
|
|
|
|
|
1869
|
|
|
|
|
|
|
# sets the handler for one path in the twig_handlers structure
# (the structure is created if need be)
# returns what _set_handler returns
sub setTwigHandler
  { my $t= shift;
    my( $path, $handler)= @_;
    unless( $t->{twig_handlers}) { $t->{twig_handlers}= {}; }
    return _set_handler( $t->{twig_handlers}, $path, $handler);
  }
|
1874
|
|
|
|
|
|
|
|
|
1875
|
|
|
|
|
|
|
# replaces all the twig handlers by the ones in the { path => handler } hashref
# returns the previous handler structure (undef if it was false), which by then
# has been through _reset_handlers
sub setTwigHandlers
  { my $t= shift;
    my( $handlers)= @_;

    my $previous_handlers= $t->{twig_handlers} ? $t->{twig_handlers} : undef;

    _reset_handlers( $t->{twig_handlers});
    $t->{twig_handlers}= _set_handlers( $handlers);

    return $previous_handlers;
  }
|
1882
|
|
|
|
|
|
|
|
|
1883
|
|
|
|
|
|
|
# sets the handler for one path in the twig_starttag_handlers structure
# (the structure is created if need be)
# returns what _set_handler returns
sub setStartTagHandler
  { my $t= shift;
    my( $path, $handler)= @_;
    unless( $t->{twig_starttag_handlers}) { $t->{twig_starttag_handlers}= {}; }
    return _set_handler( $t->{twig_starttag_handlers}, $path, $handler);
  }
|
1888
|
|
|
|
|
|
|
|
|
1889
|
|
|
|
|
|
|
# replaces all the start tag handlers by the ones in the { path => handler } hashref
# returns the previous handler structure (undef if it was false), which by then
# has been through _reset_handlers
sub setStartTagHandlers
  { my $t= shift;
    my( $handlers)= @_;

    my $previous_handlers= $t->{twig_starttag_handlers} ? $t->{twig_starttag_handlers} : undef;

    _reset_handlers( $t->{twig_starttag_handlers});
    $t->{twig_starttag_handlers}= _set_handlers( $handlers);

    return $previous_handlers;
  }
|
1896
|
|
|
|
|
|
|
|
|
1897
|
|
|
|
|
|
|
# sets the action for one path in the twig_ignore_elts_handlers structure
# (the structure is created if need be)
# returns what _set_handler returns
sub setIgnoreEltsHandler
  { my $t= shift;
    my( $path, $action)= @_;
    unless( $t->{twig_ignore_elts_handlers}) { $t->{twig_ignore_elts_handlers}= {}; }
    return _set_handler( $t->{twig_ignore_elts_handlers}, $path, $action );
  }
|
1902
|
|
|
|
|
|
|
|
|
1903
|
|
|
|
|
|
|
# replaces all the ignore_elts handlers by the ones in the { path => action } hashref
# returns the previous content of twig_ignore_elts_handlers, which by then
# has been through _reset_handlers
sub setIgnoreEltsHandlers
  { my $t= shift;
    my( $handlers)= @_;

    my $previous_handlers= $t->{twig_ignore_elts_handlers};

    _reset_handlers( $t->{twig_ignore_elts_handlers});
    $t->{twig_ignore_elts_handlers}= _set_handlers( $handlers);

    return $previous_handlers;
  }
|
1910
|
|
|
|
|
|
|
|
|
1911
|
|
|
|
|
|
|
# sets the handler for one path in the twig_endtag_handlers structure
# (the structure is created if need be)
# returns what _set_handler returns
sub setEndTagHandler
  { my $t= shift;
    my( $path, $handler)= @_;
    unless( $t->{twig_endtag_handlers}) { $t->{twig_endtag_handlers}= {}; }
    return _set_handler( $t->{twig_endtag_handlers}, $path,$handler);
  }
|
1916
|
|
|
|
|
|
|
|
|
1917
|
|
|
|
|
|
|
# replaces all the end tag handlers by the ones in the { path => handler } hashref
# returns the previous content of twig_endtag_handlers, which by then
# has been through _reset_handlers
sub setEndTagHandlers
  { my $t= shift;
    my( $handlers)= @_;

    my $previous_handlers= $t->{twig_endtag_handlers};

    _reset_handlers( $t->{twig_endtag_handlers});
    $t->{twig_endtag_handlers}= _set_handlers( $handlers);

    return $previous_handlers;
  }
|
1924
|
|
|
|
|
|
|
|
|
1925
|
|
|
|
|
|
|
# a little more complex: set the twig_handlers only if a code ref is given
# replaces the twig_roots structure by one built from the { path => handler } hashref,
# checks that no root uses a condition on text, then registers the handlers
# that are code refs as regular twig handlers too
# returns the previous content of twig_roots
sub setTwigRoots
  { my $t= shift;
    my( $handlers)= @_;

    my $previous_roots= $t->{twig_roots};
    _reset_handlers($t->{twig_roots});
    $t->{twig_roots}= _set_handlers( $handlers);

    _check_illegal_twig_roots_handlers( $t->{twig_roots});

    foreach my $path (keys %{$handlers})
      { # twig_handlers is only created when there is at least one root
        $t->{twig_handlers}||= {};
        my $handler= $handlers->{$path};
        next unless( ref( $handler) && isa( $handler, 'CODE'));
        _set_handler( $t->{twig_handlers}, $path, $handler);
      }

    return $previous_roots;
  }
|
1941
|
|
|
|
|
|
|
|
|
1942
|
|
|
|
|
|
|
# croaks if any of the handlers stored under xpath_handler in the structure
# has its test_on_text flag set
sub _check_illegal_twig_roots_handlers
  { my( $handlers)= @_;

    foreach my $tag_handlers (values %{$handlers->{xpath_handler}})
      { foreach my $handler_data (@$tag_handlers)
          { next unless( $handler_data->{test_on_text});
            croak "string() condition not supported on twig_roots option";
          }
      }

    return;
  }
|
1952
|
|
|
|
|
|
|
|
|
1953
|
|
|
|
|
|
|
|
|
1954
|
|
|
|
|
|
|
# just store the reference to the expat object in the twig
# (and remove the data left in the twig by a previous parse)
# croaks if the twig is flagged as already parsing
sub _twig_init
  { # warn " in _twig_init...\n"; # DEBUG handler

    my( $p)= @_;
    my $t= $p->{twig};

    croak "cannot reuse a twig that is already parsing" if( $t->{twig_parsing});
    $t->{twig_parsing}= 1;

    $t->{twig_parser}= $p;
    weaken( $t->{twig_parser}) if( $weakrefs);

    # in case they had been created by a previous parse
    foreach my $leftover ( qw( twig_dtd twig_doctype twig_xmldecl twig_root))
      { delete $t->{$leftover}; }

    # if needed set the output filehandle
    $t->_set_fh_to_twig_output_fh();

    return;
  }
|
1977
|
|
|
|
|
|
|
|
|
1978
|
|
|
|
|
|
|
# uses eval to catch the parser's death
# returns the twig if parse succeeded, otherwise resets the twig and
# returns the result of _reset_twig_after_error
sub safe_parse
  { my $t= shift;
    eval { $t->parse( @_); } ;
    if( $@) { return $t->_reset_twig_after_error; }
    return $t;
  }
|
1984
|
|
|
|
|
|
|
|
|
1985
|
|
|
|
|
|
|
# same as safe_parse, for parsefile: the death of the parser is caught
# returns the twig if parsefile succeeded, otherwise resets the twig and
# returns the result of _reset_twig_after_error
sub safe_parsefile
  { my $t= shift;
    eval { $t->parsefile( @_); } ;
    if( $@) { return $t->_reset_twig_after_error; }
    return $t;
  }
|
1990
|
|
|
|
|
|
|
|
|
1991
|
|
|
|
|
|
|
# restore a twig in a proper state so it can be reused for a new parse
# clears the parsing flag, removes the per-parse fields and deletes the tree
# returns the twig
sub _reset_twig
  { my $t= shift;
    $t->{twig_parsing}= 0;

    # twig_stored_spaces is the field used to buffer whitespace during the parse,
    # it has to go or the spaces would leak into the next parse
    # (twig_stored_space is still removed, as it always was)
    foreach my $field ( qw( twig_current       extra_data
                            twig_dtd           twig_in_pcdata
                            twig_in_cdata      twig_stored_space
                            twig_stored_spaces twig_entity_list
                          )
                      )
      { delete $t->{$field}; }

    # the tree is deleted before the link to its root is removed
    $t->root->delete if( $t->root);
    delete $t->{twig_root};

    return $t;
  }
|
2006
|
|
|
|
|
|
|
|
|
2007
|
|
|
|
|
|
|
# resets the twig (see _reset_twig) and returns undef
# note that undef is returned explicitly, so in list context the result
# is a list of one element
sub _reset_twig_after_error
  { my( $t)= @_;
    $t->_reset_twig();
    return undef;
  }
|
2012
|
|
|
|
|
|
|
|
|
2013
|
|
|
|
|
|
|
|
|
2014
|
|
|
|
|
|
|
# deals with the whitespace buffered in twig_stored_spaces: it is inserted as
# pcdata (through _insert_pcdata) unless all spaces are discarded; to be kept
# it needs either the space policy for the current tag to be true, or no
# newline in the spaces, or twig_preserve_space to be set
# the buffer is then emptied
# nothing is done to the buffer if there is no current element or no stored space
sub _add_or_discard_stored_spaces
  { my( $t)= @_;

    $t->{twig_right_after_root}= 0; #XX

    my $current= $t->{twig_current};
    return unless( $current); # ugly hack, with ignore on, twig_current can disappear
    return unless length $t->{twig_stored_spaces};

    my $current_gi= $XML::Twig::index2gi[$current->{'gi'}];

    unless( $t->{twig_discard_all_spaces})
      { # the space policy for a tag is computed once, then cached in the twig
        unless( defined( $t->{twig_space_policy}->{$current_gi}))
          { $t->{twig_space_policy}->{$current_gi}= _space_policy( $t, $current_gi); }

        my $keep_spaces=    $t->{twig_space_policy}->{$current_gi}
                         || ($t->{twig_stored_spaces}!~ m{\n})
                         || $t->{twig_preserve_space};
        if( $keep_spaces) { _insert_pcdata( $t, $t->{twig_stored_spaces} ); }
      }

    $t->{twig_stored_spaces}= '';

    return;
  }
|
2034
|
|
|
|
|
|
|
|
|
2035
|
|
|
|
|
|
|
# the default twig handlers, which build the tree
#
# start tag handler, called with the expat object, the tag name and the flat
# list of attribute names and values
# it creates the element, pushes its context on the context stack, links the
# element in the tree (or makes it the root), then calls the start tag handlers
# and applies the ignore_elts handlers
sub _twig_start
  { # warn " in _twig_start...\n"; # DEBUG handler

    #foreach my $s (@_) { next if ref $s; warn "$s: ", is_utf8( $s) ? "has flag" : "FLAG NOT SET"; } # YYY

    my( $p, $gi, @att)= @_;
    my $t= $p->{twig};

    # empty the stored pcdata (space stored in case they are really part of
    # a pcdata element) or stored it if the space policy dictates so
    # create a pcdata element with the spaces if need be
    _add_or_discard_stored_spaces( $t);
    my $parent= $t->{twig_current};

    # if we were parsing PCDATA then we exit the pcdata
    if( $t->{twig_in_pcdata})
      { $t->{twig_in_pcdata}= 0;
        delete $parent->{'twig_current'};
        $parent= $parent->{parent};
      }

    # if we choose to keep the encoding then we need to parse the tag
    my $parse_start_tag= $t->{parse_start_tag};
    if( $parse_start_tag)
      { ( $gi, @att)= $parse_start_tag->( $p->original_string); }
    elsif( $t->{twig_entities_in_attribute})
      { ( $gi, @att)= _parse_start_tag( $p->recognized_string);
        $t->{twig_entities_in_attribute}= 0;
      }

    # if we are using an external DTD, we need to fill the default attributes
    _fill_default_atts( $t, $gi, \@att) if( $t->{twig_read_external_dtd});

    # filter the input data if need be (tag, attribute names and values)
    if( my $filter= $t->{twig_input_filter})
      { $gi= $filter->( $gi);
        foreach my $name_or_value (@att) { $name_or_value= $filter->( $name_or_value); }
      }

    my $ns_decl;
    if( $t->{twig_map_xmlns}) { $ns_decl= _replace_ns( $t, \$gi, \@att); }

    my $elt= $t->{twig_elt_class}->new( $gi);
    $elt->set_atts( @att);

    # now we can store the tag and atts
    my $context= { $ST_TAG => $gi, $ST_ELT => $elt, @att};
    if( $ns_decl)  { $context->{$ST_NS}= $ns_decl; }
    if( $weakrefs) { weaken( $context->{$ST_ELT}); }
    push @{$t->{_twig_context_stack}}, $context;

    # the new element becomes the current one
    if( $parent) { delete $parent->{'twig_current'}; }
    $t->{twig_current}= $elt;
    $elt->{'twig_current'}= 1;

    if( $parent)
      { # the element is added as the last child of its parent
        if( my $prev_sibling= $parent->{last_child})
          { $prev_sibling->{next_sibling}= $elt;
            $elt->{prev_sibling}= $prev_sibling;
            if( $XML::Twig::weakrefs) { weaken( $elt->{prev_sibling}); }
          }

        $elt->{parent}= $parent;
        if( $XML::Twig::weakrefs) { weaken( $elt->{parent}); }

        unless( $parent->{first_child}) { $parent->{first_child}= $elt; }

        delete $parent->{empty};
        $parent->{last_child}= $elt;
        if( $XML::Twig::weakrefs) { weaken( $parent->{last_child}); }
      }
    else
      { # processing root
        $t->set_root( $elt);

        # call dtd handler if need be
        if( defined $t->{twig_dtd_handler})
          { $t->{twig_dtd_handler}->($t, $t->{twig_dtd}); }

        # set this so we can catch external entities
        # (the handler was modified during DTD processing)
        my $default_handler= $t->{twig_default_print} ? \&_twig_print
                           : $t->{twig_roots}         ? sub { return }
                           :                            \&_twig_default;
        $p->setHandlers( Default => $default_handler);
      }

    # the element is empty if the tag ends with />
    $elt->{empty}= ( $p->recognized_string=~ m{/\s*>$}s) ? 1 : 0;

    if( $t->{extra_data}) { $elt->{extra_data}= $t->{extra_data}; }
    $t->{extra_data}= '';

    # if the element is ID-ed then store that info
    my $id= $elt->{'att'}->{$ID};
    if( defined $id)
      { $t->{twig_id_list}->{$id}= $elt;
        if( $weakrefs) { weaken( $t->{twig_id_list}->{$id}); }
      }

    # call user handler if need be
    if( $t->{twig_starttag_handlers})
      { # call all appropriate handlers
        my @handlers= _handler( $t, $t->{twig_starttag_handlers}, $gi);

        local $_= $elt;

        # a handler that returns false prevents the next ones from being called
        foreach my $handler ( @handlers)
          { last unless( $handler->($t, $elt)); }

        # call _all_ handler if needed
        if( my $all= $t->{twig_starttag_handlers}->{handlers}->{$ALL})
          { $all->($t, $elt); }
      }

    # check if the tag is in the list of tags to be ignored
    if( $t->{twig_ignore_elts_handlers})
      { my @handlers= _handler( $t, $t->{twig_ignore_elts_handlers}, $gi);
        # only the first handler counts, it contains the action (discard/print/string)
        if( @handlers) { $t->ignore( $elt, $handlers[0]); }
      }

    my $xml_space= $elt->{'att'}->{'xml:space'};
    if( $xml_space && ( $xml_space eq 'preserve')) { $t->{twig_preserve_space}++; }

    return;
  }
|
2158
|
|
|
|
|
|
|
|
|
2159
|
|
|
|
|
|
|
# deals with namespaces in a start tag
# $gi is a reference to the tag name, $atts a reference to the flat list of
# attribute names and values, both are updated in place:
#   - the namespace declarations new in this element are added to the attributes
#   - prefixes in the tag and in attribute names are replaced (or, when
#     twig_keep_original_prefix is set, the mapped prefix is added in front of the
#     name and the mapped => original prefix pairs are stored in a '#original_gi' attribute)
# returns a hashref { prefix => uri } of the new declarations (undef if there are none)
sub _replace_ns
  { my( $t, $gi, $atts)= @_;
    my $decls;

    foreach my $declared_prefix ( $t->parser->new_ns_prefixes)
      { my $uri= $t->parser->expand_ns_prefix( $declared_prefix);
        $decls->{$declared_prefix}= $uri;

        # replace the prefix if it is mapped
        my $prefix= $declared_prefix;
        unless( $t->{twig_keep_original_prefix})
          { my $mapped_prefix= $t->{twig_map_xmlns}->{$uri};
            if( $mapped_prefix) { $prefix= $mapped_prefix; }
          }

        # now put the namespace declaration back in the element
        my $decl_name= $prefix eq '#default' ? "xmlns" : "xmlns:$prefix";
        push @$atts, $decl_name => $uri;
      }

    if( $t->{twig_keep_original_prefix})
      { # things become more complex: we need to find the original prefix
        # and store both prefixes
        my $map_att;

        my $gi_info= $t->_ns_info( $$gi);
        if( $gi_info->{mapped_prefix})
          { $$gi= "$gi_info->{mapped_prefix}:$$gi";
            $map_att->{$gi_info->{mapped_prefix}}= $gi_info->{prefix};
          }

        # attribute names are at even indexes in the list
        for( my $i= 0; $i < @$atts; $i+= 2)
          { my $att_info= $t->_ns_info( $atts->[$i]);
            next unless( $att_info->{mapped_prefix});
            $atts->[$i]= "$att_info->{mapped_prefix}:$atts->[$i]";
            $map_att->{$att_info->{mapped_prefix}}= $att_info->{prefix};
          }

        if( $map_att) { push @$atts, '#original_gi', $map_att; }
      }
    else
      { $$gi= $t->_replace_prefix( $$gi);

        # attribute names are at even indexes in the list
        for( my $i= 0; $i < @$atts; $i+= 2)
          { $atts->[$i]= $t->_replace_prefix( $atts->[$i]); }
      }

    return $decls;
  }
|
2210
|
|
|
|
|
|
|
|
|
2211
|
|
|
|
|
|
|
|
|
2212
|
|
|
|
|
|
|
# extract uri, prefix and mapped_prefix from a name
# will only work if called from a start or end tag handler
# returns a hashref, which only has the uri key (with a false value) when the
# parser reports no namespace for the name; mapped_prefix is the prefix found
# in twig_map_xmlns for the uri, or the original prefix if there is none
sub _ns_info
  { my( $t, $name)= @_;

    my $p= $t->parser;
    my %ns_info= ( uri => scalar $p->namespace( $name));
    return \%ns_info unless( $ns_info{uri});

    my $uri= $ns_info{uri};
    $ns_info{prefix}= _a_proper_ns_prefix( $p, $uri);
    $ns_info{mapped_prefix}= $t->{twig_map_xmlns}->{$uri} || $ns_info{prefix};

    return \%ns_info;
  }
|
2226
|
|
|
|
|
|
|
|
|
2227
|
|
|
|
|
|
|
# returns the first of the parser's current namespace prefixes that expands
# to the uri, or nothing if there is no such prefix
sub _a_proper_ns_prefix
  { my( $p, $uri)= @_;

    foreach my $prefix ($p->current_ns_prefixes)
      { return $prefix if( $p->expand_ns_prefix( $prefix) eq $uri); }

    return;
  }
|
2235
|
|
|
|
|
|
|
|
|
2236
|
|
|
|
|
|
|
# returns the uri bound to a prefix in the original document
# only works in a handler
# can be used to deal with xsi:type attributes
# the context stack is searched from the innermost element outwards, the first
# true uri found for the prefix is returned
# returns nothing if the prefix is not bound (or if there is no context stack)
sub original_uri
  { my( $t, $prefix)= @_;
    my $ST_NS = '##ns' ;

    # contexts of elements with no namespace declaration have no $ST_NS entry,
    # they are skipped explicitly instead of being dereferenced
    foreach my $context ( reverse @{ $t->{_twig_context_stack} || [] })
      { my $ns= $context->{$ST_NS} or next;
        return $ns->{$prefix} if( $ns->{$prefix});
      }

    return;
  }
|
2246
|
|
|
|
|
|
|
|
|
2247
|
|
|
|
|
|
|
|
|
2248
|
|
|
|
|
|
|
# adds to the attribute list ($atts, a reference to the flat list of names and
# values) the attributes declared for the tag in the DTD (twig_dtd) that are
# not in the list and have a default other than #IMPLIED
sub _fill_default_atts
  { my( $t, $gi, $atts)= @_;

    my $dtd= $t->{twig_dtd};
    my $attlist= $dtd->{att}->{$gi};
    my %given= @$atts;

    foreach my $att (keys %$attlist)
      { next if( exists $given{$att});
        next unless( exists $attlist->{$att}->{default});

        my $default= $attlist->{$att}->{default};
        next if( $default eq '#IMPLIED');

        # the quotes are included in the default, so we need to remove them
        push @$atts, $att, substr( $default, 1, -1);
      }

    return;
  }
|
2265
|
|
|
|
|
|
|
|
|
2266
|
|
|
|
|
|
|
|
|
2267
|
|
|
|
|
|
|
# the default function to parse a start tag (in keep_encoding mode)
# can be overridden with the parse_start_tag method
# only works for 1-byte character sets
# returns the tag name followed by the attribute names and values
# (quotes around the values are removed), croaks if no tag name is found
sub _parse_start_tag
  { my( $string)= @_;

    # get the gi (between < and the first space, / or > character)
    #if( $string=~ s{^<\s*([^\s>/]*)[\s>/]*}{}s)
    unless( $string=~ s{^<\s*($REG_TAG_NAME)\s*[\s>/]}{}s)
      { croak "error parsing tag '$string'"; }
    my $gi= $1;

    # then the attributes, removed one at a time from the start of the string
    my @atts;
    while( $string=~ s{^([^\s=]*)\s*=\s*(["'])(.*?)\2\s*}{}s)
      { push @atts, $1, $3; }

    return $gi, @atts;
  }
|
2284
|
|
|
|
|
|
|
|
|
2285
|
|
|
|
|
|
|
# Records $elt as the root of the twig and links the element back to the
# twig (through a weak reference when weak references are in use).
# Returns the twig.
sub set_root {
    my( $t, $elt)= @_;

    $t->{twig_root}= $elt;
    return $t unless $elt;

    # back link from the root to its twig
    $elt->{twig}= $t;
    weaken( $elt->{twig}) if $weakrefs;

    return $t;
}
|
2294
|
|
|
|
|
|
|
|
|
2295
|
|
|
|
|
|
|
# End tag handler: closes the current element (and the text it may still be
# in), makes its parent the current element, runs or queues the handlers that
# match the closed element, and switches the parser to the twig_roots handlers
# when the parser depth is back at twig_root_depth.
sub _twig_end {
    my( $p, $gi)= @_;
    my $t= $p->{twig};

    # text still open when the tag closes: its text handler is called first
    if( $t->{twig_in_pcdata} && (my $on_text= $t->{TwigHandlers}->{$TEXT})) {
        local $_= $t->{twig_current};
        $on_text->( $t, $_) if $_;
    }

    $gi= $t->_replace_prefix( $gi) if $t->{twig_map_xmlns};

    _add_or_discard_stored_spaces( $t);

    # the node that was current stops being so
    my $closed= $t->{twig_current};
    delete $closed->{'twig_current'};

    # if we were parsing PCDATA then we exit the pcdata too: the closed
    # element is the parent of the text (when it has one)
    if( $t->{twig_in_pcdata}) {
        $t->{twig_in_pcdata}= 0;
        if( $closed->{parent}) { $closed= $closed->{parent}; }
        delete $closed->{'twig_current'};
    }

    # parent is the new current element
    my $parent= $closed->{parent};
    $t->{twig_current}= $parent;
    if( $parent) {
        $parent->{'twig_current'}= 1;
        if( $parent->{twig_to_be_normalized}) {
            $parent->normalize;
            $parent->{twig_to_be_normalized}= 0;
        }
    }

    # pending extra data is attached before the end tag of the closed element
    if( $t->{extra_data}) {
        $closed->_set_extra_data_before_end_tag( $t->{extra_data});
        $t->{extra_data}= '';
    }

    if( $t->{twig_handlers}) {
        # look for handlers
        my @handlers= _handler( $t, $t->{twig_handlers}, $gi);

        if( $t->{twig_tdh}) {
            # handlers are only queued in twig_handlers_to_trigger, not called now
            push @{$t->{twig_handlers_to_trigger}}, [ $closed, \@handlers ] if @handlers;
            if( my $all= $t->{twig_handlers}->{handlers}->{$ALL}) {
                push @{$t->{twig_handlers_to_trigger}}, [ $closed, [$all] ];
            }
        }
        else {
            local $_= $closed;    # so we can use $_ in the handlers

            # a handler returning a false value stops the chain
            foreach my $handler (@handlers) {
                last unless $handler->( $t, $closed);
            }

            # call _all_ handler if needed
            my $all= $t->{twig_handlers}->{handlers}->{$ALL};
            $all->( $t, $closed) if $all;

            $t->{twig_right_after_root}= 0 if( @handlers || $all);
        }
    }

    # if twig_roots is set for the element then set appropriate handler
    if( $t->{twig_root_depth} and ($p->depth == $t->{twig_root_depth})) {
        if( $t->{twig_default_print}) {
            # select the proper fh (and store the currently selected one)
            $t->_set_fh_to_twig_output_fh();

            # NOTE(review): `!$p->depth==1` parses as `(!$p->depth) == 1`, so the
            # flag is only set when the depth is 0; the parentheses make that
            # explicit, whether it is the intended test needs to be confirmed #XX
            if( (!$p->depth) == 1) { $t->{twig_right_after_root}= 1; }

            if( $t->{twig_keep_encoding}) {
                $p->setHandlers( %twig_handlers_roots_print_original);
            }
            else {
                $p->setHandlers( %twig_handlers_roots_print);
            }
        }
        else {
            $p->setHandlers( %twig_handlers_roots);
        }
    }

    # leaving an element that had xml:space="preserve"
    my $xml_space= $closed->{'att'}->{'xml:space'};
    if( $xml_space && ( $xml_space eq 'preserve')) { $t->{twig_preserve_space}--; }

    pop @{$t->{_twig_context_stack}};
    return;
}
|
2379
|
|
|
|
|
|
|
|
|
2380
|
|
|
|
|
|
|
# Calls the handlers queued in $t->{twig_handlers_to_trigger}. Each queue
# entry is [ element, [ handlers ] ]; entries are processed in the order
# given by the cmp method of their elements, and for one entry the chain
# stops at the first handler that returns a false value.
sub _trigger_tdh {
    my( $t)= @_;

    my $queued= $t->{twig_handlers_to_trigger};
    return unless @$queued;

    # the sorted list is built before any handler runs
    my @entries= sort { $a->[0]->cmp( $b->[0]) } @$queued;

    foreach my $entry (@entries) {
        my( $elt, $handlers)= @$entry;
        foreach my $handler (@$handlers) {
            local $_= $elt;    # so we can use $_ in the handlers
            last unless $handler->( $t, $elt);
        }
    }
    return;
}
|
2393
|
|
|
|
|
|
|
|
|
2394
|
|
|
|
|
|
|
# return the list of handler that can be activated for an element |
|
2395
|
|
|
|
|
|
|
# (either of CODE ref's or 1's for twig_roots) |
|
2396
|
|
|
|
|
|
|
|
|
2397
|
|
|
|
|
|
|
# Returns the handlers to call for tag $gi: those registered for the tag and
# then those registered for '*' whose trigger accepts the current context
# stack, or the default handler when none of them matches.
sub _handler {
    my( $t, $handlers, $gi)= @_;

    my @found_handlers;

    my @candidates= map { @$_ } grep { $_ } $handlers->{xpath_handler}->{$gi}, $handlers->{xpath_handler}->{'*'};
    foreach my $candidate (@candidates) {
        my $trigger= $candidate->{trigger};
        my $triggered= $trigger->( $t->{_twig_context_stack});
        push @found_handlers, $candidate->{handler} if $triggered;
    }

    # if no handler found call default handler if defined
    if( !@found_handlers) {
        my $default= $handlers->{handlers}->{$DEFAULT};
        push @found_handlers, $default if defined $default;
    }

    # without handler chaining only the first handler found is kept
    if( @found_handlers and $t->{twig_do_not_chain_handlers}) {
        splice( @found_handlers, 1);
    }

    return @found_handlers;    # empty if no handler found
}
|
2421
|
|
|
|
|
|
|
|
|
2422
|
|
|
|
|
|
|
|
|
2423
|
|
|
|
|
|
|
# Returns $name with the prefix that the twig maps to the namespace of the
# name: the prefix found in twig_map_xmlns or %DEFAULT_URI2NS for the uri,
# otherwise the one given by _a_proper_ns_prefix. A name with no namespace
# is returned unchanged.
sub _replace_prefix {
    my( $t, $name)= @_;

    my $parser= $t->parser;
    my $uri= $parser->namespace( $name);

    # try to get the namespace from default if none is found (for attributes)
    # this should probably be an option
    if( !$uri and ( $name!~/^xml/)) {
        $uri= $parser->expand_ns_prefix( '#default');
    }

    return $name unless $uri;

    my $mapped_prefix= $t->{twig_map_xmlns}->{$uri} || $DEFAULT_URI2NS{$uri};
    return "$mapped_prefix:$name" if $mapped_prefix;

    # no mapping for the uri: fall back on a prefix chosen by the helper,
    # the default namespace giving no prefix at all
    my $prefix= _a_proper_ns_prefix( $parser, $uri);
    $prefix= '' if $prefix eq '#default';
    return $prefix ? "$prefix:$name" : $name;
}
|
2442
|
|
|
|
|
|
|
|
|
2443
|
|
|
|
|
|
|
|
|
2444
|
|
|
|
|
|
|
# Character data handler: filters the text, then appends it to the current
# CDATA or PCDATA node, stores it as pending spaces or extra data when it is
# only whitespace, or starts a new PCDATA node with it.
sub _twig_char {
    my( $p, $string)= @_;
    my $t= $p->{twig};

    if( $t->{twig_keep_encoding}) {
        # the text as it is in the source replaces the one given by the parser,
        # except in a CDATA section for chunks of 1024 bytes or more
        # TODO original_string does not hold the entire string, but $string is wrong
        # I believe due to a bug in XML::Parser
        # so for those chunks the string given by the parser is kept
        my $take_original= 1;
        if( $t->{twig_in_cdata}) {
            use bytes;    # > perl 5.5
            $take_original= length( $string) < 1024;
        }
        if( $take_original) { $string= $p->original_string(); }
    }

    if( $t->{twig_input_filter}) { $string= $t->{twig_input_filter}->( $string); }
    if( $t->{twig_char_handler}) { $string= $t->{twig_char_handler}->( $string); }

    my $current= $t->{twig_current};

    if( $t->{twig_in_cdata}) {
        # text is the continuation of a previously created cdata
        $current->{cdata}.= $t->{twig_stored_spaces} . $string;
        return;
    }

    if( $t->{twig_in_pcdata}) {
        # text is the continuation of a previously created pcdata
        if( $t->{extra_data}) {
            $current->_push_extra_data_in_pcdata( $t->{extra_data}, length( $current->{pcdata}));
            $t->{extra_data}= '';
        }
        $current->{pcdata}.= $string;
        return;
    }

    if( $string=~ /\A\s*\Z/s) {
        # text is just space, which might be discarded later:
        # it goes with the extra data (comment, pi) if there is some,
        # otherwise it is just stored
        if( $t->{extra_data}) { $t->{extra_data}.= $string; }
        else                  { $t->{twig_stored_spaces}.= $string; }
        return;
    }

    # real text: a new pcdata node is created and becomes the current node
    my $pcdata= _insert_pcdata( $t, $t->{twig_stored_spaces} . $string);
    delete $current->{'twig_current'};
    $pcdata->{'twig_current'}= 1;
    $t->{twig_current}= $pcdata;
    $t->{twig_in_pcdata}= 1;
    if( $t->{extra_data}) {
        $pcdata->_push_extra_data_in_pcdata( $t->{extra_data}, 0);
        $t->{extra_data}= '';
    }
    return;
}
|
2512
|
|
|
|
|
|
|
|
|
2513
|
|
|
|
|
|
|
# Start of a CDATA section: creates the CDATA node, links it in the tree
# (after the text being parsed, or as the last child of the current element)
# and makes it the current node.
sub _twig_cdatastart {
    my( $p)= @_;
    my $t= $p->{twig};

    $t->{twig_in_cdata}= 1;
    my $cdata= $t->{twig_elt_class}->new( $CDATA);
    my $twig_current= $t->{twig_current};

    if( $t->{twig_in_pcdata}) {
        # create the node as a sibling of the PCDATA
        $cdata->{prev_sibling}= $twig_current;
        weaken( $cdata->{prev_sibling}) if $XML::Twig::weakrefs;
        $twig_current->{next_sibling}= $cdata;

        my $parent= $twig_current->{parent};
        $cdata->{parent}= $parent;
        weaken( $cdata->{parent}) if $XML::Twig::weakrefs;

        delete $parent->{empty};
        $parent->{last_child}= $cdata;
        weaken( $parent->{last_child}) if $XML::Twig::weakrefs;

        $t->{twig_in_pcdata}= 0;
    }
    else {
        # we have to create a PCDATA element if we need to store spaces
        if( $t->_space_policy( $XML::Twig::index2gi[$twig_current->{'gi'}]) && $t->{twig_stored_spaces}) {
            _insert_pcdata( $t, $t->{twig_stored_spaces});
        }
        $t->{twig_stored_spaces}= '';

        # create the node as a child of the current element
        $cdata->{parent}= $twig_current;
        weaken( $cdata->{parent}) if $XML::Twig::weakrefs;

        my $prev_sibling= $twig_current->{last_child};
        if( $prev_sibling) {
            $cdata->{prev_sibling}= $prev_sibling;
            weaken( $cdata->{prev_sibling}) if $XML::Twig::weakrefs;
            $prev_sibling->{next_sibling}= $cdata;
        }
        else {
            $twig_current->{first_child}= $cdata;
        }

        delete $twig_current->{empty};
        $twig_current->{last_child}= $cdata;
        weaken( $twig_current->{last_child}) if $XML::Twig::weakrefs;
    }

    # the CDATA node is now the current one
    delete $twig_current->{'twig_current'};
    $t->{twig_current}= $cdata;
    $cdata->{'twig_current'}= 1;

    if( $t->{extra_data}) {
        $cdata->set_extra_data( $t->{extra_data});
        $t->{extra_data}= '';
    }
    return;
}
|
2556
|
|
|
|
|
|
|
|
|
2557
|
|
|
|
|
|
|
# End of a CDATA section: runs the handlers registered for CDATA nodes on the
# node that was being filled, then makes its parent the current node again.
sub _twig_cdataend {
    my( $p)= @_;
    my $t= $p->{twig};

    $t->{twig_in_cdata}= 0;

    my $cdata_elt= $t->{twig_current};
    delete $cdata_elt->{'twig_current'};

    # the content is read and stored back unchanged
    my $content= $cdata_elt->{cdata};
    $cdata_elt->{cdata}= $content;

    # the CDATA node is on the context stack only while its handlers run
    push @{$t->{_twig_context_stack}}, { $ST_TAG => $CDATA };

    if( $t->{twig_handlers}) {
        # look for handlers
        my @handlers= _handler( $t, $t->{twig_handlers}, $CDATA);
        local $_= $cdata_elt;    # so we can use $_ in the handlers
        foreach my $handler (@handlers) {
            last unless $handler->( $t, $cdata_elt);
        }
    }

    pop @{$t->{_twig_context_stack}};

    # back to the parent of the CDATA node
    my $parent= $cdata_elt->{parent};
    $t->{twig_current}= $parent;
    $parent->{'twig_current'}= 1;

    $t->{twig_long_cdata}= 0;
    return;
}
|
2588
|
|
|
|
|
|
|
|
|
2589
|
|
|
|
|
|
|
# Calls the handlers for a processing instruction element: first the handler
# registered for its target, then the one registered for '' (any target).
# A handler returning a false value stops the chain.
sub _pi_elt_handlers {
    my( $t, $pi)= @_;
    my $pi_handlers= $t->{twig_handlers}->{pi_handlers} || return;
    foreach my $handler ( $pi_handlers->{$pi->{target}}, $pi_handlers->{''}) {
        next unless $handler;
        local $_= $pi;    # so we can use $_ in the handlers
        last unless $handler->( $t, $pi);
    }
}
|
2595
|
|
|
|
|
|
|
|
|
2596
|
|
|
|
|
|
|
# Returns the text to keep for a processing instruction.
#   $t      - the twig, its handlers are in $t->{twig_handlers}->{pi_handlers}
#   $target - target of the processing instruction
#   $data   - data of the processing instruction (may be empty or undef)
# The handler registered for the target, or failing that the one registered
# for '' (any target), is called with ( $t, $target, $data) and its result is
# returned as is. Without handler the processing instruction is rebuilt.
sub _pi_text_handler {
    my( $t, $target, $data)= @_;

    if( my $handler= $t->{twig_handlers}->{pi_handlers}->{$target}) {
        return $handler->( $t, $target, $data);
    }
    if( my $handler= $t->{twig_handlers}->{pi_handlers}->{''}) {
        return $handler->( $t, $target, $data);
    }

    # a processing instruction starts with '<?': without it the text put back
    # in the document would not be a processing instruction
    return defined( $data) && $data ne '' ? "<?$target $data?>" : "<?$target?>";
}
|
2604
|
|
|
|
|
|
|
|
|
2605
|
|
|
|
|
|
|
# Calls the handler registered for comments, if there is one, on a comment
# element ($_ is set to the element during the call).
sub _comment_elt_handler {
    my( $t, $comment)= @_;
    my $handler= $t->{twig_handlers}->{handlers}->{$COMMENT};
    if( $handler) {
        local $_= $comment;
        $handler->( $t, $comment);
    }
}
|
2610
|
|
|
|
|
|
|
|
|
2611
|
|
|
|
|
|
|
# Returns the text to keep for a comment.
#   $t       - the twig, the comment handler is in $t->{twig_handlers}->{handlers}
#   $comment - text of the comment, without its delimiters
# When a handler is registered for comments it is called with ( $t, $comment)
# and what it returns becomes the text of the comment; an undefined or empty
# result removes the comment (the empty string is returned).
sub _comment_text_handler {
    my( $t, $comment)= @_;

    if( my $handler= $t->{twig_handlers}->{handlers}->{$COMMENT}) {
        $comment= $handler->( $t, $comment);
        if( !defined $comment || $comment eq '') { return ''; }
    }

    # the comment is put back between its delimiters: returning an empty
    # string here would drop every comment and make the test above pointless
    return "<!--$comment-->";
}
|
2619
|
|
|
|
|
|
|
|
|
2620
|
|
|
|
|
|
|
|
|
2621
|
|
|
|
|
|
|
|
|
2622
|
|
|
|
|
|
|
# Comment handler: gets the text of the comment (from the original string,
# minus the 4 leading and 3 trailing delimiter characters, in keep_encoding
# mode) and passes it to _twig_pi_comment with the comment specific options
# and method names.
sub _twig_comment {
    my( $p, $comment_text)= @_;
    my $t= $p->{twig};

    if( $t->{twig_keep_encoding}) {
        $comment_text= substr( $p->original_string(), 4, -3);
    }

    my @method_names= ( '_set_comment', '_comment_elt_handler', '_comment_text_handler');
    $t->_twig_pi_comment( $p, $COMMENT, $t->{twig_keep_comments}, $t->{twig_process_comments},
                          @method_names, $comment_text
                        );
    return;
}
|
2635
|
|
|
|
|
|
|
|
|
2636
|
|
|
|
|
|
|
# Processing instruction handler: gets the target and data (split on the
# first run of whitespace from the original string, minus the 2 leading and
# 2 trailing delimiter characters, in keep_encoding mode) and passes them to
# _twig_pi_comment with the pi specific options and method names.
sub _twig_pi {
    my( $p, $target, $data)= @_;
    my $t= $p->{twig};

    if( $t->{twig_keep_encoding}) {
        my $pi_text= substr( $p->original_string(), 2, -2);
        ( $target, $data)= split( /\s+/, $pi_text, 2);
    }

    my @method_names= ( '_set_pi', '_pi_elt_handlers', '_pi_text_handler');
    $t->_twig_pi_comment( $p, $PI, $t->{twig_keep_pi}, $t->{twig_process_pi},
                          @method_names, $target, $data
                        );
    return;
}
|
2652
|
|
|
|
|
|
|
|
|
2653
|
|
|
|
|
|
|
# Common code for comments and processing instructions.
#   $keep    - true: the text built by the $text_handler method is appended
#              to the extra data of the twig
#   $process - true (and $keep false): an element of type $type is created,
#              filled with the $set method, pasted in the tree and passed to
#              the $elt_handler method
#   @parser_args - what the parser gave for the comment or pi, each argument
#              goes through the input filter first if there is one
sub _twig_pi_comment {
    my( $t, $p, $type, $keep, $process, $set, $elt_handler, $text_handler, @parser_args)= @_;

    if( my $filter= $t->{twig_input_filter}) {
        $_= $filter->( $_) foreach @parser_args;
    }

    # if pi/comments are to be kept then we piggyback them to the current element
    if( $keep) {
        # first add spaces
        if( $t->{twig_stored_spaces}) {
            $t->{extra_data}.= $t->{twig_stored_spaces};
            $t->{twig_stored_spaces}= '';
        }

        my $text= $t->$text_handler( @parser_args);
        $t->{extra_data}.= $text;
    }
    elsif( $process) {
        my $twig_current= $t->{twig_current};    # defined unless we are outside of the root

        my $elt= $t->{twig_elt_class}->new( $type);
        $elt->$set( @parser_args);
        if( $t->{extra_data}) {
            $elt->set_extra_data( $t->{extra_data});
            $t->{extra_data}= '';
        }

        if( !$t->root) {
            # before the root
            $t->_add_cpi_outside_of_root( leading_cpi => $elt);
        }
        elsif( $t->{twig_in_pcdata}) {
            # create the node as a sibling of the PCDATA
            $elt->paste_after( $twig_current);
            $t->{twig_in_pcdata}= 0;
        }
        elsif( $twig_current) {
            # we have to create a PCDATA element if we need to store spaces
            if( $t->_space_policy( $XML::Twig::index2gi[$twig_current->{'gi'}]) && $t->{twig_stored_spaces}) {
                _insert_pcdata( $t, $t->{twig_stored_spaces});
            }
            $t->{twig_stored_spaces}= '';
            # create the node as a child of the current element
            $elt->paste_last_child( $twig_current);
        }
        else {
            # there is a root but no current element
            $t->_add_cpi_outside_of_root( trailing_cpi => $elt);
        }

        # the parent of the new element is the current element from now on
        if( $twig_current) {
            delete $twig_current->{'twig_current'};
            my $parent= $elt->{parent};
            $t->{twig_current}= $parent;
            $parent->{'twig_current'}= 1;
        }

        $t->$elt_handler( $elt);
    }

}
|
2712
|
|
|
|
|
|
|
|
|
2713
|
|
|
|
|
|
|
|
|
2714
|
|
|
|
|
|
|
# add a comment or pi before the first element |
|
2715
|
|
|
|
|
|
|
# Pastes $elt as the last child of the '#CPI' element stored in the twig
# under $type ('leading_cpi' or 'trailing_cpi'), creating that element the
# first time it is needed. Returns the twig.
sub _add_cpi_outside_of_root {
    my( $t, $type, $elt)= @_;

    unless( $t->{$type}) {
        $t->{$type}= $t->{twig_elt_class}->new( '#CPI');
    }
    my $holder= $t->{$type};

    $elt->paste_last_child( $holder);
    return $t;
}
|
2722
|
|
|
|
|
|
|
|
|
2723
|
|
|
|
|
|
|
# Called at the end of the parse, with either the parser or the twig itself.
# Stores the trailing extra data and spaces in the twig, restores the
# selected filehandle, triggers the queued handlers (twig_tdh), flushes the
# twig if autoflush data is present, and resets the parsing state.
# Returns the twig.
sub _twig_final {
    my $p= shift;
    my $t= $p->isa( 'XML::Twig') ? $p : $p->{twig};

    # store trailing data
    if( $t->{extra_data}) {
        $t->{trailing_cpi_text}= $t->{extra_data};
        $t->{extra_data}= '';
    }
    $t->{trailing_spaces}= $t->{twig_stored_spaces} || '';

    # restore the selected filehandle if needed
    $t->_set_fh_to_selected_fh();

    $t->_trigger_tdh if( $t->{twig_tdh});

    select $t->{twig_original_selected_fh} if( $t->{twig_original_selected_fh});    # probably dodgy

    if( exists $t->{twig_autoflush_data}) {
        my $autoflush= $t->{twig_autoflush_data};
        my @args;
        push @args, $autoflush->{fh}         if( $autoflush->{fh});
        push @args, @{ $autoflush->{args} }  if( $autoflush->{args});
        $t->flush( @args);
        delete $t->{twig_autoflush_data};
        $t->root->delete if $t->root;
    }

    # tries to clean-up (probably not very well at the moment)
    #undef $p->{twig};
    undef $t->{twig_parser};
    delete $t->{twig_parsing};
    # the keys are (re)created here, all with an undefined value
    @{$t}{ qw( twig_parser twig_parsing _twig_context_stack twig_current) }= ();

    return $t;
}
|
2759
|
|
|
|
|
|
|
|
|
2760
|
|
|
|
|
|
|
# Creates a PCDATA element holding $string, adds it as the last child of the
# current element, resets the stored spaces and returns the new element.
sub _insert_pcdata {
    my( $t, $string)= @_;

    my $parent= $t->{twig_current};    # always defined

    # create a new PCDATA element: through the constructor of the element
    # class if an alternate class is set, directly otherwise
    my $pcdata;
    if( exists $t->{twig_alt_elt_class}) {
        $pcdata= $t->{twig_elt_class}->new( $PCDATA);
        $pcdata->{pcdata}= $string;
    }
    else {
        my %fields= ( gi => $XML::Twig::gi2index{$PCDATA}, pcdata => $string);
        $pcdata= bless( { %fields }, 'XML::Twig::Elt');
    }

    # link it after the last child of the parent, or as its first child
    if( my $last_child= $parent->{last_child}) {
        $last_child->{next_sibling}= $pcdata;
        $pcdata->{prev_sibling}= $last_child;
        weaken( $pcdata->{prev_sibling}) if $XML::Twig::weakrefs;
    }
    else {
        $parent->{first_child}= $pcdata;
    }

    $pcdata->{parent}= $parent;
    weaken( $pcdata->{parent}) if $XML::Twig::weakrefs;

    delete $parent->{empty};
    $parent->{last_child}= $pcdata;
    weaken( $parent->{last_child}) if $XML::Twig::weakrefs;

    $t->{twig_stored_spaces}= '';
    return $pcdata;
}
|
2785
|
|
|
|
|
|
|
|
|
2786
|
|
|
|
|
|
|
# Returns 1 if spaces are to be kept in elements with tag $gi, 0 if they are
# to be discarded, undef if no option says anything. The options are, from
# the strongest to the weakest: twig_discard_spaces_in, twig_keep_spaces_in
# (both per tag), twig_keep_spaces and twig_discard_spaces.
sub _space_policy {
    my( $t, $gi)= @_;

    my $policy;    # stays undefined when no option applies
    if( $t->{twig_discard_spaces_in} && $t->{twig_discard_spaces_in}->{$gi}) {
        $policy= 0;
    }
    elsif( $t->{twig_keep_spaces_in} && $t->{twig_keep_spaces_in}->{$gi}) {
        $policy= 1;
    }
    elsif( $t->{twig_keep_spaces}) {
        $policy= 1;
    }
    elsif( $t->{twig_discard_spaces}) {
        $policy= 0;
    }
    return $policy;
}
|
2797
|
|
|
|
|
|
|
|
|
2798
|
|
|
|
|
|
|
|
|
2799
|
|
|
|
|
|
|
sub _twig_entity |
|
2800
|
|
|
|
|
|
|
{ # warn " in _twig_entity...\n"; # DEBUG handler |
|
2801
|
|
|
|
|
|
|
my( $p, $name, $val, $sysid, $pubid, $ndata, $param)= @_; |
|
2802
|
|
|
|
|
|
|
my $t=$p->{twig}; |
|
2803
|
|
|
|
|
|
|
|
|
2804
|
|
|
|
|
|
|
#{ no warnings; my $base= $p->base; warn "_twig_entity called: expand: '$t->{twig_expand_external_ents}', base: '$base', name: '$name', val: '$val', sysid: '$sysid', pubid: '$pubid', ndata: '$ndata', param: '$param'\n";} |
|
2805
|
|
|
|
|
|
|
|
|
2806
|
|
|
|
|
|
|
my $missing_entity=0; |
|
2807
|
|
|
|
|
|
|
|
|
2808
|
|
|
|
|
|
|
if( $sysid) |
|
2809
|
|
|
|
|
|
|
{ if($ndata) |
|
2810
|
|
|
|
|
|
|
{ if( ! -f _based_filename( $sysid, $p->base)) { $missing_entity= 1; } |
|
2811
|
|
|
|
|
|
|
} |
|
2812
|
|
|
|
|
|
|
else |
|
2813
|
|
|
|
|
|
|
{ if( $t->{twig_expand_external_ents}) |
|
2814
|
|
|
|
|
|
|
{ $val= eval { _slurp_uri( $sysid, $p->base) }; |
|
2815
|
|
|
|
|
|
|
if( ! defined $val) |
|
2816
|
|
|
|
|
|
|
{ if( $t->{twig_extern_ent_nofail}) |
|
2817
|
|
|
|
|
|
|
{ $missing_entity= 1; } |
|
2818
|
|
|
|
|
|
|
else |
|
2819
|
|
|
|
|
|
|
{ _croak( "cannot load SYSTEM entity '$name' from '$sysid': $@", 3); } |
|
2820
|
|
|
|
|
|
|
} |
|
2821
|
|
|
|
|
|
|
} |
|
2822
|
|
|
|
|
|
|
} |
|
2823
|
|
|
|
|
|
|
} |
|
2824
|
|
|
|
|
|
|
|
|
2825
|
|
|
|
|
|
|
my $ent=XML::Twig::Entity->new( $name, $val, $sysid, $pubid, $ndata, $param); |
|
2826
|
|
|
|
|
|
|
if( $missing_entity) { $t->{twig_missing_system_entities}->{$name}= $ent; } |
|
2827
|
|
|
|
|
|
|
|
|
2828
|
|
|
|
|
|
|
my $entity_list= $t->entity_list; |
|
2829
|
|
|
|
|
|
|
if( $entity_list) { $entity_list->add( $ent); } |
|
2830
|
|
|
|
|
|
|
|
|
2831
|
|
|
|
|
|
|
if( $parser_version > 2.27) |
|
2832
|
|
|
|
|
|
|
{ # this is really ugly, but with some versions of XML::Parser the value |
|
2833
|
|
|
|
|
|
|
# of the entity is not properly returned by the default handler |
|
2834
|
|
|
|
|
|
|
my $ent_decl= $ent->text; |
|
2835
|
|
|
|
|
|
|
if( $t->{twig_keep_encoding}) |
|
2836
|
|
|
|
|
|
|
{ if( defined $ent->{val} && ($ent_decl !~ /["']/)) |
|
2837
|
|
|
|
|
|
|
{ my $val= $ent->{val}; |
|
2838
|
|
|
|
|
|
|
$ent_decl .= $val =~ /"/ ? qq{'$val' } : qq{"$val" }; |
|
2839
|
|
|
|
|
|
|
} |
|
2840
|
|
|
|
|
|
|
# for my solaris box (perl 5.6.1, XML::Parser 2.31, expat?) |
|
2841
|
|
|
|
|
|
|
$t->{twig_doctype}->{internal}=~ s{
|
|
2842
|
|
|
|
|
|
|
} |
|
2843
|
|
|
|
|
|
|
$t->{twig_doctype}->{internal} .= $ent_decl |
|
2844
|
|
|
|
|
|
|
unless( $t->{twig_doctype}->{internal}=~ m{
|
|
2845
|
|
|
|
|
|
|
} |
|
2846
|
|
|
|
|
|
|
|
|
2847
|
|
|
|
|
|
|
return; |
|
2848
|
|
|
|
|
|
|
} |
|
2849
|
|
|
|
|
|
|
|
|
2850
|
|
|
|
|
|
|
# Notation declaration handler: creates the notation object, adds it to the
# notation list of the twig (if there is one) and appends its text to the
# internal subset stored in the twig.
sub _twig_notation {
    my( $p, @declaration)= @_;    # name, base, sysid, pubid
    my $t= $p->{twig};

    my $notation= XML::Twig::Notation->new( @declaration[0..3]);

    my $notation_list= $t->notation_list();
    $notation_list->add( $notation) if $notation_list;

    # internal should get the recognized_string, but XML::Parser does not provide it
    # so we need to re-create it ( $notation->text) and stick it there.
    $t->{twig_doctype}->{internal}.= $notation->text;

    return;
}
|
2864
|
|
|
|
|
|
|
|
|
2865
|
|
|
|
|
|
|
|
|
2866
|
|
|
|
|
|
|
# External entity reference handler.
# In no_expand mode the entity reference itself is inserted in the twig and
# an empty string is returned. Otherwise the result of the external entity
# handler of the twig is returned; when that handler dies or returns undef,
# an empty string is returned in nofail mode and the sub croaks otherwise.
sub _twig_extern_ent {
    my( $p, $base, $sysid, $pubid)= @_;
    my $t= $p->{twig};

    if( $t->{twig_no_expand}) {
        my $reference= $t->{twig_keep_encoding} ? $p->original_string : $p->recognized_string;
        _twig_insert_ent( $t, $reference);
        return '';
    }

    my $content= eval { $t->{twig_ext_ent_handler}->( $p, $base, $sysid) };
    return $content if defined $content;

    # the entity could not be loaded; the message is built before
    # the nofail option is checked
    my $ent_name= $p->recognized_string;
    my $file= _based_filename( $sysid, $base);
    my $error_message= "cannot expand $ent_name - cannot load '$file'";
    return "" if $t->{twig_extern_ent_nofail};
    _croak( $error_message);

    return $content;
}
|
2886
|
|
|
|
|
|
|
|
|
2887
|
|
|
|
|
|
|
# I use this so I can change the $Carp::CarpLevel (which determines how many call frames to skip when reporting an error) |
|
2888
|
|
|
|
|
|
|
# Croaks with $message, reporting the error $level call frames further up
# than croak normally would (0 if no level is given).
sub _croak {
    my( $message, $level)= @_;

    # localized so that the level only applies to this error: a plain
    # assignment would also change where every later carp or croak in the
    # program (for example after the error has been caught) reports from
    local $Carp::CarpLevel= $level || 0;
    croak $message;
}
|
2893
|
|
|
|
|
|
|
|
|
2894
|
|
|
|
|
|
|
# XML declaration handler: stores the version, encoding and standalone
# values given by the parser in $t->{twig_xmldecl}.
sub _twig_xmldecl {
    my( $p, $version, $encoding, $standalone)= @_;
    my $t= $p->{twig};

    # the hash could have been set by set_output_encoding, in which case it is reused
    my $xmldecl= ( $t->{twig_xmldecl}||= {});
    @{$xmldecl}{ qw( version encoding standalone) }= ( $version, $encoding, $standalone);

    return;
}
|
2905
|
|
|
|
|
|
|
|
|
2906
|
|
|
|
|
|
|
sub _twig_doctype {
    # Doctype handler: stores name, system id, public id and internal subset
    # in $t->{twig_doctype}, and optionally loads the external DTD.
    #   $p         the parser object (the twig is stored in $p->{twig})
    #   $name      doctype name
    #   $sysid     system id (may be undef)
    #   $pub       public id (may be undef)
    #   $internal  internal subset text (only used for XML::Parser <= 2.27)
    # Returns nothing.
    #
    # NOTE(review): the "<!DOCTYPE $name [$dtd]>" wrappers below had been
    # stripped from this copy of the file (leaving just "<$name/>", which
    # discarded the DTD that had just been loaded). They were restored to
    # match the "fake document with an internal DTD" described in the code;
    # please confirm against the upstream release.
    my( $p, $name, $sysid, $pub, $internal)= @_;
    my $t= $p->{twig};
    $t->{twig_doctype}||= {};                  # create
    $t->{twig_doctype}->{name}=  $name;        # always there
    $t->{twig_doctype}->{sysid}= $sysid;
    $t->{twig_doctype}->{pub}=   $pub;

    # now let's try to cope with XML::Parser 2.28 and above
    if( $parser_version > 2.27) {
        # the internal subset is accumulated by the Default handler until
        # the DoctypeFin handler is called
        @saved_default_handler= $p->setHandlers( Default => \&_twig_store_internal_dtd,
                                                 Entity  => \&_twig_entity,
                                               );
        $p->setHandlers( DoctypeFin => \&_twig_stop_storing_internal_dtd);
        $t->{twig_doctype}->{internal}= '';
    }
    else {
        # for XML::Parser before 2.28: strip the enclosing brackets
        $internal||= '';
        $internal=~ s{^\s*\[}{};
        $internal=~ s{]\s*$}{};
        $t->{twig_doctype}->{internal}= $internal;
    }

    # now check if we want to get the DTD info
    if( $t->{twig_read_external_dtd} && $sysid) {
        # let's build a fake document with an internal DTD
        if( $t->{DTDBase}) {
            _use( 'File::Spec');
            $sysid= File::Spec->catfile( $t->{DTDBase}, $sysid);
        }
        my $dtd= _slurp_uri( $sysid);

        # if the DTD includes an XML declaration, it needs to be moved before the DOCTYPE bit
        if( $dtd=~ s{^(\s*<\?xml(\s+\w+\s*=\s*("[^"]*"|'[^']*'))*\s*\?>)}{}) {
            $dtd= "$1<!DOCTYPE $name [$dtd]><$name/>";
        }
        else {
            $dtd= "<!DOCTYPE $name [$dtd]><$name/>";
        }

        $t->save_global_state();            # save the globals (they will be reset by the following new)
        my $t_dtd= XML::Twig->new( load_DTD => 1, ParseParamEnt => 1, error_context => $t->{ErrorContext} || 0); # create a temp twig
        $t_dtd->parse( $dtd);               # parse it
        $t->{twig_dtd}= $t_dtd->{twig_dtd}; # grab the dtd info
        #$t->{twig_dtd_is_external}=1;
        $t->entity_list->_add_list( $t_dtd->entity_list)       if( $t_dtd->entity_list);   # grab the entity info
        $t->notation_list->_add_list( $t_dtd->notation_list)   if( $t_dtd->notation_list); # grab the notation info
        $t->restore_global_state();
    }
    return;
}
|
2956
|
|
|
|
|
|
|
|
|
2957
|
|
|
|
|
|
|
sub _twig_element {
    # Element declaration handler: records the content model of $name in
    # $t->{twig_dtd} and, with XML::Parser > 2.27, appends the declaration
    # text to the stored internal subset.
    #   $p      the parser object (the twig is stored in $p->{twig})
    #   $name   element name
    #   $model  content model
    # Returns nothing.
    #
    # NOTE(review): the rebuilt declaration "<!ELEMENT $name $model>" had
    # been stripped from this copy of the file (leaving an empty string);
    # it was restored here, please confirm against the upstream release.
    my( $p, $name, $model)= @_;
    my $t= $p->{twig};
    $t->{twig_dtd}||= {};                       # may create the dtd
    $t->{twig_dtd}->{model}||= {};              # may create the model hash
    $t->{twig_dtd}->{elt_list}||= [];           # ordered list of elements
    push @{$t->{twig_dtd}->{elt_list}}, $name;  # store the elt
    $t->{twig_dtd}->{model}->{$name}= $model;   # store the model

    # only append when the stored subset is empty or ends with a complete declaration
    if( ($parser_version > 2.27) && ($t->{twig_doctype}->{internal}=~ m{(^|>)\s*$}) ) {
        my $text= $XML::Twig::Elt::keep_encoding ? $p->original_string : $p->recognized_string;
        unless( $text) {
            # this version of XML::Parser does not return the text in the *_string method
            # we need to rebuild it
            $text= "<!ELEMENT $name $model>";
        }
        $t->{twig_doctype}->{internal} .= $text;
    }
    return;
}
|
2978
|
|
|
|
|
|
|
|
|
2979
|
|
|
|
|
|
|
sub _twig_attlist {
    # Attribute declaration handler: records type, default and fixed flag of
    # attribute $att of element $gi in $t->{twig_dtd}->{att}.
    #   $p        the parser object (the twig is stored in $p->{twig})
    #   $gi       element name
    #   $att      attribute name
    #   $type     attribute type
    #   $default  default value (may be undef)
    #   $fixed    true if the attribute is #FIXED
    # Returns nothing.
    #
    # NOTE(review): the two "<!ATTLIST ...>" literals below had been
    # stripped from this copy of the file (leaving an unbalanced s{(} and an
    # empty string); they were restored here, please confirm against the
    # upstream release.
    my( $p, $gi, $att, $type, $default, $fixed)= @_;
    #warn "in attlist: gi: '$gi', att: '$att', type: '$type', default: '$default', fixed: '$fixed'\n";
    my $t= $p->{twig};
    $t->{twig_dtd}||= {};                      # create dtd if need be
    $t->{twig_dtd}->{$gi}||= {};               # create elt if need be
    #$t->{twig_dtd}->{$gi}->{att}||= {};        # create att if need be

    if( ($parser_version > 2.27) && ($t->{twig_doctype}->{internal}=~ m{(^|>)\s*$}) ) {
        my $text= $XML::Twig::Elt::keep_encoding ? $p->original_string : $p->recognized_string;
        unless( $text) {
            # this version of XML::Parser does not return the text in the *_string method
            # we need to rebuild it
            my $att_decl= "$att $type";
            $att_decl .= " #FIXED"   if( $fixed);
            $att_decl .= " $default" if( defined $default);

            # 2 cases: there is already an attlist on that element or not
            if( $t->{twig_dtd}->{att}->{$gi}) {
                # there is already an attlist, add to it
                $t->{twig_doctype}->{internal}=~ s{(<!ATTLIST\s*$gi )(.*?)\n?>}
                                                  { "$1$2\n" . ' ' x length( $1) . "$att_decl\n>"}es;
            }
            else {
                # create the attlist
                $t->{twig_doctype}->{internal}.= "<!ATTLIST $gi $att_decl>"
            }
        }
    }

    $t->{twig_dtd}->{att}->{$gi}->{$att}= {} ;
    $t->{twig_dtd}->{att}->{$gi}->{$att}->{type}= $type;
    $t->{twig_dtd}->{att}->{$gi}->{$att}->{default}= $default if( defined $default);
    $t->{twig_dtd}->{att}->{$gi}->{$att}->{fixed}= $fixed;
    return;
}
|
3014
|
|
|
|
|
|
|
|
|
3015
|
|
|
|
|
|
|
sub _twig_default {
    # Default handler. Data is processed in 2 cases only: spaces found after
    # the closing tag of the root, and entity references.
    #   $p       the parser object (the twig is stored in $p->{twig})
    #   $string  the text received by the handler
    my ($p, $string) = @_;
    my $twig = $p->{twig};

    # after the closing tag (no twig_current and root has been created)
    my $root_closed = !$twig->{twig_current} && $twig->{twig_root};
    if ($root_closed && $string =~ m{^\s+$}m) {
        $twig->{twig_stored_spaces} .= $string;
    }

    # process only if we have an entity
    if ($string =~ m{^&([^;]*);$}) {
        # the entity has to be pure pcdata, or we have a problem
        my $within_tag = ($p->original_string =~ m{^<}) && ($p->original_string =~ m{>$});
        if ($within_tag) {
            # string is a tag, entity is in an attribute
            if ($twig->{twig_do_not_escape_amp_in_atts}) { $twig->{twig_entities_in_attribute} = 1; }
        }
        else {
            my $ent;
            unless ($twig->{twig_keep_encoding}) {
                $ent = _twig_insert_ent($twig, $string);
            }
            else {
                _twig_char($p, $string);
                $ent = substr($string, 1, -1);   # entity name, without & and ;
            }
            return $ent;
        }
    }
}
|
3048
|
|
|
|
|
|
|
|
|
3049
|
|
|
|
|
|
|
sub _twig_insert_ent {
    # Creates an entity element holding $string and links it into the tree,
    # after the current #PCDATA when we are in one, as last child of the
    # current element otherwise. Returns the new element.
    my ($t, $string) = @_;

    # grabbed before the stored spaces are dealt with
    my $current = $t->{twig_current};

    my $ent = $t->{twig_elt_class}->new( $ENT);
    $ent->{ent} = $string;

    _add_or_discard_stored_spaces( $t);

    # stores $target in $holder->{$slot}, weakening the link when weak references are in use
    my $link = sub {
        my ($holder, $slot, $target) = @_;
        $holder->{$slot} = $target;
        if ($XML::Twig::weakrefs) { weaken( $holder->{$slot}); }
        return;
    };

    unless ($t->{twig_in_pcdata}) {
        # create the node as a child of the current element
        $link->( $ent, 'parent', $current);
        if (my $previous = $current->{last_child}) {
            $link->( $ent, 'prev_sibling', $previous);
            $previous->{next_sibling} = $ent;
        }
        elsif ($current) {
            $current->{first_child} = $ent;
        }
        if ($current) {
            delete $current->{empty};
            $link->( $current, 'last_child', $ent);
        }
        return $ent;
    }

    # create the node as a sibling of the #PCDATA
    $link->( $ent, 'prev_sibling', $current);
    $current->{next_sibling} = $ent;
    my $parent = $current->{parent};
    $link->( $ent, 'parent', $parent);
    delete $parent->{empty};
    $link->( $parent, 'last_child', $ent);

    # the twig_current is now the parent
    delete $current->{'twig_current'};
    $t->{twig_current} = $parent;

    # we left pcdata
    $t->{twig_in_pcdata} = 0;

    # meant to trigger entity handler, does not seem to be activated at this time
    #if( my $handler= $t->{twig_handlers}->{gi}->{$ENT})
    #  { local $_= $ent; $handler->( $t, $ent); }

    return $ent;
}
|
3092
|
|
|
|
|
|
|
|
|
3093
|
|
|
|
|
|
|
sub parser {
    # Returns the parser stored in the twig_parser field.
    my ($t) = @_;
    return $t->{twig_parser};
}
|
3095
|
|
|
|
|
|
|
|
|
3096
|
|
|
|
|
|
|
# returns the declaration text (or a default one) |
|
3097
|
|
|
|
|
|
|
sub xmldecl {
    # Returns the XML declaration text (or a default one), passed through the
    # output filter if there is one. Returns '' when the document has neither
    # an XML declaration nor an output encoding.
    #
    # NOTE(review): the '<?xml' literals below had been stripped from this
    # copy of the file (leaving an unterminated q{ and an empty qq{});
    # they were restored here, please confirm against the upstream release.
    my $t= shift;
    return '' unless( $t->{twig_xmldecl} || $t->{output_encoding});

    my $decl_string;
    my $decl= $t->{twig_xmldecl};
    if( $decl) {
        my $version= $decl->{version};
        $decl_string= q{<?xml};
        $decl_string .= qq{ version="$version"};

        # encoding can either have been set (in $t->{output_encoding})
        # or come from the document (in $decl->{encoding})
        if( $t->{output_encoding}) {
            my $encoding= $t->{output_encoding};
            $decl_string .= qq{ encoding="$encoding"};
        }
        elsif( $decl->{encoding}) {
            my $encoding= $decl->{encoding};
            $decl_string .= qq{ encoding="$encoding"};
        }

        if( defined( $decl->{standalone})) {
            $decl_string .= q{ standalone="};
            $decl_string .= $decl->{standalone} ? "yes" : "no";
            $decl_string .= q{"};
        }

        $decl_string .= "?>\n";
    }
    else {
        # no declaration in the document, only an output encoding
        my $encoding= $t->{output_encoding};
        $decl_string= qq{<?xml version="1.0" encoding="$encoding"?>};
    }

    my $output_filter= XML::Twig::Elt::output_filter();
    return $output_filter ? $output_filter->( $decl_string) : $decl_string;
}
|
3134
|
|
|
|
|
|
|
|
|
3135
|
|
|
|
|
|
|
sub set_doctype {
    # Updates the doctype info of the twig; only the defined arguments are
    # stored, fields for undefined arguments are left alone.
    #   $name      doctype name
    #   $system    system id
    #   $public    public id
    #   $internal  internal subset
    my ($t, $name, $system, $public, $internal) = @_;

    unless (defined $t->{twig_doctype}) { $t->{twig_doctype} = {}; }
    my $doctype = $t->{twig_doctype};

    defined $name     and $doctype->{name}     = $name;
    defined $system   and $doctype->{sysid}    = $system;
    defined $public   and $doctype->{pub}      = $public;
    defined $internal and $doctype->{internal} = $internal;
}
|
3144
|
|
|
|
|
|
|
|
|
3145
|
|
|
|
|
|
|
sub doctype_name {
    # Returns the doctype name, '' if there is no doctype or no name.
    my ($t) = @_;
    my $doctype = $t->{twig_doctype};
    return '' unless $doctype;
    return $doctype->{name} ? $doctype->{name} : '';
}
|
3150
|
|
|
|
|
|
|
|
|
3151
|
|
|
|
|
|
|
sub system_id {
    # Returns the system id of the doctype, '' if there is no doctype or no system id.
    my ($t) = @_;
    my $doctype = $t->{twig_doctype};
    return '' unless $doctype;
    return $doctype->{sysid} ? $doctype->{sysid} : '';
}
|
3156
|
|
|
|
|
|
|
|
|
3157
|
|
|
|
|
|
|
sub public_id {
    # Returns the public id of the doctype, '' if there is no doctype or no public id.
    my ($t) = @_;
    my $doctype = $t->{twig_doctype};
    return '' unless $doctype;
    return $doctype->{pub} ? $doctype->{pub} : '';
}
|
3162
|
|
|
|
|
|
|
|
|
3163
|
|
|
|
|
|
|
sub internal_subset {
    # Returns the stored internal subset, '' if there is no doctype or no internal subset.
    my ($t) = @_;
    my $doctype = $t->{twig_doctype};
    return '' unless $doctype;
    return $doctype->{internal} ? $doctype->{internal} : '';
}
|
3168
|
|
|
|
|
|
|
|
|
3169
|
|
|
|
|
|
|
# return the dtd object |
|
3170
|
|
|
|
|
|
|
sub dtd {
    # Returns the dtd info stored in the twig_dtd field.
    my ($t) = @_;
    return $t->{twig_dtd};
}
|
3174
|
|
|
|
|
|
|
|
|
3175
|
|
|
|
|
|
|
# return an element model, or the list of element models |
|
3176
|
|
|
|
|
|
|
sub model {
    # Returns the model of element $elt when $elt is true,
    # the sorted list of elements that have a model otherwise.
    my ($t, $elt) = @_;
    if ($elt) {
        return $t->dtd->{model}->{$elt};
    }
    return (sort keys %{$t->dtd->{model}});
}
|
3182
|
|
|
|
|
|
|
|
|
3183
|
|
|
|
|
|
|
|
|
3184
|
|
|
|
|
|
|
# return the entity_list object |
|
3185
|
|
|
|
|
|
|
sub entity_list {
    # Returns the object stored in the twig_entity_list field.
    my ($t) = @_;
    return $t->{twig_entity_list};
}
|
3189
|
|
|
|
|
|
|
|
|
3190
|
|
|
|
|
|
|
# return the list of entity names |
|
3191
|
|
|
|
|
|
|
sub entity_names {
    # Delegates to the entity_names method of the entity list.
    my ($t) = @_;
    return $t->entity_list->entity_names;
}
|
3195
|
|
|
|
|
|
|
|
|
3196
|
|
|
|
|
|
|
# return the entity object |
|
3197
|
|
|
|
|
|
|
sub entity {
    # Returns what the ent method of the entity list gives for $entity_name.
    my ($t, $entity_name) = @_;
    return $t->entity_list->ent( $entity_name);
}
|
3202
|
|
|
|
|
|
|
|
|
3203
|
|
|
|
|
|
|
# return the notation_list object |
|
3204
|
|
|
|
|
|
|
sub notation_list {
    # Returns the object stored in the twig_notation_list field.
    my ($t) = @_;
    return $t->{twig_notation_list};
}
|
3208
|
|
|
|
|
|
|
|
|
3209
|
|
|
|
|
|
|
# return the list of notation names |
|
3210
|
|
|
|
|
|
|
sub notation_names {
    # Delegates to the notation_names method of the notation list.
    my ($t) = @_;
    return $t->notation_list->notation_names;
}
|
3214
|
|
|
|
|
|
|
|
|
3215
|
|
|
|
|
|
|
# return the notation object |
|
3216
|
|
|
|
|
|
|
sub notation {
    # Returns what the notation method of the notation list gives for $notation_name.
    my ($t, $notation_name) = @_;
    return $t->notation_list->notation( $notation_name);
}
|
3221
|
|
|
|
|
|
|
|
|
3222
|
|
|
|
|
|
|
|
|
3223
|
|
|
|
|
|
|
|
|
3224
|
|
|
|
|
|
|
|
|
3225
|
|
|
|
|
|
|
sub print_prolog {
    # Prints the prolog. The first argument may be a filehandle (GLOB or
    # IO::Scalar); otherwise the output goes to the twig_output_fh field,
    # the currently selected filehandle or STDOUT, in that order.
    # The remaining arguments are passed to prolog.
    my ($t, @options) = @_;

    my $fh;
    if (isa( $options[0], 'GLOB') || isa( $options[0], 'IO::Scalar')) {
        $fh = shift @options;
    }
    else {
        $fh = $t->{twig_output_fh} || select() || \*STDOUT;
    }

    ## no critic (TestingAndDebugging::ProhibitNoStrict);
    no strict 'refs';   # select() returns the name of the filehandle
    print {$fh} $t->prolog( @options);
}
|
3232
|
|
|
|
|
|
|
|
|
3233
|
|
|
|
|
|
|
sub prolog {
    # Returns the prolog text: '' when no_prolog is set, just the XML
    # declaration when no_dtd_output is defined, the XML declaration followed
    # by the doctype (which receives the arguments) otherwise.
    my ($t, @options) = @_;

    return ''          if $t->{no_prolog};
    return $t->xmldecl if defined $t->{no_dtd_output};
    return $t->xmldecl . $t->doctype( @options);
}
|
3241
|
|
|
|
|
|
|
|
|
3242
|
|
|
|
|
|
|
sub doctype {
    # Returns the text of the DOCTYPE declaration, passed through the output
    # filter if there is one, or '' when there is nothing to output.
    # Options (normalized by _normalize_args):
    #   UpdateDTD  when true the entity and notation declarations are
    #              re-generated from the entity and notation lists
    #
    # NOTE(review): the '<!DOCTYPE' literals and the 2 regexps removing the
    # ENTITY and NOTATION declarations had been partly stripped from this
    # copy of the file (only their tails were left); they were restored
    # here, please confirm against the upstream release.
    my $t= shift;
    my %args= _normalize_args( @_);
    my $update_dtd= $args{UpdateDTD} || '';
    my $doctype_text= '';

    my $doctype= $t->{twig_doctype};

    if( $doctype) {
        $doctype_text .= qq{<!DOCTYPE $doctype->{name}} if( $doctype->{name});
        $doctype_text .= qq{ PUBLIC "$doctype->{pub}"}  if( $doctype->{pub});
        $doctype_text .= qq{ SYSTEM}                    if( $doctype->{sysid} && !$doctype->{pub});
        $doctype_text .= qq{ "$doctype->{sysid}"}       if( $doctype->{sysid});
    }

    if( $update_dtd) {
        if( $doctype) {
            my $internal= $doctype->{internal};
            # awful hack, but at least it works a little better that what was there before
            if( $internal) {
                # remove entity and notation declarations (they will be re-generated from the updated entity list)
                $internal=~ s{<! \s* ENTITY \s+ $REG_TAG_NAME \s+ ( ("[^"]*"|'[^']*') \s* | SYSTEM [^>]*) >\s*}{}xg;
                $internal=~ s{<! \s* NOTATION .*? >\s*}{}sxg;
                $internal=~ s{^\n}{};
            }
            $internal .= $t->entity_list->text   ||'' if( $t->entity_list);
            $internal .= $t->notation_list->text ||'' if( $t->notation_list);
            if( $internal) { $doctype_text .= "[\n$internal]>\n"; }
        }
        elsif( !$t->{'twig_dtd'} && ( keys %{$t->entity_list} || keys %{$t->notation_list} ) ) {
            # no doctype but some entities or notations: build one around the root name
            $doctype_text .= "<!DOCTYPE " . $t->root->gi . " [\n" . $t->entity_list->text . $t->notation_list->text . "\n]>";
        }
        else {
            $doctype_text= $t->{twig_dtd};
            $doctype_text .= $t->dtd_text;
        }
    }
    elsif( $doctype) {
        if( my $internal= $doctype->{internal}) {
            # add opening and closing brackets if not already there
            # plus some spaces and newlines for a nice formating
            # I test it here because I can't remember which version of
            # XML::Parser need it or not, nor guess which one will in the
            # future, so this about the best I can do
            $internal=~ s{^\s*(\[\s*)?}{ [\n};
            $internal=~ s{\s*(\]\s*(>\s*)?)?\s*$}{\n]>\n};

            # XML::Parser does not include the NOTATION declarations in the DTD
            # at least in the current version. So put them back
            #if( $t->notation_list && $internal !~ m{<!\s*NOTATION})
            #  { $internal=~ s{(\n]>\n)$}{ "\n" . $t->notation_list->text . $1}es; }

            $doctype_text .= $internal;
        }
    }

    if( $doctype_text) {
        # terrible hack, as I can't figure out in which case the darn prolog
        # should get an extra > (depends on XML::Parser and expat versions)
        $doctype_text=~ s/(>\s*)*$/>\n/; # if($doctype_text);

        my $output_filter= XML::Twig::Elt::output_filter();
        return $output_filter ? $output_filter->( $doctype_text) : $doctype_text;
    }
    else {
        return $doctype_text;
    }
}
|
3309
|
|
|
|
|
|
|
|
|
3310
|
|
|
|
|
|
|
sub _leading_cpi {
    # Returns the result of sprint( 1) on the leading_cpi field, '' if the field is not set.
    my ($t) = @_;
    my $cpi = $t->{leading_cpi};
    return '' unless $cpi;
    return $cpi->sprint( 1);
}
|
3315
|
|
|
|
|
|
|
|
|
3316
|
|
|
|
|
|
|
sub _trailing_cpi {
    # Returns the result of sprint( 1) on the trailing_cpi field, '' if the field is not set.
    my ($t) = @_;
    my $cpi = $t->{trailing_cpi};
    return '' unless $cpi;
    return $cpi->sprint( 1);
}
|
3321
|
|
|
|
|
|
|
|
|
3322
|
|
|
|
|
|
|
sub _trailing_cpi_text {
    # Returns the trailing_cpi_text field, '' if it is not set (or false).
    my ($t) = @_;
    my $text = $t->{trailing_cpi_text};
    return $text ? $text : '';
}
|
3326
|
|
|
|
|
|
|
|
|
3327
|
|
|
|
|
|
|
sub print_to_file {
    # Prints the twig to file $filename; the remaining arguments are passed
    # to print. Croaks if the file cannot be created or if closing it fails.
    # Returns the twig.
    my( $t, $filename, @print_args)= @_;

    # the file is opened with the :utf8 layer, unless the original encoding
    # is kept and _use_perlio() is false
    my $mode= $t->{twig_keep_encoding} && ! _use_perlio() ? '>' : '>:utf8';
    open( my $out_fh, $mode, $filename) or _croak( "cannot create file $filename: $!");

    $t->print( $out_fh, @print_args);

    # errors on buffered writes only show up when the file is closed
    close $out_fh or _croak( "cannot write to file $filename: $!");

    return $t;
}
|
3337
|
|
|
|
|
|
|
|
|
3338
|
|
|
|
|
|
|
# probably only works on *nix (at least the chmod bit) |
|
3339
|
|
|
|
|
|
|
# first print to a temporary file, then rename that file to the desired file name, then change permissions |
|
3340
|
|
|
|
|
|
|
# to the original file permissions (or to the current umask) |
|
3341
|
|
|
|
|
|
|
sub safe_print_to_file {
    # Prints the twig to a temporary file created in the directory of
    # $filename, renames that file to $filename, then sets its permissions:
    # those of the original file if there was one, 0666 restricted by the
    # current umask otherwise. The remaining arguments are passed to
    # print_to_file. The temporary file is removed when printing or renaming
    # fails. Returns the twig.
    my( $t, $filename, @print_args)= @_;

    # ~umask() alone would also turn on the setuid, setgid, sticky and
    # execute bits, so start from the regular mode for new files
    my $perm= -f $filename ? (stat $filename)[2] & 07777 : 0666 & ~umask();

    XML::Twig::_use( 'File::Temp') || croak "need File::Temp to use safe_print_to_file\n";
    my $tmpdir= dirname( $filename);
    my( $fh, $tmpfilename)= File::Temp::tempfile( DIR => $tmpdir);
    close $fh;   # print_to_file re-opens the file by name

    # do not leave the temporary file around if printing fails
    unless( eval { $t->print_to_file( $tmpfilename, @print_args); 1; }) {
        my $print_error= $@;
        unlink $tmpfilename;
        die $print_error;
    }

    unless( rename( $tmpfilename, $filename)) {
        my $rename_error= "$!";   # saved, as unlink can change $!
        unlink $tmpfilename;
        _croak( "cannot move temporary file to $filename: $rename_error");
    }

    chmod $perm, $filename;
    return $t;
}
|
3352
|
|
|
|
|
|
|
|
|
3353
|
|
|
|
|
|
|
|
|
3354
|
|
|
|
|
|
|
sub print {
    # Prints the whole document: prolog, leading comments and pi's, the
    # root, then trailing comments, pi's and (when spaces are kept) spaces.
    # The first argument may be a filehandle (GLOB or IO::Scalar), which is
    # then selected for the duration of the call. The PrettyPrint and
    # EmptyTags options are applied for this call only. Returns the twig.
    my ($t, @rest) = @_;

    my $fh;
    if (isa( $rest[0], 'GLOB') || isa( $rest[0], 'IO::Scalar')) { $fh = shift @rest; }
    my %args = _normalize_args( @rest);

    # set the output and the styles, remembering the previous ones
    my ($old_select, $old_pretty, $old_empty_tag);
    if (defined $fh)                { $old_select    = select( $fh); }
    if (defined $args{PrettyPrint}) { $old_pretty    = $t->set_pretty_print( $args{PrettyPrint}); }
    if (defined $args{EmptyTags})   { $old_empty_tag = $t->set_empty_tag_style( $args{EmptyTags}); }

    #if( !$t->{encoding} || lc( $t->{encoding}) eq 'utf-8') { my $out= $fh || \*STDOUT; binmode $out, ':utf8'; }

    if ($perl_version > 5.006 && ! $t->{twig_keep_encoding} && _use_perlio() ) {
        binmode( $fh || \*STDOUT, ":utf8" );
    }

    print $t->prolog( %args) . $t->_leading_cpi( %args);
    $t->{twig_root}->print;

    my $trailer = $t->_trailing_cpi;         # trailing comments and pi's (elements, in 'process' mode)
    $trailer   .= $t->_trailing_cpi_text;    # trailing comments and pi's (in 'keep' mode)
    $trailer   .= $t->{twig_keep_spaces} ? ($t->{trailing_spaces} || '') : '';
    print $trailer;

    # back to the previous styles and output
    if (defined $old_pretty)    { $t->set_pretty_print( $old_pretty); }
    if (defined $old_empty_tag) { $t->set_empty_tag_style( $old_empty_tag); }
    if ($fh)                    { select $old_select; }

    return $t;
}
|
3381
|
|
|
|
|
|
|
|
|
3382
|
|
|
|
|
|
|
|
|
3383
|
|
|
|
|
|
|
sub flush {
    # Outputs and deletes the part of the tree that has been completely
    # parsed. Optional arguments, in that order: a filehandle (GLOB or
    # IO::Scalar), an element up to which the tree is flushed (any
    # reference), then options (PrettyPrint and EmptyTags are applied for
    # this call only). Returns the twig, or nothing when the twig had
    # already been completely flushed.
    my $t = shift;

    if ($t->{twig_tdh}) { $t->_trigger_tdh; }

    return if( $t->{twig_completely_flushed});

    my $fh;
    if (isa( $_[0], 'GLOB') || isa( $_[0], 'IO::Scalar')) { $fh = shift; }
    my $old_select;
    if (defined $fh) { $old_select = select( $fh); }
    my $up_to;
    if (ref $_[0]) { $up_to = shift; }
    my %args = _normalize_args( @_);

    my $old_pretty;
    if (defined $args{PrettyPrint}) {
        $old_pretty = $t->set_pretty_print( delete $args{PrettyPrint});
    }

    my $old_empty_tag_style;
    if ($args{EmptyTags}) {
        $old_empty_tag_style = $t->set_empty_tag_style( delete $args{EmptyTags});
    }

    # the "real" last element processed, as _twig_end has closed it
    my $last_elt;
    my $flush_trailing_data = 0;
    if ($up_to) {
        $last_elt = $up_to;
    }
    elsif ($t->{twig_current}) {
        $last_elt = $t->{twig_current}->{last_child};
    }
    else {
        # no current element: the whole document gets flushed
        $last_elt = $t->{twig_root};
        $flush_trailing_data = 1;
        $t->{twig_completely_flushed} = 1;
    }

    # flush the DTD unless it has ready flushed (ie root has been flushed)
    my $node = $t->{twig_root};
    unless ($node->{'flushed'}) {
        # store flush info so we can auto-flush later
        if ($t->{twig_autoflush}) {
            $t->{twig_autoflush_data} = {};
            if ($fh) { $t->{twig_autoflush_data}->{fh}   = $fh; }
            if (@_)  { $t->{twig_autoflush_data}->{args} = \@_; }
        }
        $t->print_prolog( %args);
        print $t->_leading_cpi;
    }

    NODE:
    while ($node) {
        unless ($last_elt && $last_elt->in( $node)) {
            # an element before the last one or the last one,
            my $sibling = $node->{next_sibling};
            $node->_flush();
            $node->delete;
            last NODE if( $last_elt && ($node == $last_elt));
            $node = $sibling;
            next NODE;
        }

        # an ancestor of the last element: just output the front tag
        unless ($node->{'flushed'}) {
            print $node->start_tag();
            $node->{'flushed'} = 1;
        }
        $node = $node->{first_child};
    }

    if ($flush_trailing_data) {
        print $t->_trailing_cpi        # trailing comments and pi's (elements, in 'process' mode)
            , $t->_trailing_cpi_text   # trailing comments and pi's (in 'keep' mode)
    }

    if (defined $old_select)          { select $old_select; }
    if (defined $old_pretty)          { $t->set_pretty_print( $old_pretty); }
    if (defined $old_empty_tag_style) { $t->set_empty_tag_style( $old_empty_tag_style); }

    # remove from the id list the entries that are no longer defined
    if (my $ids = $t->{twig_id_list}) {
        while (my ($id, $id_elt) = each %$ids) {
            delete $t->{twig_id_list}->{$id} unless defined $id_elt;
        }
    }

    return $t;
}
|
3473
|
|
|
|
|
|
|
|
|
3474
|
|
|
|
|
|
|
|
|
3475
|
|
|
|
|
|
|
# flushes up to an element |
|
3476
|
|
|
|
|
|
|
# this method just reorders the arguments and calls flush |
|
3477
|
|
|
|
|
|
|
sub flush_up_to {
    # Flushes up to element $up_to: puts the arguments in the order flush
    # expects (optional filehandle first, then the element) and calls it.
    # Returns the twig.
    my ($t, $up_to, @rest) = @_;

    my @filehandle;
    if (isa( $rest[0], 'GLOB') || isa( $rest[0], 'IO::Scalar')) {
        @filehandle = ( shift @rest);
    }
    $t->flush( @filehandle, $up_to, @rest);

    return $t;
}
|
3489
|
|
|
|
|
|
|
|
|
3490
|
|
|
|
|
|
|
|
|
3491
|
|
|
|
|
|
|
# same as print except the entire document text is returned as a string |
|
3492
|
|
|
|
|
|
|
sub sprint {
    # Same as print except the entire document text is returned as a string.
    # The PrettyPrint and EmptyTags options are applied for this call only.
    my ($t, @options) = @_;
    my %args = _normalize_args( @options);

    my $old_pretty;
    if (defined $args{PrettyPrint}) {
        $old_pretty = $t->set_pretty_print( delete $args{PrettyPrint});
    }

    my $old_empty_tag_style;
    if (defined $args{EmptyTags}) {
        $old_empty_tag_style = $t->set_empty_tag_style( delete $args{EmptyTags});
    }

    my $string = $t->prolog( %args);                                  # xml declaration and doctype
    $string .= $t->_leading_cpi( %args);                              # leading comments and pi's in 'process' mode
    $string .= ( $t->{twig_root} && $t->{twig_root}->sprint) || '';
    $string .= $t->_trailing_cpi;                                     # trailing comments and pi's (elements, in 'process' mode)
    $string .= $t->_trailing_cpi_text;                                # trailing comments and pi's (in 'keep' mode)
    $string .= $t->{trailing_spaces} if( $t->{twig_keep_spaces} && $t->{trailing_spaces});

    if (defined $old_pretty)          { $t->set_pretty_print( $old_pretty); }
    if (defined $old_empty_tag_style) { $t->set_empty_tag_style( $old_empty_tag_style); }

    return $string;
}
|
3521
|
|
|
|
|
|
|
|
|
3522
|
|
|
|
|
|
|
|
|
3523
|
|
|
|
|
|
|
# purge: discards useless elements in a tree
# it walks the tree like a flush would, except that nothing is printed
# $up_to (optional) is the last element to purge, it is usually set through
# the purge_up_to method
# returns the twig
sub purge
  { my( $t, $up_to)= @_;

    $t->_trigger_tdh if $t->{twig_tdh};

    # the "real" last element processed, as _twig_end has closed it
    my $last_elt=   $up_to             ? $up_to
                  : $t->{twig_current} ? $t->{twig_current}->{last_child}
                  :                      $t->{twig_root}
                  ;

    my $elt= $t->{twig_root};
    while( $elt)
      { if( $last_elt && $last_elt->in( $elt))
          { # $elt contains the last element: keep it, mark it as flushed and go down
            $elt->{'flushed'}=1;
            $elt= $elt->{first_child};
            next;
          }

        # an element before the last one or the last one,
        # the sibling is read before the element is deleted
        my $sibling= $elt->{next_sibling};
        $elt->delete;
        last if( $last_elt && ($elt == $last_elt) );
        $elt= $sibling;
      }

    # forget the ids that no longer map to a defined element
    if( my $ids= $t->{twig_id_list})
      { while( my( $id, $target)= each %$ids)
          { delete $ids->{$id} unless( defined $target); }
      }

    return $t;
  }
|
3564
|
|
|
|
|
|
|
|
|
3565
|
|
|
|
|
|
|
# purges up to an element: this method just hands its arguments to purge
sub purge_up_to
  { my( $t, @args)= @_;
    return $t->purge( @args);
  }
|
3570
|
|
|
|
|
|
|
|
|
3571
|
|
|
|
|
|
|
# returns what is stored in the twig_root field of the twig
sub root
  { my( $t)= @_;
    return $t->{twig_root};
  }
|
3573
|
|
|
|
|
|
|
|
|
3574
|
|
|
|
|
|
|
# calls normalize on the root and returns its result
sub normalize
  { my( $t)= @_;
    return $t->root->normalize;
  }
|
3576
|
|
|
|
|
|
|
|
|
3577
|
|
|
|
|
|
|
|
|
3578
|
|
|
|
|
|
|
# create accessor methods on attribute names
{ my %accessor; # memorize accessor names so re-creating them won't trigger an error

  # att_accessors( @attribute_names)
  # called on a twig (the accessors are then created in its twig_elt_class) or
  # on a class name (they are created in XML::Twig::Elt)
  # each accessor returns the attribute of the same name, and sets it first when
  # called with an argument; it is an lvalue sub
  # croaks if the element class already has a method of that name which was not
  # created by a previous call
  # returns its invocant
  sub att_accessors
    { my( $twig_or_class, @atts)= @_;
      my $elt_class= ref $twig_or_class ? $twig_or_class->{twig_elt_class}
                                        : 'XML::Twig::Elt'
                                        ;
      ## no critic (TestingAndDebugging::ProhibitNoStrict);
      no strict 'refs';
      # the loop runs on the copy in @atts, not on @_: a foreach variable is an
      # alias to the list element, and the elements of @_ are aliases to the
      # caller's variables, so a closure built on @_ would see later changes to them
      foreach my $att (@atts)
        { _croak( "attempt to redefine existing method $att using att_accessors")
            if( $elt_class->can( $att) && !$accessor{$att});

          if( !$accessor{$att})
            { *{"$elt_class\::$att"}=
                sub
                    :lvalue                                  # > perl 5.5
                  { my $elt= shift;
                    if( @_) { $elt->{att}->{$att}= $_[0]; }
                    $elt->{att}->{$att};
                  };
              $accessor{$att}=1;
            }
        }
      return $twig_or_class;
    }
}
|
3606
|
|
|
|
|
|
|
|
|
3607
|
|
|
|
|
|
|
{ my %accessor; # names of the accessors already created, so asking for them again is not an error

  # elt_accessors( @tags) or elt_accessors( { accessor_name => expression, ...})
  # called on a twig (the accessors are then created in its twig_elt_class) or
  # on a class name (they are created in XML::Twig::Elt)
  # each accessor returns $elt->children( expression) in list context and
  # $elt->first_child( expression) otherwise
  # croaks if the element class already has a method of that name which was not
  # created by a previous call
  # returns its invocant
  sub elt_accessors
    { my $twig_or_class= shift;
      my $elt_class= 'XML::Twig::Elt';
      if( ref $twig_or_class) { $elt_class= $twig_or_class->{twig_elt_class}; }

      # with a hash ref the keys are the accessor names and the values the expressions,
      # with a list each tag is both the accessor name and the expression
      my %exp_to_alias;
      if( ref( $_[0]) && isa( $_[0], 'HASH')) { %exp_to_alias= %{$_[0]};            }
      else                                    { %exp_to_alias= map { $_ => $_ } @_; }

      ## no critic (TestingAndDebugging::ProhibitNoStrict);
      no strict 'refs';
      foreach my $alias (keys %exp_to_alias)
        { next if( $accessor{$alias});

          if( $elt_class->can( $alias))
            { _croak( "attempt to redefine existing method $alias using elt_accessors"); }

          my $exp= $exp_to_alias{$alias};
          *{"$elt_class\::$alias"}=
            sub
              { my $elt= shift;
                return wantarray ? $elt->children( $exp) : $elt->first_child( $exp);
              };
          $accessor{$alias}=1;
        }
      return $twig_or_class;
    }
}
|
3636
|
|
|
|
|
|
|
|
|
3637
|
|
|
|
|
|
|
{ my %accessor; # memorize accessor names so re-creating them won't trigger an error

  # field_accessors( @tags) or field_accessors( { accessor_name => expression, ...})
  # called on a twig (the accessors are then created in its twig_elt_class) or
  # on a class name (they are created in XML::Twig::Elt)
  # each accessor returns $elt->field( expression)
  # croaks if the element class already has a method of that name which was not
  # created by a previous call
  # returns its invocant
  sub field_accessors
    {
      my $twig_or_class= shift;
      my $elt_class= ref $twig_or_class ? $twig_or_class->{twig_elt_class}
                                        : 'XML::Twig::Elt'
                                        ;
      # with a hash ref the keys are the accessor names and the values the expressions,
      # with a list each tag is both the accessor name and the expression
      my %exp_to_alias= ref( $_[0]) && isa( $_[0], 'HASH') ? %{$_[0]}
                                                            : map { $_ => $_ } @_;

      ## no critic (TestingAndDebugging::ProhibitNoStrict);
      no strict 'refs';
      while( my( $alias, $exp)= each %exp_to_alias )
        { if( $elt_class->can( $alias) && !$accessor{$alias})
            { # the method that would be overwritten is named $alias, $exp is only what it looks up
              _croak( "attempt to redefine existing method $alias using field_accessors");
            }
          if( !$accessor{$alias})
            { *{"$elt_class\::$alias"}=
                sub
                  { my $elt= shift;
                    $elt->field( $exp)
                  };
              $accessor{$alias}=1;
            }
        }
      return $twig_or_class;
    }
}
|
3664
|
|
|
|
|
|
|
|
|
3665
|
|
|
|
|
|
|
# returns the root if it passes the condition, otherwise the result of
# next_elt( $cond) called on the root
# returns undef (a 1-element list in list context) when the twig has no root
sub first_elt
  { my $t=    shift;
    my $cond= shift;

    my $root= $t->root;
    if( !$root) { return undef; }

    if( $root->passes( $cond)) { return $root; }
    return $root->next_elt( $cond);
  }
|
3671
|
|
|
|
|
|
|
|
|
3672
|
|
|
|
|
|
|
# returns the result of last_descendant( $cond) called on the root
# returns undef (a 1-element list in list context) when the twig has no root
sub last_elt
  { my $t=    shift;
    my $cond= shift;

    my $root= $t->root;
    if( !$root) { return undef; }

    return $root->last_descendant( $cond);
  }
|
3677
|
|
|
|
|
|
|
|
|
3678
|
|
|
|
|
|
|
# returns the result of next_n_elt called on the root
# the offset is first decreased by one if the root itself matches the condition
sub next_n_elt
  { my( $t, $offset, $cond)= @_;
    if( $t->root->matches( $cond)) { $offset--; }
    return $t->root->next_n_elt( $offset, $cond);
  }
|
3683
|
|
|
|
|
|
|
|
|
3684
|
|
|
|
|
|
|
# get_xpath( @args) applies get_xpath to the root
# get_xpath( $array_ref_of_elts, @args) applies it to each element of the
# array and returns the results sorted, without duplicates
sub get_xpath
  { my $twig= shift;

    unless( isa( $_[0], 'ARRAY'))
      { return $twig->root->get_xpath( @_); }

    my $elt_array= shift;
    my @found= map { $_->get_xpath( @_) } @$elt_array;
    return _unique_elts( @found);
  }
|
3693
|
|
|
|
|
|
|
|
|
3694
|
|
|
|
|
|
|
# takes a list of elts, sorts them using their cmp method and returns the
# sorted list, in which consecutive identical elts are kept only once
sub _unique_elts
  { my @candidates= sort { $a->cmp( $b) } @_;
    my @kept;
    while( my $elt= shift @candidates)
      { next if( @kept && ($kept[-1] == $elt));
        push @kept, $elt;
      }
    return @kept;
  }
|
3702
|
|
|
|
|
|
|
|
|
3703
|
|
|
|
|
|
|
# findvalue( @args) applies findvalue to the root
# findvalue( $array_ref_of_elts, @args) applies it to each element of the
# array and returns the results joined in a single string
sub findvalue
  { my $twig= shift;

    unless( isa( $_[0], 'ARRAY'))
      { return $twig->root->findvalue( @_); }

    my $elt_array= shift;
    my @values= map { $_->findvalue( @_) } @$elt_array;
    return join( '', @values);
  }
|
3712
|
|
|
|
|
|
|
|
|
3713
|
|
|
|
|
|
|
# findvalues( @args) applies findvalues to the root
# findvalues( $array_ref_of_elts, @args) applies it to each element of the
# array and returns all the results
sub findvalues
  { my $twig= shift;

    unless( isa( $_[0], 'ARRAY'))
      { return $twig->root->findvalues( @_); }

    my $elt_array= shift;
    return map { $_->findvalues( @_) } @$elt_array;
  }
|
3722
|
|
|
|
|
|
|
|
|
3723
|
|
|
|
|
|
|
# hands its arguments to XML::Twig::Elt->set_id_seed, returns the twig
sub set_id_seed
  { my( $t, @seed)= @_;
    XML::Twig::Elt->set_id_seed( @seed);
    return $t;
  }
|
3728
|
|
|
|
|
|
|
|
|
3729
|
|
|
|
|
|
|
# return an array ref to an index, or undef
# when a third argument is given, return that item of the index instead
# (undef if there is no such index or no such item)
# looking up an index never creates it: going through
# $twig->{_twig_index}->{$name}->[$index] directly would autovivify the
# missing levels, and the next call without $index would then get an empty
# array ref instead of undef
sub index
  { my( $twig, $name, $index)= @_;
    my $all_indexes= $twig->{_twig_index};
    my $items= $all_indexes ? $all_indexes->{$name} : undef;
    return $items unless( defined $index);
    return $items ? $items->[$index] : undef;
  }
|
3734
|
|
|
|
|
|
|
|
|
3735
|
|
|
|
|
|
|
# return a list with just the root
# if a condition is given then return an empty list unless the root passes it
sub children
  { my( $t, $cond)= @_;
    my $root= $t->root;
    return () if( $cond && !$root->passes( $cond));
    return ($root);
  }
|
3745
|
|
|
|
|
|
|
|
|
3746
|
|
|
|
|
|
|
# return a list with just the root, there is no condition to check
sub _children
  { my( $t)= @_;
    return ($t->root);
  }
|
3747
|
|
|
|
|
|
|
|
|
3748
|
|
|
|
|
|
|
# weird, but here for completeness
# used to solve (nonsensical) /doc[1] XPath queries
# returns item number $nb of the list returned by children called with the
# rest of the arguments
sub child
  { my( $t, $nb, @cond)= @_;
    return ( $t->children( @cond) )[$nb];
  }
|
3755
|
|
|
|
|
|
|
|
|
3756
|
|
|
|
|
|
|
# returns the descendants of the root that its descendants method returns
# for $cond, preceded by the root itself when it passes the condition
sub descendants
  { my( $t, $cond)= @_;
    my $root= $t->root;
    unless( $root->passes( $cond) )
      { return ( $root->descendants( $cond)); }
    return ($root, $root->descendants( $cond));
  }
|
3764
|
|
|
|
|
|
|
|
|
3765
|
|
|
|
|
|
|
# the 3 methods below call the method of the same name on the root, with the
# same arguments, and return its result
sub simplify
  { my( $t, @args)= @_;
    return $t->root->simplify( @args);
  }

sub subs_text
  { my( $t, @args)= @_;
    return $t->root->subs_text( @args);
  }

sub trim
  { my( $t, @args)= @_;
    return $t->root->trim( @args);
  }
|
3768
|
|
|
|
|
|
|
|
|
3769
|
|
|
|
|
|
|
|
|
3770
|
|
|
|
|
|
|
# stores $keep in the twig_keep_encoding and NoExpand fields of the twig, then
# passes it to XML::Twig::Elt::set_keep_encoding, whose result is returned
sub set_keep_encoding
  { my $t=    shift;
    my $keep= shift;
    @{$t}{ qw( twig_keep_encoding NoExpand) }= ( $keep, $keep);
    return XML::Twig::Elt::set_keep_encoding( $keep);
  }
|
3776
|
|
|
|
|
|
|
|
|
3777
|
|
|
|
|
|
|
# hands all its arguments (the invocant included) to
# XML::Twig::Elt::set_expand_external_entities and returns its result
sub set_expand_external_entities
  { my @args= @_;
    return XML::Twig::Elt::set_expand_external_entities( @args);
  }
|
3779
|
|
|
|
|
|
|
|
|
3780
|
|
|
|
|
|
|
# sets the twig_escape_gt field of the twig to 1, then calls
# XML::Twig::Elt::escape_gt with the remaining arguments and returns its result
sub escape_gt
  { my( $t, @args)= @_;
    $t->{twig_escape_gt}= 1;
    return XML::Twig::Elt::escape_gt( @args);
  }
|
3782
|
|
|
|
|
|
|
|
|
3783
|
|
|
|
|
|
|
# sets the twig_escape_gt field of the twig to 0, then calls
# XML::Twig::Elt::do_not_escape_gt with the remaining arguments and returns its result
sub do_not_escape_gt
  { my( $t, @args)= @_;
    $t->{twig_escape_gt}= 0;
    return XML::Twig::Elt::do_not_escape_gt( @args);
  }
|
3785
|
|
|
|
|
|
|
|
|
3786
|
|
|
|
|
|
|
# returns the entry of the twig_id_list field for the id
sub elt_id
  { my( $t, $id)= @_;
    return $t->{twig_id_list}->{$id};
  }
|
3788
|
|
|
|
|
|
|
|
|
3789
|
|
|
|
|
|
|
# change it in ALL twigs at the moment
# the tables updated are the package variables %XML::Twig::gi2index and
# @XML::Twig::index2gi, not fields of the twig
# returns the twig, or nothing if $old_gi has no (true) index
sub change_gi
  { my( $twig, $old_gi, $new_gi)= @_;

    my $index= $XML::Twig::gi2index{$old_gi};
    return unless( $index);

    $XML::Twig::index2gi[$index]= $new_gi;
    # move the index from the old name to the new one
    $XML::Twig::gi2index{$new_gi}= delete $XML::Twig::gi2index{$old_gi};

    return $twig;
  }
|
3799
|
|
|
|
|
|
|
|
|
3800
|
|
|
|
|
|
|
|
|
3801
|
|
|
|
|
|
|
# builds the DTD from the stored (possibly updated) data
# returns the doctype declaration, with an internal subset made of the element
# and attribute list declarations found in the twig_dtd field, followed by the
# text of the entity list if there is one
# returns an empty string if the twig has no twig_doctype field
# NOTE(review): the '<!DOCTYPE', '<!ELEMENT' and '<!ATTLIST' parts of the
# literals had been lost from this copy of the code and were restored, check
# them against the reference source
sub dtd_text
  { my $t= shift;
    my $dtd= $t->{twig_dtd};
    my $doctype= $t->{twig_doctype} or return '';
    my $string= "<!DOCTYPE ".$doctype->{name};

    $string .= " [\n";

    foreach my $gi (@{$dtd->{elt_list}})
      { $string.= "<!ELEMENT $gi ".$dtd->{model}->{$gi}.">\n" ;
        if( $dtd->{att}->{$gi})
          { my $attlist= $dtd->{att}->{$gi};
            $string.= "<!ATTLIST $gi\n";
            # one line per attribute, sorted by name so the output is stable
            foreach my $att ( sort keys %{$attlist})
              {
                if( $attlist->{$att}->{fixed})
                  { $string.= " $att $attlist->{$att}->{type} #FIXED $attlist->{$att}->{default}"; }
                else
                  { $string.= " $att $attlist->{$att}->{type} $attlist->{$att}->{default}"; }
                $string.= "\n";
              }
            $string.= ">\n";
          }
      }
    $string.= $t->entity_list->text if( $t->entity_list);
    $string.= "\n]>\n";
    return $string;
  }
|
3830
|
|
|
|
|
|
|
|
|
3831
|
|
|
|
|
|
|
# prints the DTD from the stored (possibly updated) data
# the DTD text (see dtd_text) goes to the filehandle passed as first argument
# if it is a GLOB or an IO::Scalar, to the currently selected filehandle otherwise
# returns the twig
sub dtd_print
  { my $t= shift;

    my $fh;
    if( isa( $_[0], 'GLOB') || isa( $_[0], 'IO::Scalar')) { $fh= shift; }

    if( $fh) { print {$fh} $t->dtd_text; }
    else     { print $t->dtd_text;       }

    return $t;
  }
|
3839
|
|
|
|
|
|
|
|
|
3840
|
|
|
|
|
|
|
# build the subs that call directly expat
# each of them is a method of the twig that calls the method of the same
# name on the object stored in the twig_parser field, with the same arguments
# they croak when the twig_parsing field of the twig is false
BEGIN
  { my @expat_methods= qw( depth in_element within_element context
                           current_line current_column current_byte
                           recognized_string original_string
                           xpcroak xpcarp
                           base current_element element_index
                           xml_escape
                           position_in_context);
    foreach my $method (@expat_methods)
      { my $pass_through= sub
          { my( $t, @args)= @_;
            unless( $t->{twig_parsing})
              { _croak( "calling $method after parsing is finished"); }
            return $t->{twig_parser}->$method( @args);
          };

        ## no critic (TestingAndDebugging::ProhibitNoStrict);
        no strict 'refs';
        *{$method}= $pass_through;
      }
  }
|
3859
|
|
|
|
|
|
|
|
|
3860
|
|
|
|
|
|
|
# returns a path made of the context given by the parser and of $gi,
# each step preceded by a /
# when the twig_map_xmlns field is set each step first goes through _replace_prefix
sub path
  { my( $t, $gi)= @_;

    my @steps= ( $t->{twig_parser}->context, $gi);
    if( $t->{twig_map_xmlns})
      { @steps= map { $t->_replace_prefix( $_) } @steps; }

    return "/" . join( "/", @steps);
  }
|
3867
|
|
|
|
|
|
|
|
|
3868
|
|
|
|
|
|
|
# calls finish on the object stored in the twig_parser field and returns its result
sub finish
  { my( $t)= @_;
    return $t->{twig_parser}->finish;
  }
|
3872
|
|
|
|
|
|
|
|
|
3873
|
|
|
|
|
|
|
# just finish the parse by printing the rest of the document
# with a filehandle: it becomes the selected filehandle, and the one it replaces
# is stored in the twig_original_selected_fh field (if it is true)
# without: _set_fh_to_twig_output_fh is called
# then the handlers of the parser are replaced: by %twig_handlers_finish_print
# if the twig_keep_encoding field is true, by %twig_handlers_finish_print_original
# otherwise
# returns the twig
sub finish_print
  { my( $t, $fh)= @_;

    if( defined $fh)
      { my $old_fh= select $fh;
        if( $old_fh) { $t->{twig_original_selected_fh}= $old_fh; }
      }
    else
      { $t->_set_fh_to_twig_output_fh(); }

    my $p=$t->{twig_parser};
    my $handlers= $t->{twig_keep_encoding} ? \%twig_handlers_finish_print
                                           : \%twig_handlers_finish_print_original
                                           ;
    $p->setHandlers( %$handlers);

    return $t;
  }
|
3891
|
|
|
|
|
|
|
|
|
3892
|
|
|
|
|
|
|
# the 5 methods below hand all their arguments (the invocant included) to the
# function of the same name in XML::Twig::Elt and return its result

sub set_remove_cdata
  { my @args= @_;
    return XML::Twig::Elt::set_remove_cdata( @args);
  }

sub output_filter
  { my @args= @_;
    return XML::Twig::Elt::output_filter( @args);
  }

sub set_output_filter
  { my @args= @_;
    return XML::Twig::Elt::set_output_filter( @args);
  }

sub output_text_filter
  { my @args= @_;
    return XML::Twig::Elt::output_text_filter( @args);
  }

sub set_output_text_filter
  { my @args= @_;
    return XML::Twig::Elt::set_output_text_filter( @args);
  }
|
3899
|
|
|
|
|
|
|
|
|
3900
|
|
|
|
|
|
|
# sets the twig_input_filter field of the twig, returns its previous value
# $input_filter can be:
#   - a false value or a code ref, stored as is
#   - 'latin1', for which the result of latin1() is stored
#   - a key of %filter with a true value, which is then stored
# anything else is reported through _croak
sub set_input_filter
  { my( $t, $input_filter)= @_;
    my $old_filter= $t->{twig_input_filter};

    my $new_filter;
    if( !$input_filter || isa( $input_filter, 'CODE') )
      { $new_filter= $input_filter; }
    elsif( $input_filter eq 'latin1')
      { $new_filter= latin1(); }
    elsif( $filter{$input_filter})
      { $new_filter= $filter{$input_filter}; }
    else
      { _croak( "invalid input filter: $input_filter");
        return $old_filter;
      }

    $t->{twig_input_filter}= $new_filter;
    return $old_filter;
  }
|
3914
|
|
|
|
|
|
|
|
|
3915
|
|
|
|
|
|
|
# the methods below call the function of the same name in XML::Twig::Elt and
# return its result
# they hand it all their arguments, the invocant included, except for
# set_keep_atts_order which drops the invocant first

sub set_empty_tag_style
  { my @args= @_;
    return XML::Twig::Elt::set_empty_tag_style( @args);
  }

sub set_pretty_print
  { my @args= @_;
    return XML::Twig::Elt::set_pretty_print( @args);
  }

sub set_quote
  { my @args= @_;
    return XML::Twig::Elt::set_quote( @args);
  }

sub set_indent
  { my @args= @_;
    return XML::Twig::Elt::set_indent( @args);
  }

sub set_keep_atts_order
  { my( $t, @args)= @_;
    return XML::Twig::Elt::set_keep_atts_order( @args);
  }

sub keep_atts_order
  { my @args= @_;
    return XML::Twig::Elt::keep_atts_order( @args);
  }

sub set_do_not_escape_amp_in_atts
  { my @args= @_;
    return XML::Twig::Elt::set_do_not_escape_amp_in_atts( @args);
  }
|
3935
|
|
|
|
|
|
|
|
|
3936
|
|
|
|
|
|
|
# save and restore package globals (the ones in XML::Twig::Elt)
# save_global_state should probably return the XML::Twig object itself, but
# instead it returns the state (as a hashref) for backward compatibility
# the state is what XML::Twig::Elt::global_state returns, it is also stored
# in the twig_saved_state field of the twig
sub save_global_state
  { my( $t)= @_;
    my $state= XML::Twig::Elt::global_state();
    $t->{twig_saved_state}= $state;
    return $state;
  }
|
3943
|
|
|
|
|
|
|
|
|
3944
|
|
|
|
|
|
|
# passes the state stored in the twig_saved_state field of the twig to
# XML::Twig::Elt::set_global_state
# the result of that call is what the method returns
sub restore_global_state
  { my( $t)= @_;
    my $saved_state= $t->{twig_saved_state};
    XML::Twig::Elt::set_global_state( $saved_state);
  }
|
3948
|
|
|
|
|
|
|
|
|
3949
|
|
|
|
|
|
|
# returns what XML::Twig::Elt::global_state returns, no argument is passed
sub global_state
  { my $state_getter= \&XML::Twig::Elt::global_state;
    return $state_getter->();
  }
|
3951
|
|
|
|
|
|
|
|
|
3952
|
|
|
|
|
|
|
# passes the state (the argument after the invocant) to
# XML::Twig::Elt::set_global_state and returns its result
sub set_global_state
  { my( $t, $state)= @_;
    return XML::Twig::Elt::set_global_state( $state);
  }
|
3954
|
|
|
|
|
|
|
|
|
3955
|
|
|
|
|
|
|
# calls DESTROY explicitly on the twig, returns nothing
sub dispose
  { my( $t)= @_;
    $t->DESTROY;
    return;
  }
|
3960
|
|
|
|
|
|
|
|
|
3961
|
|
|
|
|
|
|
# deletes the tree (when the root is an XML::Twig::Elt), then undefines the
# fields that hold references to or from the twig and empties the twig itself
sub DESTROY
  { my $t= shift;

    if( $t->{twig_root})
      { if( isa( $t->{twig_root}, 'XML::Twig::Elt')) { $t->{twig_root}->delete; } }

    # added to break circular references
    undef $t->{twig};
    if( $t->{twig_root}) { undef $t->{twig_root}->{twig}; }
    undef $t->{twig_parser};

    undef %$t; # prevents memory leaks (especially when using mod_perl)
    undef $t;
  }
|
3974
|
|
|
|
|
|
|
|
|
3975
|
|
|
|
|
|
|
# return true if perl was compiled using perlio
# if perl is not available return true, these days perlio should be used
# the check looks for useperlio=define in the output of perl -V
# NOTE(review): the path of the executable is interpolated as is in the
# command, a path that includes spaces would probably not run
sub _use_perlio
  { my $perl= _this_perl();
    return 1 unless( $perl);
    return grep { /useperlio=define/ } `$perl -V`;
  }
|
3981
|
|
|
|
|
|
|
|
|
3982
|
|
|
|
|
|
|
# returns the path to the perl executable (if available)
# the path is $Config{perlpath}, to which $Config{_exe} is added (except on VMS)
# unless it already ends with it
# returns an empty string if there is no file at that path
sub _this_perl
  { # straight from perlvar
    my $secure_perl_path= $Config{perlpath};
    if ($^O ne 'VMS')
      { # \Q...\E so the extension is matched literally (the dot in .exe is not a wildcard)
        $secure_perl_path .= $Config{_exe} unless $secure_perl_path =~ m/\Q$Config{_exe}\E$/i;
      }
    if( ! -f $secure_perl_path) { $secure_perl_path= ''; } # when perl is not available (PDK)
    return $secure_perl_path;
  }
|
3991
|
|
|
|
|
|
|
|
|
3992
|
|
|
|
|
|
|
# |
|
3993
|
|
|
|
|
|
|
# non standard handlers |
|
3994
|
|
|
|
|
|
|
# |
|
3995
|
|
|
|
|
|
|
|
|
3996
|
|
|
|
|
|
|
# kludge: expat 1.95.2 calls both Default AND Doctype handlers
# so if the default handler finds '<!DOCTYPE' then it must
# unset itself (_twig_print_doctype will reset it)
# in that case the Entity handler is unset too and $expat_1_95_2 is set
# any other string is printed
# NOTE(review): the '<!DOCTYPE' literal had been lost from this copy of the
# code and was restored, check it against the reference source
sub _twig_print_check_doctype
  { # warn " in _twig_print_check_doctype...\n"; # DEBUG handler

    my $p= shift;
    my $string= $p->recognized_string();
    if( $string eq '<!DOCTYPE')
      {
        $p->setHandlers( Default => undef);
        $p->setHandlers( Entity => undef);
        $expat_1_95_2=1;
      }
    else
      { print $string; }

    return;
  }
|
4015
|
|
|
|
|
|
|
|
|
4016
|
|
|
|
|
|
|
|
|
4017
|
|
|
|
|
|
|
# prints the string recognized by the parser
# exceptions:
#   - once $expat_1_95_2 is set, the first '[' is not printed
#   - when the twig_right_after_root field of the twig is true, strings
#     without any non-space character are not printed
sub _twig_print
  { # warn " in _twig_print...\n"; # DEBUG handler
    my $p= shift;

    if( $expat_1_95_2 && ($p->recognized_string eq '[') && !$p->{twig}->{expat_1_95_2_seen_bracket})
      { # otherwise the opening square bracket of the doctype gets printed twice
        $p->{twig}->{expat_1_95_2_seen_bracket}=1;
        return;
      }

    if( $p->{twig}->{twig_right_after_root})
      { my $s= $p->recognized_string();
        if( $s=~ m{\S}) { print $s; }
      }
    else
      { print $p->recognized_string(); }

    return;
  }
|
4032
|
|
|
|
|
|
|
# recognized_string does not seem to work for entities, go figure!
# so this handler is used to print them anyway: an XML::Twig::Entity is
# created from the arguments that follow the parser, and printed
sub _twig_print_entity
  { # warn " in _twig_print_entity...\n"; # DEBUG handler
    my( $p, @entity_args)= @_;
    XML::Twig::Entity->new( @entity_args)->print;
  }
|
4039
|
|
|
|
|
|
|
|
|
4040
|
|
|
|
|
|
|
# kludge: expat 1.95.2 calls both Default AND Doctype handlers
# so if the default handler finds '<!DOCTYPE' then it must
# unset itself (_twig_print_doctype will reset it)
# in that case the Entity handler is unset too and $expat_1_95_2 is set
# any other string is printed, the string used is the original one
# NOTE(review): the '<!DOCTYPE' literal had been lost from this copy of the
# code and was restored, check it against the reference source
sub _twig_print_original_check_doctype
  { # warn " in _twig_print_original_check_doctype...\n"; # DEBUG handler

    my $p= shift;
    my $string= $p->original_string();
    if( $string eq '<!DOCTYPE')
      { $p->setHandlers( Default => undef);
        $p->setHandlers( Entity => undef);
        $expat_1_95_2=1;
      }
    else
      { print $string; }

    return;
  }
|
4058
|
|
|
|
|
|
|
|
|
4059
|
|
|
|
|
|
|
# prints the original string given by the parser, returns nothing
sub _twig_print_original
  { # warn " in _twig_print_original...\n"; # DEBUG handler
    my( $p)= @_;
    print $p->original_string();
    return;
  }
|
4065
|
|
|
|
|
|
|
|
|
4066
|
|
|
|
|
|
|
|
|
4067
|
|
|
|
|
|
|
# prints the doctype declaration, rebuilt from the arguments of the handler,
# then sets _twig_print_original as the Default handler
# the has_internal field of the twig's doctype is set to $internal
# nothing is printed when $name is false
# NOTE(review): the statement that adds '<!DOCTYPE $name' had been lost from
# this copy of the code and was restored, check it against the reference source
sub _twig_print_original_doctype
  { # warn " in _twig_print_original_doctype...\n"; # DEBUG handler

    my( $p, $name, $sysid, $pubid, $internal)= @_;
    if( $name)
      { # with recent versions of XML::Parser original_string does not work,
        # hence we need to rebuild the doctype declaration
        my $doctype='';
        $doctype .= qq{<!DOCTYPE $name} if( $name);
        $doctype .= qq{ PUBLIC "$pubid"} if( $pubid);
        $doctype .= qq{ SYSTEM} if( $sysid && !$pubid);
        $doctype .= qq{ "$sysid"} if( $sysid);
        # the opening bracket and the closing > are left out in the expat 1.95.2 case
        $doctype .= ' [' if( $internal && !$expat_1_95_2) ;
        $doctype .= qq{>} unless( $internal || $expat_1_95_2);
        $p->{twig}->{twig_doctype}->{has_internal}=$internal;
        print $doctype;
      }
    $p->setHandlers( Default => \&_twig_print_original);
    return;
  }
|
4087
|
|
|
|
|
|
|
|
|
4088
|
|
|
|
|
|
|
# prints the doctype declaration, rebuilt from the arguments of the handler,
# then sets _twig_print as the Default handler
# the has_internal field of the twig's doctype is set to $internal
# nothing is printed when $name is false
# NOTE(review): the statement that adds '<!DOCTYPE $name' had been lost from
# this copy of the code and was restored, check it against the reference source
sub _twig_print_doctype
  { # warn " in _twig_print_doctype...\n"; # DEBUG handler
    my( $p, $name, $sysid, $pubid, $internal)= @_;
    if( $name)
      { # with recent versions of XML::Parser original_string does not work,
        # hence we need to rebuild the doctype declaration
        my $doctype='';
        $doctype .= qq{<!DOCTYPE $name} if( $name);
        $doctype .= qq{ PUBLIC "$pubid"} if( $pubid);
        $doctype .= qq{ SYSTEM} if( $sysid && !$pubid);
        $doctype .= qq{ "$sysid"} if( $sysid);
        $doctype .= ' [' if( $internal) ;
        # the closing > is left out in the expat 1.95.2 case
        $doctype .= qq{>} unless( $internal || $expat_1_95_2);
        $p->{twig}->{twig_doctype}->{has_internal}=$internal;
        print $doctype;
      }
    $p->setHandlers( Default => \&_twig_print);
    return;
  }
|
4107
|
|
|
|
|
|
|
|
|
4108
|
|
|
|
|
|
|
|
|
4109
|
|
|
|
|
|
|
# prints the original string given by the parser, returns nothing
sub _twig_print_original_default
  { # warn " in _twig_print_original_default...\n"; # DEBUG handler
    my( $p)= @_;
    print $p->original_string();
    return;
  }
|
4115
|
|
|
|
|
|
|
|
|
4116
|
|
|
|
|
|
|
# account for the case where the element is empty
# prints the original string given by the parser, returns nothing
sub _twig_print_end_original
  { # warn " in _twig_print_end_original...\n"; # DEBUG handler
    my( $p)= @_;
    print $p->original_string();
    return;
  }
|
4123
|
|
|
|
|
|
|
|
|
4124
|
|
|
|
|
|
|
# start tag handler, its arguments are the parser, the tag and the attributes
# the context of the element is pushed on the _twig_context_stack of the twig, then:
#   - if _handler finds a handler for the tag in twig_roots: the regular
#     handlers are set again and the tag goes to _twig_start
#   - at depth 0: the tag is printed, goes to _twig_start and the root is marked as flushed
#   - otherwise: the tag is printed, unless there are start tag handlers for
#     it and the last one called returned a false value
# the tag is only ever printed when the twig_default_print field is true
sub _twig_start_check_roots
  { # warn " in _twig_start_check_roots...\n"; # DEBUG handler

    # only the parser and the tag are taken off @_, which then holds just the
    # attributes: it is passed by reference to _replace_ns and used again afterwards
    my $p=  shift;
    my $gi= shift;
    my $t=  $p->{twig};

    my $fh= $t->{twig_output_fh} || select() || \*STDOUT;

    my $ns_decl;
    if( ($p->depth != 0) && $t->{twig_map_xmlns})
      { $ns_decl= _replace_ns( $t, \$gi, \@_); }

    my $context= { $ST_TAG => $gi, @_};
    if( $ns_decl) { $context->{$ST_NS}= $ns_decl; }
    push @{$t->{_twig_context_stack}}, $context;
    my %att= @_;

    if( _handler( $t, $t->{twig_roots}, $gi))
      { $p->setHandlers( %twig_handlers); # restore regular handlers
        $t->{twig_root_depth}= $p->depth;
        pop @{$t->{_twig_context_stack}}; # will be pushed back in _twig_start
        _twig_start( $p, $gi, @_);
        return;
      }

    # $tag will always be true if it needs to be printed (the tag string is never empty)
    my $tag= '';
    if( $t->{twig_default_print})
      { $tag= $t->{twig_keep_encoding} ? $p->original_string : $p->recognized_string; }

    if( $p->depth == 0)
      {
        ## no critic (TestingAndDebugging::ProhibitNoStrict);
        no strict 'refs';
        print {$fh} $tag if( $tag);
        pop @{$t->{_twig_context_stack}}; # will be pushed back in _twig_start
        _twig_start( $p, $gi, @_);
        $t->root->{'flushed'}=1; # or the root start tag gets output the first time we flush
        return;
      }

    # the start tag handlers, if there are any for the tag, get to decide whether it is printed
    my $tag_wanted= 1;
    if( $t->{twig_starttag_handlers})
      { my @handlers= _handler( $t, $t->{twig_starttag_handlers}, $gi);
        my $last_handler_res;
        foreach my $handler ( @handlers)
          { $last_handler_res= $handler->($t, $gi, %att);
            last unless $last_handler_res;
          }
        $tag_wanted= !@handlers || $last_handler_res;
      }

    {
      ## no critic (TestingAndDebugging::ProhibitNoStrict);
      no strict 'refs';
      print {$fh} $tag if( $tag && $tag_wanted);
    }
    return;
  }
|
4186
|
|
|
|
|
|
|
|
|
4187
|
|
|
|
|
|
|
# end tag handler, its arguments are the parser, the tag and the attributes
# the end tag handlers found for the tag are called, up to the first one that
# returns a false value; their result is not used to decide whether the tag
# is printed
# the tag is printed when the twig_default_print field is true
# at depth 0 the tag goes to _twig_end and the root is marked with end_tag_flushed
# the context of the element is then removed from the _twig_context_stack
sub _twig_end_check_roots
  { # warn " in _twig_end_check_roots...\n"; # DEBUG handler

    my( $p, $gi, %att)= @_;
    my $t= $p->{twig};

    # $tag can be an empty string, hence the undef when there is nothing to
    # print and the test for defined
    my $tag;
    if( $t->{twig_default_print})
      { $tag= $t->{twig_keep_encoding} ? $p->original_string : $p->recognized_string; }

    my $fh= $t->{twig_output_fh} || select() || \*STDOUT;

    if( $t->{twig_endtag_handlers})
      { # look for end tag handlers
        my @handlers= _handler( $t, $t->{twig_endtag_handlers}, $gi);
        foreach my $handler ( @handlers)
          { last unless( $handler->($t, $gi)); }
      }

    {
      ## no critic (TestingAndDebugging::ProhibitNoStrict);
      no strict 'refs';
      print {$fh} $tag if( defined $tag);
    }

    if( $p->depth == 0)
      { _twig_end( $p, $gi);
        $t->root->{end_tag_flushed}=1;
      }

    pop @{$t->{_twig_context_stack}};
    return;
  }
|
4223
|
|
|
|
|
|
|
|
|
4224
|
|
|
|
|
|
|
# Processing instruction callback: when a pi handler is registered for the
# target (or the catch-all '' handler), the pi is handed to _twig_pi with the
# pi flags temporarily forced; otherwise the pi text is printed if default
# printing is on.
sub _twig_pi_check_roots
  { # warn " in _twig_pi_check_roots...\n"; # DEBUG handler
    my( $p, $target, $data)= @_;
    my $t= $p->{twig};

    my $pi;
    if( $t->{twig_default_print})
      { $pi= $t->{twig_keep_encoding} ? $p->original_string : $p->recognized_string; }

    my $fh= $t->{twig_output_fh} || select() || \*STDOUT;

    if(    $t->{twig_handlers}->{pi_handlers}->{$target}
        || $t->{twig_handlers}->{pi_handlers}->{''}
      )
      { # a handler exists for this pi, so it has to be processed as a regular node
        my( $saved_process_pi, $saved_keep_pi)= ( $t->{twig_process_pi}, $t->{twig_keep_pi});
        $t->{twig_process_pi}= 1;                  # override the pi related flags
        $t->{twig_keep_pi}   = 0;
        _twig_pi( @_);                             # call handler on the pi
        $t->{twig_process_pi}= $saved_process_pi;  # restore the flags
        $t->{twig_keep_pi}   = $saved_keep_pi;
      }
    elsif( defined $pi)
      { ## no critic (TestingAndDebugging::ProhibitNoStrict);
        no strict 'refs';
        print {$fh} $pi;
      }
    return;
  }
|
4251
|
|
|
|
|
|
|
|
|
4252
|
|
|
|
|
|
|
|
|
4253
|
|
|
|
|
|
|
# Outputs the current parser string for an ignored part of the document,
# according to $t->{twig_ignore_action}: 'print' prints it, 'string' appends
# it to $t->{twig_buffered_string}, a scalar reference gets it appended to
# the referenced scalar. Any other action is reported through _croak.
sub _output_ignored
  { my( $t, $p)= @_;
    my $action = $t->{twig_ignore_action};
    my $as_text= $t->{twig_keep_encoding} ? 'original_string' : 'recognized_string';

    if( $action eq 'print')
      { print $p->$as_text; }
    else
      { my $buffer;
        if(    $action eq 'string')       { $buffer= \$t->{twig_buffered_string};     }
        elsif( ref( $action) eq 'SCALAR') { $buffer= $action;                         }
        else                              { _croak( "wrong ignore action: $action"); }

        $$buffer .= $p->$as_text;
      }
  }
|
4272
|
|
|
|
|
|
|
|
|
4273
|
|
|
|
|
|
|
|
|
4274
|
|
|
|
|
|
|
|
|
4275
|
|
|
|
|
|
|
# Start tag callback used while a part of the document is being ignored:
# goes one level deeper in the ignored part and outputs the tag unless the
# ignore action is 'discard'.
sub _twig_ignore_start
  { # warn " in _twig_ignore_start...\n"; # DEBUG handler
    my( $p, $gi)= @_;
    my $t= $p->{twig};

    $t->{twig_ignore_level} += 1;

    if( $t->{twig_ignore_action} ne 'discard')
      { $t->_output_ignored( $p); }

    return;
  }
|
4286
|
|
|
|
|
|
|
|
|
4287
|
|
|
|
|
|
|
# End tag callback used while a part of the document is being ignored:
# outputs the tag unless the ignore action is 'discard', then goes one level
# up. When the level reaches 0 the ignored element is cut, the saved parser
# handlers are put back and the end tag handlers are run.
sub _twig_ignore_end
  { # warn " in _twig_ignore_end...\n"; # DEBUG handler
    my( $p, $gi)= @_;
    my $t= $p->{twig};

    if( $t->{twig_ignore_action} ne 'discard')
      { $t->_output_ignored( $p); }

    $t->{twig_ignore_level} -= 1;

    # still inside the ignored element: nothing else to do
    return if( $t->{twig_ignore_level});

    $t->{twig_current} = $t->{twig_ignore_elt};
    $t->{twig_current}->{'twig_current'}=1;

    $t->{twig_ignore_elt}->cut;  # there could possibly be a memory leak here (delete would avoid it,
                                 # but could also delete elements that should not be deleted)

    # restore the saved stack to the current level
    splice( @{$t->{_twig_context_stack}}, $p->depth+ 1 );
    #warn "stack: ", _dump_stack( $t->{_twig_context_stack}), "\n";

    $p->setHandlers( @{$t->{twig_saved_handlers}});

    # test for handlers
    if( my $end_handlers= $t->{twig_endtag_handlers})
      { # call the matching end tag handlers, stop at the first false result
        foreach my $handler ( _handler( $t, $end_handlers, $gi))
          { $handler->( $t, $gi) or last; }
      }

    pop @{$t->{_twig_context_stack}};
    return;
  }
|
4323
|
|
|
|
|
|
|
|
|
4324
|
|
|
|
|
|
|
#sub _dump_stack { my( $stack)= @_; return join( ":", map { $_->{$ST_TAG} } @$stack); } |
|
4325
|
|
|
|
|
|
|
|
|
4326
|
|
|
|
|
|
|
# Starts ignoring an element: $elt defaults to the current element, $action
# is 'discard' (default), 'print', 'string' (buffer in
# $t->{twig_buffered_string}) or a reference to a scalar used as buffer.
# The parser handlers are replaced by the "ignore" set, the previous ones
# are kept in $t->{twig_saved_handlers}.
sub ignore
  { my( $t, $elt, $action)= @_;
    my $current= $t->{twig_current};

    # anything that is not an XML::Twig::Elt means: ignore the current element
    unless( $elt && ref( $elt) && isa( $elt, 'XML::Twig::Elt'))
      { $elt= $current; }

    #warn "ignore: current = ", $current->tag, ", elt = ", $elt->tag, ")\n";

    # we need the ($elt == $current->{last_child}) test because the current element is set to the
    # parent _before_ handlers are called (and I can't figure out how to fix this)
    my $can_be_ignored=    ($elt == $current)
                        || ($current->{last_child} && ($elt == $current->{last_child}))
                        || $current->in( $elt);
    if( ! $can_be_ignored)
      { _croak( "element to be ignored must be ancestor of current element"); }

    if( $current == $elt)
      { $t->{twig_ignore_level}= 1; }
    else
      { $t->{twig_ignore_level}= $t->_level_in_stack( $current) - $t->_level_in_stack($elt) + 1; }
    #warn "twig_ignore_level: $t->{twig_ignore_level} (current: ", $current->tag, ", elt: ", $elt->tag, ")\n";
    $t->{twig_ignore_elt} = $elt;     # save it, so we can delete it later

    # any action that is not recognized is turned into 'discard'
    unless( $action && ( $action eq 'print' || $action eq 'string' || ref( $action) eq 'SCALAR'))
      { $action= 'discard'; }

    $t->{twig_ignore_action}= $action;

    my $p= $t->{twig_parser};
    my @saved_handlers= $p->setHandlers( %twig_handlers_ignore); # set handlers

    my $get_string= $t->{twig_keep_encoding} ? 'original_string' : 'recognized_string';

    if( $action ne 'discard')
      { my $output;
        if( $action eq 'print')
          { $output= sub { print $_[0]->$get_string; }; }
        else
          { # at this point the action is either 'string' or a scalar reference
            my $string_ref;
            if( $action eq 'string')
              { $t->{twig_buffered_string}= '' unless( exists $t->{twig_buffered_string});
                $string_ref= \$t->{twig_buffered_string};
              }
            else
              { $string_ref= $action; }
            $output= sub { $$string_ref .= $_[0]->$get_string; };
          }
        $p->setHandlers( Default => $output);
        $t->_output_ignored( $p, $action);
      }

    $t->{twig_saved_handlers}= \@saved_handlers;        # save current handlers
  }
|
4376
|
|
|
|
|
|
|
|
|
4377
|
|
|
|
|
|
|
# Returns the 1-based position of $elt in the twig context stack (each stack
# entry holds its element under the $ST_ELT key). Returns undef (empty list
# in list context) when the element is not in the stack; the original code
# fell off the end of the loop there, which leaves the result unspecified.
sub _level_in_stack
  { my( $t, $elt)= @_;
    my $level=1;
    foreach my $elt_in_stack ( @{$t->{_twig_context_stack}} )
      { if( $elt_in_stack->{$ST_ELT} && ($elt == $elt_in_stack->{$ST_ELT})) { return $level }
        $level++;
      }
    return; # not found
  }
|
4385
|
|
|
|
|
|
|
|
|
4386
|
|
|
|
|
|
|
|
|
4387
|
|
|
|
|
|
|
|
|
4388
|
|
|
|
|
|
|
# select $t->{twig_output_fh} and store the current selected fh |
|
4389
|
|
|
|
|
|
|
sub _set_fh_to_twig_output_fh
  { my( $t)= @_;
    my $output_fh= $t->{twig_output_fh};
    # nothing happens without an output fh, or when it is already selected
    if( $output_fh && !$t->{twig_output_fh_selected})
      { $t->{twig_output_fh_selected}=1;
        # select returns the previously selected fh, which is stored so it
        # can be put back later
        $t->{twig_selected_fh}= select $output_fh;
      }
  }
|
4399
|
|
|
|
|
|
|
|
|
4400
|
|
|
|
|
|
|
# select the fh that was stored in $t->{twig_selected_fh} |
|
4401
|
|
|
|
|
|
|
# (before $t->{twig_output_fh} was selected) |
|
4402
|
|
|
|
|
|
|
sub _set_fh_to_selected_fh
  { my( $t)= @_;
    if( $t->{twig_output_fh})
      { my $selected_fh= $t->{twig_selected_fh};
        $t->{twig_output_fh_selected}=0;
        select $selected_fh;
      }
    return;
  }
|
4410
|
|
|
|
|
|
|
|
|
4411
|
|
|
|
|
|
|
|
|
4412
|
|
|
|
|
|
|
# Encoding stored in the XML declaration of the twig; a false value when the
# twig has no XML declaration.
sub encoding
  { my( $t)= @_;
    return $t->{twig_xmldecl} && $t->{twig_xmldecl}->{encoding};
  }
|
4414
|
|
|
|
|
|
|
|
|
4415
|
|
|
|
|
|
|
# Stores $encoding in the XML declaration of the twig (creating the
# declaration, with version 1.0, if needed) and returns the twig.
sub set_encoding
  { my( $t, $encoding)= @_;
    $t->{twig_xmldecl}= {} unless( $t->{twig_xmldecl});
    if( ! $t->xml_version)
      { $t->set_xml_version( "1.0"); }
    $t->{twig_xmldecl}->{encoding}= $encoding;
    return $t;
  }
|
4422
|
|
|
|
|
|
|
|
|
4423
|
|
|
|
|
|
|
# Output encoding recorded on the twig by set_output_encoding.
sub output_encoding
  { my( $t)= @_;
    return $t->{output_encoding};
  }
|
4425
|
|
|
|
|
|
|
|
|
4426
|
|
|
|
|
|
|
# Records the output encoding on the twig and returns the twig. An encoding
# filter is (re)installed when the encoding is not UTF-8, when
# twig_keep_encoding is on, or when an output filter is already set.
sub set_output_encoding
  { my( $t, $encoding)= @_;
    my $output_filter= $t->output_filter || '';

    my $not_utf8= $encoding && $encoding !~ m{^utf-?8$}i;
    if( $not_utf8 || $t->{twig_keep_encoding} || $output_filter)
      { $t->set_output_filter( _encoding_filter( $encoding || '')); }

    $t->{output_encoding}= $encoding;
    return $t;
  }
|
4436
|
|
|
|
|
|
|
|
|
4437
|
|
|
|
|
|
|
# XML version stored in the XML declaration of the twig; a false value when
# the twig has no XML declaration.
sub xml_version
  { my( $t)= @_;
    return $t->{twig_xmldecl} && $t->{twig_xmldecl}->{version};
  }
|
4439
|
|
|
|
|
|
|
|
|
4440
|
|
|
|
|
|
|
# Stores $version in the XML declaration of the twig (creating the
# declaration if needed) and returns the twig.
sub set_xml_version
  { my( $t, $version)= @_;
    $t->{twig_xmldecl}= {} unless( $t->{twig_xmldecl});
    $t->{twig_xmldecl}->{version}= $version;
    return $t;
  }
|
4446
|
|
|
|
|
|
|
|
|
4447
|
|
|
|
|
|
|
# Standalone value stored in the XML declaration of the twig; a false value
# when the twig has no XML declaration.
sub standalone
  { my( $t)= @_;
    return $t->{twig_xmldecl} && $t->{twig_xmldecl}->{standalone};
  }
|
4449
|
|
|
|
|
|
|
|
|
4450
|
|
|
|
|
|
|
# Stores $standalone in the XML declaration of the twig (creating the
# declaration, with version 1.0, if needed) and returns the twig.
sub set_standalone
  { my( $t, $standalone)= @_;
    $t->{twig_xmldecl}= {} unless( $t->{twig_xmldecl});
    if( ! $t->xml_version)
      { $t->set_xml_version( "1.0"); }
    $t->{twig_xmldecl}->{standalone}= $standalone;
    return $t;
  }
|
4457
|
|
|
|
|
|
|
|
|
4458
|
|
|
|
|
|
|
|
|
4459
|
|
|
|
|
|
|
# SAX methods |
|
4460
|
|
|
|
|
|
|
|
|
4461
|
|
|
|
|
|
|
# Sends the whole twig to a SAX1 handler; refused (through _croak) while the
# twig still has a parser.
sub toSAX1
  { my( $t, @args)= @_;
    if( defined $t->{twig_parser})
      { _croak( "cannot use toSAX1 while parsing (use flush_toSAX1)"); }
    return $t->_toSAX( @args,
                       \&XML::Twig::Elt::_start_tag_data_SAX1,
                       \&XML::Twig::Elt::_end_tag_data_SAX1,
                     );
  }
|
4467
|
|
|
|
|
|
|
|
|
4468
|
|
|
|
|
|
|
# Sends the whole twig to a SAX2 handler; refused (through _croak) while the
# twig still has a parser.
sub toSAX2
  { my( $t, @args)= @_;
    if( defined $t->{twig_parser})
      { _croak( "cannot use toSAX2 while parsing (use flush_toSAX2)"); }
    return $t->_toSAX( @args,
                       \&XML::Twig::Elt::_start_tag_data_SAX2,
                       \&XML::Twig::Elt::_end_tag_data_SAX2,
                     );
  }
|
4474
|
|
|
|
|
|
|
|
|
4475
|
|
|
|
|
|
|
|
|
4476
|
|
|
|
|
|
|
# Drives a SAX handler over the whole twig: start_document, the prolog, the
# root element (if any), then end_document. Handler methods are only called
# when the handler implements them. The result of end_document, when it is
# called, is what the sub returns.
sub _toSAX
  { my( $t, $handler, $start_tag_data, $end_tag_data) = @_;

    my $start_document= $handler->can( 'start_document');
    $start_document->( $handler) if( $start_document);

    $t->_prolog_toSAX( $handler);

    if( my $root= $t->root)
      { $root->_toSAX( $handler, $start_tag_data, $end_tag_data); }

    if( my $end_document = $handler->can( 'end_document'))
      { $end_document->( $handler); }
  }
|
4488
|
|
|
|
|
|
|
|
|
4489
|
|
|
|
|
|
|
|
|
4490
|
|
|
|
|
|
|
# Flushes the twig to a SAX1 handler (see _flush_toSAX).
sub flush_toSAX1
  { my( $t, @args)= @_;
    return $t->_flush_toSAX( @args,
                             \&XML::Twig::Elt::_start_tag_data_SAX1,
                             \&XML::Twig::Elt::_end_tag_data_SAX1,
                           );
  }
|
4495
|
|
|
|
|
|
|
|
|
4496
|
|
|
|
|
|
|
# Flushes the twig to a SAX2 handler (see _flush_toSAX).
sub flush_toSAX2
  { my( $t, @args)= @_;
    return $t->_flush_toSAX( @args,
                             \&XML::Twig::Elt::_start_tag_data_SAX2,
                             \&XML::Twig::Elt::_end_tag_data_SAX2,
                           );
  }
|
4501
|
|
|
|
|
|
|
|
|
4502
|
|
|
|
|
|
|
# Sends to a SAX handler the part of the twig built so far, deleting the
# elements that have been completely output. Elements that contain the last
# element only get their start tag sent and are marked as flushed.
# end_document is only sent when $t->{twig_parsing} is false.
sub _flush_toSAX
  { my( $t, $handler, $start_tag_data, $end_tag_data)= @_;

    # the "real" last element processed, as _twig_end has closed it
    my $last_elt= $t->{twig_current} ? $t->{twig_current}->{last_child}
                                     : $t->{twig_root};

    my $elt= $t->{twig_root};
    if( ! $elt->{'flushed'})
      { # init unless already done (ie root has been flushed)
        my $start_document= $handler->can( 'start_document');
        $start_document->( $handler) if( $start_document);
        # flush the DTD
        $t->_prolog_toSAX( $handler);
      }

    while( $elt)
      { my $next_elt;
        if( $last_elt && $last_elt->in( $elt))
          { if( ! $elt->{'flushed'})
              { # just output the front tag
                if( my $start_element = $handler->can( 'start_element'))
                  { my $tag_data= $start_tag_data->( $elt);
                    $start_element->( $handler, $tag_data) if( $tag_data);
                  }
                $elt->{'flushed'}=1;
              }
            $next_elt= $elt->{first_child};
          }
        else
          { # an element before the last one or the last one,
            $next_elt= $elt->{next_sibling};
            $elt->_toSAX( $handler, $start_tag_data, $end_tag_data);
            $elt->delete;
            last if( $last_elt && ($elt == $last_elt));
          }
        $elt= $next_elt;
      }

    if( !$t->{twig_parsing})
      { if( my $end_document = $handler->can( 'end_document'))
          { $end_document->( $handler); }
      }
  }
|
4549
|
|
|
|
|
|
|
|
|
4550
|
|
|
|
|
|
|
|
|
4551
|
|
|
|
|
|
|
# Sends the prolog to a SAX handler: the XML declaration first, then the
# DTD. The result of the DTD step is returned.
sub _prolog_toSAX
  { my( $t, $handler)= @_;
    $t->_xmldecl_toSAX( $handler);
    return $t->_DTD_toSAX( $handler);
  }
|
4556
|
|
|
|
|
|
|
|
|
4557
|
|
|
|
|
|
|
# Passes the XML declaration of the twig (version, encoding, standalone) to
# the xml_decl method of the SAX handler, when the handler has one.
sub _xmldecl_toSAX
  { my( $t, $handler)= @_;
    my $decl= $t->{twig_xmldecl};
    my %data= ( Version    => $decl->{version},
                Encoding   => $decl->{encoding},
                Standalone => $decl->{standalone},
              );
    if( my $xml_decl= $handler->can( 'xml_decl'))
      { $xml_decl->( $handler, \%data); }
  }
|
4567
|
|
|
|
|
|
|
|
|
4568
|
|
|
|
|
|
|
# Passes the doctype of the twig (name, public id, system id) to the
# start_dtd method of the SAX handler, then calls end_dtd; each method is
# only called when the handler has it. Nothing is done without a doctype.
sub _DTD_toSAX
  { my( $t, $handler)= @_;
    my $doctype= $t->{twig_doctype} or return;

    my %data= ( Name     => $doctype->{name},
                PublicId => $doctype->{pub},
                SystemId => $doctype->{sysid},
              );

    my $start_dtd= $handler->can( 'start_dtd');
    $start_dtd->( $handler, \%data) if( $start_dtd);

    # I should call code to export the internal subset here

    if( my $end_dtd= $handler->can( 'end_dtd'))
      { $end_dtd->( $handler); }
  }
|
4585
|
|
|
|
|
|
|
|
|
4586
|
|
|
|
|
|
|
# input/output filters |
|
4587
|
|
|
|
|
|
|
|
|
4588
|
|
|
|
|
|
|
# Returns a code reference converting UTF-8 text to ISO-8859-15, built with
# the first available of Encode, Text::Iconv, or Unicode::Map8 together with
# Unicode::String. When none of them can be loaded, the regexp based
# regexp2latin1 is returned instead.
sub latin1
  { local $SIG{__DIE__};
    if( _use( 'Encode'))
      { return encode_convert( 'ISO-8859-15'); }
    elsif( _use( 'Text::Iconv'))
      { return iconv_convert( 'ISO-8859-15'); }
    elsif( _use( 'Unicode::Map8') && _use( 'Unicode::String'))
      { return unicode_convert( 'ISO-8859-15'); }
    else
      { # fallback: a reference to the regexp2latin1 sub (the reference was
        # garbled into a non-compiling token in the original text)
        return \&regexp2latin1;
      }
  }
|
4599
|
|
|
|
|
|
|
|
|
4600
|
|
|
|
|
|
|
# Returns a code reference converting text to the given encoding, built with
# the first available of Encode, Text::Iconv, or Unicode::Map8 together with
# Unicode::String. Can be called as a function or as a method (the encoding
# is then the second argument). Reports an error through _croak when no
# conversion module is available.
sub _encoding_filter
  {
      { # the __DIE__ handler is only disabled while modules are tried
        local $SIG{__DIE__};
        my $encoding= $_[1] || $_[0];

        if( _use( 'Encode'))
          { my $filter= encode_convert( $encoding);
            return $filter;
          }
        return iconv_convert( $encoding)   if( _use( 'Text::Iconv'));
        return unicode_convert( $encoding) if( _use( 'Unicode::Map8') && _use( 'Unicode::String'));
      }
    _croak( "Encode, Text::Iconv or Unicode::Map8 and Unicode::String need to be installed in order to use encoding options");
  }
|
4615
|
|
|
|
|
|
|
|
|
4616
|
|
|
|
|
|
|
# shamelessly lifted from XML::TiePYX (works only with XML::Parser 2.27) |
|
4617
|
|
|
|
|
|
|
# Replaces each 2-byte sequence starting with a byte in the \xc0-\xc3 range
# by the single character whose code is made of the low 2 bits of the first
# byte followed by the low 6 bits of the second one.
sub regexp2latin1
  { my( $text)= @_;
    $text=~ s{([\xc0-\xc3])(.)}{ chr( ((ord( $1) & 0x03) << 6) | (ord( $2) & 0x3F)) }ge;
    return $text;
  }
|
4625
|
|
|
|
|
|
|
|
|
4626
|
|
|
|
|
|
|
|
|
4627
|
|
|
|
|
|
|
# Encodes text with HTML::Entities::encode_entities; croaks when
# HTML::Entities cannot be loaded.
sub html_encode
  { unless( _use( 'HTML::Entities'))
      { croak( "cannot use html_encode: missing HTML::Entities"); }
    # $_[0] itself is passed on, not a copy
    return HTML::Entities::encode_entities( $_[0]);
  }
|
4631
|
|
|
|
|
|
|
|
|
4632
|
|
|
|
|
|
|
# Returns the string encoded as ascii. From perl 5.8 on this is done by
# encode with the $FB_HTMLCREF fallback; on older perls each multi-byte
# sequence is replaced by the result of _XmlUtf8Decode.
sub safe_encode
  { my( $str)= @_;

    if( ! ($perl_version < 5.008))
      { $str= encode( ascii => $str, $FB_HTMLCREF);
        return $str;
      }

    # the no utf8 makes the regexp work in 5.6
    no utf8; # = perl 5.6
    $str =~ s{([\xC0-\xDF].|[\xE0-\xEF]..|[\xF0-\xFF]...)}
             {_XmlUtf8Decode($1)}egs;
    return $str;
  }
|
4644
|
|
|
|
|
|
|
|
|
4645
|
|
|
|
|
|
|
# Returns the string encoded as ascii. From perl 5.8 on this is done by
# encode with the $FB_XMLCREF fallback; on older perls each multi-byte
# sequence is replaced by the result of _XmlUtf8Decode in hex mode.
sub safe_encode_hex
  { my( $str)= @_;

    if( ! ($perl_version < 5.008))
      { $str= encode( ascii => $str, $FB_XMLCREF);
        return $str;
      }

    # the no utf8 makes the regexp work in 5.6
    no utf8; # = perl 5.6
    $str =~ s{([\xC0-\xDF].|[\xE0-\xEF]..|[\xF0-\xFF]...)}
             {_XmlUtf8Decode($1, 1)}egs;
    return $str;
  }
|
4657
|
|
|
|
|
|
|
|
|
4658
|
|
|
|
|
|
|
# this one shamelessly lifted from XML::DOM |
|
4659
|
|
|
|
|
|
|
# does NOT work on 5.8.0 |
|
4660
|
|
|
|
|
|
|
# Turns the bytes of one UTF-8 encoded character (1 to 4 bytes) into a
# numeric character reference: decimal (&#233;) by default, hexadecimal
# (&#xe9;) when $hex is true. Croaks on any other length.
# The original text returned only the number followed by ';': the '&#' and
# '&#x' prefixes of the references had been lost.
sub _XmlUtf8Decode
  { my ($str, $hex) = @_;
    my $len = length ($str);
    my $n;

    if ($len == 2)
      { my @n = unpack "C2", $str;
        $n = (($n[0] & 0x3f) << 6) + ($n[1] & 0x3f);
      }
    elsif ($len == 3)
      { my @n = unpack "C3", $str;
        $n = (($n[0] & 0x1f) << 12) + (($n[1] & 0x3f) << 6) + ($n[2] & 0x3f);
      }
    elsif ($len == 4)
      { my @n = unpack "C4", $str;
        $n = (($n[0] & 0x0f) << 18) + (($n[1] & 0x3f) << 12)
           + (($n[2] & 0x3f) << 6) + ($n[3] & 0x3f);
      }
    elsif ($len == 1)    # just to be complete...
      { $n = ord ($str); }
    else
      { croak "bad value [$str] for _XmlUtf8Decode"; }

    my $char= $hex ? sprintf ("&#x%x;", $n) : "&#$n;";
    return $char;
  }
|
4686
|
|
|
|
|
|
|
|
|
4687
|
|
|
|
|
|
|
|
|
4688
|
|
|
|
|
|
|
# Returns a code reference that converts its argument using a Unicode::Map8
# converter for the given encoding (the text goes through Unicode::String's
# utf8 and ucs2). Croaks when one of the modules cannot be loaded or when
# the generated code cannot be evaluated.
sub unicode_convert
  { my $enc= $_[1] || $_[0]; # so the method can be called on the twig or directly

    foreach my $module ( 'Unicode::Map8', 'Unicode::String')
      { _use( $module) or croak( "$module not available, needed for encoding filter: $!"); }
    Unicode::String->import( qw(utf8));

    # the code is evaluated here so the converter is created once and then
    # captured by the returned sub
    my $code= qq{ { $NO_WARNINGS;
                    my \$cnv;
                    BEGIN { \$cnv= Unicode::Map8->new(\$enc)
                                or croak "Can't create converter to \$enc";
                          }
                    sub { return \$cnv->to8 (utf8(\$_[0])->ucs2); }
                  }
                };
    my $sub= eval $code;
    croak( $@) unless( $sub);
    return $sub;
  }
|
4704
|
|
|
|
|
|
|
|
|
4705
|
|
|
|
|
|
|
# Returns a code reference that converts its argument from utf8 to the
# given encoding with a Text::Iconv converter. Croaks when Text::Iconv
# cannot be loaded or when the generated code cannot be evaluated.
sub iconv_convert
  { my $enc= $_[1] || $_[0]; # so the method can be called on the twig or directly

    unless( _use( 'Text::Iconv'))
      { croak( "Text::Iconv not available, needed for encoding filter: $!"); }

    # the code is evaluated here so the converter is created once and then
    # captured by the returned sub
    my $code= qq{ { $NO_WARNINGS;
                    my \$cnv;
                    BEGIN { \$cnv = Text::Iconv->new( 'utf8', \$enc)
                                or croak "Can't create iconv converter to \$enc";
                          }
                    sub { return \$cnv->convert( \$_[0]); }
                  }
                };
    my $sub= eval $code;
    return $sub if( $sub);

    # evaluation failed: give a clearer message for an unknown encoding
    if( $@=~ m{^Unsupported conversion: Invalid argument})
      { croak( "Unsupported encoding: $enc"); }
    croak( $@);
  }
|
4725
|
|
|
|
|
|
|
|
|
4726
|
|
|
|
|
|
|
# Returns a code reference that calls encode with the given encoding on its
# argument. Croaks when the generated code cannot be evaluated.
# NOTE(review): the encoding name is interpolated into the evaluated code,
# so it should never come from an untrusted source.
sub encode_convert
  { my $enc= $_[1] || $_[0]; # so the method can be called on the twig or directly
    my $code= qq{sub { $NO_WARNINGS; return encode( "$enc", \$_[0]); } };
    my $sub= eval $code;
    unless( $sub)
      { croak( "can't create Encode-based filter: $@"); }
    return $sub;
  }
|
4732
|
|
|
|
|
|
|
|
|
4733
|
|
|
|
|
|
|
|
|
4734
|
|
|
|
|
|
|
# XML::XPath compatibility |
|
4735
|
|
|
|
|
|
|
# the twig is its own root node and has no parent; its only child is the
# root element (returned as a list, or as an array reference in scalar context)
sub getRootNode
  { my( $t)= @_;
    return $t;
  }

sub getParentNode
  { return undef; }

sub getChildNodes
  { my( $t)= @_;
    my @children= ( $t->root);
    return \@children unless( wantarray);
    return @children;
  }
|
4738
|
|
|
|
|
|
|
|
|
4739
|
|
|
|
|
|
|
# current value of the $weakrefs flag
sub _weakrefs
  { return $weakrefs; }

# for testing purposes: sets the $weakrefs flag (0 for any false value) and
# calls XML::Twig::Elt::set_destroy when the flag is off
sub _set_weakrefs
  { my( $flag)= @_;
    $weakrefs= $flag || 0;
    if( ! $weakrefs)
      { XML::Twig::Elt::set_destroy(); }
  }
|
4741
|
|
|
|
|
|
|
|
|
4742
|
|
|
|
|
|
|
# Returns a dump of the twig: a "document" line followed by the dump of the
# root element, if there is one. Extra arguments are passed to the _dump
# method of the root.
sub _dump
  { my( $t, @args)= @_;

    my $dump= "document\n"; # should dump twig level data here
    $dump .= $t->root->_dump( @args) if( $t->root);

    return $dump;
  }
|
4752
|
|
|
|
|
|
|
|
|
4753
|
|
|
|
|
|
|
|
|
4754
|
|
|
|
|
|
|
1; |
|
4755
|
|
|
|
|
|
|
|
|
4756
|
|
|
|
|
|
|
###################################################################### |
|
4757
|
|
|
|
|
|
|
package XML::Twig::Entity_list; |
|
4758
|
|
|
|
|
|
|
###################################################################### |
|
4759
|
|
|
|
|
|
|
|
|
4760
|
|
|
|
|
|
|
*isa= *UNIVERSAL::isa; |
|
4761
|
|
|
|
|
|
|
|
|
4762
|
|
|
|
|
|
|
# Creates an empty entity list: no entities, and the updated flag off.
sub new
  { my( $class)= @_;
    return bless { entities => {}, updated => 0 }, $class;
  }
|
4770
|
|
|
|
|
|
|
|
|
4771
|
|
|
|
|
|
|
# Creates an XML::Twig::Entity from the remaining arguments, adds it to the
# list and returns the list.
sub add_new_ent
  { my( $ent_list, @ent_args)= @_;
    $ent_list->add( XML::Twig::Entity->new( @ent_args));
    return $ent_list;
  }
|
4777
|
|
|
|
|
|
|
|
|
4778
|
|
|
|
|
|
|
# Copies all the entities of another list into this one (entities with the
# same name are replaced), marks the list as updated and returns it.
# Nothing is changed when the other list has no entities.
sub _add_list
  { my( $ent_list, $to_add)= @_;
    my $ents_to_add= $to_add->{entities};

    if( $ents_to_add && %$ents_to_add)
      { foreach my $name ( keys %$ents_to_add)
          { $ent_list->{entities}->{$name}= $ents_to_add->{$name}; }
        $ent_list->{updated}=1;
      }

    return $ent_list;
  }
|
4786
|
|
|
|
|
|
|
|
|
4787
|
|
|
|
|
|
|
# Stores an entity in the list under its name, marks the list as updated
# and returns it.
sub add
  { my( $ent_list, $ent)= @_;
    my $name= $ent->{name};
    $ent_list->{entities}->{$name}= $ent;
    $ent_list->{updated}=1;
    return $ent_list;
  }
|
4793
|
|
|
|
|
|
|
|
|
4794
|
|
|
|
|
|
|
# Returns the entity stored under $ent_name (undef if there is none).
sub ent
  { my( $ent_list, $ent_name)= @_;
    my $entities= $ent_list->{entities};
    return $entities->{$ent_name};
  }
|
4798
|
|
|
|
|
|
|
|
|
4799
|
|
|
|
|
|
|
# can be called with an entity or with an entity name |
|
4800
|
|
|
|
|
|
|
# Removes an entity from the list, marks the list as updated and returns it.
sub delete
  { my( $ent_list, $ent_or_name)= @_;

    # an entity object is removed through its name, anything else is taken
    # as the name itself
    my $name= isa( ref( $ent_or_name), 'XML::Twig::Entity') ? $ent_or_name->{name}
                                                            : $ent_or_name;
    delete $ent_list->{entities}->{$name};

    $ent_list->{updated}=1;
    return $ent_list;
  }
|
4815
|
|
|
|
|
|
|
|
|
4816
|
|
|
|
|
|
|
# Prints the entities of the list, sorted by name, on $fh when it is given
# (it is selected for the duration of the call), on the currently selected
# filehandle otherwise. Returns the list.
sub print
  { my ($ent_list, $fh)= @_;
    my $old_select= defined $fh ? select $fh : undef;

    my $entities= $ent_list->{entities};
    foreach my $ent_name ( sort keys %{$ent_list->{entities}})
      { # we have to test what the entity is or un-defined entities can creep in
        next unless( isa( $entities->{$ent_name}, 'XML::Twig::Entity'));
        $entities->{$ent_name}->print();
      }

    if( defined $old_select)
      { select $old_select; }
    return $ent_list;
  }
|
4828
|
|
|
|
|
|
|
|
|
4829
|
|
|
|
|
|
|
# Returns the text of all the entities of the list, sorted by name and
# joined by newlines.
sub text
  { my( $ent_list)= @_;
    my @texts;
    foreach my $name ( sort keys %{$ent_list->{entities}})
      { push @texts, $ent_list->{entities}->{$name}->text; }
    return join( "\n", @texts);
  }
|
4833
|
|
|
|
|
|
|
|
|
4834
|
|
|
|
|
|
|
# return the list of entity names |
|
4835
|
|
|
|
|
|
|
# Returns the sorted names of the entities in the list.
sub entity_names
  { my( $ent_list)= @_;
    return (sort keys %{$ent_list->{entities}});
  }
|
4839
|
|
|
|
|
|
|
|
|
4840
|
|
|
|
|
|
|
|
|
4841
|
|
|
|
|
|
|
# Returns the entities of the list, sorted by name.
sub list
  { my( $ent_list)= @_;
    my @names= sort keys %{$ent_list->{entities}};
    return map { $ent_list->{entities}->{$_} } @names;
  }
|
4845
|
|
|
|
|
|
|
|
|
4846
|
|
|
|
|
|
|
1; |
|
4847
|
|
|
|
|
|
|
|
|
4848
|
|
|
|
|
|
|
######################################################################
package XML::Twig::Entity;
######################################################################

#*isa= *UNIVERSAL::isa;

# Build an entity object.
#   $name  - entity name (always stored)
#   $val   - value of the entity
#   $sysid - system identifier
#   $pubid - public identifier
#   $ndata - notation name, output after NDATA
#   $param - true if the entity is a parameter entity
# Only the optional arguments that are defined are stored in the object.
# Can be called on a class name or on an existing object.
sub new {
    my ( $class, $name, $val, $sysid, $pubid, $ndata, $param ) = @_;
    $class = ref($class) || $class;

    my $self = {};

    $self->{name}  = $name;
    $self->{val}   = $val   if defined $val;
    $self->{sysid} = $sysid if defined $sysid;
    $self->{pubid} = $pubid if defined $pubid;
    $self->{ndata} = $ndata if defined $ndata;
    $self->{param} = $param if defined $param;

    bless $self, $class;
    return $self;
}

# Accessors. name and val return the stored field as is (possibly undef),
# the others return an empty string when the field is not defined.
sub name  { return $_[0]->{name}; }
sub val   { return $_[0]->{val}; }
sub sysid { return defined( $_[0]->{sysid} ) ? $_[0]->{sysid} : ''; }
sub pubid { return defined( $_[0]->{pubid} ) ? $_[0]->{pubid} : ''; }
sub ndata { return defined( $_[0]->{ndata} ) ? $_[0]->{ndata} : ''; }
sub param { return defined( $_[0]->{param} ) ? $_[0]->{param} : ''; }

# Print the entity declaration followed by a newline, on $fh if one is
# given, on the currently selected filehandle otherwise.
sub print {
    my ( $ent, $fh ) = @_;
    my $text = $ent->text;
    if   ($fh) { print {$fh} $text . "\n"; }
    else       { print $text . "\n"; }
}

# Same as text: return the entity declaration as a string.
sub sprint {
    my ($ent) = @_;
    return $ent->text;
}

# Return the entity declaration as a string, or '' if the entity has no
# (true) name. The tokens are separated by single spaces:
#   <!ENTITY [%] name "value">
#   <!ENTITY [%] name SYSTEM "sysid" [NDATA ndata]>
#   <!ENTITY [%] name PUBLIC "pubid" "sysid" [NDATA ndata]>
# The value is only output when neither a system nor a public id is defined.
sub text {
    my ($ent) = @_;

    #warn "text called: '", $ent->_dump, "'\n";
    return '' if ( !$ent->{name} );

    my @tokens;
    push @tokens, '<!ENTITY';

    push @tokens, '%' if ( $ent->{param} );
    push @tokens, $ent->{name};

    if ( defined $ent->{val} && !defined( $ent->{sysid} ) && !defined( $ent->{pubid} ) ) {
        push @tokens, _quoted_val( $ent->{val} );
    }
    elsif ( defined $ent->{sysid} ) {
        push @tokens, 'PUBLIC', _quoted_val( $ent->{pubid} ) if ( $ent->{pubid} );
        push @tokens, 'SYSTEM' unless ( $ent->{pubid} );
        push @tokens, _quoted_val( $ent->{sysid} );
        push @tokens, 'NDATA', $ent->{ndata} if ( $ent->{ndata} );
    }
    return join( ' ', @tokens ) . '>';
}

# Quote a value: single quotes if the value contains a double quote,
# double quotes otherwise. The value itself is not escaped.
sub _quoted_val {
    my $q = $_[0] =~ m{"} ? q{'} : q{"};
    return qq{$q$_[0]$q};
}

# Debugging helper: "field => 'value'" for every defined field, sorted by
# field name and joined with " - ".
sub _dump {
    my ($ent) = @_;
    return join( " - ", map { "$_ => '$ent->{$_}'" } grep { defined $ent->{$_} } sort keys %$ent );
}

1;
|
4923
|
|
|
|
|
|
|
|
|
4924
|
|
|
|
|
|
|
######################################################################
package XML::Twig::Notation_list;
######################################################################

*isa = *UNIVERSAL::isa;

# Create an empty notation list: no notations, "updated" flag cleared.
sub new {
    my ($class) = @_;
    return bless { notations => {}, updated => 0 }, $class;
}

# Create a notation from the arguments (they are passed untouched to
# XML::Twig::Notation->new) and add it to the list. Returns the list.
sub add_new_notation {
    my ( $notation_list, @notation_args ) = @_;
    $notation_list->add( XML::Twig::Notation->new(@notation_args) );
    return $notation_list;
}

# Copy all the notations of another list ($to_add) into this one, replacing
# notations with the same name. The list is only flagged as updated when
# there was something to copy. Returns the list.
sub _add_list {
    my ( $notation_list, $to_add ) = @_;

    my $extra = $to_add->{notations};
    return $notation_list unless ( $extra && %$extra );

    foreach my $name ( keys %$extra ) {
        $notation_list->{notations}->{$name} = $extra->{$name};
    }
    $notation_list->{updated} = 1;
    return $notation_list;
}

# Store a notation under its name and flag the list as updated.
# Returns the list.
sub add {
    my ( $notation_list, $notation ) = @_;
    my $name = $notation->{name};
    $notation_list->{notations}->{$name} = $notation;
    $notation_list->{updated} = 1;
    return $notation_list;
}

# Return the notation stored under $notation_name (undef if there is none).
sub notation {
    my ( $notation_list, $notation_name ) = @_;
    return $notation_list->{notations}->{$notation_name};
}

# Remove a notation from the list and flag the list as updated.
# Can be called with a notation object or with a notation name.
# Returns the list.
sub delete {
    my ( $notation_list, $target ) = @_;

    # anything that is not a notation object must be a name
    my $name = isa( ref $target, 'XML::Twig::Notation' ) ? $target->{name} : $target;
    delete $notation_list->{notations}->{$name};

    $notation_list->{updated} = 1;
    return $notation_list;
}

# Print the declaration of every notation in the list, in notation-name
# order. When $fh is defined it is select-ed for the duration of the call
# and the previously selected handle is restored afterwards.
# Returns the list.
sub print {
    my ( $notation_list, $fh ) = @_;

    my $previous_fh;
    $previous_fh = select $fh if defined $fh;

    for my $name ( sort keys %{ $notation_list->{notations} } ) {
        my $notation = $notation_list->{notations}->{$name};

        # undefined (or foreign) entries can creep into the hash: ignore them
        next unless isa( $notation, 'XML::Twig::Notation' );
        $notation->print();
    }

    select $previous_fh if defined $previous_fh;
    return $notation_list;
}

# Return the text of all notations, sorted by name and joined with newlines.
sub text {
    my ($notation_list) = @_;

    my @declarations;
    for my $name ( sort keys %{ $notation_list->{notations} } ) {
        push @declarations, $notation_list->{notations}->{$name}->text;
    }
    return join "\n", @declarations;
}

# Return the names of the notations in the list, sorted as strings.
sub notation_names {
    my ($notation_list) = @_;
    return ( sort keys %{ $notation_list->{notations} } );
}

# Return the notations stored in the list, ordered by notation name.
sub list {
    my $notation_list = shift;
    my @sorted_names  = sort keys %{ $notation_list->{notations} };
    return map { $notation_list->{notations}->{$_} } @sorted_names;
}

1;
|
5015
|
|
|
|
|
|
|
|
|
5016
|
|
|
|
|
|
|
######################################################################
package XML::Twig::Notation;
######################################################################

#*isa= *UNIVERSAL::isa;

BEGIN {
    # sprint is just another name for text
    *sprint = *text;
}

# Build a notation object.
#   $name  - notation name (always stored)
#   $base  - stored as is, not used when the declaration is output
#   $sysid - system identifier
#   $pubid - public identifier
# Only the optional arguments that are defined are stored in the object.
# Can be called on a class name or on an existing object.
sub new {
    my ( $class, $name, $base, $sysid, $pubid ) = @_;
    $class = ref($class) || $class;

    my $self = {};

    $self->{name}  = $name;
    $self->{base}  = $base  if defined $base;
    $self->{sysid} = $sysid if defined $sysid;
    $self->{pubid} = $pubid if defined $pubid;

    bless $self, $class;
    return $self;
}

# Accessors: return the stored field as is (undef when it was not set).
sub name  { return $_[0]->{name}; }
sub base  { return $_[0]->{base}; }
sub sysid { return $_[0]->{sysid}; }
sub pubid { return $_[0]->{pubid}; }

# Print the notation declaration followed by a newline, on $fh if one is
# given, on the currently selected filehandle otherwise.
sub print {
    my ( $notation, $fh ) = @_;
    my $text = $notation->text;
    if   ($fh) { print {$fh} $text . "\n"; }
    else       { print $text . "\n"; }
}

# Return the notation declaration as a string, or '' if the notation has
# no (true) name. The tokens are separated by single spaces:
#   <!NOTATION name SYSTEM "sysid">
#   <!NOTATION name PUBLIC "pubid" ["sysid"]>
sub text {
    my ($notation) = @_;
    return '' if ( !$notation->{name} );

    my @tokens;
    push @tokens, '<!NOTATION';
    push @tokens, $notation->{name};
    push @tokens, ( 'PUBLIC', _quoted_val( $notation->{pubid} ) ) if $notation->{pubid};
    push @tokens, ('SYSTEM') if !$notation->{pubid} && $notation->{sysid};
    push @tokens, ( _quoted_val( $notation->{sysid} ) ) if $notation->{sysid};

    return join( ' ', @tokens ) . '>';
}

# Quote a value: single quotes if the value contains a double quote,
# double quotes otherwise. The value itself is not escaped.
sub _quoted_val {
    my $q = $_[0] =~ m{"} ? q{'} : q{"};
    return qq{$q$_[0]$q};
}

# Debugging helper: "field => 'value'" for every defined field, sorted by
# field name and joined with " - ".
sub _dump {
    my ($notation) = @_;
    return join( " - ", map { "$_ => '$notation->{$_}'" } grep { defined $notation->{$_} } sort keys %$notation );
}

1;
|
5077
|
|
|
|
|
|
|
|
|
5078
|
|
|
|
|
|
|
###################################################################### |
|
5079
|
|
|
|
|
|
|
package XML::Twig::Elt; |
|
5080
|
|
|
|
|
|
|
###################################################################### |
|
5081
|
|
|
|
|
|
|
|
|
5082
|
|
|
|
|
|
|
use Carp; |
|
5083
|
|
|
|
|
|
|
*isa= *UNIVERSAL::isa; |
|
5084
|
|
|
|
|
|
|
|
|
5085
|
|
|
|
|
|
|
# Markup delimiters for CDATA sections, processing instructions and
# comments (the literals contain markup, so they are easily damaged when
# the file goes through an HTML-unaware conversion: keep them intact).
my $CDATA_START   = "<![CDATA[";
my $CDATA_END     = "]]>";
my $PI_START      = "<?";
my $PI_END        = "?>";
my $COMMENT_START = "<!--";
my $COMMENT_END   = "-->";

my $XMLNS_URI = 'http://www.w3.org/2000/xmlns/';
|
5093
|
|
|
|
|
|
|
|
|
5094
|
|
|
|
|
|
|
|
|
5095
|
|
|
|
|
|
|
BEGIN {
    # --- aliases for methods (each alias shares the glob of its target) ---

    # tag / gi naming
    *tag      = *gi;
    *name     = *gi;
    *set_tag  = *set_gi;
    *set_name = *set_gi;

    # navigation and tests
    *find_nodes           = *get_xpath;    # as in XML::DOM
    *findnodes            = *get_xpath;    # as in XML::LibXML
    *field                = *first_child_text;
    *trimmed_field        = *first_child_trimmed_text;
    *is_field             = *contains_only_text;
    *is                   = *passes;
    *matches              = *passes;
    *has_child            = *first_child;
    *has_children         = *first_child;
    *all_children_pass    = *all_children_are;
    *all_children_match   = *all_children_are;
    *getElementsByTagName = *descendants;
    *find_by_tag_name     = *descendants_or_self;
    *unwrap               = *erase;
    *inner_xml            = *xml_string;
    *outer_xml            = *sprint;
    *add_class            = *add_to_class;

    # *_is synonyms of the *_matches methods
    *first_child_is  = *first_child_matches;
    *last_child_is   = *last_child_matches;
    *next_sibling_is = *next_sibling_matches;
    *prev_sibling_is = *prev_sibling_matches;
    *next_elt_is     = *next_elt_matches;
    *prev_elt_is     = *prev_elt_matches;
    *parent_is       = *parent_matches;
    *child_is        = *child_matches;
    *inherited_att   = *inherit_att;

    *sort_children_by_value = *sort_children_on_value;

    *has_atts = *att_nb;

    # imports from XML::Twig
    *_is_fh = *XML::Twig::_is_fh;

    # XML::XPath compatibility
    *string_value                = *text;
    *toString                    = *sprint;
    *getName                     = *gi;
    *getRootNode                 = *twig;
    *getNextSibling              = *_next_sibling;
    *getPreviousSibling          = *_prev_sibling;
    *isElementNode               = *is_elt;
    *isTextNode                  = *is_text;
    *isPI                        = *is_pi;
    *isPINode                    = *is_pi;
    *isProcessingInstructionNode = *is_pi;
    *isComment                   = *is_comment;
    *isCommentNode               = *is_comment;
    *getTarget                   = *target;
    *getFirstChild               = *_first_child;
    *getLastChild                = *_last_child;

    # --- weak references ---
    # use Scalar::Util's weaken if it is there, fall back on WeakRef,
    # and silently carry on if neither module can be loaded
    {
        local $SIG{__DIE__};    # no custom die handler during the probes
        if ( eval 'require Scalar::Util' && defined( &Scalar::Util::weaken ) ) {
            Scalar::Util->import( qw(weaken) );
        }
        elsif ( eval 'require WeakRef' ) {
            WeakRef->import;
        }
    }
}
|
5163
|
|
|
|
|
|
|
|
|
5164
|
|
|
|
|
|
|
|
|
5165
|
|
|
|
|
|
|
# Element constructor, called as XML::Twig::Elt->new( [$gi, [$atts], @content])
#  - without arguments: returns an empty blessed element
#  - with a single argument that starts (after optional whitespace) with '<':
#    the argument is handed to parse and the result returned
#  - $gi is the tag of the element
#  - $atts is an optional hashref of attributes; it can also carry the
#    $CDATA, $ASIS and $EMPTY markers, which are removed from the hash
#    passed by the caller
#  - @content is the content: for the $PCDATA, $CDATA, $COMMENT and $PI
#    tags it must be plain text (croaks if a reference is found), for
#    $ENT it is the entity, otherwise it is passed to set_content
#    (an element created without content is flagged as empty)
sub new {
    my $class = shift;
    $class = ref $class || $class;

    my $elt = bless( {}, $class );

    return $elt unless @_;

    # a lone argument that looks like markup gets parsed
    if ( @_ == 1 && $_[0] =~ m{^\s*<} ) {
        return $class->parse(@_);
    }

    # first argument is the gi
    my $gi = shift;
    unless ( $elt->{gi} = $XML::Twig::gi2index{$gi} ) {
        $elt->set_gi($gi);
    }

    my $atts;
    $atts = shift if ref( $_[0] ) eq 'HASH';

    # $CDATA in the attributes: the content goes in a CDATA child
    if ( $atts && defined $atts->{$CDATA} ) {
        delete $atts->{$CDATA};

        my $cdata = $class->new( $CDATA => @_ );
        return $class->new( $gi, $atts, $cdata );
    }

    if ( $gi eq $PCDATA ) {
        croak "element $PCDATA can only be created from text" if grep { ref $_ } @_;
        $elt->{pcdata} = join '', @_;
    }
    elsif ( $gi eq $ENT ) {
        $elt->{ent} = shift;
    }
    elsif ( $gi eq $CDATA ) {
        croak "element $CDATA can only be created from text" if grep { ref $_ } @_;
        $elt->{cdata} = join '', @_;
    }
    elsif ( $gi eq $COMMENT ) {
        croak "element $COMMENT can only be created from text" if grep { ref $_ } @_;
        $elt->{comment} = join '', @_;
    }
    elsif ( $gi eq $PI ) {
        croak "element $PI can only be created from text" if grep { ref $_ } @_;
        my $target = shift;
        $elt->_set_pi( $target, join '', @_ );
    }
    elsif (@_) {
        # regular element: the remaining arguments are its content
        $elt->set_content(@_);
    }
    else {
        $elt->{empty} = 1;
    }

    if ($atts) {
        # the attribute hash can be used to pass the asis and empty status
        if ( defined $atts->{$ASIS} ) {
            $elt->set_asis( $atts->{$ASIS} );
            delete $atts->{$ASIS};
        }
        if ( defined $atts->{$EMPTY} ) {
            $elt->{empty} = $atts->{$EMPTY};
            delete $atts->{$EMPTY};
        }
        $elt->set_atts($atts) if keys %$atts;
        $elt->_set_id( $atts->{$ID} ) if $atts->{$ID};
    }

    return $elt;
}
|
5230
|
|
|
|
|
|
|
|
|
5231
|
|
|
|
|
|
|
# Optimized version of $elt->new( PCDATA, $text): no argument checking,
# the text is stored as is.
sub _new_pcdata {
    my ( $class, $text ) = @_;
    $class = ref $class || $class;

    my $elt = bless {}, $class;
    unless ( $elt->{gi} = $XML::Twig::gi2index{$PCDATA} ) {
        $elt->set_gi($PCDATA);
    }
    $elt->{pcdata} = $text;
    return $elt;
}
|
5241
|
|
|
|
|
|
|
|
|
5242
|
|
|
|
|
|
|
# Create an XML::Twig::Elt from a string.
# It is quite clumsy at the moment, as it just creates a new twig, parses
# the string with it, then returns its root, detached from the twig.
# There might also be memory leaks there.
# Additional arguments are passed to XML::Twig->new.
sub parse {
    my ( $class, $string, %args ) = @_;
    $class = ref($class) if ref($class);

    my $t = XML::Twig->new(%args);
    $t->parse($string);
    my $elt = $t->root;

    # clean-up the node: drop the links to the temporary twig...
    delete $elt->{$_} for qw( twig twig_current );

    # ...but keep the list of ids it collected
    $elt->{twig_id_list} = $t->{twig_id_list} if $t->{twig_id_list};

    $elt->cut;
    undef $t->{twig_root};
    return $elt;
}
|
5263
|
|
|
|
|
|
|
|
|
5264
|
|
|
|
|
|
|
# Replace the content of the element by the result of parsing $xml.
#   $xml  - an XML fragment: text, one element or several sibling nodes
#   @args - passed to parse (and from there to XML::Twig->new)
# The fragment is wrapped in a temporary <dummy> element before parsing:
# a bare fragment is not always a well-formed document (text only, several
# top-level elements) and the parsed root is erased below, so without the
# wrapper the outermost element of the fragment would be lost.
# Returns the element.
sub set_inner_xml {
    my ( $elt, $xml, @args ) = @_;
    my $new_elt = $elt->parse( "<dummy>$xml</dummy>", @args );
    $elt->cut_children;
    $new_elt->paste_first_child($elt);
    $new_elt->erase;    # removes the wrapper, its children stay in $elt
    return $elt;
}
|
5272
|
|
|
|
|
|
|
|
|
5273
|
|
|
|
|
|
|
# Replace the element itself by the result of parsing $xml.
#   $xml  - an XML fragment: text, one element or several sibling nodes
#   @args - passed to parse (and from there to XML::Twig->new)
# The fragment is wrapped in a temporary <dummy> element before parsing:
# a bare fragment is not always a well-formed document (text only, several
# top-level elements) and the parsed root is erased below, so without the
# wrapper the outermost element of the fragment would be lost.
# Returns the (erased) wrapper element, as the original implementation
# returned the erased parsed root.
sub set_outer_xml {
    my ( $elt, $xml, @args ) = @_;
    my $new_elt = $elt->parse( "<dummy>$xml</dummy>", @args );
    $elt->cut_children;
    $new_elt->replace($elt);
    $new_elt->erase;    # removes the wrapper, its children take its place
    return $new_elt;
}
|
5281
|
|
|
|
|
|
|
|
|
5282
|
|
|
|
|
|
|
|
|
5283
|
|
|
|
|
|
|
# Replace the content of the element by the result of parsing $html with
# XML::Twig's parse_html.
# Which part of the parsed document is kept depends on the tag of $elt:
#   - head: the children of the parsed head (the parsed body is cut)
#   - html: the parsed head and body elements
#   - anything else: the children of the parsed body (the parsed head is cut)
# Returns the element.
sub set_inner_html {
    my ( $elt, $html ) = @_;

    my $twig   = XML::Twig->new->parse_html("$html");
    my $parsed = $twig->root;

    if ( $elt->tag eq 'head' ) {
        $parsed->first_child('head')->unwrap;
        $parsed->first_child('body')->cut;
    }
    elsif ( $elt->tag ne 'html' ) {
        $parsed->first_child('head')->cut;
        $parsed->first_child('body')->unwrap;
    }

    # move what is left of the parsed tree under $elt, then drop its root
    $parsed->cut;
    $elt->cut_children;
    $parsed->paste_first_child($elt);
    $parsed->erase;

    return $elt;
}
|
5301
|
|
|
|
|
|
|
|
|
5302
|
|
|
|
|
|
|
# Set the tag (gi) of the element.
# Elements store the index of their tag, not the tag itself: a tag seen for
# the first time is appended to @XML::Twig::index2gi and its index is
# recorded in %XML::Twig::gi2index.
# Returns the element.
sub set_gi {
    my ( $elt, $gi ) = @_;

    my $index = $XML::Twig::gi2index{$gi};
    if ( !defined $index ) {
        # new gi, create entries in %gi2index and @index2gi
        push @XML::Twig::index2gi, $gi;
        $index = $XML::Twig::gi2index{$gi} = $#XML::Twig::index2gi;
    }

    $elt->{gi} = $index;
    return $elt;
}
|
5312
|
|
|
|
|
|
|
|
|
5313
|
|
|
|
|
|
|
# Return the tag of the element, looked up in @XML::Twig::index2gi through
# the index stored in the element.
sub gi {
    my ($elt) = @_;
    return $XML::Twig::index2gi[ $elt->{gi} ];
}
|
5314
|
|
|
|
|
|
|
|
|
5315
|
|
|
|
|
|
|
# Return the local part of the tag of the element (the tag without its
# namespace prefix).
sub local_name {
    my ($elt) = @_;
    my $tag = $XML::Twig::index2gi[ $elt->{'gi'} ];
    return _local_name($tag);
}
|
5319
|
|
|
|
|
|
|
|
|
5320
|
|
|
|
|
|
|
# Return the namespace prefix of the tag of the element ('' if the tag has
# no prefix).
sub ns_prefix {
    my ($elt) = @_;
    my $tag = $XML::Twig::index2gi[ $elt->{'gi'} ];
    return _ns_prefix($tag);
}
|
5324
|
|
|
|
|
|
|
|
|
5325
|
|
|
|
|
|
|
# Namespace prefix of any qname (can be used for elements or attributes):
# what comes before the first colon, or '' if the qname has no colon.
# This is a plain function, not a method.
sub _ns_prefix {
    my ($qname) = @_;
    return $qname =~ m{^([^:]*):} ? $1 : '';    # should it be '' ?
}
|
5333
|
|
|
|
|
|
|
|
|
5334
|
|
|
|
|
|
|
# Local name of any qname (can be used for elements or attributes):
# the qname without its prefix, i.e. without everything up to and including
# the first colon. This is a plain function, not a method.
sub _local_name {
    my ($qname) = @_;
    my $local = $qname;
    $local =~ s{^[^:]*:}{};
    return $local;
}
|
5340
|
|
|
|
|
|
|
|
|
5341
|
|
|
|
|
|
|
#sub get_namespace
# Return the namespace uri bound to a prefix in the context of the element.
#   $prefix - optional, defaults to the prefix of the element's own tag
# The uri comes from %DEFAULT_NS if the prefix is listed there, otherwise
# from the xmlns / xmlns:<prefix> attribute found by
# _inherit_att_through_cut; '' is returned when nothing is found.
sub namespace {    ## no critic (Subroutines::ProhibitNestedSubs);
    my ( $elt, $prefix ) = @_;
    $prefix = $elt->ns_prefix unless defined $prefix;

    my $ns_att = $prefix ? "xmlns:$prefix" : "xmlns";

    return $DEFAULT_NS{$prefix}
      || $elt->_inherit_att_through_cut($ns_att)
      || '';
}
|
5349
|
|
|
|
|
|
|
|
|
5350
|
|
|
|
|
|
|
# Make the element self-sufficient as far as namespaces are concerned:
# every namespace declaration in scope for the element (as collected by
# _current_ns_prefix_map) that the element does not already carry (with a
# true value) is added to it as an xmlns / xmlns:<prefix> attribute.
# Returns the element.
sub declare_missing_ns {    ## no critic (Subroutines::ProhibitNestedSubs);
    my $root = shift;
    my $map  = $root->_current_ns_prefix_map;

    foreach my $prefix ( keys %$map ) {
        my $prefix_att = $prefix eq '#default' ? 'xmlns' : "xmlns:$prefix";
        next if $root->{'att'}->{$prefix_att};
        $root->set_att( $prefix_att => $map->{$prefix} );
    }
    return $root;
}
|
5362
|
|
|
|
|
|
|
|
|
5363
|
|
|
|
|
|
|
# Collect the namespace declarations in scope for the element.
# Walks from the element up through its ancestors (following the parent
# recorded in {former} for an element without a parent) and returns a
# hashref prefix => uri, where the default namespace is stored under
# '#default'. The declaration closest to the element wins.
# Returns undef if no declaration is found.
sub _current_ns_prefix_map {
    my ($elt) = @_;
    my $map;

    while ($elt) {
        foreach my $att ( $elt->att_names ) {
            my $prefix;
            if    ( $att eq 'xmlns' )          { $prefix = '#default'; }
            elsif ( $att =~ m{^xmlns:(.*)$} )  { $prefix = $1; }
            else                               { next; }

            # already set by a closer element: leave it alone
            next if exists $map->{$prefix};
            $map->{$prefix} = $elt->{'att'}->{$att};
        }
        $elt = $elt->{parent} || ( $elt->{former} && $elt->{former}->{parent} );
    }
    return $map;
}
|
5378
|
|
|
|
|
|
|
|
|
5379
|
|
|
|
|
|
|
# Declare a namespace on the element.
#   $uri    - the namespace uri
#   $prefix - optional prefix; without a (true) prefix the default
#             namespace is declared
# Returns the element.
sub set_ns_decl {
    my ( $elt, $uri, $prefix ) = @_;

    my $ns_att = 'xmlns';
    $ns_att = "xmlns:$prefix" if $prefix;

    $elt->set_att( $ns_att => $uri );
    return $elt;
}
|
5385
|
|
|
|
|
|
|
|
|
5386
|
|
|
|
|
|
|
# Make $uri the default namespace for the element and its descendants:
#   - prefixed elements in that namespace lose their prefix
#   - the namespace declarations for $uri found in the subtree are removed
#   - $uri is declared as the default namespace on the element
# Returns the element.
sub set_ns_as_default {
    my ( $root, $uri ) = @_;
    my @ns_decl_to_remove;

    foreach my $elt ( $root->descendants_or_self ) {

        # ns_prefix is the method; _ns_prefix is a function that expects a
        # qname and would be handed the element itself if called as a method
        if ( $elt->ns_prefix && $elt->namespace eq $uri ) {
            $elt->set_tag( $elt->local_name );
        }

        # store any namespace declaration for that uri (the regexp is
        # anchored so attributes merely ending in "xmlns" are left alone)
        foreach my $ns_decl ( grep { m{^xmlns(?::|$)} && $elt->{'att'}->{$_} eq $uri } $elt->att_names ) {
            push @ns_decl_to_remove, [ $elt, $ns_decl ];
        }
    }

    $root->set_ns_decl($uri);

    # now remove the ns declarations (if done earlier then descendants of an
    # element with the ns declaration are not considered being in the namespace)
    foreach my $ns_decl_to_remove (@ns_decl_to_remove) {
        my ( $elt, $ns_decl ) = @$ns_decl_to_remove;
        $elt->del_att($ns_decl);
    }

    return $root;
}
|
5406
|
|
|
|
|
|
|
|
|
5407
|
|
|
|
|
|
|
|
|
5408
|
|
|
|
|
|
|
|
|
5409
|
|
|
|
|
|
|
# Return $ELT for a regular element and the tag itself for the others
# (the tags whose index is lower than $XML::Twig::SPECIAL_GI).
sub get_type {
    my ($elt) = @_;
    my $gi_nb = $elt->{gi};    # the number, not the string

    return $gi_nb >= $XML::Twig::SPECIAL_GI ? $ELT : $elt->gi;
}
|
5415
|
|
|
|
|
|
|
|
|
5416
|
|
|
|
|
|
|
# Return the gi if the element is a "real" element (the index of its tag is
# at least $XML::Twig::SPECIAL_GI), 0 otherwise.
sub is_elt {
    my ($elt) = @_;
    return 0 unless $elt->{gi} >= $XML::Twig::SPECIAL_GI;
    return $elt->gi;
}
|
5423
|
|
|
|
|
|
|
|
|
5424
|
|
|
|
|
|
|
|
|
5425
|
|
|
|
|
|
|
# Node type predicates. Except for is_ent they only test which field exists
# in the element, whatever its value.

# true if the element has a pcdata field
sub is_pcdata {
    my ($elt) = @_;
    return exists $elt->{'pcdata'};
}

# true if the element has a cdata field
sub is_cdata {
    my ($elt) = @_;
    return exists $elt->{'cdata'};
}

# true if the element has a target field
sub is_pi {
    my ($elt) = @_;
    return exists $elt->{'target'};
}

# true if the element has a comment field
sub is_comment {
    my ($elt) = @_;
    return exists $elt->{'comment'};
}

# true if the element has an ent field, otherwise the value of its
# ent_name field
sub is_ent {
    my ($elt) = @_;
    return ( exists $elt->{ent} || $elt->{ent_name} );
}

# true if the element has a pcdata or a cdata field
sub is_text {
    my ($elt) = @_;
    return ( exists( $elt->{'pcdata'} ) || exists( $elt->{'cdata'} ) );
}
|
5455
|
|
|
|
|
|
|
|
|
5456
|
|
|
|
|
|
|
# Return the value of the empty flag of the element, 0 if it is not set
# (or false).
sub is_empty {
    my ($elt) = @_;
    return $elt->{empty} || 0;
}

# Set the empty flag of the element to the optional second argument, to 1
# if that argument is not defined. Returns the element.
sub set_empty {
    my $elt = $_[0];
    $elt->{empty} = defined( $_[1] ) ? $_[1] : 1;
    return $elt;
}

# Remove the empty flag of the element when it is set to a true value.
# Returns the element.
sub set_not_empty {
    my $elt = $_[0];
    if ( $elt->{'empty'} ) { delete $elt->{empty}; }
    return $elt;
}
|
5464
|
|
|
|
|
|
|
|
|
5465
|
|
|
|
|
|
|
|
|
5466
|
|
|
|
|
|
|
# Flag the element and all its descendants (as returned by _descendants) as
# "asis". A node with a cdata field is also given the $PCDATA tag and its
# cdata is copied into its pcdata field (the cdata field is left in place).
# Returns the element.
sub set_asis {
    my ($elt) = @_;

    for my $node ( $elt, $elt->_descendants ) {
        $node->{asis} = 1;
        next unless exists $node->{'cdata'};

        unless ( $node->{gi} = $XML::Twig::gi2index{$PCDATA} ) {
            $node->set_gi($PCDATA);
        }
        $node->{pcdata} = $node->{cdata};
    }
    return $elt;
}
|
5479
|
|
|
|
|
|
|
|
|
5480
|
|
|
|
|
|
|
# Remove the asis flag from the element and from all its descendants (as
# returned by descendants). Returns the element.
sub set_not_asis {
    my ($elt) = @_;
    for my $node ( $elt, $elt->descendants ) {
        next unless $node->{asis};
        delete $node->{asis};
    }
    return $elt;
}
|
5486
|
|
|
|
|
|
|
|
|
5487
|
|
|
|
|
|
|
# Return the asis flag of the element (undef if it was never set).
sub is_asis {
    my ($elt) = @_;
    return $elt->{asis};
}
|
5489
|
|
|
|
|
|
|
|
|
5490
|
|
|
|
|
|
|
# Tell whether the element is closed, based on the twig it belongs to:
#   - no twig: returns nothing (undef / empty list)
#   - the twig has no current element: returns 1
#   - otherwise: returns the result of asking the twig's current element
#     whether it is "in" this element
sub closed {
    my ($elt) = @_;

    my $twig = $elt->twig;
    return unless $twig;

    my $current = $twig->{twig_current};
    return $current ? $current->in($elt) : 1;
}
|
5497
|
|
|
|
|
|
|
|
|
5498
|
|
|
|
|
|
|
# Set the text of the element.
# If extra data is attached to the current text (extra_data_in_pcdata),
# _try_moving_extra_data is first given a chance to adjust it to the new
# text. Returns the element.
sub set_pcdata {
    my ( $elt, $pcdata ) = @_;

    _try_moving_extra_data( $elt, $pcdata ) if $elt->{extra_data_in_pcdata};

    $elt->{pcdata} = $pcdata;
    return $elt;
}
|
5507
|
|
|
|
|
|
|
|
|
5508
|
|
|
|
|
|
|
# Accessors for the extra_data_in_pcdata field of an element: an array of
# { text => ..., offset => ... } records.

# return the field as is
sub _extra_data_in_pcdata {
    my ($elt) = @_;
    return $elt->{extra_data_in_pcdata};
}

# replace the field, returns the element
sub _set_extra_data_in_pcdata {
    my ( $elt, $extra_data ) = @_;
    $elt->{extra_data_in_pcdata} = $extra_data;
    return $elt;
}

# remove the field, returns the element
sub _del_extra_data_in_pcdata {
    my ($elt) = @_;
    delete $elt->{extra_data_in_pcdata};
    return $elt;
}

# add a record ($text, $offset) at the start of the array, creating the
# array if needed
sub _unshift_extra_data_in_pcdata {
    my ( $e, $text, $offset ) = @_;
    $e->{extra_data_in_pcdata} ||= [];
    unshift @{ $e->{extra_data_in_pcdata} }, { text => $text, offset => $offset };
}

# add a record ($text, $offset) at the end of the array, creating the
# array if needed
sub _push_extra_data_in_pcdata {
    my ( $e, $text, $offset ) = @_;
    $e->{extra_data_in_pcdata} ||= [];
    push @{ $e->{extra_data_in_pcdata} }, { text => $text, offset => $offset };
}
|
5521
|
|
|
|
|
|
|
|
|
5522
|
|
|
|
|
|
|
sub _extra_data_before_end_tag { return $_[0]->{extra_data_before_end_tag} || ''; } |
|
5523
|
|
|
|
|
|
|
sub _set_extra_data_before_end_tag { $_[0]->{extra_data_before_end_tag}= $_[1]; return $_[0]} |
|
5524
|
|
|
|
|
|
|
# Removes the extra_data_before_end_tag slot from the element; returns the element.
sub _del_extra_data_before_end_tag
  { my( $elt)= @_;
    delete $elt->{extra_data_before_end_tag};
    return $elt;
  }
|
5525
|
|
|
|
|
|
|
# Puts $data in front of the element's extra_data_before_end_tag; when the
# slot is missing or false it is simply set to $data. Returns the element.
sub _prefix_extra_data_before_end_tag
  { my( $elt, $data)= @_;
    my $existing= $elt->{extra_data_before_end_tag};
    $elt->{extra_data_before_end_tag}= $existing ? $data . $existing : $data;
    return $elt;
  }
|
5533
|
|
|
|
|
|
|
|
|
5534
|
|
|
|
|
|
|
# internal, in cases where we know there is no extra_data (inlined anyway!)
# Plain store of the text in the pcdata slot, without the extra-data
# bookkeeping done by set_pcdata. Returns the stored value.
sub _set_pcdata
  { my( $elt, $pcdata)= @_;
    return $elt->{pcdata}= $pcdata;
  }
|
5536
|
|
|
|
|
|
|
|
|
5537
|
|
|
|
|
|
|
# try to figure out if we can keep the extra_data around
# Called with the element (still holding its old text in pcdata) and the new
# text. Three cases, tried in order:
#   - the old text is found whole inside the new one: every record offset is
#     increased by the position where the old text now starts
#   - the new text is found whole inside the old one: offsets are decreased by
#     that position and only the records that still fall within the new text
#     are kept
#   - otherwise a word-by-word, then a character-by-character match is tried,
#     and the extra data is dropped when both fail
sub _try_moving_extra_data
  { my( $elt, $modified)= @_;
    my $initial= $elt->{pcdata};
    my $cpis= $elt->{extra_data_in_pcdata};
    my $offset;

    if( ($offset= index( $modified, $initial)) != -1)
      { # text has been added
        $_->{offset}+= $offset foreach @$cpis;
      }
    elsif( ($offset= index( $initial, $modified)) != -1)
      { # text has been cut
        my $len= length( $modified);
        $_->{offset} -= $offset foreach @$cpis;
        my @kept= grep { $_->{offset} >= 0 && $_->{offset} < $len } @$cpis;
        $elt->_set_extra_data_in_pcdata( \@kept);
      }
    else
      {    _match_extra_data_words( $elt, $initial, $modified)
        || _match_extra_data_chars( $elt, $initial, $modified)
        || $elt->_del_extra_data_in_pcdata;
      }
  }
|
5559
|
|
|
|
|
|
|
|
|
5560
|
|
|
|
|
|
|
# Tries to relocate the extra data by comparing the old and new text token by
# token, the tokens being obtained by splitting on word boundaries (\b).
# Returns the result of _match_extra_data.
sub _match_extra_data_words
  { my( $elt, $initial, $modified)= @_;
    my $initial_words = [ split /\b/, $initial ];
    my $modified_words= [ split /\b/, $modified ];
    return _match_extra_data( $elt, length( $initial), $initial_words, $modified_words);
  }
|
5567
|
|
|
|
|
|
|
|
|
5568
|
|
|
|
|
|
|
# Tries to relocate the extra data by comparing the old and new text one
# character at a time. Returns the result of _match_extra_data.
sub _match_extra_data_chars
  { my( $elt, $initial, $modified)= @_;
    my $initial_chars = [ split //, $initial ];
    my $modified_chars= [ split //, $modified ];
    return _match_extra_data( $elt, length( $initial), $initial_chars, $modified_chars);
  }
|
5575
|
|
|
|
|
|
|
|
|
5576
|
|
|
|
|
|
|
# Adjusts the offsets of the element's extra_data_in_pcdata records after a
# text change, using the token lists of the old ($initial) and new
# ($modified) text. $length is the length of the old text.
# _pos_offset (which empties the lists it is given) provides, for each
# difference, a position and the offset that applies from that position on;
# when there are no more positions, $length + 1 is used as a sentinel.
# Returns 1 when the records have been updated, 0 when the two texts could
# not be aligned.
sub _match_extra_data
  { my( $elt, $length, $initial, $modified)= @_;

    my $cpis= $elt->{extra_data_in_pcdata};

    if( @$initial <= @$modified)
      { # the old text has no more tokens than the new one: offsets are increased
        my( $ok, $positions, $offsets)= _pos_offset( $initial, $modified);
        return 0 unless( $ok);

        my $offset= 0;
        my $pos= shift @$positions;
        foreach my $cpi (@$cpis)
          { while( $cpi->{offset} >= $pos)
              { $offset= shift @$offsets;
                $pos= ( shift @$positions) || $length + 1;
              }
            $cpi->{offset} += $offset;
          }
        return 1;
      }

    # the new text has fewer tokens: offsets are decreased, and a record whose
    # new offset falls before the last position passed loses its text and is
    # then filtered out
    my( $ok, $positions, $offsets)= _pos_offset( $modified, $initial);
    return 0 unless( $ok);

    my $offset= 0;
    my $pos= shift @$positions;
    my $prev_pos= 0;

    foreach my $cpi (@$cpis)
      { while( $cpi->{offset} >= $pos)
          { $offset= shift @$offsets;
            $prev_pos= $pos;
            $pos= ( shift @$positions) || $length + 1;
          }
        $cpi->{offset} -= $offset;
        delete $cpi->{text} if( $cpi->{offset} < $prev_pos);
      }
    $elt->_set_extra_data_in_pcdata( [ grep { exists $_->{text} } @$cpis ]);
    return 1;
  }
|
5621
|
|
|
|
|
|
|
|
|
5622
|
|
|
|
|
|
|
|
|
5623
|
|
|
|
|
|
|
# Aligns two token lists, $short being expected to be $long with some tokens
# removed. Both array refs are consumed (shifted) in the process.
# Each time tokens of $long have to be skipped to find the current token of
# $short, the position reached in the short text and the difference between
# the lengths consumed in the long and in the short text are recorded.
# Returns ( 1, \@positions, \@offsets) on success, 0 when a token of $short
# cannot be found in what is left of $long.
sub _pos_offset
  { my( $short, $long)= @_;
    my( @pos, @offset);
    my( $s_length, $l_length)= ( 0, 0);

    while( @$short)
      { my $s_word= shift @$short;
        my $l_word= shift @$long;

        if( $s_word ne $l_word)
          { # skip tokens of $long until one matches, or $long is exhausted
            until( !@$long || $s_word eq $l_word)
              { $l_length += length( $l_word);
                $l_word= shift @$long;
              }
            return 0 if( !@$long && $s_word ne $l_word);
            push @pos, $s_length;
            push @offset, $l_length - $s_length;
          }

        # the matching token counts in both texts
        my $matched= length( $s_word);
        $s_length += $matched;
        $l_length += $matched;
      }

    return( 1, \@pos, \@offset);
  }
|
5645
|
|
|
|
|
|
|
|
|
5646
|
|
|
|
|
|
|
# Appends $text to the element's pcdata slot; returns the element.
sub append_pcdata
  { my( $elt, $text)= @_;
    $elt->{pcdata} .= $text;
    return $elt;
  }
|
5650
|
|
|
|
|
|
|
|
|
5651
|
|
|
|
|
|
|
# Returns the content of the element's pcdata slot.
sub pcdata
  { my( $elt)= @_;
    return $elt->{pcdata};
  }
|
5652
|
|
|
|
|
|
|
|
|
5653
|
|
|
|
|
|
|
|
|
5654
|
|
|
|
|
|
|
# Appends $data to the element's extra_data slot; returns the element.
sub append_extra_data
  { my( $elt, $data)= @_;
    $elt->{extra_data} .= $data;
    return $elt;
  }
|
5658
|
|
|
|
|
|
|
|
|
5659
|
|
|
|
|
|
|
# Stores $data in the element's extra_data slot; returns the element.
sub set_extra_data
  { my( $elt, $data)= @_;
    $elt->{extra_data}= $data;
    return $elt;
  }
|
5663
|
|
|
|
|
|
|
# Returns the element's extra_data slot, or the empty string when it is
# missing or false.
sub extra_data
  { my( $elt)= @_;
    return $elt->{extra_data} || '';
  }
|
5664
|
|
|
|
|
|
|
|
|
5665
|
|
|
|
|
|
|
# Stores $target in the element's target slot (the slot pi_string reads the
# processing instruction target from); returns the element.
sub set_target
  { my $elt= shift;
    ( $elt->{target})= @_;
    return $elt;
  }
|
5670
|
|
|
|
|
|
|
# Returns the content of the element's target slot.
sub target
  { my( $elt)= @_;
    return $elt->{target};
  }
|
5671
|
|
|
|
|
|
|
|
|
5672
|
|
|
|
|
|
|
# Stores $data in the element's data slot; returns the element.
sub set_data
  { my( $elt, $data)= @_;
    $elt->{data}= $data;
    return $elt;
  }
|
5676
|
|
|
|
|
|
|
# Returns the content of the element's data slot.
sub data
  { my( $elt)= @_;
    return $elt->{data};
  }
|
5677
|
|
|
|
|
|
|
|
|
5678
|
|
|
|
|
|
|
# Turns the element into a processing instruction and sets its target and data.
# When the element's gi index is not already the one registered for $PI, its
# children are cut and its gi is switched to $PI (through set_gi when $PI has
# no index yet). The remaining arguments are handed to _set_pi, whose result
# is returned.
sub set_pi
  { my( $elt, @target_and_data)= @_;

    if( $elt->{gi} != $XML::Twig::gi2index{$PI})
      { $elt->cut_children;
        $elt->{gi}= $XML::Twig::gi2index{$PI}
          or $elt->set_gi( $PI);
      }

    return $elt->_set_pi( @target_and_data);
  }
|
5686
|
|
|
|
|
|
|
|
|
5687
|
|
|
|
|
|
|
# Sets the target (through set_target) and the data of a processing
# instruction, without checking the type of the element. Returns the element.
sub _set_pi
  { my( $elt, $target, $data)= @_;
    $elt->set_target( $target);
    $elt->{data}= $data;
    return $elt;
  }
|
5692
|
|
|
|
|
|
|
|
|
5693
|
|
|
|
|
|
|
# Returns the processing instruction as a string: $PI_START, the target, then
# a space and the data when the data is defined and not empty, then $PI_END.
sub pi_string
  { my( $elt)= @_;
    my $data= $elt->{data};
    my $has_data= defined( $data) && $data ne '';
    return $PI_START . $elt->{target} . ( $has_data ? " $data" : '') . $PI_END;
  }
|
5699
|
|
|
|
|
|
|
|
|
5700
|
|
|
|
|
|
|
# Turns the element into a comment and sets its text.
# When the element's gi index is not already the one registered for $COMMENT,
# its children are cut and its gi is switched to $COMMENT (through set_gi when
# $COMMENT has no index yet). Returns the element.
sub set_comment
  { my( $elt, $comment)= @_;

    if( $elt->{gi} != $XML::Twig::gi2index{$COMMENT})
      { $elt->cut_children;
        $elt->{gi}= $XML::Twig::gi2index{$COMMENT}
          or $elt->set_gi( $COMMENT);
      }

    $elt->{comment}= $comment;
    return $elt;
  }
|
5709
|
|
|
|
|
|
|
|
|
5710
|
|
|
|
|
|
|
# Stores $comment in the comment slot, without checking the type of the
# element; returns the element.
sub _set_comment
  { my( $elt, $comment)= @_;
    $elt->{comment}= $comment;
    return $elt;
  }
|
5711
|
|
|
|
|
|
|
# Returns the content of the element's comment slot.
sub comment
  { my( $elt)= @_;
    return $elt->{comment};
  }
|
5712
|
|
|
|
|
|
|
# Returns the comment as a string: the escaped comment text (see
# _comment_escaped_string) between $COMMENT_START and $COMMENT_END.
sub comment_string
  { my( $elt)= @_;
    my $escaped= _comment_escaped_string( $elt->{comment});
    return $COMMENT_START . $escaped . $COMMENT_END;
  }
|
5713
|
|
|
|
|
|
|
# comments cannot start or end with a dash, nor contain 2 consecutive dashes
# Returns a copy of the comment text made safe to output:
#   - a space is added before a leading dash and after a trailing one
#   - a space is inserted between any 2 consecutive dashes
sub _comment_escaped_string
  { my( $c)= @_;
    $c=~ s{^-}{ -};
    $c=~ s{-$}{- };
    # the look-ahead leaves the second dash available for the next match, so
    # runs of 3 dashes or more are fully broken up ('---' gives '- - -'),
    # which replacing '--' pairs does not achieve ('---' would give '- --')
    $c=~ s{-(?=-)}{- }g;
    return $c;
  }
|
5721
|
|
|
|
|
|
|
|
|
5722
|
|
|
|
|
|
|
# Stores $ent in the element's ent slot; returns the element.
sub set_ent
  { my( $elt, $ent)= @_;
    $elt->{ent}= $ent;
    return $elt;
  }
|
5723
|
|
|
|
|
|
|
# Returns the content of the element's ent slot.
sub ent
  { my( $elt)= @_;
    return $elt->{ent};
  }
|
5724
|
|
|
|
|
|
|
# Returns the content of the ent slot without its first and last characters
# (so '&name;' gives 'name').
sub ent_name
  { my( $elt)= @_;
    return substr( $elt->{ent}, 1, -1);
  }
|
5725
|
|
|
|
|
|
|
|
|
5726
|
|
|
|
|
|
|
# Sets the text of a CDATA section.
# When the element's gi index is the one registered for $CDATA, the text is
# stored in its cdata slot. Otherwise the children of the element are cut and
# a new $CDATA element, created with the remaining arguments, is inserted as
# its first child.
# Returns the element in both cases (the second case used to return the text
# instead, unlike the first case and the other setters).
sub set_cdata
  { my $elt= shift;
    unless( $elt->{gi} == $XML::Twig::gi2index{$CDATA})
      { $elt->cut_children;
        $elt->insert_new_elt( first_child => $CDATA, @_);
        return $elt;
      }
    $elt->{cdata}= $_[0];
    return $elt;
  }
|
5736
|
|
|
|
|
|
|
|
|
5737
|
|
|
|
|
|
|
# Stores $cdata in the cdata slot, without checking the type of the element;
# returns the element.
sub _set_cdata
  { my( $elt, $cdata)= @_;
    $elt->{cdata}= $cdata;
    return $elt;
  }
|
5741
|
|
|
|
|
|
|
|
|
5742
|
|
|
|
|
|
|
# Appends $text to the element's cdata slot; returns the element.
sub append_cdata
  { my( $elt, $text)= @_;
    $elt->{cdata} .= $text;
    return $elt;
  }
|
5746
|
|
|
|
|
|
|
# Returns the content of the element's cdata slot.
sub cdata
  { my( $elt)= @_;
    return $elt->{cdata};
  }
|
5747
|
|
|
|
|
|
|
|
|
5748
|
|
|
|
|
|
|
|
|
5749
|
|
|
|
|
|
|
# Returns the element when is_elt is true for it and false for each of its
# children, 0 otherwise.
sub contains_only_text
  { my( $elt)= @_;
    if( !$elt->is_elt) { return 0; }
    foreach my $kid ($elt->_children)
      { if( $kid->is_elt) { return 0; } }
    return $elt;
  }
|
5756
|
|
|
|
|
|
|
|
|
5757
|
|
|
|
|
|
|
# Checks that every child of the element satisfies $exp (tested with is).
# Returns 0 as soon as a child fails, otherwise the number of children, or 1
# when the element has no child.
sub contains_only
  { my( $elt, $exp)= @_;
    my $nb_children= 0;
    for( my $child= $elt->{first_child}; $child; $child= $child->{next_sibling})
      { return 0 unless $child->is( $exp);
        $nb_children++;
      }
    return $nb_children || 1;
  }
|
5764
|
|
|
|
|
|
|
|
|
5765
|
|
|
|
|
|
|
# Returns the only child of the element when there is exactly one and it
# passes $exp, 0 otherwise (no child, first child not passing, or more than
# one child).
sub contains_a_single
  { my( $elt, $exp)= @_;
    my $only_child= $elt->{first_child};
    return 0 unless $only_child;
    return 0 unless $only_child->passes( $exp);
    return $only_child->{next_sibling} ? 0 : $only_child;
  }
|
5772
|
|
|
|
|
|
|
|
|
5773
|
|
|
|
|
|
|
|
|
5774
|
|
|
|
|
|
|
# Follows the parent links up from the element and returns the topmost
# element (the element itself when it has no parent).
sub root
  { my( $elt)= @_;
    while( my $parent= $elt->{parent})
      { $elt= $parent; }
    return $elt;
  }
|
5779
|
|
|
|
|
|
|
|
|
5780
|
|
|
|
|
|
|
# Same as root, except that when an element has no parent the parent recorded
# in its 'former' slot, if any, is followed instead.
sub _root_through_cut
  { my( $elt)= @_;
    while( my $up= $elt->{parent} || ($elt->{former} && $elt->{former}->{parent}))
      { $elt= $up; }
    return $elt;
  }
|
5785
|
|
|
|
|
|
|
|
|
5786
|
|
|
|
|
|
|
# Returns the content of the 'twig' slot of the root of the element's tree.
sub twig
  { my( $elt)= @_;
    return $elt->root->{twig};
  }
|
5791
|
|
|
|
|
|
|
|
|
5792
|
|
|
|
|
|
|
# Returns the content of the 'twig' slot of the root found by
# _root_through_cut.
sub _twig_through_cut
  { my( $elt)= @_;
    return $elt->_root_through_cut->{twig};
  }
|
5797
|
|
|
|
|
|
|
|
|
5798
|
|
|
|
|
|
|
|
|
5799
|
|
|
|
|
|
|
# used for navigation |
|
5800
|
|
|
|
|
|
|
# returns undef or the element, depending on whether $elt passes $cond |
|
5801
|
|
|
|
|
|
|
# $cond can be |
|
5802
|
|
|
|
|
|
|
# - empty: the element passes the condition |
|
5803
|
|
|
|
|
|
|
# - ELT ('#ELT'): the element passes the condition if it is a "real" element |
|
5804
|
|
|
|
|
|
|
# - TEXT ('#TEXT'): the element passes if it is a CDATA or PCDATA element |
|
5805
|
|
|
|
|
|
|
# - a string with an XPath condition (only a subset of XPath is actually |
|
5806
|
|
|
|
|
|
|
# supported). |
|
5807
|
|
|
|
|
|
|
# - a regexp: the element passes if its gi matches the regexp |
|
5808
|
|
|
|
|
|
|
# - a code ref: the element passes if the code, applied on the element, |
|
5809
|
|
|
|
|
|
|
# returns true |
|
5810
|
|
|
|
|
|
|
|
|
5811
|
|
|
|
|
|
|
my %cond_cache; # expression => coderef |
|
5812
|
|
|
|
|
|
|
|
|
5813
|
|
|
|
|
|
|
# Empties the cache of compiled navigation conditions, so conditions are
# compiled again the next time they are used.
sub reset_cond_cache
  { %cond_cache= ();
  }
|
5814
|
|
|
|
|
|
|
|
|
5815
|
|
|
|
|
|
|
{ |
|
5816
|
|
|
|
|
|
|
# Compiles a navigation condition into a code ref.
# The generated sub receives an element as $_[0] and returns it when the
# condition holds, undef otherwise.
#   - a code ref is returned as is
#   - a Regexp ref is matched against the gi of the element
#   - a string is consumed from the front, one alternative at a time (each one
#     ending with $SEP); an element satisfies the condition when it satisfies
#     any of the alternatives. An alternative can be $ELT, $TEXT, a tag part,
#     a /regexp/, tag[n] (position among the siblings, counted from the end
#     when n is negative), tag[predicate] or a predicate on its own
#   - a leading '!' negates the whole condition
# Croaks when the string cannot be parsed or the generated code does not
# compile.
sub _install_cond
  { my( $cond)= @_;
    my $test;
    my $init='';

    my $original_cond= $cond;

    my $not= ($cond=~ s{^\s*!}{}) ? '!' : '';

    return $cond if( ref( $cond) eq 'CODE');

    if( ref( $cond) eq 'Regexp')
      { $test = qq{(\$_[0]->gi=~ /$cond/)}; }
    else
      { my @tests;
        while( $cond)
          {
            # the condition is a string
            if( $cond=~ s{$ELT$SEP}{})
              { push @tests, qq{\$_[0]->is_elt}; }
            elsif( $cond=~ s{$TEXT$SEP}{})
              { push @tests, qq{\$_[0]->is_text}; }
            elsif( $cond=~ s{^\s*($REG_TAG_PART)$SEP}{})
              { push @tests, _gi_test( $1); }
            elsif( $cond=~ s{^\s*($REG_REGEXP)$SEP}{})
              { # /regexp/
                push @tests, qq{ \$_[0]->gi=~ $1 };
              }
            elsif( $cond=~ s{^\s*($REG_TAG_PART)?\s* # $1
                             \[\s*(-?)\s*(\d+)\s*\] # [$2]
                             $SEP}{}xo
                 )
              { my( $gi, $neg, $index)= ($1, $2, $3);
                # a negative index is checked against the following siblings,
                # a positive one against the preceding ones
                my $siblings= $neg ? q{$_[0]->_next_siblings} : q{$_[0]->_prev_siblings};
                if( $gi && ($gi ne '*'))
                  { push @tests, _and( _gi_test( $gi), qq{ (scalar( grep { \$_->gi eq "$gi" } $siblings) + 1 == $index)}); }
                else
                  { push @tests, qq{(scalar( $siblings) + 1 == $index)}; }
              }
            elsif( $cond=~ s{^\s*($REG_TAG_PART?)\s*($REG_PREDICATE)$SEP}{})
              { my( $gi, $predicate)= ( $1, $2);
                push @tests, _and( _gi_test( $gi), _parse_predicate_in_step( $predicate));
              }
            elsif( $cond=~ s{^\s*($REG_NAKED_PREDICATE)$SEP}{})
              { push @tests, _parse_predicate_in_step( $1); }
            else
              { croak "wrong navigation condition '$original_cond' ($@)"; }
          }
        # several alternatives are or-ed, each within its own parens
        $test= @tests > 1 ? '(' . join( '||', map { "($_)" } @tests) . ')' : $tests[0];
      }

    #warn "init: '$init' - test: '$test'\n";

    my $sub= qq{sub { $NO_WARNINGS; $init; return $not($test) ? \$_[0] : undef; } };
    my $compiled= eval $sub;
    #warn "cond: $cond\n$sub\n";
    if( $@)
      { croak "wrong navigation condition '$original_cond' ($@);" }
    return $compiled;
  }
|
5877
|
|
|
|
|
|
|
|
|
5878
|
|
|
|
|
|
|
# Returns the text of a Perl test (to be included in a generated sub, where
# the element is $_[0]) for a tag part: a gi, optionally followed by .class or
# #id.
#   - when the whole tag part is a registered gi, only the gi index is compared
#   - otherwise the gi (unless empty or '*'), the class (as a word of the class
#     attribute) and the id (the $ID attribute) are each tested, and the tests
#     are combined by _and
sub _gi_test
  { my( $full_gi)= @_;

    # optimize if the gi exists, including the case where the gi includes a dot
    my $index= $XML::Twig::gi2index{$full_gi};
    return qq{\$_[0]->{gi} == $index} if( $index);

    my( $gi, $class, $id)= $full_gi=~ m{^(.*?)(?:[.]([^.]*)|[#](.*))?$};

    my $gi_test= 1;
    if( $gi && $gi ne '*' )
      { # 2 options, depending on whether the gi exists in gi2index
        # start optimization
        my $index= $XML::Twig::gi2index{$gi};
        $gi_test=   $index
                  ? # the gi exists, use its index as a faster shortcut
                    qq{\$_[0]->{gi} == $index}
        # end optimization
                  : # it does not exist (but might be created later), compare the strings
                    qq{ \$_[0]->gi eq "$gi"};
      }

    #warn "class: '$class'";
    my $class_test= $class
                  ? qq{ defined( \$_[0]->{att}->{class}) && \$_[0]->{att}->{class}=~ m{\\b$class\\b} }
                  : '';

    #warn "id: '$id'";
    my $id_test= $id
               ? qq{ defined( \$_[0]->{att}->{$ID}) && \$_[0]->{att}->{$ID} eq '$id' }
               : '';

    #warn "gi_test: '$gi_test' - class_test: '$class_test' returning ", _and( $gi_test, $class_test);
    return _and( $gi_test, $class_test, $id_test);
  }
|
5919
|
|
|
|
|
|
|
|
|
5920
|
|
|
|
|
|
|
|
|
5921
|
|
|
|
|
|
|
# input: the original predicate
# output: the text of a Perl expression, within parens, to be included in a
# generated sub where the element is $_[0].
# The enclosing brackets are removed, then the predicate is rewritten token by
# token:
#   - strings and regexps are kept as is
#   - @att followed by a comparison or match operator becomes the value of the
#     attribute (guarded by an existence test), @att alone becomes a test that
#     the attribute is defined
#   - '=' before a number (a digit, '+' or '-') becomes '==', other comparison
#     operators before a number are kept; in the other cases comparison
#     operators are replaced by their string equivalent (eq, ne, gt...)
#   - string() or text() without argument becomes the text of the element
#   - string( child) or text( child) compared or matched to a value becomes a
#     search for a child with that gi whose text satisfies the test
#   - and / or become && / ||
# anything else is left untouched.
sub _parse_predicate_in_step
  { my( $cond)= @_;
    my %PERL_ALPHA_TEST= ( '=' => ' eq ', '!=' => ' ne ', '>' => ' gt ', '>=' => ' ge ', '<' => ' lt ', '<=' => ' le ');

    $cond=~ s{^\s*\[\s*}{};
    $cond=~ s{\s*\]\s*$}{};
    $cond=~ s{( ($REG_STRING|$REG_REGEXP) # strings or regexps
               |\@($REG_TAG_NAME)(?=\s*(?:[><=!]|!~|=~)) # @att (followed by a comparison operator)
               |\@($REG_TAG_NAME) # @att (not followed by a comparison operator)
               |=~|!~ # matching operators
               |([><]=?|=|!=)(?=\s*[\d+-]) # test before a number
               |([><]=?|=|!=) # test, other cases
               |($REG_FUNCTION) # no arg functions
               # this bit is a mess, but it is the only solution with this half-baked parser
               |((?:string|text)\(\s*$REG_TAG_NAME\s*\)\s*$REG_MATCH\s*$REG_REGEXP) # string( child) =~ /regexp/
               |((?:string|text)\(\s*$REG_TAG_NAME\s*\)\s*!?=\s*$REG_VALUE) # string( child) = "value" (or !=)
               |((?:string|text)\(\s*$REG_TAG_NAME\s*\)\s*[<>]=?\s*$REG_VALUE) # string( child) > "value"
               |(and|or)
              )}
             { my( $token, $string, $att, $bare_att, $num_test, $alpha_test, $func, $string_regexp, $string_eq, $string_test, $and_or)
               = ( $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11);

               if( defined $string) { $token }
               elsif( $att) { "( \$_[0]->{att} && exists( \$_[0]->{att}->{'$att'}) && \$_[0]->{att}->{'$att'})"; }
               elsif( $bare_att) { "(\$_[0]->{att} && defined( \$_[0]->{att}->{'$bare_att'}))"; }
               elsif( $num_test && ($num_test eq '=') ) { "==" } # others tests are unchanged
               elsif( $alpha_test) { $PERL_ALPHA_TEST{$alpha_test} }
               elsif( $func && $func=~ m{^(?:string|text)})
                 { "\$_[0]->text"; }
               elsif( $string_regexp && $string_regexp =~ m{(?:string|text)\(\s*($REG_TAG_NAME)\s*\)\s*($REG_MATCH)\s*($REG_REGEXP)})
                 { "(XML::Twig::_first_n { (\$_->gi eq '$1') && (\$_->text $2 $3) } 1, \$_[0]->_children)"; }
               elsif( $string_eq && $string_eq =~ m{(?:string|text)\(\s*($REG_TAG_NAME)\s*\)\s*(!?=)\s*($REG_VALUE)})
                 {"(XML::Twig::_first_n { (\$_->gi eq '$1') && (\$_->text $PERL_ALPHA_TEST{$2} $3) } 1, \$_[0]->_children)"; }
               elsif( $string_test && $string_test =~ m{(?:string|text)\(\s*($REG_TAG_NAME)\s*\)\s*([<>]=?)\s*($REG_VALUE)})
                 { "(XML::Twig::_first_n { (\$_->gi eq '$1') && (\$_->text $2 $3) } 1, \$_[0]->_children)"; }
               elsif( $and_or) { $and_or eq 'and' ? '&&' : '||' ; }
               else { $token; }
             }gexs;
    return "($cond)";
  }
|
5962
|
|
|
|
|
|
|
|
|
5963
|
|
|
|
|
|
|
|
|
5964
|
|
|
|
|
|
|
# Maps the equality operators used in conditions to their Perl string
# equivalent: '=' gives 'eq' and '!=' gives 'ne'; any other operator is
# returned unchanged.
sub _op
  { my( $op)= @_;
    return   $op eq '='  ? 'eq'
           : $op eq '!=' ? 'ne'
           :               $op;
  }
|
5970
|
|
|
|
|
|
|
|
|
5971
|
|
|
|
|
|
|
# Tests the element against a navigation condition.
# Returns the element when there is no (or a false) condition; otherwise
# returns the result of the sub compiled from the condition, which is stored
# in %cond_cache the first time the condition is used.
sub passes
  { my( $elt, $cond)= @_;
    return $elt unless $cond;
    $cond_cache{$cond} ||= _install_cond( $cond);
    return $cond_cache{$cond}->( $elt);
  }
|
5977
|
|
|
|
|
|
|
} |
|
5978
|
|
|
|
|
|
|
|
|
5979
|
|
|
|
|
|
|
# Stores $parent in the element's parent slot; the reference is weakened when
# $XML::Twig::weakrefs is true.
sub set_parent
  { my( $elt, $parent)= @_;
    $elt->{parent}= $parent;
    if( $XML::Twig::weakrefs)
      { weaken( $elt->{parent}); }
  }
|
5983
|
|
|
|
|
|
|
|
|
5984
|
|
|
|
|
|
|
# Without a condition: returns the content of the element's parent slot.
# With a condition: returns the closest ancestor that passes it, or nothing
# (undef or the empty list, depending on the context) when there is none.
sub parent
  { my( $elt, $cond)= @_;
    return $elt->{parent} unless $cond;

    while( 1)
      { $elt= $elt->{parent} or return;
        return $elt if( $elt->passes( $cond));
      }
  }
|
5990
|
|
|
|
|
|
|
|
|
5991
|
|
|
|
|
|
|
# Stores $child in the element's first_child slot; returns the stored value.
sub set_first_child
  { my( $elt, $child)= @_;
    return $elt->{first_child}= $child;
  }
|
5994
|
|
|
|
|
|
|
|
|
5995
|
|
|
|
|
|
|
# Without a condition: returns the content of the first_child slot.
# With a condition: returns the first child for which the sub compiled from
# the condition (cached in %cond_cache) is true, going through the children
# by their next_sibling links; returns a false value when no child matches.
sub first_child
  { my( $elt, $cond)= @_;
    return $elt->{first_child} unless $cond;

    my $test_cond= ($cond_cache{$cond} ||= _install_cond( $cond));
    my $child= $elt->{first_child};
    $child= $child->{next_sibling} while( $child && !$test_cond->( $child));
    return $child;
  }
|
6004
|
|
|
|
|
|
|
|
|
6005
|
|
|
|
|
|
|
# Condition-free accessor: returns the content of the first_child slot.
sub _first_child
  { my( $elt)= @_;
    return $elt->{first_child};
  }
|
6006
|
|
|
|
|
|
|
# Condition-free accessor: returns the content of the last_child slot.
sub _last_child
  { my( $elt)= @_;
    return $elt->{last_child};
  }
|
6007
|
|
|
|
|
|
|
# Condition-free accessor: returns the content of the next_sibling slot.
sub _next_sibling
  { my( $elt)= @_;
    return $elt->{next_sibling};
  }
|
6008
|
|
|
|
|
|
|
# Condition-free accessor: returns the content of the prev_sibling slot.
sub _prev_sibling
  { my( $elt)= @_;
    return $elt->{prev_sibling};
  }
|
6009
|
|
|
|
|
|
|
# Condition-free accessor: returns the content of the parent slot.
sub _parent
  { my( $elt)= @_;
    return $elt->{parent};
  }
|
6010
|
|
|
|
|
|
|
# Returns the list of the siblings that follow the element, closest first.
sub _next_siblings
  { my( $elt)= @_;
    my @siblings;
    for( my $sibling= $elt->{next_sibling}; $sibling; $sibling= $sibling->{next_sibling})
      { push @siblings, $sibling; }
    return @siblings;
  }
|
6011
|
|
|
|
|
|
|
# Returns the list of the siblings that precede the element, closest first.
sub _prev_siblings
  { my( $elt)= @_;
    my @siblings;
    for( my $sibling= $elt->{prev_sibling}; $sibling; $sibling= $sibling->{prev_sibling})
      { push @siblings, $sibling; }
    return @siblings;
  }
|
6012
|
|
|
|
|
|
|
|
|
6013
|
|
|
|
|
|
|
# sets a field
# arguments $record, $cond, @content
# The field is the first child of $record that satisfies $cond: when it
# exists its content is replaced by @content; otherwise a new element, named
# after the tag name found at the start of $cond, is created with @content as
# the last child of $record.
# Returns the field. Croaks when the field has to be created and $cond does
# not start with a tag name.
sub set_field
  { my( $record, $cond, @content)= @_;

    my $child= $record->first_child( $cond);
    if( $child)
      { $child->set_content( @content);
        return $child;
      }

    unless( $cond=~ m{^\s*($REG_TAG_NAME)})
      { croak "can't create a field name from $cond"; }
    my $gi= $1;
    $child= $record->insert_new_elt( last_child => $gi, @content);
    return $child;
  }
|
6031
|
|
|
|
|
|
|
|
|
6032
|
|
|
|
|
|
|
# Stores $child in the element's last_child slot and removes the element's
# 'empty' flag; the reference is weakened when $XML::Twig::weakrefs is true.
sub set_last_child
  { my( $elt, $child)= @_;
    $elt->{'last_child'}= $child;
    # the flag belongs to the element ($_[0]): the original statement was
    # written with $_->[0], which dereferences the global $_ instead and so
    # never touched the element
    delete $elt->{empty};
    if( $XML::Twig::weakrefs) { weaken( $elt->{'last_child'}); }
  }
|
6037
|
|
|
|
|
|
|
|
|
6038
|
|
|
|
|
|
|
# Without a condition: returns the content of the last_child slot.
# With a condition: returns the last child for which the sub compiled from
# the condition (cached in %cond_cache) is true, going through the children
# backwards by their prev_sibling links; returns a false value when no child
# matches.
sub last_child
  { my( $elt, $cond)= @_;
    return $elt->{last_child} unless $cond;

    my $test_cond= ($cond_cache{$cond} ||= _install_cond( $cond));
    my $child= $elt->{last_child};
    $child= $child->{prev_sibling} while( $child && !$test_cond->( $child));
    return $child;
  }
|
6047
|
|
|
|
|
|
|
|
|
6048
|
|
|
|
|
|
|
|
|
6049
|
|
|
|
|
|
|
# Stores $sibling in the element's prev_sibling slot; the reference is
# weakened when $XML::Twig::weakrefs is true.
sub set_prev_sibling
  { my( $elt, $sibling)= @_;
    $elt->{prev_sibling}= $sibling;
    if( $XML::Twig::weakrefs)
      { weaken( $elt->{prev_sibling}); }
  }
|
6053
|
|
|
|
|
|
|
|
|
6054
|
|
|
|
|
|
|
# Without a condition: returns the content of the prev_sibling slot.
# With a condition: returns the closest preceding sibling for which the sub
# compiled from the condition (cached in %cond_cache) is true; returns a
# false value when no sibling matches.
sub prev_sibling
  { my( $elt, $cond)= @_;
    return $elt->{prev_sibling} unless $cond;

    my $test_cond= ($cond_cache{$cond} ||= _install_cond( $cond));
    my $sibling= $elt->{prev_sibling};
    $sibling= $sibling->{prev_sibling} while( $sibling && !$test_cond->( $sibling));
    return $sibling;
  }
|
6063
|
|
|
|
|
|
|
|
|
6064
|
|
|
|
|
|
|
# Stores $sibling in the element's next_sibling slot; returns the stored value.
sub set_next_sibling
  { my( $elt, $sibling)= @_;
    return $elt->{next_sibling}= $sibling;
  }
|
6065
|
|
|
|
|
|
|
|
|
6066
|
|
|
|
|
|
|
# Without a condition: returns the content of the next_sibling slot.
# With a condition: returns the closest following sibling for which the sub
# compiled from the condition (cached in %cond_cache) is true; returns a
# false value when no sibling matches.
sub next_sibling
  { my( $elt, $cond)= @_;
    return $elt->{next_sibling} unless $cond;

    my $test_cond= ($cond_cache{$cond} ||= _install_cond( $cond));
    my $sibling= $elt->{next_sibling};
    $sibling= $sibling->{next_sibling} while( $sibling && !$test_cond->( $sibling));
    return $sibling;
  }
|
6075
|
|
|
|
|
|
|
|
|
6076
|
|
|
|
|
|
|
# methods dealing with the class attribute, convenient if you work with xhtml |
|
6077
|
|
|
|
|
|
|
# Returns the value of the element's class attribute.
sub class
  { my( $elt)= @_;
    $elt->{att}->{class};
  }
|
6078
|
|
|
|
|
|
|
# lvalue version of class, kept as a separate method from class itself:
# the class attribute can be assigned to through it
sub lclass
          :lvalue    # > perl 5.5
  { my( $elt)= @_;
    $elt->{att}->{class};
  }
|
6082
|
|
|
|
|
|
|
|
|
6083
|
|
|
|
|
|
|
# Sets the class attribute of the element (through set_att, whose result is
# returned).
sub set_class
  { my( $elt, $class)= @_;
    return $elt->set_att( class => $class);
  }
|
6084
|
|
|
|
|
|
|
|
|
6085
|
|
|
|
|
|
|
# adds a class to an element
# The class attribute is treated as a whitespace-separated list of class
# names: $new_class is added to the list (a name already there is not
# duplicated) and the attribute is set to the sorted names, separated by
# single spaces.
# Returns the element when $new_class is false (nothing is done), the result
# of set_class otherwise.
sub add_to_class
  { my( $elt, $new_class)= @_;
    return $elt unless $new_class;
    my $class= $elt->class;
    # split ' ' skips leading whitespace, so an attribute like ' a b' does not
    # yield an empty class name (which used to end up as a leading space in
    # the new value); testing definedness keeps a class named '0'
    my %class= defined( $class) ? map { $_ => 1 } split( ' ', $class) : ();
    $class{$new_class}= 1;
    $elt->set_class( join( ' ', sort keys %class));
  }
|
6094
|
|
|
|
|
|
|
|
|
6095
|
|
|
|
|
|
|
# Removes a class from an element.
# The class attribute is treated as a whitespace-separated list of class
# names: $class_to_remove is taken out of the list and the attribute is set
# to the remaining names, sorted and separated by single spaces.
# Returns the element when $class_to_remove is false (nothing is done), the
# result of set_class otherwise.
sub remove_class
  { my( $elt, $class_to_remove)= @_;
    return $elt unless $class_to_remove;
    my $class= $elt->class;
    # split ' ' skips leading whitespace, so an attribute like ' a b' does not
    # yield an empty class name (which used to end up as a leading space in
    # the new value); testing definedness keeps a class named '0'
    my %class= defined( $class) ? map { $_ => 1 } split( ' ', $class) : ();
    delete $class{$class_to_remove};
    $elt->set_class( join( ' ', sort keys %class));
  }
|
6103
|
|
|
|
|
|
|
|
|
6104
|
|
|
|
|
|
|
# Sets the class of the element to the value of its attribute $att.
sub att_to_class
  { my( $elt, $att)= @_;
    my $value= $elt->{'att'}->{$att};
    $elt->set_class( $value);
  }
|
6105
|
|
|
|
|
|
|
# Adds the value of the attribute $att to the classes of the element.
sub add_att_to_class
  { my( $elt, $att)= @_;
    my $value= $elt->{'att'}->{$att};
    $elt->add_to_class( $value);
  }
|
6106
|
|
|
|
|
|
|
# Adds the value of the attribute $att to the classes of the element, then
# deletes the attribute.
sub move_att_to_class
  { my( $elt, $att)= @_;
    my $value= $elt->{'att'}->{$att};
    $elt->add_to_class( $value);
    $elt->del_att( $att);
  }
|
6109
|
|
|
|
|
|
|
# Sets the class of the element to its tag.
sub tag_to_class
  { my( $elt)= @_;
    my $tag= $elt->tag;
    $elt->set_class( $tag);
  }
|
6110
|
|
|
|
|
|
|
# Adds the tag of the element to its classes.
sub add_tag_to_class
  { my( $elt)= @_;
    my $tag= $elt->tag;
    $elt->add_to_class( $tag);
  }
|
6111
|
|
|
|
|
|
|
# Adds the current tag of the element to its classes, then changes the tag to
# $new_tag.
sub set_tag_class
  { my( $elt, $new_tag)= @_;
    $elt->add_tag_to_class;
    $elt->set_tag( $new_tag);
  }
|
6112
|
|
|
|
|
|
|
|
|
6113
|
|
|
|
|
|
|
# Turns the element into a span, keeping its former tag as its class.
# A span that already has a class keeps that class (otherwise the class
# would be replaced by 'span').
sub tag_to_span
  { my( $elt)= @_;
    my $is_span_with_class= $elt->tag eq 'span' && $elt->class;
    $elt->set_class( $elt->tag) if( !$is_span_with_class);
    $elt->set_tag( 'span');
  }
|
6118
|
|
|
|
|
|
|
|
|
6119
|
|
|
|
|
|
|
# Turns the element into a div, keeping its former tag as its class.
# A div that already has a class keeps that class (otherwise the class would
# be replaced by 'div').
sub tag_to_div
  { my( $elt)= @_;
    my $is_div_with_class= $elt->tag eq 'div' && $elt->class;
    $elt->set_class( $elt->tag) if( !$is_div_with_class);
    $elt->set_tag( 'div');
  }
|
6124
|
|
|
|
|
|
|
|
|
6125
|
|
|
|
|
|
|
# Tests whether $class is one of the whitespace-separated names in the class
# attribute of the element.
# Returns the element when it is, 0 when it is not, and nothing when the
# element has no class attribute.
sub in_class
  { my( $elt, $class)= @_;
    return unless( defined $elt->class);
    if( $elt->class=~ m{(?:^|\s)\Q$class\E(?:\s|$)})
      { return $elt; }
    return 0;
  }
|
6131
|
|
|
|
|
|
|
|
|
6132
|
|
|
|
|
|
|
|
|
6133
|
|
|
|
|
|
|
# get or set all attributes
# argument can be a hash or a hashref
# The attributes are copied into a new hash (tied to Tie::IxHash when
# keep_atts_order is true) which replaces the existing attributes of the
# element. When the $ID attribute is among them, the id is registered through
# _set_id. Returns the element.
sub set_atts
  { my( $elt, @args)= @_;

    my %atts;
    if( keep_atts_order())
      { tie %atts, 'Tie::IxHash'; }
    %atts= @args == 1 ? %{$args[0]} : @args;
    $elt->{att}= \%atts;

    $elt->_set_id( $atts{$ID}) if( exists $atts{$ID});
    return $elt;
  }
|
6144
|
|
|
|
|
|
|
|
|
6145
|
|
|
|
|
|
|
# Returns the content of the att slot (the hash ref holding the attributes).
sub atts
  { my( $elt)= @_;
    return $elt->{att};
  }
|
6146
|
|
|
|
|
|
|
# Returns the sorted list of the names of the attributes of the element.
sub att_names
  { my( $elt)= @_;
    return (sort keys %{$elt->{att}});
  }
|
6147
|
|
|
|
|
|
|
# Replaces the attributes of the element by a new empty hash; returns the
# element.
sub del_atts
  { my( $elt)= @_;
    $elt->{att}= {};
    return $elt;
  }
|
6148
|
|
|
|
|
|
|
|
|
6149
|
|
|
|
|
|
|
# get or set a single attribute (set works for several atts)
# Arguments: the element, then a list of attribute name => value pairs.
# The attribute hash is created when needed (tied to Tie::IxHash when
# keep_atts_order is true); setting the $ID attribute also registers the id
# through _set_id. Returns the element.
# Croaks when called with a single reference instead of name/value pairs.
sub set_att
  { my( $elt, @pairs)= @_;

    if( $pairs[0] && ref( $pairs[0]) && !$pairs[1])
      { croak "improper call to set_att, usage is \$elt->set_att( att1 => 'val1', att2 => 'val2',...)"; }

    if( !$elt->{att})
      { $elt->{att}={};
        tie %{$elt->{att}}, 'Tie::IxHash' if( keep_atts_order());
      }

    while( my( $att, $val)= splice( @pairs, 0, 2))
      { $elt->{att}->{$att}= $val;
        $elt->_set_id( $val) if( $att eq $ID);
      }
    return $elt;
  }
|
6168
|
|
|
|
|
|
|
|
|
6169
|
|
|
|
|
|
|
# return the value of an attribute (undef if the element does not have it)
sub att
  { my( $elt, $att)= @_;
    return $elt->{att}->{$att};
  }
|
6170
|
|
|
|
|
|
|
# lvalue version of att: the attribute value can be assigned to, as in
# $elt->latt( 'name')= 'value'
# note that assigning this way only writes to the attribute hash
sub latt
    :lvalue                                                  # > perl 5.5
  { my( $elt, $att)= @_;
    $elt->{att}->{$att};
  }
|
6174
|
|
|
|
|
|
|
|
|
6175
|
|
|
|
|
|
|
# delete the attributes whose names are passed; returns the element
sub del_att
  { my( $elt, @names)= @_;
    foreach my $name (@names)
      { delete $elt->{att}->{$name}; }
    return $elt;
  }
|
6180
|
|
|
|
|
|
|
|
|
6181
|
|
|
|
|
|
|
# true if the element has the attribute, even with an undefined value
sub att_exists
  { my( $elt, $att)= @_;
    return exists $elt->{att}->{$att};
  }
|
6182
|
|
|
|
|
|
|
|
|
6183
|
|
|
|
|
|
|
# delete an attribute from the element and from all of its descendants
# (every element returned by descendants_or_self for the *[@att] condition)
# returns the element
sub strip_att
  { my( $elt, $att)= @_;
    my @carriers= $elt->descendants_or_self( qq{*[\@$att]});
    foreach my $carrier (@carriers)
      { $carrier->del_att( $att); }
    return $elt;
  }
|
6189
|
|
|
|
|
|
|
|
|
6190
|
|
|
|
|
|
|
# rename an attribute, keeping its value
# nothing happens when the old attribute is missing or has an undefined value
# returns the element
sub change_att_name
  { my( $elt, $old_name, $new_name)= @_;
    my $value= $elt->{att}->{$old_name};
    if( defined $value)
      { $elt->del_att( $old_name)->set_att( $new_name => $value); }
    return $elt;
  }
|
6198
|
|
|
|
|
|
|
|
|
6199
|
|
|
|
|
|
|
# lower-case the names of all the attributes of the element
# returns the element
sub lc_attnames
  { my( $elt)= @_;
    foreach my $name ($elt->att_names)
      { my $lc_name= lc $name;
        next if( $lc_name eq $name);      # already lower case
        $elt->change_att_name( $name, $lc_name);
      }
    return $elt;
  }
|
6205
|
|
|
|
|
|
|
|
|
6206
|
|
|
|
|
|
|
# raise the twig_current flag on the element; returns 1
sub set_twig_current
  { my( $elt)= @_;
    return $elt->{twig_current}= 1;
  }
|
6207
|
|
|
|
|
|
|
# remove the twig_current flag from the element; returns its former value
sub del_twig_current
  { my( $elt)= @_;
    return delete $elt->{twig_current};
  }
|
6208
|
|
|
|
|
|
|
|
|
6209
|
|
|
|
|
|
|
|
|
6210
|
|
|
|
|
|
|
# set the id of the element
# an existing id is first removed through del_id, then the id attribute is
# set and _set_id is called with the new id
# returns the element
sub set_id
  { my( $elt, $id)= @_;
    if( exists $elt->{att}->{$ID}) { $elt->del_id(); }
    $elt->set_att( $ID, $id);
    $elt->_set_id( $id);
    return $elt;
  }
|
6218
|
|
|
|
|
|
|
|
|
6219
|
|
|
|
|
|
|
# record the element under $id in the id list, without touching the
# attribute value
# the list (twig_id_list) lives in the object returned by ->twig, or in the
# element itself when it has no twig; the entry is weakened when
# $XML::Twig::weakrefs is true
# returns the element
sub _set_id
  { my( $elt, $id)= @_;
    my $owner= $elt->twig || $elt;
    $owner->{twig_id_list}->{$id}= $elt;
    weaken( $owner->{twig_id_list}->{$id}) if( $XML::Twig::weakrefs);
    return $elt;
  }
|
6227
|
|
|
|
|
|
|
|
|
6228
|
|
|
|
|
|
|
# return the value of the id attribute (the attribute named by $ID)
sub id
  { my( $elt)= @_;
    return $elt->{att}->{$ID};
  }
|
6229
|
|
|
|
|
|
|
|
|
6230
|
|
|
|
|
|
|
# methods used to add ids to elements that don't have one
# the prefix and the counter are private to this block and shared by all calls
BEGIN
  { my $next_nb= "0001";       # a string, so that ++ keeps the leading zeros
    my $prefix = "twig_id_";

    # change the prefix of generated ids and restart the counter at 1
    sub set_id_seed ## no critic (Subroutines::ProhibitNestedSubs);
      { my( $invocant, $seed)= @_;
        $prefix= $seed;
        $next_nb= 1;
      }

    # return the id of the element, creating one first if it has none
    # the optional argument is a code ref, called with the element, that
    # returns the new id; without it the id is the prefix followed by the counter
    sub add_id ## no critic (Subroutines::ProhibitNestedSubs);
      { my( $elt, $generator)= @_;
        my $existing= $elt->{att}->{$ID};
        return $existing if( defined $existing);

        my $id= ( $generator && ref( $generator) && isa( $generator, 'CODE'))
                  ? $generator->( $elt)
                  : $prefix . $next_nb++;
        $elt->set_id( $id);
        return $id;
      }
  }
|
6249
|
|
|
|
|
|
|
|
|
6250
|
|
|
|
|
|
|
|
|
6251
|
|
|
|
|
|
|
|
|
6252
|
|
|
|
|
|
|
# delete the id attribute and remove the element from the id list
# the optional argument is the object holding the id list (twig_id_list),
# by default the one returned by ->twig; without one only the attribute goes
# returns the element
sub del_id
  { my( $elt, $twig)= @_;
    return $elt unless( exists $elt->{att}->{$ID});
    my $id= delete $elt->{att}->{$ID};

    $twig ||= $elt->twig;
    return $elt unless( $twig);
    delete $twig->{twig_id_list}->{$id} if( exists $twig->{twig_id_list}->{$id});

    return $elt;
  }
|
6266
|
|
|
|
|
|
|
|
|
6267
|
|
|
|
|
|
|
# return the list of children
# the arguments are passed on to first_child and next_sibling, which are
# used to walk the children
sub children
  { my( $elt, @cond)= @_;
    my @children;
    for( my $child= $elt->first_child( @cond); $child; $child= $child->next_sibling( @cond))
      { push @children, $child; }
    return @children;
  }
|
6278
|
|
|
|
|
|
|
|
|
6279
|
|
|
|
|
|
|
# return all the children, following the first_child / next_sibling links
# directly (no condition, no method call)
sub _children
  { my( $elt)= @_;
    my @children= ();
    for( my $child= $elt->{first_child}; $child; $child= $child->{next_sibling})
      { push @children, $child; }
    return @children;
  }
|
6289
|
|
|
|
|
|
|
|
|
6290
|
|
|
|
|
|
|
# return the list of what ->copy gives for each child
# the arguments are passed on to first_child and next_sibling
sub children_copy
  { my( $elt, @cond)= @_;
    my @copies;
    for( my $child= $elt->first_child( @cond); $child; $child= $child->next_sibling( @cond))
      { push @copies, $child->copy; }
    return @copies;
  }
|
6300
|
|
|
|
|
|
|
|
|
6301
|
|
|
|
|
|
|
|
|
6302
|
|
|
|
|
|
|
# return the number of children for which ->passes( $cond) is true
sub children_count
  { my( $elt, $cond)= @_;
    my $count= 0;
    for( my $child= $elt->{first_child}; $child; $child= $child->{next_sibling})
      { $count++ if( $child->passes( $cond)); }
    return $count;
  }
|
6313
|
|
|
|
|
|
|
|
|
6314
|
|
|
|
|
|
|
# return the text of the children (arguments are passed on to children)
# in list context the list of texts, otherwise the texts joined together
sub children_text
  { my( $elt, @cond)= @_;
    my @texts= map { $_->text } $elt->children( @cond);
    return wantarray() ? @texts : join( '', @texts);
  }
|
6320
|
|
|
|
|
|
|
|
|
6321
|
|
|
|
|
|
|
# return the trimmed text of the children (arguments are passed on to children)
# in list context the list of texts, otherwise the texts joined together
sub children_trimmed_text
  { my( $elt, @cond)= @_;
    my @texts= map { $_->trimmed_text } $elt->children( @cond);
    return wantarray() ? @texts : join( '', @texts);
  }
|
6327
|
|
|
|
|
|
|
|
|
6328
|
|
|
|
|
|
|
# return the element if all of its children pass the condition
# (trivially so when it has no child), 0 as soon as one does not
sub all_children_are
  { my( $parent, $cond)= @_;
    my @kids= $parent->_children;
    foreach my $kid (@kids)
      { if( !$kid->passes( $cond)) { return 0; } }
    return $parent;
  }
|
6334
|
|
|
|
|
|
|
|
|
6335
|
|
|
|
|
|
|
|
|
6336
|
|
|
|
|
|
|
# return the ancestors of the element that pass the condition,
# starting with the parent and going up
sub ancestors
  { my( $elt, $cond)= @_;
    my @ancestors;
    for( my $ancestor= $elt->{parent}; $ancestor; $ancestor= $ancestor->{parent})
      { push @ancestors, $ancestor if( $ancestor->passes( $cond)); }
    return @ancestors;
  }
|
6345
|
|
|
|
|
|
|
|
|
6346
|
|
|
|
|
|
|
# same as ancestors, except that the element itself is tested first
sub ancestors_or_self
  { my( $elt, $cond)= @_;
    my @ancestors;
    for( my $current= $elt; $current; $current= $current->{parent})
      { push @ancestors, $current if( $current->passes( $cond)); }
    return @ancestors;
  }
|
6355
|
|
|
|
|
|
|
|
|
6356
|
|
|
|
|
|
|
|
|
6357
|
|
|
|
|
|
|
# return all the ancestors of the element (no condition), parent first
# the element itself comes first when $include_self is true
sub _ancestors
  { my( $elt, $include_self)= @_;
    my @ancestors;
    push @ancestors, $elt if( $include_self);
    for( my $ancestor= $elt->{parent}; $ancestor; $ancestor= $ancestor->{parent})
      { push @ancestors, $ancestor; }
    return @ancestors;
  }
|
6363
|
|
|
|
|
|
|
|
|
6364
|
|
|
|
|
|
|
|
|
6365
|
|
|
|
|
|
|
# return the value of an attribute, looked up in the element then in its
# ancestors: the first defined value found going up is returned
# when tag names are passed after the attribute name, only elements with one
# of those tags are considered
# returns undef if no value is found
sub inherit_att
  { my( $elt, $att, @tags)= @_;
    my %wanted= map { ($_, 1) } @tags;

    for( my $current= $elt; $current; $current= $current->{parent})
      { my $value= $current->{att}->{$att};
        next unless( defined $value);
        next if( %wanted && !$wanted{$XML::Twig::index2gi[$current->{gi}]});
        return $value;
      }
    return undef;
  }
|
6378
|
|
|
|
|
|
|
|
|
6379
|
|
|
|
|
|
|
# same as inherit_att, except that when an element has no parent the lookup
# continues with the parent recorded in its 'former' field, if any
# returns undef if no value is found
sub _inherit_att_through_cut
  { my( $elt, $att, @tags)= @_;
    my %wanted= map { ($_, 1) } @tags;

    my $current= $elt;
    while( $current)
      { my $value= $current->{att}->{$att};
        if( defined $value && ( !%wanted || $wanted{$XML::Twig::index2gi[$current->{gi}]}))
          { return $value; }
        $current= $current->{parent} || ($current->{former} && $current->{former}->{parent});
      }
    return undef;
  }
|
6392
|
|
|
|
|
|
|
|
|
6393
|
|
|
|
|
|
|
|
|
6394
|
|
|
|
|
|
|
# return the sorted list of namespace prefixes used in the tag or in the
# attribute names of the element and of its ancestors
# prefixes starting with xml are left out; the empty prefix is included when
# $elt->namespace( '') is true
sub current_ns_prefixes
  { my( $elt)= @_;
    my %seen;
    if( $elt->namespace( '')) { $seen{''}= 1; }

    for( my $current= $elt; $current; $current= $current->{parent})
      { my @names= ( $XML::Twig::index2gi[$current->{gi}], $current->att_names);
        foreach my $name (@names)
          { next unless( $name=~ m{^([^:]+):});
            my $prefix= $1;
            $seen{$prefix}= 1 unless( $prefix=~ m{^xml});
          }
      }

    return (sort keys %seen);
  }
|
6406
|
|
|
|
|
|
|
|
|
6407
|
|
|
|
|
|
|
# kinda counter-intuitive actually:
# the next element is the one whose open tag comes next after the open tag of
# the current one: the first child if there is one, otherwise the next
# sibling, otherwise the next sibling of the closest ancestor that has one
# optional arguments are:
#   - $subtree_root: a reference to an element; once the next element would be
#                    outside of $subtree_root, next_elt returns undef
#   - $cond: a condition, next_elt returns the next element matching the condition
sub next_elt
  { my $elt= shift;
    my $subtree_root= ( ref( $_[0]) && isa( $_[0], 'XML::Twig::Elt')) ? shift : 0;
    my $cond= shift;

    # optimization: a condition that is a known tag name is tested by comparing
    # gi indexes, any other condition through a (cached) generated test
    my( $gi_index, $test_cond);
    if( $cond && !defined( $gi_index= $XML::Twig::gi2index{$cond}))
      { $test_cond= ($cond_cache{$cond} ||= _install_cond( $cond)); }

    while( 1)
      { my $candidate;
        if( $candidate= $elt->{first_child})
          { # simplest case: the elt has a child
          }
        elsif( $candidate= $elt->{next_sibling})
          { # no child but a next sibling: it is outside of the subtree
            # if the elt is the subtree root
            return undef if( $subtree_root && ($elt == $subtree_root));
          }
        else
          { # no child and no next sibling: go up to the closest ancestor that
            # has a next sibling, without going past the subtree root
            return undef if( $subtree_root && ($elt == $subtree_root));

            my $ancestor= $elt->{parent} || return undef;
            until( $ancestor->{next_sibling})
              { return undef if( $subtree_root && ($subtree_root == $ancestor));
                $ancestor= $ancestor->{parent} || return undef;
              }
            return undef if( $subtree_root && ($subtree_root == $ancestor));
            $candidate= $ancestor->{next_sibling};
          }

        $elt= $candidate;                        # start from there if we loop
        last if(    ! defined $elt
                 || ! defined $cond
                 || (defined $gi_index  && ($elt->{gi} eq $gi_index))
                 || (defined $test_cond && ($test_cond->( $elt)))
               );
      }

    return $elt;
  }
|
6466
|
|
|
|
|
|
|
|
|
6467
|
|
|
|
|
|
|
# return the next_elt within the element:
# next_elt is called with the element itself as subtree root, followed by the
# other arguments
sub first_descendant
  { my( $elt, @args)= @_;
    return $elt->next_elt( $elt, @args);
  }
|
6470
|
|
|
|
|
|
|
|
|
6471
|
|
|
|
|
|
|
# get the last descendant of the element (see _last_descendant), and return
# it if there is no condition or if it matches the condition; otherwise
# return the result of prev_elt called on it, within the element
sub last_descendant
  { my( $elt, $cond)= @_;
    my $last= $elt->_last_descendant;
    return $last if( !$cond || $last->matches( $cond));
    return $last->prev_elt( $elt, $cond);
  }
|
6480
|
|
|
|
|
|
|
|
|
6481
|
|
|
|
|
|
|
# no argument allowed here, just follow the last_child links down as far as
# possible (an element without children is its own last descendant)
sub _last_descendant
  { my( $elt)= @_;
    $elt= $elt->{last_child} while( $elt->{last_child});
    return $elt;
  }
|
6487
|
|
|
|
|
|
|
|
|
6488
|
|
|
|
|
|
|
# counter-intuitive too:
# the previous element is the one whose open tag comes just before the open
# tag of the current one: the last descendant of the previous sibling if
# there is one, otherwise simply the parent
# optional arguments are a subtree root (an element), out of which the
# search does not go, and a condition that the result must pass
sub prev_elt
  { my $elt= shift;
    my $subtree_root= 0;
    if( defined $_[0] and (ref( $_[0]) && isa( $_[0], 'XML::Twig::Elt')))
      { $subtree_root= shift;
        return undef if( $elt == $subtree_root);
      }
    my $cond= shift;

    while( 1)
      { # nothing comes before the subtree root
        return undef if( $elt == $subtree_root);

        my $candidate= $elt->{prev_sibling};
        if( $candidate)
          { $candidate= $candidate->{last_child} while( $candidate->{last_child}); }
        else
          { $candidate= $elt->{parent} || return undef; }

        $elt= $candidate;                        # start from there if we loop
        last if( $elt->passes( $cond));
      }

    return $elt;
  }
|
6516
|
|
|
|
|
|
|
|
|
6517
|
|
|
|
|
|
|
# return the next sibling of the element, or of its closest ancestor that
# has one (undef if there is none)
sub _following_elt
  { my( $elt)= @_;
    $elt= $elt->{parent} while( $elt && !$elt->{next_sibling});
    return undef unless( $elt);
    return $elt->{next_sibling};
  }
|
6523
|
|
|
|
|
|
|
|
|
6524
|
|
|
|
|
|
|
# return the element given by _following_elt if there is no condition or if
# it matches the condition, otherwise the result of next_elt( $cond) called
# on it; undef if _following_elt finds nothing
sub following_elt
  { my( $elt, $cond)= @_;
    my $following= $elt->_following_elt;
    return undef unless( $following);
    if( !$cond || $following->matches( $cond)) { return $following; }
    return $following->next_elt( $cond);
  }
|
6530
|
|
|
|
|
|
|
|
|
6531
|
|
|
|
|
|
|
# return the list made of following_elt( $cond), then of the elements found
# by calling next_elt( $cond) repeatedly from there
# a false condition is treated as no condition
sub following_elts
  { my( $elt, $cond)= @_;
    $cond= undef if( !$cond);
    my $current= $elt->following_elt( $cond) or return ();
    my @followings= ( $current);
    push @followings, $current while( $current= $current->next_elt( $cond));
    return( @followings);
  }
|
6544
|
|
|
|
|
|
|
|
|
6545
|
|
|
|
|
|
|
# return the last descendant of the previous sibling of the element, or of
# its closest ancestor that has one (undef if there is none)
sub _preceding_elt
  { my( $elt)= @_;
    $elt= $elt->{parent} while( $elt && !$elt->{prev_sibling});
    return undef unless( $elt);
    return $elt->{prev_sibling}->_last_descendant;
  }
|
6551
|
|
|
|
|
|
|
|
|
6552
|
|
|
|
|
|
|
# return the element given by _preceding_elt if there is no condition or if
# it matches the condition, otherwise the result of prev_elt( $cond) called
# on it; undef if _preceding_elt finds nothing
sub preceding_elt
  { my( $elt, $cond)= @_;
    my $preceding= $elt->_preceding_elt;
    return undef unless( $preceding);
    if( !$cond || $preceding->matches( $cond)) { return $preceding; }
    return $preceding->prev_elt( $cond);
  }
|
6558
|
|
|
|
|
|
|
|
|
6559
|
|
|
|
|
|
|
# return the list made of preceding_elt( $cond), then of the elements found
# by calling prev_elt( $cond) repeatedly from there
# a false condition is treated as no condition
sub preceding_elts
  { my( $elt, $cond)= @_;
    $cond= undef if( !$cond);
    my $current= $elt->preceding_elt( $cond) or return ();
    my @precedings= ( $current);
    push @precedings, $current while( $current= $current->prev_elt( $cond));
    return( @precedings);
  }
|
6572
|
|
|
|
|
|
|
|
|
6573
|
|
|
|
|
|
|
# used in get_xpath
# without a condition return the element, with one return the result of
# matching the element against it
sub _self
  { my( $elt, $cond)= @_;
    if( $cond) { return $elt->matches( $cond); }
    return $elt;
  }
|
6578
|
|
|
|
|
|
|
|
|
6579
|
|
|
|
|
|
|
# call next_elt $offset times in a row (the remaining arguments are passed
# on to each call) and return the element reached
# returns undef if the offset is false or if next_elt runs out of elements
sub next_n_elt
  { my( $elt, $offset, @args)= @_;
    return undef unless( $offset);
    foreach my $step (1..$offset)
      { $elt= $elt->next_elt( @args) || return undef; }
    return $elt;
  }
|
6586
|
|
|
|
|
|
|
|
|
6587
|
|
|
|
|
|
|
# checks whether $elt is included in $ancestor
# $ancestor is either an element, looked for among the ancestors of $elt, or
# a condition, against which the ancestors are matched
# returns the ancestor found (the closest one for a condition), 0 otherwise
sub in
  { my( $elt, $ancestor)= @_;
    my $is_element= ref( $ancestor) && isa( $ancestor, 'XML::Twig::Elt');

    for( my $current= $elt->{parent}; $current; $current= $current->{parent})
      { my $found= $is_element ? ( $current == $ancestor)
                               : $current->matches( $ancestor);
        return $current if( $found);
      }
    return 0;
  }
|
6600
|
|
|
|
|
|
|
|
|
6601
|
|
|
|
|
|
|
# return the text of the element given by first_child (called with the
# remaining arguments), or '' if there is none
sub first_child_text
  { my( $elt, @cond)= @_;
    my $target= $elt->first_child( @cond);
    return $target ? $target->text : '';
  }
|
6606
|
|
|
|
|
|
|
|
|
6607
|
|
|
|
|
|
|
# return the list of the results of calling field on the element with each
# of the arguments in turn
sub fields
  { my( $elt, @conds)= @_;
    my @fields;
    foreach my $cond (@conds)
      { push @fields, $elt->field( $cond); }
    return @fields;
  }
|
6611
|
|
|
|
|
|
|
|
|
6612
|
|
|
|
|
|
|
# return the trimmed text of the element given by first_child (called with
# the remaining arguments), or '' if there is none
sub first_child_trimmed_text
  { my( $elt, @cond)= @_;
    my $target= $elt->first_child( @cond);
    return $target ? $target->trimmed_text : '';
  }
|
6617
|
|
|
|
|
|
|
|
|
6618
|
|
|
|
|
|
|
# return the result of calling passes (with the remaining arguments) on the
# first child of the element, undef if it has no child
sub first_child_matches
  { my( $elt, @cond)= @_;
    my $target= $elt->{first_child};
    return undef unless( $target);
    return $target->passes( @cond);
  }
|
6623
|
|
|
|
|
|
|
|
|
6624
|
|
|
|
|
|
|
# return the text of the element given by last_child (called with the
# remaining arguments), or '' if there is none
sub last_child_text
  { my( $elt, @cond)= @_;
    my $target= $elt->last_child( @cond);
    return $target ? $target->text : '';
  }
|
6629
|
|
|
|
|
|
|
|
|
6630
|
|
|
|
|
|
|
# return the trimmed text of the element given by last_child (called with
# the remaining arguments), or '' if there is none
sub last_child_trimmed_text
  { my( $elt, @cond)= @_;
    my $target= $elt->last_child( @cond);
    return $target ? $target->trimmed_text : '';
  }
|
6635
|
|
|
|
|
|
|
|
|
6636
|
|
|
|
|
|
|
# return the result of calling passes (with the remaining arguments) on the
# last child of the element, undef if it has no child
sub last_child_matches
  { my( $elt, @cond)= @_;
    my $target= $elt->{last_child};
    return undef unless( $target);
    return $target->passes( @cond);
  }
|
6641
|
|
|
|
|
|
|
|
|
6642
|
|
|
|
|
|
|
# return the text of the element given by child (called with the remaining
# arguments), or '' if there is none
sub child_text
  { my( $elt, @args)= @_;
    my $target= $elt->child( @args);
    return $target ? $target->text : '';
  }
|
6647
|
|
|
|
|
|
|
|
|
6648
|
|
|
|
|
|
|
# return the trimmed text of the element given by child (called with the
# remaining arguments), or '' if there is none
sub child_trimmed_text
  { my( $elt, @args)= @_;
    my $target= $elt->child( @args);
    return $target ? $target->trimmed_text : '';
  }
|
6653
|
|
|
|
|
|
|
|
|
6654
|
|
|
|
|
|
|
# return the result of calling passes (with the remaining arguments) on the
# element given by child( $nb), undef if there is no such child
sub child_matches
  { my( $elt, $nb, @cond)= @_;
    my $target= $elt->child( $nb);
    return undef unless( $target);
    return $target->passes( @cond);
  }
|
6660
|
|
|
|
|
|
|
|
|
6661
|
|
|
|
|
|
|
# return the text of the element given by _prev_sibling (called with the
# remaining arguments), or '' if there is none
sub prev_sibling_text
  { my( $elt, @cond)= @_;
    my $target= $elt->_prev_sibling( @cond);
    return $target ? $target->text : '';
  }
|
6666
|
|
|
|
|
|
|
|
|
6667
|
|
|
|
|
|
|
# return the trimmed text of the element given by _prev_sibling (called with
# the remaining arguments), or '' if there is none
sub prev_sibling_trimmed_text
  { my( $elt, @cond)= @_;
    my $target= $elt->_prev_sibling( @cond);
    return $target ? $target->trimmed_text : '';
  }
|
6672
|
|
|
|
|
|
|
|
|
6673
|
|
|
|
|
|
|
# return the result of calling passes (with the remaining arguments) on the
# previous sibling of the element, undef if it has none
sub prev_sibling_matches
  { my( $elt, @cond)= @_;
    my $target= $elt->{prev_sibling};
    return undef unless( $target);
    return $target->passes( @cond);
  }
|
6678
|
|
|
|
|
|
|
|
|
6679
|
|
|
|
|
|
|
# return the text of the element given by next_sibling (called with the
# remaining arguments), or '' if there is none
sub next_sibling_text
  { my( $elt, @cond)= @_;
    my $target= $elt->next_sibling( @cond);
    return $target ? $target->text : '';
  }
|
6684
|
|
|
|
|
|
|
|
|
6685
|
|
|
|
|
|
|
# return the trimmed text of the element given by next_sibling (called with
# the remaining arguments), or '' if there is none
sub next_sibling_trimmed_text
  { my( $elt, @cond)= @_;
    my $target= $elt->next_sibling( @cond);
    return $target ? $target->trimmed_text : '';
  }
|
6690
|
|
|
|
|
|
|
|
|
6691
|
|
|
|
|
|
|
# return the result of calling passes (with the remaining arguments) on the
# next sibling of the element, undef if it has none
sub next_sibling_matches
  { my( $elt, @cond)= @_;
    my $target= $elt->{next_sibling};
    return undef unless( $target);
    return $target->passes( @cond);
  }
|
6696
|
|
|
|
|
|
|
|
|
6697
|
|
|
|
|
|
|
# return the text of the element given by prev_elt (called with the
# remaining arguments), or '' if there is none
sub prev_elt_text
  { my( $elt, @args)= @_;
    my $target= $elt->prev_elt( @args);
    return $target ? $target->text : '';
  }
|
6702
|
|
|
|
|
|
|
|
|
6703
|
|
|
|
|
|
|
# return the trimmed text of the element given by prev_elt (called with the
# remaining arguments), or '' if there is none
sub prev_elt_trimmed_text
  { my( $elt, @args)= @_;
    my $target= $elt->prev_elt( @args);
    return $target ? $target->trimmed_text : '';
  }
|
6708
|
|
|
|
|
|
|
|
|
6709
|
|
|
|
|
|
|
# return the result of calling passes (with the remaining arguments) on the
# element given by prev_elt (called without argument), undef if there is none
sub prev_elt_matches
  { my( $elt, @cond)= @_;
    my $target= $elt->prev_elt;
    return undef unless( $target);
    return $target->passes( @cond);
  }
|
6714
|
|
|
|
|
|
|
|
|
6715
|
|
|
|
|
|
|
# return the text of the element given by next_elt (called with the
# remaining arguments), or '' if there is none
sub next_elt_text
  { my( $elt, @args)= @_;
    my $target= $elt->next_elt( @args);
    return $target ? $target->text : '';
  }
|
6720
|
|
|
|
|
|
|
|
|
6721
|
|
|
|
|
|
|
# return the trimmed text of the element given by next_elt (called with the
# remaining arguments), or '' if there is none
sub next_elt_trimmed_text
  { my( $elt, @args)= @_;
    my $target= $elt->next_elt( @args);
    return $target ? $target->trimmed_text : '';
  }
|
6726
|
|
|
|
|
|
|
|
|
6727
|
|
|
|
|
|
|
# return the result of calling passes (with the remaining arguments) on the
# element given by next_elt (called without argument), undef if there is none
sub next_elt_matches
  { my( $elt, @cond)= @_;
    my $target= $elt->next_elt;
    return undef unless( $target);
    return $target->passes( @cond);
  }
|
6732
|
|
|
|
|
|
|
|
|
6733
|
|
|
|
|
|
|
# return the text of the element given by parent (called with the remaining
# arguments), or '' if there is none
sub parent_text
  { my( $elt, @cond)= @_;
    my $target= $elt->parent( @cond);
    return $target ? $target->text : '';
  }
|
6738
|
|
|
|
|
|
|
|
|
6739
|
|
|
|
|
|
|
# return the trimmed text of the element given by parent (called with the
# remaining arguments), or '' if there is none
sub parent_trimmed_text
  { my( $elt, @cond)= @_;
    my $target= $elt->parent( @cond);
    return $target ? $target->trimmed_text : '';
  }
|
6744
|
|
|
|
|
|
|
|
|
6745
|
|
|
|
|
|
|
# return the result of calling passes (with the remaining arguments) on the
# parent of the element, undef if it has none
sub parent_matches
  { my( $elt, @cond)= @_;
    my $target= $elt->{parent};
    return undef unless( $target);
    return $target->passes( @cond);
  }
|
6750
|
|
|
|
|
|
|
|
|
6751
|
|
|
|
|
|
|
# return the element if it is the one that first_child (called with the
# remaining arguments) returns for its parent, 0 otherwise
# (including when the element has no parent)
sub is_first_child
  { my( $elt, @cond)= @_;
    my $parent= $elt->{parent};
    return 0 unless( $parent);
    my $first_child= $parent->first_child( @cond);
    return 0 unless( $first_child);
    return ($first_child == $elt) ? $elt : 0;
  }
|
6757
|
|
|
|
|
|
|
|
|
6758
|
|
|
|
|
|
|
# return the element if it is the one that last_child (called with the
# remaining arguments) returns for its parent, 0 otherwise
# (including when the element has no parent)
sub is_last_child
  { my( $elt, @cond)= @_;
    my $parent= $elt->{parent};
    return 0 unless( $parent);
    my $last_child= $parent->last_child( @cond);
    return 0 unless( $last_child);
    return ($last_child == $elt) ? $elt : 0;
  }
|
6764
|
|
|
|
|
|
|
|
|
6765
|
|
|
|
|
|
|
# returns the depth level of the element: the number of its ancestors
# (0 for an element without a parent)
# if a condition is passed as second parameter, only the ancestors that
# match it are counted
sub level
  { my( $elt, $cond)= @_;
    my $level= 0;
    while( $elt= $elt->{parent})
      { $level++ if( !$cond || $elt->matches( $cond)); }
    return $level;
  }
|
6775
|
|
|
|
|
|
|
|
|
6776
|
|
|
|
|
|
|
# checks whether $elt has an ancestor that satisfies $cond, returns the
# closest such ancestor, 0 if there is none
# when $level is given (and true) only that many ancestors, starting from
# the parent, are tested
sub in_context
  { my( $elt, $cond, $level)= @_;
    my $remaining= $level || -1;   # counting down from -1 never hits 0

    while( $remaining)
      { $elt= $elt->{parent};
        return 0 unless( $elt);
        return $elt if( $elt->matches( $cond));
        $remaining--;
      }
    return 0;
  }
|
6788
|
|
|
|
|
|
|
|
|
6789
|
|
|
|
|
|
|
# return all the descendants of an element (no condition), in the order in
# which next_elt would visit them: an element, then its children, then its
# next sibling...
# the element itself comes first when $include_self is true
sub _descendants
  { my( $subtree_root, $include_self)= @_;
    my @descendants= $include_self ? ($subtree_root) : ();

    my $current= $subtree_root;
    WALK: while( 1)
      { my $next= $current->{first_child};      # simplest case: go down
        if( !$next)
          { # a subtree root without children: there is nothing (left) to visit
            last WALK if( $current == $subtree_root);

            $next= $current->{next_sibling};
            if( !$next)
              { # no child and no next sibling: backtrack until we find an
                # ancestor with a next sibling, without leaving the subtree
                my $ancestor= $current->{parent} || last WALK;
                until( $ancestor->{next_sibling})
                  { last WALK if( $subtree_root == $ancestor);
                    $ancestor= $ancestor->{parent} || last WALK;
                  }
                last WALK if( $subtree_root == $ancestor);
                $next= $ancestor->{next_sibling};
              }
          }
        $current= $next;
        push @descendants, $current;
      }
    return @descendants;
  }
|
6827
|
|
|
|
|
|
|
|
|
6828
|
|
|
|
|
|
|
|
|
6829
|
|
|
|
|
|
|
# return the descendants of an element that satisfy the condition (all of
# them if there is no condition), in the order in which next_elt visits them
sub descendants
  { my( $subtree_root, $cond)= @_;
    my @descendants= ();
    my $elt= $subtree_root;

    # this branch is pure optimization for speed: when there is no condition,
    # or when the condition is a known tag name (replaced by the index of the
    # gi), the tree is walked right here
    # start optimization
    my $ind;
    if( !$cond || ( defined( $ind= $XML::Twig::gi2index{$cond})) )
      { my $next_elt;

        WALK: while( 1)
          { if( $next_elt= $elt->{first_child})
              { # simplest case: the elt has a child
              }
            elsif( $next_elt= $elt->{next_sibling})
              { # no child but a next sibling (just check we stay within the subtree)

                # case where elt is subtree_root, is empty and has a sibling
                last WALK if( $subtree_root && ($elt == $subtree_root));
              }
            else
              { # case where the element has no child and no next sibling:
                # get the first next sibling of an ancestor, checking subtree_root

                # case where elt is subtree_root, is empty and has no sibling
                last WALK if( $subtree_root && ($elt == $subtree_root));

                # backtrack until we find a parent with a next sibling
                # (an element without a parent simply ends the walk: this used
                # to read "last undef", which is not a valid way out of the loop)
                $next_elt= $elt->{parent} || last WALK;
                until( $next_elt->{next_sibling})
                  { last WALK if( $subtree_root && ($subtree_root == $next_elt));
                    $next_elt= $next_elt->{parent} || last WALK;
                  }
                last WALK if( $subtree_root && ($subtree_root == $next_elt));
                $next_elt= $next_elt->{next_sibling};
              }
            $elt= $next_elt || last WALK;
            push @descendants, $elt if( !$cond || ($elt->{gi} eq $ind));
          }
      }
    else
    # end optimization
      { # branch for a complex condition: use the regular (slow but simple) way
        while( $elt= $elt->next_elt( $subtree_root, $cond))
          { push @descendants, $elt; }
      }
    return @descendants;
  }
|
6880
|
|
|
|
|
|
|
|
|
6881
|
|
|
|
|
|
|
|
|
6882
|
|
|
|
|
|
|
# Same as descendants( $cond), except that the element itself comes first in
# the result when it passes $cond.
sub descendants_or_self {
    my ( $elt, $cond ) = @_;

    my @found;
    push @found, $elt if $elt->passes($cond);
    push @found, $elt->descendants($cond);

    return @found;
}
|
6888
|
|
|
|
|
|
|
|
|
6889
|
|
|
|
|
|
|
# Return the $nb-th sibling of the element: forward (next_sibling) for a
# positive $nb, backward (prev_sibling) for a negative one. The remaining
# arguments are handed unchanged to every next_sibling / prev_sibling call.
# Returns undef as soon as a step finds no sibling.
# With $nb equal to 0 the result is whatever $elt->passes returns for the
# first remaining argument.
sub sibling {
    my $elt = shift;
    my $nb  = shift;

    # $nb == 0: no move, just test the element itself
    return $elt->passes( $_[0] ) unless ( $nb > 0 || $nb < 0 );

    my $step = $nb > 0 ? 'next_sibling' : 'prev_sibling';
    foreach ( 1 .. abs $nb ) {
        $elt = $elt->$step(@_) or return undef;
    }

    return $elt;
}
|
6904
|
|
|
|
|
|
|
|
|
6905
|
|
|
|
|
|
|
# Text of the sibling found by sibling() (same arguments), or undef when
# sibling() returns a false value.
sub sibling_text {
    my $found = sibling(@_);
    return undef unless $found;
    return $found->text;
}
|
6909
|
|
|
|
|
|
|
|
|
6910
|
|
|
|
|
|
|
|
|
6911
|
|
|
|
|
|
|
# Return the $nb-th child of the element. Children are counted from 0 starting
# at first_child for $nb >= 0, and from -1 starting at last_child for a
# negative $nb. The remaining arguments are handed unchanged to every
# first_child / last_child / next_sibling / prev_sibling call.
# Returns undef when there is no such child.
sub child {
    my $elt = shift;
    my $nb  = shift;

    # where to start, how to move, and how many moves are needed
    my ( $start, $step, @hops ) =
        $nb >= 0
      ? ( 'first_child', 'next_sibling', 1 .. $nb )
      : ( 'last_child', 'prev_sibling', 2 .. ( -$nb ) );

    $elt = $elt->$start(@_) or return undef;
    foreach (@hops) {
        $elt = $elt->$step(@_) or return undef;
    }

    return $elt;
}
|
6926
|
|
|
|
|
|
|
|
|
6927
|
|
|
|
|
|
|
# List of the elements reached by calling prev_sibling repeatedly (the extra
# arguments are passed to each call), returned closest-sibling last.
sub prev_siblings {
    my $elt = shift;

    my @before;
    for ( my $sib = $elt->prev_sibling(@_) ; $sib ; $sib = $sib->prev_sibling(@_) ) {
        unshift @before, $sib;
    }

    return @before;
}
|
6934
|
|
|
|
|
|
|
|
|
6935
|
|
|
|
|
|
|
# Return the other children of the element's parent: the result of
# $parent->children( @_) minus the element itself.
# An element without a parent has no siblings: the empty list is returned
# instead of dying on a method call on an undefined parent.
sub siblings {
    my $elt    = shift;
    my $parent = $elt->{parent};

    my @children = $parent ? $parent->children(@_) : ();

    return grep { $_ ne $elt } @children;
}
|
6939
|
|
|
|
|
|
|
|
|
6940
|
|
|
|
|
|
|
# Position of the element among its siblings: 1 plus the number of elements
# reached by repeated prev_sibling( @_) calls.
# When a condition is given and the element does not match it, returns 0.
sub pos {
    my $elt = shift;

    if ( $_[0] ) {
        return 0 unless $elt->matches(@_);
    }

    my $rank = 1;
    while ( $elt = $elt->prev_sibling(@_) ) {
        $rank++;
    }

    return $rank;
}
|
6947
|
|
|
|
|
|
|
|
|
6948
|
|
|
|
|
|
|
|
|
6949
|
|
|
|
|
|
|
# List of the elements reached by calling next_sibling repeatedly (the extra
# arguments are passed to each call), closest sibling first.
sub next_siblings {
    my $elt = shift;

    my @after;
    for ( my $sib = $elt->next_sibling(@_) ; $sib ; $sib = $sib->next_sibling(@_) ) {
        push @after, $sib;
    }

    return @after;
}
|
6956
|
|
|
|
|
|
|
|
|
6957
|
|
|
|
|
|
|
|
|
6958
|
|
|
|
|
|
|
# used by get_xpath: parses the xpath expression, generates the source of a
# sub that performs the search, compiles it and returns the code reference
{
    # xpath axis name => name of the method used to follow that axis
    my %axis2method;

    BEGIN {
        %axis2method = (
            child                => 'children',
            descendant           => 'descendants',
            'descendant-or-self' => 'descendants_or_self',
            parent               => 'parent_is',
            ancestor             => 'ancestors',
            'ancestor-or-self'   => 'ancestors_or_self',
            'following-sibling'  => 'next_siblings',
            'preceding-sibling'  => 'prev_siblings',
            following            => 'following_elts',
            preceding            => 'preceding_elts',
            self                 => '_self',
        );
    }

    # $xpath_exp is consumed step by step; each step appends one statement
    # to the generated source, which filters or replaces @results.
    # Any part that cannot be parsed, and any compilation error, goes through
    # _croak_and_doublecheck_xpath. ($type is accepted but not used here.)
    sub _install_xpath {
        my ( $xpath_exp, $type ) = @_;
        my $original_exp = $xpath_exp;

        # source of the sub being generated
        my $code = 'my $elt= shift; my @results;';

        # grab the root if expression starts with a /, otherwise start from
        # the element (a leading ./ is simply removed)
        if ( $xpath_exp =~ s{^/}{} ) {
            $code .= '@results= ($elt->twig) || croak "cannot use an XPath query starting with a / on a node not attached to a whole twig";';
        }
        else {
            $xpath_exp =~ s{^\./}{};
            $code .= '@results= ($elt);';
        }

        #warn "xpath_exp= '$xpath_exp'\n";

        while (
            $xpath_exp
            && $xpath_exp =~ s{^\s*(/?)
                               # the xxx=~/regexp/ is a pain as it includes /
                               (\s*(?:(?:($REG_AXIS)::)?(\*|$REG_TAG_PART|\.\.|\.)\s*)?($REG_PREDICATE_ALT*)
                               )
                               (/|$)}{}xo
          )
        {
            my ( $wildcard, $sub_exp, $axis, $gi, $predicates ) = ( $1, $2, $3, $4, $5 );

            if ( $axis && !$gi ) {
                _croak_and_doublecheck_xpath( $original_exp, "error in xpath expression $original_exp" );
            }

            if ( $sub_exp eq '..' ) {
                # grab a parent
                _croak_and_doublecheck_xpath( $original_exp, "error in xpath expression $original_exp" ) if ($wildcard);
                $code .= '@results= map { $_->{parent}} @results;';
            }
            elsif ( $sub_exp =~ m{^\.(.*)$}s ) {
                # test the element itself
                $code .= "\@results= grep { \$_->matches( q{$1}) } \@results;";
            }
            else {
                # grab children (or descendants when the step started with an extra /)
                if ( !$axis ) {
                    $axis = $wildcard ? 'descendant' : 'child';
                }
                if ( !$gi or $gi eq '*' ) { $gi = ''; }
                my $function;

                # "special" predicates, that return just one element
                if ( $predicates && ( $predicates =~ m{^\s*\[\s*((-\s*)?\d+)\s*\]\s*$} ) ) {
                    # a single [n] predicate; positive n is turned into a 0-based offset
                    my $requested = $1;
                    my $offset    = $requested;
                    $offset-- if ( $offset > 0 );
                    $function =
                        $axis eq 'descendant' ? "next_n_elt( $offset, '$gi')"
                      : $axis eq 'child'      ? "child( $offset, '$gi')"
                      :   _croak_and_doublecheck_xpath( $original_exp, "error [$requested] not supported along axis '$axis'" );
                    $code .= "\@results= grep { \$_ } map { \$_->$function } \@results;";
                }
                elsif ( $predicates && ( $predicates =~ m{^\s*\[\s*last\s*\(\s*\)\s*\]\s*$} ) ) {
                    # last()
                    _croak_and_doublecheck_xpath( $original_exp, "error in xpath expression $original_exp, usage of // and last() not supported" ) if ($wildcard);
                    $code .= "\@results= map { \$_->last_child( '$gi') } \@results;";
                }
                else {
                    # follow the axis
                    #warn "axis: '$axis' - method: '$axis2method{$axis}' - gi: '$gi'\n";
                    my $step = " \$_->$axis2method{$axis}( '$gi')";

                    # now filter using the predicates, one at a time
                    while ( $predicates =~ s{^\s*($REG_PREDICATE_ALT)\s*}{}o ) {
                        my $pred = $1;
                        $pred =~ s{^\s*\[\s*}{};
                        $pred =~ s{\s*\]\s*$}{};
                        my $test = "";

                        if ( $pred =~ m{^(-?\s*\d+)$} ) {
                            # numeric predicate: pick one element out of the step
                            my $index = $1;
                            if ( $step =~ m{^\s*grep(.*) (\$_->\w+\(\s*'[^']*'\s*\))} ) {
                                $step = "XML::Twig::_first_n $1 $index, $2";
                            }
                            else {
                                if ( $index > 0 ) { $index--; }
                                $step = "($step)[$index]";
                            }

                            #warn "number predicate '$index' - generated step '$step'\n";
                        }
                        else {
                            # boolean predicate: translated token by token into $test
                            my $syntax_error = 0;
                            do {
                                if ( $pred =~ s{^string\(\s*\)\s*=\s*($REG_STRING)\s*}{}o ) {    # string()="string" pred
                                    $test .= "\$_->text eq $1";
                                }
                                elsif ( $pred =~ s{^string\(\s*\)\s*!=\s*($REG_STRING)\s*}{}o ) {    # string()!="string" pred
                                    $test .= "\$_->text ne $1";
                                }

                                # NOTE(review): this starts a second, independent
                                # if/elsif chain, so both chains can consume a token
                                # during the same iteration
                                if ( $pred =~ s{^string\(\s*\)\s*=\s*($REG_NUMBER)\s*}{}o ) {    # string()= number pred
                                    $test .= "\$_->text eq $1";
                                }
                                elsif ( $pred =~ s{^string\(\s*\)\s*!=\s*($REG_NUMBER)\s*}{}o ) {    # string()!= number pred
                                    $test .= "\$_->text ne $1";
                                }
                                elsif ( $pred =~ s{^string\(\s*\)\s*(>|<|>=|<=)\s*($REG_NUMBER)\s*}{}o ) {    # string() compared to a number
                                    $test .= "\$_->text $1 $2";
                                }
                                elsif ( $pred =~ s{^string\(\s*\)\s*($REG_MATCH)\s*($REG_REGEXP)\s*}{}o ) {    # string()=~/regex/ pred
                                    my ( $match, $regexp ) = ( $1, $2 );
                                    $test .= "\$_->text $match $regexp";
                                }
                                elsif ( $pred =~ s{^string\(\s*\)\s*}{}o ) {    # string() pred
                                    $test .= "\$_->text";
                                }
                                elsif ( $pred =~ s{^@($REG_TAG_NAME)\s*($REG_OP)\s*($REG_STRING|$REG_NUMBER)}{}o ) {    # @att="val" pred
                                    my ( $att, $oper, $val ) = ( $1, _op($2), $3 );
                                    $test .= qq{((defined \$_->{'att'}->{"$att"}) && (\$_->{'att'}->{"$att"} $oper $val))};
                                }
                                elsif ( $pred =~ s{^@($REG_TAG_NAME)\s*($REG_MATCH)\s*($REG_REGEXP)\s*}{}o ) {    # @att=~/regex/ pred XXX
                                    my ( $att, $match, $regexp ) = ( $1, $2, $3 );
                                    $test .= qq{((defined \$_->{'att'}->{"$att"}) && (\$_->{'att'}->{"$att"} $match $regexp))};
                                }
                                elsif ( $pred =~ s{^@($REG_TAG_NAME)\s*}{}o ) {    # @att pred
                                    $test .= qq{(defined \$_->{'att'}->{"$1"})};
                                }
                                elsif ( $pred =~ s{^\s*(?:not|!)\s*@($REG_TAG_NAME)\s*}{}o ) {    # not @att pred
                                    $test .= qq{((\$_->is_elt) && (not defined \$_->{'att'}->{"$1"}))};
                                }
                                elsif ( $pred =~ s{^\s*([()])}{} ) {    # ( or ) (just add to the test)
                                    $test .= qq{$1};
                                }
                                elsif ( $pred =~ s{^\s*(and|or)\s*}{} ) {
                                    $test .= lc " $1 ";
                                }
                                else {
                                    $syntax_error = 1;
                                }
                            } while ( !$syntax_error && $pred );

                            # anything left in $pred could not be parsed
                            _croak_and_doublecheck_xpath( $original_exp, "error in xpath expression $original_exp at $pred" ) if ($pred);
                            $step = " grep { $test } $step ";
                        }
                    }

                    #warn "step: '$step'";
                    $code .= "\@results= grep defined, map { $step } \@results;";
                }
            }
        }

        # anything left in the expression could not be parsed
        if ($xpath_exp) {
            _croak_and_doublecheck_xpath( $original_exp, "error in xpath expression $original_exp around $xpath_exp" );
        }

        $code .= q{return XML::Twig::_unique_elts( @results); };

        #warn "generated: '$code'\n";
        my $compiled = eval "sub { $NO_WARNINGS; $code }";
        if ($@) {
            _croak_and_doublecheck_xpath( $original_exp, "error in xpath expression $original_exp ($@);" );
        }
        return ($compiled);
    }
}
|
7119
|
|
|
|
|
|
|
|
|
7120
|
|
|
|
|
|
|
# Croak with the given message lines (joined by newlines).
# When $XML::Twig::XPath::VERSION is set, the expression is first parsed with
# the twig_xp object of a new XML::Twig::XPath; if that succeeds, a hint
# pointing at 'findnodes' is appended to the message.
sub _croak_and_doublecheck_xpath {
    my ( $xpath_expression, @lines ) = @_;
    my $mess = join( "\n", @lines );

    if ( $XML::Twig::XPath::VERSION || 0 ) {
        my $check_twig = XML::Twig::XPath->new;

        # eval: a parse failure only means that no hint is added
        my $parsed = eval { $check_twig->{twig_xp}->_parse($xpath_expression) };
        if ($parsed) {
            $mess .= "\nthe expression is a valid XPath statement, and you are using XML::Twig::XPath, but"
                   . "\nyou are using either 'find_nodes' or 'get_xpath' where the method you likely wanted"
                   . "\nto use is 'findnodes', which is the only one that uses the full XPath engine\n";
        }
    }

    croak $mess;
}
|
7133
|
|
|
|
|
|
|
|
|
7134
|
|
|
|
|
|
|
|
|
7135
|
|
|
|
|
|
|
|
|
7136
|
|
|
|
|
|
|
{
    # cache: xpath expression => code reference built by _install_xpath
    my %finder_for;

    # Run the xpath expression from $elt.
    # Without $offset, returns whatever the generated sub returns; with a
    # defined $offset, returns only the result at that index.
    sub get_xpath {
        my ( $elt, $xpath_exp, $offset ) = @_;

        my $finder = $finder_for{$xpath_exp};
        if ( !$finder ) {
            $finder = $finder_for{$xpath_exp} = _install_xpath($xpath_exp);
        }

        if ( defined $offset ) {
            my @res = $finder->($elt);
            return $res[$offset];
        }

        return $finder->($elt);
    }
}
|
7146
|
|
|
|
|
|
|
|
|
7147
|
|
|
|
|
|
|
|
|
7148
|
|
|
|
|
|
|
# Texts of the elements returned by get_xpath (same arguments), one per element.
sub findvalues {
    my $elt  = shift;
    my @hits = $elt->get_xpath(@_);
    return map { $_->text } @hits;
}
|
7152
|
|
|
|
|
|
|
|
|
7153
|
|
|
|
|
|
|
# Texts of the elements returned by get_xpath (same arguments), concatenated
# into a single string.
sub findvalue {
    my $elt   = shift;
    my @texts = map { $_->text } $elt->get_xpath(@_);
    return join '', @texts;
}
|
7157
|
|
|
|
|
|
|
|
|
7158
|
|
|
|
|
|
|
|
|
7159
|
|
|
|
|
|
|
# XML::XPath compatibility |
|
7160
|
|
|
|
|
|
|
# Look up an element by id through the twig's elt_id method.
sub getElementById {
    my ( $elt, $id ) = @_;
    return $elt->twig->elt_id($id);
}
|
7161
|
|
|
|
|
|
|
# Children of the element, collected by following first_child then the
# next_sibling links. Returns the list in list context and a reference to it
# otherwise.
sub getChildNodes {
    my @children;
    for ( my $child = $_[0]->{first_child} ; $child ; $child = $child->{next_sibling} ) {
        push @children, $child;
    }
    return wantarray ? @children : \@children;
}
|
7162
|
|
|
|
|
|
|
|
|
7163
|
|
|
|
|
|
|
# accessors for the 'flushed' field of an element

# value of the field
sub _flushed {
    my ($elt) = @_;
    return $elt->{flushed};
}

# set the field to 1 (and return 1)
sub _set_flushed {
    my ($elt) = @_;
    return $elt->{flushed} = 1;
}

# remove the field (and return its former value)
sub _del_flushed {
    my ($elt) = @_;
    return delete $elt->{flushed};
}
|
7166
|
|
|
|
|
|
|
|
|
7167
|
|
|
|
|
|
|
# Detach the element from its parent and siblings and return it.
#
# - an element without a parent that is_elt: when its twig exists and is not
#   being parsed, the twig's twig_root and the element's twig field are
#   deleted and the element is returned; otherwise nothing is returned
#   (returning $elt for an orphan would break backward compatibility)
# - otherwise the previous parent / prev_sibling / next_sibling are saved
#   under $elt->{former}, the neighbours are relinked, and the two siblings
#   that end up next to each other are merged when both are text of the same gi
sub cut {
    my $elt    = shift;
    my $parent = $elt->{parent};

    if ( !$parent && $elt->is_elt ) {
        # are we cutting the root?
        my $twig = $elt->{twig};
        return unless ( $twig && !$twig->{twig_parsing} );
        delete $twig->{twig_root};
        delete $elt->{twig};
        return $elt;
    }

    # save the old links, that'll make it easier for some loops
    for my $link (qw(parent prev_sibling next_sibling)) {
        $elt->{former}->{$link} = $elt->{$link};
        weaken( $elt->{former}->{$link} ) if ($XML::Twig::weakrefs);
    }

    # if we cut the current element then its parent becomes the current elt
    if ( $elt->{twig_current} ) {
        my $new_current = $elt->{parent};
        $elt->twig->{twig_current} = $new_current;
        $new_current->{'twig_current'} = 1;
        delete $elt->{'twig_current'};
    }

    if ( $parent->{first_child} && $parent->{first_child} == $elt ) {
        $parent->{first_child} = $elt->{next_sibling};

        # cutting can make the parent empty
        $parent->{empty} = 1 unless ( $parent->{first_child} );
    }

    if ( $parent->{last_child} && $parent->{last_child} == $elt ) {
        # NOTE(review): this also removes the 'empty' flag that was just set
        # above when elt was the only child - confirm this is intended
        delete $parent->{empty};
        $parent->{last_child} = $elt->{prev_sibling};
        weaken( $parent->{last_child} ) if ($XML::Twig::weakrefs);
    }

    # link the neighbours to each other
    my $before = $elt->{prev_sibling};
    $before->{next_sibling} = $elt->{next_sibling} if ($before);

    my $after = $elt->{next_sibling};
    if ($after) {
        $after->{prev_sibling} = $elt->{prev_sibling};
        weaken( $after->{prev_sibling} ) if ($XML::Twig::weakrefs);
    }

    # the element is now on its own (nothing to weaken in undef links)
    @{$elt}{qw(parent prev_sibling next_sibling)} = ( undef, undef, undef );

    # merge 2 (now) consecutive text nodes if they are of the same type
    # (type can be PCDATA or CDATA)
    if (   $before
        && $after
        && $before->is_text
        && ( $XML::Twig::index2gi[ $before->{'gi'} ] eq $XML::Twig::index2gi[ $after->{'gi'} ] ) )
    {
        $before->merge_text($after);
    }

    return $elt;
}
|
7224
|
|
|
|
|
|
|
|
|
7225
|
|
|
|
|
|
|
|
|
7226
|
|
|
|
|
|
|
# accessors for the links saved under $elt->{former} (cut fills them in)

sub former_next_sibling {
    my ($elt) = @_;
    return $elt->{former}->{next_sibling};
}

sub former_prev_sibling {
    my ($elt) = @_;
    return $elt->{former}->{prev_sibling};
}

sub former_parent {
    my ($elt) = @_;
    return $elt->{former}->{parent};
}
|
7229
|
|
|
|
|
|
|
|
|
7230
|
|
|
|
|
|
|
# Cut every child returned by children( $exp) and return those children.
# The element is flagged as empty when it has no children left.
sub cut_children {
    my ( $elt, $exp ) = @_;

    my @children = $elt->children($exp);
    $_->cut for @children;

    $elt->{empty} = 1 unless $elt->has_children;

    return @children;
}
|
7237
|
|
|
|
|
|
|
|
|
7238
|
|
|
|
|
|
|
# Cut every descendant returned by descendants( $exp) and return them.
# The element is flagged as empty when it has no children left.
sub cut_descendants {
    my ( $elt, $exp ) = @_;

    # the list is computed once, before anything is cut, and reused for the
    # loop (no need to walk the subtree a second time)
    my @descendants = $elt->descendants($exp);
    foreach my $descendant (@descendants) {
        $descendant->cut;
    }

    if ( !$elt->has_children ) { $elt->{empty} = 1; }

    return @descendants;
}
|
7245
|
|
|
|
|
|
|
|
|
7246
|
|
|
|
|
|
|
|
|
7247
|
|
|
|
|
|
|
# Remove the element but keep its children, which take its place. Returns the element.
#
# - croaks when the element is flagged twig_current (not completely parsed)
# - element with a parent: the children are moved before the element, the
#   element is deleted, and the first / last moved child is merged with the
#   text sibling of the same gi it ends up next to
# - element without a parent: only allowed with exactly one child, which is
#   installed as the root of the element's twig
sub erase {
    my $elt = shift;

    # you cannot erase the current element
    croak "trying to erase an element before it has been completely parsed"
      if ( $elt->{twig_current} );

    # children of $elt, through the first_child / next_sibling links
    my $children_of_elt = sub {
        my @kids;
        for ( my $kid = $elt->{first_child} ; $kid ; $kid = $kid->{next_sibling} ) {
            push @kids, $kid;
        }
        return @kids;
    };

    my $parent = $elt->{parent};

    if ( !$parent ) {
        # trying to erase the root (of a twig or of a cut/new element)
        my @children = $children_of_elt->();
        unless ( @children == 1 ) {
            croak "can only erase an element with no parent if it has a single child";
        }
        $elt->_move_extra_data_after_erase;
        my $only_child = shift @children;
        $only_child->{parent} = undef;
        $elt->twig->set_root($only_child);
        return $elt;
    }

    # normal case
    $elt->_move_extra_data_after_erase;
    my ( $first_child, @others ) = $children_of_elt->();

    if ( !$first_child ) {
        # no children, just cut the elt
        $elt->delete;
        return $elt;
    }

    # elt has children, move them up

    # the first child may need to be merged with a previous text
    $first_child->move( before => $elt );
    my $prev = $first_child->{prev_sibling};
    if (   $prev
        && $prev->is_text
        && ( $XML::Twig::index2gi[ $first_child->{'gi'} ] eq $XML::Twig::index2gi[ $prev->{'gi'} ] ) )
    {
        $prev->merge_text($first_child);
    }

    # move the rest of the children
    $_->move( before => $elt ) for @others;

    # now the elt had no child, delete it
    $elt->delete;

    # now see if we need to merge the last child with the next element
    my $last_child = $others[-1] || $first_child;    # if no last child, then it's also the first child
    my $next       = $last_child->{next_sibling};
    if (   $next
        && $next->is_text
        && ( $XML::Twig::index2gi[ $last_child->{'gi'} ] eq $XML::Twig::index2gi[ $next->{'gi'} ] ) )
    {
        $last_child->merge_text($next);
    }

    # if parsing and have now a PCDATA text, mark so we can normalize later on if need be
    if ( $parent->{twig_current} && $last_child->is_text ) {
        $parent->{twig_to_be_normalized} = 1;
    }

    return $elt;
}
|
7303
|
|
|
|
|
|
|
|
|
7304
|
|
|
|
|
|
|
# Hand over the extra data attached to an element that is being erased.
#
# - extra_data goes to the first child, or else the next sibling, or else is
#   prefixed to the parent's data before its end tag
# - extra_data_before_end_tag goes to the next sibling, or else to the parent
#   (when there is one)
# Returns the element.
sub _move_extra_data_after_erase {
    my ($elt) = @_;

    # put $data in front of what $target already holds; a target that is
    # neither $ELT nor $TEXT is left alone
    my $prepend_to = sub {
        my ( $target, $data ) = @_;
        if ( $target->is($ELT) ) {
            $target->set_extra_data( $data . ( $target->extra_data || '' ) );
        }
        elsif ( $target->is($TEXT) ) {
            $target->_unshift_extra_data_in_pcdata( $data, 0 );    # TO CHECK
        }
        return;
    };

    # extra_data
    if ( my $extra_data = $elt->{extra_data} ) {
        my $target = $elt->{first_child} || $elt->{next_sibling};
        if ($target) {
            $prepend_to->( $target, $extra_data );
        }
        else {
            # the parent always exists or the erase cannot be performed
            $elt->{parent}->_prefix_extra_data_before_end_tag($extra_data);
        }
    }

    # extra_data_before_end_tag
    if ( my $extra_data = $elt->{extra_data_before_end_tag} ) {
        my $target = $elt->{next_sibling};
        if ($target) {
            $prepend_to->( $target, $extra_data );
        }
        elsif ( my $parent = $elt->{parent} ) {
            $parent->_prefix_extra_data_before_end_tag($extra_data);
        }
    }

    return $elt;
}
|
7339
|
|
|
|
|
|
|
BEGIN {
    # position name => sub doing the actual pasting
    my %method = (
        before      => \&paste_before,
        after       => \&paste_after,
        first_child => \&paste_first_child,
        last_child  => \&paste_last_child,
        within      => \&paste_within,
    );

    # paste elt somewhere around ref
    # pos can be first_child (default), last_child, before, after or within
    #
    # Called as paste( $elt, $ref) or paste( $elt, $pos, $ref, ...).
    # Croaks when elt already has a parent, when the position is unknown,
    # when the target is missing or is not an XML::Twig::Elt, and when pasting
    # an element that has neither a 'target' nor a 'comment' field before or
    # after an element without a parent.
    # The ids listed in the element's twig_id_list are copied to the twig of
    # the target. Returns elt.
    sub paste    ## no critic (Subroutines::ProhibitNestedSubs);
    {
        my $elt = shift;

        croak "cannot paste an element that belongs to a tree" if ( $elt->{parent} );

        my $pos;
        if ( ref $_[0] ) {
            $pos = 'first_child';
            croak "wrong argument order in paste, should be $_[1] first" if ( $_[1] );
        }
        else {
            $pos = shift;
        }

        my $paster = $method{$pos};
        if ( !$paster ) {
            croak "tried to paste in wrong position '$pos', allowed positions "
                . " are 'first_child', 'last_child', 'before', 'after' and "
                . "'within'";
        }

        my $ref = $_[0];
        unless ( ref($ref) && isa( $ref, 'XML::Twig::Elt' ) ) {
            croak "missing target in paste" if ( !defined $ref );
            croak "wrong target type in paste (not a reference), should be XML::Twig::Elt or a subclass"
              if ( !ref $ref );
            my $class = ref $ref;
            croak "wrong target type in paste: '$class', should be XML::Twig::Elt or a subclass";
        }

        # check here so error message lists the caller file/line
        if (   !$ref->{parent}
            && ( $pos =~ m{^(before|after)$} )
            && !( exists $elt->{'target'} )
            && !( exists $elt->{'comment'} ) )
        {
            croak "cannot paste $1 root";
        }

        $elt->$paster(@_);

        # the ids of the pasted element are now known to the target's twig
        my $ids = $elt->{twig_id_list};
        if ( $ids && ( my $t = $ref->twig ) ) {
            $t->{twig_id_list} ||= {};
            foreach my $id ( keys %$ids ) {
                $t->{twig_id_list}->{$id} = $ids->{$id};
                weaken( $t->{twig_id_list}->{$id} ) if ($XML::Twig::weakrefs);
            }
        }

        return $elt;
    }

    # Link elt in as the previous sibling of ref and return it.
    # When ref has no parent: croaks, except that an elt with a 'comment' or
    # 'target' field is handed to the twig of ref as leading_cpi (nothing is
    # returned in that case).
    sub paste_before {
        my ( $elt, $ref ) = @_;

        my $parent = $ref->{parent};
        if ( !$parent ) {
            # trying to paste before an orphan (root or detached elt)
            my $t = $ref->twig or croak "cannot paste before an orphan element";
            croak "cannot paste before root"
              unless ( ( exists $elt->{'comment'} ) || ( exists $elt->{'target'} ) );
            $t->_add_cpi_outside_of_root( leading_cpi => $elt );    # we can still do this
            return;
        }

        my $prev_sibling = $ref->{prev_sibling};

        $elt->{parent} = $parent;
        weaken( $elt->{parent} ) if ($XML::Twig::weakrefs);
        $parent->{first_child} = $elt if ( $parent->{first_child} == $ref );

        $prev_sibling->{next_sibling} = $elt if ($prev_sibling);
        $elt->{prev_sibling} = $prev_sibling;
        weaken( $elt->{prev_sibling} ) if ($XML::Twig::weakrefs);

        $ref->{prev_sibling} = $elt;
        weaken( $ref->{prev_sibling} ) if ($XML::Twig::weakrefs);
        $elt->{next_sibling} = $ref;

        return $elt;
    }

    # Link elt in as the next sibling of ref and return it.
    # When ref has no parent: croaks, except that an elt with a 'comment' or
    # 'target' field is handed to the twig of ref as trailing_cpi (nothing is
    # returned in that case).
    sub paste_after {
        my ( $elt, $ref ) = @_;

        my $parent = $ref->{parent};
        if ( !$parent ) {
            # trying to paste after an orphan (root or detached elt)
            my $t = $ref->twig or croak "cannot paste after an orphan element";
            croak "cannot paste after root"
              unless ( ( exists $elt->{'comment'} ) || ( exists $elt->{'target'} ) );
            $t->_add_cpi_outside_of_root( trailing_cpi => $elt );    # we can still do this
            return;
        }

        my $next_sibling = $ref->{next_sibling};

        $elt->{parent} = $parent;
        weaken( $elt->{parent} ) if ($XML::Twig::weakrefs);
        if ( $parent->{last_child} == $ref ) {
            delete $parent->{empty};
            $parent->{last_child} = $elt;
            weaken( $parent->{last_child} ) if ($XML::Twig::weakrefs);
        }

        $ref->{next_sibling} = $elt;
        $elt->{prev_sibling} = $ref;
        weaken( $elt->{prev_sibling} ) if ($XML::Twig::weakrefs);

        if ($next_sibling) {
            $next_sibling->{prev_sibling} = $elt;
            weaken( $next_sibling->{prev_sibling} ) if ($XML::Twig::weakrefs);
        }
        $elt->{next_sibling} = $next_sibling;

        return $elt;
    }

    # Link elt in as the first child of ref and return it.
    sub paste_first_child {
        my ( $elt, $ref ) = @_;

        my $former_first = $ref->{first_child};

        $elt->{parent} = $ref;
        weaken( $elt->{parent} ) if ($XML::Twig::weakrefs);
        $ref->{first_child} = $elt;

        # ref had no child: elt is also its last child
        unless ( $ref->{last_child} ) {
            delete $ref->{empty};
            $ref->{last_child} = $elt;
            weaken( $ref->{last_child} ) if ($XML::Twig::weakrefs);
        }

        $elt->{prev_sibling} = undef;

        if ($former_first) {
            $former_first->{prev_sibling} = $elt;
            weaken( $former_first->{prev_sibling} ) if ($XML::Twig::weakrefs);
        }
        $elt->{next_sibling} = $former_first;

        return $elt;
    }

    # Link elt in as the last child of ref and return it.
    sub paste_last_child {
        my ( $elt, $ref ) = @_;

        my $former_last = $ref->{last_child};

        $elt->{parent} = $ref;
        weaken( $elt->{parent} ) if ($XML::Twig::weakrefs);

        delete $ref->{empty};
        $ref->{last_child} = $elt;
        weaken( $ref->{last_child} ) if ($XML::Twig::weakrefs);

        # ref had no child: elt is also its first child
        $ref->{first_child} = $elt unless ( $ref->{first_child} );

        $elt->{prev_sibling} = $former_last;
        weaken( $elt->{prev_sibling} ) if ($XML::Twig::weakrefs);
        $former_last->{next_sibling} = $elt if ($former_last);

        $elt->{next_sibling} = undef;

        return $elt;
    }

    # Split a text at $offset and paste elt before the part that split_at
    # returns. The text is ref itself when it is_text, otherwise the result
    # of $ref->next_elt( $TEXT, $ref). Returns elt.
    sub paste_within {
        my ( $elt, $ref, $offset ) = @_;

        my $text = $ref->is_text ? $ref : $ref->next_elt( $TEXT, $ref );
        my $tail = $text->split_at($offset);
        $elt->paste_before($tail);

        return $elt;
    }
}
|
7499
|
|
|
|
|
|
|
|
|
7500
|
|
|
|
|
|
|
# load an element into a structure similar to XML::Simple's |
|
7501
|
|
|
|
|
|
|
# Entry point for the XML::Simple-like conversion: normalizes and checks the
# options, derives the internal lookup tables used by _simplify, then returns
# whatever $elt->_simplify( \%options) returns.
#
# Options are passed as a key => value list after the element. Names written
# in camelCase are converted to snake_case (contentKey => content_key).
# Unknown options trigger a carp but are otherwise left in place.
#
#   keyattr         array ref of attribute names, or hash ref tag => attribute;
#                   a name can be prefixed with '+' (keep the attribute) or
#                   '-' (keep it, stored under "-name")
#   forcearray      true value, or array ref of tags to always store in arrays
#   forcecontent    always store text under the content key
#   content_key     key used for text (default 'content'); a leading '-'
#                   is stripped and turns on extra folding
#   var / var_attr  attribute used to define variables
#   variables       hash ref of predefined variables
#   var_regexp      regexp used to find variable references in text
#   normalise_space (or normalize_space) level of whitespace normalization
#   noattr, group_tags  passed through to _simplify
sub simplify {
    my $elt = shift;

    # normalize option names
    my %given = @_;
    my %options;
    foreach my $name ( keys %given ) {
        ( my $key = $name ) =~ s{(\w)([A-Z])}{$1_\L$2}g;
        $options{$key} = $given{$name};
    }

    # check options ('force_content' is what 'forceContent' normalizes to)
    my @allowed_options = qw( keyattr forcearray noattr content_key
                              var var_regexp variables var_attr
                              group_tags forcecontent force_content
                              normalise_space normalize_space
                            );
    my %allowed_options = map { $_ => 1 } @allowed_options;
    foreach my $option ( keys %options ) {
        carp "invalid option $option\n" unless ( $allowed_options{$option} );
    }

    # the option is accepted as 'forcecontent' but _simplify looks the flag up
    # as 'force_content': publish it under that name so it is actually honored
    $options{force_content} ||= $options{forcecontent};

    $options{normalise_space} ||= $options{normalize_space} || 0;

    $options{content_key} ||= 'content';
    if ( $options{content_key} =~ m{^-} ) {
        # need to remove the - and to activate extra folding
        $options{content_key} =~ s{^-}{};
        $options{extra_folding} = 1;
    }
    else {
        $options{extra_folding} = 0;
    }

    # a list of tags is turned into a lookup table, the blanket flag is reset
    $options{forcearray} ||= 0;
    if ( isa( $options{forcearray}, 'ARRAY' ) ) {
        my %forcearray_tags = map { $_ => 1 } @{ $options{forcearray} };
        $options{forcearray_tags} = \%forcearray_tags;
        $options{forcearray}      = 0;
    }

    $options{keyattr} ||= [ 'name', 'key', 'id' ];
    if ( ref $options{keyattr} eq 'ARRAY' ) {
        foreach my $keyattr ( @{ $options{keyattr} } ) {
            my ( $prefix, $att ) = ( $keyattr =~ m{^([+-])?(.*)} );
            $prefix ||= '';
            $options{key_for_all}->{$att} = 1;

            # only un-prefixed keys are removed from the element data, as in
            # the hash form below: removing '-' keys too would make the
            # prefix_key_for_all entry unreachable in _simplify
            $options{remove_key_for_all}->{$att} = 1 unless ($prefix);
            $options{prefix_key_for_all}->{$att} = 1 if ( $prefix eq '-' );
        }
    }
    elsif ( ref $options{keyattr} eq 'HASH' ) {
        while ( my ( $tag, $keyattr ) = each %{ $options{keyattr} } ) {
            my ( $prefix, $att ) = ( $keyattr =~ m{^([+-])?(.*)} );
            $prefix ||= '';
            $options{key_for_elt}->{$tag} = $att;
            $options{remove_key_for_elt}->{"$tag#$att"} = 1 unless ($prefix);
            $options{prefix_key_for_elt}->{"$tag#$att"} = 1 if ( $prefix eq '-' );
        }
    }

    $options{var} ||= $options{var_attr};    # for compat with XML::Simple
    if   ( $options{var} ) { $options{var_values} = {}; }
    else                   { $options{var}        = ''; }

    if ( $options{variables} ) {
        $options{var} ||= 1;
        $options{var_values} = $options{variables};
    }

    if ( $options{var_regexp} and !$options{var} ) {
        warn "var option not used, var_regexp option ignored\n";
    }
    $options{var_regexp} ||= '\$\{?(\w+)\}?';

    return $elt->_simplify( \%options );
}
|
7576
|
|
|
|
|
|
|
|
|
7577
|
|
|
|
|
|
|
# Recursive worker for simplify: builds and returns the data for $elt.
#
#   $elt     - element to convert
#   $options - hash ref prepared by simplify (normalized names plus the
#              key_for_*, remove_key_for_*, prefix_key_for_*, forcearray_tags,
#              var_values... tables)
#
# The result is a plain string when the element only holds a single text
# child and nothing forces a hash, a hash ref otherwise, or undef when
# nothing at all was stored.
sub _simplify {
    my ( $elt, $options ) = @_;

    my $data;

    my $gi = $XML::Twig::index2gi[ $elt->{'gi'} ];

    # collect the children by following the sibling links
    my @children;
    for ( my $child = $elt->{first_child}; $child; $child = $child->{next_sibling} ) {
        push @children, $child;
    }

    my %atts    = ( $options->{noattr} || !$elt->{att} ) ? () : %{ $elt->{att} };
    my $nb_atts = keys %atts;

    # number of uses of each name, children and attributes together
    my %nb_children;
    foreach my $child (@children) { $nb_children{ $child->tag }++; }
    foreach my $att ( keys %atts ) { $nb_children{$att}++; }

    # simplify validates the option as 'forcecontent', accept both spellings
    my $force_content = $options->{force_content} || $options->{forcecontent};

    # store children
    foreach my $child (@children) {
        if ( $child->is_text ) {
            # generate with a content key
            my $text = $elt->_text_with_vars($options);
            if ( $options->{normalise_space} >= 2 ) { $text = _normalize_space($text); }
            if ( $force_content || $nb_atts || ( scalar @children > 1 ) ) {
                $data->{ $options->{content_key} } = $text;
            }
            else {
                $data = $text;
            }
        }
        else {
            # element with sub-elements
            my $child_gi = $XML::Twig::index2gi[ $child->{'gi'} ];

            my $child_data = $child->_simplify($options);

            # first see if we need to simplify further the child data
            # simplify because of grouped tags
            if ( my $grouped_tag = $options->{group_tags}->{$child_gi} ) {
                # check that the child data is a hash with a single field
                my $grouped_child_data;
                if ( ( ref($child_data) eq 'HASH' ) && ( scalar( keys %$child_data ) == 1 ) ) {
                    $grouped_child_data = $child_data->{$grouped_tag};
                }
                croak "error in grouped tag $child_gi" unless defined $grouped_child_data;
                $child_data = $grouped_child_data;
            }

            # simplify because of extra folding
            if ( $options->{extra_folding} ) {
                if (   ( ref($child_data) eq 'HASH' )
                    && ( scalar( keys %$child_data ) == 1 )
                    && defined( my $content = $child_data->{ $options->{content_key} } ) )
                {
                    $child_data = $content;
                }
            }

            my $has_data = ( defined $child_data && $child_data ne "" );

            if ( my $keyatt = $child->_key_attr($options) ) {
                # simplify element with key
                my $key = $child->{'att'}->{$keyatt};
                if ( $options->{normalise_space} >= 1 ) { $key = _normalize_space($key); }
                $data->{$child_gi}->{$key} = $child_data;
            }
            elsif ($options->{forcearray}
                || $options->{forcearray_tags}->{$child_gi}
                || ( $nb_children{$child_gi} > 1 ) )
            {
                # simplify element to store in an array
                if ($has_data) {
                    $data->{$child_gi} ||= [];
                    push @{ $data->{$child_gi} }, $child_data;
                }
                elsif ( ref( $data->{$child_gi} ) eq 'ARRAY' ) {
                    # an empty child must not wipe out the siblings stored so far
                    push @{ $data->{$child_gi} }, {};
                }
                else {
                    $data->{$child_gi} = [ {} ];
                }
            }
            else {
                # simplify element to store as a hash field
                $data->{$child_gi} = $has_data ? $child_data : {};
            }
        }
    }

    # store atts
    # TODO: deal with att that already have an element by that name
    foreach my $att ( keys %atts ) {
        # do not store if the att is a key that needs to be removed
        if (   $options->{remove_key_for_all}->{$att}
            || $options->{remove_key_for_elt}->{"$gi#$att"} )
        {
            next;
        }

        my $att_text = $options->{var} ? _replace_vars_in_text( $atts{$att}, $options ) : $atts{$att};
        if ( $options->{normalise_space} >= 2 ) { $att_text = _normalize_space($att_text); }

        if (   $options->{prefix_key_for_all}->{$att}
            || $options->{prefix_key_for_elt}->{"$gi#$att"} )
        {
            # prefix the att
            $data->{"-$att"} = $att_text;
        }
        else {
            # normal case
            $data->{$att} = $att_text;
        }
    }

    return $data;
}
|
7688
|
|
|
|
|
|
|
|
|
7689
|
|
|
|
|
|
|
# Return the name of the attribute to use as key for $elt, or nothing
# (bare return) if there is none.
#
# No key is ever returned when the noattr option is set. If the key_for_all
# table exists, the first name returned by $elt->att_names that is in it
# wins; otherwise the key_for_elt table (tag => attribute) is consulted and
# its attribute is returned only if the element has a defined value for it.
sub _key_attr {
    my ( $elt, $options ) = @_;

    return if ( $options->{noattr} );

    if ( my $key_for_all = $options->{key_for_all} ) {
        foreach my $att ( $elt->att_names ) {
            return $att if $key_for_all->{$att};
        }
    }
    elsif ( my $key_for_elt = $options->{key_for_elt} ) {
        my $candidate = $key_for_elt->{ $XML::Twig::index2gi[ $elt->{'gi'} ] };
        if ($candidate) {
            return $candidate if ( defined( $elt->{'att'}->{$candidate} ) );
        }
    }

    return;
}
|
7704
|
|
|
|
|
|
|
|
|
7705
|
|
|
|
|
|
|
# Return the text of $elt. When the var option is on, variable references in
# the text are expanded through _replace_vars_in_text and the element is then
# given a chance to define a variable of its own (see _store_var).
sub _text_with_vars {
    my ( $elt, $options ) = @_;

    unless ( $options->{var} ) {
        my $plain = $elt->text;
        return $plain;
    }

    my $expanded = _replace_vars_in_text( $elt->text, $options );
    $elt->_store_var($options);
    return $expanded;
}
|
7716
|
|
|
|
|
|
|
|
|
7717
|
|
|
|
|
|
|
|
|
7718
|
|
|
|
|
|
|
# Return a copy of the text with each run of whitespace collapsed to a single
# space and the leading and trailing space (if any) removed.
sub _normalize_space {
    my ($text) = @_;

    for ($text) {
        s{\s+}{ }sg;    # collapse runs first, so at most one space is left at each end
        s{^\s}{};
        s{\s$}{};
    }

    return $text;
}
|
7725
|
|
|
|
|
|
|
|
|
7726
|
|
|
|
|
|
|
|
|
7727
|
|
|
|
|
|
|
# Return the number of attributes of the element (0 if it has no att hash).
sub att_nb {
    my $atts = $_[0]->{att} or return 0;
    return scalar keys %$atts;
}
|
7731
|
|
|
|
|
|
|
|
|
7732
|
|
|
|
|
|
|
# Return 1 if the element has no attribute (no att hash, or an empty one),
# 0 otherwise.
sub has_no_atts {
    my $atts = $_[0]->{att};
    return 1 unless $atts;
    return scalar( keys %$atts ) ? 0 : 1;
}
|
7736
|
|
|
|
|
|
|
|
|
7737
|
|
|
|
|
|
|
# Return $text with variable references expanded.
#
# References are found with $options->{var_regexp}, whose first capture group
# must be the variable name. A reference to a variable defined in
# $options->{var_values} is replaced by its value; an unknown variable
# triggers a warning and the reference is left as it was.
sub _replace_vars_in_text {
    my ( $text, $options ) = @_;

    my $expand = sub {
        my ( $reference, $name ) = @_;
        my $value = $options->{var_values}->{$name};
        return $value if defined $value;
        warn "unknown variable $name\n";
        return $reference;
    };

    # the whole reference is wrapped in a group, so the name ends up in $2
    $text =~ s{($options->{var_regexp})}{ $expand->( $1, $2) }gex;

    return $text;
}
|
7750
|
|
|
|
|
|
|
|
|
7751
|
|
|
|
|
|
|
# If $elt carries the attribute named by $options->{var}, record the text of
# the element in $options->{var_values} under the value of that attribute.
sub _store_var {
    my ( $elt, $options ) = @_;

    my $var_name = $elt->{'att'}->{ $options->{var} };
    if ( defined $var_name ) {
        $options->{var_values}->{$var_name} = $elt->text;
    }
}
|
7757
|
|
|
|
|
|
|
|
|
7758
|
|
|
|
|
|
|
|
|
7759
|
|
|
|
|
|
|
# split a text element at a given offset |
|
7760
|
|
|
|
|
|
|
# Split text at $offset and return the newly created element.
#
# The text that gets split is the one of $elt if it is a text element, of its
# first $TEXT child otherwise ('' is returned if there is no such child).
# That text element keeps the left part, stored in its pcdata field; the right
# part goes in a new element, created with the tag of $elt and pasted after
# $elt.
sub split_at {
    my ( $elt, $offset ) = @_;

    my $text_elt = $elt;
    unless ( $elt->is_text ) {
        $text_elt = $elt->first_child($TEXT) or return '';
    }

    my $whole = $text_elt->text;
    my $left  = substr( $whole, 0, $offset );
    my $right = substr( $whole, $offset );

    # the element holds text from now on
    delete $text_elt->{empty};
    $text_elt->{pcdata} = $left;

    my $new_elt = $elt->new( $XML::Twig::index2gi[ $elt->{'gi'} ], $right );
    $new_elt->paste( after => $elt );

    return $new_elt;
}
|
7771
|
|
|
|
|
|
|
|
|
7772
|
|
|
|
|
|
|
|
|
7773
|
|
|
|
|
|
|
# split an element or its text descendants into several, in place |
|
7774
|
|
|
|
|
|
|
# all elements (new and untouched) are returned |
|
7775
|
|
|
|
|
|
|
# Arguments after the element are handed over to _split: the regexp, then the
# optional tag / attribute hash ref pairs. Works on the element itself if it
# is a text element, on all its $TEXT descendants otherwise.
sub split {
    my ( $elt, @split_args ) = @_;

    my @text_chunks = $elt->is_text ? ($elt) : $elt->descendants($TEXT);

    my @elts;
    foreach my $text_chunk (@text_chunks) {
        # 1: ask for every element, not only the ones created for matches
        push @elts, $text_chunk->_split( 1, @split_args );
    }

    return @elts;
}
|
7785
|
|
|
|
|
|
|
|
|
7786
|
|
|
|
|
|
|
# split an element or its text descendants into several, in place |
|
7787
|
|
|
|
|
|
|
# created elements (those which match the regexp) are returned |
|
7788
|
|
|
|
|
|
|
# Arguments after the element are handed over to _split: the regexp, then the
# optional tag / attribute hash ref pairs. Works on the element itself if it
# is a text element, on all its $TEXT descendants otherwise.
sub mark {
    my ( $elt, @split_args ) = @_;

    my @text_chunks = $elt->is_text ? ($elt) : $elt->descendants($TEXT);

    my @marked;
    foreach my $text_chunk (@text_chunks) {
        # 0: only the elements created for matches are wanted
        push @marked, $text_chunk->_split( 0, @split_args );
    }

    return @marked;
}
|
7798
|
|
|
|
|
|
|
|
|
7799
|
|
|
|
|
|
|
# split a single text element |
|
7800
|
|
|
|
|
|
|
# return_all defines what is returned: if it is true
# all elements (new and untouched) are returned, otherwise
# only the elements created by matches in the split regexp are returned
|
7803
|
|
|
|
|
|
|
|
|
7804
|
|
|
|
|
|
|
|
|
7805
|
|
|
|
|
|
|
{ |
|
7806
|
|
|
|
|
|
|
|
|
7807
|
|
|
|
|
|
|
# Split the text element $elt on $regexp, in place, and return a list of
# elements.
#
# Arguments, after the element:
#   $return_all - true: the text elements (re-used or new) are returned along
#                 with the elements created for the matches;
#                 false: only the elements created for the matches
#   $regexp     - the separator; captures, if any, become the text of the
#                 new elements
#   then any number of: tag, optionally followed by a hash ref of attributes.
#   $1, $2... in a tag, an attribute name or an attribute value are replaced
#   by the captures. Without any tag the tag of the parent is used.
#
# The text before the first match stays in $elt; everything else is inserted
# after it, in document order.
sub _split {
    my $elt        = shift;
    my $return_all = shift;
    my $regexp     = shift;
    my @tags;

    while (@_) {
        my $tag = shift();
        if ( ref $_[0] ) { push @tags, { tag => $tag, atts => shift @_ }; }
        else             { push @tags, { tag => $tag }; }
    }

    unless (@tags) { @tags = { tag => $elt->{parent}->gi }; }

    my @result;    # the returned list of elements
    my $text = $elt->text;
    my $gi   = $XML::Twig::index2gi[ $elt->{'gi'} ];

    # 2 uses: if split matches then the first substring reuses $elt
    #         once a split has occurred then the last match needs to be put in
    #         a new element
    my $previous_match = 0;

    while ( my ( $pre_match, @matches ) = $text =~ /^(.*?)$regexp(.*)$/gcs ) {
        # the last capture is what is left of the text after the match
        $text = pop @matches;

        if ($previous_match) {
            # match, not the first one, create a new text ($gi) element
            _utf8_ify($pre_match) if ( $] < 5.010 );
            $elt = $elt->insert_new_elt( after => $gi, $pre_match );
            push @result, $elt if ($return_all);
        }
        else {
            # first match in $elt, re-use $elt for the first sub-string
            _utf8_ify($pre_match) if ( $] < 5.010 );
            $elt->set_text($pre_match);
            $previous_match++;    # store the fact that there was a match
            push @result, $elt if ($return_all);
        }

        # now deal with matches captured in the regexp
        if (@matches) {
            # match, with capture
            my $i = 0;
            foreach my $match (@matches) {
                # create new element, text is the match
                _utf8_ify($match) if ( $] < 5.010 );
                my $tag  = _repl_match( $tags[$i]->{tag}, @matches ) || '#PCDATA';
                my $atts = \%{ $tags[$i]->{atts} } || {};
                my %atts = map { _repl_match( $_, @matches ) => _repl_match( $atts->{$_}, @matches ) } keys %$atts;
                $elt = $elt->insert_new_elt( after => $tag, \%atts, $match );
                push @result, $elt;
                $i = ( $i + 1 ) % @tags;    # tags are used in turn
            }
        }
        else {
            # match, no captures
            my $tag  = $tags[0]->{tag};
            my $atts = \%{ $tags[0]->{atts} } || {};
            $elt = $elt->insert_new_elt( after => $tag, $atts );
            push @result, $elt;
        }
    }

    # test the length, not the truth, of the text: a trailing "0" is text too
    if ( $previous_match && defined $text && length $text ) {
        # there was at least 1 match, and there is text left after the match
        $elt = $elt->insert_new_elt( after => $gi, $text );
    }

    # NOTE(review): when there was a match and no text is left, $elt is the
    # last inserted element, which is already in @result
    push @result, $elt if ($return_all);

    return @result;    # return all elements
}
|
7880
|
|
|
|
|
|
|
|
|
7881
|
|
|
|
|
|
|
# Return $val with $1, $2... replaced by the matching entry of the list of
# captures that follows it in the arguments ($1 is the first capture).
sub _repl_match {
    my $val     = shift;
    my @matches = @_;

    $val =~ s{\$(\d+)}{$matches[$1-1]}g;

    return $val;
}
|
7886
|
|
|
|
|
|
|
|
|
7887
|
|
|
|
|
|
|
# evil hack: sometimes the utf8 flag of a string should be set but is not.
# For perl versions from 5.8 up to (not including) 5.10, and unless
# _keep_encoding() is true, the flag is forced on the argument, in place.
my $encode_is_loaded = 0;    # so we only load Encode once

sub _utf8_ify {
    my $flag_needed = ( $perl_version >= 5.008 and $perl_version < 5.010 and !_keep_encoding() );
    if ($flag_needed) {
        if ( !$encode_is_loaded ) {
            require Encode;
            Encode->import;
            $encode_is_loaded++;
        }
        Encode::_utf8_on( $_[0] );    # the flag should be set but is not
    }
}
|
7896
|
|
|
|
|
|
|
|
|
7897
|
|
|
|
|
|
|
|
|
7898
|
|
|
|
|
|
|
} |
|
7899
|
|
|
|
|
|
|
|
|
7900
|
|
|
|
|
|
|
{ my %replace_sub; # cache for complex expressions (expression => sub) |
|
7901
|
|
|
|
|
|
|
|
|
7902
|
|
|
|
|
|
|
# Replace what $regexp matches in the text of $elt (the element itself and its
# $TEXT descendants) and return $elt.
#
#   $regexp  - what to look for
#   $replace - the replacement: either a plain string, in which $1..$9 refer
#              to the captures, or an expression using &elt( ...) / &ent( ...),
#              which creates elements or entities (see _install_replace_sub)
#
# In the second case the text elements get split around each match, so the
# parents of the modified text are normalized at the end.
sub subs_text {
    my ( $elt, $regexp, $replace ) = @_;

    my $plain_replacement = _is_string($replace);

    my @parents;

  TEXT_ELT:
    foreach my $text_elt ( $elt->descendants_or_self($TEXT) ) {
        if ($plain_replacement) {
            my $text = $text_elt->text;
            $text =~ s{$regexp}{ _replace_var( $replace, $1, $2, $3, $4, $5, $6, $7, $8, $9)}egx;
            $text_elt->set_text($text);
            next TEXT_ELT;
        }

        no utf8;    # = perl 5.6

        # the code generated for an expression is cached
        my $replace_sub = ( $replace_sub{$replace} ||= _install_replace_sub($replace) );
        my $text = $text_elt->text;

        while ( my ( $pre_match_string, $match_string, @var ) = ( $text =~ m{(.*?)($regexp)}sg ) ) {
            # isolate the match in an element of its own
            my $match_start = length($pre_match_string);
            my $match       = $match_start ? $text_elt->split_at($match_start) : $text_elt;
            my $post_match  = $match->split_at( length($match_string) );

            $replace_sub->( $match, @var );

            # go on with the text after the match
            $text_elt = $post_match;
            $text     = $post_match->text;

            my $parent = $text_elt->{parent};
            push @parents, $parent unless $parents[-1] && $parents[-1] == $parent;
        }
    }

    foreach my $parent (@parents) { $parent->normalize; }

    return $elt;
}
|
7946
|
|
|
|
|
|
|
|
|
7947
|
|
|
|
|
|
|
|
|
7948
|
|
|
|
|
|
|
# Return 0 if the replacement contains an &elt or &ent construct, 1 if it is a
# plain string.
sub _is_string {
    return 0 if $_[0] =~ m{&e[ln]t};
    return 1;
}
|
7950
|
|
|
|
|
|
|
|
|
7951
|
|
|
|
|
|
|
# Return $string with $1..$9 replaced by the matching value from the list
# that follows it in the arguments ($1 is the first value).
sub _replace_var {
    my ( $string, @values ) = @_;

    # shift the values so that the index is the number used in the string
    my @var = ( undef, @values );
    $string =~ s{\$(\d)}{$var[$1]}g;

    return $string;
}
|
7957
|
|
|
|
|
|
|
|
|
7958
|
|
|
|
|
|
|
# Compile a replacement expression into a sub and return the code ref.
#
# The expression is cut into &elt( ...) parts, &ent( ...) parts and literal
# text. For each part, in order, the generated code creates a new element
# (from the arguments of &elt, as '#ENT' for &ent, as '#PCDATA' for text) and
# pastes it after the previous one, starting after the matched element; the
# matched element is deleted at the end. $1..$9 in the expression refer to
# the captures passed to the generated sub after the matched element.
#
# croaks if the generated code does not compile.
sub _install_replace_sub {
    my ($replace_exp) = @_;

    my @code = ( q{ my( $match, @var)= @_; my $new; my $last_inserted=$match;} );

    foreach my $item ( split m{(&e[ln]t\s*\([^)]*\))}, $replace_exp ) {
        next unless length $item;

        my $exp;
        if    ( $item =~ m{^&elt\s*\(([^)]*)\)} )              { $exp = $1; }
        elsif ( $item =~ m{^&ent\s*\(\s*([^\s)]*)\s*\)} )      { $exp = " '#ENT' => $1"; }
        else                                                   { $exp = qq{ '#PCDATA' => "$item"}; }

        $exp =~ s{\$(\d)}{my $i= $1-1; "\$var[$i]"}eg;    # replace references to matches

        push @code, qq{ \$new= \$match->new( $exp); },
                    q{ $new->paste( after => $last_inserted); $last_inserted=$new;};
    }

    push @code, q{ $match->delete; };

    my $sub = join '', @code;

    #$sub=~ s/;/;\n/g; warn "subs: $sub";
    my $coderef = eval "sub { $NO_WARNINGS; $sub }";
    if ($@) { croak( "invalid replacement expression $replace_exp: ", $@ ); }

    return $coderef;
}
|
7981
|
|
|
|
|
|
|
|
|
7982
|
|
|
|
|
|
|
} |
|
7983
|
|
|
|
|
|
|
|
|
7984
|
|
|
|
|
|
|
|
|
7985
|
|
|
|
|
|
|
# Append the text of $e2 to $e1, delete $e2 and return $e1.
#
# Both must be text elements with the same gi, otherwise the method croaks.
# The extra data attached to the text of $e2 is carried over to $e1, its
# offsets shifted by the original length of the text of $e1.
sub merge_text {
    my ( $e1, $e2 ) = @_;

    isa( $e2, 'XML::Twig::Elt' )
      or croak "invalid merge: can only merge 2 elements";

    my $mergeable = $e1->is_text && $e2->is_text && ( $e1->gi eq $e2->gi );
    croak "invalid merge: can only merge 2 text elements" unless $mergeable;

    my $t1_length = length( $e1->text );

    $e1->set_text( $e1->text . $e2->text );

    my $extra_data_in_pcdata = $e2->_extra_data_in_pcdata;
    if ($extra_data_in_pcdata) {
        foreach my $data (@$extra_data_in_pcdata) {
            $e1->_push_extra_data_in_pcdata( $data->{text}, $data->{offset} + $t1_length );
        }
    }

    $e2->delete;

    return $e1;
}
|
8003
|
|
|
|
|
|
|
|
|
8004
|
|
|
|
|
|
|
# Move the children of $e2 at the end of $e1, delete $e2 and return $e1.
#
# When the last child of $e1 and the first child of $e2 are both pcdata they
# are fused in the child of $e1, and the extra data found around the junction
# is re-attached to the fused text.
sub merge {
    my ( $e1, $e2 ) = @_;

    my @e2_children = $e2->_children;
    my $child1      = $e1->_last_child;

    if ( $child1 && $child1->is_pcdata && @e2_children && $e2_children[0]->is_pcdata ) {
        my $t1_length = length( $child1->{pcdata} );
        my $child2    = shift @e2_children;
        $child1->{pcdata} .= $child2->{pcdata};

        # what was between the 2 texts ends up in the middle of the fused text
        my $extra_data = $e1->_extra_data_before_end_tag . $e2->extra_data;
        if ($extra_data) {
            $e1->_del_extra_data_before_end_tag;
            $child1->_push_extra_data_in_pcdata( $extra_data, $t1_length );
        }

        my $extra_data_in_pcdata = $child2->_extra_data_in_pcdata;
        if ($extra_data_in_pcdata) {
            foreach my $data (@$extra_data_in_pcdata) {
                $child1->_push_extra_data_in_pcdata( $data->{text}, $data->{offset} + $t1_length );
            }
        }

        my $extra_data_before_end_tag = $e2->_extra_data_before_end_tag;
        if ($extra_data_before_end_tag) {
            $e1->_set_extra_data_before_end_tag($extra_data_before_end_tag);
        }
    }

    foreach my $child (@e2_children) { $child->move( last_child => $e1 ); }

    $e2->delete;

    return $e1;
}
|
8034
|
|
|
|
|
|
|
|
|
8035
|
|
|
|
|
|
|
|
|
8036
|
|
|
|
|
|
|
# recursively copy an element and returns the copy (can be huge and long) |
|
8037
|
|
|
|
|
|
|
# Return a deep copy of the element: the content (text, cdata, pi, comment,
# entity, or attributes and children, copied recursively), the extra data and
# the asis and empty flags are duplicated. The copy is not attached to any
# tree, but remembers where the original was in its 'former' field.
sub copy {
    my $elt  = shift;
    my $copy = $elt->new( $XML::Twig::index2gi[ $elt->{'gi'} ] );

    if ( $elt->extra_data ) { $copy->set_extra_data( $elt->extra_data ); }
    if ( $elt->{extra_data_before_end_tag} ) {
        $copy->_set_extra_data_before_end_tag( $elt->{extra_data_before_end_tag} );
    }

    $copy->set_asis if $elt->is_asis;

    if ( exists $elt->{'pcdata'} ) {
        delete $copy->{empty};
        $copy->{pcdata} = $elt->{pcdata};
        if ( $elt->{extra_data_in_pcdata} ) { $copy->_set_extra_data_in_pcdata( $elt->{extra_data_in_pcdata} ); }
    }
    elsif ( exists $elt->{'cdata'} ) {
        $copy->{cdata} = $elt->{cdata};
        if ( $elt->{extra_data_in_pcdata} ) { $copy->_set_extra_data_in_pcdata( $elt->{extra_data_in_pcdata} ); }
    }
    elsif ( exists $elt->{'target'} ) {
        $copy->_set_pi( $elt->{target}, $elt->{data} );
    }
    elsif ( exists $elt->{'comment'} ) {
        $copy->{comment} = $elt->{comment};
    }
    elsif ( exists $elt->{'ent'} ) {
        $copy->{ent} = $elt->{ent};
    }
    else {
        # regular element: list the children, then copy attributes and children
        my @children;
        for ( my $child = $elt->{first_child}; $child; $child = $child->{next_sibling} ) {
            push @children, $child;
        }

        if ( my $atts = $elt->{att} ) {
            my %atts;
            tie %atts, 'Tie::IxHash' if ( keep_atts_order() );
            %atts = %{$atts};    # we want to do a real copy of the attributes
            $copy->set_atts( \%atts );
        }

        foreach my $child (@children) {
            my $child_copy = $child->copy;
            $child_copy->paste( 'last_child', $copy );
        }
    }

    # save links to the original location, which can be convenient and is used for namespace resolution
    foreach my $link (qw(parent prev_sibling next_sibling)) {
        $copy->{former}->{$link} = $elt->{$link};
        if ($XML::Twig::weakrefs) { weaken( $copy->{former}->{$link} ); }
    }

    $copy->{empty} = $elt->{'empty'};

    return $copy;
}
|
8083
|
|
|
|
|
|
|
|
|
8084
|
|
|
|
|
|
|
|
|
8085
|
|
|
|
|
|
|
# Cut the element from its tree, then, unless weak references are in use,
# call its DESTROY method explicitly. Always returns undef.
sub delete {
    my ($elt) = @_;

    $elt->cut;
    if ( !$XML::Twig::weakrefs ) { $elt->DESTROY; }

    return undef;
}
|
8091
|
|
|
|
|
|
|
|
|
8092
|
|
|
|
|
|
|
# Destructor used when weak references are not available (see set_destroy):
# destroys the children, unregisters the id of the element, then empties the
# element so that no link to other elements is left in it.
#
#   $elt - the element
#   $t   - optional, the twig; passed in recursive calls, looked up through
#          $elt->twig otherwise
#
# Does nothing when $XML::Twig::weakrefs is true.
sub __destroy {
    my $elt = shift;
    return if ($XML::Twig::weakrefs);
    my $t = shift || $elt->twig;    # optional argument, passed in recursive calls

    # work on a copy of the list of children
    foreach ( @{ [ $elt->_children ] } ) { $_->DESTROY($t); }

    # the id reference needs to be destroyed
    # lots of tests to avoid warnings during the cleanup phase
    $elt->del_id($t) if ( $ID && $t && defined( $elt->{att} ) && exists( $elt->{att}->{$ID} ) );

    if ( $elt->{former} ) {
        foreach ( keys %{ $elt->{former} } ) { delete $elt->{former}->{$_}; }
        delete $elt->{former};
    }

    # remove every field: the former "qw( keys %$elt)" was the list of the 2
    # strings 'keys' and '%$elt', so nothing was actually deleted
    %$elt = ();

    undef $elt;
}
|
8106
|
|
|
|
|
|
|
|
|
8107
|
|
|
|
|
|
|
# set_destroy installs or removes the destructor, depending on
# $XML::Twig::weakrefs: with weak references DESTROY is undefined, without
# them it is aliased to __destroy. It is called once, at compile time.
BEGIN {
    sub set_destroy {
        if ($XML::Twig::weakrefs) {
            undef *DESTROY
        }
        else {
            *DESTROY = *__destroy;
        }
    }

    set_destroy();
}
|
8111
|
|
|
|
|
|
|
|
|
8112
|
|
|
|
|
|
|
# ignores the element |
|
8113
|
|
|
|
|
|
|
# Delegate to the ignore method of the twig of the element, which gets the
# element followed by any extra argument, and return its result.
sub ignore {
    my ( $elt, @args ) = @_;
    return $elt->twig->ignore( $elt, @args );
}
|
8118
|
|
|
|
|
|
|
|
|
8119
|
|
|
|
|
|
|
BEGIN { |
|
8120
|
|
|
|
|
|
|
# ---- output settings shared by all elements --------------------------------
# These lexicals are closed over by the accessors and the tag/print subs that
# follow; global_state / set_global_state save and restore them as a group.
my $pretty                    = 0;      # current pretty print style (a value of %pretty_print_style)
my $quote                     = '"';    # quote put around attribute values
my $INDENT                    = ' ';    # string repeated once per indentation level
my $empty_tag_style           = 0;      # current empty tag style (a value of %empty_tag_style)
my $remove_cdata              = 0;
my $keep_encoding             = 0;
my $expand_external_entities  = 0;
my $keep_atts_order           = 0;
my $do_not_escape_amp_in_atts = 0;
my $WRAP                      = '80';
my $REPLACED_ENTS             = qq{&<}; # default value for $replaced_ents

# numeric codes of the pretty print styles
my ($NSGMLS, $NICE, $INDENTED, $INDENTEDCT, $INDENTEDC, $WRAPPED, $RECORD1, $RECORD2, $INDENTEDA)= (1..9);

# styles handled by the "keep text and tag on one line" branch of start_tag / end_tag
my %KEEP_TEXT_TAG_ON_ONE_LINE;
@KEEP_TEXT_TAG_ON_ONE_LINE{ $INDENTED, $INDENTEDCT, $INDENTEDC, $INDENTEDA, $WRAPPED } = (1) x 5;

# styles for which set_pretty_print requires Text::Wrap
my %WRAPPED;
@WRAPPED{ $WRAPPED, $INDENTEDA, $INDENTEDC } = (1) x 3;

# style name => numeric code
my %pretty_print_style = (
    none               => 0,            # no added \n
    nsgmls             => $NSGMLS,      # nsgmls-style, \n in tags
    # below this line styles are UNSAFE (the generated XML can be well-formed but invalid)
    nice               => $NICE,        # \n after open/close tags except when the
                                        # element starts with text
    indented           => $INDENTED,    # nice plus indented
    indented_close_tag => $INDENTEDCT,  # nice plus indented
    indented_c         => $INDENTEDC,   # slightly more compact than indented (closing
                                        # tags are on the same line)
    wrapped            => $WRAPPED,     # text is wrapped at column
    record_c           => $RECORD1,     # for record-like data (compact)
    record             => $RECORD2,     # for record-like data (not so compact)
    indented_a         => $INDENTEDA,   # nice, indented, and with attributes on separate
                                        # lines as the nsgmls style, as well as wrapped
                                        # lines - to make the xml friendly to line-oriented tools
    cvs                => $INDENTEDA,   # alias for indented_a
);

# numeric codes of the empty tag styles, and style name => numeric code
my ($HTML, $EXPAND)= (1..2);
my %empty_tag_style = (
    normal => 0,
    html   => $HTML,
    xhtml  => $HTML,    # same code as html
    expand => $EXPAND,
);

# quote style name => quote character
my %quote_style = (
    double => '"',
    single => "'",
    # smart => "smart",
);

my $xml_space_preserve;    # set when an element includes xml:space="preserve"

my $output_filter;         # filters the entire output (including < and >)
my $output_text_filter;    # filters only the text part (tag names, attributes, pcdata)

my $replaced_ents = $REPLACED_ENTS;
|
8175
|
|
|
|
|
|
|
|
|
8176
|
|
|
|
|
|
|
|
|
8177
|
|
|
|
|
|
|
# Snapshots the shared output settings into a fresh hashref, so that they can
# be handed back to set_global_state later (e.g. when switching between twigs).
sub global_state ## no critic (Subroutines::ProhibitNestedSubs);
{
    my %state = (
        pretty                    => $pretty,
        quote                     => $quote,
        indent                    => $INDENT,
        empty_tag_style           => $empty_tag_style,
        remove_cdata              => $remove_cdata,
        keep_encoding             => $keep_encoding,
        expand_external_entities  => $expand_external_entities,
        output_filter             => $output_filter,
        output_text_filter        => $output_text_filter,
        keep_atts_order           => $keep_atts_order,
        do_not_escape_amp_in_atts => $do_not_escape_amp_in_atts,
        wrap                      => $WRAP,
        replaced_ents             => $replaced_ents,
    );
    return \%state;
}
|
8195
|
|
|
|
|
|
|
|
|
8196
|
|
|
|
|
|
|
# Restores the shared output settings from a hashref shaped like the one
# returned by global_state. Every setting is overwritten, so a key missing
# from $state leaves the corresponding setting undefined.
sub set_global_state {
    my $state = shift;
    $pretty                    = $state->{pretty};
    $quote                     = $state->{quote};
    $INDENT                    = $state->{indent};
    $empty_tag_style           = $state->{empty_tag_style};
    $remove_cdata              = $state->{remove_cdata};
    $keep_encoding             = $state->{keep_encoding};
    $expand_external_entities  = $state->{expand_external_entities};
    $output_filter             = $state->{output_filter};
    $output_text_filter        = $state->{output_text_filter};
    $keep_atts_order           = $state->{keep_atts_order};
    $do_not_escape_amp_in_atts = $state->{do_not_escape_amp_in_atts};
    $WRAP                      = $state->{wrap};
    # this statement used to end with a comma instead of a semicolon
    $replaced_ents             = $state->{replaced_ents};
}
|
8213
|
|
|
|
|
|
|
|
|
8214
|
|
|
|
|
|
|
# Resets the shared output settings to their defaults.
# Note that indent and wrap are passed through with their current values, so
# those two settings are left as they are.
sub init_global_state {
    my %defaults = (
        pretty                    => 0,
        quote                     => '"',
        indent                    => $INDENT,
        empty_tag_style           => 0,
        remove_cdata              => 0,
        keep_encoding             => 0,
        expand_external_entities  => 0,
        output_filter             => undef,
        output_text_filter        => undef,
        keep_atts_order           => undef,
        do_not_escape_amp_in_atts => 0,
        wrap                      => $WRAP,
        replaced_ents             => $REPLACED_ENTS,
    );
    return set_global_state( \%defaults );
}
|
8232
|
|
|
|
|
|
|
|
|
8233
|
|
|
|
|
|
|
|
|
8234
|
|
|
|
|
|
|
# Sets the pretty print style (in $pretty) and returns the old one.
# Can be called from outside the package with 2 arguments (elt, style)
# or from inside with only one argument (style).
# The style is either a string (one of the keys of %pretty_print_style)
# or a number (presumably an old value saved), which must be the code of an
# existing style.
# Croaks on an unknown style, or when the style needs Text::Wrap and it
# cannot be loaded; in both cases the current style is left untouched.
sub set_pretty_print {
    my $style      = lc( defined $_[1] ? $_[1] : $_[0] );    # so we cover both cases
    my $old_pretty = $pretty;

    my $new_pretty;
    if ( $style =~ /^\d+$/ ) {
        # comparing against the number of keys let through numbers that are
        # not the code of any style, so check the actual codes instead
        croak "invalid pretty print style $style"
          unless ( grep { $_ == $style } values %pretty_print_style );
        $new_pretty = $style;
    }
    else {
        croak "invalid pretty print style '$style'"
          unless ( exists $pretty_print_style{$style} );
        $new_pretty = $pretty_print_style{$style};
    }

    # check the dependency before changing the state
    if ( $WRAPPED{$new_pretty} ) {
        XML::Twig::_use('Text::Wrap')
          or croak("Text::Wrap not available, cannot use style $style");
    }

    $pretty = $new_pretty;
    return $old_pretty;
}
|
8254
|
|
|
|
|
|
|
|
|
8255
|
|
|
|
|
|
|
# Read accessor for the current pretty print style (its numeric code).
sub _pretty_print {
    return $pretty;
}
|
8256
|
|
|
|
|
|
|
|
|
8257
|
|
|
|
|
|
|
# Sets the empty tag style (in $empty_tag_style) and returns the old one.
# Can be called from outside the package with 2 arguments (elt, style)
# or from inside with only one argument (style).
# The style is either a string (one of the keys of %empty_tag_style)
# or a number (presumably an old value saved), which must be the code of an
# existing style. Croaks on anything else.
sub set_empty_tag_style {
    my $style     = lc( defined $_[1] ? $_[1] : $_[0] );    # so we cover both cases
    my $old_style = $empty_tag_style;

    if ( $style =~ /^\d+$/ ) {
        # several names share a code, so the number of keys is not a valid
        # upper bound: check against the actual codes
        croak "invalid empty tag style $style"
          unless ( grep { $_ == $style } values %empty_tag_style );
        $empty_tag_style = $style;
    }
    else {
        croak "invalid empty tag style '$style'"
          unless ( exists $empty_tag_style{$style} );
        $empty_tag_style = $empty_tag_style{$style};
    }
    return $old_style;
}
|
8277
|
|
|
|
|
|
|
|
|
8278
|
|
|
|
|
|
|
# Lists the names of the pretty print styles, ordered by numeric code and,
# for names sharing a code, alphabetically.
sub _pretty_print_styles {
    return (
        sort {
            ( $pretty_print_style{$a} <=> $pretty_print_style{$b} )
              || ( $a cmp $b )
        } keys %pretty_print_style
    );
}
|
8280
|
|
|
|
|
|
|
|
|
8281
|
|
|
|
|
|
|
# Sets the quote used around attribute values from a style name (a key of
# %quote_style) and returns the previous quote character.
# Accepts (style) or (elt, style); croaks on an unknown style.
sub set_quote {
    my $style = $_[1] ? $_[1] : $_[0];
    my $old_quote = $quote;
    exists $quote_style{$style} or croak "invalid quote '$style'";
    $quote = $quote_style{$style};
    return $old_quote;
}
|
8288
|
|
|
|
|
|
|
|
|
8289
|
|
|
|
|
|
|
# Sets the remove_cdata setting and returns its previous value.
# Accepts (value) or (elt, value).
sub set_remove_cdata {
    my $previous = $remove_cdata;
    $remove_cdata = defined $_[1] ? $_[1] : $_[0];
    return $previous;
}
|
8295
|
|
|
|
|
|
|
|
|
8296
|
|
|
|
|
|
|
|
|
8297
|
|
|
|
|
|
|
# Sets the indent string and returns the previous one.
# Accepts (value) or (elt, value).
sub set_indent {
    my $previous = $INDENT;
    $INDENT = defined $_[1] ? $_[1] : $_[0];
    return $previous;
}
|
8303
|
|
|
|
|
|
|
|
|
8304
|
|
|
|
|
|
|
# Sets the wrap setting ($WRAP) and returns its previous value.
# Accepts (value) or (elt, value).
sub set_wrap {
    my $previous = $WRAP;
    $WRAP = defined $_[1] ? $_[1] : $_[0];
    return $previous;
}
|
8310
|
|
|
|
|
|
|
|
|
8311
|
|
|
|
|
|
|
|
|
8312
|
|
|
|
|
|
|
# Sets the keep_encoding setting and returns its previous value.
# Accepts (value) or (elt, value).
sub set_keep_encoding {
    my $previous = $keep_encoding;
    $keep_encoding = defined $_[1] ? $_[1] : $_[0];
    return $previous;
}
|
8318
|
|
|
|
|
|
|
|
|
8319
|
|
|
|
|
|
|
# Sets the string of characters replaced by entities on output and returns
# the previous one. Accepts (value) or (elt, value).
sub set_replaced_ents {
    my $previous = $replaced_ents;
    $replaced_ents = defined $_[1] ? $_[1] : $_[0];
    return $previous;
}
|
8325
|
|
|
|
|
|
|
|
|
8326
|
|
|
|
|
|
|
# Stops '>' from being replaced on output: only '&' and '<' stay in the
# replaced set. Returns the previous set.
sub do_not_escape_gt {
    my $previous = $replaced_ents;
    $replaced_ents = '&<';    # & needs to be first
    return $previous;
}
|
8331
|
|
|
|
|
|
|
|
|
8332
|
|
|
|
|
|
|
# Adds '>' to the characters replaced on output ('&', '<' and '>').
# Returns the previous set.
sub escape_gt {
    my $previous = $replaced_ents;
    $replaced_ents = '&<>';    # & needs to be first
    return $previous;
}
|
8337
|
|
|
|
|
|
|
|
|
8338
|
|
|
|
|
|
|
# Read accessor for the keep_encoding setting, so it can be used elsewhere in the module.
sub _keep_encoding {
    return $keep_encoding;
}
|
8339
|
|
|
|
|
|
|
|
|
8340
|
|
|
|
|
|
|
# Sets the do_not_escape_amp_in_atts setting and returns its previous value.
# Accepts (value) or (elt, value).
sub set_do_not_escape_amp_in_atts {
    my $previous = $do_not_escape_amp_in_atts;
    $do_not_escape_amp_in_atts = defined $_[1] ? $_[1] : $_[0];
    return $previous;
}
|
8346
|
|
|
|
|
|
|
|
|
8347
|
|
|
|
|
|
|
# Read accessor for the filter applied to the entire output (undef when none is set).
sub output_filter {
    return $output_filter;
}
|
8348
|
|
|
|
|
|
|
# Read accessor for the filter applied to the text parts of the output (undef when none is set).
sub output_text_filter {
    return $output_text_filter;
}
|
8349
|
|
|
|
|
|
|
|
|
8350
|
|
|
|
|
|
|
# Sets the filter applied to the entire output and returns the previous one.
# Can be called in object or non-object mode. The new filter is either
#   - a false value: no filter,
#   - a code reference,
#   - 'latin1': the filter returned by XML::Twig::latin1(),
#   - the name of an entry in %XML::Twig::filter.
# Croaks on anything else.
sub set_output_filter {
    my $filter = defined $_[1] ? $_[1] : $_[0];

    # called in object mode with no argument: what we got is the invocant,
    # so the filter is undefined
    undef $filter
      if ( isa( $filter, 'XML::Twig::Elt') || isa( $filter, 'XML::Twig') );

    my $previous = $output_filter;

    $output_filter =
        ( !$filter || isa( $filter, 'CODE') ) ? $filter
      : $filter eq 'latin1'                   ? XML::Twig::latin1()
      : $XML::Twig::filter{$filter}           ? $XML::Twig::filter{$filter}
      :   croak "invalid output filter '$filter'";

    return $previous;
}
|
8367
|
|
|
|
|
|
|
|
|
8368
|
|
|
|
|
|
|
# Sets the filter applied to the text parts of the output and returns the
# previous one. Can be called in object or non-object mode. The new filter is
# either
#   - a false value: no filter,
#   - a code reference,
#   - 'latin1': the filter returned by XML::Twig::latin1(),
#   - the name of an entry in %XML::Twig::filter.
# Croaks on anything else.
sub set_output_text_filter {
    my $filter = defined $_[1] ? $_[1] : $_[0];

    # called in object mode with no argument: what we got is the invocant,
    # so the filter is undefined
    undef $filter
      if ( isa( $filter, 'XML::Twig::Elt') || isa( $filter, 'XML::Twig') );

    my $previous = $output_text_filter;

    $output_text_filter =
        ( !$filter || isa( $filter, 'CODE') ) ? $filter
      : $filter eq 'latin1'                   ? XML::Twig::latin1()
      : $XML::Twig::filter{$filter}           ? $XML::Twig::filter{$filter}
      :   croak "invalid output text filter '$filter'";

    return $previous;
}
|
8385
|
|
|
|
|
|
|
|
|
8386
|
|
|
|
|
|
|
# Sets the expand_external_entities setting and returns its previous value.
# Accepts (value) or (elt, value).
sub set_expand_external_entities {
    my $previous = $expand_external_entities;
    $expand_external_entities = defined $_[1] ? $_[1] : $_[0];
    return $previous;
}
|
8392
|
|
|
|
|
|
|
|
|
8393
|
|
|
|
|
|
|
# Sets the keep_atts_order setting and returns its previous value.
# Accepts (value) or (elt, value).
sub set_keep_atts_order {
    my $previous = $keep_atts_order;
    $keep_atts_order = defined $_[1] ? $_[1] : $_[0];
    return $previous;
}
|
8400
|
|
|
|
|
|
|
|
|
8401
|
|
|
|
|
|
|
# Read accessor for the keep_atts_order setting, so it can be used elsewhere in the module.
sub keep_atts_order {
    return $keep_atts_order;
}
|
8402
|
|
|
|
|
|
|
|
|
8403
|
|
|
|
|
|
|
# Names of the elements that start_tag outputs as ' />' tags when the html
# empty tag style is in use.
my %html_empty_elt;
BEGIN {
    my @names = qw( base meta link hr br param img area input col);
    @html_empty_elt{@names} = (1) x @names;
}
|
8405
|
|
|
|
|
|
|
|
|
8406
|
|
|
|
|
|
|
# Returns the text of the start tag of the element, preceded by the element's
# extra data (if any) and laid out according to the current pretty print
# style.
#   $elt    - the element
#   $option - optional hashref; a true escape_gt entry makes '>' escaped in
#             attribute values even when it is not in the replaced set
# Returns nothing for text and other elements below $XML::Twig::SPECIAL_GI.
# For elements whose name starts with '#' (except '#default:') the tag itself
# is empty, so only the extra data and layout whitespace are returned.
# Empty elements get their complete tag here ('/>' form, or '<x></x>' in the
# expand style), since end_tag returns '' for them.
sub start_tag {
    my ( $elt, $option ) = @_;

    return if ( $elt->{gi} < $XML::Twig::SPECIAL_GI );

    my $extra_data = $elt->{extra_data} || '';

    my $raw_gi = $XML::Twig::index2gi[ $elt->{'gi'} ];
    my $gi     = $raw_gi;
    my $att    = $elt->{att};

    # name as it must appear in the output: original prefix, no default prefix, filtered
    my $ns_map = $att ? $att->{'#original_gi'} : '';
    if ($ns_map) { $gi = _restore_original_prefix( $ns_map, $gi ); }
    $gi =~ s{^#default:}{};    # remove default prefix

    if ($output_text_filter) { $gi = $output_text_filter->($gi); }

    # separator between the tag name and the attributes, and between attributes
    my $att_sep = $pretty == $NSGMLS    ? "\n"
                : $pretty == $INDENTEDA ? "\n" . $INDENT x ( $elt->level + 1 ) . ' '
                :                         ' ';

    my $replace_in_att_value = $replaced_ents . "$quote\t\r\n";
    if ( $option->{escape_gt} && $replaced_ents !~ m{>} ) { $replace_in_att_value .= '>'; }

    # attributes whose name starts with '#' are not output, unless they are in '#default:'
    my @att_names = grep { !( $_ =~ m{^#(?!default:)} ) } $keep_atts_order ? keys %{$att} : sort keys %{$att};

    my $tag = "<$gi";
    if (@att_names) {
        my @pairs;
        foreach my $att_name (@att_names) {
            my $output_att_name = $ns_map ? _restore_original_prefix( $ns_map, $att_name ) : $att_name;
            if ($output_text_filter) { $output_att_name = $output_text_filter->($output_att_name); }
            push @pairs, $output_att_name . '=' . $quote . _att_xml_string( $att->{$att_name}, $replace_in_att_value ) . $quote;
        }
        my $atts = join $att_sep, @pairs;

        # a single attribute stays on the line of the tag name
        if ( $pretty == $INDENTEDA && @att_names == 1 ) { $att_sep = ' '; }
        $tag .= "$att_sep$atts";
    }

    $tag .= "\n" if ( $pretty == $NSGMLS );

    # force empty if suitable HTML tag, otherwise use the value from the input tree
    if (   ( $empty_tag_style eq $HTML )
        && !$elt->{first_child}
        && !$elt->{extra_data_before_end_tag}
        && $html_empty_elt{$gi} )
    {
        $elt->{empty} = 1;
    }

    $tag .= ( !$elt->{empty} || $elt->{extra_data_before_end_tag} ) ? '>'      # element has content
          : ( ( $empty_tag_style eq $HTML ) && $html_empty_elt{$gi} ) ? ' />'  # html empty element
                                                                               # cvs-friendly format
          : ( $pretty == $INDENTEDA && @att_names > 1 )  ? "\n" . $INDENT x $elt->level . "/>"
          : ( $pretty == $INDENTEDA && @att_names == 1 ) ? " />"
            # $empty_tag_style is $HTML or $EXPAND: output a real closing tag,
            # with the same name as the one used in the opening tag
          : $empty_tag_style ? "></$gi>"
          :                    '/>';

    if ( ( substr( $raw_gi, 0, 1 ) eq '#' ) && ( substr( $raw_gi, 0, 9 ) ne '#default:' ) ) { $tag = ''; }

    #warn "TRACE: ", $tag,": ", Encode::is_utf8( $tag) ? "has flag" : "FLAG NOT SET";

    unless ($pretty) { return $extra_data . $tag; }

    my $prefix = '';
    my $return = '';    # '' or \n is to be printed before the tag
    my $indent = 0;     # number of indents before the tag

    if ( $pretty == $RECORD1 ) {
        my $level = $elt->level;
        $return = "\n" if ( $level < 2 );
        $indent = 1    if ( $level == 1 );
    }
    elsif ( $pretty == $RECORD2 ) {
        $return = "\n";
        $indent = $elt->level;
    }
    elsif ( $pretty == $NICE || $KEEP_TEXT_TAG_ON_ONE_LINE{$pretty} ) {
        # new line (and for all styles but nice, indentation) only when the
        # parent is not known to contain text
        my $parent = $elt->{parent};
        if ( $parent && !$parent->{contains_text} ) {
            $return = "\n";
            $indent = $elt->level unless ( $pretty == $NICE );
        }
        $elt->{contains_text} = 1
          if ( ( $parent && $parent->{contains_text} ) || $elt->contains_text );
    }

    if ( $return || $indent ) {
        # check for elements in which spaces should be kept
        my $t = $elt->twig;
        return $extra_data . $tag if ($xml_space_preserve);
        if ( $t && $t->{twig_keep_spaces_in} ) {
            foreach my $ancestor ( $elt->ancestors ) {
                return $extra_data . $tag
                  if ( $t->{twig_keep_spaces_in}->{ $XML::Twig::index2gi[ $ancestor->{'gi'} ] } );
            }
        }

        $prefix = $INDENT x $indent;
        if ($extra_data) {
            $extra_data =~ s{\s+$}{};
            $extra_data =~ s{^\s+}{};
            $extra_data = $prefix . $extra_data . $return;
        }
    }

    return $return . $extra_data . $prefix . $tag;
}
|
8527
|
|
|
|
|
|
|
|
|
8528
|
|
|
|
|
|
|
# Returns the text of the end tag of the element, preceded by the extra data
# stored before the end tag (if any) and laid out according to the current
# pretty print style.
# Returns '' for text and other elements below $XML::Twig::SPECIAL_GI, and
# for empty elements without extra data before the end tag. For elements
# whose name starts with '#' (except '#default:') the tag itself is empty.
sub end_tag {
    my $elt = shift;
    return '' if ( ( $elt->{gi} < $XML::Twig::SPECIAL_GI )
                || ( $elt->{'empty'} && !$elt->{extra_data_before_end_tag} ) );

    my $raw_gi = $XML::Twig::index2gi[ $elt->{'gi'} ];
    my $gi     = $raw_gi;

    if ( my $map = $elt->{'att'}->{'#original_gi'} ) { $gi = _restore_original_prefix( $map, $gi ); }
    $gi =~ s{^#default:}{};    # remove default prefix

    # filter the processed name, as start_tag does; filtering the raw name
    # threw away the prefix handling done just above
    if ($output_text_filter) { $gi = $output_text_filter->($gi); }

    my $tag = ( $elt->{extra_data_before_end_tag} || '' ) . "</$gi>";

    if ( ( substr( $raw_gi, 0, 1 ) eq '#' ) && ( substr( $raw_gi, 0, 9 ) ne '#default:' ) ) { $tag = ''; }

    return $tag unless $pretty;

    my $prefix = '';
    my $return = 0;    # 1 if a \n is to be printed before the tag
    my $indent = 0;    # number of indents before the tag

    if ( $pretty == $RECORD1 ) {
        $return = 1 if ( $elt->level == 0 );
    }
    elsif ( $pretty == $RECORD2 ) {
        unless ( $elt->contains_text ) {
            $return = 1;
            $indent = $elt->level;
        }
    }
    elsif ( $pretty == $NICE || $KEEP_TEXT_TAG_ON_ONE_LINE{$pretty} ) {
        # new line (and for all styles but nice, indentation) when neither the
        # parent nor the element is known to contain text, and the element
        # has (or had, before a flush) children
        my $parent = $elt->{parent};
        if (   ( !$parent || !$parent->{contains_text} )
            && !$elt->{contains_text}
            && ( $elt->{has_flushed_child} || $elt->{first_child} ) )
        {
            $return = 1;
            $indent = $elt->level unless ( $pretty == $NICE );
        }
    }

    if ( $return || $indent ) {
        # check for elements in which spaces should be kept
        my $t = $elt->twig;
        return $tag if ($xml_space_preserve);
        if ( $t && $t->{twig_keep_spaces_in} ) {
            foreach my $ancestor ( $elt, $elt->ancestors ) {
                return $tag
                  if ( $t->{twig_keep_spaces_in}->{ $XML::Twig::index2gi[ $ancestor->{'gi'} ] } );
            }
        }

        if ($return) { $prefix = ( $pretty == $INDENTEDCT ) ? "\n$INDENT" : "\n"; }
        $prefix .= $INDENT x $indent;
    }

    # add a \n at the end of the document (after the root element)
    $tag .= "\n" unless ( $elt->{parent} );

    return $prefix . $tag;
}
|
8603
|
|
|
|
|
|
|
|
|
8604
|
|
|
|
|
|
|
# Gives a name back the prefix it had in the original document.
#   $map  - hashref: current prefix => original prefix
#   $name - the (possibly prefixed) name
# When the original prefix is '#default' the prefix and its colon are removed
# altogether; when the prefix has no entry in the map the name is returned
# unchanged.
sub _restore_original_prefix {
    my ( $map, $name ) = @_;
    my $prefix = _ns_prefix($name);
    if ( my $original_prefix = $map->{$prefix} ) {
        # \Q..\E: the prefix is data, not a pattern
        if   ( $original_prefix eq '#default' ) { $name =~ s{^\Q$prefix\E:}{}; }
        else                                    { $name =~ s{^\Q$prefix\E(?=:)}{$original_prefix}; }
    }
    return $name;
}
|
8615
|
|
|
|
|
|
|
|
|
8616
|
|
|
|
|
|
|
# buffer used to hold the text to print/sprint, to avoid passing it back and forth between methods |
|
8617
|
|
|
|
|
|
|
my @sprint; |
|
8618
|
|
|
|
|
|
|
|
|
8619
|
|
|
|
|
|
|
# Prints the element, as returned by sprint.
#   $elt - the element to print
#   $fh  - optional first argument: a filehandle (a GLOB or an IO::Scalar) to
#          print to; it is selected while printing and the previously
#          selected handle is restored afterwards
# all other arguments are passed to sprint
sub print {
    my $elt = shift;

    my $fh;
    if ( isa( $_[0], 'GLOB') || isa( $_[0], 'IO::Scalar') ) { $fh = shift; }

    my $old_select;
    if ( defined $fh ) { $old_select = select $fh; }

    print $elt->sprint( @_);
    select $old_select if( defined $old_select);
}
|
8631
|
|
|
|
|
|
|
|
|
8632
|
|
|
|
|
|
|
|
|
8633
|
|
|
|
|
|
|
# those next 2 methods need to be refactored, they are copies of the same methods in XML::Twig

# Prints the element to the file $filename (created or overwritten) and
# returns the element. The file is written through the :utf8 layer unless
# keep_encoding is set. Extra arguments are passed to print.
# Croaks if the file cannot be created, or if closing it fails (which is when
# buffered write errors show up).
sub print_to_file {
    my ( $elt, $filename ) = ( shift, shift );
    my $mode = $keep_encoding ? '>' : '>:utf8';    # >= perl 5.8
    open( my $out_fh, $mode, $filename ) or _croak("cannot create file $filename: $!");
    $elt->print( $out_fh, @_ );
    close $out_fh or _croak("cannot write file $filename: $!");
    return $elt;
}
|
8644
|
|
|
|
|
|
|
|
|
8645
|
|
|
|
|
|
|
# probably only works on *nix (at least the chmod bit)
# first print to a temporary file (created in the directory of the target),
# then rename that file to the desired file name, then change permissions
# to the original file permissions (or to what the current umask gives a new file)
# Returns the element. Extra arguments are passed to print_to_file.
# Croaks if File::Temp or File::Basename are not available or if the
# temporary file cannot be moved into place; the temporary file is removed
# when printing or moving fails.
sub safe_print_to_file {
    my ( $elt, $filename ) = ( shift, shift );

    # a bare ~umask() also has the setuid/setgid/sticky and execute bits set,
    # so start from the regular mode of a new file
    my $perm = -f $filename ? ( stat $filename )[2] & 07777 : 0666 & ~umask();

    XML::Twig::_use('File::Temp')     || croak "need File::Temp to use safe_print_to_file\n";
    XML::Twig::_use('File::Basename') || croak "need File::Basename to use safe_print_to_file\n";

    my $tmpdir = File::Basename::dirname($filename);
    my ( $fh, $tmpfilename ) = File::Temp::tempfile( DIR => $tmpdir );
    close $fh;    # only the name is needed, print_to_file opens the file itself

    unless ( eval { $elt->print_to_file( $tmpfilename, @_ ); 1 } ) {
        my $error = $@;
        unlink $tmpfilename;
        die $error;
    }

    # "unlink $tmpfilename && _croak(...)" called _croak before unlink ever ran
    unless ( rename( $tmpfilename, $filename ) ) {
        my $error = $!;    # saved before unlink can overwrite it
        unlink $tmpfilename;
        _croak("cannot move temporary file to $filename: $error");
    }

    chmod $perm, $filename;
    return $elt;
}
|
8660
|
|
|
|
|
|
|
|
|
8661
|
|
|
|
|
|
|
|
|
8662
|
|
|
|
|
|
|
# Hands over to the twig's flush_up_to. If the first argument is an
# XML::Twig::Elt it is the element to flush up to, otherwise this element is
# used; the remaining arguments are passed along.
sub flush {
    my $elt   = shift;
    my $up_to = $elt;
    if ( $_[0] && isa( $_[0], 'XML::Twig::Elt') ) { $up_to = shift; }
    $elt->twig->flush_up_to( $up_to, @_);
}
|
8669
|
|
|
|
|
|
|
# Hands over to the twig's purge_up_to. If the first argument is an
# XML::Twig::Elt it is the element to purge up to, otherwise this element is
# used; the remaining arguments are passed along.
sub purge {
    my $elt   = shift;
    my $up_to = $elt;
    if ( $_[0] && isa( $_[0], 'XML::Twig::Elt') ) { $up_to = shift; }
    $elt->twig->purge_up_to( $up_to, @_);
}
|
8674
|
|
|
|
|
|
|
|
|
8675
|
|
|
|
|
|
|
# Flushes the element (see __flush), with optional temporary settings:
#   - a first argument that is a filehandle (GLOB or IO::Scalar) is selected
#     for the duration of the flush,
#   - the next argument, if defined, is a pretty print style set for the
#     duration of the flush.
# $xml_space_preserve is set when the element inherits xml:space="preserve",
# and reset to 0 once the flush is done.
sub _flush {
    my $elt = shift;

    my $fh = ( isa( $_[0], 'GLOB') || isa( $_[0], 'IO::Scalar') ) ? shift : undef;
    my $old_select = defined $fh ? select $fh : undef;

    my $style = shift;
    my $old_pretty = defined $style ? set_pretty_print( $style) : undef;

    if ( ( $elt->inherit_att( 'xml:space') || '' ) eq 'preserve' ) { $xml_space_preserve = 1; }

    $elt->__flush();

    $xml_space_preserve = 0;

    select $old_select if( defined $old_select);
    set_pretty_print( $old_pretty) if( defined $old_pretty);
}
|
8692
|
|
|
|
|
|
|
|
|
8693
|
|
|
|
|
|
|
# Prints the element to the currently selected filehandle.
# For a regular element: prints the start tag (unless the element is already
# marked as flushed), flushes every child, then prints the end tag (unless
# already done), marking elements as flushed along the way.
# For text and special elements: prints their string form, through the output
# filter if there is one.
sub __flush {
    my $elt = shift;

    if ( $elt->{gi} < $XML::Twig::SPECIAL_GI ) {
        # text or special element
        my $text;
        if ( exists $elt->{'pcdata'} ) {
            $text = $elt->pcdata_xml_string;
            if ( my $parent = $elt->{parent} ) { $parent->{contains_text} = 1; }
        }
        elsif ( exists $elt->{'cdata'} ) {
            $text = $elt->cdata_string;
            if ( my $parent = $elt->{parent} ) { $parent->{contains_text} = 1; }
        }
        elsif ( exists $elt->{'target'} )  { $text = $elt->pi_string; }
        elsif ( exists $elt->{'comment'} ) { $text = $elt->comment_string; }
        elsif ( exists $elt->{'ent'} )     { $text = $elt->ent_string; }

        print $output_filter ? $output_filter->( $text) : $text;
    }
    else {
        my $preserve = ( $elt->{'att'}->{'xml:space'} || '' ) eq 'preserve';
        $xml_space_preserve++ if $preserve;

        print $elt->start_tag() unless $elt->{'flushed'};

        # flush the children (the list is built first, then walked)
        my @children;
        for ( my $child = $elt->{first_child}; $child; $child = $child->{next_sibling} ) {
            push @children, $child;
        }
        foreach my $child (@children) {
            $child->_flush( $pretty);
            $child->{'flushed'} = 1;
        }

        unless ( $elt->{end_tag_flushed} ) {
            print $elt->end_tag;
            $elt->{end_tag_flushed} = 1;
            $elt->{'flushed'}       = 1;
        }
        $xml_space_preserve-- if $preserve;

        # used for pretty printing
        if ( my $parent = $elt->{parent} ) { $parent->{has_flushed_child} = 1; }
    }
}
|
8735
|
|
|
|
|
|
|
|
|
8736
|
|
|
|
|
|
|
|
|
8737
|
|
|
|
|
|
|
# Returns the text of the element in its XML form: for a regular element the
# concatenation of the xml_text of its children, for pcdata and cdata their
# string form (through the output filter if there is one), for an entity its
# string form, and '' for anything else.
# With the 'no_recurse' option (case-insensitive) the result of
# xml_text_only is returned instead.
sub xml_text {
    my ( $elt, @options ) = @_;

    if ( grep { lc( $_) eq 'no_recurse' } @options ) { return $elt->xml_text_only; }

    my $string = '';

    if ( $elt->{gi} >= $XML::Twig::SPECIAL_GI ) {
        # sprint the children
        for ( my $child = $elt->{first_child} || ''; $child; $child = $child->{next_sibling} ) {
            $string .= $child->xml_text;
        }
    }
    elsif ( exists $elt->{'pcdata'} ) {
        $string .= $output_filter ? $output_filter->($elt->pcdata_xml_string)
                                  : $elt->pcdata_xml_string;
    }
    elsif ( exists $elt->{'cdata'} ) {
        $string .= $output_filter ? $output_filter->($elt->cdata_string)
                                  : $elt->cdata_string;
    }
    elsif ( exists $elt->{'ent'} ) {
        $string .= $elt->ent_string;
    }

    return $string;
}
|
8761
|
|
|
|
|
|
|
|
|
8762
|
|
|
|
|
|
|
# Returns the concatenated xml_text of the children of the element that are
# text or entities; other children contribute nothing.
sub xml_text_only {
    my @pieces = map { ( $_->is_text || ( exists $_->{'ent'} ) ) ? $_->xml_text : () } $_[0]->_children;
    return join '', @pieces;
}
|
8764
|
|
|
|
|
|
|
|
|
8765
|
|
|
|
|
|
|
# Same as print, except that the string is returned instead of being printed.
# Options can be given
#   - as a hashref, in which PrettyPrint and EmptyTags are used (after
#     XML::Twig::_normalize_args),
#   - or as a list of option names: pretty print style names and empty tag
#     style names are applied here, everything else is passed to _sprint.
# Styles set through the options only last for the call. The output filter,
# if any, is applied to the complete string, and the string is wrapped in the
# wrapped and indented_c styles unless xml:space="preserve" is in effect.
sub sprint {
    my $elt = shift;
    my ( $old_pretty, $old_empty_tag_style );

    if ( $_[0] ) {
        if ( isa( $_[0], 'HASH') ) {
            # "proper way, using a hashref for options
            my %args = XML::Twig::_normalize_args( %{shift()});
            $old_pretty          = set_pretty_print( $args{PrettyPrint})  if ( defined $args{PrettyPrint} );
            $old_empty_tag_style = set_empty_tag_style( $args{EmptyTags}) if ( defined $args{EmptyTags} );
        }
        else {
            # "old" way, just using the option name
            my @other_opt;
            foreach my $opt (@_) {
                if    ( exists $pretty_print_style{$opt} ) { $old_pretty          = set_pretty_print( $opt); }
                elsif ( exists $empty_tag_style{$opt} )    { $old_empty_tag_style = set_empty_tag_style( $opt); }
                else                                       { push @other_opt, $opt; }
            }
            @_ = @other_opt;
        }
    }

    if ( ( $elt->inherit_att( 'xml:space') || '' ) eq 'preserve' ) { $xml_space_preserve = 1; }

    # _sprint fills the shared @sprint buffer
    @sprint = ();
    $elt->_sprint( @_);
    my $sprint = join( '', @sprint);
    $sprint = $output_filter->( $sprint) if ($output_filter);

    my $style_wraps = ( $pretty == $WRAPPED ) || ( $pretty == $INDENTEDC );
    if ( $style_wraps && !$xml_space_preserve ) { $sprint = _wrap_text( $sprint); }
    $xml_space_preserve = 0;

    # back to the styles in use before the call
    set_pretty_print( $old_pretty)               if ( defined $old_pretty );
    set_empty_tag_style( $old_empty_tag_style)   if ( defined $old_empty_tag_style );

    return $sprint;
}
|
8810
|
|
|
|
|
|
|
|
|
8811
|
|
|
|
|
|
|
# wraps each line of $string using Text::Wrap; continuation lines are indented
# by the line's own leading whitespace plus one $INDENT
# always returns a defined string: '' when $string contains no line at all
sub _wrap_text {
    my( $string) = @_;
    my $wrapped = '';    # initialised so that empty input gives '' and not undef
    foreach my $line (split /\n/, $string) {
        my( $initial_indent) = $line =~ m{^(\s*)};
        my $wrapped_line = Text::Wrap::wrap( '', $initial_indent . $INDENT, $line) . "\n";

        # fix glitch with Text::wrap when the first line is long and does not include spaces
        # the first line ends up being too short by 2 chars, but we'll have to live with it!
        $wrapped_line =~ s{^ +\n }{}s; # this prefix needs to be removed

        $wrapped .= $wrapped_line;
    }

    return $wrapped;
}
|
8827
|
|
|
|
|
|
|
|
|
8828
|
|
|
|
|
|
|
|
|
8829
|
|
|
|
|
|
|
# pushes the serialized form of the element (and, recursively, of its
# children) onto @sprint; if the first argument is true the element's own
# start and end tags are left out
sub _sprint {
    my $elt    = shift;
    my $no_tag = shift || 0;
    # in case there's some comments or PI's piggybacking

    if( $elt->{gi} >= $XML::Twig::SPECIAL_GI) {
        # regular element: tags around the children
        my $preserve = ($elt->{'att'}->{'xml:space'} || '') eq 'preserve';
        $xml_space_preserve++ if $preserve;

        push @sprint, $elt->start_tag unless( $no_tag);

        # sprint the children
        for( my $child = $elt->{first_child}; $child; $child = $child->{next_sibling}) {
            $child->_sprint;
        }

        push @sprint, $elt->end_tag unless( $no_tag);
        $xml_space_preserve-- if $preserve;
    }
    else {
        # text or special element
        push @sprint, $elt->{extra_data} if( $elt->{extra_data});

        if( (exists $elt->{'pcdata'})) {
            push @sprint, $elt->pcdata_xml_string;
        }
        elsif( (exists $elt->{'cdata'})) {
            push @sprint, $elt->cdata_string;
        }
        elsif( (exists $elt->{'target'}) || (exists $elt->{'comment'})) {
            # PIs and comments get their own indented line in the indented
            # styles, unless the parent is flagged as containing text
            if( ($pretty >= $INDENTED) && !$elt->{parent}->{contains_text}) {
                push @sprint, "\n" . $INDENT x $elt->level;
            }
            push @sprint, (exists $elt->{'target'}) ? $elt->pi_string : $elt->comment_string;
        }
        elsif( (exists $elt->{'ent'})) {
            push @sprint, $elt->ent_string;
        }
    }

    return;
}
|
8865
|
|
|
|
|
|
|
|
|
8866
|
|
|
|
|
|
|
# just a shortcut to $elt->sprint( 1): the content of the element without its
# own tags; an options hashref given as first argument is passed on to sprint
sub xml_string {
    my $elt = shift;
    if( isa( $_[0], 'HASH')) {
        return $elt->sprint( shift(), 1);
    }
    return $elt->sprint( 1);
}
|
8871
|
|
|
|
|
|
|
|
|
8872
|
|
|
|
|
|
|
# returns the #PCDATA content of the element as it should appear in the XML:
# characters in $replaced_ents are replaced by their entity and ]]> is
# output as ]]&gt; (a literal ]]> is not allowed in XML character data)
# extra data (comments, PIs) recorded in extra_data_in_pcdata is re-inserted
# at its offset and is not escaped
# returns '' if the element has no pcdata
sub pcdata_xml_string {
    my $elt    = shift;
    my $string = $elt->{pcdata};
    return '' unless( defined $string);

    # with keep_encoding or asis the text is output without entity replacement
    my $no_escape = $keep_encoding || $elt->{asis};

    if( ! $elt->{extra_data_in_pcdata}) {
        $string =~ s/([$replaced_ents])/$XML::Twig::base_ent{$1}/g unless( !$replaced_ents || $no_escape);
        $string =~ s{\Q]]>}{]]&gt;}g;
    }
    else {
        _gen_mark( $string); # used by _(un)?protect_extra_data

        # insert from the end so that the earlier offsets stay valid
        foreach my $data (reverse @{$elt->{extra_data_in_pcdata}}) {
            my $extra = $no_escape ? $data->{text} : _protect_extra_data( $data->{text});
            substr( $string, $data->{offset}, 0, $extra);
        }

        unless( $no_escape) {
            # the markup of the extra data is protected by the mark while the
            # text around it is escaped
            $string =~ s{([$replaced_ents])}{$XML::Twig::base_ent{$1}}g;
            $string =~ s{\Q]]>}{]]&gt;}g;
            _unprotect_extra_data( $string);
        }
    }

    return $output_text_filter ? $output_text_filter->( $string) : $string;
}
|
8902
|
|
|
|
|
|
|
|
|
8903
|
|
|
|
|
|
|
{   # helpers used to shield the markup of extra data while the surrounding
    # text is being escaped
    my $mark;                     # marker chosen by the last call to _gen_mark
    my( %char2ent, %ent2char);    # '<' => 'lt'... and the reverse mapping
    BEGIN {
        %char2ent = ( '<' => 'lt', '&' => 'amp', '>' => 'gt');
        %ent2char = reverse %char2ent;
    }

    # generate a unique mark (a string) not found in the string,
    # used to mark < and & in the extra data
    sub _gen_mark {
        my $candidate = "AAAA";
        $candidate++ while( index( $_[0], $candidate) > -1);
        return $mark = $candidate;
    }

    # returns a copy of the extra data in which <, & and > are replaced
    # by :<mark>:<entity name>:
    sub _protect_extra_data {
        (my $protected = $_[0]) =~ s{([<&>])}{:$mark:$char2ent{$1}:}g;
        return $protected;
    }

    # turns the :<mark>:<entity name>: sequences back into characters,
    # the argument is modified in place
    sub _unprotect_extra_data {
        $_[0] =~ s{:$mark:(\w+):}{$ent2char{$1}}g;
    }

}
|
8928
|
|
|
|
|
|
|
|
|
8929
|
|
|
|
|
|
|
# returns the CDATA content of the element as output in the XML: wrapped in
# $CDATA_START / $CDATA_END, or, when $remove_cdata is set, as plain text
# with the characters in $replaced_ents replaced by their entity
# returns '' if the element has no cdata
sub cdata_string {
    my $cdata = $_[0]->{cdata};
    return '' unless( defined $cdata);

    return $CDATA_START . $cdata . $CDATA_END unless( $remove_cdata);

    $cdata =~ s/([$replaced_ents])/$XML::Twig::base_ent{$1}/g;
    return $cdata;
}
|
8938
|
|
|
|
|
|
|
|
|
8939
|
|
|
|
|
|
|
# returns the escaped value of attribute $att, '' if the attribute is not set
# an optional hashref can follow: { escape_gt => 1 } also escapes '>'
sub att_xml_string {
    my( $elt, $att, $options) = @_;

    # on top of the usual entities, the quote and some whitespace characters
    # are escaped in attribute values
    my $replace = $replaced_ents . "$quote\n\r\t";
    $replace .= '>' if( $options && $options->{escape_gt} && ($replace !~ m{>}));

    my $value = $elt->{att}->{$att};
    return '' unless( defined $value);
    return _att_xml_string( $value, $replace);
}
|
8951
|
|
|
|
|
|
|
|
|
8952
|
|
|
|
|
|
|
# escaped xml string for an attribute value
# $escape is the list of characters to replace by their entity
# (as found in %XML::Twig::base_ent); returns '' for an undefined value
sub _att_xml_string {
    my( $string, $escape) = @_;
    return '' unless( defined $string);

    if( $keep_encoding) {
        # only the quote needs escaping
        $string =~ s{$quote}{$XML::Twig::base_ent{$quote}}g;
    }
    elsif( $do_not_escape_amp_in_atts) {
        $escape =~ s{^.}{}; # seems like the most backward compatible way to remove & from the list
        $string =~ s{([$escape])}{$XML::Twig::base_ent{$1}}g;
        # dodgy: escape & that do not start an entity or a (decimal or
        # hexadecimal) character reference
        $string =~ s{&(?!(?:\w+|#\d+|#[xX][0-9a-fA-F]+);)}{&amp;}g;
    }
    else {
        $string =~ s{([$escape])}{$XML::Twig::base_ent{$1}}g;
        $string =~ s{\Q]]>}{]]&gt;}g;
    }

    return $output_text_filter ? $output_text_filter->( $string) : $string;
}
|
8974
|
|
|
|
|
|
|
|
|
8975
|
|
|
|
|
|
|
# returns the string output for an entity element: the value found in the
# entity list of the twig when $expand_external_entities is set and that
# value is true, the entity text as stored in the element otherwise
sub ent_string {
    my $ent        = shift;
    my $unexpanded = $ent->{ent};

    return $unexpanded unless( $expand_external_entities);

    my $twig        = $ent->twig          or return $unexpanded;
    my $entity_list = $twig->entity_list  or return $unexpanded;
    my $expanded    = $entity_list->{entities}->{$ent->ent_name}->{val};

    return $expanded ? $expanded : $unexpanded;
}
|
8988
|
|
|
|
|
|
|
|
|
8989
|
|
|
|
|
|
|
# returns just the text, no tags, for an element
# options (case-insensitive strings):
#   - no_recurse: only the text directly in the element (see text_only)
#   - sep:        a space is used as separator around the pieces of text
sub text {
    my( $elt, @options) = @_;

    if( @options && grep { lc( $_) eq 'no_recurse' } @options) { return $elt->text_only; }
    my $sep = (@options && grep { lc( $_) eq 'sep' } @options) ? ' ' : '';

    # text and special elements carry their own content
    return $elt->{pcdata}  . $sep if( exists $elt->{'pcdata'});
    return $elt->{cdata}   . $sep if( exists $elt->{'cdata'});
    return $elt->pi_string . $sep if( exists $elt->{'target'});
    return $elt->{comment} . $sep if( exists $elt->{'comment'});
    return $elt->{ent}     . $sep if( exists $elt->{'ent'});

    # regular element: concatenate the text of the children
    my $string;
    for( my $child = $elt->{first_child} || ''; $child; $child = $child->{next_sibling}) {
        my $child_text = $child->text( @options);
        $string .= defined( $child_text) ? $sep . $child_text : '';
    }

    $string = '' unless( defined $string);

    return $output_text_filter ? $output_text_filter->( $string) : $string;
}
|
9016
|
|
|
|
|
|
|
|
|
9017
|
|
|
|
|
|
|
# returns the concatenated text of the direct children of the element that
# are text or entity nodes (the content of child elements is not included)
sub text_only {
    my @text_children = grep { $_->is_text || (exists $_->{'ent'}) } $_[0]->_children;
    return join '', map { $_->text } @text_children;
}
|
9019
|
|
|
|
|
|
|
|
|
9020
|
|
|
|
|
|
|
# returns the text of the element (see text, which receives the arguments)
# with every run of whitespace replaced by a single space and with leading
# and trailing whitespace removed
sub trimmed_text {
    my $elt     = shift;
    my $trimmed = $elt->text( @_);
    for( $trimmed) {
        s{\s+}{ }sg;
        s{^\s*}{};
        s{\s*$}{};
    }
    return $trimmed;
}
|
9028
|
|
|
|
|
|
|
|
|
9029
|
|
|
|
|
|
|
# trims the text of the element in place: leading whitespace is removed from
# the first text descendant, trailing whitespace from the last one, and in
# every text descendant each run of whitespace is replaced by a single space
# an element without any text descendant is left untouched
# returns the element
sub trim {
    my( $elt) = @_;

    # nothing to trim (this used to die, calling text on undef)
    my $first_text = $elt->first_descendant( $TEXT) or return $elt;

    (my $text = $first_text->text) =~ s{^\s+}{}s;
    $first_text->set_text( $text);

    my $last_text = $elt->last_descendant( $TEXT);
    ($text = $last_text->text) =~ s{\s+$}{};
    $last_text->set_text( $text);

    foreach my $pcdata ($elt->descendants( $TEXT)) {
        ($text = $pcdata->text) =~ s{\s+}{ }g;
        $pcdata->set_text( $text);
    }

    return $elt;
}
|
9043
|
|
|
|
|
|
|
|
|
9044
|
|
|
|
|
|
|
|
|
9045
|
|
|
|
|
|
|
# remove cdata sections (turns them into regular pcdata) in an element
# with keep_encoding the characters & < " ' are replaced by their entity
# in the text, as it will not be escaped when output
sub remove_cdata {
    my $elt = shift;
    foreach my $cdata ($elt->descendants_or_self( $CDATA)) {
        my $text = $cdata->{cdata};
        $text =~ s{([&<"'])}{$XML::Twig::base_ent{$1}}g if( $keep_encoding);

        # the node now holds pcdata and is no longer flagged as empty
        delete $cdata->{empty};
        $cdata->{pcdata} = $text;

        # fast path through the gi index, set_gi if the index has no entry
        unless( $cdata->{gi} = $XML::Twig::gi2index{$PCDATA}) {
            $cdata->set_gi( $PCDATA);
        }
        undef $cdata->{cdata};
    }
}
|
9060
|
|
|
|
|
|
|
|
|
9061
|
|
|
|
|
|
|
# true if the gi of the element is a private name (see _is_private_name)
sub _is_private {
    my( $elt) = @_;
    return _is_private_name( $elt->gi);
}
|
9062
|
|
|
|
|
|
|
# true if the name starts with # but not with #default:
sub _is_private_name {
    my( $name) = @_;
    return $name =~ m{^#(?!default:)};
}
|
9063
|
|
|
|
|
|
|
|
|
9064
|
|
|
|
|
|
|
|
|
9065
|
|
|
|
|
|
|
} # end of block containing package globals ($pretty_print, $quotes, keep_encoding...) |
|
9066
|
|
|
|
|
|
|
|
|
9067
|
|
|
|
|
|
|
# merges consecutive #PCDATAs in an element, and deletes the #PCDATA
# descendants whose text is empty
# returns the element
sub normalize {
    my( $elt) = @_;
    my @pending = $elt->descendants( $PCDATA);
    while( my $current = shift @pending) {
        unless( length $current->{pcdata}) { $current->delete; next; }

        # swallow the text nodes that directly follow the current one
        while( @pending && $current->{next_sibling} && $current->{next_sibling} == $pending[0]) {
            $current->merge_text( shift @pending);
        }
    }
    return $elt;
}
|
9080
|
|
|
|
|
|
|
|
|
9081
|
|
|
|
|
|
|
# SAX export methods |
|
9082
|
|
|
|
|
|
|
# exports the element through _toSAX, using the SAX1 tag data builders
# arguments: the element and the handler
sub toSAX1 {
    return _toSAX( @_, \&_start_tag_data_SAX1, \&_end_tag_data_SAX1);
}
|
9084
|
|
|
|
|
|
|
|
|
9085
|
|
|
|
|
|
|
# exports the element through _toSAX, using the SAX2 tag data builders
# arguments: the element and the handler
sub toSAX2 {
    return _toSAX( @_, \&_start_tag_data_SAX2, \&_end_tag_data_SAX2);
}
|
9087
|
|
|
|
|
|
|
|
|
9088
|
|
|
|
|
|
|
# sends the SAX events for the element (and, recursively, its children) to
# $handler; only the events for which the handler has a method are sent
# $start_tag_data and $end_tag_data are the subs that build the data passed
# to start_element and end_element (SAX1 or SAX2 flavor)
sub _toSAX {
    my( $elt, $handler, $start_tag_data, $end_tag_data) = @_;

    if( $elt->{gi} >= $XML::Twig::SPECIAL_GI) {
        # regular element
        my $start_data = $start_tag_data->( $elt);
        _start_prefix_mapping( $elt, $handler, $start_data);
        if( $start_data && (my $on_start_element = $handler->can( 'start_element'))) {
            # no start event for an element flagged as flushed
            $on_start_element->( $handler, $start_data) unless( $elt->{'flushed'});
        }

        foreach my $node ($elt->_children) {
            $node->_toSAX( $handler, $start_tag_data, $end_tag_data);
        }

        if( (my $end_data = $end_tag_data->( $elt)) && (my $on_end_element = $handler->can( 'end_element'))) {
            $on_end_element->( $handler, $end_data);
        }
        _end_prefix_mapping( $elt, $handler);
    }
    else {
        # text or special element
        if( (exists $elt->{'pcdata'}) && (my $on_characters = $handler->can( 'characters'))) {
            $on_characters->( $handler, { Data => $elt->{pcdata} });
        }
        elsif( (exists $elt->{'cdata'})) {
            if( my $on_start_cdata = $handler->can( 'start_cdata')) {
                $on_start_cdata->( $handler);
            }
            if( my $on_cdata_characters = $handler->can( 'characters')) {
                $on_cdata_characters->( $handler, {Data => $elt->{cdata} });
            }
            if( my $on_end_cdata = $handler->can( 'end_cdata')) {
                $on_end_cdata->( $handler);
            }
        }
        elsif( (exists $elt->{'target'}) && (my $on_pi = $handler->can( 'processing_instruction'))) {
            $on_pi->( $handler, { Target =>$elt->{target}, Data => $elt->{data} });
        }
        elsif( (exists $elt->{'comment'}) && (my $on_comment = $handler->can( 'comment'))) {
            $on_comment->( $handler, { Data => $elt->{comment} });
        }
        elsif( (exists $elt->{'ent'})) {
            # entity: skipped_entity if the handler has it, characters otherwise
            if( my $on_skipped_entity = $handler->can( 'skipped_entity')) {
                $on_skipped_entity->( $handler, { Name => $elt->ent_name });
            }
            elsif( my $on_ent_characters = $handler->can( 'characters')) {
                if( defined $elt->ent_string) {
                    $on_ent_characters->( $handler, {Data => $elt->ent_string});
                }
                else {
                    $on_ent_characters->( $handler, {Data => $elt->ent_name});
                }
            }
        }

    }
}
|
9132
|
|
|
|
|
|
|
|
|
9133
|
|
|
|
|
|
|
# returns the data for a SAX1 start_element event: { Name, Attributes },
# Attributes being a hash of the non-private attributes
# returns nothing for a private element (gi starting with #, except #default:)
sub _start_tag_data_SAX1 {
    my( $elt) = @_;
    my $gi = $XML::Twig::index2gi[$elt->{'gi'}];
    return if( (substr( $gi, 0, 1) eq '#') && (substr( $gi, 0, 9) ne '#default:'));

    my $atts = $elt->{att};
    my %public_atts = map  { $_ => $atts->{$_} }
                      grep { !m{^#(?!default:)} }
                      keys %$atts;

    return { Name => $gi, Attributes => \%public_atts };
}
|
9144
|
|
|
|
|
|
|
|
|
9145
|
|
|
|
|
|
|
# returns the data for a SAX1 end_element event: { Name }
# returns nothing for a private element (gi starting with #, except #default:)
sub _end_tag_data_SAX1 {
    my( $elt) = @_;
    my $gi = $XML::Twig::index2gi[$elt->{'gi'}];
    my $is_private = (substr( $gi, 0, 1) eq '#') && (substr( $gi, 0, 9) ne '#default:');
    return if( $is_private);
    return { Name => $gi };
}
|
9150
|
|
|
|
|
|
|
|
|
9151
|
|
|
|
|
|
|
# returns the data for a SAX2 start_element event:
# { Name, Prefix, LocalName, NamespaceURI, Attributes }
# a copy of the data, without the attributes, is stored in the element
# (twig_elt_SAX2_data) to be re-used for the end_element event
# returns nothing for a private element (gi starting with #, except #default:)
sub _start_tag_data_SAX2 {
    my( $elt) = @_;

    my $gi = $XML::Twig::index2gi[$elt->{'gi'}];
    return if( (substr( $gi, 0, 1) eq '#') && (substr( $gi, 0, 9) ne '#default:'));

    my %tag;
    $tag{Name}         = $gi;
    $tag{Prefix}       = $elt->ns_prefix;
    $tag{LocalName}    = $elt->local_name;
    $tag{NamespaceURI} = $elt->namespace;

    # save a copy of the data so we can re-use it for the end tag
    $elt->{twig_elt_SAX2_data} = { %tag };

    # add the attributes
    $tag{Attributes} = $elt->_atts_to_SAX2;

    return \%tag;
}
|
9171
|
|
|
|
|
|
|
|
|
9172
|
|
|
|
|
|
|
# returns the attributes of the element in the form used in SAX2 events: a
# hash whose keys are "{NamespaceURI}LocalName" and whose values are
# { Name, Prefix, LocalName, NamespaceURI, Value }
# private attributes (name starting with #, except #default:) are left out
sub _atts_to_SAX2 {
    my $elt = shift;
    my %sax2_atts;
    foreach my $att (keys %{$elt->{att}}) {
        next if( $att =~ m{^#(?!default:)});

        my $prefix     = _ns_prefix( $att);
        my $local_name = _local_name( $att);
        my $uri        = $elt->namespace( $prefix);

        $sax2_atts{"{$uri}$local_name"} = { Name         => $att,
                                            Prefix       => $prefix,
                                            LocalName    => $local_name,
                                            NamespaceURI => $uri,
                                            Value        => $elt->{'att'}->{$att},
                                          };
    }
    return \%sax2_atts;
}
|
9190
|
|
|
|
|
|
|
|
|
9191
|
|
|
|
|
|
|
# sends a start_prefix_mapping event to $handler for each namespace
# declaration found in the SAX2 attributes of $data (as built by
# _atts_to_SAX2), and records the prefixes in the element
# (twig_end_prefix_mapping) for _end_prefix_mapping
# the recorded list is reset at each call, so that exporting the same
# element several times does not send duplicate end_prefix_mapping events
sub _start_prefix_mapping {
    my( $elt, $handler, $data) = @_;

    # forget the prefixes recorded by a previous export
    delete $elt->{twig_end_prefix_mapping};

    my $start_prefix_mapping = $handler->can( 'start_prefix_mapping') or return;

    # $data is not defined for private elements
    my $atts = ($data && $data->{Attributes}) || {};

    # namespace declarations: xmlns* local name, or in the xmlns namespace
    # (the URI is quoted as it is matched literally)
    my @declarations = grep { /^\{[^}]*\}xmlns/ || /^\{\Q$XMLNS_URI\E\}/ } keys %$atts;

    foreach my $declaration (@declarations) {
        my $att    = $atts->{$declaration};
        my $prefix = $att->{LocalName};
        if( $prefix eq 'xmlns') { $prefix = ''; }   # default namespace

        $start_prefix_mapping->( $handler, { Prefix => $prefix, NamespaceURI => $att->{Value} });
        push @{$elt->{twig_end_prefix_mapping}}, $prefix;
    }

    return;
}
|
9209
|
|
|
|
|
|
|
|
|
9210
|
|
|
|
|
|
|
# sends an end_prefix_mapping event to $handler (if it has the method) for
# each prefix recorded in the element by _start_prefix_mapping
sub _end_prefix_mapping {
    my( $elt, $handler) = @_;
    my $on_end_mapping = $handler->can( 'end_prefix_mapping') or return;
    foreach my $prefix (@{$elt->{twig_end_prefix_mapping}}) {
        $on_end_mapping->( $handler, { Prefix => $prefix} );
    }
}
|
9217
|
|
|
|
|
|
|
|
|
9218
|
|
|
|
|
|
|
# returns the data for a SAX2 end_element event: the data saved in the
# element (twig_elt_SAX2_data) by _start_tag_data_SAX2
# returns nothing for a private element (gi starting with #, except #default:)
sub _end_tag_data_SAX2 {
    my( $elt) = @_;
    my $gi = $XML::Twig::index2gi[$elt->{'gi'}];
    my $is_private = (substr( $gi, 0, 1) eq '#') && (substr( $gi, 0, 9) ne '#default:');
    return if( $is_private);
    return $elt->{twig_elt_SAX2_data};
}
|
9223
|
|
|
|
|
|
|
|
|
9224
|
|
|
|
|
|
|
# returns 1 if at least one of the direct children of the element is a text
# or an entity node, 0 otherwise
sub contains_text {
    my $elt = shift;
    for( my $child = $elt->{first_child}; $child; $child = $child->{next_sibling}) {
        return 1 if( $child->is_text || (exists $child->{'ent'}));
    }
    return 0;
}
|
9233
|
|
|
|
|
|
|
|
|
9234
|
|
|
|
|
|
|
# sets the text of the element
# - for a #PCDATA or #CDATA element the text is stored in the element itself
#   (the string is returned)
# - for any other element the content is replaced by a single #PCDATA child
#   holding the text (the element is returned)
# options:
# - force_pcdata: when set to a true value forces the text to be in a #PCDATA
#   even if the original element was a #CDATA
sub set_text {
    my( $elt, $string, %option) = @_;
    my $gi = $XML::Twig::index2gi[$elt->{'gi'}];

    if( $gi eq $PCDATA) {
        delete $elt->{empty};
        return $elt->{pcdata} = $string;
    }

    if( $gi eq $CDATA) {
        unless( $option{force_pcdata}) {
            $elt->{cdata} = $string;
            return $string;
        }
        # turn the #CDATA into a #PCDATA
        unless( $elt->{gi} = $XML::Twig::gi2index{$PCDATA}) { $elt->set_gi( $PCDATA); }
        $elt->{cdata} = '';
        delete $elt->{empty};
        return $elt->{pcdata} = $string;
    }

    if( $elt->contains_a_single( $PCDATA) ) {
        # optimized so we have a slight chance of not losing embedded comments and pi's
        $elt->{first_child}->set_pcdata( $string);
        return $elt;
    }

    # general case: replace all the children by a new #PCDATA
    my @old_children = $elt->_children;
    foreach my $old_child (@old_children) {
        $old_child->delete;
    }

    my $new_text = $elt->_new_pcdata( $string);
    $new_text->paste( $elt);

    delete $elt->{empty};

    return $elt;
}
|
9270
|
|
|
|
|
|
|
|
|
9271
|
|
|
|
|
|
|
# set the content of an element from a list of strings and elements
# an optional hashref given first replaces the attributes of the element
# the single argument $EMPTY just flags the element as empty
# returns the element
sub set_content {
    my $elt = shift;

    return $elt unless defined $_[0];

    # attributes can be given as a hash (passed by ref)
    if( ref $_[0] eq 'HASH') {
        my $atts = shift;
        $elt->del_atts; # usually useless but better safe than sorry
        $elt->set_atts( $atts);
        return $elt unless defined $_[0];
    }

    # check next argument for #EMPTY
    if( !(ref $_[0]) && ($_[0] eq $EMPTY) ) {
        $elt->{empty} = 1;
        return $elt;
    }

    my $gi = $XML::Twig::index2gi[$elt->{'gi'}];

    # case where we really want to do a set_text, the element is '#PCDATA'
    # or contains a single PCDATA and we only want to add text in it
    if( ($gi eq $PCDATA || $elt->contains_a_single( $PCDATA))
        && (@_ == 1) && !( ref $_[0])) {
        $elt->set_text( $_[0]);
        return $elt;
    }
    elsif( ($gi eq $CDATA) && (@_ == 1) && !( ref $_[0])) {
        $elt->{cdata} = $_[0];
        return $elt;
    }

    # delete the children
    my @old_children = $elt->_children;
    foreach my $old_child (@old_children) {
        $old_child->delete;
    }

    if( @_) { delete $elt->{empty}; }

    foreach my $item (@_) {
        if( ref( $item) && isa( $item, 'XML::Twig::Elt')) {
            # argument is an element
            $item->paste( 'last_child', $elt);
            next;
        }

        # argument is a string
        my $last = $elt->{last_child};
        if( $last && $elt->{last_child}->is_pcdata) {
            # previous child is also pcdata: just concatenate
            delete $last->{empty};
            $last->{pcdata} = $last->{pcdata} . $item;
        }
        else {
            # previous child is not a string: create a new pcdata element
            my $new_text = $elt->_new_pcdata( $item);
            $new_text->paste( 'last_child', $elt);
        }
    }

    return $elt;
}
|
9329
|
|
|
|
|
|
|
|
|
9330
|
|
|
|
|
|
|
# inserts an element (whose gi is given) as child of the element
# all children of the element are now children of the new element
# several gi's can be given, each one optionally followed by a hashref of
# attributes: each new element is inserted in the previous one
# returns the new (innermost) element
sub insert {
    my( $elt, @args) = @_;

    # first cut the children
    my @children;
    for( my $child = $elt->{first_child}; $child; $child = $child->{next_sibling}) {
        push @children, $child;
    }
    foreach my $child (@children) {
        $child->cut;
    }

    # insert elements
    while( my $gi = shift @args) {
        my $inserted = $elt->new( $gi);
        # add attributes if needed
        if( defined( $args[0]) && ( isa( $args[0], 'HASH')) ) {
            $inserted->set_atts( shift @args);
        }
        # paste the element
        $inserted->paste( $elt);
        delete $elt->{empty};
        $elt = $inserted;
    }

    # paste back the children
    foreach my $child (@children) {
        $child->paste( 'last_child', $elt);
    }
    return $elt;
}
|
9355
|
|
|
|
|
|
|
|
|
9356
|
|
|
|
|
|
|
# insert a new element
# $elt->insert_new_elt( $opt_position, $gi, $opt_atts_hash, @opt_content);
# the element is created with the same syntax as new
# position is the same as in paste, first_child by default
# returns the new element
sub insert_new_elt {
    my $elt      = shift;
    my $position = $_[0];

    # the first argument is a position only if it is one of the known ones
    if( grep { $position eq $_ } qw( before after first_child last_child)) {
        shift;
    }
    else {
        $position = 'first_child';
    }

    my $inserted = $elt->new( @_);
    $inserted->paste( $position, $elt);

    #if( defined $new_elt->{'att'}->{$ID}) { $new_elt->set_id( $new_elt->{'att'}->{$ID}); }

    return $inserted;
}
|
9376
|
|
|
|
|
|
|
|
|
9377
|
|
|
|
|
|
|
# wraps an element in elements whose gi's are given as arguments |
|
9378
|
|
|
|
|
|
|
# $elt->wrap_in( 'td', 'tr', 'table') wraps the element as a single |
|
9379
|
|
|
|
|
|
|
# cell in a table for example |
|
9380
|
|
|
|
|
|
|
# returns the new element |
|
9381
|
|
|
|
|
|
|
# Wrap the element in one new element per gi given, innermost first.
#   $elt->wrap_in( $gi1, $opt_atts1, $gi2, $opt_atts2, ...)
# A hash ref following a gi is passed to set_atts on that wrapper.
# The tree links (parent, siblings, first/last child) are rewired by hand,
# and the twig's root or current element is updated when the wrapped
# element held that role. Returns the outermost wrapper, or the element
# itself when no gi is given.
sub wrap_in {
    my $elt = shift;

    while ( my $gi = shift @_ ) {
        my $wrapper = $elt->new($gi);

        # the wrapper takes over as the element being parsed
        if ( $elt->{twig_current} ) {
            my $t = $elt->twig;
            $t->{twig_current} = $wrapper;
            delete $elt->{'twig_current'};
            $wrapper->{'twig_current'} = 1;
        }

        if ( my $parent = $elt->{parent} ) {
            $wrapper->{parent} = $parent;
            weaken( $wrapper->{parent} ) if $XML::Twig::weakrefs;

            if ( $parent->{first_child} == $elt ) {
                $parent->{first_child} = $wrapper;
            }
            if ( $parent->{last_child} == $elt ) {
                delete $parent->{empty};
                $parent->{last_child} = $wrapper;
                weaken( $parent->{last_child} ) if $XML::Twig::weakrefs;
            }
        }
        else {
            # no parent: we may be wrapping the root of the twig
            my $twig = $elt->twig;
            if ( $twig && $twig->root && ( $twig->root eq $elt ) ) {
                $twig->set_root($wrapper);
            }
        }

        # the wrapper takes the element's place among its siblings
        if ( my $left = $elt->{prev_sibling} ) {
            $wrapper->{prev_sibling} = $left;
            weaken( $wrapper->{prev_sibling} ) if $XML::Twig::weakrefs;
            $left->{next_sibling} = $wrapper;
        }

        if ( my $right = $elt->{next_sibling} ) {
            $wrapper->{next_sibling} = $right;
            $right->{prev_sibling}   = $wrapper;
            weaken( $right->{prev_sibling} ) if $XML::Twig::weakrefs;
        }

        # the element becomes the only child of the wrapper
        $wrapper->{first_child} = $elt;
        delete $wrapper->{empty};
        $wrapper->{last_child} = $elt;
        weaken( $wrapper->{last_child} ) if $XML::Twig::weakrefs;

        $elt->{parent} = $wrapper;
        weaken( $elt->{parent} ) if $XML::Twig::weakrefs;
        $elt->{prev_sibling} = undef;
        weaken( $elt->{prev_sibling} ) if $XML::Twig::weakrefs;
        $elt->{next_sibling} = undef;

        # add the attributes if the next argument is a hash ref
        if ( defined( $_[0] ) && ( isa( $_[0], 'HASH' ) ) ) {
            $wrapper->set_atts( shift @_ );
        }

        # the next gi wraps the wrapper we just built
        $elt = $wrapper;
    }

    return $elt;
}
|
9430
|
|
|
|
|
|
|
|
|
9431
|
|
|
|
|
|
|
# Put $elt in the tree at the place occupied by $ref.
#   $elt->replace( $ref)
# $elt is first cut if it has a parent. It then takes over $ref's parent
# and sibling links (or becomes the twig root when $ref was the root).
# $ref ends up with its parent and sibling links cleared; its children are
# left untouched. Returns $ref.
sub replace {
    my ( $elt, $ref ) = @_;

    $elt->cut if $elt->{parent};

    if ( my $parent = $ref->{parent} ) {
        $elt->{parent} = $parent;
        weaken( $elt->{parent} ) if $XML::Twig::weakrefs;

        if ( $parent->{first_child} == $ref ) {
            $parent->{first_child} = $elt;
        }
        if ( $parent->{last_child} == $ref ) {
            delete $parent->{empty};
            $parent->{last_child} = $elt;
            weaken( $parent->{last_child} ) if $XML::Twig::weakrefs;
        }
    }
    elsif ( $ref->twig && $ref == $ref->twig->root ) {
        $ref->twig->set_root($elt);
    }

    if ( my $left = $ref->{prev_sibling} ) {
        $elt->{prev_sibling} = $left;
        weaken( $elt->{prev_sibling} ) if $XML::Twig::weakrefs;
        $left->{next_sibling} = $elt;
    }
    if ( my $right = $ref->{next_sibling} ) {
        $elt->{next_sibling}   = $right;
        $right->{prev_sibling} = $elt;
        weaken( $right->{prev_sibling} ) if $XML::Twig::weakrefs;
    }

    # detach the replaced element from the tree
    $ref->{parent} = undef;
    weaken( $ref->{parent} ) if $XML::Twig::weakrefs;
    $ref->{prev_sibling} = undef;
    weaken( $ref->{prev_sibling} ) if $XML::Twig::weakrefs;
    $ref->{next_sibling} = undef;

    return $ref;
}
|
9458
|
|
|
|
|
|
|
|
|
9459
|
|
|
|
|
|
|
# Replace the invocant by one or more elements.
#   $ref->replace_with( $elt, @more_elts)
# $elt takes the place of $ref (through $elt->replace), then the remaining
# elements are pasted after it so that they end up in the order given.
# Returns the first replacement element.
sub replace_with {
    my ( $ref, $elt, @followers ) = @_;

    $elt->replace($ref);

    # pasting in reverse order right after $elt keeps the original order
    for my $follower ( reverse @followers ) {
        $follower->paste( after => $elt );
    }

    return $elt;
}
|
9467
|
|
|
|
|
|
|
|
|
9468
|
|
|
|
|
|
|
|
|
9469
|
|
|
|
|
|
|
# move an element, same syntax as paste, except the element is first cut |
|
9470
|
|
|
|
|
|
|
# Move an element: cut it, then paste it with the arguments given
# (same syntax as paste). Returns the element.
sub move {
    my ( $elt, @paste_args ) = @_;

    $elt->cut;
    $elt->paste(@paste_args);

    return $elt;
}
|
9476
|
|
|
|
|
|
|
|
|
9477
|
|
|
|
|
|
|
|
|
9478
|
|
|
|
|
|
|
# adds a prefix to an element, creating a pcdata child if needed |
|
9479
|
|
|
|
|
|
|
# Add a prefix to an element, creating a pcdata child if needed.
#   $elt->prefix( $text, $opt_option)
# With the option 'asis' the prefix is only merged into an existing text
# node that is itself flagged asis; without it, only into one that is not.
# When no suitable text node exists, a new pcdata element is pasted (before
# a text element, or as first child otherwise) and flagged asis if
# requested. Returns the element.
sub prefix {
    my ( $elt, $prefix, $option ) = @_;
    my $asis = ( $option && ( $option eq 'asis' ) ) ? 1 : 0;

    # true when the node's asis flag agrees with the requested mode
    my $mode_matches = sub { my $node = shift; return !( $asis xor $node->{asis} ); };

    if ( ( exists $elt->{'pcdata'} ) && $mode_matches->($elt) ) {
        # the element is itself a text node
        delete $elt->{empty};
        $elt->{pcdata} = $prefix . $elt->{pcdata};
    }
    elsif ($elt->{first_child}
        && $elt->{first_child}->is_pcdata
        && $mode_matches->( $elt->{first_child} ) )
    {
        my $text_node = $elt->{first_child};
        $text_node->set_pcdata( $prefix . $text_node->pcdata );
    }
    else {
        my $text_node = $elt->_new_pcdata($prefix);
        my $where = ( exists $elt->{'pcdata'} ) ? 'before' : 'first_child';
        $text_node->paste( $where => $elt );
        $text_node->set_asis if $asis;
    }

    return $elt;
}
|
9501
|
|
|
|
|
|
|
|
|
9502
|
|
|
|
|
|
|
# adds a suffix to an element, creating a pcdata child if needed |
|
9503
|
|
|
|
|
|
|
# Add a suffix to an element, creating a pcdata child if needed.
#   $elt->suffix( $text, $opt_option)
# With the option 'asis' the suffix is only merged into an existing text
# node that is itself flagged asis; without it, only into one that is not.
# When no suitable text node exists, a new pcdata element is pasted (after
# a text element, or as last child otherwise) and flagged asis if
# requested. Returns the element.
sub suffix {
    my ( $elt, $suffix, $option ) = @_;
    my $asis = ( $option && ( $option eq 'asis' ) ) ? 1 : 0;

    # true when the node's asis flag agrees with the requested mode
    my $mode_matches = sub { my $node = shift; return !( $asis xor $node->{asis} ); };

    if ( ( exists $elt->{'pcdata'} ) && $mode_matches->($elt) ) {
        # the element is itself a text node
        delete $elt->{empty};
        $elt->{pcdata} = $elt->{pcdata} . $suffix;
    }
    elsif ($elt->{last_child}
        && $elt->{last_child}->is_pcdata
        && $mode_matches->( $elt->{last_child} ) )
    {
        my $text_node = $elt->{last_child};
        $text_node->set_pcdata( $text_node->pcdata . $suffix );
    }
    else {
        my $text_node = $elt->_new_pcdata($suffix);
        my $where = ( exists $elt->{'pcdata'} ) ? 'after' : 'last_child';
        $text_node->paste( $where => $elt );
        $text_node->set_asis if $asis;
    }

    return $elt;
}
|
9523
|
|
|
|
|
|
|
|
|
9524
|
|
|
|
|
|
|
# create a path to an element ('/root/.../gi) |
|
9525
|
|
|
|
|
|
|
# Return the path to the element as '/root/.../gi': the gi of the element
# and of each of its ancestors, outermost first, each preceded by '/'.
sub path {
    my $elt = shift;

    # gi's are collected from the element outwards, then flipped
    my @gis = map { $_->gi } ( $elt, $elt->ancestors );

    return "/" . join( "/", reverse @gis );
}
|
9530
|
|
|
|
|
|
|
|
|
9531
|
|
|
|
|
|
|
# Return an XPath-like expression locating the element: one '/gi' step per
# ancestor (outermost first) down to the element itself. A step gets a
# '[n]' position unless the node is the first with that gi and has no
# following sibling with the same gi.
sub xpath {
    my $elt = shift;
    my $xpath;

    for my $step ( reverse $elt->ancestors_or_self ) {
        my $gi = $XML::Twig::index2gi[ $step->{'gi'} ];
        $xpath .= "/$gi";

        # position among the siblings with the same gi, 1-based
        my $index = $step->prev_siblings($gi) + 1;
        if ( ( $index != 1 ) || $step->next_sibling($gi) ) {
            $xpath .= "[$index]";
        }
    }

    return $xpath;
}
|
9543
|
|
|
|
|
|
|
|
|
9544
|
|
|
|
|
|
|
# methods used mainly by wrap_children |
|
9545
|
|
|
|
|
|
|
|
|
9546
|
|
|
|
|
|
|
# return a string with the start tags of the children of the element |
|
9547
|
|
|
|
|
|
|
# for an element ...... |
|
9548
|
|
|
|
|
|
|
# returns '' |
|
9549
|
|
|
|
|
|
|
# Return the concatenated start tags of the children of the element.
# Each child gets an id (add_id) before its start tag is generated, and
# start tags are produced with the escape_gt option. Pretty printing is
# switched to 'none' for the duration and restored afterwards.
# The %opt arguments are accepted but not used.
sub _stringify_struct {
    my ( $elt, %opt ) = @_;

    my $saved_style = set_pretty_print('none');

    my @tags;
    for my $child ( $elt->_children ) {
        $child->add_id;
        push @tags, $child->start_tag( { escape_gt => 1 } ) || '';
    }

    set_pretty_print($saved_style);

    return join( '', @tags );
}
|
9558
|
|
|
|
|
|
|
|
|
9559
|
|
|
|
|
|
|
# wrap a series of elements in a new one |
|
9560
|
|
|
|
|
|
|
# Wrap a series of elements in a new one.
#   $elt->_wrap_range( $gi, $opt_atts, $range)
# $range is a string of start tags; the elements to wrap are looked up in
# the twig through the id attribute found in each tag. The first element
# is wrapped in a new $gi element (created with a copy of $opt_atts), the
# others are then moved in as its last children.
# Always returns '' (the sub is used as the replacement part of a s///e).
sub _wrap_range {
    my $elt   = shift;
    my $gi    = shift;
    my $atts  = isa( $_[0], 'HASH' ) ? shift : undef;
    my $range = shift;    # the string with the tags to wrap

    my $t = $elt->twig;

    # get the tags to wrap
    my @to_wrap;
    while ( $range =~ m{<\w+\s+[^>]*id=("[^"]*"|'[^']*')[^>]*>}g ) {
        # $1 still has its quotes, strip them to get the id
        push @to_wrap, $t->elt_id( substr( $1, 1, -1 ) );
    }

    return '' unless @to_wrap;

    my $to_wrap = shift @to_wrap;

    # each wrapper gets its own copy of the attributes; the hash ref is
    # optional, so do not dereference undef (fatal under strict refs)
    my %atts = $atts ? %$atts : ();
    my $new_elt = $to_wrap->wrap_in( $gi, \%atts );
    $_->move( last_child => $new_elt ) foreach (@to_wrap);

    return '';
}
|
9582
|
|
|
|
|
|
|
|
|
9583
|
|
|
|
|
|
|
# wrap children matching a regexp in a new element |
|
9584
|
|
|
|
|
|
|
# Wrap children matching a regexp in a new element.
#   $elt->wrap_children( $regexp, $gi, $opt_atts)
# The children are turned into a string of start tags, each <tag> in the
# regexp is expanded into a regexp matching such a start tag, and every
# match is handed to _wrap_range, which does the actual wrapping.
# Returns the element.
sub wrap_children {
    my ( $elt, $regexp, $gi, $atts ) = @_;

    $atts = {} unless $atts;

    # stringify the elt structure
    my $structure = $elt->_stringify_struct;

    # in the regexp, replace gi's by the proper regexp
    $regexp =~ s{(<[^>]*>)}{_match_expr( $1)}eg;

    # then do the actual replace
    $structure =~ s{($regexp)}{$elt->_wrap_range( $gi, $atts, $1)}eg;

    return $elt;
}
|
9595
|
|
|
|
|
|
|
|
|
9596
|
|
|
|
|
|
|
# Turn a start tag (as written in a wrap_children regexp) into a regexp:
# the tag is parsed by XML::Twig::_parse_start_tag and the resulting gi and
# attribute list are passed on to _match_tag.
sub _match_expr {
    my $tag = shift;

    my @gi_and_atts = XML::Twig::_parse_start_tag($tag);

    return _match_tag(@gi_and_atts);
}
|
9601
|
|
|
|
|
|
|
|
|
9602
|
|
|
|
|
|
|
|
|
9603
|
|
|
|
|
|
|
# Build a regexp (as a string) matching a start tag.
#   _match_tag( $gi, %atts)
# The regexp matches '<gi', then each attribute given (in sorted name
# order, with its value in single or double quotes), then the end of the
# tag. Attribute names and values are matched literally; the gi is
# interpolated as is. The result is wrapped in a non-capturing group.
sub _match_tag {
    my ( $gi, %atts ) = @_;

    my $string = "<$gi\\b";

    foreach my $key ( sort keys %atts ) {
        my $val = qq{\Q$atts{$key}\E};

        # quote the name too: '.' is legal in an XML name and must not
        # act as a wildcard
        $string .= qq{[^>]*\Q$key\E=(?:"$val"|'$val')};
    }

    $string .= qq{[^>]*>};

    return "(?:$string)";
}
|
9613
|
|
|
|
|
|
|
|
|
9614
|
|
|
|
|
|
|
# Turn a child into an attribute.
#   $elt->field_to_att( $cond, $opt_att)
# The text of the first child matching $cond becomes the value of the
# attribute $opt_att (named after $cond by default) and the child is cut.
# Returns the element, or undef when no child matches.
sub field_to_att {
    my ( $elt, $cond, $att ) = @_;

    $att ||= $cond;

    my $child = $elt->first_child($cond);
    return undef unless $child;

    $elt->set_att( $att => $child->text );
    $child->cut;

    return $elt;
}
|
9622
|
|
|
|
|
|
|
|
|
9623
|
|
|
|
|
|
|
# Turn an attribute into a child.
#   $elt->att_to_field( $att, $opt_tag)
# A new element $opt_tag (named after $att by default) holding the value of
# the attribute is inserted as first child, then the attribute is deleted.
# Returns the element.
sub att_to_field {
    my ( $elt, $att, $tag ) = @_;

    $tag = $att unless $tag;

    my $field = $elt->insert_new_elt( first_child => $tag, $elt->{'att'}->{$att} );
    $elt->del_att($att);

    return $elt;
}
|
9630
|
|
|
|
|
|
|
|
|
9631
|
|
|
|
|
|
|
# sort children methods |
|
9632
|
|
|
|
|
|
|
|
|
9633
|
|
|
|
|
|
|
# Sort the children of the element on the value of one of their fields.
#   $elt->sort_children_on_field( $field, %sort_options)
# The sort key of a child is $child->field( $field); the options are passed
# on to sort_children, whose result is returned.
sub sort_children_on_field {
    my ( $elt, $field, @sort_options ) = @_;

    return $elt->sort_children( sub { return $_[0]->field($field) }, @sort_options );
}
|
9639
|
|
|
|
|
|
|
|
|
9640
|
|
|
|
|
|
|
# Sort the children of the element on the value of one of their attributes.
#   $elt->sort_children_on_att( $att, %sort_options)
# The sort key of a child is the value stored for $att in its attribute
# hash; the options are passed on to sort_children, whose result is
# returned.
sub sort_children_on_att {
    my ( $elt, $att, @sort_options ) = @_;

    return $elt->sort_children( sub { return $_[0]->{'att'}->{$att} }, @sort_options );
}
|
9646
|
|
|
|
|
|
|
|
|
9647
|
|
|
|
|
|
|
# Sort the children of the element on their text.
#   $elt->sort_children_on_value( %sort_options)
# The sort key is computed by the text sub of this package; the options are
# passed on to sort_children, whose result is returned.
sub sort_children_on_value {
    my ( $elt, @sort_options ) = @_;

    #my $get_key= eval qq{ sub { $NO_WARNINGS; return \$_[0]->text } };
    return $elt->sort_children( \&text, @sort_options );
}
|
9653
|
|
|
|
|
|
|
|
|
9654
|
|
|
|
|
|
|
# Sort the children of the element.
#   $elt->sort_children( $get_key, %opt)
# $get_key is a code ref called with a child, returning its sort key.
# Options:
#   type  => 'alpha' (default, keys compared with cmp) or 'numeric' (<=>)
#   order => 'normal' (default) or 'reverse'
# The children are cut, sorted, and put back with set_content, whose
# result is returned. Croaks on an unknown type, before the element is
# touched.
sub sort_children {
    my ( $elt, $get_key, %opt ) = @_;
    $opt{order} ||= 'normal';
    $opt{type}  ||= 'alpha';

    # validate first: croaking after cut_children would leave the element
    # without its children
    unless ( ( $opt{type} eq 'numeric' ) || ( $opt{type} eq 'alpha' ) ) {
        croak "wrong sort type '$opt{type}', should be either 'alpha' or 'numeric'";
    }

    my @children = $elt->cut_children;

    # compute each key only once, sort on the keys, then drop them
    my @keyed = map { [ $get_key->($_), $_ ] } @children;
    if ( $opt{type} eq 'numeric' ) {
        @children = map { $_->[1] } sort { $a->[0] <=> $b->[0] } @keyed;
    }
    else {
        @children = map { $_->[1] } sort { $a->[0] cmp $b->[0] } @keyed;
    }

    @children = reverse @children if ( $opt{order} eq 'reverse' );

    return $elt->set_content(@children);
}
|
9677
|
|
|
|
|
|
|
|
|
9678
|
|
|
|
|
|
|
|
|
9679
|
|
|
|
|
|
|
# comparison methods |
|
9680
|
|
|
|
|
|
|
|
|
9681
|
|
|
|
|
|
|
# Return 1 if the element comes before $other according to cmp (cmp
# returns -1), 0 otherwise.
sub before {
    my ( $elt, $other ) = @_;

    return ( $elt->cmp($other) == -1 ) ? 1 : 0;
}
|
9685
|
|
|
|
|
|
|
|
|
9686
|
|
|
|
|
|
|
# Return 1 if the element comes after $other according to cmp (cmp
# returns 1), 0 otherwise.
sub after {
    my ( $elt, $other ) = @_;

    return ( $elt->cmp($other) == 1 ) ? 1 : 0;
}
|
9690
|
|
|
|
|
|
|
|
|
9691
|
|
|
|
|
|
|
# Return 1 if cmp places the element before $other (cmp returns -1),
# 0 otherwise.
sub lt {
    my ( $elt, $other ) = @_;

    if ( $elt->cmp($other) == -1 ) { return 1; }
    else                           { return 0; }
}
|
9696
|
|
|
|
|
|
|
|
|
9697
|
|
|
|
|
|
|
# Return 0 if cmp places the element after $other (cmp returns 1),
# 1 otherwise.
sub le {
    my ( $elt, $other ) = @_;

    if ( $elt->cmp($other) == 1 ) { return 0; }
    else                          { return 1; }
}
|
9702
|
|
|
|
|
|
|
|
|
9703
|
|
|
|
|
|
|
# Return 1 if cmp places the element after $other (cmp returns 1),
# 0 otherwise.
sub gt {
    my ( $elt, $other ) = @_;

    if ( $elt->cmp($other) == 1 ) { return 1; }
    else                          { return 0; }
}
|
9708
|
|
|
|
|
|
|
|
|
9709
|
|
|
|
|
|
|
# Return 0 if cmp places the element before $other (cmp returns -1),
# 1 otherwise.
sub ge {
    my ( $elt, $other ) = @_;

    if ( $elt->cmp($other) == -1 ) { return 0; }
    else                           { return 1; }
}
|
9714
|
|
|
|
|
|
|
|
|
9715
|
|
|
|
|
|
|
|
|
9716
|
|
|
|
|
|
|
# Compare the position of two elements.
#   $elt->cmp( $other)
# Returns 0 if they are the same element, 1 if the element starts after
# $other, -1 if it starts before, and undef if the two elements do not
# share the same topmost ancestor.
sub cmp {
    my ( $elt, $other ) = @_;

    # easy cases
    return 0  if ( $elt == $other );
    return 1  if ( $elt->in($other) );    # elt in other => elt starts after other
    return -1 if ( $other->in($elt) );    # other in elt => elt starts before other

    # ancestors does not include the element itself
    my @elt_line   = ( $elt,   $elt->ancestors );
    my @other_line = ( $other, $other->ancestors );

    # the 2 elements are not in the same twig
    return undef unless ( $elt_line[-1] == $other_line[-1] );

    # find the first non common ancestors (they are siblings)
    my $elt_anc   = pop @elt_line;
    my $other_anc = pop @other_line;

    while ( $elt_anc == $other_anc ) {
        $elt_anc   = pop @elt_line;
        $other_anc = pop @other_line;
    }

    # from there move left and right and figure out the order: walk away
    # from both siblings in both directions, one step at a time, until one
    # walk runs off the end of the sibling list or meets the other node
    my ( $elt_left, $elt_right )     = ( $elt_anc,   $elt_anc );
    my ( $other_left, $other_right ) = ( $other_anc, $other_anc );
    while (1) {
        $elt_left = $elt_left->{prev_sibling} or return -1;
        return 1 if ( $elt_left == $other_right );

        $elt_right = $elt_right->{next_sibling} or return 1;
        return -1 if ( $elt_right == $other_left );

        $other_left = $other_left->{prev_sibling} or return 1;
        return -1 if ( $other_left == $elt_right );

        $other_right = $other_right->{next_sibling} or return -1;
        return 1 if ( $other_right == $elt_left );
    }
}
|
9753
|
|
|
|
|
|
|
|
|
9754
|
|
|
|
|
|
|
# Return a text dump of the element and its subtree, one node per line,
# indented according to the level of each node.
#   $elt->_dump( $option)
# $option is a hash ref; recognized keys:
#   atts       => show the attributes of elements (default 1)
#   extra      => also dump the extra data attached to nodes (default 0)
#   short_text => length passed to _short_text to shorten texts (default 40)
sub _dump {
    my ( $elt, $option ) = @_;

    my $atts       = defined $option->{atts}       ? $option->{atts}       : 1;
    my $extra      = defined $option->{extra}      ? $option->{extra}      : 0;
    my $short_text = defined $option->{short_text} ? $option->{short_text} : 40;

    my $sp        = '| ';
    my $indent    = $sp x $elt->level;
    my $indent_sp = ' ' x $elt->level;

    my $dump = '';
    if ( $elt->is_elt ) {
        $dump .= $indent . '|-' . $XML::Twig::index2gi[ $elt->{'gi'} ];

        if ( $atts && ( my @atts = $elt->att_names ) ) {
            $dump .= ' ' . join( ' ', map { qq{$_="} . $elt->{'att'}->{$_} . qq{"} } @atts );
        }

        $dump .= "\n";
        if ($extra) { $dump .= $elt->_dump_extra_data( $indent, $indent_sp, $short_text ); }

        # walk the child list through the sibling links
        my @children;
        for ( my $child = $elt->{first_child} ; $child ; $child = $child->{next_sibling} ) {
            push @children, $child;
        }
        $dump .= join( "", map { $_->_dump($option) } @children );
    }
    else {
        # non-element nodes: one line giving the kind of node and its text
        if ( ( exists $elt->{'pcdata'} ) ) {
            $dump .= "$indent|-PCDATA: '" . _short_text( $elt->{pcdata}, $short_text ) . "'\n";
        }
        elsif ( ( exists $elt->{'ent'} ) ) {
            $dump .= "$indent|-ENTITY: '" . _short_text( $elt->{ent}, $short_text ) . "'\n";
        }
        elsif ( ( exists $elt->{'cdata'} ) ) {
            $dump .= "$indent|-CDATA: '" . _short_text( $elt->{cdata}, $short_text ) . "'\n";
        }
        elsif ( ( exists $elt->{'comment'} ) ) {
            $dump .= "$indent|-COMMENT: '" . _short_text( $elt->comment_string, $short_text ) . "'\n";
        }
        elsif ( ( exists $elt->{'target'} ) ) {
            $dump .= "$indent|-PI: '" . $elt->{target} . "' - '" . _short_text( $elt->{data}, $short_text ) . "'\n";
        }
        if ($extra) { $dump .= $elt->_dump_extra_data( $indent, $indent_sp, $short_text ); }
    }

    return $dump;
}
|
9793
|
|
|
|
|
|
|
|
|
9794
|
|
|
|
|
|
|
# Return the dump lines for the extra data attached to a node: the data
# returned by extra_data, each entry of extra_data_in_pcdata (with its
# offset), and extra_data_before_end_tag, in that order. Texts are shortened
# by _short_text; in each line, newlines are replaced by $indent_sp.
sub _dump_extra_data {
    my ( $elt, $indent, $indent_sp, $short_text ) = @_;
    my $dump = '';

    # flatten one line and append it to the dump
    my $add_line = sub {
        my $line = shift;
        $line =~ s{\n}{$indent_sp}g;
        $dump .= $line . "\n";
    };

    if ( $elt->extra_data ) {
        $add_line->( $indent . "|-- (cpi before) '" . _short_text( $elt->extra_data, $short_text ) . "'" );
    }

    if ( $elt->{extra_data_in_pcdata} ) {
        foreach my $data ( @{ $elt->{extra_data_in_pcdata} } ) {
            $add_line->( $indent . "|-- (cpi offset $data->{offset}) '" . _short_text( $data->{text}, $short_text ) . "'" );
        }
    }

    if ( $elt->{extra_data_before_end_tag} ) {
        $add_line->( $indent . "|-- (cpi end) '" . _short_text( $elt->{extra_data_before_end_tag}, $short_text ) . "'" );
    }

    return $dump;
}
|
9816
|
|
|
|
|
|
|
|
|
9817
|
|
|
|
|
|
|
|
|
9818
|
|
|
|
|
|
|
# Shorten a string for display.
#   _short_text( $string, $length)
# The string is returned unchanged when $length is false or when the string
# is shorter than $length. Otherwise its middle is replaced by ' ... ',
# keeping roughly equal parts of the start and of the end.
sub _short_text {
    my ( $string, $length ) = @_;

    return $string unless $length && ( length($string) >= $length );

    # 5 is the length of the ' ... ' separator
    my $head_len = ( length($string) - 5 ) / 2;
    my $tail_len = length($string) - ( $head_len + 5 );

    return substr( $string, 0, $head_len ) . ' ... ' . substr( $string, -$tail_len );
}
|
9825
|
|
|
|
|
|
|
|
|
9826
|
|
|
|
|
|
|
|
|
9827
|
|
|
|
|
|
|
# Join the true arguments with ' && ' (through _join_defined).
sub _and {
    my @conditions = @_;
    return _join_defined( ' && ', @conditions );
}
|
9828
|
|
|
|
|
|
|
# Join the arguments that are true, using the first argument as separator.
# Note that the filter is on truth, not definedness: '' and 0 are dropped
# as well as undef.
sub _join_defined {
    my ( $separator, @candidates ) = @_;
    return join( $separator, grep { $_ } @candidates );
}
|
9829
|
|
|
|
|
|
|
|
|
9830
|
|
|
|
|
|
|
1; |
|
9831
|
|
|
|
|
|
|
__END__ |