File Coverage

lib/XML/DOM/Lite/Parser.pm
Criterion Covered Total %
statement 110 137 80.2
branch 36 52 69.2
condition n/a
subroutine 15 17 88.2
pod 0 3 0.0
total 161 209 77.0


line stmt bran cond sub pod time code
1             package XML::DOM::Lite::Parser;
2              
3 8     8   46211 use XML::DOM::Lite::Document;
  8         20  
  8         222  
4 8     8   52 use XML::DOM::Lite::Node;
  8         42  
  8         214  
5 8     8   80 use XML::DOM::Lite::Constants qw(:all);
  8         14  
  8         24782  
6              
7             #========================================================================
8             # These regular expressions have been gratefully borrowed from:
9             #
10             # REX/Perl 1.0
11             # Robert D. Cameron "REX: XML Shallow Parsing with Regular Expressions",
12             # Technical Report TR 1998-17, School of Computing Science, Simon Fraser
13             # University, November, 1998.
14             # Copyright (c) 1998, Robert D. Cameron.
15             # The following code may be freely used and distributed provided that
16             # this copyright and citation notice remains intact and that modifications
17             # or additions are clearly identified.
18              
19             our $TextSE = "[^<]+";
20             our $UntilHyphen = "[^-]*-";
21             our $Until2Hyphens = "$UntilHyphen(?:[^-]$UntilHyphen)*-";
22             our $CommentCE = "$Until2Hyphens>?";
23             our $UntilRSBs = "[^\\]]*](?:[^\\]]+])*]+";
24             our $CDATA_CE = "$UntilRSBs(?:[^\\]>]$UntilRSBs)*>";
25             our $S = "[ \\n\\t\\r]+";
26             our $NameStrt = "[A-Za-z_:]|[^\\x00-\\x7F]";
27             our $NameChar = "[A-Za-z0-9_:.-]|[^\\x00-\\x7F]";
28             our $Name = "(?:$NameStrt)(?:$NameChar)*";
29             our $QuoteSE = "\"[^\"]*\"|'[^']*'";
30             our $DT_IdentSE = "$S$Name(?:$S(?:$Name|$QuoteSE))*";
31             our $MarkupDeclCE = "(?:[^\\]\"'><]+|$QuoteSE)*>";
32             our $S1 = "[\\n\\r\\t ]";
33             our $UntilQMs = "[^?]*\\?+";
34             our $PI_Tail = "\\?>|$S1$UntilQMs(?:[^>?]$UntilQMs)*>";
35             our $DT_ItemSE = "<(?:!(?:--$Until2Hyphens>|[^-]$MarkupDeclCE)|\\?$Name(?:$PI_Tail))|%$Name;|$S";
36             our $DocTypeCE = "$DT_IdentSE(?:$S)?(?:\\[(?:$DT_ItemSE)*](?:$S)?)?>?";
37             our $DeclCE = "--(?:$CommentCE)?|\\[CDATA\\[(?:$CDATA_CE)?|DOCTYPE(?:$DocTypeCE)?";
38             our $PI_CE = "$Name(?:$PI_Tail)?";
39             our $EndTagCE = "$Name(?:$S)?>?";
40             our $AttValSE = "\"[^<\"]*\"|'[^<']*'";
41             our $ElemTagCE = "$Name(?:$S$Name(?:$S)?=(?:$S)?(?:$AttValSE))*(?:$S)?/?>?";
42             our $ElementCE = "/(?:$EndTagCE)?|(?:$ElemTagCE)?";
43             our $MarkupSPE = "<(?:!(?:$DeclCE)?|\\?(?:$PI_CE)?|(?:$ElementCE)?)";
44             our $XML_SPE = "$TextSE|$MarkupSPE";
45              
46             #========================================================================
47              
48             # these have captures for parsing the attributes
49             our $AttValSE2 = "\"([^<\"]*)\"|'([^<']*)'";
50             our $ElemTagCE2 = "(?:($Name)(?:$S)?=(?:$S)?($AttValSE2))+(?:$S)?/?>?";
51              
52             sub new {
53 11     11 0 174 my ($class, %options) = @_;
54 11         70 my $self = bless {
55             stack => [ ],
56             options => \%options,
57             }, $class;
58 11         39 return $self;
59             }
60              
61             sub parse {
62 12     12 0 9170 my ($self, $XML) = (shift, shift);
63 12 50       55 unless (ref($self)) {
64 0         0 $self = __PACKAGE__->new(@_);
65             }
66 12         239 my @nodes = $self->_shallow_parse($XML);
67              
68 12         177 $self->{document} = XML::DOM::Lite::Document->new();
69 12         26 push @{$self->{stack}}, $self->{document};
  12         47  
70              
71 12         33 STEP : foreach my $n ( @nodes ) {
72 103 100       262 substr($n, 0, 1) eq '<' && do {
73 76 100       174 substr($n, 1, 1) eq '!' && do {
74 1         5 $self->_handle_decl_node($n);
75 1         3 next STEP;
76             };
77 75 100       157 substr($n, 1, 1) eq '?' && do {
78 1         6 $self->_handle_pi_node($n);
79 1         4 next STEP;
80             };
81 74         174 $self->_handle_element_node($n);
82 74         158 next STEP;
83             };
84 27         74 $self->_handle_text_node($n);
85             }
86              
87 12         60 return $self->{document};
88             }
89              
90             sub parseFile {
91 0     0 0 0 my ($self, $filename) = @_;
92 0 0       0 unless (ref $self) {
93 0         0 $self = __PACKAGE__->new;
94             }
95 0         0 my $stream;
96             {
97 0 0       0 open FH, $filename or
  0         0  
98             die "can't open file $filename for reading ".$!;
99 0         0 local $/ = undef;
100 0         0 $stream = ;
101 0         0 close FH;
102             }
103 0         0 return $self->parse($stream);
104             }
105              
106             sub _shallow_parse {
107 12     12   33 my ($self, $XML) = @_;
108              
109             # Check the options.
110 12         25 my %options = %{$self->{options}};
  12         71  
111 12 100       60 if (defined($options{'whitespace'})) {
112 11         29 my $mode = $options{'whitespace'};
113 11 100       271 if (index($mode, 'strip') >= 0) {
114 9         458 $XML =~ s/>$S/>/sg;
115 9         3689 $XML =~ s/$S
116             }
117 11 100       62 if (index($mode, 'normalize') >= 0) {
118 2         67 $XML =~ s/$S/ /sg
119             }
120             }
121              
122 12         3909 return $XML =~ /$XML_SPE/go;
123             }
124              
125             sub _handle_decl_node {
126 1     1   3 my ($self, $decl) = @_;
127 1         2 my $kind;
128 1         3 my $length = length($decl);
129 1         2 my $start = 1;
130 1         2 $parent = $self->{stack}->[$#{$self->{stack}}];
  1         3  
131 1 50       5 substr($decl, 0, 4) eq '