|  line  | 
 stmt  | 
 bran  | 
 cond  | 
 sub  | 
 pod  | 
 time  | 
 code  | 
| 
1
 | 
  
 
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 package Catmandu::Importer::HOCR;  | 
| 
2
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
116653
 | 
 use Catmandu::Sane;  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
214745
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
    | 
| 
3
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
329
 | 
 use Moo;  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
    | 
| 
4
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
1002
 | 
 use XML::LibXML::Reader;  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
52369
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
114
 | 
    | 
| 
5
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
10
 | 
 use Catmandu::Error;  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
32
 | 
    | 
| 
6
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
5
 | 
 use namespace::clean;  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
    | 
| 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 with "Catmandu::Importer";  | 
| 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 has reader => ( is => "lazy", init_arg => undef );  | 
| 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 has current_page => (  | 
| 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     is => "rw",  | 
| 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     default => sub { undef; }  | 
| 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 );  | 
| 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _build_reader {  | 
| 
18
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
 
 | 
18
 | 
     my $self = $_[0];  | 
| 
19
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
34
 | 
     binmode $self->fh;  | 
| 
20
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
527
 | 
     my $reader = XML::LibXML::Reader->new(  | 
| 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         IO => $self->fh,  | 
| 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         validation => 0,  | 
| 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         load_ext_dtd => 0  | 
| 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ) or Catmandu::Error->throw("unable to read file");  | 
| 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
26
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1710
 | 
     $reader;  | 
| 
27
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
28
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
29
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _parse_ocr_attr {  | 
| 
30
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
31
 | 
10
 | 
 
 | 
 
 | 
  
10
  
 | 
 
 | 
19
 | 
     my ( $key, $value ) = @_;  | 
| 
32
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
33
 | 
10
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
23
 | 
     if ( $key eq "bbox" ) {  | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
34
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
35
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15
 | 
         my @coords = map { int($_) } split( " ", $value );  | 
| 
 
 | 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
43
 | 
    | 
| 
36
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         return +{  | 
| 
37
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
28
 | 
             x1 => $coords[0], y1 => $coords[1],  | 
| 
38
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             x2 => $coords[2], y2 => $coords[3]  | 
| 
39
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         };  | 
| 
40
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
41
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
42
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     elsif ( $key eq "x_wconf" ) {  | 
| 
43
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
44
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         return int($value);  | 
| 
45
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
46
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
47
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
48
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
     $value;  | 
| 
49
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
50
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
51
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
52
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _parse_ocr_attrs {  | 
| 
53
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
54
 | 
5
 | 
 
 | 
 
 | 
  
5
  
 | 
 
 | 
8
 | 
     my $title = $_[0];  | 
| 
55
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
56
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     map {  | 
| 
57
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
24
 | 
         $_ =~ s/^\s+//o;  | 
| 
 
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
37
 | 
    | 
| 
58
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
38
 | 
         $_ =~ s/\s+$//o;  | 
| 
59
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
60
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
31
 | 
         my $idx = index( $_, " " );  | 
| 
61
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
18
 | 
         my $key;  | 
| 
62
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         my $val;  | 
| 
63
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
64
 | 
10
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
19
 | 
         if( $idx >= 0 ) {  | 
| 
65
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
24
 | 
             $key = substr( $_, 0, $idx );  | 
| 
66
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
21
 | 
             $val = substr( $_, $idx + 1 );  | 
| 
67
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
22
 | 
             $val = _parse_ocr_attr( $key, $val );  | 
| 
68
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
69
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         else {  | 
| 
70
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $key = $_;  | 
| 
71
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $val = undef;  | 
| 
72
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
73
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
74
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
45
 | 
         $key => $val;  | 
| 
75
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } split( ";", $title );  | 
| 
76
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
77
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
78
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
79
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _parse_coords {  | 
| 
80
 | 
5
 | 
 
 | 
 
 | 
  
5
  
 | 
 
 | 
10
 | 
     my $title = $_[0];  | 
| 
81
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
     my %attrs = _parse_ocr_attrs( $title );  | 
| 
82
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15
 | 
     my $bbox = $attrs{bbox};  | 
| 
83
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
     my $x = $bbox->{x1};  | 
| 
84
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
     my $y = $bbox->{y1};  | 
| 
85
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
     my $w = $bbox->{x2} - $bbox->{x1};  | 
| 
86
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
     my $h = $bbox->{y2} - $bbox->{y1};  | 
| 
87
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
88
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     +{  | 
| 
89
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
23
 | 
         x => $x,  | 
| 
90
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         y => $y,  | 
| 
91
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         w => $w,  | 
| 
92
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         h => $h  | 
| 
93
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     };  | 
| 
94
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
95
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
96
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _read {  | 
| 
97
 | 
89
 | 
  
 50
  
 | 
 
 | 
  
89
  
 | 
 
 | 
1570
 | 
     my $state = $_[0]->reader->read() or return;  | 
| 
98
 | 
89
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
1187
 | 
     $state < 0 && Catmandu::Error->throw("error occurred during parsing of file");  | 
| 
99
 | 
89
 | 
 
 | 
 
 | 
 
 | 
 
 | 
215
 | 
     $state;  | 
| 
100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
101
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
102
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _next {  | 
| 
103
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
104
 | 
3
 | 
 
 | 
 
 | 
  
3
  
 | 
 
 | 
7
 | 
     my $self = $_[0];  | 
| 
105
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
     state $start_element = XML_READER_TYPE_ELEMENT;  | 
| 
106
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
     state $end_element   = XML_READER_TYPE_END_ELEMENT;  | 
| 
107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
108
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
60
 | 
     my $reader = $self->reader();  | 
| 
109
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
110
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
32
 | 
     my $line;  | 
| 
111
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
112
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
     while( $self->_read ){  | 
| 
113
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
114
 | 
89
 | 
 
 | 
 
 | 
 
 | 
 
 | 
243
 | 
         my $name = $reader->name();  | 
| 
115
 | 
89
 | 
 
 | 
 
 | 
 
 | 
 
 | 
204
 | 
         my $nodeType = $reader->nodeType();  | 
| 
116
 | 
89
 | 
 
 | 
 
 | 
 
 | 
 
 | 
208
 | 
         my $class = $reader->getAttribute("class");  | 
| 
117
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
118
 | 
89
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
712
 | 
         if ( $nodeType == $start_element && defined($class) && $class eq "ocr_page" ) {  | 
| 
 
 | 
 
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
119
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
120
 | 
2
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
13
 | 
             my $old_page = $self->current_page // +{ no => 0 };  | 
| 
121
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
122
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
             my $title = $reader->getAttribute("title");  | 
| 
123
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
             my $coords = _parse_coords( $title );  | 
| 
124
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
125
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             $self->current_page({  | 
| 
126
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
                 no => $old_page->{no} + 1,  | 
| 
127
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 %$coords  | 
| 
128
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             });  | 
| 
129
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
130
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
             next;  | 
| 
131
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
132
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
133
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         elsif ( $nodeType == $start_element && defined($class) && $class eq "ocr_line" ) {  | 
| 
134
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
135
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
             my $title = $reader->getAttribute("title");  | 
| 
136
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
             my $coords = _parse_coords( $title );  | 
| 
137
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
138
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
             my $current_page = $self->current_page();  | 
| 
139
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             $line = +{  | 
| 
140
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 page => $current_page->{no},  | 
| 
141
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 page_x => $current_page->{x},  | 
| 
142
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 page_y => $current_page->{y},  | 
| 
143
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 page_w => $current_page->{w},  | 
| 
144
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 page_h => $current_page->{h},  | 
| 
145
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
33
 | 
                 %$coords,  | 
| 
146
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 text => []  | 
| 
147
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             };  | 
| 
148
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
149
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
150
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         elsif ( $nodeType == $start_element && defined($class) && ($class eq "ocr_word" || $class eq "ocrx_word") ) {  | 
| 
151
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
152
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # only include text nodes, and read until end of span  | 
| 
153
 | 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
87
 | 
             while( $reader->read() ){  | 
| 
154
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
155
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # text may be enclosed with <strong>  | 
| 
156
 | 
46
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
209
 | 
                 if( $reader->nodeType() == XML_READER_TYPE_TEXT ){  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
157
 | 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
20
 | 
                     push @{$line->{text}}, $reader->value();  | 
| 
 
 | 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
97
 | 
    | 
| 
158
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
159
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 elsif( $reader->nodeType == $end_element && $reader->localName eq "span" ){  | 
| 
160
 | 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
46
 | 
                     last;  | 
| 
161
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
162
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
163
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
164
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
165
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
166
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         elsif ( defined($line) && $nodeType == $end_element ) {  | 
| 
167
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
168
 | 
3
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
8
 | 
             if ( $line ) {  | 
| 
169
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
170
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
                 $line->{text} = join(" ", @{ $line->{text} });  | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
    | 
| 
171
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
172
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
173
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
             last;  | 
| 
174
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
175
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
176
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
177
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
178
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
179
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
     $line;  | 
| 
180
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
181
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
182
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub generator {  | 
| 
183
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
184
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $self = $_[0];  | 
| 
185
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
186
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     sub {  | 
| 
187
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         $self->_next();  | 
| 
188
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     };  | 
| 
189
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
190
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
191
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
192
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 1;  |