line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package XML::Records;
|
2
|
1
|
|
|
1
|
|
645
|
use strict;
|
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
34
|
|
3
|
1
|
|
|
1
|
|
5
|
use vars qw($VERSION);
|
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
58
|
|
4
|
|
|
|
|
|
|
$VERSION = '0.12';
|
5
|
|
|
|
|
|
|
|
6
|
1
|
|
|
1
|
|
5
|
use base 'XML::TokeParser';
|
|
1
|
|
|
|
|
11
|
|
|
1
|
|
|
|
|
1791
|
|
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
sub new {
|
9
|
|
|
|
|
|
|
my $class=shift;
|
10
|
|
|
|
|
|
|
$class=ref $class || $class;
|
11
|
|
|
|
|
|
|
my $self=$class->SUPER::new(@_);
|
12
|
|
|
|
|
|
|
$self->{rectypes}=[{}];
|
13
|
|
|
|
|
|
|
bless $self,$class;
|
14
|
|
|
|
|
|
|
}
|
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
sub set_records {
|
17
|
|
|
|
|
|
|
my $self=shift;
|
18
|
|
|
|
|
|
|
$self->{rectypes}[-1]={map {$_=>1} @_};
|
19
|
|
|
|
|
|
|
}
|
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
sub get_record {
|
22
|
|
|
|
|
|
|
my $self=shift;
|
23
|
|
|
|
|
|
|
my ($rec,$rectype);
|
24
|
|
|
|
|
|
|
if ($self->skip_to(@_)) {
|
25
|
|
|
|
|
|
|
my $token=$self->get_token();
|
26
|
|
|
|
|
|
|
$rectype=$token->[1];
|
27
|
|
|
|
|
|
|
my $t=$self->{noempty};
|
28
|
|
|
|
|
|
|
$self->{noempty}=1;
|
29
|
|
|
|
|
|
|
$rec=$self->get_hash($token);
|
30
|
|
|
|
|
|
|
$self->{noempty}=$t;
|
31
|
|
|
|
|
|
|
}
|
32
|
|
|
|
|
|
|
($rectype,$rec);
|
33
|
|
|
|
|
|
|
}
|
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
sub get_hash {
|
36
|
|
|
|
|
|
|
my ($self,$token)=@_;
|
37
|
|
|
|
|
|
|
my ($field,$buf,$field_token);
|
38
|
|
|
|
|
|
|
my $rectype=$token->[1];
|
39
|
|
|
|
|
|
|
my $nest=0;
|
40
|
|
|
|
|
|
|
my $h={};
|
41
|
|
|
|
|
|
|
# treat attributes of record or subrecord as fields
|
42
|
|
|
|
|
|
|
foreach (keys %{$token->[2]}) {
|
43
|
|
|
|
|
|
|
$h->{$_}=$token->[2]{$_};
|
44
|
|
|
|
|
|
|
}
|
45
|
|
|
|
|
|
|
while ($token=$self->get_token()) {
|
46
|
|
|
|
|
|
|
my $t=$token->[0];
|
47
|
|
|
|
|
|
|
if ($t eq 'S') {
|
48
|
|
|
|
|
|
|
if ($self->{rectypes}[-1]{"-$token->[1]"}) { # record ended by start
|
49
|
|
|
|
|
|
|
$self->unget_token($token);
|
50
|
|
|
|
|
|
|
last;
|
51
|
|
|
|
|
|
|
}
|
52
|
|
|
|
|
|
|
if ($nest++) { # start tag inside field, get subrecord
|
53
|
|
|
|
|
|
|
$self->unget_token($token);
|
54
|
|
|
|
|
|
|
add_hash($h,$field,$self->get_hash($field_token));
|
55
|
|
|
|
|
|
|
$nest-=2; # we won't see sub-field's or field's end tag
|
56
|
|
|
|
|
|
|
}
|
57
|
|
|
|
|
|
|
else {
|
58
|
|
|
|
|
|
|
$buf="";
|
59
|
|
|
|
|
|
|
$field=$token->[1];
|
60
|
|
|
|
|
|
|
$field_token=$token;
|
61
|
|
|
|
|
|
|
}
|
62
|
|
|
|
|
|
|
}
|
63
|
|
|
|
|
|
|
elsif ($t eq 'T') {
|
64
|
|
|
|
|
|
|
$buf=$token->[1] unless $token->[1] =~ /^\s*$/;
|
65
|
|
|
|
|
|
|
}
|
66
|
|
|
|
|
|
|
elsif ($t eq 'E') {
|
67
|
|
|
|
|
|
|
last if $token->[1] eq $rectype;
|
68
|
|
|
|
|
|
|
add_hash($h,$field,$buf);
|
69
|
|
|
|
|
|
|
if (--$nest==0 && keys %{$field_token->[2]}) {
|
70
|
|
|
|
|
|
|
add_hash($h,$field,{%{$field_token->[2]}});
|
71
|
|
|
|
|
|
|
}
|
72
|
|
|
|
|
|
|
}
|
73
|
|
|
|
|
|
|
}
|
74
|
|
|
|
|
|
|
$h;
|
75
|
|
|
|
|
|
|
}
|
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
sub add_hash {
|
78
|
|
|
|
|
|
|
my ($h,$field,$val)=@_;
|
79
|
|
|
|
|
|
|
if (defined $h->{$field}) { # duplicate fields become arrays
|
80
|
|
|
|
|
|
|
my $t=$h->{$field};
|
81
|
|
|
|
|
|
|
$t=[$t] unless ref $t eq 'ARRAY';
|
82
|
|
|
|
|
|
|
push @$t,$val;
|
83
|
|
|
|
|
|
|
$val=$t;
|
84
|
|
|
|
|
|
|
}
|
85
|
|
|
|
|
|
|
$h->{$field}=$val;
|
86
|
|
|
|
|
|
|
}
|
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
sub get_simple_tree {
|
89
|
|
|
|
|
|
|
my $self=shift;
|
90
|
|
|
|
|
|
|
return undef unless ($self->skip_to(@_));
|
91
|
|
|
|
|
|
|
my $lists=[];
|
92
|
|
|
|
|
|
|
my $tree=[];
|
93
|
|
|
|
|
|
|
my $curlist=$tree;
|
94
|
|
|
|
|
|
|
my $ecount=0;
|
95
|
|
|
|
|
|
|
while (my $token=$self->get_token()) {
|
96
|
|
|
|
|
|
|
my $type=$token->[0];
|
97
|
|
|
|
|
|
|
if ($type eq 'S') {
|
98
|
|
|
|
|
|
|
my $newlist=[];
|
99
|
|
|
|
|
|
|
my $newnode={type=>'e',attrib=>$token->[2],name=>$token->[1],content=>$newlist};
|
100
|
|
|
|
|
|
|
push @$lists, $curlist;
|
101
|
|
|
|
|
|
|
push @$curlist,$newnode;
|
102
|
|
|
|
|
|
|
$curlist=$newlist;
|
103
|
|
|
|
|
|
|
++$ecount;
|
104
|
|
|
|
|
|
|
}
|
105
|
|
|
|
|
|
|
elsif ($type eq 'E') {
|
106
|
|
|
|
|
|
|
$curlist=pop @$lists;
|
107
|
|
|
|
|
|
|
last if --$ecount==0;
|
108
|
|
|
|
|
|
|
}
|
109
|
|
|
|
|
|
|
elsif ($type eq 'T') {
|
110
|
|
|
|
|
|
|
push @$curlist,{type=>'t',content=>$token->[1]};
|
111
|
|
|
|
|
|
|
}
|
112
|
|
|
|
|
|
|
elsif ($type eq 'PI') {
|
113
|
|
|
|
|
|
|
push @$curlist,{type=>'p',target=>$token->[1],content=>$token->[2]};
|
114
|
|
|
|
|
|
|
}
|
115
|
|
|
|
|
|
|
}
|
116
|
|
|
|
|
|
|
$tree->[0];
|
117
|
|
|
|
|
|
|
}
|
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
sub drive_SAX {
|
120
|
|
|
|
|
|
|
my $self=shift;
|
121
|
|
|
|
|
|
|
my $handler=shift;
|
122
|
|
|
|
|
|
|
my $wrap=1;
|
123
|
|
|
|
|
|
|
if (@_ && ref($_[0]) eq 'HASH' && defined $_[0]->{wrap}) {
|
124
|
|
|
|
|
|
|
$wrap=$_[0]->{wrap};
|
125
|
|
|
|
|
|
|
}
|
126
|
|
|
|
|
|
|
return undef unless ($self->skip_to(@_));
|
127
|
|
|
|
|
|
|
my $ecount=0;
|
128
|
|
|
|
|
|
|
$handler->start_document({}) if $wrap;
|
129
|
|
|
|
|
|
|
while (my $token=$self->get_token()) {
|
130
|
|
|
|
|
|
|
my $type=$token->[0];
|
131
|
|
|
|
|
|
|
if ($type eq 'S') {
|
132
|
|
|
|
|
|
|
$handler->start_element({Attributes=>$token->[2],Name=>$token->[1]});
|
133
|
|
|
|
|
|
|
++$ecount;
|
134
|
|
|
|
|
|
|
}
|
135
|
|
|
|
|
|
|
elsif ($type eq 'E') {
|
136
|
|
|
|
|
|
|
$handler->end_element({Name=>$token->[1]});
|
137
|
|
|
|
|
|
|
last if --$ecount==0;
|
138
|
|
|
|
|
|
|
}
|
139
|
|
|
|
|
|
|
elsif ($type eq 'T') {
|
140
|
|
|
|
|
|
|
$handler->characters({Data=>$token->[1]});
|
141
|
|
|
|
|
|
|
}
|
142
|
|
|
|
|
|
|
elsif ($type eq 'PI') {
|
143
|
|
|
|
|
|
|
$handler->processing_instruction({Target=>$token->[1],Data=>$token->[2]});
|
144
|
|
|
|
|
|
|
}
|
145
|
|
|
|
|
|
|
}
|
146
|
|
|
|
|
|
|
$wrap? $handler->end_document({}): 1;
|
147
|
|
|
|
|
|
|
}
|
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
sub skip_to {
|
150
|
|
|
|
|
|
|
my $self=shift;
|
151
|
|
|
|
|
|
|
my $here=0;
|
152
|
|
|
|
|
|
|
if (@_ && ref($_[0]) eq 'HASH') {
|
153
|
|
|
|
|
|
|
my $opts=shift;
|
154
|
|
|
|
|
|
|
$here ||= $opts->{here};
|
155
|
|
|
|
|
|
|
}
|
156
|
|
|
|
|
|
|
my $token;
|
157
|
|
|
|
|
|
|
push @{$self->{rectypes}},{%{$self->{rectypes}[-1]}};
|
158
|
|
|
|
|
|
|
$self->set_records(@_) if @_;
|
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
if ($here) { # next non-comment token must be start of record
|
161
|
|
|
|
|
|
|
my $found=0;
|
162
|
|
|
|
|
|
|
while (($token=$self->get_token()) && $token->[0] eq 'C') {
|
163
|
|
|
|
|
|
|
;
|
164
|
|
|
|
|
|
|
}
|
165
|
|
|
|
|
|
|
$found=($token && $token->[0] eq 'S'
|
166
|
|
|
|
|
|
|
&& (!keys(%{$self->{rectypes}[-1]})
|
167
|
|
|
|
|
|
|
|| $self->{rectypes}[-1]{$token->[1]})
|
168
|
|
|
|
|
|
|
);
|
169
|
|
|
|
|
|
|
$self->unget_token($token) if $token;
|
170
|
|
|
|
|
|
|
$token=$found;
|
171
|
|
|
|
|
|
|
}
|
172
|
|
|
|
|
|
|
else { # skip to start of record
|
173
|
|
|
|
|
|
|
while ($token=$self->get_token()) {
|
174
|
|
|
|
|
|
|
next unless $token->[0] eq 'S';
|
175
|
|
|
|
|
|
|
next unless !keys(%{$self->{rectypes}[-1]}) || $self->{rectypes}[-1]{$token->[1]};
|
176
|
|
|
|
|
|
|
$self->unget_token($token);
|
177
|
|
|
|
|
|
|
last;
|
178
|
|
|
|
|
|
|
}
|
179
|
|
|
|
|
|
|
}
|
180
|
|
|
|
|
|
|
pop @{$self->{rectypes}};
|
181
|
|
|
|
|
|
|
$token;
|
182
|
|
|
|
|
|
|
}
|
183
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
1;
|
185
|
|
|
|
|
|
|
__END__
|