| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package MARC::Parser::RAW; |
|
2
|
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
our $VERSION = "0.04"; |
|
4
|
|
|
|
|
|
|
|
|
5
|
2
|
|
|
2
|
|
26971
|
use strict; |
|
|
2
|
|
|
|
|
2
|
|
|
|
2
|
|
|
|
|
53
|
|
|
6
|
2
|
|
|
2
|
|
8
|
use warnings; |
|
|
2
|
|
|
|
|
3
|
|
|
|
2
|
|
|
|
|
57
|
|
|
7
|
2
|
|
|
2
|
|
1009
|
use charnames qw< :full >; |
|
|
2
|
|
|
|
|
45889
|
|
|
|
2
|
|
|
|
|
9
|
|
|
8
|
2
|
|
|
2
|
|
296
|
use Carp qw(croak carp); |
|
|
2
|
|
|
|
|
3
|
|
|
|
2
|
|
|
|
|
113
|
|
|
9
|
2
|
|
|
2
|
|
955
|
use Encode qw(find_encoding); |
|
|
2
|
|
|
|
|
12525
|
|
|
|
2
|
|
|
|
|
119
|
|
|
10
|
2
|
|
|
2
|
|
944
|
use English; |
|
|
2
|
|
|
|
|
5838
|
|
|
|
2
|
|
|
|
|
9
|
|
|
11
|
2
|
|
|
2
|
|
1566
|
use Readonly; |
|
|
2
|
|
|
|
|
5078
|
|
|
|
2
|
|
|
|
|
97
|
|
|
12
|
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
Readonly my $LEADER_LEN => 24; |
|
14
|
|
|
|
|
|
|
Readonly my $SUBFIELD_INDICATOR => qq{\N{INFORMATION SEPARATOR ONE}}; |
|
15
|
|
|
|
|
|
|
Readonly my $END_OF_FIELD => qq{\N{INFORMATION SEPARATOR TWO}}; |
|
16
|
|
|
|
|
|
|
Readonly my $END_OF_RECORD => qq{\N{INFORMATION SEPARATOR THREE}}; |
|
17
|
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=head1 NAME |
|
19
|
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
MARC::Parser::RAW - Parser for ISO 2709 encoded MARC records |
|
21
|
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
=begin markdown |
|
23
|
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
[](https://travis-ci.org/jorol/MARC-Parser-RAW) |
|
25
|
|
|
|
|
|
|
[](https://coveralls.io/r/jorol/MARC-Parser-RAW?branch=devel) |
|
26
|
|
|
|
|
|
|
[](http://cpants.cpanauthors.org/dist/MARC-Parser-RAW) |
|
27
|
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
=end markdown |
|
29
|
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
31
|
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
use MARC::Parser::RAW; |
|
33
|
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
my $parser = MARC::Parser::RAW->new( $file ); |
|
35
|
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
while ( my $record = $parser->next() ) { |
|
37
|
|
|
|
|
|
|
# do something |
|
38
|
|
|
|
|
|
|
} |
|
39
|
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
41
|
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
L<MARC::Parser::RAW> is a lightweight, fault tolerant parser for ISO 2709 |
|
43
|
|
|
|
|
|
|
encoded MARC records. Tags, indicators and subfield codes are not validated |
|
44
|
|
|
|
|
|
|
against the MARC standard. Record length from leader and field lengths from |
|
45
|
|
|
|
|
|
|
the directory are ignored. Records with a faulty structure will be skipped |
|
46
|
|
|
|
|
|
|
with a warning. The resulting data structure is optimized for usage with the |
|
47
|
|
|
|
|
|
|
L<Catmandu> data tool kit. |
|
48
|
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
L<MARC::Parser::RAW> expects UTF-8 encoded files as input. Otherwise provide |
|
50
|
|
|
|
|
|
|
a filehandle with a specified I/O layer or specify the encoding. |
|
51
|
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
=head1 MARC |
|
53
|
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
The MARC record is parsed into an ARRAY of ARRAYs: |
|
55
|
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
$record = [ |
|
57
|
|
|
|
|
|
|
[ 'LDR', undef, undef, '_', '00661nam 22002538a 4500' ], |
|
58
|
|
|
|
|
|
|
[ '001', undef, undef, '_', 'fol05865967 ' ], |
|
59
|
|
|
|
|
|
|
... |
|
60
|
|
|
|
|
|
|
[ '245', '1', '0', 'a', 'Programming Perl /', |
|
61
|
|
|
|
|
|
|
'c', 'Larry Wall, Tom Christiansen & Jon Orwant.' |
|
62
|
|
|
|
|
|
|
], |
|
63
|
|
|
|
|
|
|
... |
|
64
|
|
|
|
|
|
|
]; |
|
65
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
=head1 METHODS |
|
68
|
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
=head2 new($file|$fh [, $encoding]) |
|
70
|
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
=head3 Configuration |
|
72
|
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
=over |
|
74
|
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
=item C<file> |
|
76
|
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
Path to file with raw MARC records. |
|
78
|
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
=item C<fh> |
|
80
|
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
Open filehandle for raw MARC records. |
|
82
|
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
=item C<encoding> |
|
84
|
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
Set encoding. Default: UTF-8. Optional. |
|
86
|
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
=back |
|
88
|
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
=cut |
|
90
|
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
# Constructor.
#
# Arguments:
#   $class    - class name to bless the parser into
#   $file     - path to a file, or an already opened filehandle (required)
#   $encoding - optional encoding name; defaults to 'UTF-8'. It is only used
#               to build the I/O layer when $file is a path; a filehandle
#               passed in is used as it is.
#
# Returns the blessed parser object. Croaks when $file is missing/false, when
# the encoding is unknown to Encode, when the file cannot be opened, or when
# $file is neither a filehandle nor an existing path.
sub new {
    my ( $class, $file, $encoding ) = @_;

    $file or croak "first argument must be a file or filehandle";

    # Validate a caller supplied encoding before it is used in an I/O layer.
    # Report the offending encoding name (not $_[0], which is the class).
    if ($encoding) {
        find_encoding($encoding)
            or croak "encoding \"$encoding\" not found";
    }

    my $self = {
        file       => undef,
        fh         => undef,
        encoding   => $encoding ? $encoding : 'UTF-8',
        rec_number => 0,
    };

    # check for file or filehandle: fileno() yields a defined value only for
    # something that is an open handle; eval guards against it dying.
    my $ishandle = eval { fileno($file); };
    if ( !$@ && defined $ishandle ) {
        $self->{file} = scalar $file;
        $self->{fh}   = $file;
    }
    elsif ( -e $file ) {
        open $self->{fh}, "<:encoding($self->{encoding})", $file
            or croak "cannot read from file $file\n";
        $self->{file} = $file;
    }
    else {
        croak "file or filehande $file does not exists";
    }
    return ( bless $self, $class );
}
|
123
|
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
=head2 next() |
|
125
|
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
Reads the next record from MARC input stream. Returns an ARRAY reference of ARRAYs (see L</MARC>), or nothing when no further record can be read. |
|
127
|
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
=cut |
|
129
|
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
# Read raw records from the parser's filehandle until one decodes
# successfully.
#
# Records are delimited by $END_OF_RECORD. Each chunk read increments the
# record counter. Records that _decode() rejects are skipped; this is done
# with a loop instead of recursion so that a long run of faulty records does
# not deepen the call stack.
#
# Returns the decoded record (as returned by _decode), or an empty
# list/undef when the input is exhausted or when only garbage remains after
# stripping leading filler characters.
sub next {
    my $self = shift;
    my $fh   = $self->{fh};
    local $INPUT_RECORD_SEPARATOR = $END_OF_RECORD;

    while ( defined( my $raw = <$fh> ) ) {
        $self->{rec_number}++;

        # remove illegal garbage that sometimes occurs between records
        $raw
            =~ s/^[\N{SPACE}\N{NUL}\N{LINE FEED}\N{CARRIAGE RETURN}\N{SUB}]+//;
        return unless $raw;

        # a false value means the record was faulty: try the next one
        my $marc = $self->_decode($raw);
        return $marc if $marc;
    }
    return;
}
|
152
|
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
=head2 _decode($record) |
|
154
|
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
Deserialize a raw MARC record to an ARRAY of ARRAYs. |
|
156
|
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
=cut |
|
158
|
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
# Deserialize one raw MARC record string.
#
# Arguments:
#   $self - parser object; only $self->{rec_number} is read, for warnings
#   $raw  - raw record, normally ending with the $END_OF_RECORD character
#
# Returns an ARRAY reference whose first element is the leader entry
# [ 'LDR', undef, undef, '_', $leader ], followed by one ARRAY per field
# built from the directory tag and the result of _field(). Returns nothing
# (after a carp warning) when the record structure is faulty.
sub _decode {
    my ( $self, $raw ) = @_;

    # Strip the record terminator only if it is really there. A plain chop
    # would eat a data character when the last record of a file is not
    # terminated.
    $raw =~ s/\Q$END_OF_RECORD\E\z//;

    # The first chunk holds leader + directory, the rest are the fields.
    my ( $head, @fields ) = split $END_OF_FIELD, $raw;

    if ( !@fields ) {
        carp "no fields found in record " . $self->{rec_number};
        return;
    }

    # ToDO: better RegEX for leader
    # The /c and /g flags keep pos($head) after the leader so that the
    # directory parsing below can continue from there with \G.
    my $leader;
    if ( $head =~ /(.{$LEADER_LEN})/cg ) {
        $leader = $1;
    }
    else {
        carp "no valid record leader found in record " . $self->{rec_number};
        return;
    }

    # Each directory entry is 12 digits: a 3 digit tag followed by 9 digits
    # that are matched but not captured. Only the tags are collected.
    my @tags = $head =~ /\G(\d{3})\d{9}/cg;

    if ( scalar @tags != scalar @fields ) {
        carp "different number of tags and fields in record "
            . $self->{rec_number};
        return;
    }

    # Anything left over after the last complete directory entry means the
    # directory is damaged.
    if ( $head !~ /\G$/cg ) {
        carp "incomplete directory entry in record " . $self->{rec_number};
        return;
    }

    # Tags and fields are in the same order; pair them up one by one.
    return [
        [ 'LDR', undef, undef, '_', $leader ],
        map [ shift(@tags), $self->_field($_) ],
        @fields
    ];
}
|
198
|
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
=head2 _field($field) |
|
200
|
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
Split MARC field string in individual components. |
|
202
|
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
=cut |
|
204
|
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
# Split one MARC field string into its components.
#
# Arguments:
#   $self  - parser object (not used here)
#   $field - field content without the end-of-field character
#
# Returns a list. For a field without any subfield indicator:
#   ( undef, undef, '_', $field )
# otherwise:
#   ( $indicator1, $indicator2, $code, $value, $code, $value, ... )
sub _field {
    my ( $self, $field ) = @_;

    # The capturing group makes split return the subfield codes as well, so
    # the result is: leading part, code, value, code, value, ...
    my @parts = split( /$SUBFIELD_INDICATOR(.)/, $field );

    # A single part means there was no subfield indicator at all.
    if ( @parts == 1 ) {
        return ( undef, undef, '_', @parts );
    }

    # The leading part holds the indicators; its first two characters are
    # used, one per indicator.
    my $indicators = shift @parts;
    my ( $ind1, $ind2 ) = split //, $indicators;

    # What remains is already the flat code/value sequence.
    return ( $ind1, $ind2, @parts );
}
|
216
|
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
=head1 AUTHOR |
|
218
|
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
Johann Rolschewski E<lt>jorol@cpan.orgE<gt> |
|
220
|
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
=head1 COPYRIGHT |
|
222
|
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
Copyright 2014- Johann Rolschewski |
|
224
|
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
=head1 LICENSE |
|
226
|
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or modify |
|
228
|
|
|
|
|
|
|
it under the same terms as Perl itself. |
|
229
|
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
=head1 SEE ALSO |
|
231
|
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
L, L. |
|
233
|
|
|
|
|
|
|
|
|
234
|
|
|
|
|
|
|
=head1 ACKNOWLEDGEMENT |
|
235
|
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
The parser methods are adapted from Marc Chantreux's L<MARC::MIR> module. |
|
237
|
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
=cut |
|
239
|
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
1; # End of MARC::Parser::RAW |
|
241
|
|
|
|
|
|
|
|