File Coverage

blib/lib/MARC/File/MicroLIF.pm

Criterion	Covered	Total	%
statement	106	109	97.2
branch	35	42	83.3
condition	14	17	82.3
subroutine	15	15	100.0
pod	3	3	100.0
total	173	186	93.0

line	stmt	bran	cond	sub	pod	time	code
1							package MARC::File::MicroLIF;
2
3							=head1 NAME
4
5							MARC::File::MicroLIF - MicroLIF-specific file handling
6
7							=cut
8
9	7			7		79689	use strict;
	7					19
	7					286
10	7			7		45	use warnings;
	7					12
	7					237
11	7			7		39	use integer;
	7					16
	7					57
12	7			7		194	use vars qw( $ERROR );
	7					13
	7					352
13
14	7			7		1648	use MARC::File;
	7					15
	7					189
15	7			7		40	use vars qw( @ISA ); @ISA = qw( MARC::File );
	7					12
	7					415
16
17	7			7		630	use MARC::Record qw( LEADER_LEN );
	7					15
	7					515
18
19							=head1 SYNOPSIS
20
21							use MARC::File::MicroLIF;
22
23							my $file = MARC::File::MicroLIF->in( $filename );
24
25							while ( my $marc = $file->next() ) {
26							# Do something
27							}
28							$file->close();
29							undef $file;
30
31							=head1 EXPORT
32
33							None.
34
35							=cut
36
37
38							=for internal
39
40							The buffer must be large enough to handle any valid record because
41							we don't check for cases like a CR/LF pair or an end-of-record/CR/LF
42							trio being only partially in the buffer.
43
44							The max valid record is the max MARC record size (99999) plus one
45							or two characters per tag (CR, LF, or CR/LF). It's hard to say
46							what the max number of tags is, so here we use 6000. (6000 tags
47							can be squeezed into a MARC record only if every tag has only one
48							subfield containing a maximum of one character, or if data from
49							multiple tags overlaps in the MARC record body. We're pretty safe.)
50
51							=cut
52
53	7			7		44	use constant BUFFER_MIN => (99999 + 6000 * 2);
	7					14
	7					11334
54
55							=head1 METHODS
56
57							=head2 in()
58
59							Opens a MicroLIF file for reading.
60
61							=cut
62
63							sub in {
64	14			14	1	2643	my $class = shift;
65	14					115	my $self = $class->SUPER::in( @_ );
66
67	14	50				104	if ( $self ) {
68	14					52	bless $self, $class;
69
70	14					46	$self->{exhaustedfh} = 0;
71	14					41	$self->{inputbuf} = '';
72	14					42	$self->{header} = undef;
73
74							# get the MicroLIF header, but handle the case in
75							# which it's missing.
76	14					48	my $header = $self->_get_chunk( 1 );
77	14	50				54	if ( defined $header ) {
78	14	100				54	if ( $header =~ /^LDR/ ) {
79							# header missing, put this back
80	2					12	$self->_unget_chunk( $header . "\n" );
81
82							# XXX should we warn of a missing header?
83							}
84							else {
85	12					43	$self->{header} = $header;
86							}
87							}
88							else {
89							# can't read from the file
90	0					0	undef $self;
91							}
92							}
93
94	14					67	return $self;
95							} # new
96
97
98							# fill the buffer if we need to
99							sub _fill_buffer {
100	247			247		359	my $self = shift;
101	247					315	my $ok = 1;
102
103	247	100	66			1188	if ( !$self->{exhaustedfh} && length( $self->{inputbuf} ) < BUFFER_MIN ) {
104							# append the next chunk of bytes to the buffer
105	26					2103	my $read = read $self->{fh}, $self->{inputbuf}, BUFFER_MIN, length($self->{inputbuf});
106	26	50				185	if ( !defined $read ) {
		100
107							# error!
108	0					0	$ok = undef;
109	0					0	$MARC::File::ERROR = "error reading from file " . $self->{filename};
110							}
111							elsif ( $read < 1 ) {
112	12					41	$self->{exhaustedfh} = 1;
113							}
114							}
115
116	247					2184	return $ok;
117							}
118
119
120							=for internal
121
122							Gets the next chunk of data. If C<$want_line> is true then you get
123							the next chunk ending with any combination of \r and \n of any length.
124							If it is false or not passed then you get the next chunk ending with
125							\x60 followed by any combination of \r and \n of any length.
126
127							All trailing \r and \n are stripped.
128
129							=cut
130
131							sub _get_chunk {
132	247			247		431	my $self = shift;
133	247		100			1294	my $want_line = shift \|\| 0;
134
135	247					433	my $chunk = undef;
136
137	247	100	66			669	if ( $self->_fill_buffer() && length($self->{inputbuf}) > 0 ) {
138
139							# the buffer always has at least one full line in it, so we're
140							# guaranteed that if there are no line endings then we're
141							# on the last line.
142
143	239	100				644	if ( $want_line ) {
144	14	50				174	if ( $self->{inputbuf} =~ /^([^\x0d\x0a]*)([\x0d\x0a]+)/ ) {
145	14					45	$chunk = $1;
146	14					197	$self->{inputbuf} = substr( $self->{inputbuf}, length($1)+length($2) );
147							}
148							}
149							else {
150							# couldn't figure out how to make this work as a regex
151	225					457	my $pos = -1;
152	225					551	while ( !$chunk ) {
153	225					3893	$pos = index( $self->{inputbuf}, '`', $pos+1 );
154	225	100				1005	last if $pos < 0;
155	224	50	66			2186	if ( substr($self->{inputbuf}, $pos+1, 1) eq "\x0d" or substr($self->{inputbuf}, $pos+1, 1) eq "\x0a" ) {
156	224					904	$chunk = substr( $self->{inputbuf}, 0, $pos+1 ); # include the '`' but not the newlines
157	224		100			1950	while ( substr($self->{inputbuf}, $pos+1, 1) eq "\x0d" or substr($self->{inputbuf}, $pos+1, 1) eq "\x0a" ) {
158	225					2018	++$pos;
159							}
160							# $pos now pointing at last newline char
161	224					1898	$self->{inputbuf} = substr( $self->{inputbuf}, $pos+1 );
162							}
163							}
164							}
165
166	239	100				722	if ( !$chunk ) {
167	1					3	$chunk = $self->{inputbuf};
168	1					11	$self->{inputbuf} = '';
169	1					3	$self->{exhaustedfh} = 1;
170							}
171							}
172
173	247					1050	return $chunk;
174							}
175
176
177							# $chunk is put at the beginning of the buffer exactly as
178							# passed in. No line endings are added.
179							sub _unget_chunk {
180	2			2		4	my $self = shift;
181	2					5	my $chunk = shift;
182	2					12	$self->{inputbuf} = $chunk . $self->{inputbuf};
183	2					8	return;
184							}
185
186
187							sub _next {
188	233			233		371	my $self = shift;
189
190	233					631	my $lifrec = $self->_get_chunk();
191
192							# for ease, make the newlines match this platform
193	233	100				3748	$lifrec =~ s/[\x0a\x0d]+/\n/g if defined $lifrec;
194
195	233					1638	return $lifrec;
196							}
197
198
199							=head2 header()
200
201							If the MicroLIF file has a file header then the header is returned.
202							If the file has no header or the file has not yet been opened then
203							C is returned.
204
205							=cut
206
207							sub header {
208	2			2	1	1957	my $self = shift;
209	2					17	return $self->{header};
210							}
211
212							=head2 decode()
213
214							Decodes a MicroLIF record and returns a USMARC record.
215
216							Can be called in one of three different ways:
217
218							$object->decode( $lif )
219							MARC::File::MicroLIF->decode( $lif )
220							MARC::File::MicroLIF::decode( $lif )
221
222							=cut
223
224							sub decode {
225	232			232	1	4420	my $self = shift;
226	232					372	my $location = '';
227	232					324	my $text = '';
228
229							## decode can be called in a variety of ways
230							## this bit of code covers all three
231
232	232	100				969	if ( ref($self) =~ /^MARC::File/ ) {
233	225					526	$location = 'in record '.$self->{recnum};
234	225					524	$text = shift;
235							} else {
236	7					13	$location = 'in record 1';
237	7	100				33	$text = $self=~/MARC::File/ ? shift : $self;
238							}
239
240	232					986	my $marc = MARC::Record->new();
241
242							# for ease, make the newlines match this platform
243	232	50				3288	$text =~ s/[\x0a\x0d]+/\n/g if defined $text;
244
245	232					2903	my @lines = split( /\n/, $text );
246	232					822	for my $line ( @lines ) {
247
248	2335	50				12513	($line =~ s/^([0-9A-Za-z]{3})//) or
249							$marc->_warn( "Invalid tag number: ".substr( $line, 0, 3 )." $location" );
250	2335					5179	my $tagno = $1;
251
252	2335	100				11810	($line =~ s/\^`?$//)
253							or $marc->_warn( "Tag $tagno $location is missing a trailing caret." );
254
255	2335	100	100			15459	if ( $tagno eq "LDR" ) {
		100
256	232					1187	$marc->leader( substr( $line, 0, LEADER_LEN ) );
257							} elsif ( $tagno =~ /^\d+$/ and $tagno < 10 ) {
258	289					1003	$marc->add_fields( $tagno, $line );
259							} else {
260	1814					5801	$line =~ s/^(.)(.)//;
261	1814					5521	my ($ind1,$ind2) = ($1,$2);
262	1814					1996	my @subfields;
263	1814					17424	my @subfield_data_pairs = split( /_(?=[a-z0-9])/, $line );
264	1814	100				4828	if ( scalar @subfield_data_pairs < 2 ) {
265	8					52	$marc->_warn( "Tag $tagno $location has no subfields--discarded." );
266							}
267							else {
268	1806					2309	shift @subfield_data_pairs; # Leading _ makes an empty pair
269	1806					4743	for my $pair ( @subfield_data_pairs ) {
270	3020					9785	my ($subfield,$data) = (substr( $pair, 0, 1 ), substr( $pair, 1 ));
271	3020					11547	push( @subfields, $subfield, $data );
272							}
273	1806					6505	$marc->add_fields( $tagno, $ind1, $ind2, @subfields );
274							}
275							}
276							} # for
277
278	232					1599	return $marc;
279							}
280
281							1;
282
283							__END__