File Coverage

blib/lib/MARC/File/MicroLIF.pm
Criterion Covered Total %
statement 106 109 97.2
branch 35 42 83.3
condition 14 17 82.3
subroutine 15 15 100.0
pod 3 3 100.0
total 173 186 93.0


line stmt bran cond sub pod time code
1             package MARC::File::MicroLIF;
2              
3             =head1 NAME
4              
5             MARC::File::MicroLIF - MicroLIF-specific file handling
6              
7             =cut
8              
9 7     7   23580 use strict;
  7         19  
  7         192  
10 7     7   36 use warnings;
  7         15  
  7         198  
11 7     7   40 use integer;
  7         46  
  7         46  
12 7     7   503 use vars qw( $ERROR );
  7         16  
  7         282  
13              
14 7     7   881 use MARC::File;
  7         16  
  7         208  
15 7     7   38 use vars qw( @ISA ); @ISA = qw( MARC::File );
  7         14  
  7         302  
16              
17 7     7   374 use MARC::Record qw( LEADER_LEN );
  7         14  
  7         411  
18              
19             =head1 SYNOPSIS
20              
21             use MARC::File::MicroLIF;
22              
23             my $file = MARC::File::MicroLIF->in( $filename );
24              
25             while ( my $marc = $file->next() ) {
26             # Do something
27             }
28             $file->close();
29             undef $file;
30              
31             =head1 EXPORT
32              
33             None.
34              
35             =cut
36              
37              
38             =for internal
39              
40             The buffer must be large enough to handle any valid record because
41             we don't check for cases like a CR/LF pair or an end-of-record/CR/LF
42             trio being only partially in the buffer.
43              
44             The max valid record is the max MARC record size (99999) plus one
45             or two characters per tag (CR, LF, or CR/LF). It's hard to say
46             what the max number of tags is, so here we use 6000. (6000 tags
47             can be squeezed into a MARC record only if every tag has only one
48             subfield containing a maximum of one character, or if data from
49             multiple tags overlaps in the MARC record body. We're pretty safe.)
50              
51             =cut
52              
53 7     7   41 use constant BUFFER_MIN => (99999 + 6000 * 2);
  7         14  
  7         7404  
54              
55             =head1 METHODS
56              
57             =head2 in()
58              
59             Opens a MicroLIF file for reading.
60              
61             =cut
62              
63             sub in {
64 14     14 1 2092 my $class = shift;
65 14         103 my $self = $class->SUPER::in( @_ );
66              
67 14 50       98 if ( $self ) {
68 14         36 bless $self, $class;
69              
70 14         41 $self->{exhaustedfh} = 0;
71 14         40 $self->{inputbuf} = '';
72 14         37 $self->{header} = undef;
73              
74             # get the MicroLIF header, but handle the case in
75             # which it's missing.
76 14         54 my $header = $self->_get_chunk( 1 );
77 14 50       57 if ( defined $header ) {
78 14 100       57 if ( $header =~ /^LDR/ ) {
79             # header missing, put this back
80 2         12 $self->_unget_chunk( $header . "\n" );
81              
82             # XXX should we warn of a missing header?
83             }
84             else {
85 12         38 $self->{header} = $header;
86             }
87             }
88             else {
89             # can't read from the file
90 0         0 undef $self;
91             }
92             }
93              
94 14         67 return $self;
95             } # new
96              
97              
98             # fill the buffer if we need to
99             sub _fill_buffer {
100 247     247   499 my $self = shift;
101 247         497 my $ok = 1;
102              
103 247 100 66     1127 if ( !$self->{exhaustedfh} && length( $self->{inputbuf} ) < BUFFER_MIN ) {
104             # append the next chunk of bytes to the buffer
105 26         712 my $read = read $self->{fh}, $self->{inputbuf}, BUFFER_MIN, length($self->{inputbuf});
106 26 50       187 if ( !defined $read ) {
    100          
107             # error!
108 0         0 $ok = undef;
109 0         0 $MARC::File::ERROR = "error reading from file " . $self->{filename};
110             }
111             elsif ( $read < 1 ) {
112 12         49 $self->{exhaustedfh} = 1;
113             }
114             }
115              
116 247         1944 return $ok;
117             }
118              
119              
120             =for internal
121              
122             Gets the next chunk of data. If C<$want_line> is true then you get
123             the next chunk ending with any combination of \r and \n of any length.
124             If it is false or not passed then you get the next chunk ending with
125             \x60 followed by any combination of \r and \n of any length.
126              
127             All trailing \r and \n are stripped.
128              
129             =cut
130              
131             sub _get_chunk {
132 247     247   517 my $self = shift;
133 247   100     1204 my $want_line = shift || 0;
134              
135 247         554 my $chunk = undef;
136              
137 247 100 66     760 if ( $self->_fill_buffer() && length($self->{inputbuf}) > 0 ) {
138              
139             # the buffer always has at least one full line in it, so we're
140             # guaranteed that if there are no line endings then we're
141             # on the last line.
142              
143 239 100       750 if ( $want_line ) {
144 14 50       151 if ( $self->{inputbuf} =~ /^([^\x0d\x0a]*)([\x0d\x0a]+)/ ) {
145 14         55 $chunk = $1;
146 14         156 $self->{inputbuf} = substr( $self->{inputbuf}, length($1)+length($2) );
147             }
148             }
149             else {
150             # couldn't figure out how to make this work as a regex
151 225         476 my $pos = -1;
152 225         712 while ( !$chunk ) {
153 225         3209 $pos = index( $self->{inputbuf}, '`', $pos+1 );
154 225 100       1046 last if $pos < 0;
155 224 50 66     2018 if ( substr($self->{inputbuf}, $pos+1, 1) eq "\x0d" or substr($self->{inputbuf}, $pos+1, 1) eq "\x0a" ) {
156 224         1033 $chunk = substr( $self->{inputbuf}, 0, $pos+1 ); # include the '`' but not the newlines
157 224   100     1914 while ( substr($self->{inputbuf}, $pos+1, 1) eq "\x0d" or substr($self->{inputbuf}, $pos+1, 1) eq "\x0a" ) {
158 225         1684 ++$pos;
159             }
160             # $pos now pointing at last newline char
161 224         1875 $self->{inputbuf} = substr( $self->{inputbuf}, $pos+1 );
162             }
163             }
164             }
165              
166 239 100       962 if ( !$chunk ) {
167 1         5 $chunk = $self->{inputbuf};
168 1         4 $self->{inputbuf} = '';
169 1         3 $self->{exhaustedfh} = 1;
170             }
171             }
172              
173 247         997 return $chunk;
174             }
175              
176              
177             # $chunk is put at the beginning of the buffer exactly as
178             # passed in. No line endings are added.
179             sub _unget_chunk {
180 2     2   7 my $self = shift;
181 2         6 my $chunk = shift;
182 2         11 $self->{inputbuf} = $chunk . $self->{inputbuf};
183 2         8 return;
184             }
185              
186              
187             sub _next {
188 233     233   455 my $self = shift;
189              
190 233         654 my $lifrec = $self->_get_chunk();
191              
192             # for ease, make the newlines match this platform
193 233 100       3542 $lifrec =~ s/[\x0a\x0d]+/\n/g if defined $lifrec;
194              
195 233         1601 return $lifrec;
196             }
197              
198              
199             =head2 header()
200              
201             If the MicroLIF file has a file header then the header is returned.
202             If the file has no header or the file has not yet been opened then
203             C is returned.
204              
205             =cut
206              
207             sub header {
208 2     2 1 1017 my $self = shift;
209 2         13 return $self->{header};
210             }
211              
212             =head2 decode()
213              
214             Decodes a MicroLIF record and returns a USMARC record.
215              
216             Can be called in one of three different ways:
217              
218             $object->decode( $lif )
219             MARC::File::MicroLIF->decode( $lif )
220             MARC::File::MicroLIF::decode( $lif )
221              
222             =cut
223              
224             sub decode {
225 232     232 1 4507 my $self = shift;
226 232         470 my $location = '';
227 232         452 my $text = '';
228              
229             ## decode can be called in a variety of ways
230             ## this bit of code covers all three
231              
232 232 100       1101 if ( ref($self) =~ /^MARC::File/ ) {
233 225         703 $location = 'in record '.$self->{recnum};
234 225         711 $text = shift;
235             } else {
236 7         13 $location = 'in record 1';
237 7 100       32 $text = $self=~/MARC::File/ ? shift : $self;
238             }
239              
240 232         1077 my $marc = MARC::Record->new();
241              
242             # for ease, make the newlines match this platform
243 232 50       2968 $text =~ s/[\x0a\x0d]+/\n/g if defined $text;
244              
245 232         2223 my @lines = split( /\n/, $text );
246 232         735 for my $line ( @lines ) {
247              
248 2335 50       13701 ($line =~ s/^([0-9A-Za-z]{3})//) or
249             $marc->_warn( "Invalid tag number: ".substr( $line, 0, 3 )." $location" );
250 2335         7217 my $tagno = $1;
251              
252 2335 100       12473 ($line =~ s/\^`?$//)
253             or $marc->_warn( "Tag $tagno $location is missing a trailing caret." );
254              
255 2335 100 100     16092 if ( $tagno eq "LDR" ) {
    100          
256 232         1186 $marc->leader( substr( $line, 0, LEADER_LEN ) );
257             } elsif ( $tagno =~ /^\d+$/ and $tagno < 10 ) {
258 289         1172 $marc->add_fields( $tagno, $line );
259             } else {
260 1814         7322 $line =~ s/^(.)(.)//;
261 1814         6343 my ($ind1,$ind2) = ($1,$2);
262 1814         3620 my @subfields;
263 1814         10471 my @subfield_data_pairs = split( /_(?=[a-z0-9])/, $line );
264 1814 100       5635 if ( scalar @subfield_data_pairs < 2 ) {
265 8         36 $marc->_warn( "Tag $tagno $location has no subfields--discarded." );
266             }
267             else {
268 1806         3582 shift @subfield_data_pairs; # Leading _ makes an empty pair
269 1806         4715 for my $pair ( @subfield_data_pairs ) {
270 3020         13506 my ($subfield,$data) = (substr( $pair, 0, 1 ), substr( $pair, 1 ));
271 3020         12878 push( @subfields, $subfield, $data );
272             }
273 1806         6791 $marc->add_fields( $tagno, $ind1, $ind2, @subfields );
274             }
275             }
276             } # for
277              
278 232         1413 return $marc;
279             }
280              
281             1;
282              
283             __END__