File Coverage

blib/lib/MARC/File/MicroLIF.pm
Criterion Covered Total %
statement 106 109 97.2
branch 35 42 83.3
condition 14 17 82.3
subroutine 15 15 100.0
pod 3 3 100.0
total 173 186 93.0


line stmt bran cond sub pod time code
1             package MARC::File::MicroLIF;
2              
3             =head1 NAME
4              
5             MARC::File::MicroLIF - MicroLIF-specific file handling
6              
7             =cut
8              
9 7     7   79689 use strict;
  7         19  
  7         286  
10 7     7   45 use warnings;
  7         12  
  7         237  
11 7     7   39 use integer;
  7         16  
  7         57  
12 7     7   194 use vars qw( $ERROR );
  7         13  
  7         352  
13              
14 7     7   1648 use MARC::File;
  7         15  
  7         189  
15 7     7   40 use vars qw( @ISA ); @ISA = qw( MARC::File );
  7         12  
  7         415  
16              
17 7     7   630 use MARC::Record qw( LEADER_LEN );
  7         15  
  7         515  
18              
19             =head1 SYNOPSIS
20              
21             use MARC::File::MicroLIF;
22              
23             my $file = MARC::File::MicroLIF->in( $filename );
24              
25             while ( my $marc = $file->next() ) {
26             # Do something
27             }
28             $file->close();
29             undef $file;
30              
31             =head1 EXPORT
32              
33             None.
34              
35             =cut
36              
37              
38             =for internal
39              
40             The buffer must be large enough to handle any valid record because
41             we don't check for cases like a CR/LF pair or an end-of-record/CR/LF
42             trio being only partially in the buffer.
43              
44             The max valid record is the max MARC record size (99999) plus one
45             or two characters per tag (CR, LF, or CR/LF). It's hard to say
46             what the max number of tags is, so here we use 6000. (6000 tags
47             can be squeezed into a MARC record only if every tag has only one
48             subfield containing a maximum of one character, or if data from
49             multiple tags overlaps in the MARC record body. We're pretty safe.)
50              
51             =cut
52              
53 7     7   44 use constant BUFFER_MIN => (99999 + 6000 * 2);
  7         14  
  7         11334  
54              
55             =head1 METHODS
56              
57             =head2 in()
58              
59             Opens a MicroLIF file for reading.
60              
61             =cut
62              
63             sub in {
64 14     14 1 2643 my $class = shift;
65 14         115 my $self = $class->SUPER::in( @_ );
66              
67 14 50       104 if ( $self ) {
68 14         52 bless $self, $class;
69              
70 14         46 $self->{exhaustedfh} = 0;
71 14         41 $self->{inputbuf} = '';
72 14         42 $self->{header} = undef;
73              
74             # get the MicroLIF header, but handle the case in
75             # which it's missing.
76 14         48 my $header = $self->_get_chunk( 1 );
77 14 50       54 if ( defined $header ) {
78 14 100       54 if ( $header =~ /^LDR/ ) {
79             # header missing, put this back
80 2         12 $self->_unget_chunk( $header . "\n" );
81              
82             # XXX should we warn of a missing header?
83             }
84             else {
85 12         43 $self->{header} = $header;
86             }
87             }
88             else {
89             # can't read from the file
90 0         0 undef $self;
91             }
92             }
93              
94 14         67 return $self;
95             } # new
96              
97              
98             # fill the buffer if we need to
99             sub _fill_buffer {
100 247     247   359 my $self = shift;
101 247         315 my $ok = 1;
102              
103 247 100 66     1188 if ( !$self->{exhaustedfh} && length( $self->{inputbuf} ) < BUFFER_MIN ) {
104             # append the next chunk of bytes to the buffer
105 26         2103 my $read = read $self->{fh}, $self->{inputbuf}, BUFFER_MIN, length($self->{inputbuf});
106 26 50       185 if ( !defined $read ) {
    100          
107             # error!
108 0         0 $ok = undef;
109 0         0 $MARC::File::ERROR = "error reading from file " . $self->{filename};
110             }
111             elsif ( $read < 1 ) {
112 12         41 $self->{exhaustedfh} = 1;
113             }
114             }
115              
116 247         2184 return $ok;
117             }
118              
119              
120             =for internal
121              
122             Gets the next chunk of data. If C<$want_line> is true then you get
123             the next chunk ending with any combination of \r and \n of any length.
124             If it is false or not passed then you get the next chunk ending with
125             \x60 followed by any combination of \r and \n of any length.
126              
127             All trailing \r and \n are stripped.
128              
129             =cut
130              
131             sub _get_chunk {
132 247     247   431 my $self = shift;
133 247   100     1294 my $want_line = shift || 0;
134              
135 247         433 my $chunk = undef;
136              
137 247 100 66     669 if ( $self->_fill_buffer() && length($self->{inputbuf}) > 0 ) {
138              
139             # the buffer always has at least one full line in it, so we're
140             # guaranteed that if there are no line endings then we're
141             # on the last line.
142              
143 239 100       644 if ( $want_line ) {
144 14 50       174 if ( $self->{inputbuf} =~ /^([^\x0d\x0a]*)([\x0d\x0a]+)/ ) {
145 14         45 $chunk = $1;
146 14         197 $self->{inputbuf} = substr( $self->{inputbuf}, length($1)+length($2) );
147             }
148             }
149             else {
150             # couldn't figure out how to make this work as a regex
151 225         457 my $pos = -1;
152 225         551 while ( !$chunk ) {
153 225         3893 $pos = index( $self->{inputbuf}, '`', $pos+1 );
154 225 100       1005 last if $pos < 0;
155 224 50 66     2186 if ( substr($self->{inputbuf}, $pos+1, 1) eq "\x0d" or substr($self->{inputbuf}, $pos+1, 1) eq "\x0a" ) {
156 224         904 $chunk = substr( $self->{inputbuf}, 0, $pos+1 ); # include the '`' but not the newlines
157 224   100     1950 while ( substr($self->{inputbuf}, $pos+1, 1) eq "\x0d" or substr($self->{inputbuf}, $pos+1, 1) eq "\x0a" ) {
158 225         2018 ++$pos;
159             }
160             # $pos now pointing at last newline char
161 224         1898 $self->{inputbuf} = substr( $self->{inputbuf}, $pos+1 );
162             }
163             }
164             }
165              
166 239 100       722 if ( !$chunk ) {
167 1         3 $chunk = $self->{inputbuf};
168 1         11 $self->{inputbuf} = '';
169 1         3 $self->{exhaustedfh} = 1;
170             }
171             }
172              
173 247         1050 return $chunk;
174             }
175              
176              
177             # $chunk is put at the beginning of the buffer exactly as
178             # passed in. No line endings are added.
179             sub _unget_chunk {
180 2     2   4 my $self = shift;
181 2         5 my $chunk = shift;
182 2         12 $self->{inputbuf} = $chunk . $self->{inputbuf};
183 2         8 return;
184             }
185              
186              
187             sub _next {
188 233     233   371 my $self = shift;
189              
190 233         631 my $lifrec = $self->_get_chunk();
191              
192             # for ease, make the newlines match this platform
193 233 100       3748 $lifrec =~ s/[\x0a\x0d]+/\n/g if defined $lifrec;
194              
195 233         1638 return $lifrec;
196             }
197              
198              
199             =head2 header()
200              
201             If the MicroLIF file has a file header then the header is returned.
202             If the file has no header or the file has not yet been opened then
203             C is returned.
204              
205             =cut
206              
207             sub header {
208 2     2 1 1957 my $self = shift;
209 2         17 return $self->{header};
210             }
211              
212             =head2 decode()
213              
214             Decodes a MicroLIF record and returns a USMARC record.
215              
216             Can be called in one of three different ways:
217              
218             $object->decode( $lif )
219             MARC::File::MicroLIF->decode( $lif )
220             MARC::File::MicroLIF::decode( $lif )
221              
222             =cut
223              
224             sub decode {
225 232     232 1 4420 my $self = shift;
226 232         372 my $location = '';
227 232         324 my $text = '';
228              
229             ## decode can be called in a variety of ways
230             ## this bit of code covers all three
231              
232 232 100       969 if ( ref($self) =~ /^MARC::File/ ) {
233 225         526 $location = 'in record '.$self->{recnum};
234 225         524 $text = shift;
235             } else {
236 7         13 $location = 'in record 1';
237 7 100       33 $text = $self=~/MARC::File/ ? shift : $self;
238             }
239              
240 232         986 my $marc = MARC::Record->new();
241              
242             # for ease, make the newlines match this platform
243 232 50       3288 $text =~ s/[\x0a\x0d]+/\n/g if defined $text;
244              
245 232         2903 my @lines = split( /\n/, $text );
246 232         822 for my $line ( @lines ) {
247              
248 2335 50       12513 ($line =~ s/^([0-9A-Za-z]{3})//) or
249             $marc->_warn( "Invalid tag number: ".substr( $line, 0, 3 )." $location" );
250 2335         5179 my $tagno = $1;
251              
252 2335 100       11810 ($line =~ s/\^`?$//)
253             or $marc->_warn( "Tag $tagno $location is missing a trailing caret." );
254              
255 2335 100 100     15459 if ( $tagno eq "LDR" ) {
    100          
256 232         1187 $marc->leader( substr( $line, 0, LEADER_LEN ) );
257             } elsif ( $tagno =~ /^\d+$/ and $tagno < 10 ) {
258 289         1003 $marc->add_fields( $tagno, $line );
259             } else {
260 1814         5801 $line =~ s/^(.)(.)//;
261 1814         5521 my ($ind1,$ind2) = ($1,$2);
262 1814         1996 my @subfields;
263 1814         17424 my @subfield_data_pairs = split( /_(?=[a-z0-9])/, $line );
264 1814 100       4828 if ( scalar @subfield_data_pairs < 2 ) {
265 8         52 $marc->_warn( "Tag $tagno $location has no subfields--discarded." );
266             }
267             else {
268 1806         2309 shift @subfield_data_pairs; # Leading _ makes an empty pair
269 1806         4743 for my $pair ( @subfield_data_pairs ) {
270 3020         9785 my ($subfield,$data) = (substr( $pair, 0, 1 ), substr( $pair, 1 ));
271 3020         11547 push( @subfields, $subfield, $data );
272             }
273 1806         6505 $marc->add_fields( $tagno, $ind1, $ind2, @subfields );
274             }
275             }
276             } # for
277              
278 232         1599 return $marc;
279             }
280              
281             1;
282              
283             __END__