File Coverage

blib/lib/MARC/File/MicroLIF.pm
Criterion Covered Total %
statement 102 105 97.1
branch 34 40 85.0
condition 14 17 82.3
subroutine 14 14 100.0
pod 3 3 100.0
total 167 179 93.3


line stmt bran cond sub pod time code
1             package MARC::File::MicroLIF;
2              
3             =head1 NAME
4              
5             MARC::File::MicroLIF - MicroLIF-specific file handling
6              
7             =cut
8              
9 7     7   27702 use strict;
  7         12  
  7         245  
10 7     7   31 use integer;
  7         8  
  7         38  
11 7     7   137 use vars qw( $ERROR );
  7         10  
  7         270  
12              
13 7     7   971 use MARC::File;
  7         10  
  7         178  
14 7     7   29 use vars qw( @ISA ); @ISA = qw( MARC::File );
  7         9  
  7         309  
15              
16 7     7   401 use MARC::Record qw( LEADER_LEN );
  7         8  
  7         469  
17              
18             =head1 SYNOPSIS
19              
20             use MARC::File::MicroLIF;
21              
22             my $file = MARC::File::MicroLIF->in( $filename );
23              
24             while ( my $marc = $file->next() ) {
25             # Do something
26             }
27             $file->close();
28             undef $file;
29              
30             =head1 EXPORT
31              
32             None.
33              
34             =cut
35              
36              
37             =for internal
38              
39             The buffer must be large enough to handle any valid record because
40             we don't check for cases like a CR/LF pair or an end-of-record/CR/LF
41             trio being only partially in the buffer.
42              
43             The max valid record is the max MARC record size (99999) plus one
44             or two characters per tag (CR, LF, or CR/LF). It's hard to say
45             what the max number of tags is, so here we use 6000. (6000 tags
46             can be squeezed into a MARC record only if every tag has only one
47             subfield containing a maximum of one character, or if data from
48             multiple tags overlaps in the MARC record body. We're pretty safe.)
49              
50             =cut
51              
52 7     7   33 use constant BUFFER_MIN => (99999 + 6000 * 2);
  7         8  
  7         7535  
53              
54             =head1 METHODS
55              
56             =head2 in()
57              
58             Opens a MicroLIF file for reading.
59              
60             =cut
61              
62             sub in {
63 14     14 1 2148 my $class = shift;
64 14         91 my $self = $class->SUPER::in( @_ );
65              
66 14 50       68 if ( $self ) {
67 14         25 bless $self, $class;
68              
69 14         37 $self->{exhaustedfh} = 0;
70 14         29 $self->{inputbuf} = '';
71 14         26 $self->{header} = undef;
72              
73             # get the MicroLIF header, but handle the case in
74             # which it's missing.
75 14         48 my $header = $self->_get_chunk( 1 );
76 14 50       42 if ( defined $header ) {
77 14 100       79 if ( $header =~ /^LDR/ ) {
78             # header missing, put this back
79 2         8 $self->_unget_chunk( $header . "\n" );
80              
81             # XXX should we warn of a missing header?
82             }
83             else {
84 12         32 $self->{header} = $header;
85             }
86             }
87             else {
88             # can't read from the file
89 0         0 undef $self;
90             }
91             }
92              
93 14         55 return $self;
94             } # new
95              
96              
97             # fill the buffer if we need to
98             sub _fill_buffer {
99 247     247   266 my $self = shift;
100 247         321 my $ok = 1;
101              
102 247 100 66     815 if ( !$self->{exhaustedfh} && length( $self->{inputbuf} ) < BUFFER_MIN ) {
103             # append the next chunk of bytes to the buffer
104 26         772 my $read = read $self->{fh}, $self->{inputbuf}, BUFFER_MIN, length($self->{inputbuf});
105 26 50       159 if ( !defined $read ) {
    100          
106             # error!
107 0         0 $ok = undef;
108 0         0 $MARC::File::ERROR = "error reading from file " . $self->{filename};
109             }
110             elsif ( $read < 1 ) {
111 12         41 $self->{exhaustedfh} = 1;
112             }
113             }
114              
115 247         1669 return $ok;
116             }
117              
118              
119             =for internal
120              
121             Gets the next chunk of data. If C<$want_line> is true then you get
122             the next chunk ending with any combination of \r and \n of any length.
123             If it is false or not passed then you get the next chunk ending with
124             \x60 followed by any combination of \r and \n of any length.
125              
126             All trailing \r and \n are stripped.
127              
128             =cut
129              
130             sub _get_chunk {
131 247     247   275 my $self = shift;
132 247   100     862 my $want_line = shift || 0;
133              
134 247         294 my $chunk = undef;
135              
136 247 100 66     411 if ( $self->_fill_buffer() && length($self->{inputbuf}) > 0 ) {
137              
138             # the buffer always has at least one full line in it, so we're
139             # guaranteed that if there are no line endings then we're
140             # on the last line.
141              
142 239 100       503 if ( $want_line ) {
143 14 50       133 if ( $self->{inputbuf} =~ /^([^\x0d\x0a]*)([\x0d\x0a]+)/ ) {
144 14         72 $chunk = $1;
145 14         136 $self->{inputbuf} = substr( $self->{inputbuf}, length($1)+length($2) );
146             }
147             }
148             else {
149             # couldn't figure out how to make this work as a regex
150 225         266 my $pos = -1;
151 225         402 while ( !$chunk ) {
152 225         2411 $pos = index( $self->{inputbuf}, '`', $pos+1 );
153 225 100       746 last if $pos < 0;
154 224 50 66     1669 if ( substr($self->{inputbuf}, $pos+1, 1) eq "\x0d" or substr($self->{inputbuf}, $pos+1, 1) eq "\x0a" ) {
155 224         741 $chunk = substr( $self->{inputbuf}, 0, $pos+1 ); # include the '`' but not the newlines
156 224   100     1412 while ( substr($self->{inputbuf}, $pos+1, 1) eq "\x0d" or substr($self->{inputbuf}, $pos+1, 1) eq "\x0a" ) {
157 225         1318 ++$pos;
158             }
159             # $pos now pointing at last newline char
160 224         1326 $self->{inputbuf} = substr( $self->{inputbuf}, $pos+1 );
161             }
162             }
163             }
164              
165 239 100       695 if ( !$chunk ) {
166 1         2 $chunk = $self->{inputbuf};
167 1         2 $self->{inputbuf} = '';
168 1         2 $self->{exhaustedfh} = 1;
169             }
170             }
171              
172 247         779 return $chunk;
173             }
174              
175              
176             # $chunk is put at the beginning of the buffer exactly as
177             # passed in. No line endings are added.
178             sub _unget_chunk {
179 2     2   3 my $self = shift;
180 2         3 my $chunk = shift;
181 2         7 $self->{inputbuf} = $chunk . $self->{inputbuf};
182 2         6 return;
183             }
184              
185              
186             sub _next {
187 233     233   244 my $self = shift;
188              
189 233         379 my $lifrec = $self->_get_chunk();
190              
191             # for ease, make the newlines match this platform
192 233 100       3108 $lifrec =~ s/[\x0a\x0d]+/\n/g if defined $lifrec;
193              
194 233         1149 return $lifrec;
195             }
196              
197              
198             =head2 header()
199              
200             If the MicroLIF file has a file header then the header is returned.
201             If the file has no header or the file has not yet been opened then
202             C is returned.
203              
204             =cut
205              
206             sub header {
207 2     2 1 1006 my $self = shift;
208 2         10 return $self->{header};
209             }
210              
211             =head2 decode()
212              
213             Decodes a MicroLIF record and returns a USMARC record.
214              
215             Can be called in one of three different ways:
216              
217             $object->decode( $lif )
218             MARC::File::MicroLIF->decode( $lif )
219             MARC::File::MicroLIF::decode( $lif )
220              
221             =cut
222              
223             sub decode {
224 228     228 1 1879 my $self = shift;
225 228         295 my $location = '';
226 228         220 my $text = '';
227              
228             ## decode can be called in a variety of ways
229             ## this bit of code covers all three
230              
231 228 100       818 if ( ref($self) =~ /^MARC::File/ ) {
232 225         394 $location = 'in record '.$self->{recnum};
233 225         374 $text = shift;
234             } else {
235 3         7 $location = 'in record 1';
236 3 100       22 $text = $self=~/MARC::File/ ? shift : $self;
237             }
238              
239 228         672 my $marc = MARC::Record->new();
240              
241 228         2030 my @lines = split( /\n/, $text );
242 228         608 for my $line ( @lines ) {
243              
244 2297 50       8823 ($line =~ s/^([0-9A-Za-z]{3})//) or
245             $marc->_warn( "Invalid tag number: ".substr( $line, 0, 3 )." $location" );
246 2297         3940 my $tagno = $1;
247              
248 2297 100       8015 ($line =~ s/\^`?$//)
249             or $marc->_warn( "Tag $tagno $location is missing a trailing caret." );
250              
251 2297 100 100     11221 if ( $tagno eq "LDR" ) {
    100          
252 228         764 $marc->leader( substr( $line, 0, LEADER_LEN ) );
253             } elsif ( $tagno =~ /^\d+$/ and $tagno < 10 ) {
254 285         743 $marc->add_fields( $tagno, $line );
255             } else {
256 1784         3956 $line =~ s/^(.)(.)//;
257 1784         3136 my ($ind1,$ind2) = ($1,$2);
258 1784         1583 my @subfields;
259 1784         8505 my @subfield_data_pairs = split( /_(?=[a-z0-9])/, $line );
260 1784 100       3542 if ( scalar @subfield_data_pairs < 2 ) {
261 2         14 $marc->_warn( "Tag $tagno $location has no subfields--discarded." );
262             }
263             else {
264 1782         1771 shift @subfield_data_pairs; # Leading _ makes an empty pair
265 1782         2615 for my $pair ( @subfield_data_pairs ) {
266 2984         7026 my ($subfield,$data) = (substr( $pair, 0, 1 ), substr( $pair, 1 ));
267 2984         7899 push( @subfields, $subfield, $data );
268             }
269 1782         4419 $marc->add_fields( $tagno, $ind1, $ind2, @subfields );
270             }
271             }
272             } # for
273              
274 228         1086 return $marc;
275             }
276              
277             1;
278              
279             __END__