File Coverage

blib/lib/MARC/File/USMARC.pm
Criterion Covered Total %
statement 122 131 93.1
branch 37 52 71.1
condition 4 9 44.4
subroutine 17 18 94.4
pod 3 3 100.0
total 183 213 85.9


line stmt bran cond sub pod time code
1             package MARC::File::USMARC;
2              
3             =head1 NAME
4              
5             MARC::File::USMARC - USMARC-specific file handling
6              
7             =cut
8              
9 28     28   147637 use strict;
  28         105  
  28         752  
10 28     28   144 use warnings;
  28         57  
  28         751  
11 28     28   1408 use integer;
  28         91  
  28         152  
12              
13 28     28   675 use vars qw( $ERROR );
  28         54  
  28         1227  
14 28     28   9653 use MARC::File::Encode qw( marc_to_utf8 );
  28         153  
  28         1551  
15              
16 28     28   8371 use MARC::File;
  28         75  
  28         714  
17 28     28   162 use vars qw( @ISA ); @ISA = qw( MARC::File );
  28         51  
  28         1213  
18              
19 28     28   7525 use MARC::Record qw( LEADER_LEN );
  28         77  
  28         1294  
20 28     28   183 use MARC::Field;
  28         53  
  28         694  
21 28     28   139 use constant SUBFIELD_INDICATOR => "\x1F";
  28         56  
  28         1562  
22 28     28   161 use constant END_OF_FIELD => "\x1E";
  28         55  
  28         1216  
23 28     28   322 use constant END_OF_RECORD => "\x1D";
  28         69  
  28         1098  
24 28     28   151 use constant DIRECTORY_ENTRY_LEN => 12;
  28         57  
  28         30263  
25              
26             =head1 SYNOPSIS
27              
28             use MARC::File::USMARC;
29              
30             my $file = MARC::File::USMARC->in( $filename );
31              
32             while ( my $marc = $file->next() ) {
33             # Do something
34             }
35             $file->close();
36             undef $file;
37              
38             =head1 EXPORT
39              
40             None.
41              
42             =head1 METHODS
43              
44             =cut
45              
46             sub _next {
47 176     176   359 my $self = shift;
48 176         380 my $fh = $self->{fh};
49              
50 176         322 my $reclen;
51 176 100       1721 return if eof($fh);
52              
53 164         808 local $/ = END_OF_RECORD;
54 164         2129 my $usmarc = <$fh>;
55              
56             # remove illegal garbage that sometimes occurs between records
57 164         754 $usmarc =~ s/^[ \x00\x0a\x0d\x1a]+//;
58              
59 164         1137 return $usmarc;
60             }
61              
62             =head2 decode( $string [, \&filter_func ] )
63              
64             Constructor for handling data from a USMARC file. This function takes care of
65             all the tag directory parsing & mangling.
66              
67             Any warnings or coercions can be checked in the C function.
68              
69             The C<$filter_func> is an optional reference to a user-supplied function
70             that determines on a tag-by-tag basis if you want the tag passed to it
71             to be put into the MARC record. The function is passed the tag number
72             and the raw tag data, and must return a boolean. The return of a true
73             value tells MARC::File::USMARC::decode that the tag should get put into
74             the resulting MARC record.
75              
76             For example, if you only want title and subject tags in your MARC record,
77             try this:
78              
79             sub filter {
80             my ($tagno,$tagdata) = @_;
81              
82             return ($tagno == 245) || ($tagno >= 600 && $tagno <= 699);
83             }
84              
85             my $marc = MARC::File::USMARC->decode( $string, \&filter );
86              
87             Why would you want to do such a thing? The big reason is that creating
88             fields is processor-intensive, and if your program is doing read-only
89             data analysis and needs to be as fast as possible, you can save time by
90             not creating fields that you'll be ignoring anyway.
91              
92             Another possible use is if you're only interested in printing certain
93             tags from the record, then you can filter them when you read from disc
94             and not have to delete unwanted tags yourself.
95              
96             =cut
97              
98             sub decode {
99              
100 168     168 1 2829 my $text;
101 168         343 my $location = '';
102              
103             ## decode can be called in a variety of ways
104             ## $object->decode( $string )
105             ## MARC::File::USMARC->decode( $string )
106             ## MARC::File::USMARC::decode( $string )
107             ## this bit of code covers all three
108              
109 168         326 my $self = shift;
110 168 100       796 if ( ref($self) =~ /^MARC::File/ ) {
111 162         471 $location = 'in record '.$self->{recnum};
112 162         457 $text = shift;
113             } else {
114 6         13 $location = 'in record 1';
115 6 100       31 $text = $self=~/MARC::File/ ? shift : $self;
116             }
117 168         373 my $filter_func = shift;
118              
119             # ok this the empty shell we will fill
120 168         763 my $marc = MARC::Record->new();
121              
122             # Check for an all-numeric record length
123 168 100       889 ($text =~ /^(\d{5})/)
124             or return $marc->_warn( "Record length \"", substr( $text, 0, 5 ), "\" is not numeric $location" );
125              
126 165         500 my $reclen = $1;
127 165         622 my $realLength = bytes::length( $text );
128 165 50       17150 $marc->_warn( "Invalid record length $location: Leader says $reclen " .
129             "bytes but it's actually $realLength" ) unless $reclen == $realLength;
130              
131 165 50       809 (substr($text, -1, 1) eq END_OF_RECORD)
132             or $marc->_warn( "Invalid record terminator $location" );
133              
134 165         853 $marc->leader( substr( $text, 0, LEADER_LEN ) );
135              
136             # bytes 12 - 16 of leader give offset to the body of the record
137 165         660 my $data_start = 0 + bytes::substr( $text, 12, 5 );
138              
139             # immediately after the leader comes the directory (no separator)
140 165         1544 my $dir = substr( $text, LEADER_LEN, $data_start - LEADER_LEN - 1 ); # -1 to allow for \x1e at end of directory
141              
142             # character after the directory must be \x1e
143 165 100       717 (substr($text, $data_start-1, 1) eq END_OF_FIELD)
144             or $marc->_warn( "No directory found $location" );
145              
146             # all directory entries 12 bytes long, so length % 12 must be 0
147 165 50       683 (length($dir) % DIRECTORY_ENTRY_LEN == 0)
148             or $marc->_warn( "Invalid directory length $location" );
149              
150              
151             # go through all the fields
152 165         481 my $nfields = length($dir)/DIRECTORY_ENTRY_LEN;
153 165         592 for ( my $n = 0; $n < $nfields; $n++ ) {
154 2723         18454 my ( $tagno, $len, $offset ) = unpack( "A3 A4 A5", substr($dir, $n*DIRECTORY_ENTRY_LEN, DIRECTORY_ENTRY_LEN) );
155              
156             # Check directory validity
157 2723 50       12572 ($tagno =~ /^[0-9A-Za-z]{3}$/)
158             or $marc->_warn( "Invalid tag in directory $location: \"$tagno\"" );
159              
160 2723 50       8372 ($len =~ /^\d{4}$/)
161             or $marc->_warn( "Invalid length in directory $location tag $tagno: \"$len\"" );
162              
163 2723 50       7744 ($offset =~ /^\d{5}$/)
164             or $marc->_warn( "Invalid offset in directory $location tag $tagno: \"$offset\"" );
165              
166 2723 50       9098 ($offset + $len <= $reclen)
167             or $marc->_warn( "Directory entry $location runs off the end of the record tag $tagno" );
168              
169 2723         9098 my $tagdata = bytes::substr( $text, $data_start+$offset, $len );
170              
171             # if utf8 the we encode the string as utf8
172 2723 100       20279 if ( $marc->encoding() eq 'UTF-8' ) {
173 1         5 $tagdata = marc_to_utf8( $tagdata );
174             }
175              
176 2723 50       7397 $marc->_warn( "Invalid length in directory for tag $tagno $location" )
177             unless ( $len == bytes::length($tagdata) );
178              
179 2723 50       16837 if ( substr($tagdata, -1, 1) eq END_OF_FIELD ) {
180             # get rid of the end-of-tag character
181 2723         6343 chop $tagdata;
182 2723         6097 --$len;
183             } else {
184 0         0 $marc->_warn( "field does not end in end of field character in tag $tagno $location" );
185             }
186              
187 2723 50       6613 warn "Specs: ", join( "|", $tagno, $len, $offset, $tagdata ), "\n" if $MARC::Record::DEBUG;
188              
189 2723 100       6146 if ( $filter_func ) {
190 167 100       368 next unless $filter_func->( $tagno, $tagdata );
191             }
192              
193 2569 100       7321 if ( MARC::Field->is_controlfield_tag($tagno) ) {
194 559         1944 $marc->append_fields( MARC::Field->new( $tagno, $tagdata ) );
195             } else {
196 2010         7585 my @subfields = split( SUBFIELD_INDICATOR, $tagdata );
197 2010         4849 my $indicators = shift @subfields;
198 2010         4392 my ($ind1, $ind2);
199              
200 2010 50 33     10610 if ( length( $indicators ) > 2 or length( $indicators ) == 0 ) {
201 0         0 $marc->_warn( "Invalid indicators \"$indicators\" forced to blanks $location for tag $tagno\n" );
202 0         0 ($ind1,$ind2) = (" ", " ");
203             } else {
204 2010         5300 $ind1 = substr( $indicators,0, 1 );
205 2010         4863 $ind2 = substr( $indicators,1, 1 );
206             }
207              
208             # Split the subfield data into subfield name and data pairs
209 2010         3551 my @subfield_data;
210 2010         4242 for ( @subfields ) {
211 3341 50       8225 if ( length > 0 ) {
212 3341         12020 push( @subfield_data, substr($_,0,1),substr($_,1) );
213             } else {
214 0         0 $marc->_warn( "Entirely empty subfield found in tag $tagno" );
215             }
216             }
217              
218 2010 50       5264 if ( !@subfield_data ) {
219 0         0 $marc->_warn( "no subfield data found $location for tag $tagno" );
220 0         0 next;
221             }
222              
223 2010         6316 my $field = MARC::Field->new($tagno, $ind1, $ind2, @subfield_data );
224 2010 100       5543 if ( $field->warnings() ) {
225 2         7 $marc->_warn( $field->warnings() );
226             }
227 2010         5576 $marc->append_fields( $field );
228             }
229             } # looping through all the fields
230              
231              
232 165         1041 return $marc;
233             }
234              
235             =head2 update_leader()
236              
237             If any changes get made to the MARC record, the first 5 bytes of the
238             leader (the length) will be invalid. This function updates the
239             leader with the correct length of the record as it would be if
240             written out to a file.
241              
242             =cut
243              
244             sub update_leader {
245 0     0 1 0 my $self = shift;
246              
247 0         0 my (undef,undef,$reclen,$baseaddress) = $self->_build_tag_directory();
248              
249 0         0 $self->_set_leader_lengths( $reclen, $baseaddress );
250             }
251              
252             =head2 _build_tag_directory()
253              
254             Function for internal use only: Builds the tag directory that gets
255             put in front of the data in a MARC record.
256              
257             Returns two array references, and two lengths: The tag directory, and the data fields themselves,
258             the length of all data (including the Leader that we expect will be added),
259             and the size of the Leader and tag directory.
260              
261             =cut
262              
263             sub _build_tag_directory {
264 10     10   19 my $marc = shift;
265 10 50 33     45 $marc = shift if (ref($marc)||$marc) =~ /^MARC::File/;
266 10 50       36 die "Wanted a MARC::Record but got a ", ref($marc) unless ref($marc) eq "MARC::Record";
267              
268 10         28 my @fields;
269             my @directory;
270              
271 10         20 my $dataend = 0;
272 10         50 for my $field ( $marc->fields() ) {
273             # Dump data into proper format
274 44         142 my $str = $field->as_usmarc;
275 44         95 push( @fields, $str );
276              
277             # Create directory entry
278 44         129 my $len = bytes::length( $str );
279              
280 44         3519 my $direntry = sprintf( "%03s%04d%05d", $field->tag, $len, $dataend );
281 44         97 push( @directory, $direntry );
282 44         97 $dataend += $len;
283             }
284              
285 10         26 my $baseaddress =
286             LEADER_LEN + # better be 24
287             ( @directory * DIRECTORY_ENTRY_LEN ) +
288             # all the directory entries
289             1; # end-of-field marker
290              
291              
292 10         23 my $total =
293             $baseaddress + # stuff before first field
294             $dataend + # Length of the fields
295             1; # End-of-record marker
296              
297              
298              
299 10         34 return (\@fields, \@directory, $total, $baseaddress);
300             }
301              
302             =head2 encode()
303              
304             Returns a string of characters suitable for writing out to a USMARC file,
305             including the leader, directory and all the fields.
306              
307             =cut
308              
309             sub encode {
310 10     10 1 657 my $marc = shift;
311 10 100 66     62 $marc = shift if (ref($marc)||$marc) =~ /^MARC::File/;
312              
313 10         26 my ($fields,$directory,$reclen,$baseaddress) = _build_tag_directory($marc);
314 10         44 $marc->set_leader_lengths( $reclen, $baseaddress );
315              
316             # Glomp it all together
317 10         32 return join("",$marc->leader, @$directory, END_OF_FIELD, @$fields, END_OF_RECORD);
318             }
319             1;
320              
321             __END__