File Coverage

blib/lib/MARC/Batch.pm
Criterion Covered Total %
statement 55 61 90.1
branch 20 26 76.9
condition 3 3 100.0
subroutine 9 11 81.8
pod 8 8 100.0
total 95 109 87.1


line stmt bran cond sub pod time code
1             package MARC::Batch;
2              
3             =head1 NAME
4              
5             MARC::Batch - Perl module for handling files of MARC::Record objects
6              
7             =head1 SYNOPSIS
8              
9             MARC::Batch hides all the file handling of files of Cs.
10             C still does the file I/O, but C handles the
11             multiple-file aspects.
12              
13             use MARC::Batch;
14              
15             # If you have weird control fields...
16             use MARC::Field;
17             MARC::Field->allow_controlfield_tags('FMT', 'LDX');
18            
19              
20             my $batch = MARC::Batch->new( 'USMARC', @files );
21             while ( my $marc = $batch->next ) {
22             print $marc->subfield(245,"a"), "\n";
23             }
24              
25             =head1 EXPORT
26              
27             None. Everything is a class method.
28              
29             =cut
30              
31 13     13   113129 use strict;
  13         34  
  13         417  
32 13     13   1291 use integer;
  13         105  
  13         68  
33 13     13   344 use Carp qw( croak );
  13         29  
  13         7006  
34              
35             =head1 METHODS
36              
37             =head2 new( $type, @files )
38              
39             Create a C object that will process C<@files>.
40              
41             C<$type> must be either "USMARC" or "MicroLIF". If you want to specify
42             "MARC::File::USMARC" or "MARC::File::MicroLIF", that's OK, too. C returns a
43             new MARC::Batch object.
44              
45             C<@files> can be a list of filenames:
46              
47             my $batch = MARC::Batch->new( 'USMARC', 'file1.marc', 'file2.marc' );
48              
49             Your C<@files> may also contain filehandles. So if you've got a large
50             file that's gzipped you can open a pipe to F and pass it in:
51              
52             my $fh = IO::File->new( 'gunzip -c marc.dat.gz |' );
53             my $batch = MARC::Batch->new( 'USMARC', $fh );
54              
55             And you can mix and match if you really want to:
56              
57             my $batch = MARC::Batch->new( 'USMARC', $fh, 'file1.marc' );
58              
59             =cut
60              
61             sub new {
62 17     17 1 11699 my $class = shift;
63 17         51 my $type = shift;
64              
65 17 100       106 my $marcclass = ($type =~ /^MARC::File/) ? $type : "MARC::File::$type";
66              
67 17         1034 eval "require $marcclass";
68 17 50       124 croak $@ if $@;
69              
70 17         75 my @files = @_;
71              
72 17         147 my $self = {
73             filestack => \@files,
74             filename => undef,
75             marcclass => $marcclass,
76             file => undef,
77             warnings => [],
78             'warn' => 1,
79             strict => 1,
80             };
81              
82 17         66 bless $self, $class;
83              
84 17         80 return $self;
85             } # new()
86              
87              
88             =head2 next()
89              
90             Read the next record from that batch, and return it as a MARC::Record
91             object. If the current file is at EOF, close it and open the next
92             one. C will return C when there is no more data to be
93             read from any batch files.
94              
95             By default, C also will return C if an error is
96             encountered while reading from the batch. If not checked for this can
97             cause your iteration to terminate prematurely. To alter this behavior,
98             see C. You can retrieve warning messages using the
99             C method.
100              
101             Optionally you can pass in a filter function as a subroutine reference
102             if you are only interested in particular fields from the record. This
103             can boost performance.
104              
105             =cut
106              
107             sub next {
108 303     303 1 164030 my ( $self, $filter ) = @_;
109 303 100 100     1338 if ( $filter and ref($filter) ne 'CODE' ) {
110 1         132 croak( "filter function in next() must be a subroutine reference" );
111             }
112              
113 302 100       1149 if ( $self->{file} ) {
114              
115             # get the next record
116 285         1224 my $rec = $self->{file}->next( $filter );
117              
118             # collect warnings from MARC::File::* object
119             # we use the warnings() method here since MARC::Batch
120             # hides access to MARC::File objects, and we don't
121             # need to preserve the warnings buffer.
122 285         1564 my @warnings = $self->{file}->warnings();
123 285 50       941 if ( @warnings ) {
124 0         0 $self->warnings( @warnings );
125 0 0       0 return if $self->{ strict };
126             }
127              
128 285 100       890 if ($rec) {
129              
130             # collect warnings from the MARC::Record object
131             # IMPORTANT: here we don't use warnings() but dig
132             # into the the object to get at the warnings without
133             # erasing the buffer. This is so a user can call
134             # warnings() on the MARC::Record object and get back
135             # warnings for that specific record.
136 271         490 my @warnings = @{ $rec->{_warnings} };
  271         659  
137              
138 271 100       812 if (@warnings) {
139 5         15 $self->warnings( @warnings );
140 5 100       17 return if $self->{ strict };
141             }
142              
143             # return the MARC::Record object
144 270         1188 return($rec);
145              
146             }
147              
148             }
149              
150             # Get the next file off the stack, if there is one
151 31 100       72 $self->{filename} = shift @{$self->{filestack}} or return;
  31         191  
152              
153             # Instantiate a filename for it
154 24         78 my $marcclass = $self->{marcclass};
155 24 50       172 $self->{file} = $marcclass->in( $self->{filename} ) or return;
156              
157             # call this method again now that we've got a file open
158 24         1448 return( $self->next( $filter ) );
159              
160             }
161              
162             =head2 strict_off()
163              
164             If you would like C to continue after it has encountered what
165             it believes to be bad MARC data then use this method to turn strict B.
166             A call to C always returns true (1).
167              
168             C can be handy when you don't care about the quality of your
169             MARC data, and just want to plow through it. For safety, C
170             strict is B by default.
171              
172             =cut
173              
174             sub strict_off {
175 3     3 1 17 my $self = shift;
176 3         10 $self->{ strict } = 0;
177 3         8 return(1);
178             }
179              
180             =head2 strict_on()
181              
182             The opposite of C, and the default state. You shouldn't
183             have to use this method unless you've previously used C, and
184             want it back on again. When strict is B calls to next() will return
185             undef when an error is encountered while reading MARC data. strict_on()
186             always returns true (1).
187              
188             =cut
189              
190             sub strict_on {
191 1     1 1 6 my $self = shift;
192 1         2 $self->{ strict } = 1;
193 1         3 return(1);
194             }
195              
196             =head2 warnings()
197              
198             Returns a list of warnings that have accumulated while processing a particular
199             batch file. As a side effect the warning buffer will be cleared.
200              
201             my @warnings = $batch->warnings();
202              
203             This method is also used internally to set warnings, so you probably don't
204             want to be passing in anything as this will set warnings on your batch object.
205              
206             C will return the empty list when there are no warnings.
207              
208             =cut
209              
210             sub warnings {
211 9     9 1 46 my ($self,@new) = @_;
212 9 100       23 if ( @new ) {
213 5         10 push( @{ $self->{warnings} }, @new );
  5         13  
214 5 50       20 print STDERR join( "\n", @new ) . "\n" if $self->{'warn'};
215             } else {
216 4         7 my @old = @{ $self->{warnings} };
  4         12  
217 4         9 $self->{warnings} = [];
218 4         20 return(@old);
219             }
220             }
221              
222              
223             =head2 warnings_off()
224              
225             Turns off the default behavior of printing warnings to STDERR. However, even
226             with warnings off the messages can still be retrieved using the warnings()
227             method if you wish to check for them.
228              
229             C always returns true (1).
230              
231             =cut
232              
233             sub warnings_off {
234 4     4 1 1279 my $self = shift;
235 4         13 $self->{ 'warn' } = 0;
236              
237 4         12 return 1;
238             }
239              
240             =head2 warnings_on()
241              
242             Turns on warnings so that diagnostic information is printed to STDERR. This
243             is on by default so you shouldn't have to use it unless you've previously
244             turned off warnings using warnings_off().
245              
246             warnings_on() always returns true (1).
247              
248             =cut
249              
250             sub warnings_on {
251 0     0 1   my $self = shift;
252 0           $self->{ 'warn' } = 1;
253             }
254              
255             =head2 filename()
256              
257             Returns the currently open filename or C if there is not currently a file
258             open on this batch object.
259              
260             =cut
261              
262             sub filename {
263 0     0 1   my $self = shift;
264              
265 0           return $self->{filename};
266             }
267              
268              
269             1;
270              
271             __END__