File Coverage

blib/lib/SeaBASS/File.pm
Criterion Covered Total %
statement 901 1032 87.3
branch 469 586 80.0
condition 110 165 66.6
subroutine 69 75 92.0
pod 48 48 100.0
total 1597 1906 83.7


line stmt bran cond sub pod time code
1             package SeaBASS::File;
2              
3 23     23   1199445 use strict;
  23         34  
  23         593  
4 23     23   81 use warnings;
  23         26  
  23         1304  
5              
6             =head1 NAME
7              
8             SeaBASS::File - Object-oriented interface for reading/writing SeaBASS files
9              
10             =head1 VERSION
11              
12             version 0.170190
13              
14             =cut
15              
16             our $VERSION = '0.170190'; # VERSION
17              
18             =head1 SYNOPSIS
19              
20             To read SeaBASS files:
21              
22             use SeaBASS::File qw(STRICT_READ STRICT_WRITE INSERT_BEGINNING INSERT_END);
23              
24             my $sb_file = SeaBASS::File->new("input.txt");
25              
26             # Calculate the average chlorophyll value using next
27             my $chl_total = 0;
28             my $measurements = 0;
29            
30             while (my $row = $sb_file->next()){
31             if (defined($row->{'chl'})){
32             $chl_total += $row->{'chl'};
33             $measurements++;
34             }
35             }
36             if ($measurements){
37             print $chl_total/$measurements;
38             } else {
39             print "No chl values.";
40             }
41            
42             #alternatively:
43             $sb_file->rewind();
44             while (my %row = $sb_file->next()){
45             if (defined($row{'chl'})){
46             $chl_total += $row{'chl'};
47             $measurements++;
48             }
49             }
50              
51             # Calculate the average chlorophyll value using where
52             my $chl_total2 = 0;
53             my $measurements2 = 0;
54             $sb_file->where(sub {
55             if (defined($_->{'chl'})){
56             $chl_total2 += $_->{'chl'};
57             $measurements2++;
58             }
59             });
60             if ($measurements2){
61             print $chl_total2/$measurements2;
62             } else {
63             print "No chl values.";
64             }
65            
66             Or to modify SeaBASS files:
67              
68             use SeaBASS::File qw(STRICT_READ STRICT_WRITE INSERT_BEGINNING INSERT_END);
69              
70             my $sb_file = SeaBASS::File->new("input.txt");
71              
72             # Add a one degree bias to water temperature
73             while (my $row = $sb_file->next()){
74             $row->{'wt'} += 1;
75             $sb_file->update($row);
76             }
77            
78             $sb_file->write(); # to STDOUT
79              
80             # Remove the one degree bias to water temperature
81             $sb_file->where(sub {
82             $_->{'wt'} -= 1;
83             });
84            
85             $sb_file->write("output_file.txt");
86            
87             Or to start a SeaBASS file from scratch:
88              
89             use SeaBASS::File qw(STRICT_READ STRICT_WRITE INSERT_BEGINNING INSERT_END);
90              
91             my $sb_file = SeaBASS::File->new({strict => 0, add_empty_headers => 1});
92             $sb_file->add_field('lat','degrees');
93             $sb_file->add_field('lon','degrees');
94             $sb_file->append({'lat' => 1, 'lon' => 2});
95             $sb_file->append("3,4"); # or if you're reading from a CSV file
96             $sb_file->write();
97              
98             =head1 DESCRIPTION
99              
100             C provides an easy to use, object-oriented interface for
101             reading, writing, and modifying SeaBASS data files.
102              
103             =head2 What is SeaBASS?
104              
105             L Bio-optical Archive and Storage
106             System housed at Goddard Space Flight Center.
107             L provides the permanent public
108             repository for data collected under the auspices of the NASA Ocean Biology and
109             Biogeochemistry Program. It also houses data collected by participants in the
110             NASA Sensor Intercomparision and Merger for Biological and Oceanic
111             Interdisciplinary Studies (SIMBIOS) Program. SeaBASS includes marine
112             bio-optical, biogeochemical, and (some) atmospheric data.
113              
114             =head2 SeaBASS File Format
115              
116             SeaBASS files are plain ASCII files with a special header and a matrix of
117             values.
118              
119             =head3 Header
120              
121             The SeaBASS header block consists of many lines of header-keyword pairs. Some
122             headers are optional but most, although technically not required for reading,
123             are required to be ingested into the system. More detailed information is
124             available in the SeaBASS L
125             article|http://seabass.gsfc.nasa.gov/wiki/article.cgi?article=metadataheaders>.
126             The only absolutely required header for this module to work is the /fields
127             line. This module turns fields and units lowercase at all times.
128              
129             /begin_header
130             /delimiter=space
131             /missing=-999
132             /fields=date,time,lat,lon,depth,wt,sal
133             /end_header
134            
135             =head3 Body
136              
137             The SeaBASS body is a matrix of data values, organized much like a spreadsheet.
138             Each column is separated by the value presented in the /delimiter header.
139             Likewise, missing values are indicated by the value presented in the /missing
140             header. The /fields header identifies the geophysical parameter presented in
141             each column.
142              
143             /begin_header
144             /delimiter=space
145             /missing=-999
146             /fields=date,time,lat,lon,depth,wt,sal
147             /end_header
148             19920109 16:30:00 31.389 -64.702 3.4 20.7320 -999
149             19920109 16:30:00 31.389 -64.702 19.1 20.7350 -999
150             19920109 16:30:00 31.389 -64.702 38.3 20.7400 -999
151             19920109 16:30:00 31.389 -64.702 59.6 20.7450 -999
152              
153             =head3 Strictly Speaking
154              
155             SeaBASS files are run through a program called
156             L before
157             they are submitted and before they are ingested into a NASA relational database
158             management system. Some of the things it checks for are required
159             L
160             and proper L
161             names|http://seabass.gsfc.nasa.gov/wiki/article.cgi?article=stdfields>. All
162             data must always have an associated depth, time, and location, though these
163             fields may be placed in the header and are not always required in the data.
164             Just because this module writes the files does not mean they will pass FCHECK.
165              
166             Files are case-INsensitive. Headers are not allowed to have any whitespace.
167              
168             =cut
169              
170 23     23   80 use Carp qw(:DEFAULT);
  23         30  
  23         3095  
171 23     23   91 use Fcntl qw(SEEK_SET);
  23         27  
  23         893  
172 23     23   5776 use List::MoreUtils qw(firstidx each_arrayref);
  23         85184  
  23         121  
173 23     23   18832 use Date::Calc qw(Add_Delta_Days);
  23         112397  
  23         1659  
174 23     23   137 use Scalar::Util qw(looks_like_number);
  23         30  
  23         215980  
175              
176             require Exporter;
177             our @ISA = qw(Exporter);
178             our @EXPORT_OK = qw(STRICT_READ STRICT_WRITE STRICT_ALL INSERT_BEGINNING INSERT_END);
179             our @EXPORT = qw();
180              
181             our $AUTOLOAD;
182              
183             =head1 EXPORT
184              
185             This module does not export anything by default.
186              
187             =head2 STRICT_READ
188              
189             C is used with the C option, enabling error messages when
190             reading header lines and inserting header data.
191              
192             =head2 STRICT_WRITE
193              
194             C is used with the C option, enabling error messages when
195             writing the data to a file/stream.
196              
197             =head2 STRICT_ALL
198              
199             C is used with the C option, enabling C and
200             C.
201              
202             =head2 INSERT_BEGINNING
203              
204             C is used with L
205             \@data_row | $data_row | %data_row)"> or L
206             $unit [, $position]])"> to insert a data row or field at the beginning of their
207             respective lists.
208              
209             =head2 INSERT_END
210              
211             C is used with L
212             $data_row | %data_row)"> or L
213             $position]])"> to insert a data row or field at the end of their respective
214             lists.
215              
216             =cut
217              
218 280     280 1 7367 sub STRICT_READ {1}
219 440     440 1 667 sub STRICT_WRITE {2}
220 1     1 1 1757 sub STRICT_ALL {3}
221 22     22 1 46 sub INSERT_BEGINNING {0}
222 104     104 1 178 sub INSERT_END {-1}
223              
224             my %DEFAULT_OPTIONS = (
225             default_headers => {},
226             headers => {},
227             preserve_case => 1,
228             keep_slashes => 0,
229             cache => 1,
230             delete_missing_headers => 0,
231             missing_data_to_undef => 1,
232             preserve_comments => 1,
233             add_empty_headers => 0,
234             strict => STRICT_WRITE,
235             fill_ancillary_data => 0,
236             preserve_header => 0,
237             );
238              
239             #return values for ref()
240             my %OPTION_TYPES = (
241             default_headers => [ 'ARRAY', 'HASH' ],
242             headers => [ 'ARRAY', 'HASH' ],
243             preserve_case => [''],
244             keep_slashes => [''],
245             cache => [''],
246             delete_missing_headers => [''],
247             missing_data_to_undef => [''],
248             preserve_comments => [''],
249             add_empty_headers => [''],
250             strict => [''],
251             fill_ancillary_data => [''],
252             preserve_header => [''],
253             );
254              
255             # All headers required by STRICT_READ and STRICT_WRITE
256             my @REQUIRED_HEADERS = qw(
257             begin_header
258             investigators
259             affiliations
260             contact
261             experiment
262             cruise
263             data_file_name
264             documents
265             calibration_files
266             data_type
267             start_date
268             end_date
269             start_time
270             end_time
271             north_latitude
272             south_latitude
273             east_longitude
274             west_longitude
275             missing
276             delimiter
277             units
278             end_header
279             );
280              
281             # Headers that must be specified, regardless of strictness.
282             my @ABSOLUTELY_REQUIRED_HEADERS = qw(
283             fields
284             );
285              
286             # Valid headers used for STRICT_READ and STRICT_WRITE
287             our @ALL_HEADERS = qw(
288             begin_header
289             investigators
290             affiliations
291             contact
292             experiment
293             cruise
294             station
295             data_file_name
296             documents
297             calibration_files
298             data_type
299             data_status
300             start_date
301             end_date
302             start_time
303             end_time
304             north_latitude
305             south_latitude
306             east_longitude
307             west_longitude
308             cloud_percent
309             measurement_depth
310             secchi_depth
311             water_depth
312             wave_height
313             wind_speed
314             missing
315             below_detection_limit
316             above_detection_limit
317             delimiter
318             fields
319             units
320             end_header
321             );
322              
323             # headers that are allowed but are not to be added during add_empty_headers
324             my @HIDDEN_HEADERS = qw(
325             received
326             optical_depth_warning
327             );
328              
329             # what to set the missing value to if it's not defined
330             our $DEFAULT_MISSING = -999;
331              
332             # overly complex data structure only understandable by idiots
333             # your IQ is the percentage chance that it won't make sense
334             # IE: my IQ = 40, I have a 60% chance of it making sense to me
335             my %ANCILLARY = (
336             'lat' => [ { 'north_latitude' => qr/^(.*?)$/ }, { 'south_latitude' => qr/^(.*?)$/ } ],
337             'lon' => [ { 'east_longitude' => qr/^(.*?)$/ }, { 'west_longitude' => qr/^(.*?)$/ } ],
338             'depth' => [ { 'measurement_depth' => qr/^(.*?)$/ }, ],
339             'date_time' => [ '$date $time', ],
340             'date' => [ [ \&julian_to_greg, '$year$julian' ], [ \&julian_to_greg, '$year$jd' ], '$year$month$day', ],
341             'year' => [ { '$date' => qr/^(\d{4})/ }, { 'start_date' => qr/^(\d{4})/ }, ],
342             'month' => [ { '$date' => qr/^\d{4}(\d{2})/ }, [ \&julian_to_greg, qr/^\d{4}(\d{2})\d{2}$/, '$year$julian' ], [ \&julian_to_greg, qr/^\d{4}(\d{2})\d{2}$/, '$year$jd' ], { 'start_date' => qr/^\d{4}(\d{2})/ }, ],
343             'day' => [ { '$date' => qr/^\d{6}(\d{2})/ }, [ \&julian_to_greg, qr/^\d{4}\d{2}(\d{2})$/, '$year$julian' ], [ \&julian_to_greg, qr/^\d{4}\d{2}(\d{2})$/, '$year$jd' ], { 'start_date' => qr/^\d{6}(\d{2})/ }, ],
344             'time' => [ '$hour:$minute:$second', ],
345             'hour' => [ { '$time' => qr/^(\d+):/ }, { 'start_time' => qr/^(\d+):/ }, ],
346             'minute' => [ { '$time' => qr/:(\d+):/ }, { 'start_time' => qr/:(\d+):/ }, ],
347             'second' => [ { '$time' => qr/:(\d+)(?:[^:\d]|$)/ }, { 'start_time' => qr/:(\d+)(?:[^:\d]|$)/ }, ],
348             'station' => [ { 'station' => qr/^(.*?)$/ }, ],
349             );
350              
351             # what fill_ancillary_data adds to each row
352             #my @FILL_ANCILLARY_DATA = qw(date time date_time lat lon depth);
353             my @FILL_ANCILLARY_DATA = keys(%ANCILLARY);
354              
355             my %FIELD_FORMATTING = (
356             'year' => '%04d',
357             'month' => '%02d',
358             'day' => '%02d',
359             'julian' => '%03d',
360             'hour' => '%02d',
361             'minute' => '%02d',
362             'second' => '%02d',
363             );
364              
365             =head1 CONSTRUCTOR
366              
367             =head2 new([$filename,] [\%options])
368              
369             my $sb_file = SeaBASS::File->new("input_file.txt");
370             my $sb_file = SeaBASS::File->new("input_file.txt", { delete_missing_headers => 1 });
371             my $sb_file = SeaBASS::File->new("output_file.txt", { add_empty_headers => 1 });
372             my $sb_file = SeaBASS::File->new({ add_empty_headers => 1 });
373              
374             Creates a C object. If the file specified exists, the object
375             can be used to read the file. If the file specified does not exist, an empty
376             object is created and will be written to the specified file by default when
377             invoking C.
378              
379             Options should be given in a hash reference to ensure proper argument parsing.
380             If a file is specified, options can be given as a hash list.
381              
382             =over 4
383              
384             =item * default_headers
385              
386             =item * headers
387              
388             These two options accept either an array reference or a hash reference. They
389             are used to set or override header information. First, headers are read from
390             C, then from the data file itself, then are overridden by
391             whatever is in C.
392              
393             Arguments are an array reference of header lines, or a hash reference of
394             header/value pairs.
395              
396             my $sb_file = SeaBASS::File->new({
397             default_headers => [
398             '/cruise=fake_cruise',
399             '/experiment=default_experiment',
400             ],
401             headers => {
402             'experiment' => 'real_experiment',
403             },
404             });
405            
406             B Modifying the delimiter or missing value will likely break the
407             object. Modifying these will change the expected format for all rows. Do so
408             with caution.
409              
410             =item * preserve_case
411              
412             C<1> or C<0>, default C<1>. Setting this to C<0> will change all values in the
413             header to lowercase. Header descriptors (the /header part) are always turned
414             to lowercase, as well as all fields and units.
415              
416             =item * keep_slashes
417              
418             C<1> or C<0>, default C<0>. Forces the object to keep the / in the beginning of
419             headers when accessed. If set to C<1>, when using the L
420             \%new_headers | \@get_headers | @get_headers ])"> function, they will be
421             returned with leading slash.
422              
423             =item * cache
424              
425             C<1> or C<0>, default C<1>. Enables caching data rows as they are read. This
426             speeds up re-reads and allows the data to be modified. This is required for
427             writing files.
428              
429             =item * delete_missing_headers
430              
431             C<1> or C<0>, default C<0>. Any headers that are equal to the /missing header,
432             NA, or are not defined (when using the C options) are
433             deleted. They cannot be retrieved using C and will not be written.
434              
435             =item * missing_data_to_undef
436              
437             C<1> or C<0>, default C<1>. If any values in the data block are equal to the
438             /missing, /above_detection_limit, /below_detection_limit headers, they are set
439             to undef when they are retrieved.
440              
441             =item * preserve_comments
442              
443             C<1> or C<0>, default C<1>. Setting this option to zero will discard any
444             comments found in the header.
445              
446             =item * add_empty_headers
447              
448             C<0>, C<1>, or a string. If set to a string, this will populate any missing
449             headers, including optional ones, and will set their value to the string given.
450             If set to 1, the string 'NA' is used. This option disables C.
451              
452             =item * strict
453              
454             my $sb_file = SeaBASS::File->new("input_file.txt", {strict => STRICT_ALL});
455             my $sb_file = SeaBASS::File->new("input_file.txt", {strict => (STRICT_READ | STRICT_WRITE)});
456             my $sb_file = SeaBASS::File->new("input_file.txt", {strict => 0});
457             my $sb_file = SeaBASS::File->new("input_file.txt", {strict => STRICT_WRITE}); #default
458              
459             =over 4
460              
461             =item * C
462              
463             C will throw errors when reading invalid headers, missing required
464             ones, or an invalid delimiter. This may change in future revisions.
465              
466             =item * C
467              
468             C will throw the same errors when writing the data to a file or
469             stream. C only checks for required headers and invalid headers,
470             but does not check their values to see if they are actually filled. This may
471             change in future revisions.
472              
473             =back
474              
475             =item * fill_ancillary_data
476              
477             C<0> or C<1>, default C<0>. Insert date, time, measurement depth, station, and
478             location values to the data rows from the headers. Values are not overridden
479             if they are already present. This option is only useful when reading files.
480              
481             B It is bad practice to include these fields in the data if they don't
482             change throughout the file. This option is used to remove the burden of
483             checking whether they are in the data or header.
484              
485             B This option will also combine individual date/time
486             parts in the data (year/month/day/etc) to create more uniform date/time fields.
487              
488             B If any part of a date/time is missing, the fields
489             dependent on it will not be added to the row.
490              
491             =item * preserve_header
492              
493             C<0> or C<1>, default C<0>. Preserves header and comment order. This option
494             disables modifying the header, as well, but will not error if you try -- it will
495             simply not be reflected in the output.
496              
497             =back
498              
499             =cut
500              
501             sub new {
502 137     137 1 163101 my ( $class, $file ) = ( shift, shift );
503              
504 137         219 my $self = bless( {}, $class );
505              
506 137         155 my %myoptions;
507              
508 137 100       472 if ( ref($file) eq 'HASH' ) {
    100          
    50          
509 4         32 %myoptions = ( %DEFAULT_OPTIONS, %$file );
510 4         8 $file = '';
511             } elsif ( ref( $_[0] ) eq 'HASH' ) {
512 125         403 %myoptions = ( %DEFAULT_OPTIONS, %{ $_[0] } );
  125         692  
513             } elsif ( !ref( $_[0] ) ) {
514 8 100       23 if ( $#_ % 2 == 1 ) {
515 7         65 %myoptions = ( %DEFAULT_OPTIONS, @_ );
516             } else {
517 1         187 croak('Even sized list expected');
518             }
519             } else {
520 0         0 croak("Arguments not understood.");
521             }
522              
523 136         363 $self->{'options'} = \%myoptions;
524 136         330 $self->check_options();
525              
526 132 50       371 if ( ref($file) eq 'GLOB' ) {
    100          
    100          
527 0         0 $self->{'handle'} = $file;
528             } elsif ( ref($file) eq 'SCALAR' ) {
529 126         1368 open( my $fh, "<", $file );
530 126         250 $self->{'handle'} = $fh;
531             } elsif ($file) {
532 2 100       7 if ( !ref($file) ) {
533 1 50       31 if ( -r $file ) {
    0          
534 1         33 open( my $fh, "<", $file );
535 1         22 $self->{'handle'} = $fh;
536             } elsif ( $self->{'options'}{'strict'} & STRICT_READ ) {
537 0         0 croak("Strict read set, but input file not found or unreadable.");
538             } else {
539 0         0 $self->{'default_write_to'} = $file;
540 0         0 $file = '';
541             }
542             } else {
543 1         94 croak("Invalid parameter, expected file path or file handle.");
544             }
545             } ## end elsif ($file)
546 131 100       284 if ($file) {
547 127 50       527 unless ( $self->read_headers() ) {
548 0 0       0 unless ( $self->{'options'}{'strict'} & STRICT_READ ) {
549 0         0 return;
550             }
551             }
552             } else {
553 4         11 $self->create_blank_file();
554             }
555 127         303 return $self;
556             } ## end sub new
557              
558             =head1 OBJECT METHODS
559              
560             =head2 add_headers(\%headers | \@header_lines | @header_lines)
561              
562             $sb_file->add_headers({'investigators' => 'jason_lefler'});
563             $sb_file->add_headers(['/investigators=jason_lefler']);
564             $sb_file->add_headers('/investigators=jason_lefler');
565            
566             C is used to add or override metadata for a C, as
567             well as add comments.
568              
569             This function can not be used to change fields/units, see
570             L and
571             L for that.
572              
573             B Modifying the delimiter or missing value halfway through
574             reading/writing will likely break the object. Modifying these will change the
575             expected format for any new or non-cached rows. Do so with caution.
576              
577             =cut
578              
579             sub add_headers {
580 394     394 1 334 my $self = shift;
581 394         315 my $success = 1;
582 394         556 my $strict = $self->{'options'}{'strict'} & STRICT_WRITE;
583 394 100       806 if ( ref( $_[0] ) eq 'HASH' ) {
    100          
    50          
584 257         222 while ( my ( $k, $v ) = each( %{ $_[0] } ) ) {
  265         729  
585 8         17 $success &= $self->validate_header( $k, $v, $strict );
586 8         17 $self->{'headers'}{$k} = $v;
587             }
588             } elsif ( ref( $_[0] ) eq 'ARRAY' ) {
589 136         111 foreach ( @{ $_[0] } ) {
  136         251  
590 2563 100 100     5354 if ( $_ =~ /^\s*!/ ) {
    100          
591 11         6 push( @{ $self->{'comments'} }, $_ );
  11         18  
592             } elsif ( $strict && $_ !~ m"^/" ) {
593 2         340 carp("Invalid header line: $_");
594 2         197 $success = 0;
595             } else {
596 2550         4223 my ( $k, $v ) = split( /=/, $_, 2 );
597 2550         3444 $success &= $self->validate_header( $k, $v, $strict );
598 2550         4186 $self->{'headers'}{$k} = $v;
599             }
600             } ## end foreach (@{$_[0]})
601             } elsif ( !ref( $_[0] ) ) {
602 1         4 foreach (@_) {
603 1 50 33     10 if ( $_ =~ /^\s*!/ ) {
    50          
604 0         0 push( @{ $self->{'comments'} }, $_ );
  0         0  
605             } elsif ( $strict && $_ !~ m"^/" ) {
606 0         0 carp("Invalid header line: $_");
607 0         0 $success = 0;
608             } else {
609 1         5 my ( $k, $v ) = split( /=/, $_, 2 );
610 1         4 $success &= $self->validate_header( $k, $v, $strict );
611 1         3 $self->{'headers'}{$k} = $v;
612             }
613             } ## end foreach (@_)
614             } else {
615 0         0 $success = 0;
616             }
617 394         437 return $success;
618             } ## end sub add_headers
619              
620             =head2 headers([ \%new_headers | \@get_headers | @get_headers ])
621              
622             =head2 head
623              
624             =head2 h
625              
626             my %headers = $sb_file->headers(['investigators']);
627             print Dumper(\%headers); # { investigators => 'jason_lefler' }
628            
629             my ($inv) = $sb_file->headers('investigators');
630             print $inv; # jason_lefler
631            
632             $sb_file->headers({investigators => 'jason_lefler'});
633            
634             $sb_file->headers()->{'investigators'} = 'jason_lefler';
635            
636             C is used to read or modify header values. Given an array reference
637             of header names, it will return a hash/hash reference with header/value pairs.
638             Given a plain list of header names, it will return an array/array reference of
639             the given header values. Given a hash reference, this function is a proxy for
640             L.
641              
642             If C is set, then headers will be returned as such, IE: C<<
643             {'/investigators' => 'jason_lefler'} >>.
644              
645             This function can also be used to set header values without going through the
646             normal validation.
647              
648             C and C are aliases to C.
649              
650             =cut
651              
652 0     0 1 0 sub head { shift->headers(@_); }
653 22     22 1 839 sub h { shift->headers(@_); }
654              
655             sub headers {
656 25     25 1 718 my $self = shift;
657 25 100       55 if ( !@_ ) {
    100          
    100          
658 21         104 return $self->{'headers'};
659             } elsif ( ref( $_[0] ) eq 'HASH' ) {
660 1         3 return $self->add_headers(@_);
661             } elsif ( ref( $_[0] ) eq 'ARRAY' ) {
662 1         27 my %ret;
663 1         2 for my $header ( @{ $_[0] } ) {
  1         5  
664 2         5 $ret{$header} = $self->{'headers'}{ lc($header) };
665             }
666 1 50       2 if (wantarray) {
667 1         8 return %ret;
668             } else {
669 0         0 return \%ret;
670             }
671             } else {
672 2         2 my @ret;
673 2         5 foreach (@_) {
674 4 50       7 if ( !ref ) {
675 4         9 my $value = $self->{'headers'}{ lc($_) };
676 4 50       10 push( @ret, defined($value) ? $value : undef );
677             } else {
678 0         0 croak("Argument not understood: $_");
679             }
680             } ## end foreach (@_)
681 2 100       8 if (wantarray) {
    50          
682 1         4 return @ret;
683             } elsif ( $#ret == 0 ) {
684 0         0 return $ret[0];
685             } else {
686 1         3 return \@ret;
687             }
688             } ## end else [ if (!@_) ]
689              
690             } ## end sub headers
691              
692             =head2 data([$index])
693              
694             =head2 d
695              
696             =head2 body
697              
698             =head2 b
699              
700             =head2 all
701              
702             my $row = $sb_file->data(1);
703             my @rows = $sb_file->all();
704              
705             C is responsible for returning either a data line via an index or all of
706             the data lines at once.
707              
708             Data is returned as C<< field => value >> pairs.
709              
710             If given an index: in list context, returns the hash of the row; in scalar
711             context, returns a reference to the row.
712              
713             If not given an index: in list context, returns an array of the rows; in scalar
714             context, returns a reference to an array of the rows.
715              
716             If given an index out of range, returns undef. If given a negative index,
717             Cs the file, then returns undef.
718              
719             If C is enabled and the row has already been read, it is retrieved from
720             the cache. If it has not already be read, all rows leading up to the desired
721             row will be read and cached, and the desired row returned.
722              
723             If C is disabled and either all rows are retrieved or a previously
724             retrieved row is called again, the file will C, then seek to the
725             desired row.
726              
727             C, C, C, and C are all aliases to C. (Yes, that means
728             C can be used with arguments, it would just look silly.)
729              
730             =cut
731              
732 0     0 1 0 sub body { shift->data(@_); }
733 0     0 1 0 sub b { shift->data(@_); }
734 2     2 1 998 sub d { shift->data(@_); }
735 42     42 1 4898 sub all { shift->data(@_); }
736              
737             sub data {
738 130     130 1 4594 my ( $self, $index ) = @_;
739 130 100       215 if ( defined($index) ) {
740 78 100       152 if ( $index < 0 ) {
741 12         17 $self->rewind();
742 12         12 return;
743             }
744 66 100       122 if ( $self->{'options'}{'cache'} ) {
745 60 100       109 if ( $index > $self->{'max_dataidx'} ) {
746 36         32 my $startidx = $self->{'dataidx'};
747 36         80 for ( my $i = 0; $i < ( $index - $startidx ); $i++ ) {
748 70 100       109 if ( !$self->next() ) {
749 4         9 return;
750             }
751             }
752             } ## end if ($index > $self->{'max_dataidx'...})
753              
754 56         50 $self->{'dataidx'} = $index;
755              
756 56 50       79 if (wantarray) {
757 0         0 return %{ $self->{'data'}[$index] };
  0         0  
758             } else {
759 56         239 return $self->{'data'}[$index];
760             }
761             } else {
762 6 100       17 if ( $index <= $self->{'dataidx'} ) {
763 3         7 $self->rewind();
764             }
765 6         8 my $startidx = $self->{'dataidx'};
766 6         16 for ( my $i = 0; $i < ( $index - $startidx - 1 ); $i++ ) {
767 7 50       16 if ( !$self->next() ) {
768 0         0 return;
769             }
770             }
771 6         11 return $self->next();
772             } ## end else [ if ($self->{'options'}...)]
773             } else {
774 52 100       114 if ( $self->{'options'}{'cache'} ) {
775 45         88 while ( $self->next() ) {
776             # noop
777             }
778 45 100       83 if (wantarray) {
779 7         5 return @{ $self->{'data'} };
  7         27  
780             } else {
781 38         131 return $self->{'data'};
782             }
783             } else {
784 7         18 $self->rewind();
785 7         6 my @data_rows;
786 7         15 while ( my $data = $self->next() ) {
787 28         57 push( @data_rows, $data );
788             }
789 7 100       17 if (wantarray) {
790 6         25 return @data_rows;
791             } else {
792 1         4 return \@data_rows;
793             }
794             } ## end else [ if ($self->{'options'}...)]
795             } ## end else [ if (defined($index)) ]
796             } ## end sub data
797              
798             =head2 next()
799              
800             while (my $row = $sb_file->next()){
801             print $row->{'lat'};
802             ...
803             }
804             while (my %row = $sb_file->next()){
805             print $row{'lat'};
806             ...
807             }
808            
809             Returns the next data row in the file, returning C when it runs out of
810             rows.
811              
812             Data is returned as C<< field => value >> pairs.
813              
814             In list context, returns a hash of the row. In scalar context, returns a
815             reference to the hash of a row.
816              
817             After a C, C will return the very first data hash, then each row
818             in turn. If the row has been cached, it's retrieved from the cache instead of
819             rereading from the file.
820              
821             =cut
822              
823             sub next {
824 426     426 1 12639 my $self = shift;
825 426 50       641 if (@_) {
826 0         0 croak("invalid number of arguments on next(), expected 0.");
827             }
828              
829 426 100 100     1588 if ( $self->{'options'}{'cache'} && $self->{'dataidx'} < $self->{'max_dataidx'} ) {
    100          
830 73         68 $self->{'dataidx'}++;
831              
832 73 50       92 if (wantarray) {
833 0         0 return %{ $self->{'data'}[ $self->{'dataidx'} ] };
  0         0  
834             } else {
835 73         221 return $self->{'data'}[ $self->{'dataidx'} ];
836             }
837             } elsif ( $self->{'handle'} ) {
838 341         292 my $handle = $self->{'handle'};
839 341         279 my $line_number = $self->{'line_number'};
840              
841 341         1025 while ( my $line = <$handle> ) {
842 263         195 $line_number++;
843 263         376 strip($line);
844 263 50       386 if ($line) {
845 263         469 my $data_row = $self->make_data_hash($line);
846 263         318 $self->{'line_number'} = $line_number;
847 263 100       417 if ( $self->{'options'}{'cache'} ) {
848 201         152 push( @{ $self->{'data'} }, $data_row );
  201         271  
849             }
850 263         243 $self->{'dataidx'}++;
851 263 100       415 if ( $self->{'dataidx'} > $self->{'max_dataidx'} ) {
852 235         219 $self->{'max_dataidx'} = $self->{'dataidx'};
853             }
854 263 50       304 if (wantarray) {
855 0         0 return %{$data_row};
  0         0  
856             } else {
857 263         779 return $data_row;
858             }
859             } ## end if ($line)
860             } ## end while (my $line = <$handle>)
861             } ## end elsif ($self->{'handle'})
862 90         194 return;
863             } ## end sub next
864              
865             =head2 rewind()
866              
867             C seeks to the start of the data. The next C will return the
868             very first row (or C). If caching is enabled, it will not actually
869             perform a seek, it will merely reset the index interator. If caching is
870             disabled, a seek is performed on the file handle to return to the start of the
871             data.
872              
873             =cut
874              
875             sub rewind {
876 61     61 1 991 my ($self) = @_;
877 61 100       131 if ( $self->{'dataidx'} != -1 ) {
878 31 100       62 if ( !$self->{'options'}{'cache'} ) {
879 9         20 seek( $self->{'handle'}, $self->{'data_start_position'}, SEEK_SET );
880             }
881 31         32 $self->{'line_number'} = $self->{'data_start_line'};
882 31         40 $self->{'dataidx'} = -1;
883             } ## end if ($self->{'dataidx'}...)
884             } ## end sub rewind
885              
886             =head2 update(\%data_row | \@data_row | $data_row | %data_row)
887              
888             while (my %row = $sb_file->next()){
889             if ($row{'depth'} == -999){
890             $row{'depth'} = 0;
891             }
892             $sb_file->update(\%row);
893             }
894            
895             # Less useful for update():
896             print join(',',@{$sb_file->actual_fields()}); #lat,lon,depth,chl
897            
898             while (my %row = $sb_file->next()){
899             if ($row{'depth'} == -999){
900             $row{'depth'} = 0;
901             }
902             $sb_file->update(@row{'lat','lon','depth','chl'});
903             # or
904             $sb_file->update([@row{'lat','lon','depth','chl'}]);
905             }
906              
907             C replaces the last row read (using C) with the input.
908              
909             Caching must be enabled to use C, C, or C.
910              
911             =cut
912              
913             sub update {
914 12     12 1 50 my $self = shift;
915 12 100       32 if ( !$self->{'options'}{'cache'} ) {
    100          
916 1         123 croak("Caching must be enabled to write.");
917             } elsif ( $self->{'dataidx'} == -1 ) {
918 1         223 croak("No rows read yet.");
919             }
920 10         19 my $new_row = $self->ingest_row(@_);
921 10 100       20 unless ( defined($new_row) ) {
922 2         141 croak("Error parsing inputs");
923             }
924 8         14 $self->{'data'}[ $self->{'dataidx'} ] = $new_row;
925             } ## end sub update
926              
927             =head2 set($index, \%data_row | \@data_row | $data_row | %data_row)
928              
929             my %row = (lat => 1, lon => 2, chl => 1);
930             $sb_file->set(0, \%row);
931            
932             print join(',',@{$sb_file->actual_fields()}); #lat,lon,chl
933             $sb_file->set(0, [1, 2, 1]);
934            
935              
936             C replaces the row at the given index with the input. Seeks to the
937             given index if it has not been read to yet. Cs if the file does not go
938             up to the index specified.
939              
940             Caching must be enabled to use C, C, or C.
941              
942             =cut
943              
944             sub set {
945 3     3 1 13 my $self = shift;
946 3         4 my $index = shift;
947 3 50       9 if ( !$self->{'options'}{'cache'} ) {
948 0         0 croak("Caching must be enabled to write");
949             }
950 3 100       6 if ( $index < 0 ) {
951 1         161 croak("Index must be positive integer");
952             }
953 2         6 my $new_row = $self->ingest_row(@_);
954 2 50       5 unless ( defined($new_row) ) {
955 0         0 croak("Error parsing inputs");
956             }
957              
958 2 50       4 if ( $index > $self->{'max_dataidx'} ) {
959 2         2 my $current_idx = $self->{'dataidx'};
960 2         5 $self->data($index);
961 2         2 $self->{'dataidx'} = $current_idx;
962              
963 2 100       5 if ( $index > $self->{'max_dataidx'} ) {
964 1         81 croak("Index out of bounds.");
965             }
966             } ## end if ($index > $self->{'max_dataidx'...})
967              
968 1         3 $self->{'data'}[$index] = $new_row;
969             } ## end sub set
970              
971             =head2 insert($index, \%data_row | \@data_row | $data_row | %data_row)
972              
973             use SeaBASS::File qw(INSERT_BEGINNING INSERT_END);
974             ...
975            
976             my %row = (lat => 1, lon => 2, chl => 1);
977             $sb_file->insert(INSERT_BEGINNING, \%row);
978            
979             print join(',',@{$sb_file->actual_fields()}); #lat,lon,chl
980            
981             $sb_file->insert(1, [1, 2, 1]);
982             $sb_file->insert(INSERT_END, [1, 2, 1]);
983            
984             Inserts the row into the given position. C inserts a new row
985             at the start of the data, C inserts one at the end of the data
986             block.
987              
988             The index must be a positive integer, C, or C.
989              
990             If a row is inserted at the end, the entire data block is read from the file to
991             cache every row, the row is appended to the end, and the current position is
992             reset to the original position, so C will still return the real next
993             row from the data.
994              
995             If a row is inserted before the current position, the current position is
996             shifted accordingly and will still return the C real row.
997              
998             Caching must be enabled to use C, C, or C.
999              
1000             =cut
1001              
1002             sub insert {
1003 16     16 1 25 my $self = shift;
1004 16         12 my $index = shift;
1005 16 50       31 if ( !$self->{'options'}{'cache'} ) {
1006 0         0 croak("Caching must be enabled to write.");
1007             }
1008 16 100       20 if ( $index < INSERT_END ) {
1009 1         158 croak("Index must be positive integer, or INSERT_BEGINNING (beginning), or INSERT_END (end)");
1010             }
1011 15         30 my $new_row = $self->ingest_row(@_);
1012 15 50       27 unless ( defined($new_row) ) {
1013 0         0 croak("Error parsing inputs");
1014             }
1015              
1016 15 100       22 if ( $index == INSERT_END ) {
    100          
1017 10         11 my $current_idx = $self->{'dataidx'};
1018 10         17 $self->data();
1019 10         11 $self->{'dataidx'} = $current_idx;
1020             } elsif ( $index > $self->{'max_dataidx'} ) {
1021 2         2 my $current_idx = $self->{'dataidx'};
1022 2         6 $self->data($index);
1023 2         3 $self->{'dataidx'} = $current_idx;
1024              
1025 2 50       7 if ( $index == $self->{'max_dataidx'} + 1 ) {
    100          
1026 0         0 $index = INSERT_END;
1027             } elsif ( $index > $self->{'max_dataidx'} ) {
1028 1         87 croak("Index out of bounds.");
1029             }
1030             } ## end elsif ($index > $self->{'max_dataidx'...})
1031              
1032 14 100 100     37 if ( $index <= $self->{'dataidx'} && $index != INSERT_END ) {
1033 3         3 $self->{'dataidx'}++;
1034             }
1035              
1036 14         16 $self->{'max_dataidx'}++;
1037              
1038 14 100       19 if ( $index == INSERT_BEGINNING ) {
    100          
1039 2         2 unshift( @{ $self->{'data'} }, $new_row );
  2         6  
1040             } elsif ( $index == INSERT_END ) {
1041 10         9 push( @{ $self->{'data'} }, $new_row );
  10         24  
1042             } else {
1043 2         1 splice( @{ $self->{'data'} }, $index, 0, $new_row );
  2         6  
1044             }
1045             } ## end sub insert
1046              
1047             =head2 prepend(\%data_row | \@data_row | $data_row | %data_row)
1048              
1049             C is short for C.
1050              
1051             =cut
1052              
1053             sub prepend {
1054 1     1 1 2 my $self = shift;
1055 1         2 $self->insert( INSERT_BEGINNING, @_ );
1056             }
1057              
1058             =head2 append(\%data_row | \@data_row | $data_row | %data_row)
1059              
1060             C is short for C.
1061              
1062             =cut
1063              
1064             sub append {
1065 9     9 1 37 my $self = shift;
1066 9         10 $self->insert( INSERT_END, @_ );
1067             }
1068              
1069             =head2 remove([$index])
1070              
1071             If index is specified, it deletes the desired index. If it is omitted, the
1072             last row read is deleted. The current position is modified accordingly.
1073              
1074             =cut
1075              
1076             sub remove {
1077 6     6 1 15 my ( $self, $index ) = @_;
1078              
1079 6 100 66     28 if ( !$self->{'options'}{'cache'} ) {
    50          
1080 1         119 croak("Caching must be enabled to write.");
1081             } elsif ( !defined($index) && $self->{'dataidx'} < 0 ) {
1082 0         0 croak("No rows read yet.");
1083             }
1084              
1085 5 100       16 if ( !defined($index) ) {
1086 2         4 $index = $self->{'dataidx'};
1087             }
1088              
1089 5 50       14 if ( $index < 0 ) {
    100          
1090 0         0 croak("Index must be positive integer");
1091             } elsif ( $index > $self->{'max_dataidx'} ) {
1092 2         3 my $current_idx = $self->{'dataidx'};
1093 2         3 $self->data($index);
1094 2         3 $self->{'dataidx'} = $current_idx;
1095              
1096 2 100       4 if ( $index > $self->{'max_dataidx'} ) {
1097 1         155 croak("Index out of bounds.");
1098             }
1099             } ## end elsif ($index > $self->{'max_dataidx'...})
1100              
1101 4 100       10 if ( $index <= $self->{'dataidx'} ) {
1102 3         3 $self->{'dataidx'}--;
1103             }
1104 4         4 $self->{'max_dataidx'}--;
1105              
1106 4         3 splice( @{ $self->{'data'} }, $index, 1 );
  4         16  
1107             } ## end sub remove
1108              
1109             =head2 where(\&function)
1110              
1111             # Find all rows with depth greater than 10 meters
1112             my @ret = $sb_file->where(sub {
1113             if ($_->{'depth'} > 10){
1114             return $_;
1115             } else {
1116             return undef;
1117             }
1118             });
1119              
1120             # Delete all measurements with depth less than 10 meters
1121             $sb_file->where(sub {
1122             if ($_->{'depth'} < 10){
1123             $_ = undef;
1124             }
1125             });
1126            
1127             # Calculate the average chlorophyll value
1128             my $chl_total = 0;
1129             my $measurements = 0;
1130             $sb_file->where(sub {
1131             if (defined($_->{'chl'})){
1132             $chl_total += $_->{'chl'};
1133             $measurements++;
1134             }
1135             });
1136             if ($measurements){
1137             print $chl_total/$measurements;
1138             } else {
1139             print "No chl values.";
1140             }
1141              
1142              
1143             Traverses through each data line, running the given function on each row.
1144             C<$_> is set to the current row. If C<$_> is set to undefined, C is
1145             called. Any changes in C<$_> will be reflected in the data.
1146              
1147             Any defined value returned is added to the return array. If nothing is
1148             returned, a 0 is added.
1149              
1150             =cut
1151              
1152             sub where {
1153 7     7 1 31 my ( $self, $function ) = ( shift, shift );
1154 7 100       67 if ( ref($function) ne 'CODE' ) {
1155 1         152 croak("Invalid arguments.");
1156             }
1157 6         8 my $currentidx = $self->{'dataidx'};
1158 6         12 $self->rewind();
1159              
1160 6         5 my @new_rows;
1161              
1162 6         12 while ( my $row = $self->next() ) {
1163 19         23 local *_ = \$row;
1164 19         30 my $ret = $function->();
1165 19 100 100     125 if ( defined($ret) && defined(wantarray) ) {
1166 12         9 push( @new_rows, $ret );
1167             }
1168 19 100       52 if ( !defined($row) ) {
1169 2 100       5 if ( $self->{'dataidx'} <= $currentidx ) {
1170 1         1 $currentidx--;
1171             }
1172 2         5 $self->remove();
1173             } ## end if (!defined($row))
1174             } ## end while (my $row = $self->next...)
1175              
1176 5         10 $self->data($currentidx);
1177              
1178 5         12 return @new_rows;
1179             } ## end sub where
1180              
1181             =head2 get_all($field_name [, ... ] [, \%options])
1182              
1183             Returns an array/arrayref of all the values matching each given field name.
1184             This function errors out if no field names are passed in or a non-existent
1185             field is requested.
1186              
1187             Available options are:
1188              
1189             =over 4
1190              
1191             =item * delete_missing
1192              
1193             If any of the fields are missing, the row will not be added to any of the
1194             return arrays. (Useful for plotting or statistics that don't work well with
1195             bad values.)
1196              
1197             =back
1198              
1199             =cut
1200              
1201             sub get_all {
1202 13     13 1 3630 my $self = shift;
1203 13         25 my %options = ( 'delete_missing' => 0 );
1204 13 100       35 if ( ref( $_[$#_] ) eq 'HASH' ) {
1205 5         5 %options = %{ pop(@_) };
  5         13  
1206             }
1207 13 50       24 if ( !@_ ) {
1208 0         0 croak("get_all must be called with at least one field name");
1209             }
1210              
1211 13 100       41 my $missing = ( $self->{'options'}{'missing_data_to_undef'} ? undef : $self->{'missing'} );
1212              
1213 13         11 my $currentidx = $self->{'dataidx'};
1214 13         26 $self->rewind();
1215              
1216 13         16 my @fields = map {lc} @_; # turn all inputs lowercase
  22         42  
1217              
1218 13         20 foreach my $field (@fields) {
1219 21 100   130   44 if ( ( firstidx { $_ eq $field } @{ $self->{'actual_fields'} } ) < 0 ) {
  130         115  
  21         47  
1220 4 100 66 4   19 if ( !$self->{'options'}{'fill_ancillary_data'} || ( firstidx { $_ eq $field } keys( %{ $self->{'ancillary'} } ) ) < 0 ) {
  4         12  
  1         7  
1221 3         388 croak("Field $field does not exist");
1222             }
1223             }
1224             } ## end foreach my $field (@fields)
1225              
1226 10         14 my @ret = map { [] } @fields; # make return array of arrays
  17         24  
1227              
1228 10         23 while ( my $row = $self->next() ) {
1229 40 100       55 if ( $options{'delete_missing'} ) {
1230 20         11 my $has_all = 1;
1231 20         21 foreach my $field (@fields) {
1232 36 100 100     119 unless ( defined( $row->{$field} ) && ( !defined($missing) || $row->{$field} != $missing ) ) {
      66        
1233 4         4 $has_all = 0;
1234 4         6 last;
1235             }
1236             } ## end foreach my $field (@fields)
1237 20 100       29 unless ($has_all) {
1238 4         6 next;
1239             }
1240             } ## end if ($options{'delete_missing'...})
1241              
1242 36         45 for ( my $i = 0; $i <= $#fields; $i++ ) {
1243 60         38 push( @{ $ret[$i] }, $row->{ $fields[$i] } );
  60         159  
1244             }
1245             } ## end while (my $row = $self->next...)
1246              
1247 10         19 $self->data($currentidx);
1248              
1249 10 100       18 if ( $#_ == 0 ) {
    100          
1250 3 100       5 if (wantarray) {
1251 1         2 return @{ $ret[0] };
  1         4  
1252             } else {
1253 2         7 return $ret[0];
1254             }
1255             } elsif (wantarray) {
1256 4         14 return @ret;
1257             } else {
1258 3         9 return \@ret;
1259             }
1260             } ## end sub get_all
1261              
1262             =head2 remove_field($field_name [, ... ])
1263              
1264             Removes a field from the file. C is called to remove the field
1265             from cached rows. Any new rows grabbed will have the removed fields omitted,
1266             as well. A warning is issued if the field does not exist.
1267              
1268             =cut
1269              
1270             sub remove_field {
1271 8     8 1 23 my $self = shift;
1272 8 100       19 if ( !@_ ) {
1273 1         166 croak("Field(s) must be specified.");
1274             }
1275 7         13 foreach my $field_orig (@_) {
1276 9         9 my $field = lc($field_orig);
1277              
1278 9     30   26 my $field_idx = firstidx { $_ eq $field } @{ $self->{'actual_fields'} };
  30         47  
  9         24  
1279              
1280 9 100       24 if ( $field_idx < 0 ) {
1281 1         93 carp("Field $field does not exist.");
1282             } else {
1283 8         9 splice( @{ $self->{'actual_fields'} }, $field_idx, 1 );
  8         13  
1284 8         6 splice( @{ $self->{'actual_units'} }, $field_idx, 1 );
  8         15  
1285             }
1286             } ## end foreach my $field_orig (@_)
1287 7         123 $self->update_fields();
1288             } ## end sub remove_field
1289              
1290             =head2 add_field($field_name [, $unit [, $position]])
1291              
1292             Adds a field to the file. C is called to populate all cached
1293             rows. Any rows retrieved will have the new field set to undefined or /missing,
1294             depending on if the option C is set.
1295              
1296             If the unit is not specified, it is set to unitless.
1297              
1298             If the position is not specified, the field is added to the end.
1299              
1300             =cut
1301              
1302             sub add_field {
1303 25     25 1 86 my ( $self, $field, $unit, $position ) = @_;
1304 25 50       65 if ( !$self->{'options'}{'cache'} ) {
    100          
1305 0         0 croak("Caching must be enabled to write.");
1306             } elsif ( !$field ) {
1307 1         79 croak("Field must be specified.");
1308             }
1309 24         26 $field = lc($field);
1310              
1311 24     121   58 my $field_idx = firstidx { $_ eq $field } @{ $self->{'actual_fields'} };
  121         79  
  24         55  
1312 24 100       65 if ( $field_idx >= 0 ) {
1313 3         433 croak("Field already exists.");
1314             }
1315 21 100       33 if ( !defined($position) ) {
1316 15         25 $position = INSERT_END;
1317             }
1318 21   100     39 $unit ||= 'unitless';
1319 21         26 $unit = lc($unit);
1320              
1321 21 100       24 if ( $position == INSERT_END ) {
    100          
1322 17         16 push( @{ $self->{'actual_fields'} }, $field );
  17         27  
1323 17         13 push( @{ $self->{'actual_units'} }, $unit );
  17         23  
1324             } elsif ( $position == INSERT_BEGINNING ) {
1325 2         1 unshift( @{ $self->{'actual_fields'} }, $field );
  2         4  
1326 2         3 unshift( @{ $self->{'actual_units'} }, $unit );
  2         3  
1327             } else {
1328 2         2 splice( @{ $self->{'actual_fields'} }, $position, 0, $field );
  2         5  
1329 2         2 splice( @{ $self->{'actual_units'} }, $position, 0, $unit );
  2         3  
1330             }
1331 21         35 $self->update_fields();
1332             } ## end sub add_field
1333              
1334             =head2 find_fields($string | qr/match/ [, ... ])
1335              
1336             Finds fields matching the string or regex given. If given a string, it must
1337             match a field exactly and entirely to be found. To find a substring, use
1338             C. Fields are returned in the order that they will be output. This
1339             function takes into account fields that are added or removed. All fields are
1340             always lowercase, so all matches are case insensitive.
1341              
1342             Given one argument, returns an array of the fields found. An empty array is
1343             returned if no fields match.
1344              
1345             Given multiple arguments, returns an array/arrayref of arrays of fields found.
1346             IE: C would return something like
1347             C<[['lw510','lw550'],['es510','es550']]>. If no field is matched, the inner
1348             array will be empty. IE: C<[[],[]]>.
1349              
1350             =cut
1351              
1352             sub find_fields {
1353 10     10 1 3411 my $self = shift;
1354 10 50       25 if ( $#_ < 0 ) {
1355 0         0 croak("Input must be a string or regex object.");
1356             }
1357              
1358 10         10 my @ret;
1359              
1360 10         16 foreach my $find (@_) {
1361 13         7 my ( $regex, @matching );
1362 13 50       21 if ( defined($find) ) {
1363 13 100       28 if ( !ref($find) ) {
    50          
1364 7         70 $regex = lc(qr/^$find$/i);
1365             } elsif ( ref($find) eq 'Regexp' ) {
1366 6         20 $regex = lc(qr/$find/i);
1367             } else {
1368 0         0 croak("Input must be a string or regex object.");
1369             }
1370              
1371 13         15 foreach my $field ( @{ $self->{'actual_fields'} } ) {
  13         21  
1372 92 100       215 if ( $field =~ $regex ) {
1373 23         28 push( @matching, $field );
1374             }
1375             }
1376             }
1377 13         22 push( @ret, \@matching );
1378             } ## end foreach my $find (@_)
1379              
1380 10 100       17 if ( $#_ == 0 ) {
1381 7         7 return @{ $ret[0] };
  7         22  
1382             } else {
1383 3 100       6 if (wantarray) {
1384 2         6 return @ret;
1385             } else {
1386 1         3 return \@ret;
1387             }
1388             } ## end else [ if ($#_ == 0) ]
1389             } ## end sub find_fields
1390              
1391             =head2 add_comment(@comments)
1392              
1393             Adds comments to the output file, which are printed, in bulk, after C.
1394             Comments are trimmed before entry and !s are added, if required.
1395              
1396             =cut
1397              
1398             sub add_comment {
1399 2     2 1 3 my $self = shift;
1400 2         5 push(@{$self->{'comments'}}, map {
1401 2         23 my $c = $_;
  3         4  
1402 3         11 $c =~ s/^\s+|\s+$//g;
1403 3 100       7 if ($c =~ /^!/){
1404 1         3 $c
1405             } else {
1406 2         5 "! $c"
1407             }
1408             } @_);
1409             }
1410              
1411             =head2 get_comments([@indices])
1412              
1413             Returns a list of the comments at the given indices. If no indices are passed
1414             in, return them all.
1415              
1416             =cut
1417              
1418             sub get_comments {
1419 6     6 1 10 my $self = shift;
1420 6         6 my @ret;
1421 6 100       10 if (@_){
1422 2         4 @ret = map {$self->{'comments'}[$_]} @_;
  3         6  
1423             } else {
1424 4         4 @ret = @{$self->{'comments'}};
  4         9  
1425             }
1426 6 50       9 if (wantarray){
1427 0         0 return @ret;
1428             } else {
1429 6         26 return \@ret;
1430             }
1431             }
1432              
1433             =head2 set_comments(@comments)
1434              
1435             Overwrites all of the comments in the file. For now, this is the proper way
1436             to remove comments. Comments are trimmed before entry and !s are added, if
1437             required.
1438              
1439             =cut
1440              
1441             sub set_comments {
1442 1     1 1 1 my $self = shift;
1443             $self->{'comments'} = [map {
1444 1         3 my $c = $_;
  2         2  
1445 2         5 $c =~ s/^\s+|\s+$//g;
1446 2 100       6 if ($c =~ /^!/){
1447 1         3 $c
1448             } else {
1449 1         3 "! $c"
1450             }
1451             } @_];
1452             }
1453              
1454             =head2 write([$filename | $file_handle | \*GLOB])
1455              
1456             Outputs the current header and data to the given handle or glob. If no
1457             arguments are given, and a non-existent filename was given to C, the
1458             contents are output into that. If an output file was not given, C
1459             outputs to STDOUT.
1460              
1461             If C is enabled, the headers are checked for invalid headers and
1462             missing required headers and errors/warnings can be thrown accordingly.
1463              
1464             The headers are output in a somewhat-arbitrary but consistent order. If
1465             C is enabled, placeholders are added for every header that
1466             does not exist. A comment section is also added if one is not present.
1467              
1468             =cut
1469              
1470             sub write {
1471 15     15 1 9032 my ( $self, $write_to_h ) = @_;
1472              
1473 15         32 my $strict_write = $self->{'options'}{'strict'} & STRICT_WRITE;
1474 15 100       34 my $slash = ( $self->{'options'}{'keep_slashes'} ? '/' : '' );
1475 15         14 my $error = 0;
1476              
1477 15 100       40 if ($strict_write) {
1478 3         3 foreach my $header ( keys( %{ $self->{'headers'} } ) ) {
  3         20  
1479 65         48 ( my $header_no_slash = $header ) =~ s"^/"";
1480 65 50 33 1104   140 if ( ( firstidx { $_ eq $header_no_slash } @ALL_HEADERS ) < 0 && ( firstidx { $_ eq $header_no_slash } @HIDDEN_HEADERS ) < 0 ) {
  1104         726  
  0         0  
1481 0         0 carp("Invalid header: $header");
1482 0         0 $error = 1;
1483             }
1484             } ## end foreach my $header (keys(%{...}))
1485              
1486 3         8 foreach my $header (@REQUIRED_HEADERS) {
1487 66 100       126 if ( !exists( $self->{'headers'}{$header} ) ) {
1488 20         1285 carp("Missing required header: $header");
1489 20         696 $error = 1;
1490             }
1491             } ## end foreach my $header (@REQUIRED_HEADERS)
1492             } ## end if ($strict_write)
1493              
1494 15 100       26 if ( !$error ) {
1495 14         33 my $close_write_to = 0;
1496 14         27 my $old_fh = select();
1497              
1498 14 50 33     53 if ( !$write_to_h && exists( $self->{'default_write_to'} ) ) {
1499 0   0     0 $write_to_h ||= $self->{'default_write_to'};
1500             }
1501              
1502 14 50       26 if ( defined($write_to_h) ) {
1503 0 0       0 if ( ref($write_to_h) eq 'GLOB' ) {
    0          
1504 0         0 select($write_to_h);
1505             } elsif ( !ref($write_to_h) ) {
1506 0         0 my $write_to = $write_to_h;
1507 0         0 $write_to_h = undef;
1508 0 0       0 open( $write_to_h, ">", $write_to ) || croak("Invalid argument for write().");
1509 0         0 $close_write_to = 1;
1510 0         0 select($write_to_h);
1511             } else {
1512 0         0 croak("Invalid argument for write().");
1513             }
1514             } ## end if (defined($write_to_h...))
1515              
1516 14   50     63 $self->{'headers'}{"${slash}delimiter"} ||= 'comma';
1517 14         27 my $actual_delim = lc( $self->{'headers'}{"${slash}delimiter"} );
1518 14 100       47 if ( $actual_delim eq 'comma' ) {
    50          
    0          
    0          
1519 2         3 $actual_delim = ',';
1520             } elsif ( $actual_delim eq 'space' ) {
1521 12         16 $actual_delim = ' ';
1522             } elsif ( $actual_delim eq 'tab' ) {
1523 0         0 $actual_delim = "\t";
1524             } elsif ( $actual_delim eq 'semicolon' ) {
1525 0         0 $actual_delim = ';';
1526             } else {
1527 0         0 $actual_delim = ',';
1528 0         0 $self->{'headers'}{"${slash}delimiter"} = 'comma';
1529             }
1530              
1531 14 50       31 my $missing = ( exists( $self->{'missing'} ) ? $self->{'missing'} : $DEFAULT_MISSING );
1532 14 100       41 my $bdl = ( exists( $self->{'below_detection_limit'} ) ? $self->{'below_detection_limit'} : $DEFAULT_MISSING );
1533 14 100       29 my $adl = ( exists( $self->{'above_detection_limit'} ) ? $self->{'above_detection_limit'} : $DEFAULT_MISSING );
1534              
1535 14 100       23 if ( $self->{'options'}{'preserve_header'} ) {
1536 1         2 print join("\n", @{ $self->{'preserved_header'} }, '');
  1         51  
1537             } else {
1538 13 100       34 if ( !exists( $self->{'headers'}{"${slash}begin_header"} ) ) {
1539 3         88 print "/begin_header\n";
1540             }
1541              
1542 13         17 my $add_missing_headers = $self->{'options'}{'add_empty_headers'};
1543 13 100 66     31 if ( $add_missing_headers && $add_missing_headers eq '1' ) {
1544 1         1 $add_missing_headers = 'NA';
1545             }
1546 13 100       26 if ( !$self->{'options'}{'preserve_case'} ) {
1547 1   50     2 $add_missing_headers = lc( $add_missing_headers || '' );
1548             }
1549            
1550 13         15 my @headers_to_print;
1551            
1552 13         63 @headers_to_print = @ALL_HEADERS;
1553 13 100 66     41 if ($missing eq $adl || $self->{'options'}{'missing_data_to_undef'}){
1554 12         118 @headers_to_print = grep(!/above_detection_limit/i, @headers_to_print);
1555             }
1556 13 100 66     45 if ($missing eq $bdl || $self->{'options'}{'missing_data_to_undef'}){
1557 12         87 @headers_to_print = grep(!/below_detection_limit/i, @headers_to_print);
1558             }
1559            
1560 13 100       19 unless (grep($_ ne 'unitless', @{ $self->{'actual_units'} })){
  13         43  
1561 6         62 @headers_to_print = grep(!/units/i, @headers_to_print);
1562             }
1563              
1564 13         23 foreach my $header (@headers_to_print) {
1565 399 100       991 if ( $header eq 'missing' ) {
    100          
    100          
    100          
    50          
1566 13         15 while ( my ( $h, $k ) = each( %{ $self->{'headers'} } ) ) {
  165         319  
1567 152         144 ( my $header_no_slash = $h ) =~ s"^/"";
1568 152 50   3097   344 if ( ( firstidx { $_ eq $header_no_slash } @ALL_HEADERS ) < 0 ) {
  3097         1880  
1569 0         0 print "/$h=$k\n";
1570             }
1571             } ## end while (my ($h, $k) = each...)
1572              
1573 13         13 foreach my $comment ( @{ $self->{'comments'} } ) {
  13         26  
1574 18         103 print "$comment\n";
1575             }
1576 13 100 100     16 if ( !@{ $self->{'comments'} } && $add_missing_headers ) {
  13         61  
1577 1         13 print "! Comments: \n!\n";
1578             }
1579 13 100       31 if ( !exists( $self->{'headers'}{"$slash$header"} ) ) {
1580 3         40 print "/missing=$missing\n";
1581             } else {
1582 10         100 print '/', $header, '=', $self->{'headers'}{"$slash$header"}, "\n";
1583             }
1584             } elsif ( $header eq 'fields' ) {
1585 13         22 print "/$header=", join( ',', @{ $self->{'actual_fields'} } ), "\n";
  13         104  
1586             } elsif ( $header eq 'units' ) {
1587 7         10 print "/$header=", join( ',', @{ $self->{'actual_units'} } ), "\n";
  7         67  
1588             } elsif ( exists( $self->{'headers'}{"$slash$header"} ) ) {
1589 110 100       269 if ( $header =~ /_header/ ) {
    100          
    50          
1590 20         432 print "/$header\n";
1591             } elsif (length($self->{'headers'}{"$slash$header"})) {
1592 65         65 my $v = $self->{'headers'}{"$slash$header"};
1593 65 100       124 if ( $header =~ /_latitude|_longitude/ ) {
    100          
1594 8         51 print "/$header=$v\[deg]\n";
1595             } elsif ( $header =~ /_time/ ) {
1596 4         25 print "/$header=$v\[gmt]\n";
1597             } else {
1598 53         366 print "/$header=$v\n";
1599             }
1600             # print '/', $header, '=', $self->{'headers'}{"$slash$header"}, "\n";
1601             } elsif ($add_missing_headers) {
1602 25 100       41 if ( $header =~ /_latitude|_longitude/ ) {
    100          
1603 4         26 print "/$header=$add_missing_headers\[deg]\n";
1604             } elsif ( $header =~ /_time/ ) {
1605 2         13 print "/$header=$add_missing_headers\[gmt]\n";
1606             } else {
1607 19         111 print "/$header=$add_missing_headers\n";
1608             }
1609             }
1610             } elsif ($add_missing_headers) {
1611 0 0       0 if ( $header =~ /_latitude|_longitude/ ) {
    0          
1612 0         0 print "/$header=$add_missing_headers\[deg]\n";
1613             } elsif ( $header =~ /_time/ ) {
1614 0         0 print "/$header=$add_missing_headers\[gmt]\n";
1615             } else {
1616 0         0 print "/$header=$add_missing_headers\n";
1617             }
1618             } ## end elsif ($add_missing_headers)
1619             } ## end foreach my $header (@ALL_HEADERS)
1620              
1621 13 100       47 if ( !exists( $self->{'headers'}{"${slash}end_header"} ) ) {
1622 3         22 print "/end_header\n";
1623             }
1624             } ## end else [ if ($self->{'options'}...)]
1625              
1626 14         37 $self->rewind();
1627              
1628 14         28 while ( my $row = $self->next() ) {
1629 24         14 my @values;
1630 24         19 foreach my $field ( @{ $self->{'actual_fields'} } ) {
  24         33  
1631 118 100       181 push( @values, ( defined( $row->{$field} ) ? $row->{$field} : $missing ) );
1632             }
1633 24         291 print join( $actual_delim, @values ), "\n";
1634             } ## end while (my $row = $self->next...)
1635              
1636 14         41 select($old_fh);
1637 14 50       37 if ($close_write_to) {
1638 0         0 close($write_to_h);
1639             }
1640             } else {
1641 1         58 croak("Error(s) writing file");
1642             }
1643 14         45 return;
1644             } ## end sub write
1645              
1646             =head2 close()
1647              
1648             If a file handle is opened for reading, this function closes it. This is
1649             automatically called when the object is destroyed. This is useful to replace
1650             the file being read with the current changes.
1651              
1652             =cut
1653              
1654             sub close {
1655 137     137 1 153 my ($self) = @_;
1656 137 100       263 if ( $self->{'handle'} ) {
1657 127         342 my $ret = close( $self->{'handle'} );
1658 127         286 delete( $self->{'handle'} );
1659 127         1610 return $ret;
1660             } else {
1661 10         58 return;
1662             }
1663             } ## end sub close
1664              
1665             =head2 make_data_hash($line [,\@field_list])
1666              
1667             my %row = $sb_file->make_data_hash("1.5,2,2.5");
1668             my %row = $sb_file->make_data_hash("1.5,2,2.5", [qw(lat lon sal)]);
1669             my %row = $sb_file->make_data_hash("1.5,2,2.5", [$sb_file->fields()]);
1670             my %row = $sb_file->make_data_hash("1.5,2,2.5", [$sb_file->actual_fields()]);
1671              
1672             For mostly internal use. This function parses a data line. It first splits
1673             the data via the delimiter, assigns a field to each value, and returns a hash
1674             or hash reference.
1675              
1676             If C<@field_list> is not set, C<< $sb_file->fields() >> is used.
1677              
1678             If a delimiter is not set (a blank file was created, a file without a
1679             /delimiter header is read, etc), the delimiter is guessed and set using
1680             L.
1681              
1682             Cs if the delimiter could not be guessed or the number of fields the
1683             line is split into does not match up with the field list.
1684              
1685             =cut
1686              
1687             sub make_data_hash {
1688 263     263 1 278 my ( $self, $line, $field_list ) = @_;
1689 263 50 66     509 if ( !$self->{'delim'} && !$self->guess_delim($line) ) {
1690 0         0 croak("Need a delimiter");
1691             }
1692 263         1532 my @values = split( $self->{'delim'}, $line );
1693 263   33     781 $field_list ||= $self->{'fields'};
1694              
1695 263         202 my ( $num_expected, $num_got ) = ( scalar( @{ $self->{'fields'} } ), scalar(@values) );
  263         382  
1696 263 50       406 if ( $num_expected != $num_got ) {
1697 0         0 croak("Incorrect number of fields or elements: got $num_got, expected $num_expected");
1698             }
1699              
1700 263         212 my %ret;
1701              
1702 263         1202 my $iterator = each_arrayref( $field_list, \@values );
1703 263         845 while ( my ( $k, $v ) = $iterator->() ) {
1704 1848 100       1914 if ( $self->{'options'}{'missing_data_to_undef'} ) {
1705 937 100 100     8689 if ( $self->{'missing_is_number'} && looks_like_number($v) && $v == $self->{'missing'} ) {
    100 100        
    100 100        
    100 100        
      100        
      100        
      66        
      66        
1706 67         168 $ret{$k} = undef;
1707             } elsif ( $self->{'adl_is_number'} && looks_like_number($v) && $v == $self->{'above_detection_limit'} ){
1708 1         3 $ret{$k} = undef;
1709             } elsif ( $self->{'bdl_is_number'} && looks_like_number($v) && $v == $self->{'below_detection_limit'} ){
1710 1         4 $ret{$k} = undef;
1711             } elsif ($v eq $self->{'missing'} || $v eq $self->{'below_detection_limit'} || $v eq $self->{'above_detection_limit'}) {
1712 8         21 $ret{$k} = undef;
1713             } else {
1714 860         2595 $ret{$k} = $v;
1715             }
1716             } else {
1717 911         2232 $ret{$k} = $v;
1718             }
1719             } ## end while (my ($k, $v) = $iterator...)
1720 263         481 $self->add_and_remove_fields( \%ret );
1721 263 50       325 if (wantarray) {
1722 0         0 return %ret;
1723             } else {
1724 263         1022 return \%ret;
1725             }
1726             } ## end sub make_data_hash
1727              
1728             =head2 AUTOLOAD
1729              
1730             print $sb_file->missing();
1731             print $sb_file->dataidx();
1732             print $sb_file->actual_fields();
1733             ...
1734              
1735             Returns a few internal variables. The accessor is read only, but some
1736             variables can be returned as a reference, and can be modified afterwards.
1737             Though, do it knowing this is a terrible idea.
1738              
1739             If the variable retrieved is an array or hash reference and this is called in a
1740             list context, the variable is dereferenced first.
1741              
1742             Here are a few "useful" variables:
1743              
1744             =over 4
1745              
1746             =item * dataidx
1747              
1748             The current row index.
1749              
1750             =item * max_dataidx
1751              
1752             The highest row index read so far.
1753              
1754             =item * fields
1755              
1756             An array of the original fields.
1757              
1758             =item * actual_fields
1759              
1760             An array of the current fields, as modified by C or C.
1761              
1762             =item * delim
1763              
1764             The regex used to split data lines.
1765              
1766             =item * missing
1767              
1768             The null/fill/missing value of the SeaBASS file.
1769              
1770             =item * delim
1771              
1772             The current line delimiter regex.
1773              
1774             =back
1775              
1776             =cut
1777              
1778             sub AUTOLOAD {
1779 109     109   137 my $self = shift;
1780 109 50       156 if ( !ref($self) ) {
1781 0         0 croak("$self is not an object");
1782             }
1783              
1784 109         92 my $name = $AUTOLOAD;
1785 109 50       181 if ($name) {
1786 109         249 $name =~ s/.*://;
1787 109         118 my $value = $self->{$name};
1788 109 100       173 if ( !defined($value) ) {
1789 1         5 return;
1790             }
1791 108 100 66     353 if ( ref($value) eq 'ARRAY' && wantarray ) {
    50 33        
1792 104         75 return @{$value};
  104         281  
1793             } elsif ( ref($value) eq 'HASH' && wantarray ) {
1794 0         0 return %{$value};
  0         0  
1795             }
1796 4         17 return $value;
1797             } ## end if ($name)
1798             } ## end sub AUTOLOAD
1799              
1800             sub DESTROY {
1801 137     137   50238 my $self = shift;
1802 137         294 $self->close();
1803             }
1804              
1805             =head1 INTERNAL METHODS
1806              
1807             =head2 check_options()
1808              
1809             For internal use only. This function is in charge of checking the options to
1810             make sure they are of the right type (array/hash reference where appropriate).
1811              
1812             If add_empty_headers is set, this function turns off C.
1813              
1814             Called by the object, accepts no arguments.
1815              
1816             =cut
1817              
1818             #<<< perltidy destroys this function
1819             sub check_options {
1820 136     136 1 140 my $self = shift;
1821 136         135 while (my ($k, $v) = each(%{$self->{'options'}})) {
  1744         3175  
1822 1612 100       2984 if (!exists($DEFAULT_OPTIONS{$k})) {
    100          
1823 2         193 croak("Option not understood: $k");
1824 1871     1871   3927 } elsif ((firstidx { $_ eq ref($v) } @{$OPTION_TYPES{$k}}) < 0) {
  1610         2523  
1825 2         4 my $expected_ref = join('/', @{$OPTION_TYPES{$k}});
  2         5  
1826 2 100       197 croak("Option $k not of the right type, expected: " . ($expected_ref ? "$expected_ref reference" : 'scalar'));
1827             } ## end elsif ((firstidx { $_ eq ...}))
1828             } ## end while (my ($k, $v) = each...)
1829 132 100       351 if ($self->{'options'}{'add_empty_headers'}) {
1830 1         3 $self->{'options'}{'strict'} &= STRICT_READ;
1831             }
1832             } ## end sub check_options
1833             #>>>
1834              
1835             =head2 create_blank_file()
1836              
1837             For internal use only. C populates the object with proper
1838             internal variables, as well as adding blank headers if C is
1839             set.
1840              
1841             By default, the missing value is set to C<$DEFAULT_MISSING> (C<-999>).
1842              
1843             This function turns on the C option, as C must be enabled to
1844             write.
1845              
1846             The delimiter is left undefined and will be guessed upon reading the first data
1847             line using the L function.
1848              
1849             Called by the object, accepts no arguments.
1850              
1851             =cut
1852              
1853             sub create_blank_file {
1854 4     4 1 4 my ($self) = @_;
1855 4         7 $self->{'actual_fields'} = [];
1856 4         6 $self->{'actual_units'} = [];
1857 4         5 $self->{'fields'} = [];
1858 4         5 $self->{'units'} = [];
1859 4         5 $self->{'headers'} = {};
1860 4         5 $self->{'comments'} = [];
1861 4         5 $self->{'data'} = [];
1862 4         5 $self->{'dataidx'} = -1;
1863 4         5 $self->{'max_dataidx'} = -1;
1864 4         5 $self->{'delim'} = undef;
1865 4         4 $self->{'missing'} = $DEFAULT_MISSING;
1866              
1867 4         5 $self->{'options'}{'cache'} = 1;
1868 4         3 $self->{'options'}{'fill_ancillary_data'} = 0;
1869              
1870 4 100       8 my $slash = ( $self->{'options'}{'keep_slashes'} ? '/' : '' );
1871 4 100       7 if ($self->{'options'}{'add_empty_headers'}) {
1872 1         2 foreach (@ALL_HEADERS) {
1873 33 50       46 if ( !exists( $self->{'headers'}{"${slash}$_"} ) ) {
1874 33         37 $self->{'headers'}{"${slash}$_"} = '';
1875 33 100       40 if ( $_ eq 'missing' ) {
1876 1         3 $self->{'headers'}{"${slash}missing"} = $DEFAULT_MISSING;
1877             }
1878             } ## end if (!exists($self->{'headers'...}))
1879             } ## end foreach (@ALL_HEADERS)
1880             } ## end if ($add_missing_headers)
1881              
1882 4         5 my $success = 1;
1883 4 50       9 if ( $self->{'options'}{'default_headers'} ) {
1884 4         8 $success &= $self->add_headers( $self->{'options'}{'default_headers'} );
1885             }
1886 4 50       8 if ( $self->{'options'}{'headers'} ) {
1887 4         6 $success &= $self->add_headers( $self->{'options'}{'headers'} );
1888             }
1889 4 50       11 unless ($success) {
1890 0         0 croak("Error creating blank file.");
1891             }
1892             } ## end sub create_blank_file
1893              
1894             =head2 read_headers()
1895              
1896             For internal use only. C reads the metadata at the beginning of
1897             a SeaBASS file.
1898              
1899             Called by the object, accepts no arguments.
1900              
1901             =cut
1902              
1903             sub read_headers {
1904 127     127 1 136 my $self = shift;
1905              
1906 127 50       265 if ( $self->{'headers'} ) {
1907 0         0 return;
1908             }
1909              
1910 127 100       254 my $slash = ( $self->{'options'}{'keep_slashes'} ? '/' : '' );
1911 127         122 my $success = 1;
1912 127         96 my @comments;
1913              
1914 127         183 $self->{'headers'} = {};
1915 127         192 $self->{'comments'} = [];
1916              
1917 127 50       258 if ( $self->{'options'}{'default_headers'} ) {
1918 127         285 $success &= $self->add_headers( $self->{'options'}{'default_headers'} );
1919             }
1920              
1921 127         142 my $handle = $self->{'handle'};
1922 127         127 my $position = my $line_number = 0;
1923 127         88 my @header_lines;
1924              
1925 127         135 my $strict = $self->{'options'}{'strict'};
1926              
1927 127         466 while ( my $line = <$handle> ) {
1928 3256         1796 $line_number++;
1929 3256         3026 strip($line);
1930 3256 50       3739 if ($line) {
1931 3256 100       6695 if ( $line =~ m'^(/end_header)\@?$'i ) {
    100          
    100          
1932 112         242 push( @header_lines, $1 );
1933 112         122 $position = tell($handle);
1934 112         149 last;
1935             } elsif ( $line =~ m"^/" ) {
1936 2433         2234 push( @header_lines, $line );
1937             } elsif ( $line =~ m"^!" ) {
1938 696         620 push( @comments, $line );
1939 696 100       1017 if ( $self->{'options'}{'preserve_header'} ) {
1940 10         10 push( @header_lines, $line );
1941             }
1942             } else { #TODO: search ahead for more headers or comments (in case of merely comment missing !) and fail if READ_STRICT
1943 15         19 seek( $handle, $position, SEEK_SET );
1944 15 50       21 if ( $strict & STRICT_READ ) {
1945 0         0 carp("File missing /end_header or comment missing !, assuming data start: line #$line_number ($line)");
1946             }
1947 15         17 last;
1948             }
1949             } ## end if ($line)
1950 3129         5953 $position = tell($handle);
1951             } ## end while (my $line = <$handle>)
1952              
1953             # add_headers looks at STRICT_WRITE, not STRICT_READ
1954 127 100       222 if ( $strict & STRICT_READ ) {
1955 5         15 $self->{'options'}{'strict'} |= STRICT_WRITE;
1956             } else {
1957 122         165 $self->{'options'}{'strict'} = 0;
1958             }
1959              
1960 127 100       252 if ( $self->{'options'}{'preserve_header'} ) {
1961 1         7 $self->{'preserved_header'} = [@header_lines];
1962             }
1963              
1964 127         232 $success &= $self->add_headers( \@header_lines );
1965              
1966             # restore strictness
1967 127         183 $self->{'options'}{'strict'} = $strict;
1968              
1969 127 50       251 if ( $self->{'options'}{'headers'} ) {
1970 127         223 $success &= $self->add_headers( $self->{'options'}{'headers'} );
1971             }
1972              
1973 127         139 my %headers = %{ $self->{'headers'} };
  127         1246  
1974              
1975 127 100       319 if ( $self->{'options'}{'preserve_comments'} ) {
1976 126         109 push( @{ $self->{'comments'} }, @comments );
  126         327  
1977             }
1978              
1979 127   66     352 my $missing = $headers{"${slash}missing"} || $DEFAULT_MISSING;
1980 127 100       244 if ( $self->{'options'}{'delete_missing_headers'} ) {
1981 1         5 while ( my ( $k, $v ) = each(%headers) ) {
1982 31 100       64 if ( $k =~ m"/?(?:end|begin)_header$|^/?missing$" ) {
1983 3         4 next;
1984             }
1985 28 100 66     140 if ( !defined($v) || $v =~ m"^n/?a(?:\[.*?\])?$"i || lc($v) eq lc($missing) ) {
      66        
1986 7         14 delete( $headers{$k} );
1987             }
1988             } ## end while (my ($k, $v) = each...)
1989             } ## end if ($self->{'options'}...)
1990              
1991 127 100       197 if ( $strict & STRICT_READ ) {
1992 5         11 foreach (@REQUIRED_HEADERS) {
1993 110 100       3137 if ( !exists( $headers{"${slash}$_"} ) ) {
1994 58         54 $success = 0;
1995 58         4940 carp("Missing required header: $_");
1996             }
1997             } ## end foreach (@REQUIRED_HEADERS)
1998 5         28 while ( my ( $header, $value ) = each(%headers) ) {
1999 72 50       79 if ($slash) {
2000 0         0 $header =~ s"^/"";
2001             }
2002 72 50 33 1220   155 if ( ( firstidx { $_ eq $header } @ALL_HEADERS ) < 0 && ( firstidx { $_ eq $header } @HIDDEN_HEADERS ) < 0 ) {
  1312         895  
  0         0  
2003 0         0 $success = 0;
2004 0         0 carp("$header not a standard header.");
2005             }
2006             } ## end while (my ($header, $value...))
2007 5 50 33     37 if ( $headers{"${slash}begin_header"} || $headers{"${slash}end_header"} ) {
2008 0         0 $success = 0;
2009 0         0 carp("begin_ or end_header incorrect");
2010             }
2011             } ## end if ($strict & STRICT_READ)
2012 127         211 foreach (@ABSOLUTELY_REQUIRED_HEADERS) {
2013 127 100       323 if ( !exists( $headers{"${slash}$_"} ) ) {
2014 1         1 $success = 0;
2015 1 50       2 if ( $strict & STRICT_READ ) {
2016 1         95 carp("Missing absolutely required header: $_");
2017             }
2018             }
2019             } ## end foreach (@ABSOLUTELY_REQUIRED_HEADERS)
2020              
2021 127   100     1384 $self->{'fields'} = [ split( /\s*,\s*/, $headers{"${slash}fields"} || '' ) ];
2022 127   100     1055 $self->{'actual_fields'} = [ split( /\s*,\s*/, $headers{"${slash}fields"} || '' ) ];
2023              
2024 127 100       311 if ( $headers{"${slash}units"} ) {
2025 79         543 $self->{'units'} = [ split( /\s*,\s*/, $headers{"${slash}units"} ) ];
2026 79         573 $self->{'actual_units'} = [ split( /\s*,\s*/, $headers{"${slash}units"} ) ];
2027             } else {
2028 48         87 my (@new_units1);
2029 48         41 foreach ( @{ $self->{'fields'} } ) {
  48         116  
2030 338         319 push( @new_units1, 'unitless' );
2031             }
2032 48         96 my @new_units2 = @new_units1;
2033 48         66 $self->{'units'} = \@new_units1;
2034 48         69 $self->{'actual_units'} = \@new_units2;
2035 48         163 $headers{"${slash}units"} = join( ',', @new_units1 );
2036             } ## end else [ if ($headers{"${slash}units"...})]
2037              
2038 127 100       146 if ( @{$self->{'fields'}} != @{$self->{'units'}} ) {
  127         171  
  127         265  
2039 1 50       3 if ( $strict & STRICT_READ ) {
2040 1         86 carp("/fields and /units don't match up");
2041 1         48 $success = 0;
2042             } else {
2043 0         0 while (@{$self->{'fields'}} > @{$self->{'units'}}){
  0         0  
  0         0  
2044 0         0 push(@{$self->{'units'}}, 'unitless');
  0         0  
2045             }
2046 0         0 while (@{$self->{'fields'}} < @{$self->{'units'}}){
  0         0  
  0         0  
2047 0         0 pop(@{$self->{'units'}});
  0         0  
2048             }
2049             }
2050             }
2051              
2052 127 100       229 unless ($success) {
2053 4 50       11 if ( $strict & STRICT_READ ) {
2054 4         344 croak("Error(s) reading SeaBASS file");
2055             } else {
2056 0         0 return;
2057             }
2058             }
2059              
2060 123         137 $self->{'missing'} = $missing;
2061 123   66     481 $self->{'below_detection_limit'} = $headers{"${slash}below_detection_limit"} || $missing;
2062 123   66     416 $self->{'above_detection_limit'} = $headers{"${slash}above_detection_limit"} || $missing;
2063 123         132 $self->{'line_number'} = $line_number;
2064 123         143 $self->{'data_start_line'} = $line_number;
2065 123         196 $self->{'data_start_position'} = $position;
2066              
2067 123 100       216 if ( $self->{'options'}{'cache'} ) {
2068 113         187 $self->{'data'} = [];
2069             }
2070 123         161 $self->{'dataidx'} = -1;
2071 123         126 $self->{'max_dataidx'} = -1;
2072              
2073 123         151 $self->{'headers'} = \%headers;
2074              
2075 123 100       469 if ( $self->{'options'}{'fill_ancillary_data'} ) {
2076 16         27 my @fields_lc = map {lc} @{ $self->{'fields'} };
  110         122  
  16         23  
2077 16         24 $self->{'fields_lc'} = \@fields_lc;
2078 16         21 $self->{'ancillary'} = {};
2079              
2080 16         20 foreach my $field (@FILL_ANCILLARY_DATA) {
2081 208         251 $self->find_ancillaries($field);
2082             }
2083              
2084 16         24 $self->{'case_conversion'} = {};
2085              
2086 16         15 while ( my ( $field, $value ) = each( %{ $self->{'ancillary'} } ) ) {
  159         344  
2087 143     801   242 my $idx = firstidx { $_ eq $field } @{ $self->{'fields_lc'} };
  801         484  
  143         216  
2088 143         168 my $new_field = $field;
2089 143 100       171 if ( $idx >= 0 ) {
2090 77         84 $new_field = $self->{'fields'}[$idx];
2091             }
2092              
2093 143 100       149 if ( ref($value) ) {
2094 29         55 for ( my $i = 1; $i < @$value; $i++ ) {
2095 52         44 my $new_arg = $value->[$i];
2096 52         136 for ( $value->[$i] =~ /\$(\{\w+\}|\w+)/g ) {
2097 37         82 ( my $variable = $_ ) =~ s/^\{|\}$//g;
2098              
2099 37     196   79 my $idx = firstidx { $_ eq $variable } @{ $self->{'fields_lc'} };
  196         125  
  37         62  
2100 37         55 my $new_variable = $self->{'fields'}[$idx];
2101              
2102 37         277 $new_arg =~ s/\$$variable(\W|\b|$)/\$$new_variable$1/g;
2103 37         193 $new_arg =~ s/\$\{$variable\}/\$$new_variable/g;
2104             } ## end for ($value->[$i] =~ /\$(\{\w+\}|\w+)/g)
2105 52         107 $value->[$i] = $new_arg;
2106             } ## end for (my $i = 1; $i <= length...)
2107             } else {
2108 114         85 my $new_value = $value;
2109 114         323 for ( $value =~ /\$(\{\w+\}|\w+)/g ) {
2110 151         397 ( my $variable = $_ ) =~ s/^\{|\}$//g;
2111              
2112 151     740   245 my $idx = firstidx { $_ eq $variable } @{ $self->{'fields_lc'} };
  740         437  
  151         226  
2113 151 100       298 if ( $idx >= 0 ) {
2114 145         159 my $new_variable = $self->{'fields'}[$idx];
2115              
2116 145         981 $new_value =~ s/\$$variable(\W|\b|$)/\$$new_variable$1/g;
2117 145         959 $new_value =~ s/\$\{$variable\}/\$$new_variable/g;
2118             } ## end if ($idx >= 0)
2119             } ## end for ($value =~ /\$(\{\w+\}|\w+)/g)
2120 114         146 $value = $new_value;
2121             } ## end else [ if (ref($value)) ]
2122 143 50       206 if ( $field ne $new_field ) {
2123 0         0 delete( $self->{'ancillary'}{$field} );
2124 0         0 $self->{'case_conversion'}{$field} = $new_field;
2125             }
2126 143         205 $self->{'ancillary'}{$new_field} = $value;
2127             } ## end while (my ($field, $value...))
2128              
2129 16         30 delete( $self->{'fields_lc'} );
2130             } ## end if ($self->{'options'}...)
2131              
2132 123         475 return 1;
2133             } ## end sub read_headers
2134              
2135             =head2 validate_header($header, $value, $strict)
2136              
2137             my ($k, $v, $string) = ('investigators','jason_lefler',0)
2138             $sb_file->validate_header($k, $v, $strict);
2139              
2140             For internal use only. C is in charge of properly formatting
2141             key/value pairs to add to the object. This function will modify the input
2142             variables in place to prepare them for use.
2143              
2144             Returns false if there was a problem with the inputs, such as C is set
2145             and an invalid header was passed in.
2146              
2147             C will set C to C<$DEFAULT_MISSING> (C<-999>) if it
2148             is blank or undefined.
2149              
2150             This function will also change the expected delimiter for rows that have not
2151             yet been cached.
2152              
2153             =cut
2154              
2155             sub validate_header {
2156 2559     2559 1 2295 my ( $self, $k, $v, $strict ) = @_;
2157              
2158 2559         1583 my $success = 1;
2159              
2160 2559 100       2549 if ( !defined($v) ) {
2161 199         189 $v = '';
2162             } else {
2163 2360         2175 strip($v);
2164             }
2165              
2166 2559         2345 strip($k);
2167              
2168 2559         2414 $k = lc($k);
2169              
2170 2559 50 66     4261 if ( length($v) == 0 && $k !~ /_header/ ) {
2171 0 0       0 if ($strict) {
2172 0         0 carp("$k missing value");
2173 0         0 $success = 0;
2174             } else {
2175 0         0 $v = "";
2176             }
2177             } ## end if (length($v) == 0 &&...)
2178              
2179 2559 100 100     6977 if ( !$self->{'options'}{'preserve_case'} || $k =~ /fields|units/ ) {
2180 1307         1184 $v = lc($v);
2181             }
2182              
2183 2559 100       2668 if ( $self->{'options'}{'keep_slashes'} ) {
2184 139 100       221 if ( $k =~ m"^[^/]" ) {
2185 2         5 $k = "/$k";
2186             }
2187              
2188 139 50 66 0   225 if ( $strict && ( firstidx { "/$_" eq $k } @ALL_HEADERS ) < 0 && ( firstidx { "/$_" eq $k } @HIDDEN_HEADERS ) < 0 ) {
  12   33     32  
  0         0  
2189 0         0 carp("Invalid header, $k");
2190 0         0 $success = 0;
2191             }
2192             } else {
2193 2420 100       4089 if ( $k =~ m"^/" ) {
2194 2418         3578 $k =~ s"^/"";
2195             }
2196              
2197 2420 50 66 0   3748 if ( $strict && ( firstidx { $_ eq $k } @ALL_HEADERS ) < 0 && ( firstidx { $_ eq $k } @HIDDEN_HEADERS ) < 0 ) {
  1328   33     899  
  0         0  
2198 0         0 carp("Invalid header, $k");
2199 0         0 $success = 0;
2200             }
2201             } ## end else [ if ($self->{'options'}...)]
2202              
2203 2559 100       7465 if ( $k =~ /_latitude|_longitude/){
    100          
    100          
    100          
    100          
    100          
2204 308         431 $v =~ s/\[deg\]$//i;
2205             } elsif ( $k =~ /_time/){
2206 158         287 $v =~ s/\[gmt\]$//i;
2207             } elsif ( $k =~ m"^/?delimiter$" ) {
2208 89 100       175 unless ( $self->set_delim( $strict, $v ) ) {
2209 1 50       4 if ($strict) {
2210 0         0 $success = 0;
2211             }
2212             }
2213             } elsif ( $k =~ m"^/?missing$" ) {
2214 110 50       275 $self->{'missing'} = ( length($v) ? $v : $DEFAULT_MISSING );
2215 110         389 $self->{'missing_is_number'} = looks_like_number( $self->{'missing'} );
2216             } elsif ( $k =~ m"^/?above_detection_limit" ) {
2217 3 50       7 $self->{'above_detection_limit'} = ( length($v) ? $v : $self->{'missing'} );
2218 3         13 $self->{'adl_is_number'} = looks_like_number( $self->{'above_detection_limit'} );
2219             } elsif ( $k =~ m"^/?below_detection_limit" ) {
2220 3 50       5 $self->{'below_detection_limit'} = ( length($v) ? $v : $self->{'missing'} );
2221 3         7 $self->{'bdl_is_number'} = looks_like_number( $self->{'below_detection_limit'} );
2222             }
2223              
2224 2559         1969 $_[1] = $k;
2225 2559         1688 $_[2] = $v;
2226              
2227 2559         2396 return $success;
2228             } ## end sub validate_header
2229              
2230             =head2 set_delim($strict, $delim)
2231              
2232             Takes a string declaring the delim (IE: 'comma', 'space', etc) and updates the
2233             object's internal delimiter regex.
2234              
2235             =cut
2236              
2237             sub set_delim {
2238 89     89 1 102 my $self = shift;
2239 89         74 my $strict = shift;
2240 89   50     163 my $delim = shift || '';
2241 89 50       275 if ( $delim eq 'comma' ) {
    50          
    100          
    50          
    100          
2242 0         0 $delim = qr/\s*,\s*/;
2243             } elsif ( $delim eq 'semicolon' ) {
2244 0         0 $delim = qr/\s*;\s*/;
2245             } elsif ( $delim eq 'space' ) {
2246 87         322 $delim = qr/\s+/;
2247             } elsif ( $delim eq 'tab' ) {
2248 0         0 $delim = qr/\t/;
2249             } elsif ($strict) {
2250 1         185 carp("delimiter not understood");
2251             } else {
2252 1 50       5 my $slash = ( $self->{'options'}{'keep_slashes'} ? '/' : '' );
2253 1         3 $self->{'headers'}{"${slash}delimiter"} = 'comma';
2254 1         3 $delim = undef;
2255             }
2256 89         230 $self->{'delim'} = $delim;
2257 89 100       246 return ( $delim ? 1 : 0 );
2258             } ## end sub set_delim
2259              
2260             =head2 update_fields()
2261              
2262             C runs through the currently cached rows and calls
2263             C on each row. It then updates the /fields and /units
2264             headers in the header hash.
2265              
2266             =cut
2267              
2268             sub update_fields {
2269 28     28 1 26 my ($self) = @_;
2270 28 100 66     111 if ( $self->{'options'}{'cache'} && $self->{'max_dataidx'} >= 0 ) {
2271 4         6 foreach my $hash ( @{ $self->{'data'} } ) {
  4         7  
2272 9         14 $self->add_and_remove_fields($hash);
2273             }
2274             }
2275              
2276 28 100       70 my $slash = ( $self->{'options'}{'keep_slashes'} ? '/' : '' );
2277 28         22 $self->{'headers'}{"${slash}fields"} = join( ',', @{ $self->{'actual_fields'} } );
  28         80  
2278 28         31 $self->{'headers'}{"${slash}units"} = join( ',', @{ $self->{'actual_units'} } );
  28         85  
2279             } ## end sub update_fields
2280              
2281             =head2 add_and_remove_fields(\%row)
2282              
2283             Given a reference to a row, this function deletes any fields removed with
2284             C and adds an undefined or /missing value for each field added
2285             via C. If C is set, an undefined value is
2286             given, otherwise, it is filled with the /missing value.
2287              
2288             If C is set, this function adds missing date, time,
2289             date_time, lat, lon, and depth fields to the retrieved row from the header.
2290              
2291             Needlessly returns the hash reference passed in.
2292              
2293             =cut
2294              
2295             sub add_and_remove_fields {
2296 274     274 1 246 my ( $self, $hash ) = @_;
2297 274         657 foreach my $field ( keys(%$hash) ) {
2298 1895 100   7730   2556 if ( ( firstidx { $_ eq $field } @{ $self->{'actual_fields'} } ) < 0 ) {
  7730         6993  
  1895         2485  
2299 21 50 33 0   49 unless ( $self->{'options'}{'fill_ancillary_data'} && ( firstidx { $_ eq $field } keys( %{ $self->{'ancillary'} } ) ) >= 0 ) {
  0         0  
  0         0  
2300 21         38 delete( $hash->{$field} );
2301             }
2302             }
2303             } ## end foreach my $field (keys(%$hash...))
2304              
2305 274 100       525 my $missing = ( $self->{'options'}{'missing_data_to_undef'} ? undef : $self->{'missing'} );
2306 274         623 while ( my ( $variable, $pad ) = each(%FIELD_FORMATTING) ) {
2307 1918   33     4157 my $case_var = $self->{'case_conversion'}{$variable} || $variable;
2308 1918 50 66     4248 if ( defined( $hash->{$case_var} ) && ( !defined($missing) || $hash->{$case_var} != $missing ) ) {
      66        
2309 57         160 $hash->{$case_var} = sprintf( $pad, $hash->{$case_var} );
2310             }
2311             } ## end while (my ($variable, $pad...))
2312              
2313 274 100       415 if ( defined($self->{'ancillary'}) ) {
2314 19         27 $self->{'ancillary_tmp'} = {};
2315 19         35 for my $variable (@FILL_ANCILLARY_DATA) {
2316 247 100       318 if ( defined($self->{'ancillary'}{$variable}) ) {
2317 173         224 my $value = $self->extrapolate_variables( $missing, $self->{'ancillary'}{$variable}, $hash );
2318 173 50       240 if ( defined($value) ) {
2319 173         277 $hash->{$variable} = $value;
2320             }
2321             } ## end if ($self->{'ancillary'...})
2322             } ## end for my $variable (@FILL_ANCILLARY_DATA)
2323             } ## end if ($self->{'ancillary'...})
2324              
2325 274         188 foreach my $field ( @{ $self->{'actual_fields'} } ) {
  274         379  
2326 1951 100       2451 if ( !exists( $hash->{$field} ) ) {
2327 61         93 $hash->{$field} = $missing;
2328             }
2329             }
2330              
2331 274         294 return $hash;
2332             } ## end sub add_and_remove_fields
2333              
2334             =head2 guess_delim($line)
2335              
2336             C is is used to guess the delimiter of a line. It is not very
2337             intelligent. If it sees any commas, it will assume the delimiter is a comma.
2338             Then, it checks for tabs, spaces, then semi-colons. Returns 1 on success. If
2339             it doesn't find any, it will throw a warning and return undef.
2340              
2341             =cut
2342              
2343             sub guess_delim {
2344 35     35 1 39 my ( $self, $line ) = @_;
2345 35         36 my $delim_string = '';
2346 35 100       180 if ( $line =~ /,/ ) {
    50          
    50          
    0          
2347 2         7 my $delim = qr/\s*,\s*/;
2348 2         3 $self->{'delim'} = $delim;
2349 2         3 $delim_string = 'comma';
2350             } elsif ( $line =~ /\t/ ) {
2351 0         0 my $delim = qr/\t/;
2352 0         0 $self->{'delim'} = $delim;
2353 0         0 $delim_string = 'tab';
2354             } elsif ( $line =~ /\s+/ ) {
2355 33         116 my $delim = qr/\s+/;
2356 33         47 $self->{'delim'} = $delim;
2357 33         51 $delim_string = 'space';
2358             } elsif ( $line =~ /;/ ) {
2359 0         0 my $delim = qr/\s*;\s*/;
2360 0         0 $self->{'delim'} = $delim;
2361 0         0 $delim_string = 'semicolon';
2362             } else {
2363 0         0 carp("No delimiter defined or can be guessed");
2364 0         0 return;
2365             }
2366 35 100       128 $self->{'headers'}{ ( $self->{'options'}{'keep_slashes'} ? '/' : '' ) . 'delimiter' } = $delim_string;
2367 35         68 return 1;
2368             } ## end sub guess_delim
2369              
2370             =head2 ingest_row(\%data_row | \@data_row | $data_row | %data_row)
2371              
2372             For mostly internal use, parses arguments for C, C, and C
2373             and returns a hash or hash reference of the data row. Given a hash reference,
2374             it will merely return it.
2375              
2376             Given an array or array reference, it will assume each element is a field as
2377             listed in either C or C. If the number of elements
2378             matches C, it uses assumes it's that. If it doesn't match, it is
2379             tried to match against C. If it doesn't match either, a warning is
2380             issued and the return is undefined.
2381              
2382             Given a non-reference scalar, it will split the scalar based on the current
2383             delimiter. If one is not defined, it is guessed. If it cannot be guessed, the
2384             return is undefined.
2385              
2386             If the inputs are successfully parsed, all keys are turned lowercase.
2387              
2388             =cut
2389              
2390             sub ingest_row {
2391 27     27 1 25 my $self = shift;
2392 27         21 my %new_row;
2393 27 100       52 if ( $#_ < 0 ) {
2394 1         163 carp("Incorrect number of arguments to ingest_row()");
2395 1         108 return;
2396             }
2397 26         47 my $arrayref;
2398 26 100       82 if ( ref( $_[0] ) eq 'HASH' ) {
    100          
    50          
2399 14         15 %new_row = %{ shift(@_) };
  14         44  
2400             } elsif ( ref( $_[0] ) eq 'ARRAY' ) {
2401 3         4 $arrayref = $_[0];
2402             } elsif ( !ref( $_[0] ) ) {
2403 9 100       17 if ( $#_ == 0 ) {
    100          
2404 7 50 66     23 if ( !$self->{'delim'} && !$self->guess_delim( $_[0] ) ) {
2405 0         0 return;
2406             }
2407 7         50 $arrayref = [ split( $self->{'delim'}, $_[0] ) ];
2408             } elsif ( $#_ % 2 == 1 ) {
2409 1         3 %new_row = @_;
2410             } else {
2411 1         107 carp('Even sized list, scalar, or hash/array reference expected');
2412 1         93 return;
2413             }
2414             } else {
2415 0         0 carp("Arguments to ingest_row() not understood.");
2416 0         0 return;
2417             }
2418              
2419 25 100       51 if ($arrayref) {
2420 10         6 my $iterator;
2421 10 100       11 if ( scalar( @{ $self->{'actual_fields'} } ) == scalar( @{$arrayref} ) ) {
  10 50       12  
  10         14  
2422 8         39 $iterator = each_arrayref( $self->{'actual_fields'}, $arrayref );
2423 2         2 } elsif ( scalar( @{ $self->{'fields'} } ) == scalar( @{$arrayref} ) ) {
  2         4  
2424 2         9 $iterator = each_arrayref( $self->{'fields'}, $arrayref );
2425 2         4 $self->add_and_remove_fields( \%new_row );
2426             } else {
2427 0         0 my $actual_fields = scalar( @{ $self->{'actual_fields'} } );
  0         0  
2428 0         0 my $fields = scalar( @{ $self->{'fields'} } );
  0         0  
2429 0 0       0 if ( $actual_fields == $fields ) {
2430 0         0 carp("Invalid number of elements, expected $fields");
2431             } else {
2432 0         0 carp("Invalid number of elements, expected $actual_fields or $fields");
2433             }
2434 0         0 return;
2435             } ## end else [ if (scalar(@{$self->{'actual_fields'...}}))]
2436 10         41 while ( my ( $k, $v ) = $iterator->() ) {
2437 52         142 $new_row{$k} = $v;
2438             }
2439             } ## end if ($arrayref)
2440              
2441 25         45 %new_row = map { lc($_) => $new_row{$_} } keys %new_row;
  71         152  
2442              
2443 25 50       52 if (wantarray) {
2444 0         0 return %new_row;
2445             } else {
2446 25         47 return \%new_row;
2447             }
2448             } ## end sub ingest_row
2449              
2450             =head2 find_ancillaries($field_name)
2451              
2452             Used by C to traverse through a field's possible
2453             substitutes in C<%ANCILLARY> and try to find the most suitable replacement.
2454             Values of fields in C<%ANCILLARY> are array references, where each element is
2455             either:
2456              
2457             =over 4
2458              
2459             =item * a string of existing field names used to create the value
2460              
2461             =item * an array reference of the form [converter function, parsing regex
2462             (optional), arguments to converter, ... ]
2463              
2464             =item * a hash reference of the form { header => qr/parsing_regex/ }
2465              
2466             =back
2467              
2468             If the element is an array reference and an argument requires a field from the
2469             file, all arguments are parsed and the variables within them extrapolated, then
2470             the array is put into C<< $self->{'ancillary'} >>.
2471              
2472             If no value can be ascertained, it will not be added to the data rows.
2473              
2474             The value found is stored in C<< $self->{'ancillary'} >>. Returns 1 on
2475             success, 0 if the field cannot be filled in.
2476              
2477             =cut
2478              
2479             sub find_ancillaries {
2480 377     377 1 303 my ( $self, $field ) = @_;
2481 377 100       552 if ( $self->{'ancillary'}{$field} ) {
2482 95         104 return 1;
2483             }
2484 282     1679   475 my $idx = firstidx { $_ eq $field } @{ $self->{'fields_lc'} };
  1679         1032  
  282         403  
2485 282 100       524 if ( $idx >= 0 ) {
2486 77         159 $self->{'ancillary'}{$field} = "\$\{$field\}";
2487 77         87 return 1;
2488             }
2489              
2490 205 50       243 my $slash = ( $self->{'options'}{'keep_slashes'} ? '/' : '' );
2491 205 100       123 foreach my $attempt ( @{ $ANCILLARY{$field} || [] } ) {
  205         364  
2492 349 100       529 if ( ref($attempt) eq 'HASH' ) {
    100          
    50          
2493 242         145 keys( %{$attempt} ); #reset each() iterator between calls
  242         202  
2494 242         170 while ( my ( $where, $regex ) = each( %{$attempt} ) ) {
  457         867  
2495 242 100 66     772 if ( $where =~ /^\$/ ) {
    100 66        
2496 101 100   564   411 if ( ( firstidx { "\$$_" eq $where } $self->fields() ) >= 0 ) {
  564         612  
2497 15     33   57 $self->{'ancillary'}{$field} = [ sub { return shift; }, $regex, $where ];
  33         35  
2498 15         44 return 1;
2499             }
2500             } elsif ( defined( $self->{'headers'}{"$slash$where"} ) && $self->{'headers'}{"$slash$where"} =~ $regex && lc($1) ne 'na' ) {
2501 12         35 $self->{'ancillary'}{$field} = $1;
2502 12         22 return 1;
2503             }
2504             } ## end while (my ($where, $regex...))
2505             } elsif ( ref($attempt) eq 'ARRAY' ) {
2506 64         87 my @attempt = @$attempt;
2507 64         52 my $function = shift(@attempt);
2508 64         41 my $regex;
2509 64 100       120 if ( ref( $attempt[0] ) eq 'Regexp' ) {
2510 30         21 $regex = shift(@attempt);
2511             }
2512 64         41 my $success = 1;
2513 64         47 my @args;
2514 64         54 foreach my $argument (@attempt) {
2515 64         42 my $tmparg = $argument;
2516 64         231 for ( $argument =~ /\$(\{\w+\}|\w+)/g ) {
2517 86         168 ( my $variable = $_ ) =~ s/^\{|\}$//g;
2518 86         113 $success &= $self->find_ancillaries($variable);
2519 86 100       92 if ($success) {
2520 36 50       43 if ( ref( $self->{'ancillary'}{$variable} ) ) {
2521 0         0 $tmparg =~ s/\$$variable(\W|\b|$)/\$\{$variable\}$1/g;
2522             } else {
2523 36         30 my $value = $self->{'ancillary'}{$variable};
2524 36         402 $tmparg =~ s/\$$variable(\W|\b|$)/$value$1/g;
2525 36         212 $tmparg =~ s/\$\{$variable\}/$value/g;
2526             }
2527             } else {
2528 50         44 last;
2529             }
2530             } ## end for ($argument =~ /\$(\{\w+\}|\w+)/g)
2531 64         101 push( @args, $tmparg );
2532             } ## end foreach my $argument (@attempt)
2533 64 100       103 if ($success) {
2534 14 100       24 if ($regex) {
2535 8         8 unshift( @args, $regex );
2536             }
2537 14         25 $self->{'ancillary'}{$field} = [ $function, @args ];
2538 14         28 return 1;
2539             } ## end if ($success)
2540             } elsif ( !ref($attempt) ) {
2541 43         32 my $success = 1;
2542 43         37 my $tmparg = $attempt;
2543 43         177 for ( $attempt =~ /\$(\{\w+\}|\w+)/g ) {
2544 83         151 ( my $variable = $_ ) =~ s/^\{|\}$//g;
2545 83         109 $success &= $self->find_ancillaries($variable);
2546 83 100       94 if ($success) {
2547 65 100       86 if ( ref( $self->{'ancillary'}{$variable} ) ) {
2548 6         43 $tmparg =~ s/\$$variable(\W|\b|$)/\$\{$variable\}$1/g;
2549             } else {
2550 59         51 my $value = $self->{'ancillary'}{$variable};
2551 59         688 $tmparg =~ s/\$$variable(\W|\b|$)/$value$1/g;
2552 59         427 $tmparg =~ s/\$\{$variable\}/$value/g;
2553             }
2554             } else {
2555 18         18 last;
2556             }
2557             } ## end for ($attempt =~ /\$(\{\w+\}|\w+)/g)
2558 43 100       66 if ($success) {
2559 25         40 $self->{'ancillary'}{$field} = $tmparg;
2560 25         41 return 1;
2561             }
2562             } ## end elsif (!ref($attempt))
2563             } ## end foreach my $attempt (@{$ANCILLARY...})
2564              
2565 139         153 return 0;
2566             } ## end sub find_ancillaries
2567              
2568             =head2 extrapolate_variables($missing, $expression, \%row)
2569              
2570             Used by C to convert a parsed ancillary string, such as
2571             C<'$year$month$day'>, into a real value using the fields from the C<\%row>.
2572             C<$expression>s are strings figured out by C and stored in
2573             C<< $self->{'ancillary'} >>.
2574              
2575             The return is undefined if a value cannot be created (IE: a required field is
2576             missing).
2577              
2578             =cut
2579              
2580             sub extrapolate_variables {
2581 220     220 1 189 my ( $self, $missing, $expression, $row ) = @_;
2582              
2583 220 100       237 if ( ref($expression) ) {
2584 47         57 return $self->extrapolate_function( $missing, $expression, $row );
2585             } else {
2586 173         116 my $tmpexpr = $expression;
2587 173         472 for ( $expression =~ /\$(\{\w+\}|\w+)/g ) {
2588 221         399 ( my $variable = $_ ) =~ s/^\{|\}$//g;
2589 221         104 my $value;
2590 221 100 66     531 if ( $self->{'ancillary_tmp'}{$variable} ) {
    50 33        
    0          
2591 123         110 $value = $self->{'ancillary_tmp'}{$variable};
2592             } elsif ( defined( $row->{$variable} ) && ( !defined($missing) || $row->{$variable} != $missing ) ) {
2593 98         85 $value = $row->{$variable};
2594 98         127 $self->{'ancillary_tmp'}{$variable} = $value;
2595             } elsif ( ref( $self->{'ancillary'}{$variable} ) ) {
2596 0         0 $value = $self->extrapolate_function( $missing, $self->{'ancillary'}{$variable}, $row );
2597 0 0       0 if ( !defined($value) ) {
2598 0         0 return;
2599             }
2600             } else {
2601 0         0 return;
2602             }
2603              
2604 221         1908 $tmpexpr =~ s/\$$variable(\W|\b|$)/$value$1/g;
2605 221         1132 $tmpexpr =~ s/\$\{$variable\}/$value/g;
2606             } ## end for ($expression =~ /\$(\{\w+\}|\w+)/g)
2607 173         271 return $tmpexpr;
2608             } ## end else [ if (ref($expression)) ]
2609             } ## end sub extrapolate_variables
2610              
2611             =head2 extrapolate_function($missing, $expression, \%row)
2612              
2613             If the value stored in C<< $self->{'ancillary'} >> is an array reference, this
2614             function uses the array to create an actual value. See
2615             L for an explanation of the
2616             array.
2617              
2618             =cut
2619              
2620             sub extrapolate_function {
2621 47     47 1 39 my ( $self, $missing, $expression, $row ) = @_;
2622 47         27 my $value;
2623 47         58 my ( $function, @args ) = @$expression;
2624 47         31 my $regex;
2625              
2626 47 100       73 if ( ref( $args[0] ) eq 'Regexp' ) {
2627 41         37 $regex = shift(@args);
2628             }
2629 47         53 for (@args) {
2630 47         63 $_ = $self->extrapolate_variables( $missing, $_, $row );
2631 47 50       79 if ( !defined($_) ) {
2632 0         0 return;
2633             }
2634             } ## end for (@args)
2635 47         67 $value = &$function(@args);
2636 47 100       69 if ($regex) {
2637 41 50       144 if ( $value =~ $regex ) {
2638 41         63 $value = $1;
2639             }
2640             }
2641              
2642 47         99 return $value;
2643             } ## end sub extrapolate_function
2644              
2645             =head1 STATIC METHODS
2646              
2647             =head2 strip(@list)
2648              
2649             my @space_filled_lines = (' line1 ', ' line2', 'line3 ', 'line4');
2650             strip(@space_filled_lines);
2651             print @space_filled_lines; #line1line2line3line4
2652              
2653             Runs through the list and removes leading and trailing whitespace. All changes
2654             are made in place.
2655              
2656             It is literally this:
2657              
2658             sub strip {
2659             s/^\s+|\s+$//g for @_;
2660             }
2661              
2662             =cut
2663              
2664             # Not an object method!
2665             sub strip {
2666 8438     8438 1 30754 s/^\s+|\s+$//g for @_;
2667             }
2668              
2669             =head2 julian_to_greg($yyyyjjj)
2670              
2671             Converts a date in the day of year format YYYYJJJ into YYYYMMDD. Returns the
2672             newly formatted string or undefined if the input does not match the required
2673             format.
2674              
2675             This uses the C function from C to do the heavy
2676             lifting.
2677              
2678             =cut
2679              
2680             # Not an object method!
2681             sub julian_to_greg {
2682 14     14 1 13 my ($yyyyjjj) = @_;
2683 14 50       39 if ( $yyyyjjj =~ /^(\d{4})(\d{3})$/ ) {
2684 14         72 my ( $y, $m, $d ) = Add_Delta_Days( $1, 1, 1, $2 - 1 );
2685 14         42 return sprintf( '%04d%02d%02d', $y, $m, $d );
2686             }
2687 0           return;
2688             } ## end sub julian_to_greg
2689              
2690             =head1 CAVEATS/ODDITIES
2691              
2692             =head2 Duplicate Fields
2693              
2694             This class will not allow a field to be added to the object if a field of the
2695             same name already exists. If a file being read has duplicate field names, only
2696             the B one is used. No warning is issued. If C is used to
2697             remove it, only the first instance will be deleted. To delete all instances,
2698             use C<< $sb_file->remove_field($sb_file->find_fields('chl')) >>. This may
2699             change in future releases.
2700              
2701             =head2 Changing Delimiter or Missing Value
2702              
2703             Modifying the delimiter header on a file that is being read will cause any
2704             non-cached rows to be split by the new delimiter, which should break most/all
2705             files. If the delimiter must be changed, call C to cache all the rows,
2706             then change it. This will obviously not work if caching is turned off. The
2707             same is true for setting the missing value, but only really applies when the
2708             C option is used (same goes to below detection limit).
2709              
2710             =head2 Below Detection Limit
2711              
2712             Below detection limit is only partially supported. If C is
2713             used, fields equal to C will be set to C, as
2714             well. Files modified while using C will have all data
2715             equal to C written out set to the missing value instead
2716             of the below detection limit value. If the below detection limit value is equal
2717             to the missing value or C is used, the
2718             C header will not be written.
2719              
2720             =head1 AUTHOR
2721              
2722             Jason Lefler, C<< >>
2723              
2724             =head1 BUGS
2725              
2726             Please report any bugs or feature requests to C
2727             rt.cpan.org>, or through the web interface at
2728             L. I will be
2729             notified, and then you'll automatically be notified of progress on your bug as
2730             I make changes.
2731              
2732             =head1 SUPPORT
2733              
2734             You can find documentation for this module with the perldoc command.
2735              
2736             perldoc SeaBASS::File
2737              
2738             You can also look for information at:
2739              
2740             =over 4
2741              
2742             =item * RT: CPAN's request tracker (report bugs here)
2743              
2744             L
2745              
2746             =item * AnnoCPAN: Annotated CPAN documentation
2747              
2748             L
2749              
2750             =item * CPAN Ratings
2751              
2752             L
2753              
2754             =item * Search CPAN
2755              
2756             L
2757              
2758             =back
2759              
2760             =head1 LICENSE AND COPYRIGHT
2761              
2762             Copyright 2014 Jason Lefler.
2763              
2764             This program is free software; you can redistribute it and/or modify it under
2765             the terms of either: the GNU General Public License as published by the Free
2766             Software Foundation; or the Artistic License.
2767              
2768             See http://dev.perl.org/licenses/ for more information.
2769              
2770             =cut
2771              
2772             1; # End of SeaBASS::File