File Coverage

blib/lib/SeaBASS/File.pm
Criterion Covered Total %
statement 903 1034 87.3
branch 467 586 79.6
condition 119 180 66.1
subroutine 71 75 94.6
pod 48 48 100.0
total 1608 1923 83.6


line stmt bran cond sub pod time code
1             package SeaBASS::File;
2              
3 24     24   1313482 use strict;
  24         36  
  24         604  
4 24     24   84 use warnings;
  24         29  
  24         1370  
5              
6             =head1 NAME
7              
8             SeaBASS::File - Object-oriented interface for reading/writing SeaBASS files
9              
10             =head1 VERSION
11              
12             version 0.170480
13              
14             =cut
15              
16             our $VERSION = '0.170480'; # VERSION
17              
18             =head1 SYNOPSIS
19              
20             To read SeaBASS files:
21              
22             use SeaBASS::File qw(STRICT_READ STRICT_WRITE INSERT_BEGINNING INSERT_END);
23              
24             my $sb_file = SeaBASS::File->new("input.txt");
25              
26             # Calculate the average chlorophyll value using next
27             my $chl_total = 0;
28             my $measurements = 0;
29            
30             while (my $row = $sb_file->next()){
31             if (defined($row->{'chl'})){
32             $chl_total += $row->{'chl'};
33             $measurements++;
34             }
35             }
36             if ($measurements){
37             print $chl_total/$measurements;
38             } else {
39             print "No chl values.";
40             }
41            
42             #alternatively:
43             $sb_file->rewind();
44             while (my %row = $sb_file->next()){
45             if (defined($row{'chl'})){
46             $chl_total += $row{'chl'};
47             $measurements++;
48             }
49             }
50              
51             # Calculate the average chlorophyll value using where
52             my $chl_total2 = 0;
53             my $measurements2 = 0;
54             $sb_file->where(sub {
55             if (defined($_->{'chl'})){
56             $chl_total2 += $_->{'chl'};
57             $measurements2++;
58             }
59             });
60             if ($measurements2){
61             print $chl_total2/$measurements2;
62             } else {
63             print "No chl values.";
64             }
65            
66             Or to modify SeaBASS files:
67              
68             use SeaBASS::File qw(STRICT_READ STRICT_WRITE INSERT_BEGINNING INSERT_END);
69              
70             my $sb_file = SeaBASS::File->new("input.txt");
71              
72             # Add a one degree bias to water temperature
73             while (my $row = $sb_file->next()){
74             $row->{'wt'} += 1;
75             $sb_file->update($row);
76             }
77            
78             $sb_file->write(); # to STDOUT
79              
80             # Remove the one degree bias to water temperature
81             $sb_file->where(sub {
82             $_->{'wt'} -= 1;
83             });
84            
85             $sb_file->write("output_file.txt");
86            
87             Or to start a SeaBASS file from scratch:
88              
89             use SeaBASS::File qw(STRICT_READ STRICT_WRITE INSERT_BEGINNING INSERT_END);
90              
91             my $sb_file = SeaBASS::File->new({strict => 0, add_empty_headers => 1});
92             $sb_file->add_field('lat','degrees');
93             $sb_file->add_field('lon','degrees');
94             $sb_file->append({'lat' => 1, 'lon' => 2});
95             $sb_file->append("3,4"); # or if you're reading from a CSV file
96             $sb_file->write();
97              
98             =head1 DESCRIPTION
99              
100             C provides an easy to use, object-oriented interface for
101             reading, writing, and modifying SeaBASS data files.
102              
103             =head2 What is SeaBASS?
104              
105             L Bio-optical Archive and Storage
106             System housed at Goddard Space Flight Center.
107             L provides the permanent public
108             repository for data collected under the auspices of the NASA Ocean Biology and
109             Biogeochemistry Program. It also houses data collected by participants in the
110             NASA Sensor Intercomparision and Merger for Biological and Oceanic
111             Interdisciplinary Studies (SIMBIOS) Program. SeaBASS includes marine
112             bio-optical, biogeochemical, and (some) atmospheric data.
113              
114             =head2 SeaBASS File Format
115              
116             SeaBASS files are plain ASCII files with a special header and a matrix of
117             values.
118              
119             =head3 Header
120              
121             The SeaBASS header block consists of many lines of header-keyword pairs. Some
122             headers are optional but most, although technically not required for reading,
123             are required to be ingested into the system. More detailed information is
124             available in the SeaBASS L
125             article|http://seabass.gsfc.nasa.gov/wiki/article.cgi?article=metadataheaders>.
126             The only absolutely required header for this module to work is the /fields
127             line. This module turns fields and units lowercase at all times.
128              
129             /begin_header
130             /delimiter=space
131             /missing=-999
132             /fields=date,time,lat,lon,depth,wt,sal
133             /end_header
134            
135             =head3 Body
136              
137             The SeaBASS body is a matrix of data values, organized much like a spreadsheet.
138             Each column is separated by the value presented in the /delimiter header.
139             Likewise, missing values are indicated by the value presented in the /missing
140             header. The /fields header identifies the geophysical parameter presented in
141             each column.
142              
143             /begin_header
144             /delimiter=space
145             /missing=-999
146             /fields=date,time,lat,lon,depth,wt,sal
147             /end_header
148             19920109 16:30:00 31.389 -64.702 3.4 20.7320 -999
149             19920109 16:30:00 31.389 -64.702 19.1 20.7350 -999
150             19920109 16:30:00 31.389 -64.702 38.3 20.7400 -999
151             19920109 16:30:00 31.389 -64.702 59.6 20.7450 -999
152              
153             =head3 Strictly Speaking
154              
155             SeaBASS files are run through a program called
156             L before
157             they are submitted and before they are ingested into a NASA relational database
158             management system. Some of the things it checks for are required
159             L
160             and proper L
161             names|http://seabass.gsfc.nasa.gov/wiki/article.cgi?article=stdfields>. All
162             data must always have an associated depth, time, and location, though these
163             fields may be placed in the header and are not always required in the data.
164             Just because this module writes the files does not mean they will pass FCHECK.
165              
166             Files are case-INsensitive. Headers are not allowed to have any whitespace.
167              
168             =cut
169              
170 24     24   146 use Carp qw(:DEFAULT);
  24         26  
  24         3176  
171 24     24   107 use Fcntl qw(SEEK_SET);
  24         21  
  24         924  
172 24     24   6338 use List::MoreUtils qw(firstidx each_arrayref);
  24         91021  
  24         120  
173 24     24   19510 use Date::Calc qw(Add_Delta_Days);
  24         115228  
  24         1640  
174 24     24   134 use Scalar::Util qw(looks_like_number);
  24         31  
  24         219600  
175              
176             require Exporter;
177             our @ISA = qw(Exporter);
178             our @EXPORT_OK = qw(STRICT_READ STRICT_WRITE STRICT_ALL INSERT_BEGINNING INSERT_END);
179             our @EXPORT = qw();
180              
181             our $AUTOLOAD;
182              
183             =head1 EXPORT
184              
185             This module does not export anything by default.
186              
187             =head2 STRICT_READ
188              
189             C is used with the C option, enabling error messages when
190             reading header lines and inserting header data.
191              
192             =head2 STRICT_WRITE
193              
194             C is used with the C option, enabling error messages when
195             writing the data to a file/stream.
196              
197             =head2 STRICT_ALL
198              
199             C is used with the C option, enabling C and
200             C.
201              
202             =head2 INSERT_BEGINNING
203              
204             C is used with L
205             \@data_row | $data_row | %data_row)"> or L
206             $unit [, $position]])"> to insert a data row or field at the beginning of their
207             respective lists.
208              
209             =head2 INSERT_END
210              
211             C is used with L
212             $data_row | %data_row)"> or L
213             $position]])"> to insert a data row or field at the end of their respective
214             lists.
215              
216             =cut
217              
218 282     282 1 6755 sub STRICT_READ {1}
219 444     444 1 660 sub STRICT_WRITE {2}
220 1     1 1 1728 sub STRICT_ALL {3}
221 22     22 1 58 sub INSERT_BEGINNING {0}
222 104     104 1 209 sub INSERT_END {-1}
223              
224             my %DEFAULT_OPTIONS = (
225             default_headers => {},
226             headers => {},
227             preserve_case => 1,
228             keep_slashes => 0,
229             cache => 1,
230             delete_missing_headers => 0,
231             missing_data_to_undef => 1,
232             preserve_comments => 1,
233             add_empty_headers => 0,
234             strict => STRICT_WRITE,
235             fill_ancillary_data => 0,
236             preserve_header => 0,
237             preserve_detection_limits => 0,
238             );
239              
240             #return values for ref()
241             my %OPTION_TYPES = (
242             default_headers => [ 'ARRAY', 'HASH' ],
243             headers => [ 'ARRAY', 'HASH' ],
244             preserve_case => [''],
245             keep_slashes => [''],
246             cache => [''],
247             delete_missing_headers => [''],
248             missing_data_to_undef => [''],
249             preserve_comments => [''],
250             add_empty_headers => [''],
251             strict => [''],
252             fill_ancillary_data => [''],
253             preserve_header => [''],
254             preserve_detection_limits => [''],
255             );
256              
257             # All headers required by STRICT_READ and STRICT_WRITE
258             my @REQUIRED_HEADERS = qw(
259             begin_header
260             investigators
261             affiliations
262             contact
263             experiment
264             cruise
265             data_file_name
266             documents
267             calibration_files
268             data_type
269             start_date
270             end_date
271             start_time
272             end_time
273             north_latitude
274             south_latitude
275             east_longitude
276             west_longitude
277             missing
278             delimiter
279             units
280             end_header
281             );
282              
283             # Headers that must be specified, regardless of strictness.
284             my @ABSOLUTELY_REQUIRED_HEADERS = qw(
285             fields
286             );
287              
288             # Valid headers used for STRICT_READ and STRICT_WRITE
289             our @ALL_HEADERS = qw(
290             begin_header
291             investigators
292             affiliations
293             contact
294             experiment
295             cruise
296             station
297             data_file_name
298             documents
299             calibration_files
300             data_type
301             data_status
302             start_date
303             end_date
304             start_time
305             end_time
306             north_latitude
307             south_latitude
308             east_longitude
309             west_longitude
310             cloud_percent
311             measurement_depth
312             secchi_depth
313             water_depth
314             wave_height
315             wind_speed
316             missing
317             below_detection_limit
318             above_detection_limit
319             delimiter
320             fields
321             units
322             end_header
323             );
324              
325             # headers that are allowed but are not to be added during add_empty_headers
326             my @HIDDEN_HEADERS = qw(
327             received
328             );
329              
330             # what to set the missing value to if it's not defined
331             our $DEFAULT_MISSING = -999;
332             our $DEFAULT_BDL = -888;
333             our $DEFAULT_ADL = -777;
334              
335             # overly complex data structure only understandable by idiots
336             # your IQ is the percentage chance that it won't make sense
337             # IE: my IQ = 40, I have a 60% chance of it making sense to me
338             my %ANCILLARY = (
339             'lat' => [ { 'north_latitude' => qr/^(.*?)$/ }, { 'south_latitude' => qr/^(.*?)$/ } ],
340             'lon' => [ { 'east_longitude' => qr/^(.*?)$/ }, { 'west_longitude' => qr/^(.*?)$/ } ],
341             'depth' => [ { 'measurement_depth' => qr/^(.*?)$/ }, ],
342             'date_time' => [ '$date $time', ],
343             'date' => [ [ \&julian_to_greg, '$year$julian' ], [ \&julian_to_greg, '$year$jd' ], '$year$month$day', ],
344             'year' => [ { '$date' => qr/^(\d{4})/ }, { 'start_date' => qr/^(\d{4})/ }, ],
345             'month' => [ { '$date' => qr/^\d{4}(\d{2})/ }, [ \&julian_to_greg, qr/^\d{4}(\d{2})\d{2}$/, '$year$julian' ], [ \&julian_to_greg, qr/^\d{4}(\d{2})\d{2}$/, '$year$jd' ], { 'start_date' => qr/^\d{4}(\d{2})/ }, ],
346             'day' => [ { '$date' => qr/^\d{6}(\d{2})/ }, [ \&julian_to_greg, qr/^\d{4}\d{2}(\d{2})$/, '$year$julian' ], [ \&julian_to_greg, qr/^\d{4}\d{2}(\d{2})$/, '$year$jd' ], { 'start_date' => qr/^\d{6}(\d{2})/ }, ],
347             'time' => [ '$hour:$minute:$second', ],
348             'hour' => [ { '$time' => qr/^(\d+):/ }, { 'start_time' => qr/^(\d+):/ }, ],
349             'minute' => [ { '$time' => qr/:(\d+):/ }, { 'start_time' => qr/:(\d+):/ }, ],
350             'second' => [ { '$time' => qr/:(\d+)(?:[^:\d]|$)/ }, { 'start_time' => qr/:(\d+)(?:[^:\d]|$)/ }, ],
351             'station' => [ { 'station' => qr/^(.*?)$/ }, ],
352             );
353              
354             # what fill_ancillary_data adds to each row
355             #my @FILL_ANCILLARY_DATA = qw(date time date_time lat lon depth);
356             my @FILL_ANCILLARY_DATA = keys(%ANCILLARY);
357              
358             my %FIELD_FORMATTING = (
359             'year' => '%04d',
360             'month' => '%02d',
361             'day' => '%02d',
362             'julian' => '%03d',
363             'hour' => '%02d',
364             'minute' => '%02d',
365             'second' => '%02d',
366             );
367              
368             =head1 CONSTRUCTOR
369              
370             =head2 new([$filename,] [\%options])
371              
372             my $sb_file = SeaBASS::File->new("input_file.txt");
373             my $sb_file = SeaBASS::File->new("input_file.txt", { delete_missing_headers => 1 });
374             my $sb_file = SeaBASS::File->new("output_file.txt", { add_empty_headers => 1 });
375             my $sb_file = SeaBASS::File->new({ add_empty_headers => 1 });
376              
377             Creates a C object. If the file specified exists, the object
378             can be used to read the file. If the file specified does not exist, an empty
379             object is created and will be written to the specified file by default when
380             invoking C.
381              
382             Options should be given in a hash reference to ensure proper argument parsing.
383             If a file is specified, options can be given as a hash list.
384              
385             =over 4
386              
387             =item * default_headers
388              
389             =item * headers
390              
391             These two options accept either an array reference or a hash reference. They
392             are used to set or override header information. First, headers are read from
393             C, then from the data file itself, then are overridden by
394             whatever is in C.
395              
396             Arguments are an array reference of header lines, or a hash reference of
397             header/value pairs.
398              
399             my $sb_file = SeaBASS::File->new({
400             default_headers => [
401             '/cruise=fake_cruise',
402             '/experiment=default_experiment',
403             ],
404             headers => {
405             'experiment' => 'real_experiment',
406             },
407             });
408            
409             B Modifying the delimiter or missing value will likely break the
410             object. Modifying these will change the expected format for all rows. Do so
411             with caution.
412              
413             =item * preserve_case
414              
415             C<1> or C<0>, default C<1>. Setting this to C<0> will change all values in the
416             header to lowercase. Header descriptors (the /header part) are always turned
417             to lowercase, as well as all fields and units.
418              
419             =item * keep_slashes
420              
421             C<1> or C<0>, default C<0>. Forces the object to keep the / in the beginning of
422             headers when accessed. If set to C<1>, when using the L
423             \%new_headers | \@get_headers | @get_headers ])"> function, they will be
424             returned with leading slash.
425              
426             =item * cache
427              
428             C<1> or C<0>, default C<1>. Enables caching data rows as they are read. This
429             speeds up re-reads and allows the data to be modified. This is required for
430             writing files.
431              
432             =item * delete_missing_headers
433              
434             C<1> or C<0>, default C<0>. Any headers that are equal to the /missing header,
435             NA, or are not defined (when using the C options) are
436             deleted. They cannot be retrieved using C and will not be written.
437              
438             =item * missing_data_to_undef
439              
440             C<1> or C<0>, default C<1>. If any values in the data block are equal to the
441             /missing, /above_detection_limit, /below_detection_limit headers, they are set
442             to undef when they are retrieved.
443              
444             =item * preserve_comments
445              
446             C<1> or C<0>, default C<1>. Setting this option to zero will discard any
447             comments found in the header.
448              
449             =item * add_empty_headers
450              
451             C<0>, C<1>, or a string. If set to a string, this will populate any missing
452             headers, including optional ones, and will set their value to the string given.
453             If set to 1, the string 'NA' is used. This option disables C.
454              
455             =item * strict
456              
457             my $sb_file = SeaBASS::File->new("input_file.txt", {strict => STRICT_ALL});
458             my $sb_file = SeaBASS::File->new("input_file.txt", {strict => (STRICT_READ | STRICT_WRITE)});
459             my $sb_file = SeaBASS::File->new("input_file.txt", {strict => 0});
460             my $sb_file = SeaBASS::File->new("input_file.txt", {strict => STRICT_WRITE}); #default
461              
462             =over 4
463              
464             =item * C
465              
466             C will throw errors when reading invalid headers, missing required
467             ones, or an invalid delimiter. This may change in future revisions.
468              
469             =item * C
470              
471             C will throw the same errors when writing the data to a file or
472             stream. C only checks for required headers and invalid headers,
473             but does not check their values to see if they are actually filled. This may
474             change in future revisions.
475              
476             =back
477              
478             =item * fill_ancillary_data
479              
480             C<0> or C<1>, default C<0>. Insert date, time, measurement depth, station, and
481             location values to the data rows from the headers. Values are not overridden
482             if they are already present. This option is only useful when reading files.
483              
484             B It is bad practice to include these fields in the data if they don't
485             change throughout the file. This option is used to remove the burden of
486             checking whether they are in the data or header.
487              
488             B This option will also combine individual date/time
489             parts in the data (year/month/day/etc) to create more uniform date/time fields.
490              
491             B If any part of a date/time is missing, the fields
492             dependent on it will not be added to the row.
493              
494             =item * preserve_header
495              
496             C<0> or C<1>, default C<0>. Preserves header and comment order. This option
497             disables modifying the header, as well, but will not error if you try -- it will
498             simply not be reflected in the output.
499              
500             =item * preserve_detection_limits
501              
502             C<0> or C<1>, default C<0>. Disables setting values equal to below_detection_limit
503             or above_detection_limit to null while reading files. This should only be used
504             during read-only operation, as there is no telling missing data from data
505             outside limits.
506              
507             =back
508              
509             =cut
510              
511             sub new {
512 138     138 1 584931 my ( $class, $file ) = ( shift, shift );
513              
514 138         239 my $self = bless( {}, $class );
515              
516 138         153 my %myoptions;
517              
518 138 100       497 if ( ref($file) eq 'HASH' ) {
    100          
    50          
519 4         66 %myoptions = ( %DEFAULT_OPTIONS, %$file );
520 4         11 $file = '';
521             } elsif ( ref( $_[0] ) eq 'HASH' ) {
522 126         485 %myoptions = ( %DEFAULT_OPTIONS, %{ $_[0] } );
  126         769  
523             } elsif ( !ref( $_[0] ) ) {
524 8 100       23 if ( $#_ % 2 == 1 ) {
525 7         60 %myoptions = ( %DEFAULT_OPTIONS, @_ );
526             } else {
527 1         166 croak('Even sized list expected');
528             }
529             } else {
530 0         0 croak("Arguments not understood.");
531             }
532              
533 137         382 $self->{'options'} = \%myoptions;
534 137         344 $self->check_options();
535              
536 133 50       407 if ( ref($file) eq 'GLOB' ) {
    100          
    100          
537 0         0 $self->{'handle'} = $file;
538             } elsif ( ref($file) eq 'SCALAR' ) {
539 127         1232 open( my $fh, "<", $file );
540 127         583 $self->{'handle'} = $fh;
541             } elsif ($file) {
542 2 100       5 if ( !ref($file) ) {
543 1 50       32 if ( -r $file ) {
    0          
544 1         21 open( my $fh, "<", $file );
545 1         4 $self->{'handle'} = $fh;
546             } elsif ( $self->{'options'}{'strict'} & STRICT_READ ) {
547 0         0 croak("Strict read set, but input file not found or unreadable.");
548             } else {
549 0         0 $self->{'default_write_to'} = $file;
550 0         0 $file = '';
551             }
552             } else {
553 1         89 croak("Invalid parameter, expected file path or file handle.");
554             }
555             } ## end elsif ($file)
556 132 100       227 if ($file) {
557 128 50       273 unless ( $self->read_headers() ) {
558 0 0       0 unless ( $self->{'options'}{'strict'} & STRICT_READ ) {
559 0         0 return;
560             }
561             }
562             } else {
563 4         14 $self->create_blank_file();
564             }
565 128         354 return $self;
566             } ## end sub new
567              
568             =head1 OBJECT METHODS
569              
570             =head2 add_headers(\%headers | \@header_lines | @header_lines)
571              
572             $sb_file->add_headers({'investigators' => 'jason_lefler'});
573             $sb_file->add_headers(['/investigators=jason_lefler']);
574             $sb_file->add_headers('/investigators=jason_lefler');
575            
576             C is used to add or override metadata for a C, as
577             well as add comments.
578              
579             This function can not be used to change fields/units, see
580             L and
581             L for that.
582              
583             B Modifying the delimiter or missing value halfway through
584             reading/writing will likely break the object. Modifying these will change the
585             expected format for any new or non-cached rows. Do so with caution.
586              
587             =cut
588              
589             sub add_headers {
590 397     397 1 342 my $self = shift;
591 397         303 my $success = 1;
592 397         574 my $strict = $self->{'options'}{'strict'} & STRICT_WRITE;
593 397 100       821 if ( ref( $_[0] ) eq 'HASH' ) {
    100          
    50          
594 259         223 while ( my ( $k, $v ) = each( %{ $_[0] } ) ) {
  267         778  
595 8         11 $success &= $self->validate_header( $k, $v, $strict );
596 8         12 $self->{'headers'}{$k} = $v;
597             }
598             } elsif ( ref( $_[0] ) eq 'ARRAY' ) {
599 137         114 foreach ( @{ $_[0] } ) {
  137         253  
600 2570 100 100     5403 if ( $_ =~ /^\s*!/ ) {
    100          
601 11         14 push( @{ $self->{'comments'} }, $_ );
  11         15  
602             } elsif ( $strict && $_ !~ m"^/" ) {
603 2         349 carp("Invalid header line: $_");
604 2         179 $success = 0;
605             } else {
606 2557         4221 my ( $k, $v ) = split( /=/, $_, 2 );
607 2557         3513 $success &= $self->validate_header( $k, $v, $strict );
608 2557         4251 $self->{'headers'}{$k} = $v;
609             }
610             } ## end foreach (@{$_[0]})
611             } elsif ( !ref( $_[0] ) ) {
612 1         2 foreach (@_) {
613 1 50 33     10 if ( $_ =~ /^\s*!/ ) {
    50          
614 0         0 push( @{ $self->{'comments'} }, $_ );
  0         0  
615             } elsif ( $strict && $_ !~ m"^/" ) {
616 0         0 carp("Invalid header line: $_");
617 0         0 $success = 0;
618             } else {
619 1         4 my ( $k, $v ) = split( /=/, $_, 2 );
620 1         3 $success &= $self->validate_header( $k, $v, $strict );
621 1         3 $self->{'headers'}{$k} = $v;
622             }
623             } ## end foreach (@_)
624             } else {
625 0         0 $success = 0;
626             }
627 397         423 return $success;
628             } ## end sub add_headers
629              
630             =head2 headers([ \%new_headers | \@get_headers | @get_headers ])
631              
632             =head2 head
633              
634             =head2 h
635              
636             my %headers = $sb_file->headers(['investigators']);
637             print Dumper(\%headers); # { investigators => 'jason_lefler' }
638            
639             my ($inv) = $sb_file->headers('investigators');
640             print $inv; # jason_lefler
641            
642             $sb_file->headers({investigators => 'jason_lefler'});
643            
644             $sb_file->headers()->{'investigators'} = 'jason_lefler';
645            
646             C is used to read or modify header values. Given an array reference
647             of header names, it will return a hash/hash reference with header/value pairs.
648             Given a plain list of header names, it will return an array/array reference of
649             the given header values. Given a hash reference, this function is a proxy for
650             L.
651              
652             If C is set, then headers will be returned as such, IE: C<<
653             {'/investigators' => 'jason_lefler'} >>.
654              
655             This function can also be used to set header values without going through the
656             normal validation.
657              
658             C and C are aliases to C.
659              
660             =cut
661              
662 0     0 1 0 sub head { shift->headers(@_); }
663 22     22 1 891 sub h { shift->headers(@_); }
664              
665             sub headers {
666 25     25 1 569 my $self = shift;
667 25 100       45 if ( !@_ ) {
    100          
    100          
668 21         95 return $self->{'headers'};
669             } elsif ( ref( $_[0] ) eq 'HASH' ) {
670 1         3 return $self->add_headers(@_);
671             } elsif ( ref( $_[0] ) eq 'ARRAY' ) {
672 1         1 my %ret;
673 1         1 for my $header ( @{ $_[0] } ) {
  1         2  
674 2         4 $ret{$header} = $self->{'headers'}{ lc($header) };
675             }
676 1 50       3 if (wantarray) {
677 1         5 return %ret;
678             } else {
679 0         0 return \%ret;
680             }
681             } else {
682 2         1 my @ret;
683 2         5 foreach (@_) {
684 4 50       4 if ( !ref ) {
685 4         8 my $value = $self->{'headers'}{ lc($_) };
686 4 50       9 push( @ret, defined($value) ? $value : undef );
687             } else {
688 0         0 croak("Argument not understood: $_");
689             }
690             } ## end foreach (@_)
691 2 100       8 if (wantarray) {
    50          
692 1         4 return @ret;
693             } elsif ( $#ret == 0 ) {
694 0         0 return $ret[0];
695             } else {
696 1         4 return \@ret;
697             }
698             } ## end else [ if (!@_) ]
699              
700             } ## end sub headers
701              
702             =head2 data([$index])
703              
704             =head2 d
705              
706             =head2 body
707              
708             =head2 b
709              
710             =head2 all
711              
712             my $row = $sb_file->data(1);
713             my @rows = $sb_file->all();
714              
715             C is responsible for returning either a data line via an index or all of
716             the data lines at once.
717              
718             Data is returned as C<< field => value >> pairs.
719              
720             If given an index: in list context, returns the hash of the row; in scalar
721             context, returns a reference to the row.
722              
723             If not given an index: in list context, returns an array of the rows; in scalar
724             context, returns a reference to an array of the rows.
725              
726             If given an index out of range, returns undef. If given a negative index,
727             Cs the file, then returns undef.
728              
729             If C is enabled and the row has already been read, it is retrieved from
730             the cache. If it has not already be read, all rows leading up to the desired
731             row will be read and cached, and the desired row returned.
732              
733             If C is disabled and either all rows are retrieved or a previously
734             retrieved row is called again, the file will C, then seek to the
735             desired row.
736              
737             C, C, C, and C are all aliases to C. (Yes, that means
738             C can be used with arguments, it would just look silly.)
739              
740             =cut
741              
742 0     0 1 0 sub body { shift->data(@_); }
743 0     0 1 0 sub b { shift->data(@_); }
744 2     2 1 919 sub d { shift->data(@_); }
745 42     42 1 4645 sub all { shift->data(@_); }
746              
747             sub data {
748 133     133 1 4082 my ( $self, $index ) = @_;
749 133 100       231 if ( defined($index) ) {
750 81 100       157 if ( $index < 0 ) {
751 12         17 $self->rewind();
752 12         10 return;
753             }
754 69 100       124 if ( $self->{'options'}{'cache'} ) {
755 63 100       126 if ( $index > $self->{'max_dataidx'} ) {
756 39         48 my $startidx = $self->{'dataidx'};
757 39         94 for ( my $i = 0; $i < ( $index - $startidx ); $i++ ) {
758 73 100       117 if ( !$self->next() ) {
759 4         10 return;
760             }
761             }
762             } ## end if ($index > $self->{'max_dataidx'...})
763              
764 59         62 $self->{'dataidx'} = $index;
765              
766 59 50       96 if (wantarray) {
767 0         0 return %{ $self->{'data'}[$index] };
  0         0  
768             } else {
769 59         310 return $self->{'data'}[$index];
770             }
771             } else {
772 6 100       17 if ( $index <= $self->{'dataidx'} ) {
773 3         6 $self->rewind();
774             }
775 6         8 my $startidx = $self->{'dataidx'};
776 6         15 for ( my $i = 0; $i < ( $index - $startidx - 1 ); $i++ ) {
777 7 50       15 if ( !$self->next() ) {
778 0         0 return;
779             }
780             }
781 6         12 return $self->next();
782             } ## end else [ if ($self->{'options'}...)]
783             } else {
784 52 100       102 if ( $self->{'options'}{'cache'} ) {
785 45         97 while ( $self->next() ) {
786             # noop
787             }
788 45 100       88 if (wantarray) {
789 7         4 return @{ $self->{'data'} };
  7         28  
790             } else {
791 38         138 return $self->{'data'};
792             }
793             } else {
794 7         13 $self->rewind();
795 7         5 my @data_rows;
796 7         13 while ( my $data = $self->next() ) {
797 28         42 push( @data_rows, $data );
798             }
799 7 100       12 if (wantarray) {
800 6         22 return @data_rows;
801             } else {
802 1         4 return \@data_rows;
803             }
804             } ## end else [ if ($self->{'options'}...)]
805             } ## end else [ if (defined($index)) ]
806             } ## end sub data
807              
808             =head2 next()
809              
810             while (my $row = $sb_file->next()){
811             print $row->{'lat'};
812             ...
813             }
814             while (my %row = $sb_file->next()){
815             print $row{'lat'};
816             ...
817             }
818            
819             Returns the next data row in the file, returning C when it runs out of
820             rows.
821              
822             Data is returned as C<< field => value >> pairs.
823              
824             In list context, returns a hash of the row. In scalar context, returns a
825             reference to the hash of a row.
826              
827             After a C, C will return the very first data hash, then each row
828             in turn. If the row has been cached, it's retrieved from the cache instead of
829             rereading from the file.
830              
831             =cut
832              
833             sub next {
834 429     429 1 12943 my $self = shift;
835 429 50       678 if (@_) {
836 0         0 croak("invalid number of arguments on next(), expected 0.");
837             }
838              
839 429 100 100     1712 if ( $self->{'options'}{'cache'} && $self->{'dataidx'} < $self->{'max_dataidx'} ) {
    100          
840 73         66 $self->{'dataidx'}++;
841              
842 73 50       102 if (wantarray) {
843 0         0 return %{ $self->{'data'}[ $self->{'dataidx'} ] };
  0         0  
844             } else {
845 73         216 return $self->{'data'}[ $self->{'dataidx'} ];
846             }
847             } elsif ( $self->{'handle'} ) {
848 344         321 my $handle = $self->{'handle'};
849 344         288 my $line_number = $self->{'line_number'};
850              
851 344         1081 while ( my $line = <$handle> ) {
852 266         211 $line_number++;
853 266         678 strip($line);
854 266 50       397 if ($line) {
855 266         417 my $data_row = $self->make_data_hash($line);
856 266         280 $self->{'line_number'} = $line_number;
857 266 100       417 if ( $self->{'options'}{'cache'} ) {
858 204         145 push( @{ $self->{'data'} }, $data_row );
  204         273  
859             }
860 266         240 $self->{'dataidx'}++;
861 266 100       401 if ( $self->{'dataidx'} > $self->{'max_dataidx'} ) {
862 238         231 $self->{'max_dataidx'} = $self->{'dataidx'};
863             }
864 266 50       323 if (wantarray) {
865 0         0 return %{$data_row};
  0         0  
866             } else {
867 266         766 return $data_row;
868             }
869             } ## end if ($line)
870             } ## end while (my $line = <$handle>)
871             } ## end elsif ($self->{'handle'})
872 90         188 return;
873             } ## end sub next
874              
875             =head2 rewind()
876              
877             C seeks to the start of the data. The next C will return the
878             very first row (or C). If caching is enabled, it will not actually
879             perform a seek, it will merely reset the index interator. If caching is
880             disabled, a seek is performed on the file handle to return to the start of the
881             data.
882              
883             =cut
884              
885             sub rewind {
886 61     61 1 1049 my ($self) = @_;
887 61 100       155 if ( $self->{'dataidx'} != -1 ) {
888 31 100       68 if ( !$self->{'options'}{'cache'} ) {
889 9         22 seek( $self->{'handle'}, $self->{'data_start_position'}, SEEK_SET );
890             }
891 31         34 $self->{'line_number'} = $self->{'data_start_line'};
892 31         43 $self->{'dataidx'} = -1;
893             } ## end if ($self->{'dataidx'}...)
894             } ## end sub rewind
895              
896             =head2 update(\%data_row | \@data_row | $data_row | %data_row)
897              
898             while (my %row = $sb_file->next()){
899             if ($row{'depth'} == -999){
900             $row{'depth'} = 0;
901             }
902             $sb_file->update(\%row);
903             }
904            
905             # Less useful for update():
906             print join(',',@{$sb_file->actual_fields()}); #lat,lon,depth,chl
907            
908             while (my %row = $sb_file->next()){
909             if ($row{'depth'} == -999){
910             $row{'depth'} = 0;
911             }
912             $sb_file->update(@row{'lat','lon','depth','chl'});
913             # or
914             $sb_file->update([@row{'lat','lon','depth','chl'}]);
915             }
916              
917             C replaces the last row read (using C) with the input.
918              
919             Caching must be enabled to use C, C, or C.
920              
921             =cut
922              
923             sub update {
924 12     12 1 44 my $self = shift;
925 12 100       25 if ( !$self->{'options'}{'cache'} ) {
    100          
926 1         87 croak("Caching must be enabled to write.");
927             } elsif ( $self->{'dataidx'} == -1 ) {
928 1         150 croak("No rows read yet.");
929             }
930 10         17 my $new_row = $self->ingest_row(@_);
931 10 100       20 unless ( defined($new_row) ) {
932 2         133 croak("Error parsing inputs");
933             }
934 8         12 $self->{'data'}[ $self->{'dataidx'} ] = $new_row;
935             } ## end sub update
936              
937             =head2 set($index, \%data_row | \@data_row | $data_row | %data_row)
938              
939             my %row = (lat => 1, lon => 2, chl => 1);
940             $sb_file->set(0, \%row);
941            
942             print join(',',@{$sb_file->actual_fields()}); #lat,lon,chl
943             $sb_file->set(0, [1, 2, 1]);
944            
945              
946             C replaces the row at the given index with the input. Seeks to the
947             given index if it has not been read to yet. Cs if the file does not go
948             up to the index specified.
949              
950             Caching must be enabled to use C, C, or C.
951              
952             =cut
953              
954             sub set {
955 3     3 1 15 my $self = shift;
956 3         3 my $index = shift;
957 3 50       6 if ( !$self->{'options'}{'cache'} ) {
958 0         0 croak("Caching must be enabled to write");
959             }
960 3 100       7 if ( $index < 0 ) {
961 1         211 croak("Index must be positive integer");
962             }
963 2         8 my $new_row = $self->ingest_row(@_);
964 2 50       4 unless ( defined($new_row) ) {
965 0         0 croak("Error parsing inputs");
966             }
967              
968 2 50       6 if ( $index > $self->{'max_dataidx'} ) {
969 2         3 my $current_idx = $self->{'dataidx'};
970 2         3 $self->data($index);
971 2         2 $self->{'dataidx'} = $current_idx;
972              
973 2 100       6 if ( $index > $self->{'max_dataidx'} ) {
974 1         112 croak("Index out of bounds.");
975             }
976             } ## end if ($index > $self->{'max_dataidx'...})
977              
978 1         2 $self->{'data'}[$index] = $new_row;
979             } ## end sub set
980              
981             =head2 insert($index, \%data_row | \@data_row | $data_row | %data_row)
982              
983             use SeaBASS::File qw(INSERT_BEGINNING INSERT_END);
984             ...
985            
986             my %row = (lat => 1, lon => 2, chl => 1);
987             $sb_file->insert(INSERT_BEGINNING, \%row);
988            
989             print join(',',@{$sb_file->actual_fields()}); #lat,lon,chl
990            
991             $sb_file->insert(1, [1, 2, 1]);
992             $sb_file->insert(INSERT_END, [1, 2, 1]);
993            
994             Inserts the row into the given position. C inserts a new row
995             at the start of the data, C inserts one at the end of the data
996             block.
997              
998             The index must be a positive integer, C, or C.
999              
1000             If a row is inserted at the end, the entire data block is read from the file to
1001             cache every row, the row is appended to the end, and the current position is
1002             reset to the original position, so C will still return the real next
1003             row from the data.
1004              
1005             If a row is inserted before the current position, the current position is
1006             shifted accordingly and will still return the C real row.
1007              
1008             Caching must be enabled to use C, C, or C.
1009              
1010             =cut
1011              
1012             sub insert {
1013 16     16 1 31 my $self = shift;
1014 16         17 my $index = shift;
1015 16 50       39 if ( !$self->{'options'}{'cache'} ) {
1016 0         0 croak("Caching must be enabled to write.");
1017             }
1018 16 100       25 if ( $index < INSERT_END ) {
1019 1         166 croak("Index must be positive integer, or INSERT_BEGINNING (beginning), or INSERT_END (end)");
1020             }
1021 15         49 my $new_row = $self->ingest_row(@_);
1022 15 50       31 unless ( defined($new_row) ) {
1023 0         0 croak("Error parsing inputs");
1024             }
1025              
1026 15 100       30 if ( $index == INSERT_END ) {
    100          
1027 10         16 my $current_idx = $self->{'dataidx'};
1028 10         20 $self->data();
1029 10         14 $self->{'dataidx'} = $current_idx;
1030             } elsif ( $index > $self->{'max_dataidx'} ) {
1031 2         4 my $current_idx = $self->{'dataidx'};
1032 2         5 $self->data($index);
1033 2         2 $self->{'dataidx'} = $current_idx;
1034              
1035 2 50       6 if ( $index == $self->{'max_dataidx'} + 1 ) {
    100          
1036 0         0 $index = INSERT_END;
1037             } elsif ( $index > $self->{'max_dataidx'} ) {
1038 1         102 croak("Index out of bounds.");
1039             }
1040             } ## end elsif ($index > $self->{'max_dataidx'...})
1041              
1042 14 100 100     45 if ( $index <= $self->{'dataidx'} && $index != INSERT_END ) {
1043 3         5 $self->{'dataidx'}++;
1044             }
1045              
1046 14         17 $self->{'max_dataidx'}++;
1047              
1048 14 100       26 if ( $index == INSERT_BEGINNING ) {
    100          
1049 2         2 unshift( @{ $self->{'data'} }, $new_row );
  2         6  
1050             } elsif ( $index == INSERT_END ) {
1051 10         10 push( @{ $self->{'data'} }, $new_row );
  10         32  
1052             } else {
1053 2         2 splice( @{ $self->{'data'} }, $index, 0, $new_row );
  2         10  
1054             }
1055             } ## end sub insert
1056              
1057             =head2 prepend(\%data_row | \@data_row | $data_row | %data_row)
1058              
1059             C is short for C.
1060              
1061             =cut
1062              
1063             sub prepend {
1064 1     1 1 2 my $self = shift;
1065 1         3 $self->insert( INSERT_BEGINNING, @_ );
1066             }
1067              
1068             =head2 append(\%data_row | \@data_row | $data_row | %data_row)
1069              
1070             C is short for C.
1071              
1072             =cut
1073              
1074             sub append {
1075 9     9 1 44 my $self = shift;
1076 9         15 $self->insert( INSERT_END, @_ );
1077             }
1078              
1079             =head2 remove([$index])
1080              
1081             If index is specified, it deletes the desired index. If it is omitted, the
1082             last row read is deleted. The current position is modified accordingly.
1083              
1084             =cut
1085              
1086             sub remove {
1087 6     6 1 15 my ( $self, $index ) = @_;
1088              
1089 6 100 66     41 if ( !$self->{'options'}{'cache'} ) {
    50          
1090 1         164 croak("Caching must be enabled to write.");
1091             } elsif ( !defined($index) && $self->{'dataidx'} < 0 ) {
1092 0         0 croak("No rows read yet.");
1093             }
1094              
1095 5 100       9 if ( !defined($index) ) {
1096 2         5 $index = $self->{'dataidx'};
1097             }
1098              
1099 5 50       17 if ( $index < 0 ) {
    100          
1100 0         0 croak("Index must be positive integer");
1101             } elsif ( $index > $self->{'max_dataidx'} ) {
1102 2         3 my $current_idx = $self->{'dataidx'};
1103 2         3 $self->data($index);
1104 2         3 $self->{'dataidx'} = $current_idx;
1105              
1106 2 100       4 if ( $index > $self->{'max_dataidx'} ) {
1107 1         163 croak("Index out of bounds.");
1108             }
1109             } ## end elsif ($index > $self->{'max_dataidx'...})
1110              
1111 4 100       9 if ( $index <= $self->{'dataidx'} ) {
1112 3         3 $self->{'dataidx'}--;
1113             }
1114 4         5 $self->{'max_dataidx'}--;
1115              
1116 4         5 splice( @{ $self->{'data'} }, $index, 1 );
  4         12  
1117             } ## end sub remove
1118              
1119             =head2 where(\&function)
1120              
1121             # Find all rows with depth greater than 10 meters
1122             my @ret = $sb_file->where(sub {
1123             if ($_->{'depth'} > 10){
1124             return $_;
1125             } else {
1126             return undef;
1127             }
1128             });
1129              
1130             # Delete all measurements with depth less than 10 meters
1131             $sb_file->where(sub {
1132             if ($_->{'depth'} < 10){
1133             $_ = undef;
1134             }
1135             });
1136            
1137             # Calculate the average chlorophyll value
1138             my $chl_total = 0;
1139             my $measurements = 0;
1140             $sb_file->where(sub {
1141             if (defined($_->{'chl'})){
1142             $chl_total += $_->{'chl'};
1143             $measurements++;
1144             }
1145             });
1146             if ($measurements){
1147             print $chl_total/$measurements;
1148             } else {
1149             print "No chl values.";
1150             }
1151              
1152              
1153             Traverses through each data line, running the given function on each row.
1154             C<$_> is set to the current row. If C<$_> is set to undefined, C is
1155             called. Any changes in C<$_> will be reflected in the data.
1156              
1157             Any defined value returned is added to the return array. If nothing is
1158             returned, a 0 is added.
1159              
1160             =cut
1161              
1162             sub where {
1163 7     7 1 37 my ( $self, $function ) = ( shift, shift );
1164 7 100       23 if ( ref($function) ne 'CODE' ) {
1165 1         173 croak("Invalid arguments.");
1166             }
1167 6         11 my $currentidx = $self->{'dataidx'};
1168 6         14 $self->rewind();
1169              
1170 6         4 my @new_rows;
1171              
1172 6         15 while ( my $row = $self->next() ) {
1173 19         27 local *_ = \$row;
1174 19         40 my $ret = $function->();
1175 19 100 100     129 if ( defined($ret) && defined(wantarray) ) {
1176 12         15 push( @new_rows, $ret );
1177             }
1178 19 100       66 if ( !defined($row) ) {
1179 2 100       5 if ( $self->{'dataidx'} <= $currentidx ) {
1180 1         1 $currentidx--;
1181             }
1182 2         7 $self->remove();
1183             } ## end if (!defined($row))
1184             } ## end while (my $row = $self->next...)
1185              
1186 5         14 $self->data($currentidx);
1187              
1188 5         12 return @new_rows;
1189             } ## end sub where
1190              
1191             =head2 get_all($field_name [, ... ] [, \%options])
1192              
1193             Returns an array/arrayref of all the values matching each given field name.
1194             This function errors out if no field names are passed in or a non-existent
1195             field is requested.
1196              
1197             Available options are:
1198              
1199             =over 4
1200              
1201             =item * delete_missing
1202              
1203             If any of the fields are missing, the row will not be added to any of the
1204             return arrays. (Useful for plotting or statistics that don't work well with
1205             bad values.)
1206              
1207             =back
1208              
1209             =cut
1210              
1211             sub get_all {
1212 13     13 1 3606 my $self = shift;
1213 13         24 my %options = ( 'delete_missing' => 0 );
1214 13 100       32 if ( ref( $_[$#_] ) eq 'HASH' ) {
1215 5         7 %options = %{ pop(@_) };
  5         17  
1216             }
1217 13 50       21 if ( !@_ ) {
1218 0         0 croak("get_all must be called with at least one field name");
1219             }
1220              
1221 13 100       25 my $missing = ( $self->{'options'}{'missing_data_to_undef'} ? undef : $self->{'missing'} );
1222              
1223 13         12 my $currentidx = $self->{'dataidx'};
1224 13         23 $self->rewind();
1225              
1226 13         16 my @fields = map {lc} @_; # turn all inputs lowercase
  22         47  
1227              
1228 13         18 foreach my $field (@fields) {
1229 21 100   130   48 if ( ( firstidx { $_ eq $field } @{ $self->{'actual_fields'} } ) < 0 ) {
  130         117  
  21         47  
1230 4 100 66 9   29 if ( !$self->{'options'}{'fill_ancillary_data'} || ( firstidx { $_ eq $field } keys( %{ $self->{'ancillary'} } ) ) < 0 ) {
  9         10  
  1         4  
1231 3         389 croak("Field $field does not exist");
1232             }
1233             }
1234             } ## end foreach my $field (@fields)
1235              
1236 10         13 my @ret = map { [] } @fields; # make return array of arrays
  17         25  
1237              
1238 10         19 while ( my $row = $self->next() ) {
1239 40 100       54 if ( $options{'delete_missing'} ) {
1240 20         21 my $has_all = 1;
1241 20         18 foreach my $field (@fields) {
1242 36 100 100     123 unless ( defined( $row->{$field} ) && ( !defined($missing) || $row->{$field} != $missing ) ) {
      66        
1243 4         4 $has_all = 0;
1244 4         4 last;
1245             }
1246             } ## end foreach my $field (@fields)
1247 20 100       25 unless ($has_all) {
1248 4         7 next;
1249             }
1250             } ## end if ($options{'delete_missing'...})
1251              
1252 36         58 for ( my $i = 0; $i <= $#fields; $i++ ) {
1253 60         41 push( @{ $ret[$i] }, $row->{ $fields[$i] } );
  60         158  
1254             }
1255             } ## end while (my $row = $self->next...)
1256              
1257 10         21 $self->data($currentidx);
1258              
1259 10 100       20 if ( $#_ == 0 ) {
    100          
1260 3 100       4 if (wantarray) {
1261 1         2 return @{ $ret[0] };
  1         6  
1262             } else {
1263 2         8 return $ret[0];
1264             }
1265             } elsif (wantarray) {
1266 4         12 return @ret;
1267             } else {
1268 3         10 return \@ret;
1269             }
1270             } ## end sub get_all
1271              
1272             =head2 remove_field($field_name [, ... ])
1273              
1274             Removes a field from the file. C is called to remove the field
1275             from cached rows. Any new rows grabbed will have the removed fields omitted,
1276             as well. A warning is issued if the field does not exist.
1277              
1278             =cut
1279              
1280             sub remove_field {
1281 8     8 1 26 my $self = shift;
1282 8 100       21 if ( !@_ ) {
1283 1         182 croak("Field(s) must be specified.");
1284             }
1285 7         11 foreach my $field_orig (@_) {
1286 9         13 my $field = lc($field_orig);
1287              
1288 9     30   66 my $field_idx = firstidx { $_ eq $field } @{ $self->{'actual_fields'} };
  30         25  
  9         22  
1289              
1290 9 100       21 if ( $field_idx < 0 ) {
1291 1         118 carp("Field $field does not exist.");
1292             } else {
1293 8         10 splice( @{ $self->{'actual_fields'} }, $field_idx, 1 );
  8         13  
1294 8         9 splice( @{ $self->{'actual_units'} }, $field_idx, 1 );
  8         15  
1295             }
1296             } ## end foreach my $field_orig (@_)
1297 7         125 $self->update_fields();
1298             } ## end sub remove_field
1299              
1300             =head2 add_field($field_name [, $unit [, $position]])
1301              
1302             Adds a field to the file. C is called to populate all cached
1303             rows. Any rows retrieved will have the new field set to undefined or /missing,
1304             depending on if the option C is set.
1305              
1306             If the unit is not specified, it is set to unitless.
1307              
1308             If the position is not specified, the field is added to the end.
1309              
1310             =cut
1311              
1312             sub add_field {
1313 25     25 1 101 my ( $self, $field, $unit, $position ) = @_;
1314 25 50       72 if ( !$self->{'options'}{'cache'} ) {
    100          
1315 0         0 croak("Caching must be enabled to write.");
1316             } elsif ( !$field ) {
1317 1         141 croak("Field must be specified.");
1318             }
1319 24         28 $field = lc($field);
1320              
1321 24     121   81 my $field_idx = firstidx { $_ eq $field } @{ $self->{'actual_fields'} };
  121         87  
  24         60  
1322 24 100       74 if ( $field_idx >= 0 ) {
1323 3         483 croak("Field already exists.");
1324             }
1325 21 100       40 if ( !defined($position) ) {
1326 15         23 $position = INSERT_END;
1327             }
1328 21   100     44 $unit ||= 'unitless';
1329 21         26 $unit = lc($unit);
1330              
1331 21 100       27 if ( $position == INSERT_END ) {
    100          
1332 17         16 push( @{ $self->{'actual_fields'} }, $field );
  17         31  
1333 17         17 push( @{ $self->{'actual_units'} }, $unit );
  17         22  
1334             } elsif ( $position == INSERT_BEGINNING ) {
1335 2         3 unshift( @{ $self->{'actual_fields'} }, $field );
  2         6  
1336 2         2 unshift( @{ $self->{'actual_units'} }, $unit );
  2         3  
1337             } else {
1338 2         1 splice( @{ $self->{'actual_fields'} }, $position, 0, $field );
  2         5  
1339 2         3 splice( @{ $self->{'actual_units'} }, $position, 0, $unit );
  2         4  
1340             }
1341 21         52 $self->update_fields();
1342             } ## end sub add_field
1343              
1344             =head2 find_fields($string | qr/match/ [, ... ])
1345              
1346             Finds fields matching the string or regex given. If given a string, it must
1347             match a field exactly and entirely to be found. To find a substring, use
1348             C. Fields are returned in the order that they will be output. This
1349             function takes into account fields that are added or removed. All fields are
1350             always lowercase, so all matches are case insensitive.
1351              
1352             Given one argument, returns an array of the fields found. An empty array is
1353             returned if no fields match.
1354              
1355             Given multiple arguments, returns an array/arrayref of arrays of fields found.
1356             IE: C would return something like
1357             C<[['lw510','lw550'],['es510','es550']]>. If no field is matched, the inner
1358             array will be empty. IE: C<[[],[]]>.
1359              
1360             =cut
1361              
1362             sub find_fields {
1363 10     10 1 2685 my $self = shift;
1364 10 50       23 if ( $#_ < 0 ) {
1365 0         0 croak("Input must be a string or regex object.");
1366             }
1367              
1368 10         9 my @ret;
1369              
1370 10         16 foreach my $find (@_) {
1371 13         8 my ( $regex, @matching );
1372 13 50       25 if ( defined($find) ) {
1373 13 100       27 if ( !ref($find) ) {
    50          
1374 7         76 $regex = lc(qr/^$find$/i);
1375             } elsif ( ref($find) eq 'Regexp' ) {
1376 6         21 $regex = lc(qr/$find/i);
1377             } else {
1378 0         0 croak("Input must be a string or regex object.");
1379             }
1380              
1381 13         16 foreach my $field ( @{ $self->{'actual_fields'} } ) {
  13         22  
1382 92 100       267 if ( $field =~ $regex ) {
1383 23         28 push( @matching, $field );
1384             }
1385             }
1386             }
1387 13         26 push( @ret, \@matching );
1388             } ## end foreach my $find (@_)
1389              
1390 10 100       17 if ( $#_ == 0 ) {
1391 7         8 return @{ $ret[0] };
  7         21  
1392             } else {
1393 3 100       6 if (wantarray) {
1394 2         8 return @ret;
1395             } else {
1396 1         3 return \@ret;
1397             }
1398             } ## end else [ if ($#_ == 0) ]
1399             } ## end sub find_fields
1400              
1401             =head2 add_comment(@comments)
1402              
1403             Adds comments to the output file, which are printed, in bulk, after C.
1404             Comments are trimmed before entry and !s are added, if required.
1405              
1406             =cut
1407              
1408             sub add_comment {
1409 2     2 1 4 my $self = shift;
1410 2         5 push(@{$self->{'comments'}}, map {
1411 2         3 my $c = $_;
  3         3  
1412 3         12 $c =~ s/^\s+|\s+$//g;
1413 3 100       8 if ($c =~ /^!/){
1414 1         4 $c
1415             } else {
1416 2         7 "! $c"
1417             }
1418             } @_);
1419             }
1420              
1421             =head2 get_comments([@indices])
1422              
1423             Returns a list of the comments at the given indices. If no indices are passed
1424             in, return them all.
1425              
1426             =cut
1427              
1428             sub get_comments {
1429 6     6 1 12 my $self = shift;
1430 6         7 my @ret;
1431 6 100       12 if (@_){
1432 2         4 @ret = map {$self->{'comments'}[$_]} @_;
  3         7  
1433             } else {
1434 4         5 @ret = @{$self->{'comments'}};
  4         12  
1435             }
1436 6 50       18 if (wantarray){
1437 0         0 return @ret;
1438             } else {
1439 6         30 return \@ret;
1440             }
1441             }
1442              
1443             =head2 set_comments(@comments)
1444              
1445             Overwrites all of the comments in the file. For now, this is the proper way
1446             to remove comments. Comments are trimmed before entry and !s are added, if
1447             required.
1448              
1449             =cut
1450              
1451             sub set_comments {
1452 1     1 1 2 my $self = shift;
1453             $self->{'comments'} = [map {
1454 1         2 my $c = $_;
  2         2  
1455 2         5 $c =~ s/^\s+|\s+$//g;
1456 2 100       5 if ($c =~ /^!/){
1457 1         3 $c
1458             } else {
1459 1         2 "! $c"
1460             }
1461             } @_];
1462             }
1463              
1464             =head2 write([$filename | $file_handle | \*GLOB])
1465              
1466             Outputs the current header and data to the given handle or glob. If no
1467             arguments are given, and a non-existent filename was given to C, the
1468             contents are output into that. If an output file was not given, C
1469             outputs to STDOUT.
1470              
1471             If C is enabled, the headers are checked for invalid headers and
1472             missing required headers and errors/warnings can be thrown accordingly.
1473              
1474             The headers are output in a somewhat-arbitrary but consistent order. If
1475             C is enabled, placeholders are added for every header that
1476             does not exist. A comment section is also added if one is not present.
1477              
1478             =cut
1479              
1480             sub write {
1481 15     15 1 8432 my ( $self, $write_to_h ) = @_;
1482              
1483 15         34 my $strict_write = $self->{'options'}{'strict'} & STRICT_WRITE;
1484 15 100       38 my $slash = ( $self->{'options'}{'keep_slashes'} ? '/' : '' );
1485 15         15 my $error = 0;
1486              
1487 15 100       33 if ($strict_write) {
1488 3         3 foreach my $header ( keys( %{ $self->{'headers'} } ) ) {
  3         11  
1489 65         56 ( my $header_no_slash = $header ) =~ s"^/"";
1490 65 50 33 600   117 if ( ( firstidx { $_ eq $header_no_slash } @ALL_HEADERS ) < 0 && ( firstidx { $_ eq $header_no_slash } @HIDDEN_HEADERS ) < 0 ) {
  1104         734  
  0         0  
1491 0         0 carp("Invalid header: $header");
1492 0         0 $error = 1;
1493             }
1494             } ## end foreach my $header (keys(%{...}))
1495              
1496 3         7 foreach my $header (@REQUIRED_HEADERS) {
1497 66 100       129 if ( !exists( $self->{'headers'}{$header} ) ) {
1498 20         1236 carp("Missing required header: $header");
1499 20         643 $error = 1;
1500             }
1501             } ## end foreach my $header (@REQUIRED_HEADERS)
1502             } ## end if ($strict_write)
1503              
1504 15 100       32 if ( !$error ) {
1505 14         14 my $close_write_to = 0;
1506 14         31 my $old_fh = select();
1507              
1508 14 50 33     92 if ( !$write_to_h && exists( $self->{'default_write_to'} ) ) {
1509 0   0     0 $write_to_h ||= $self->{'default_write_to'};
1510             }
1511              
1512 14 50       28 if ( defined($write_to_h) ) {
1513 0 0       0 if ( ref($write_to_h) eq 'GLOB' ) {
    0          
1514 0         0 select($write_to_h);
1515             } elsif ( !ref($write_to_h) ) {
1516 0         0 my $write_to = $write_to_h;
1517 0         0 $write_to_h = undef;
1518 0 0       0 open( $write_to_h, ">", $write_to ) || croak("Invalid argument for write().");
1519 0         0 $close_write_to = 1;
1520 0         0 select($write_to_h);
1521             } else {
1522 0         0 croak("Invalid argument for write().");
1523             }
1524             } ## end if (defined($write_to_h...))
1525              
1526 14   50     48 $self->{'headers'}{"${slash}delimiter"} ||= 'comma';
1527 14         34 my $actual_delim = lc( $self->{'headers'}{"${slash}delimiter"} );
1528 14 100       44 if ( $actual_delim eq 'comma' ) {
    50          
    0          
    0          
1529 2         4 $actual_delim = ',';
1530             } elsif ( $actual_delim eq 'space' ) {
1531 12         16 $actual_delim = ' ';
1532             } elsif ( $actual_delim eq 'tab' ) {
1533 0         0 $actual_delim = "\t";
1534             } elsif ( $actual_delim eq 'semicolon' ) {
1535 0         0 $actual_delim = ';';
1536             } else {
1537 0         0 $actual_delim = ',';
1538 0         0 $self->{'headers'}{"${slash}delimiter"} = 'comma';
1539             }
1540              
1541 14 50       32 my $missing = ( exists( $self->{'missing'} ) ? $self->{'missing'} : $DEFAULT_MISSING );
1542 14 50       37 my $bdl = ( exists( $self->{'below_detection_limit'} ) ? $self->{'below_detection_limit'} : $DEFAULT_BDL );
1543 14 50       32 my $adl = ( exists( $self->{'above_detection_limit'} ) ? $self->{'above_detection_limit'} : $DEFAULT_ADL );
1544              
1545 14 100       26 if ( $self->{'options'}{'preserve_header'} ) {
1546 1         2 print join("\n", @{ $self->{'preserved_header'} }, '');
  1         65  
1547             } else {
1548 13 100       34 if ( !exists( $self->{'headers'}{"${slash}begin_header"} ) ) {
1549 3         99 print "/begin_header\n";
1550             }
1551              
1552 13         22 my $add_missing_headers = $self->{'options'}{'add_empty_headers'};
1553 13 100 66     33 if ( $add_missing_headers && $add_missing_headers eq '1' ) {
1554 1         2 $add_missing_headers = 'NA';
1555             }
1556 13 100       29 if ( !$self->{'options'}{'preserve_case'} ) {
1557 1   50     4 $add_missing_headers = lc( $add_missing_headers || '' );
1558             }
1559            
1560 13         12 my @headers_to_print;
1561            
1562 13         80 @headers_to_print = @ALL_HEADERS;
1563 13 100 66     73 if ($missing eq $adl || ($self->{'options'}{'missing_data_to_undef'} && !$self->{'options'}{'preserve_detection_limits'})){
      66        
1564 12         164 @headers_to_print = grep(!/above_detection_limit/i, @headers_to_print);
1565             }
1566 13 100 66     72 if ($missing eq $bdl || ($self->{'options'}{'missing_data_to_undef'} && !$self->{'options'}{'preserve_detection_limits'})){
      66        
1567 12         140 @headers_to_print = grep(!/below_detection_limit/i, @headers_to_print);
1568             }
1569            
1570 13 100       27 unless (grep($_ ne 'unitless', @{ $self->{'actual_units'} })){
  13         53  
1571 6         62 @headers_to_print = grep(!/units/i, @headers_to_print);
1572             }
1573              
1574 13         27 foreach my $header (@headers_to_print) {
1575 399 100       1260 if ( $header eq 'missing' ) {
    100          
    100          
    100          
    50          
1576 13         17 while ( my ( $h, $k ) = each( %{ $self->{'headers'} } ) ) {
  165         395  
1577 152         161 ( my $header_no_slash = $h ) =~ s"^/"";
1578 152 50   3097   697 if ( ( firstidx { $_ eq $header_no_slash } @ALL_HEADERS ) < 0 ) {
  3097         2253  
1579 0         0 print "/$h=$k\n";
1580             }
1581             } ## end while (my ($h, $k) = each...)
1582              
1583 13         16 foreach my $comment ( @{ $self->{'comments'} } ) {
  13         26  
1584 18         123 print "$comment\n";
1585             }
1586 13 100 100     12 if ( !@{ $self->{'comments'} } && $add_missing_headers ) {
  13         64  
1587 1         20 print "! Comments: \n!\n";
1588             }
1589 13 100       38 if ( !exists( $self->{'headers'}{"$slash$header"} ) ) {
1590 3         51 print "/missing=$missing\n";
1591             } else {
1592 10         180 print '/', $header, '=', $self->{'headers'}{"$slash$header"}, "\n";
1593             }
1594             } elsif ( $header eq 'fields' ) {
1595 13         21 print "/$header=", join( ',', @{ $self->{'actual_fields'} } ), "\n";
  13         121  
1596             } elsif ( $header eq 'units' ) {
1597 7         10 print "/$header=", join( ',', @{ $self->{'actual_units'} } ), "\n";
  7         63  
1598             } elsif ( exists( $self->{'headers'}{"$slash$header"} ) ) {
1599 110 100       729 if ( $header =~ /_header/ ) {
    100          
    50          
1600 20         500 print "/$header\n";
1601             } elsif (length($self->{'headers'}{"$slash$header"})) {
1602 65         298 my $v = $self->{'headers'}{"$slash$header"};
1603 65 100       150 if ( $header =~ /_latitude|_longitude/ ) {
    100          
1604 8         118 print "/$header=$v\[deg]\n";
1605             } elsif ( $header =~ /_time/ ) {
1606 4         39 print "/$header=$v\[gmt]\n";
1607             } else {
1608 53         471 print "/$header=$v\n";
1609             }
1610             # print '/', $header, '=', $self->{'headers'}{"$slash$header"}, "\n";
1611             } elsif ($add_missing_headers) {
1612 25 100       56 if ( $header =~ /_latitude|_longitude/ ) {
    100          
1613 4         38 print "/$header=$add_missing_headers\[deg]\n";
1614             } elsif ( $header =~ /_time/ ) {
1615 2         21 print "/$header=$add_missing_headers\[gmt]\n";
1616             } else {
1617 19         170 print "/$header=$add_missing_headers\n";
1618             }
1619             }
1620             } elsif ($add_missing_headers) {
1621 0 0       0 if ( $header =~ /_latitude|_longitude/ ) {
    0          
1622 0         0 print "/$header=$add_missing_headers\[deg]\n";
1623             } elsif ( $header =~ /_time/ ) {
1624 0         0 print "/$header=$add_missing_headers\[gmt]\n";
1625             } else {
1626 0         0 print "/$header=$add_missing_headers\n";
1627             }
1628             } ## end elsif ($add_missing_headers)
1629             } ## end foreach my $header (@ALL_HEADERS)
1630              
1631 13 100       55 if ( !exists( $self->{'headers'}{"${slash}end_header"} ) ) {
1632 3         29 print "/end_header\n";
1633             }
1634             } ## end else [ if ($self->{'options'}...)]
1635              
1636 14         44 $self->rewind();
1637              
1638 14         35 while ( my $row = $self->next() ) {
1639 24         19 my @values;
1640 24         19 foreach my $field ( @{ $self->{'actual_fields'} } ) {
  24         43  
1641 118 100       191 push( @values, ( defined( $row->{$field} ) ? $row->{$field} : $missing ) );
1642             }
1643 24         326 print join( $actual_delim, @values ), "\n";
1644             } ## end while (my $row = $self->next...)
1645              
1646 14         46 select($old_fh);
1647 14 50       43 if ($close_write_to) {
1648 0         0 close($write_to_h);
1649             }
1650             } else {
1651 1         56 croak("Error(s) writing file");
1652             }
1653 14         46 return;
1654             } ## end sub write
1655              
1656             =head2 close()
1657              
1658             If a file handle is opened for reading, this function closes it. This is
1659             automatically called when the object is destroyed. This is useful to replace
1660             the file being read with the current changes.
1661              
1662             =cut
1663              
1664             sub close {
1665 138     138 1 148 my ($self) = @_;
1666 138 100       272 if ( $self->{'handle'} ) {
1667 128         359 my $ret = close( $self->{'handle'} );
1668 128         333 delete( $self->{'handle'} );
1669 128         1707 return $ret;
1670             } else {
1671 10         72 return;
1672             }
1673             } ## end sub close
1674              
1675             =head2 make_data_hash($line [,\@field_list])
1676              
1677             my %row = $sb_file->make_data_hash("1.5,2,2.5");
1678             my %row = $sb_file->make_data_hash("1.5,2,2.5", [qw(lat lon sal)]);
1679             my %row = $sb_file->make_data_hash("1.5,2,2.5", [$sb_file->fields()]);
1680             my %row = $sb_file->make_data_hash("1.5,2,2.5", [$sb_file->actual_fields()]);
1681              
1682             For mostly internal use. This function parses a data line. It first splits
1683             the data via the delimiter, assigns a field to each value, and returns a hash
1684             or hash reference.
1685              
1686             If C<@field_list> is not set, C<< $sb_file->fields() >> is used.
1687              
1688             If a delimiter is not set (a blank file was created, a file without a
1689             /delimiter header is read, etc), the delimiter is guessed and set using
1690             L.
1691              
1692             Cs if the delimiter could not be guessed or the number of fields the
1693             line is split into does not match up with the field list.
1694              
1695             =cut
1696              
1697             sub make_data_hash {
1698 266     266 1 306 my ( $self, $line, $field_list ) = @_;
1699 266 50 66     513 if ( !$self->{'delim'} && !$self->guess_delim($line) ) {
1700 0         0 croak("Need a delimiter");
1701             }
1702 266         1618 my @values = split( $self->{'delim'}, $line );
1703 266   33     830 $field_list ||= $self->{'fields'};
1704              
1705 266         180 my ( $num_expected, $num_got ) = ( scalar( @{ $self->{'fields'} } ), scalar(@values) );
  266         361  
1706 266 50       460 if ( $num_expected != $num_got ) {
1707 0         0 croak("Incorrect number of fields or elements: got $num_got, expected $num_expected");
1708             }
1709              
1710 266         201 my %ret;
1711              
1712 266         1227 my $iterator = each_arrayref( $field_list, \@values );
1713 266         896 while ( my ( $k, $v ) = $iterator->() ) {
1714 1869 100       1921 if ( $self->{'options'}{'missing_data_to_undef'} ) {
1715 958 100 100     10649 if ( $self->{'missing_is_number'} && looks_like_number($v) && $v == $self->{'missing'} ) {
    100 100        
    100 66        
    100 100        
      100        
      66        
      100        
      100        
      33        
      66        
      66        
1716 68         178 $ret{$k} = undef;
1717             } elsif ( !$self->{'options'}{'preserve_detection_limits'} && $self->{'adl_is_number'} && looks_like_number($v) && $v == $self->{'above_detection_limit'} ){
1718 1         3 $ret{$k} = undef;
1719             } elsif ( !$self->{'options'}{'preserve_detection_limits'} && $self->{'bdl_is_number'} && looks_like_number($v) && $v == $self->{'below_detection_limit'} ){
1720 1         4 $ret{$k} = undef;
1721             } elsif ($v eq $self->{'missing'} || (!$self->{'options'}{'preserve_detection_limits'} && ($v eq $self->{'below_detection_limit'} || $v eq $self->{'above_detection_limit'}))) {
1722 8         26 $ret{$k} = undef;
1723             } else {
1724 880         2647 $ret{$k} = $v;
1725             }
1726             } else {
1727 911         2151 $ret{$k} = $v;
1728             }
1729             } ## end while (my ($k, $v) = $iterator...)
1730 266         478 $self->add_and_remove_fields( \%ret );
1731 266 50       351 if (wantarray) {
1732 0         0 return %ret;
1733             } else {
1734 266         1070 return \%ret;
1735             }
1736             } ## end sub make_data_hash
1737              
1738             =head2 AUTOLOAD
1739              
1740             print $sb_file->missing();
1741             print $sb_file->dataidx();
1742             print $sb_file->actual_fields();
1743             ...
1744              
1745             Returns a few internal variables. The accessor is read only, but some
1746             variables can be returned as a reference, and can be modified afterwards.
1747             Though, do it knowing this is a terrible idea.
1748              
1749             If the variable retrieved is an array or hash reference and this is called in a
1750             list context, the variable is dereferenced first.
1751              
1752             Here are a few "useful" variables:
1753              
1754             =over 4
1755              
1756             =item * dataidx
1757              
1758             The current row index.
1759              
1760             =item * max_dataidx
1761              
1762             The highest row index read so far.
1763              
1764             =item * fields
1765              
1766             An array of the original fields.
1767              
1768             =item * actual_fields
1769              
1770             An array of the current fields, as modified by C or C.
1771              
1772             =item * delim
1773              
1774             The regex used to split data lines.
1775              
1776             =item * missing
1777              
1778             The null/fill/missing value of the SeaBASS file.
1779              
1780             =item * delim
1781              
1782             The current line delimiter regex.
1783              
1784             =back
1785              
1786             =cut
1787              
1788             sub AUTOLOAD {
1789 109     109   152 my $self = shift;
1790 109 50       169 if ( !ref($self) ) {
1791 0         0 croak("$self is not an object");
1792             }
1793              
1794 109         87 my $name = $AUTOLOAD;
1795 109 50       206 if ($name) {
1796 109         310 $name =~ s/.*://;
1797 109         127 my $value = $self->{$name};
1798 109 100       180 if ( !defined($value) ) {
1799 1         5 return;
1800             }
1801 108 100 66     369 if ( ref($value) eq 'ARRAY' && wantarray ) {
    50 33        
1802 104         72 return @{$value};
  104         343  
1803             } elsif ( ref($value) eq 'HASH' && wantarray ) {
1804 0         0 return %{$value};
  0         0  
1805             }
1806 4         17 return $value;
1807             } ## end if ($name)
1808             } ## end sub AUTOLOAD
1809              
1810             sub DESTROY {
1811 138     138   49033 my $self = shift;
1812 138         303 $self->close();
1813             }
1814              
1815             =head1 INTERNAL METHODS
1816              
1817             =head2 check_options()
1818              
1819             For internal use only. This function is in charge of checking the options to
1820             make sure they are of the right type (array/hash reference where appropriate).
1821              
1822             If add_empty_headers is set, this function turns off C.
1823              
1824             Called by the object, accepts no arguments.
1825              
1826             =cut
1827              
1828             #<<< perltidy destroys this function
1829             sub check_options {
1830 137     137 1 136 my $self = shift;
1831 137         163 while (my ($k, $v) = each(%{$self->{'options'}})) {
  1895         3544  
1832 1762 100       3429 if (!exists($DEFAULT_OPTIONS{$k})) {
    100          
1833 2         213 croak("Option not understood: $k");
1834 2025     2025   4304 } elsif ((firstidx { $_ eq ref($v) } @{$OPTION_TYPES{$k}}) < 0) {
  1760         2737  
1835 2         3 my $expected_ref = join('/', @{$OPTION_TYPES{$k}});
  2         3  
1836 2 100       196 croak("Option $k not of the right type, expected: " . ($expected_ref ? "$expected_ref reference" : 'scalar'));
1837             } ## end elsif ((firstidx { $_ eq ...}))
1838             } ## end while (my ($k, $v) = each...)
1839 133 100       357 if ($self->{'options'}{'add_empty_headers'}) {
1840 1         4 $self->{'options'}{'strict'} &= STRICT_READ;
1841             }
1842             } ## end sub check_options
1843             #>>>
1844              
1845             =head2 create_blank_file()
1846              
1847             For internal use only. C populates the object with proper
1848             internal variables, as well as adding blank headers if C is
1849             set.
1850              
1851             By default, the missing value is set to C<$DEFAULT_MISSING> (C<-999>).
1852              
1853             This function turns on the C option, as C must be enabled to
1854             write.
1855              
1856             The delimiter is left undefined and will be guessed upon reading the first data
1857             line using the L function.
1858              
1859             Called by the object, accepts no arguments.
1860              
1861             =cut
1862              
1863             sub create_blank_file {
1864 4     4 1 6 my ($self) = @_;
1865 4         9 $self->{'actual_fields'} = [];
1866 4         7 $self->{'actual_units'} = [];
1867 4         8 $self->{'fields'} = [];
1868 4         7 $self->{'units'} = [];
1869 4         7 $self->{'headers'} = {};
1870 4         7 $self->{'comments'} = [];
1871 4         11 $self->{'data'} = [];
1872 4         7 $self->{'dataidx'} = -1;
1873 4         5 $self->{'max_dataidx'} = -1;
1874 4         6 $self->{'delim'} = undef;
1875 4         7 $self->{'missing'} = $DEFAULT_MISSING;
1876 4         6 $self->{'below_detection_limit'} = $DEFAULT_BDL;
1877 4         5 $self->{'above_detection_limit'} = $DEFAULT_ADL;
1878              
1879 4         6 $self->{'options'}{'cache'} = 1;
1880 4         6 $self->{'options'}{'fill_ancillary_data'} = 0;
1881              
1882 4 100       11 my $slash = ( $self->{'options'}{'keep_slashes'} ? '/' : '' );
1883 4 100       10 if ($self->{'options'}{'add_empty_headers'}) {
1884 1         3 foreach (@ALL_HEADERS) {
1885 33 50       63 if ( !exists( $self->{'headers'}{"${slash}$_"} ) ) {
1886 33         56 $self->{'headers'}{"${slash}$_"} = '';
1887 33 100       56 if ( $_ eq 'missing' ) {
1888 1         4 $self->{'headers'}{"${slash}missing"} = $DEFAULT_MISSING;
1889             }
1890             } ## end if (!exists($self->{'headers'...}))
1891             } ## end foreach (@ALL_HEADERS)
1892             } ## end if ($add_missing_headers)
1893              
1894 4         7 my $success = 1;
1895 4 50       10 if ( $self->{'options'}{'default_headers'} ) {
1896 4         11 $success &= $self->add_headers( $self->{'options'}{'default_headers'} );
1897             }
1898 4 50       10 if ( $self->{'options'}{'headers'} ) {
1899 4         10 $success &= $self->add_headers( $self->{'options'}{'headers'} );
1900             }
1901 4 50       11 unless ($success) {
1902 0         0 croak("Error creating blank file.");
1903             }
1904             } ## end sub create_blank_file
1905              
1906             =head2 read_headers()
1907              
1908             For internal use only. C reads the metadata at the beginning of
1909             a SeaBASS file.
1910              
1911             Called by the object, accepts no arguments.
1912              
1913             =cut
1914              
1915             sub read_headers {
1916 128     128 1 153 my $self = shift;
1917              
1918 128 50       280 if ( $self->{'headers'} ) {
1919 0         0 return;
1920             }
1921              
1922 128 100       269 my $slash = ( $self->{'options'}{'keep_slashes'} ? '/' : '' );
1923 128         112 my $success = 1;
1924 128         108 my @comments;
1925              
1926 128         170 $self->{'headers'} = {};
1927 128         199 $self->{'comments'} = [];
1928              
1929 128 50       253 if ( $self->{'options'}{'default_headers'} ) {
1930 128         294 $success &= $self->add_headers( $self->{'options'}{'default_headers'} );
1931             }
1932              
1933 128         139 my $handle = $self->{'handle'};
1934 128         129 my $position = my $line_number = 0;
1935 128         90 my @header_lines;
1936              
1937 128         131 my $strict = $self->{'options'}{'strict'};
1938              
1939 128         457 while ( my $line = <$handle> ) {
1940 3263         1865 $line_number++;
1941 3263         3014 strip($line);
1942 3263 50       3697 if ($line) {
1943 3263 100       6681 if ( $line =~ m'^(/end_header)\@?$'i ) {
    100          
    100          
1944 113         263 push( @header_lines, $1 );
1945 113         122 $position = tell($handle);
1946 113         149 last;
1947             } elsif ( $line =~ m"^/" ) {
1948 2439         2333 push( @header_lines, $line );
1949             } elsif ( $line =~ m"^!" ) {
1950 696         620 push( @comments, $line );
1951 696 100       1027 if ( $self->{'options'}{'preserve_header'} ) {
1952 10         8 push( @header_lines, $line );
1953             }
1954             } else { #TODO: search ahead for more headers or comments (in case of merely comment missing !) and fail if READ_STRICT
1955 15         26 seek( $handle, $position, SEEK_SET );
1956 15 50       24 if ( $strict & STRICT_READ ) {
1957 0         0 carp("File missing /end_header or comment missing !, assuming data start: line #$line_number ($line)");
1958             }
1959 15         25 last;
1960             }
1961             } ## end if ($line)
1962 3135         6006 $position = tell($handle);
1963             } ## end while (my $line = <$handle>)
1964              
1965             # add_headers looks at STRICT_WRITE, not STRICT_READ
1966 128 100       238 if ( $strict & STRICT_READ ) {
1967 5         12 $self->{'options'}{'strict'} |= STRICT_WRITE;
1968             } else {
1969 123         173 $self->{'options'}{'strict'} = 0;
1970             }
1971              
1972 128 100       277 if ( $self->{'options'}{'preserve_header'} ) {
1973 1         5 $self->{'preserved_header'} = [@header_lines];
1974             }
1975              
1976 128         241 $success &= $self->add_headers( \@header_lines );
1977              
1978             # restore strictness
1979 128         208 $self->{'options'}{'strict'} = $strict;
1980              
1981 128 50       257 if ( $self->{'options'}{'headers'} ) {
1982 128         246 $success &= $self->add_headers( $self->{'options'}{'headers'} );
1983             }
1984              
1985 128         134 my %headers = %{ $self->{'headers'} };
  128         1172  
1986              
1987 128 100       341 if ( $self->{'options'}{'preserve_comments'} ) {
1988 127         143 push( @{ $self->{'comments'} }, @comments );
  127         341  
1989             }
1990              
1991 128   66     407 my $missing = $headers{"${slash}missing"} || $DEFAULT_MISSING;
1992 128 100       262 if ( $self->{'options'}{'delete_missing_headers'} ) {
1993 1         5 while ( my ( $k, $v ) = each(%headers) ) {
1994 31 100       67 if ( $k =~ m"/?(?:end|begin)_header$|^/?missing$" ) {
1995 3         6 next;
1996             }
1997 28 100 66     133 if ( !defined($v) || $v =~ m"^n/?a(?:\[.*?\])?$"i || lc($v) eq lc($missing) ) {
      66        
1998 7         15 delete( $headers{$k} );
1999             }
2000             } ## end while (my ($k, $v) = each...)
2001             } ## end if ($self->{'options'}...)
2002              
2003 128 100       195 if ( $strict & STRICT_READ ) {
2004 5         10 foreach (@REQUIRED_HEADERS) {
2005 110 100       3040 if ( !exists( $headers{"${slash}$_"} ) ) {
2006 58         70 $success = 0;
2007 58         4779 carp("Missing required header: $_");
2008             }
2009             } ## end foreach (@REQUIRED_HEADERS)
2010 5         22 while ( my ( $header, $value ) = each(%headers) ) {
2011 72 50       81 if ($slash) {
2012 0         0 $header =~ s"^/"";
2013             }
2014 72 50 33 1220   156 if ( ( firstidx { $_ eq $header } @ALL_HEADERS ) < 0 && ( firstidx { $_ eq $header } @HIDDEN_HEADERS ) < 0 ) {
  1312         862  
  0         0  
2015 0         0 $success = 0;
2016 0         0 carp("$header not a standard header.");
2017             }
2018             } ## end while (my ($header, $value...))
2019 5 50 33     35 if ( $headers{"${slash}begin_header"} || $headers{"${slash}end_header"} ) {
2020 0         0 $success = 0;
2021 0         0 carp("begin_ or end_header incorrect");
2022             }
2023             } ## end if ($strict & STRICT_READ)
2024 128         202 foreach (@ABSOLUTELY_REQUIRED_HEADERS) {
2025 128 100       361 if ( !exists( $headers{"${slash}$_"} ) ) {
2026 1         1 $success = 0;
2027 1 50       2 if ( $strict & STRICT_READ ) {
2028 1         84 carp("Missing absolutely required header: $_");
2029             }
2030             }
2031             } ## end foreach (@ABSOLUTELY_REQUIRED_HEADERS)
2032              
2033 128   100     1529 $self->{'fields'} = [ split( /\s*,\s*/, $headers{"${slash}fields"} || '' ) ];
2034 128   100     1128 $self->{'actual_fields'} = [ split( /\s*,\s*/, $headers{"${slash}fields"} || '' ) ];
2035              
2036 128 100       317 if ( $headers{"${slash}units"} ) {
2037 79         566 $self->{'units'} = [ split( /\s*,\s*/, $headers{"${slash}units"} ) ];
2038 79         586 $self->{'actual_units'} = [ split( /\s*,\s*/, $headers{"${slash}units"} ) ];
2039             } else {
2040 49         76 my (@new_units1);
2041 49         50 foreach ( @{ $self->{'fields'} } ) {
  49         116  
2042 345         327 push( @new_units1, 'unitless' );
2043             }
2044 49         111 my @new_units2 = @new_units1;
2045 49         68 $self->{'units'} = \@new_units1;
2046 49         87 $self->{'actual_units'} = \@new_units2;
2047 49         160 $headers{"${slash}units"} = join( ',', @new_units1 );
2048             } ## end else [ if ($headers{"${slash}units"...})]
2049              
2050 128 100       140 if ( @{$self->{'fields'}} != @{$self->{'units'}} ) {
  128         170  
  128         260  
2051 1 50       2 if ( $strict & STRICT_READ ) {
2052 1         82 carp("/fields and /units don't match up");
2053 1         49 $success = 0;
2054             } else {
2055 0         0 while (@{$self->{'fields'}} > @{$self->{'units'}}){
  0         0  
  0         0  
2056 0         0 push(@{$self->{'units'}}, 'unitless');
  0         0  
2057             }
2058 0         0 while (@{$self->{'fields'}} < @{$self->{'units'}}){
  0         0  
  0         0  
2059 0         0 pop(@{$self->{'units'}});
  0         0  
2060             }
2061             }
2062             }
2063              
2064 128 100       222 unless ($success) {
2065 4 50       10 if ( $strict & STRICT_READ ) {
2066 4         373 croak("Error(s) reading SeaBASS file");
2067             } else {
2068 0         0 return;
2069             }
2070             }
2071              
2072 124         143 $self->{'missing'} = $missing;
2073 124   66     495 $self->{'below_detection_limit'} = $headers{"${slash}below_detection_limit"} || $missing;
2074 124   66     418 $self->{'above_detection_limit'} = $headers{"${slash}above_detection_limit"} || $missing;
2075 124         145 $self->{'line_number'} = $line_number;
2076 124         149 $self->{'data_start_line'} = $line_number;
2077 124         199 $self->{'data_start_position'} = $position;
2078              
2079 124 100       231 if ( $self->{'options'}{'cache'} ) {
2080 114         203 $self->{'data'} = [];
2081             }
2082 124         161 $self->{'dataidx'} = -1;
2083 124         138 $self->{'max_dataidx'} = -1;
2084              
2085 124         152 $self->{'headers'} = \%headers;
2086              
2087 124 100       472 if ( $self->{'options'}{'fill_ancillary_data'} ) {
2088 16         13 my @fields_lc = map {lc} @{ $self->{'fields'} };
  110         134  
  16         29  
2089 16         26 $self->{'fields_lc'} = \@fields_lc;
2090 16         30 $self->{'ancillary'} = {};
2091              
2092 16         30 foreach my $field (@FILL_ANCILLARY_DATA) {
2093 208         288 $self->find_ancillaries($field);
2094             }
2095              
2096 16         33 $self->{'case_conversion'} = {};
2097              
2098 16         18 while ( my ( $field, $value ) = each( %{ $self->{'ancillary'} } ) ) {
  159         389  
2099 143     801   274 my $idx = firstidx { $_ eq $field } @{ $self->{'fields_lc'} };
  801         520  
  143         260  
2100 143         206 my $new_field = $field;
2101 143 100       196 if ( $idx >= 0 ) {
2102 77         99 $new_field = $self->{'fields'}[$idx];
2103             }
2104              
2105 143 100       171 if ( ref($value) ) {
2106 29         53 for ( my $i = 1; $i < @$value; $i++ ) {
2107 52         48 my $new_arg = $value->[$i];
2108 52         160 for ( $value->[$i] =~ /\$(\{\w+\}|\w+)/g ) {
2109 37         107 ( my $variable = $_ ) =~ s/^\{|\}$//g;
2110              
2111 37     196   88 my $idx = firstidx { $_ eq $variable } @{ $self->{'fields_lc'} };
  196         135  
  37         77  
2112 37         67 my $new_variable = $self->{'fields'}[$idx];
2113              
2114 37         301 $new_arg =~ s/\$$variable(\W|\b|$)/\$$new_variable$1/g;
2115 37         248 $new_arg =~ s/\$\{$variable\}/\$$new_variable/g;
2116             } ## end for ($value->[$i] =~ /\$(\{\w+\}|\w+)/g)
2117 52         117 $value->[$i] = $new_arg;
2118             } ## end for (my $i = 1; $i <= length...)
2119             } else {
2120 114         91 my $new_value = $value;
2121 114         404 for ( $value =~ /\$(\{\w+\}|\w+)/g ) {
2122 151         466 ( my $variable = $_ ) =~ s/^\{|\}$//g;
2123              
2124 151     740   300 my $idx = firstidx { $_ eq $variable } @{ $self->{'fields_lc'} };
  740         487  
  151         302  
2125 151 100       313 if ( $idx >= 0 ) {
2126 145         147 my $new_variable = $self->{'fields'}[$idx];
2127              
2128 145         1253 $new_value =~ s/\$$variable(\W|\b|$)/\$$new_variable$1/g;
2129 145         1098 $new_value =~ s/\$\{$variable\}/\$$new_variable/g;
2130             } ## end if ($idx >= 0)
2131             } ## end for ($value =~ /\$(\{\w+\}|\w+)/g)
2132 114         150 $value = $new_value;
2133             } ## end else [ if (ref($value)) ]
2134 143 50       211 if ( $field ne $new_field ) {
2135 0         0 delete( $self->{'ancillary'}{$field} );
2136 0         0 $self->{'case_conversion'}{$field} = $new_field;
2137             }
2138 143         247 $self->{'ancillary'}{$new_field} = $value;
2139             } ## end while (my ($field, $value...))
2140              
2141 16         37 delete( $self->{'fields_lc'} );
2142             } ## end if ($self->{'options'}...)
2143              
2144 124         487 return 1;
2145             } ## end sub read_headers
2146              
2147             =head2 validate_header($header, $value, $strict)
2148              
2149             my ($k, $v, $string) = ('investigators','jason_lefler',0)
2150             $sb_file->validate_header($k, $v, $strict);
2151              
2152             For internal use only. C is in charge of properly formatting
2153             key/value pairs to add to the object. This function will modify the input
2154             variables in place to prepare them for use.
2155              
2156             Returns false if there was a problem with the inputs, such as C is set
2157             and an invalid header was passed in.
2158              
2159             C will set C to C<$DEFAULT_MISSING> (C<-999>) if it
2160             is blank or undefined.
2161              
2162             This function will also change the expected delimiter for rows that have not
2163             yet been cached.
2164              
2165             =cut
2166              
2167             sub validate_header {
2168 2566     2566 1 2319 my ( $self, $k, $v, $strict ) = @_;
2169              
2170 2566         1625 my $success = 1;
2171              
2172 2566 100       2492 if ( !defined($v) ) {
2173 201         195 $v = '';
2174             } else {
2175 2365         2228 strip($v);
2176             }
2177              
2178 2566         2381 strip($k);
2179              
2180 2566         2781 $k = lc($k);
2181              
2182 2566 50 66     4393 if ( length($v) == 0 && $k !~ /_header/ ) {
2183 0 0       0 if ($strict) {
2184 0         0 carp("$k missing value");
2185 0         0 $success = 0;
2186             } else {
2187 0         0 $v = "";
2188             }
2189             } ## end if (length($v) == 0 &&...)
2190              
2191 2566 100 100     7098 if ( !$self->{'options'}{'preserve_case'} || $k =~ /fields|units/ ) {
2192 1308         1182 $v = lc($v);
2193             }
2194              
2195 2566 100       2795 if ( $self->{'options'}{'keep_slashes'} ) {
2196 139 100       196 if ( $k =~ m"^[^/]" ) {
2197 2         5 $k = "/$k";
2198             }
2199              
2200 139 50 66 12   235 if ( $strict && ( firstidx { "/$_" eq $k } @ALL_HEADERS ) < 0 && ( firstidx { "/$_" eq $k } @HIDDEN_HEADERS ) < 0 ) {
  12   33     31  
  0         0  
2201 0         0 carp("Invalid header, $k");
2202 0         0 $success = 0;
2203             }
2204             } else {
2205 2427 100       4142 if ( $k =~ m"^/" ) {
2206 2425         3711 $k =~ s"^/"";
2207             }
2208              
2209 2427 50 66 520   3826 if ( $strict && ( firstidx { $_ eq $k } @ALL_HEADERS ) < 0 && ( firstidx { $_ eq $k } @HIDDEN_HEADERS ) < 0 ) {
  1328   33     1259  
  0         0  
2210 0         0 carp("Invalid header, $k");
2211 0         0 $success = 0;
2212             }
2213             } ## end else [ if ($self->{'options'}...)]
2214              
2215 2566 100       7765 if ( $k =~ /_latitude|_longitude/){
    100          
    100          
    100          
    100          
    100          
2216 308         434 $v =~ s/\[deg\]$//i;
2217             } elsif ( $k =~ /_time/){
2218 158         302 $v =~ s/\[gmt\]$//i;
2219             } elsif ( $k =~ m"^/?delimiter$" ) {
2220 90 100       181 unless ( $self->set_delim( $strict, $v ) ) {
2221 1 50       4 if ($strict) {
2222 0         0 $success = 0;
2223             }
2224             }
2225             } elsif ( $k =~ m"^/?missing$" ) {
2226 111 50       296 $self->{'missing'} = ( length($v) ? $v : $DEFAULT_MISSING );
2227 111         388 $self->{'missing_is_number'} = looks_like_number( $self->{'missing'} );
2228             } elsif ( $k =~ m"^/?above_detection_limit" ) {
2229 4 50       10 $self->{'above_detection_limit'} = ( length($v) ? $v : $self->{'missing'} );
2230 4         12 $self->{'adl_is_number'} = looks_like_number( $self->{'above_detection_limit'} );
2231             } elsif ( $k =~ m"^/?below_detection_limit" ) {
2232 4 50       9 $self->{'below_detection_limit'} = ( length($v) ? $v : $self->{'missing'} );
2233 4         13 $self->{'bdl_is_number'} = looks_like_number( $self->{'below_detection_limit'} );
2234             }
2235              
2236 2566         2013 $_[1] = $k;
2237 2566         1697 $_[2] = $v;
2238              
2239 2566         2464 return $success;
2240             } ## end sub validate_header
2241              
2242             =head2 set_delim($strict, $delim)
2243              
2244             Takes a string declaring the delim (IE: 'comma', 'space', etc) and updates the
2245             object's internal delimiter regex.
2246              
2247             =cut
2248              
2249             sub set_delim {
2250 90     90 1 103 my $self = shift;
2251 90         84 my $strict = shift;
2252 90   50     183 my $delim = shift || '';
2253 90 50       273 if ( $delim eq 'comma' ) {
    50          
    100          
    50          
    100          
2254 0         0 $delim = qr/\s*,\s*/;
2255             } elsif ( $delim eq 'semicolon' ) {
2256 0         0 $delim = qr/\s*;\s*/;
2257             } elsif ( $delim eq 'space' ) {
2258 88         326 $delim = qr/\s+/;
2259             } elsif ( $delim eq 'tab' ) {
2260 0         0 $delim = qr/\t/;
2261             } elsif ($strict) {
2262 1         143 carp("delimiter not understood");
2263             } else {
2264 1 50       4 my $slash = ( $self->{'options'}{'keep_slashes'} ? '/' : '' );
2265 1         2 $self->{'headers'}{"${slash}delimiter"} = 'comma';
2266 1         2 $delim = undef;
2267             }
2268 90         296 $self->{'delim'} = $delim;
2269 90 100       256 return ( $delim ? 1 : 0 );
2270             } ## end sub set_delim
2271              
2272             =head2 update_fields()
2273              
2274             C runs through the currently cached rows and calls
2275             C on each row. It then updates the /fields and /units
2276             headers in the header hash.
2277              
2278             =cut
2279              
2280             sub update_fields {
2281 28     28 1 33 my ($self) = @_;
2282 28 100 66     125 if ( $self->{'options'}{'cache'} && $self->{'max_dataidx'} >= 0 ) {
2283 4         6 foreach my $hash ( @{ $self->{'data'} } ) {
  4         9  
2284 9         12 $self->add_and_remove_fields($hash);
2285             }
2286             }
2287              
2288 28 100       52 my $slash = ( $self->{'options'}{'keep_slashes'} ? '/' : '' );
2289 28         26 $self->{'headers'}{"${slash}fields"} = join( ',', @{ $self->{'actual_fields'} } );
  28         85  
2290 28         35 $self->{'headers'}{"${slash}units"} = join( ',', @{ $self->{'actual_units'} } );
  28         91  
2291             } ## end sub update_fields
2292              
2293             =head2 add_and_remove_fields(\%row)
2294              
2295             Given a reference to a row, this function deletes any fields removed with
2296             C and adds an undefined or /missing value for each field added
2297             via C. If C is set, an undefined value is
2298             given, otherwise, it is filled with the /missing value.
2299              
2300             If C is set, this function adds missing date, time,
2301             date_time, lat, lon, and depth fields to the retrieved row from the header.
2302              
2303             Needlessly returns the hash reference passed in.
2304              
2305             =cut
2306              
2307             sub add_and_remove_fields {
2308 277     277 1 253 my ( $self, $hash ) = @_;
2309 277         675 foreach my $field ( keys(%$hash) ) {
2310 1916 100   7814   2695 if ( ( firstidx { $_ eq $field } @{ $self->{'actual_fields'} } ) < 0 ) {
  7814         7137  
  1916         2569  
2311 21 50 33 0   47 unless ( $self->{'options'}{'fill_ancillary_data'} && ( firstidx { $_ eq $field } keys( %{ $self->{'ancillary'} } ) ) >= 0 ) {
  0         0  
  0         0  
2312 21         38 delete( $hash->{$field} );
2313             }
2314             }
2315             } ## end foreach my $field (keys(%$hash...))
2316              
2317 277 100       552 my $missing = ( $self->{'options'}{'missing_data_to_undef'} ? undef : $self->{'missing'} );
2318 277         608 while ( my ( $variable, $pad ) = each(%FIELD_FORMATTING) ) {
2319 1939   33     3874 my $case_var = $self->{'case_conversion'}{$variable} || $variable;
2320 1939 50 66     4267 if ( defined( $hash->{$case_var} ) && ( !defined($missing) || $hash->{$case_var} != $missing ) ) {
      66        
2321 57         219 $hash->{$case_var} = sprintf( $pad, $hash->{$case_var} );
2322             }
2323             } ## end while (my ($variable, $pad...))
2324              
2325 277 100       406 if ( defined($self->{'ancillary'}) ) {
2326 19         33 $self->{'ancillary_tmp'} = {};
2327 19         28 for my $variable (@FILL_ANCILLARY_DATA) {
2328 247 100       370 if ( defined($self->{'ancillary'}{$variable}) ) {
2329 173         242 my $value = $self->extrapolate_variables( $missing, $self->{'ancillary'}{$variable}, $hash );
2330 173 50       247 if ( defined($value) ) {
2331 173         273 $hash->{$variable} = $value;
2332             }
2333             } ## end if ($self->{'ancillary'...})
2334             } ## end for my $variable (@FILL_ANCILLARY_DATA)
2335             } ## end if ($self->{'ancillary'...})
2336              
2337 277         203 foreach my $field ( @{ $self->{'actual_fields'} } ) {
  277         382  
2338 1972 100       2482 if ( !exists( $hash->{$field} ) ) {
2339 61         88 $hash->{$field} = $missing;
2340             }
2341             }
2342              
2343 277         279 return $hash;
2344             } ## end sub add_and_remove_fields
2345              
2346             =head2 guess_delim($line)
2347              
2348             C is is used to guess the delimiter of a line. It is not very
2349             intelligent. If it sees any commas, it will assume the delimiter is a comma.
2350             Then, it checks for tabs, spaces, then semi-colons. Returns 1 on success. If
2351             it doesn't find any, it will throw a warning and return undef.
2352              
2353             =cut
2354              
2355             sub guess_delim {
2356 35     35 1 50 my ( $self, $line ) = @_;
2357 35         40 my $delim_string = '';
2358 35 100       205 if ( $line =~ /,/ ) {
    50          
    50          
    0          
2359 2         16 my $delim = qr/\s*,\s*/;
2360 2         5 $self->{'delim'} = $delim;
2361 2         5 $delim_string = 'comma';
2362             } elsif ( $line =~ /\t/ ) {
2363 0         0 my $delim = qr/\t/;
2364 0         0 $self->{'delim'} = $delim;
2365 0         0 $delim_string = 'tab';
2366             } elsif ( $line =~ /\s+/ ) {
2367 33         200 my $delim = qr/\s+/;
2368 33         70 $self->{'delim'} = $delim;
2369 33         48 $delim_string = 'space';
2370             } elsif ( $line =~ /;/ ) {
2371 0         0 my $delim = qr/\s*;\s*/;
2372 0         0 $self->{'delim'} = $delim;
2373 0         0 $delim_string = 'semicolon';
2374             } else {
2375 0         0 carp("No delimiter defined or can be guessed");
2376 0         0 return;
2377             }
2378 35 100       116 $self->{'headers'}{ ( $self->{'options'}{'keep_slashes'} ? '/' : '' ) . 'delimiter' } = $delim_string;
2379 35         77 return 1;
2380             } ## end sub guess_delim
2381              
2382             =head2 ingest_row(\%data_row | \@data_row | $data_row | %data_row)
2383              
2384             For mostly internal use, parses arguments for C, C, and C
2385             and returns a hash or hash reference of the data row. Given a hash reference,
2386             it will merely return it.
2387              
2388             Given an array or array reference, it will assume each element is a field as
2389             listed in either C or C. If the number of elements
2390             matches C, it uses assumes it's that. If it doesn't match, it is
2391             tried to match against C. If it doesn't match either, a warning is
2392             issued and the return is undefined.
2393              
2394             Given a non-reference scalar, it will split the scalar based on the current
2395             delimiter. If one is not defined, it is guessed. If it cannot be guessed, the
2396             return is undefined.
2397              
2398             If the inputs are successfully parsed, all keys are turned lowercase.
2399              
2400             =cut
2401              
2402             sub ingest_row {
2403 27     27 1 27 my $self = shift;
2404 27         44 my %new_row;
2405 27 100       77 if ( $#_ < 0 ) {
2406 1         81 carp("Incorrect number of arguments to ingest_row()");
2407 1         79 return;
2408             }
2409 26         22 my $arrayref;
2410 26 100       77 if ( ref( $_[0] ) eq 'HASH' ) {
    100          
    50          
2411 14         14 %new_row = %{ shift(@_) };
  14         64  
2412             } elsif ( ref( $_[0] ) eq 'ARRAY' ) {
2413 3         3 $arrayref = $_[0];
2414             } elsif ( !ref( $_[0] ) ) {
2415 9 100       16 if ( $#_ == 0 ) {
    100          
2416 7 50 66     25 if ( !$self->{'delim'} && !$self->guess_delim( $_[0] ) ) {
2417 0         0 return;
2418             }
2419 7         60 $arrayref = [ split( $self->{'delim'}, $_[0] ) ];
2420             } elsif ( $#_ % 2 == 1 ) {
2421 1         3 %new_row = @_;
2422             } else {
2423 1         94 carp('Even sized list, scalar, or hash/array reference expected');
2424 1         71 return;
2425             }
2426             } else {
2427 0         0 carp("Arguments to ingest_row() not understood.");
2428 0         0 return;
2429             }
2430              
2431 25 100       56 if ($arrayref) {
2432 10         9 my $iterator;
2433 10 100       8 if ( scalar( @{ $self->{'actual_fields'} } ) == scalar( @{$arrayref} ) ) {
  10 50       15  
  10         16  
2434 8         50 $iterator = each_arrayref( $self->{'actual_fields'}, $arrayref );
2435 2         3 } elsif ( scalar( @{ $self->{'fields'} } ) == scalar( @{$arrayref} ) ) {
  2         4  
2436 2         5 $iterator = each_arrayref( $self->{'fields'}, $arrayref );
2437 2         5 $self->add_and_remove_fields( \%new_row );
2438             } else {
2439 0         0 my $actual_fields = scalar( @{ $self->{'actual_fields'} } );
  0         0  
2440 0         0 my $fields = scalar( @{ $self->{'fields'} } );
  0         0  
2441 0 0       0 if ( $actual_fields == $fields ) {
2442 0         0 carp("Invalid number of elements, expected $fields");
2443             } else {
2444 0         0 carp("Invalid number of elements, expected $actual_fields or $fields");
2445             }
2446 0         0 return;
2447             } ## end else [ if (scalar(@{$self->{'actual_fields'...}}))]
2448 10         38 while ( my ( $k, $v ) = $iterator->() ) {
2449 52         138 $new_row{$k} = $v;
2450             }
2451             } ## end if ($arrayref)
2452              
2453 25         56 %new_row = map { lc($_) => $new_row{$_} } keys %new_row;
  71         174  
2454              
2455 25 50       54 if (wantarray) {
2456 0         0 return %new_row;
2457             } else {
2458 25         50 return \%new_row;
2459             }
2460             } ## end sub ingest_row
2461              
2462             =head2 find_ancillaries($field_name)
2463              
2464             Used by C to traverse through a field's possible
2465             substitutes in C<%ANCILLARY> and try to find the most suitable replacement.
2466             Values of fields in C<%ANCILLARY> are array references, where each element is
2467             either:
2468              
2469             =over 4
2470              
2471             =item * a string of existing field names used to create the value
2472              
2473             =item * an array reference of the form [converter function, parsing regex
2474             (optional), arguments to converter, ... ]
2475              
2476             =item * a hash reference of the form { header => qr/parsing_regex/ }
2477              
2478             =back
2479              
2480             If the element is an array reference and an argument requires a field from the
2481             file, all arguments are parsed and the variables within them extrapolated, then
2482             the array is put into C<< $self->{'ancillary'} >>.
2483              
2484             If no value can be ascertained, it will not be added to the data rows.
2485              
2486             The value found is stored in C<< $self->{'ancillary'} >>. Returns 1 on
2487             success, 0 if the field cannot be filled in.
2488              
2489             =cut
2490              
2491             sub find_ancillaries {
2492 377     377 1 322 my ( $self, $field ) = @_;
2493 377 100       560 if ( $self->{'ancillary'}{$field} ) {
2494 95         123 return 1;
2495             }
2496 282     1679   555 my $idx = firstidx { $_ eq $field } @{ $self->{'fields_lc'} };
  1679         1058  
  282         513  
2497 282 100       554 if ( $idx >= 0 ) {
2498 77         152 $self->{'ancillary'}{$field} = "\$\{$field\}";
2499 77         104 return 1;
2500             }
2501              
2502 205 50       285 my $slash = ( $self->{'options'}{'keep_slashes'} ? '/' : '' );
2503 205 100       134 foreach my $attempt ( @{ $ANCILLARY{$field} || [] } ) {
  205         480  
2504 349 100       532 if ( ref($attempt) eq 'HASH' ) {
    100          
    50          
2505 242         156 keys( %{$attempt} ); #reset each() iterator between calls
  242         233  
2506 242         189 while ( my ( $where, $regex ) = each( %{$attempt} ) ) {
  457         887  
2507 242 100 66     795 if ( $where =~ /^\$/ ) {
    100 66        
2508 101 100   564   447 if ( ( firstidx { "\$$_" eq $where } $self->fields() ) >= 0 ) {
  564         666  
2509 15     33   60 $self->{'ancillary'}{$field} = [ sub { return shift; }, $regex, $where ];
  33         33  
2510 15         42 return 1;
2511             }
2512             } elsif ( defined( $self->{'headers'}{"$slash$where"} ) && $self->{'headers'}{"$slash$where"} =~ $regex && lc($1) ne 'na' ) {
2513 12         22 $self->{'ancillary'}{$field} = $1;
2514 12         27 return 1;
2515             }
2516             } ## end while (my ($where, $regex...))
2517             } elsif ( ref($attempt) eq 'ARRAY' ) {
2518 64         101 my @attempt = @$attempt;
2519 64         60 my $function = shift(@attempt);
2520 64         45 my $regex;
2521 64 100       93 if ( ref( $attempt[0] ) eq 'Regexp' ) {
2522 30         32 $regex = shift(@attempt);
2523             }
2524 64         43 my $success = 1;
2525 64         45 my @args;
2526 64         65 foreach my $argument (@attempt) {
2527 64         49 my $tmparg = $argument;
2528 64         294 for ( $argument =~ /\$(\{\w+\}|\w+)/g ) {
2529 86         185 ( my $variable = $_ ) =~ s/^\{|\}$//g;
2530 86         145 $success &= $self->find_ancillaries($variable);
2531 86 100       101 if ($success) {
2532 36 50       58 if ( ref( $self->{'ancillary'}{$variable} ) ) {
2533 0         0 $tmparg =~ s/\$$variable(\W|\b|$)/\$\{$variable\}$1/g;
2534             } else {
2535 36         33 my $value = $self->{'ancillary'}{$variable};
2536 36         493 $tmparg =~ s/\$$variable(\W|\b|$)/$value$1/g;
2537 36         247 $tmparg =~ s/\$\{$variable\}/$value/g;
2538             }
2539             } else {
2540 50         55 last;
2541             }
2542             } ## end for ($argument =~ /\$(\{\w+\}|\w+)/g)
2543 64         144 push( @args, $tmparg );
2544             } ## end foreach my $argument (@attempt)
2545 64 100       133 if ($success) {
2546 14 100       24 if ($regex) {
2547 8         12 unshift( @args, $regex );
2548             }
2549 14         36 $self->{'ancillary'}{$field} = [ $function, @args ];
2550 14         36 return 1;
2551             } ## end if ($success)
2552             } elsif ( !ref($attempt) ) {
2553 43         30 my $success = 1;
2554 43         37 my $tmparg = $attempt;
2555 43         203 for ( $attempt =~ /\$(\{\w+\}|\w+)/g ) {
2556 83         173 ( my $variable = $_ ) =~ s/^\{|\}$//g;
2557 83         125 $success &= $self->find_ancillaries($variable);
2558 83 100       96 if ($success) {
2559 65 100       87 if ( ref( $self->{'ancillary'}{$variable} ) ) {
2560 6         55 $tmparg =~ s/\$$variable(\W|\b|$)/\$\{$variable\}$1/g;
2561             } else {
2562 59         52 my $value = $self->{'ancillary'}{$variable};
2563 59         803 $tmparg =~ s/\$$variable(\W|\b|$)/$value$1/g;
2564 59         453 $tmparg =~ s/\$\{$variable\}/$value/g;
2565             }
2566             } else {
2567 18         16 last;
2568             }
2569             } ## end for ($attempt =~ /\$(\{\w+\}|\w+)/g)
2570 43 100       69 if ($success) {
2571 25         34 $self->{'ancillary'}{$field} = $tmparg;
2572 25         56 return 1;
2573             }
2574             } ## end elsif (!ref($attempt))
2575             } ## end foreach my $attempt (@{$ANCILLARY...})
2576              
2577 139         175 return 0;
2578             } ## end sub find_ancillaries
2579              
2580             =head2 extrapolate_variables($missing, $expression, \%row)
2581              
2582             Used by C to convert a parsed ancillary string, such as
2583             C<'$year$month$day'>, into a real value using the fields from the C<\%row>.
2584             C<$expression>s are strings figured out by C and stored in
2585             C<< $self->{'ancillary'} >>.
2586              
2587             The return is undefined if a value cannot be created (IE: a required field is
2588             missing).
2589              
2590             =cut
2591              
2592             sub extrapolate_variables {
2593 220     220 1 235 my ( $self, $missing, $expression, $row ) = @_;
2594              
2595 220 100       252 if ( ref($expression) ) {
2596 47         71 return $self->extrapolate_function( $missing, $expression, $row );
2597             } else {
2598 173         151 my $tmpexpr = $expression;
2599 173         539 for ( $expression =~ /\$(\{\w+\}|\w+)/g ) {
2600 221         460 ( my $variable = $_ ) =~ s/^\{|\}$//g;
2601 221         133 my $value;
2602 221 100 66     603 if ( $self->{'ancillary_tmp'}{$variable} ) {
    50 33        
    0          
2603 123         111 $value = $self->{'ancillary_tmp'}{$variable};
2604             } elsif ( defined( $row->{$variable} ) && ( !defined($missing) || $row->{$variable} != $missing ) ) {
2605 98         91 $value = $row->{$variable};
2606 98         128 $self->{'ancillary_tmp'}{$variable} = $value;
2607             } elsif ( ref( $self->{'ancillary'}{$variable} ) ) {
2608 0         0 $value = $self->extrapolate_function( $missing, $self->{'ancillary'}{$variable}, $row );
2609 0 0       0 if ( !defined($value) ) {
2610 0         0 return;
2611             }
2612             } else {
2613 0         0 return;
2614             }
2615              
2616 221         2080 $tmpexpr =~ s/\$$variable(\W|\b|$)/$value$1/g;
2617 221         1112 $tmpexpr =~ s/\$\{$variable\}/$value/g;
2618             } ## end for ($expression =~ /\$(\{\w+\}|\w+)/g)
2619 173         297 return $tmpexpr;
2620             } ## end else [ if (ref($expression)) ]
2621             } ## end sub extrapolate_variables
2622              
2623             =head2 extrapolate_function($missing, $expression, \%row)
2624              
2625             If the value stored in C<< $self->{'ancillary'} >> is an array reference, this
2626             function uses the array to create an actual value. See
2627             L for an explanation of the
2628             array.
2629              
2630             =cut
2631              
2632             sub extrapolate_function {
2633 47     47 1 55 my ( $self, $missing, $expression, $row ) = @_;
2634 47         31 my $value;
2635 47         67 my ( $function, @args ) = @$expression;
2636 47         28 my $regex;
2637              
2638 47 100       81 if ( ref( $args[0] ) eq 'Regexp' ) {
2639 41         40 $regex = shift(@args);
2640             }
2641 47         75 for (@args) {
2642 47         64 $_ = $self->extrapolate_variables( $missing, $_, $row );
2643 47 50       87 if ( !defined($_) ) {
2644 0         0 return;
2645             }
2646             } ## end for (@args)
2647 47         90 $value = &$function(@args);
2648 47 100       75 if ($regex) {
2649 41 50       178 if ( $value =~ $regex ) {
2650 41         59 $value = $1;
2651             }
2652             }
2653              
2654 47         121 return $value;
2655             } ## end sub extrapolate_function
2656              
2657             =head1 STATIC METHODS
2658              
2659             =head2 strip(@list)
2660              
2661             my @space_filled_lines = (' line1 ', ' line2', 'line3 ', 'line4');
2662             strip(@space_filled_lines);
2663             print @space_filled_lines; #line1line2line3line4
2664              
2665             Runs through the list and removes leading and trailing whitespace. All changes
2666             are made in place.
2667              
2668             It is literally this:
2669              
2670             sub strip {
2671             s/^\s+|\s+$//g for @_;
2672             }
2673              
2674             =cut
2675              
2676             # Not an object method!
2677             sub strip {
2678 8460     8460 1 30814 s/^\s+|\s+$//g for @_;
2679             }
2680              
2681             =head2 julian_to_greg($yyyyjjj)
2682              
2683             Converts a date in the day of year format YYYYJJJ into YYYYMMDD. Returns the
2684             newly formatted string or undefined if the input does not match the required
2685             format.
2686              
2687             This uses the C function from C to do the heavy
2688             lifting.
2689              
2690             =cut
2691              
2692             # Not an object method!
2693             sub julian_to_greg {
2694 14     14 1 18 my ($yyyyjjj) = @_;
2695 14 50       55 if ( $yyyyjjj =~ /^(\d{4})(\d{3})$/ ) {
2696 14         84 my ( $y, $m, $d ) = Add_Delta_Days( $1, 1, 1, $2 - 1 );
2697 14         50 return sprintf( '%04d%02d%02d', $y, $m, $d );
2698             }
2699 0           return;
2700             } ## end sub julian_to_greg
2701              
2702             =head1 CAVEATS/ODDITIES
2703              
2704             =head2 Duplicate Fields
2705              
2706             This class will not allow a field to be added to the object if a field of the
2707             same name already exists. If a file being read has duplicate field names, only
2708             the B one is used. No warning is issued. If C is used to
2709             remove it, only the first instance will be deleted. To delete all instances,
2710             use C<< $sb_file->remove_field($sb_file->find_fields('chl')) >>. This may
2711             change in future releases.
2712              
2713             =head2 Changing Delimiter or Missing Value
2714              
2715             Modifying the delimiter header on a file that is being read will cause any
2716             non-cached rows to be split by the new delimiter, which should break most/all
2717             files. If the delimiter must be changed, call C to cache all the rows,
2718             then change it. This will obviously not work if caching is turned off. The
2719             same is true for setting the missing value, but only really applies when the
2720             C option is used (same goes to below detection limit).
2721              
2722             =head2 Below Detection Limit
2723              
2724             Below detection limit is only partially supported. If C is
2725             used, fields equal to C will be set to C, as
2726             well. Files modified while using C will have all data
2727             equal to C written out set to the missing value instead
2728             of the below detection limit value. If the below detection limit value is equal
2729             to the missing value or C is used, the
2730             C header will not be written.
2731              
2732             =head1 AUTHOR
2733              
2734             Jason Lefler, C<< >>
2735              
2736             =head1 BUGS
2737              
2738             Please report any bugs or feature requests to C
2739             rt.cpan.org>, or through the web interface at
2740             L. I will be
2741             notified, and then you'll automatically be notified of progress on your bug as
2742             I make changes.
2743              
2744             =head1 SUPPORT
2745              
2746             You can find documentation for this module with the perldoc command.
2747              
2748             perldoc SeaBASS::File
2749              
2750             You can also look for information at:
2751              
2752             =over 4
2753              
2754             =item * RT: CPAN's request tracker (report bugs here)
2755              
2756             L
2757              
2758             =item * AnnoCPAN: Annotated CPAN documentation
2759              
2760             L
2761              
2762             =item * CPAN Ratings
2763              
2764             L
2765              
2766             =item * Search CPAN
2767              
2768             L
2769              
2770             =back
2771              
2772             =head1 LICENSE AND COPYRIGHT
2773              
2774             Copyright 2014 Jason Lefler.
2775              
2776             This program is free software; you can redistribute it and/or modify it under
2777             the terms of either: the GNU General Public License as published by the Free
2778             Software Foundation; or the Artistic License.
2779              
2780             See http://dev.perl.org/licenses/ for more information.
2781              
2782             =cut
2783              
2784             1; # End of SeaBASS::File