File Coverage

blib/lib/SeaBASS/File.pm
Criterion Covered Total %
statement 903 1034 87.3
branch 467 586 79.6
condition 125 180 69.4
subroutine 70 75 93.3
pod 48 48 100.0
total 1613 1923 83.8


line stmt bran cond sub pod time code
1             package SeaBASS::File;
2              
3 24     24   1608657 use strict;
  24         71  
  24         749  
4 24     24   147 use warnings;
  24         56  
  24         1770  
5              
6             =head1 NAME
7              
8             SeaBASS::File - Object-oriented interface for reading/writing SeaBASS files
9              
10             =head1 VERSION
11              
12             version 0.171600
13              
14             =cut
15              
16             our $VERSION = '0.171600'; # VERSION
17              
18             =head1 SYNOPSIS
19              
20             To read SeaBASS files:
21              
22             use SeaBASS::File qw(STRICT_READ STRICT_WRITE INSERT_BEGINNING INSERT_END);
23              
24             my $sb_file = SeaBASS::File->new("input.txt");
25              
26             # Calculate the average chlorophyll value using next
27             my $chl_total = 0;
28             my $measurements = 0;
29            
30             while (my $row = $sb_file->next()){
31             if (defined($row->{'chl'})){
32             $chl_total += $row->{'chl'};
33             $measurements++;
34             }
35             }
36             if ($measurements){
37             print $chl_total/$measurements;
38             } else {
39             print "No chl values.";
40             }
41            
42             #alternatively:
43             $sb_file->rewind();
44             while (my %row = $sb_file->next()){
45             if (defined($row{'chl'})){
46             $chl_total += $row{'chl'};
47             $measurements++;
48             }
49             }
50              
51             # Calculate the average chlorophyll value using where
52             my $chl_total2 = 0;
53             my $measurements2 = 0;
54             $sb_file->where(sub {
55             if (defined($_->{'chl'})){
56             $chl_total2 += $_->{'chl'};
57             $measurements2++;
58             }
59             });
60             if ($measurements2){
61             print $chl_total2/$measurements2;
62             } else {
63             print "No chl values.";
64             }
65            
66             Or to modify SeaBASS files:
67              
68             use SeaBASS::File qw(STRICT_READ STRICT_WRITE INSERT_BEGINNING INSERT_END);
69              
70             my $sb_file = SeaBASS::File->new("input.txt");
71              
72             # Add a one degree bias to water temperature
73             while (my $row = $sb_file->next()){
74             $row->{'wt'} += 1;
75             $sb_file->update($row);
76             }
77            
78             $sb_file->write(); # to STDOUT
79              
80             # Remove the one degree bias to water temperature
81             $sb_file->where(sub {
82             $_->{'wt'} -= 1;
83             });
84            
85             $sb_file->write("output_file.txt");
86            
87             Or to start a SeaBASS file from scratch:
88              
89             use SeaBASS::File qw(STRICT_READ STRICT_WRITE INSERT_BEGINNING INSERT_END);
90              
91             my $sb_file = SeaBASS::File->new({strict => 0, add_empty_headers => 1});
92             $sb_file->add_field('lat','degrees');
93             $sb_file->add_field('lon','degrees');
94             $sb_file->append({'lat' => 1, 'lon' => 2});
95             $sb_file->append("3,4"); # or if you're reading from a CSV file
96             $sb_file->write();
97              
98             =head1 DESCRIPTION
99              
100             C provides an easy to use, object-oriented interface for
101             reading, writing, and modifying SeaBASS data files.
102              
103             =head2 What is SeaBASS?
104              
105             L Bio-optical Archive and Storage
106             System housed at Goddard Space Flight Center.
107             L provides the permanent public
108             repository for data collected under the auspices of the NASA Ocean Biology and
109             Biogeochemistry Program. It also houses data collected by participants in the
110             NASA Sensor Intercomparision and Merger for Biological and Oceanic
111             Interdisciplinary Studies (SIMBIOS) Program. SeaBASS includes marine
112             bio-optical, biogeochemical, and (some) atmospheric data.
113              
114             =head2 SeaBASS File Format
115              
116             SeaBASS files are plain ASCII files with a special header and a matrix of
117             values.
118              
119             =head3 Header
120              
121             The SeaBASS header block consists of many lines of header-keyword pairs. Some
122             headers are optional but most, although technically not required for reading,
123             are required to be ingested into the system. More detailed information is
124             available in the SeaBASS L
125             article|http://seabass.gsfc.nasa.gov/wiki/article.cgi?article=metadataheaders>.
126             The only absolutely required header for this module to work is the /fields
127             line. This module turns fields and units lowercase at all times.
128              
129             /begin_header
130             /delimiter=space
131             /missing=-999
132             /fields=date,time,lat,lon,depth,wt,sal
133             /end_header
134            
135             =head3 Body
136              
137             The SeaBASS body is a matrix of data values, organized much like a spreadsheet.
138             Each column is separated by the value presented in the /delimiter header.
139             Likewise, missing values are indicated by the value presented in the /missing
140             header. The /fields header identifies the geophysical parameter presented in
141             each column.
142              
143             /begin_header
144             /delimiter=space
145             /missing=-999
146             /fields=date,time,lat,lon,depth,wt,sal
147             /end_header
148             19920109 16:30:00 31.389 -64.702 3.4 20.7320 -999
149             19920109 16:30:00 31.389 -64.702 19.1 20.7350 -999
150             19920109 16:30:00 31.389 -64.702 38.3 20.7400 -999
151             19920109 16:30:00 31.389 -64.702 59.6 20.7450 -999
152              
153             =head3 Strictly Speaking
154              
155             SeaBASS files are run through a program called
156             L before
157             they are submitted and before they are ingested into a NASA relational database
158             management system. Some of the things it checks for are required
159             L
160             and proper L
161             names|http://seabass.gsfc.nasa.gov/wiki/article.cgi?article=stdfields>. All
162             data must always have an associated depth, time, and location, though these
163             fields may be placed in the header and are not always required in the data.
164             Just because this module writes the files does not mean they will pass FCHECK.
165              
166             Files are case-INsensitive. Headers are not allowed to have any whitespace.
167              
168             =cut
169              
170 24     24   157 use Carp qw(:DEFAULT);
  24         64  
  24         4198  
171 24     24   170 use Fcntl qw(SEEK_SET);
  24         52  
  24         1121  
172 24     24   6346 use List::MoreUtils qw(firstidx each_arrayref);
  24         96925  
  24         225  
173 24     24   29910 use Date::Calc qw(Add_Delta_Days);
  24         139818  
  24         2087  
174 24     24   224 use Scalar::Util qw(looks_like_number);
  24         60  
  24         281267  
175              
176             require Exporter;
177             our @ISA = qw(Exporter);
178             our @EXPORT_OK = qw(STRICT_READ STRICT_WRITE STRICT_ALL INSERT_BEGINNING INSERT_END);
179             our @EXPORT = qw();
180              
181             our $AUTOLOAD;
182              
183             =head1 EXPORT
184              
185             This module does not export anything by default.
186              
187             =head2 STRICT_READ
188              
189             C is used with the C option, enabling error messages when
190             reading header lines and inserting header data.
191              
192             =head2 STRICT_WRITE
193              
194             C is used with the C option, enabling error messages when
195             writing the data to a file/stream.
196              
197             =head2 STRICT_ALL
198              
199             C is used with the C option, enabling C and
200             C.
201              
202             =head2 INSERT_BEGINNING
203              
204             C is used with L
205             \@data_row | $data_row | %data_row)"> or L
206             $unit [, $position]])"> to insert a data row or field at the beginning of their
207             respective lists.
208              
209             =head2 INSERT_END
210              
211             C is used with L
212             $data_row | %data_row)"> or L
213             $position]])"> to insert a data row or field at the end of their respective
214             lists.
215              
216             =cut
217              
218 282     282 1 9007 sub STRICT_READ {1}
219 444     444 1 1167 sub STRICT_WRITE {2}
220 1     1 1 2576 sub STRICT_ALL {3}
221 22     22 1 88 sub INSERT_BEGINNING {0}
222 104     104 1 306 sub INSERT_END {-1}
223              
224             my %DEFAULT_OPTIONS = (
225             default_headers => {},
226             headers => {},
227             preserve_case => 1,
228             keep_slashes => 0,
229             cache => 1,
230             delete_missing_headers => 0,
231             missing_data_to_undef => 1,
232             preserve_comments => 1,
233             add_empty_headers => 0,
234             strict => STRICT_WRITE,
235             fill_ancillary_data => 0,
236             preserve_header => 0,
237             preserve_detection_limits => 0,
238             );
239              
240             #return values for ref()
241             my %OPTION_TYPES = (
242             default_headers => [ 'ARRAY', 'HASH' ],
243             headers => [ 'ARRAY', 'HASH' ],
244             preserve_case => [''],
245             keep_slashes => [''],
246             cache => [''],
247             delete_missing_headers => [''],
248             missing_data_to_undef => [''],
249             preserve_comments => [''],
250             add_empty_headers => [''],
251             strict => [''],
252             fill_ancillary_data => [''],
253             preserve_header => [''],
254             preserve_detection_limits => [''],
255             );
256              
257             # All headers required by STRICT_READ and STRICT_WRITE
258             my @REQUIRED_HEADERS = qw(
259             begin_header
260             investigators
261             affiliations
262             contact
263             experiment
264             cruise
265             data_file_name
266             documents
267             calibration_files
268             data_type
269             start_date
270             end_date
271             start_time
272             end_time
273             north_latitude
274             south_latitude
275             east_longitude
276             west_longitude
277             missing
278             delimiter
279             units
280             end_header
281             );
282              
283             # Headers that must be specified, regardless of strictness.
284             my @ABSOLUTELY_REQUIRED_HEADERS = qw(
285             fields
286             );
287              
288             # Valid headers used for STRICT_READ and STRICT_WRITE
289             our @ALL_HEADERS = qw(
290             begin_header
291             investigators
292             affiliations
293             contact
294             experiment
295             cruise
296             station
297             data_file_name
298             documents
299             calibration_files
300             data_type
301             data_status
302             start_date
303             end_date
304             start_time
305             end_time
306             north_latitude
307             south_latitude
308             east_longitude
309             west_longitude
310             cloud_percent
311             measurement_depth
312             secchi_depth
313             water_depth
314             wave_height
315             wind_speed
316             missing
317             below_detection_limit
318             above_detection_limit
319             delimiter
320             fields
321             units
322             end_header
323             );
324              
325             # headers that are allowed but are not to be added during add_empty_headers
326             my @HIDDEN_HEADERS = qw(
327             received
328             );
329              
330             # what to set the missing value to if it's not defined
331             our $DEFAULT_MISSING = -999;
332             our $DEFAULT_BDL = -888;
333             our $DEFAULT_ADL = -777;
334              
335             # overly complex data structure only understandable by idiots
336             # your IQ is the percentage chance that it won't make sense
337             # IE: my IQ = 40, I have a 60% chance of it making sense to me
338             my %ANCILLARY = (
339             'lat' => [ { 'north_latitude' => qr/^(.*?)$/ }, { 'south_latitude' => qr/^(.*?)$/ } ],
340             'lon' => [ { 'east_longitude' => qr/^(.*?)$/ }, { 'west_longitude' => qr/^(.*?)$/ } ],
341             'depth' => [ { 'measurement_depth' => qr/^(.*?)$/ }, ],
342             'date_time' => [ '$date $time', ],
343             'date' => [ [ \&julian_to_greg, '$year$julian' ], [ \&julian_to_greg, '$year$jd' ], [ \&julian_to_greg, '$year$sdy' ], '$year$month$day', ],
344             'year' => [ { '$date' => qr/^(\d{4})/ }, { 'start_date' => qr/^(\d{4})/ }, ],
345             'month' => [ { '$date' => qr/^\d{4}(\d{2})/ }, [ \&julian_to_greg, qr/^\d{4}(\d{2})\d{2}$/, '$year$julian' ], [ \&julian_to_greg, qr/^\d{4}(\d{2})\d{2}$/, '$year$jd' ], [ \&julian_to_greg, qr/^\d{4}(\d{2})\d{2}$/, '$year$sdy' ], { 'start_date' => qr/^\d{4}(\d{2})/ }, ],
346             'day' => [ { '$date' => qr/^\d{6}(\d{2})/ }, [ \&julian_to_greg, qr/^\d{4}\d{2}(\d{2})$/, '$year$julian' ], [ \&julian_to_greg, qr/^\d{4}\d{2}(\d{2})$/, '$year$jd' ], [ \&julian_to_greg, qr/^\d{4}\d{2}(\d{2})$/, '$year$sdy' ], { 'start_date' => qr/^\d{6}(\d{2})/ }, ],
347             'time' => [ '$hour:$minute:$second', ],
348             'hour' => [ { '$time' => qr/^(\d+):/ }, { 'start_time' => qr/^(\d+):/ }, ],
349             'minute' => [ { '$time' => qr/:(\d+):/ }, { 'start_time' => qr/:(\d+):/ }, ],
350             'second' => [ { '$time' => qr/:(\d+)(?:[^:\d]|$)/ }, { 'start_time' => qr/:(\d+)(?:[^:\d]|$)/ }, ],
351             'station' => [ { 'station' => qr/^(.*?)$/ }, ],
352             );
353              
354             # what fill_ancillary_data adds to each row
355             #my @FILL_ANCILLARY_DATA = qw(date time date_time lat lon depth);
356             my @FILL_ANCILLARY_DATA = keys(%ANCILLARY);
357              
358             my %FIELD_FORMATTING = (
359             'year' => '%04d',
360             'month' => '%02d',
361             'day' => '%02d',
362             'julian' => '%03d',
363             'sdy' => '%03d',
364             'hour' => '%02d',
365             'minute' => '%02d',
366             'second' => '%02d',
367             );
368              
369             =head1 CONSTRUCTOR
370              
371             =head2 new([$filename,] [\%options])
372              
373             my $sb_file = SeaBASS::File->new("input_file.txt");
374             my $sb_file = SeaBASS::File->new("input_file.txt", { delete_missing_headers => 1 });
375             my $sb_file = SeaBASS::File->new("output_file.txt", { add_empty_headers => 1 });
376             my $sb_file = SeaBASS::File->new({ add_empty_headers => 1 });
377              
378             Creates a C object. If the file specified exists, the object
379             can be used to read the file. If the file specified does not exist, an empty
380             object is created and will be written to the specified file by default when
381             invoking C.
382              
383             Options should be given in a hash reference to ensure proper argument parsing.
384             If a file is specified, options can be given as a hash list.
385              
386             =over 4
387              
388             =item * default_headers
389              
390             =item * headers
391              
392             These two options accept either an array reference or a hash reference. They
393             are used to set or override header information. First, headers are read from
394             C, then from the data file itself, then are overridden by
395             whatever is in C.
396              
397             Arguments are an array reference of header lines, or a hash reference of
398             header/value pairs.
399              
400             my $sb_file = SeaBASS::File->new({
401             default_headers => [
402             '/cruise=fake_cruise',
403             '/experiment=default_experiment',
404             ],
405             headers => {
406             'experiment' => 'real_experiment',
407             },
408             });
409            
410             B Modifying the delimiter or missing value will likely break the
411             object. Modifying these will change the expected format for all rows. Do so
412             with caution.
413              
414             =item * preserve_case
415              
416             C<1> or C<0>, default C<1>. Setting this to C<0> will change all values in the
417             header to lowercase. Header descriptors (the /header part) are always turned
418             to lowercase, as well as all fields and units.
419              
420             =item * keep_slashes
421              
422             C<1> or C<0>, default C<0>. Forces the object to keep the / in the beginning of
423             headers when accessed. If set to C<1>, when using the L
424             \%new_headers | \@get_headers | @get_headers ])"> function, they will be
425             returned with leading slash.
426              
427             =item * cache
428              
429             C<1> or C<0>, default C<1>. Enables caching data rows as they are read. This
430             speeds up re-reads and allows the data to be modified. This is required for
431             writing files.
432              
433             =item * delete_missing_headers
434              
435             C<1> or C<0>, default C<0>. Any headers that are equal to the /missing header,
436             NA, or are not defined (when using the C options) are
437             deleted. They cannot be retrieved using C and will not be written.
438              
439             =item * missing_data_to_undef
440              
441             C<1> or C<0>, default C<1>. If any values in the data block are equal to the
442             /missing, /above_detection_limit, /below_detection_limit headers, they are set
443             to undef when they are retrieved.
444              
445             =item * preserve_comments
446              
447             C<1> or C<0>, default C<1>. Setting this option to zero will discard any
448             comments found in the header.
449              
450             =item * add_empty_headers
451              
452             C<0>, C<1>, or a string. If set to a string, this will populate any missing
453             headers, including optional ones, and will set their value to the string given.
454             If set to 1, the string 'NA' is used. This option disables C.
455              
456             =item * strict
457              
458             my $sb_file = SeaBASS::File->new("input_file.txt", {strict => STRICT_ALL});
459             my $sb_file = SeaBASS::File->new("input_file.txt", {strict => (STRICT_READ | STRICT_WRITE)});
460             my $sb_file = SeaBASS::File->new("input_file.txt", {strict => 0});
461             my $sb_file = SeaBASS::File->new("input_file.txt", {strict => STRICT_WRITE}); #default
462              
463             =over 4
464              
465             =item * C
466              
467             C will throw errors when reading invalid headers, missing required
468             ones, or an invalid delimiter. This may change in future revisions.
469              
470             =item * C
471              
472             C will throw the same errors when writing the data to a file or
473             stream. C only checks for required headers and invalid headers,
474             but does not check their values to see if they are actually filled. This may
475             change in future revisions.
476              
477             =back
478              
479             =item * fill_ancillary_data
480              
481             C<0> or C<1>, default C<0>. Insert date, time, measurement depth, station, and
482             location values to the data rows from the headers. Values are not overridden
483             if they are already present. This option is only useful when reading files.
484              
485             B It is bad practice to include these fields in the data if they don't
486             change throughout the file. This option is used to remove the burden of
487             checking whether they are in the data or header.
488              
489             B This option will also combine individual date/time
490             parts in the data (year/month/day/etc) to create more uniform date/time fields.
491              
492             B If any part of a date/time is missing, the fields
493             dependent on it will not be added to the row.
494              
495             =item * preserve_header
496              
497             C<0> or C<1>, default C<0>. Preserves header and comment order. This option
498             disables modifying the header, as well, but will not error if you try -- it will
499             simply not be reflected in the output.
500              
501             =item * preserve_detection_limits
502              
503             C<0> or C<1>, default C<0>. Disables setting values equal to below_detection_limit
504             or above_detection_limit to null while reading files. This should only be used
505             during read-only operation, as there is no telling missing data from data
506             outside limits.
507              
508             =back
509              
510             =cut
511              
512             sub new {
513 138     138 1 250357 my ( $class, $file ) = ( shift, shift );
514              
515 138         477 my $self = bless( {}, $class );
516              
517 138         303 my %myoptions;
518              
519 138 100       759 if ( ref($file) eq 'HASH' ) {
    100          
    50          
520 4         52 %myoptions = ( %DEFAULT_OPTIONS, %$file );
521 4         13 $file = '';
522             } elsif ( ref( $_[0] ) eq 'HASH' ) {
523 126         720 %myoptions = ( %DEFAULT_OPTIONS, %{ $_[0] } );
  126         1061  
524             } elsif ( !ref( $_[0] ) ) {
525 8 100       35 if ( $#_ % 2 == 1 ) {
526 7         81 %myoptions = ( %DEFAULT_OPTIONS, @_ );
527             } else {
528 1         334 croak('Even sized list expected');
529             }
530             } else {
531 0         0 croak("Arguments not understood.");
532             }
533              
534 137         601 $self->{'options'} = \%myoptions;
535 137         702 $self->check_options();
536              
537 133 50       614 if ( ref($file) eq 'GLOB' ) {
    100          
    100          
538 0         0 $self->{'handle'} = $file;
539             } elsif ( ref($file) eq 'SCALAR' ) {
540 127         1516 open( my $fh, "<", $file );
541 127         462 $self->{'handle'} = $fh;
542             } elsif ($file) {
543 2 100       9 if ( !ref($file) ) {
544 1 50       17 if ( -r $file ) {
    0          
545 1         21 open( my $fh, "<", $file );
546 1         4 $self->{'handle'} = $fh;
547             } elsif ( $self->{'options'}{'strict'} & STRICT_READ ) {
548 0         0 croak("Strict read set, but input file not found or unreadable.");
549             } else {
550 0         0 $self->{'default_write_to'} = $file;
551 0         0 $file = '';
552             }
553             } else {
554 1         94 croak("Invalid parameter, expected file path or file handle.");
555             }
556             } ## end elsif ($file)
557 132 100       494 if ($file) {
558 128 50       550 unless ( $self->read_headers() ) {
559 0 0       0 unless ( $self->{'options'}{'strict'} & STRICT_READ ) {
560 0         0 return;
561             }
562             }
563             } else {
564 4         20 $self->create_blank_file();
565             }
566 128         491 return $self;
567             } ## end sub new
568              
569             =head1 OBJECT METHODS
570              
571             =head2 add_headers(\%headers | \@header_lines | @header_lines)
572              
573             $sb_file->add_headers({'investigators' => 'jason_lefler'});
574             $sb_file->add_headers(['/investigators=jason_lefler']);
575             $sb_file->add_headers('/investigators=jason_lefler');
576            
577             C is used to add or override metadata for a C, as
578             well as add comments.
579              
580             This function can not be used to change fields/units, see
581             L and
582             L for that.
583              
584             B Modifying the delimiter or missing value halfway through
585             reading/writing will likely break the object. Modifying these will change the
586             expected format for any new or non-cached rows. Do so with caution.
587              
588             =cut
589              
590             sub add_headers {
591 397     397 1 749 my $self = shift;
592 397         689 my $success = 1;
593 397         1098 my $strict = $self->{'options'}{'strict'} & STRICT_WRITE;
594 397 100       1427 if ( ref( $_[0] ) eq 'HASH' ) {
    100          
    50          
595 259         518 while ( my ( $k, $v ) = each( %{ $_[0] } ) ) {
  267         1171  
596 8         26 $success &= $self->validate_header( $k, $v, $strict );
597 8         26 $self->{'headers'}{$k} = $v;
598             }
599             } elsif ( ref( $_[0] ) eq 'ARRAY' ) {
600 137         270 foreach ( @{ $_[0] } ) {
  137         410  
601 2570 100 100     9047 if ( $_ =~ /^\s*!/ ) {
    100          
602 11         19 push( @{ $self->{'comments'} }, $_ );
  11         29  
603             } elsif ( $strict && $_ !~ m"^/" ) {
604 2         458 carp("Invalid header line: $_");
605 2         290 $success = 0;
606             } else {
607 2557         7679 my ( $k, $v ) = split( /=/, $_, 2 );
608 2557         6039 $success &= $self->validate_header( $k, $v, $strict );
609 2557         7330 $self->{'headers'}{$k} = $v;
610             }
611             } ## end foreach (@{$_[0]})
612             } elsif ( !ref( $_[0] ) ) {
613 1         5 foreach (@_) {
614 1 50 33     15 if ( $_ =~ /^\s*!/ ) {
    50          
615 0         0 push( @{ $self->{'comments'} }, $_ );
  0         0  
616             } elsif ( $strict && $_ !~ m"^/" ) {
617 0         0 carp("Invalid header line: $_");
618 0         0 $success = 0;
619             } else {
620 1         8 my ( $k, $v ) = split( /=/, $_, 2 );
621 1         5 $success &= $self->validate_header( $k, $v, $strict );
622 1         6 $self->{'headers'}{$k} = $v;
623             }
624             } ## end foreach (@_)
625             } else {
626 0         0 $success = 0;
627             }
628 397         859 return $success;
629             } ## end sub add_headers
630              
631             =head2 headers([ \%new_headers | \@get_headers | @get_headers ])
632              
633             =head2 head
634              
635             =head2 h
636              
637             my %headers = $sb_file->headers(['investigators']);
638             print Dumper(\%headers); # { investigators => 'jason_lefler' }
639            
640             my ($inv) = $sb_file->headers('investigators');
641             print $inv; # jason_lefler
642            
643             $sb_file->headers({investigators => 'jason_lefler'});
644            
645             $sb_file->headers()->{'investigators'} = 'jason_lefler';
646            
647             C is used to read or modify header values. Given an array reference
648             of header names, it will return a hash/hash reference with header/value pairs.
649             Given a plain list of header names, it will return an array/array reference of
650             the given header values. Given a hash reference, this function is a proxy for
651             L.
652              
653             If C is set, then headers will be returned as such, IE: C<<
654             {'/investigators' => 'jason_lefler'} >>.
655              
656             This function can also be used to set header values without going through the
657             normal validation.
658              
659             C and C are aliases to C.
660              
661             =cut
662              
663 0     0 1 0 sub head { shift->headers(@_); }
664 22     22 1 1347 sub h { shift->headers(@_); }
665              
666             sub headers {
667 25     25 1 844 my $self = shift;
668 25 100       86 if ( !@_ ) {
    100          
    100          
669 21         159 return $self->{'headers'};
670             } elsif ( ref( $_[0] ) eq 'HASH' ) {
671 1         5 return $self->add_headers(@_);
672             } elsif ( ref( $_[0] ) eq 'ARRAY' ) {
673 1         33 my %ret;
674 1         8 for my $header ( @{ $_[0] } ) {
  1         15  
675 2         10 $ret{$header} = $self->{'headers'}{ lc($header) };
676             }
677 1 50       9 if (wantarray) {
678 1         10 return %ret;
679             } else {
680 0         0 return \%ret;
681             }
682             } else {
683 2         4 my @ret;
684 2         6 foreach (@_) {
685 4 50       12 if ( !ref ) {
686 4         12 my $value = $self->{'headers'}{ lc($_) };
687 4 50       13 push( @ret, defined($value) ? $value : undef );
688             } else {
689 0         0 croak("Argument not understood: $_");
690             }
691             } ## end foreach (@_)
692 2 100       10 if (wantarray) {
    50          
693 1         5 return @ret;
694             } elsif ( $#ret == 0 ) {
695 0         0 return $ret[0];
696             } else {
697 1         6 return \@ret;
698             }
699             } ## end else [ if (!@_) ]
700              
701             } ## end sub headers
702              
703             =head2 data([$index])
704              
705             =head2 d
706              
707             =head2 body
708              
709             =head2 b
710              
711             =head2 all
712              
713             my $row = $sb_file->data(1);
714             my @rows = $sb_file->all();
715              
716             C is responsible for returning either a data line via an index or all of
717             the data lines at once.
718              
719             Data is returned as C<< field => value >> pairs.
720              
721             If given an index: in list context, returns the hash of the row; in scalar
722             context, returns a reference to the row.
723              
724             If not given an index: in list context, returns an array of the rows; in scalar
725             context, returns a reference to an array of the rows.
726              
727             If given an index out of range, returns undef. If given a negative index,
728             Cs the file, then returns undef.
729              
730             If C is enabled and the row has already been read, it is retrieved from
731             the cache. If it has not already be read, all rows leading up to the desired
732             row will be read and cached, and the desired row returned.
733              
734             If C is disabled and either all rows are retrieved or a previously
735             retrieved row is called again, the file will C, then seek to the
736             desired row.
737              
738             C, C, C, and C are all aliases to C. (Yes, that means
739             C can be used with arguments, it would just look silly.)
740              
741             =cut
742              
743 0     0 1 0 sub body { shift->data(@_); }
744 0     0 1 0 sub b { shift->data(@_); }
745 2     2 1 1558 sub d { shift->data(@_); }
746 42     42 1 9020 sub all { shift->data(@_); }
747              
748             sub data {
749 133     133 1 6904 my ( $self, $index ) = @_;
750 133 100       375 if ( defined($index) ) {
751 81 100       233 if ( $index < 0 ) {
752 12         28 $self->rewind();
753 12         21 return;
754             }
755 69 100       186 if ( $self->{'options'}{'cache'} ) {
756 63 100       189 if ( $index > $self->{'max_dataidx'} ) {
757 39         68 my $startidx = $self->{'dataidx'};
758 39         120 for ( my $i = 0; $i < ( $index - $startidx ); $i++ ) {
759 73 100       181 if ( !$self->next() ) {
760 4         15 return;
761             }
762             }
763             } ## end if ($index > $self->{'max_dataidx'...})
764              
765 59         116 $self->{'dataidx'} = $index;
766              
767 59 50       137 if (wantarray) {
768 0         0 return %{ $self->{'data'}[$index] };
  0         0  
769             } else {
770 59         351 return $self->{'data'}[$index];
771             }
772             } else {
773 6 100       22 if ( $index <= $self->{'dataidx'} ) {
774 3         12 $self->rewind();
775             }
776 6         12 my $startidx = $self->{'dataidx'};
777 6         21 for ( my $i = 0; $i < ( $index - $startidx - 1 ); $i++ ) {
778 7 50       21 if ( !$self->next() ) {
779 0         0 return;
780             }
781             }
782 6         15 return $self->next();
783             } ## end else [ if ($self->{'options'}...)]
784             } else {
785 52 100       175 if ( $self->{'options'}{'cache'} ) {
786 45         151 while ( $self->next() ) {
787             # noop
788             }
789 45 100       139 if (wantarray) {
790 7         16 return @{ $self->{'data'} };
  7         53  
791             } else {
792 38         208 return $self->{'data'};
793             }
794             } else {
795 7         29 $self->rewind();
796 7         15 my @data_rows;
797 7         24 while ( my $data = $self->next() ) {
798 28         99 push( @data_rows, $data );
799             }
800 7 100       25 if (wantarray) {
801 6         34 return @data_rows;
802             } else {
803 1         6 return \@data_rows;
804             }
805             } ## end else [ if ($self->{'options'}...)]
806             } ## end else [ if (defined($index)) ]
807             } ## end sub data
808              
809             =head2 next()
810              
811             while (my $row = $sb_file->next()){
812             print $row->{'lat'};
813             ...
814             }
815             while (my %row = $sb_file->next()){
816             print $row{'lat'};
817             ...
818             }
819            
820             Returns the next data row in the file, returning C when it runs out of
821             rows.
822              
823             Data is returned as C<< field => value >> pairs.
824              
825             In list context, returns a hash of the row. In scalar context, returns a
826             reference to the hash of a row.
827              
828             After a C, C will return the very first data hash, then each row
829             in turn. If the row has been cached, it's retrieved from the cache instead of
830             rereading from the file.
831              
832             =cut
833              
834             sub next {
835 429     429 1 22006 my $self = shift;
836 429 50       1117 if (@_) {
837 0         0 croak("invalid number of arguments on next(), expected 0.");
838             }
839              
840 429 100 100     2265 if ( $self->{'options'}{'cache'} && $self->{'dataidx'} < $self->{'max_dataidx'} ) {
    100          
841 73         140 $self->{'dataidx'}++;
842              
843 73 50       165 if (wantarray) {
844 0         0 return %{ $self->{'data'}[ $self->{'dataidx'} ] };
  0         0  
845             } else {
846 73         309 return $self->{'data'}[ $self->{'dataidx'} ];
847             }
848             } elsif ( $self->{'handle'} ) {
849 344         625 my $handle = $self->{'handle'};
850 344         584 my $line_number = $self->{'line_number'};
851              
852 344         1548 while ( my $line = <$handle> ) {
853 266         534 $line_number++;
854 266         763 strip($line);
855 266 50       753 if ($line) {
856 266         858 my $data_row = $self->make_data_hash($line);
857 266         565 $self->{'line_number'} = $line_number;
858 266 100       704 if ( $self->{'options'}{'cache'} ) {
859 204         320 push( @{ $self->{'data'} }, $data_row );
  204         477  
860             }
861 266         470 $self->{'dataidx'}++;
862 266 100       693 if ( $self->{'dataidx'} > $self->{'max_dataidx'} ) {
863 238         435 $self->{'max_dataidx'} = $self->{'dataidx'};
864             }
865 266 50       556 if (wantarray) {
866 0         0 return %{$data_row};
  0         0  
867             } else {
868 266         1135 return $data_row;
869             }
870             } ## end if ($line)
871             } ## end while (my $line = <$handle>)
872             } ## end elsif ($self->{'handle'})
873 90         321 return;
874             } ## end sub next
875              
876             =head2 rewind()
877              
878             C seeks to the start of the data. The next C will return the
879             very first row (or C). If caching is enabled, it will not actually
880             perform a seek, it will merely reset the index interator. If caching is
881             disabled, a seek is performed on the file handle to return to the start of the
882             data.
883              
884             =cut
885              
886             sub rewind {
887 61     61 1 1616 my ($self) = @_;
888 61 100       219 if ( $self->{'dataidx'} != -1 ) {
889 31 100       104 if ( !$self->{'options'}{'cache'} ) {
890 9         36 seek( $self->{'handle'}, $self->{'data_start_position'}, SEEK_SET );
891             }
892 31         66 $self->{'line_number'} = $self->{'data_start_line'};
893 31         66 $self->{'dataidx'} = -1;
894             } ## end if ($self->{'dataidx'}...)
895             } ## end sub rewind
896              
897             =head2 update(\%data_row | \@data_row | $data_row | %data_row)
898              
899             while (my %row = $sb_file->next()){
900             if ($row{'depth'} == -999){
901             $row{'depth'} = 0;
902             }
903             $sb_file->update(\%row);
904             }
905            
906             # Less useful for update():
907             print join(',',@{$sb_file->actual_fields()}); #lat,lon,depth,chl
908            
909             while (my %row = $sb_file->next()){
910             if ($row{'depth'} == -999){
911             $row{'depth'} = 0;
912             }
913             $sb_file->update(@row{'lat','lon','depth','chl'});
914             # or
915             $sb_file->update([@row{'lat','lon','depth','chl'}]);
916             }
917              
918             C replaces the last row read (using C) with the input.
919              
920             Caching must be enabled to use C, C, or C.
921              
922             =cut
923              
924             sub update {
925 12     12 1 79 my $self = shift;
926 12 100       50 if ( !$self->{'options'}{'cache'} ) {
    100          
927 1         131 croak("Caching must be enabled to write.");
928             } elsif ( $self->{'dataidx'} == -1 ) {
929 1         184 croak("No rows read yet.");
930             }
931 10         34 my $new_row = $self->ingest_row(@_);
932 10 100       31 unless ( defined($new_row) ) {
933 2         145 croak("Error parsing inputs");
934             }
935 8         21 $self->{'data'}[ $self->{'dataidx'} ] = $new_row;
936             } ## end sub update
937              
938             =head2 set($index, \%data_row | \@data_row | $data_row | %data_row)
939              
940             my %row = (lat => 1, lon => 2, chl => 1);
941             $sb_file->set(0, \%row);
942            
943             print join(',',@{$sb_file->actual_fields()}); #lat,lon,chl
944             $sb_file->set(0, [1, 2, 1]);
945            
946              
947             C replaces the row at the given index with the input. Seeks to the
948             given index if it has not been read to yet. Cs if the file does not go
949             up to the index specified.
950              
951             Caching must be enabled to use C, C, or C.
952              
953             =cut
954              
955             sub set {
956 3     3 1 20 my $self = shift;
957 3         6 my $index = shift;
958 3 50       9 if ( !$self->{'options'}{'cache'} ) {
959 0         0 croak("Caching must be enabled to write");
960             }
961 3 100       10 if ( $index < 0 ) {
962 1         163 croak("Index must be positive integer");
963             }
964 2         7 my $new_row = $self->ingest_row(@_);
965 2 50       5 unless ( defined($new_row) ) {
966 0         0 croak("Error parsing inputs");
967             }
968              
969 2 50       8 if ( $index > $self->{'max_dataidx'} ) {
970 2         3 my $current_idx = $self->{'dataidx'};
971 2         7 $self->data($index);
972 2         4 $self->{'dataidx'} = $current_idx;
973              
974 2 100       6 if ( $index > $self->{'max_dataidx'} ) {
975 1         87 croak("Index out of bounds.");
976             }
977             } ## end if ($index > $self->{'max_dataidx'...})
978              
979 1         3 $self->{'data'}[$index] = $new_row;
980             } ## end sub set
981              
982             =head2 insert($index, \%data_row | \@data_row | $data_row | %data_row)
983              
984             use SeaBASS::File qw(INSERT_BEGINNING INSERT_END);
985             ...
986            
987             my %row = (lat => 1, lon => 2, chl => 1);
988             $sb_file->insert(INSERT_BEGINNING, \%row);
989            
990             print join(',',@{$sb_file->actual_fields()}); #lat,lon,chl
991            
992             $sb_file->insert(1, [1, 2, 1]);
993             $sb_file->insert(INSERT_END, [1, 2, 1]);
994            
995             Inserts the row into the given position. C inserts a new row
996             at the start of the data, C inserts one at the end of the data
997             block.
998              
999             The index must be a positive integer, C, or C.
1000              
1001             If a row is inserted at the end, the entire data block is read from the file to
1002             cache every row, the row is appended to the end, and the current position is
1003             reset to the original position, so C will still return the real next
1004             row from the data.
1005              
1006             If a row is inserted before the current position, the current position is
1007             shifted accordingly and will still return the C real row.
1008              
1009             Caching must be enabled to use C, C, or C.
1010              
1011             =cut
1012              
1013             sub insert {
1014 16     16 1 49 my $self = shift;
1015 16         30 my $index = shift;
1016 16 50       58 if ( !$self->{'options'}{'cache'} ) {
1017 0         0 croak("Caching must be enabled to write.");
1018             }
1019 16 100       45 if ( $index < INSERT_END ) {
1020 1         182 croak("Index must be positive integer, or INSERT_BEGINNING (beginning), or INSERT_END (end)");
1021             }
1022 15         60 my $new_row = $self->ingest_row(@_);
1023 15 50       50 unless ( defined($new_row) ) {
1024 0         0 croak("Error parsing inputs");
1025             }
1026              
1027 15 100       43 if ( $index == INSERT_END ) {
    100          
1028 10         23 my $current_idx = $self->{'dataidx'};
1029 10         40 $self->data();
1030 10         19 $self->{'dataidx'} = $current_idx;
1031             } elsif ( $index > $self->{'max_dataidx'} ) {
1032 2         5 my $current_idx = $self->{'dataidx'};
1033 2         10 $self->data($index);
1034 2         5 $self->{'dataidx'} = $current_idx;
1035              
1036 2 50       11 if ( $index == $self->{'max_dataidx'} + 1 ) {
    100          
1037 0         0 $index = INSERT_END;
1038             } elsif ( $index > $self->{'max_dataidx'} ) {
1039 1         154 croak("Index out of bounds.");
1040             }
1041             } ## end elsif ($index > $self->{'max_dataidx'...})
1042              
1043 14 100 100     58 if ( $index <= $self->{'dataidx'} && $index != INSERT_END ) {
1044 3         10 $self->{'dataidx'}++;
1045             }
1046              
1047 14         30 $self->{'max_dataidx'}++;
1048              
1049 14 100       46 if ( $index == INSERT_BEGINNING ) {
    100          
1050 2         8 unshift( @{ $self->{'data'} }, $new_row );
  2         13  
1051             } elsif ( $index == INSERT_END ) {
1052 10         15 push( @{ $self->{'data'} }, $new_row );
  10         30  
1053             } else {
1054 2         4 splice( @{ $self->{'data'} }, $index, 0, $new_row );
  2         24  
1055             }
1056             } ## end sub insert
1057              
1058             =head2 prepend(\%data_row | \@data_row | $data_row | %data_row)
1059              
1060             C is short for C.
1061              
1062             =cut
1063              
1064             sub prepend {
1065 1     1 1 3 my $self = shift;
1066 1         5 $self->insert( INSERT_BEGINNING, @_ );
1067             }
1068              
1069             =head2 append(\%data_row | \@data_row | $data_row | %data_row)
1070              
1071             C is short for C.
1072              
1073             =cut
1074              
1075             sub append {
1076 9     9 1 50 my $self = shift;
1077 9         20 $self->insert( INSERT_END, @_ );
1078             }
1079              
1080             =head2 remove([$index])
1081              
1082             If index is specified, it deletes the desired index. If it is omitted, the
1083             last row read is deleted. The current position is modified accordingly.
1084              
1085             =cut
1086              
1087             sub remove {
1088 6     6 1 24 my ( $self, $index ) = @_;
1089              
1090 6 100 66     37 if ( !$self->{'options'}{'cache'} ) {
    50          
1091 1         145 croak("Caching must be enabled to write.");
1092             } elsif ( !defined($index) && $self->{'dataidx'} < 0 ) {
1093 0         0 croak("No rows read yet.");
1094             }
1095              
1096 5 100       15 if ( !defined($index) ) {
1097 2         5 $index = $self->{'dataidx'};
1098             }
1099              
1100 5 50       21 if ( $index < 0 ) {
    100          
1101 0         0 croak("Index must be positive integer");
1102             } elsif ( $index > $self->{'max_dataidx'} ) {
1103 2         5 my $current_idx = $self->{'dataidx'};
1104 2         6 $self->data($index);
1105 2         3 $self->{'dataidx'} = $current_idx;
1106              
1107 2 100       6 if ( $index > $self->{'max_dataidx'} ) {
1108 1         165 croak("Index out of bounds.");
1109             }
1110             } ## end elsif ($index > $self->{'max_dataidx'...})
1111              
1112 4 100       12 if ( $index <= $self->{'dataidx'} ) {
1113 3         6 $self->{'dataidx'}--;
1114             }
1115 4         7 $self->{'max_dataidx'}--;
1116              
1117 4         8 splice( @{ $self->{'data'} }, $index, 1 );
  4         14  
1118             } ## end sub remove
1119              
1120             =head2 where(\&function)
1121              
1122             # Find all rows with depth greater than 10 meters
1123             my @ret = $sb_file->where(sub {
1124             if ($_->{'depth'} > 10){
1125             return $_;
1126             } else {
1127             return undef;
1128             }
1129             });
1130              
1131             # Delete all measurements with depth less than 10 meters
1132             $sb_file->where(sub {
1133             if ($_->{'depth'} < 10){
1134             $_ = undef;
1135             }
1136             });
1137            
1138             # Calculate the average chlorophyll value
1139             my $chl_total = 0;
1140             my $measurements = 0;
1141             $sb_file->where(sub {
1142             if (defined($_->{'chl'})){
1143             $chl_total += $_->{'chl'};
1144             $measurements++;
1145             }
1146             });
1147             if ($measurements){
1148             print $chl_total/$measurements;
1149             } else {
1150             print "No chl values.";
1151             }
1152              
1153              
1154             Traverses through each data line, running the given function on each row.
1155             C<$_> is set to the current row. If C<$_> is set to undefined, C is
1156             called. Any changes in C<$_> will be reflected in the data.
1157              
1158             Any defined value returned is added to the return array. If nothing is
1159             returned, a 0 is added.
1160              
1161             =cut
1162              
1163             sub where {
1164 7     7 1 48 my ( $self, $function ) = ( shift, shift );
1165 7 100       69 if ( ref($function) ne 'CODE' ) {
1166 1         152 croak("Invalid arguments.");
1167             }
1168 6         16 my $currentidx = $self->{'dataidx'};
1169 6         23 $self->rewind();
1170              
1171 6         11 my @new_rows;
1172              
1173 6         18 while ( my $row = $self->next() ) {
1174 19         34 local *_ = \$row;
1175 19         47 my $ret = $function->();
1176 19 100 100     160 if ( defined($ret) && defined(wantarray) ) {
1177 12         20 push( @new_rows, $ret );
1178             }
1179 19 100       64 if ( !defined($row) ) {
1180 2 100       7 if ( $self->{'dataidx'} <= $currentidx ) {
1181 1         1 $currentidx--;
1182             }
1183 2         8 $self->remove();
1184             } ## end if (!defined($row))
1185             } ## end while (my $row = $self->next...)
1186              
1187 5         21 $self->data($currentidx);
1188              
1189 5         17 return @new_rows;
1190             } ## end sub where
1191              
1192             =head2 get_all($field_name [, ... ] [, \%options])
1193              
1194             Returns an array/arrayref of all the values matching each given field name.
1195             This function errors out if no field names are passed in or a non-existent
1196             field is requested.
1197              
1198             Available options are:
1199              
1200             =over 4
1201              
1202             =item * delete_missing
1203              
1204             If any of the fields are missing, the row will not be added to any of the
1205             return arrays. (Useful for plotting or statistics that don't work well with
1206             bad values.)
1207              
1208             =back
1209              
1210             =cut
1211              
1212             sub get_all {
1213 13     13 1 6348 my $self = shift;
1214 13         39 my %options = ( 'delete_missing' => 0 );
1215 13 100       45 if ( ref( $_[$#_] ) eq 'HASH' ) {
1216 5         10 %options = %{ pop(@_) };
  5         19  
1217             }
1218 13 50       36 if ( !@_ ) {
1219 0         0 croak("get_all must be called with at least one field name");
1220             }
1221              
1222 13 100       37 my $missing = ( $self->{'options'}{'missing_data_to_undef'} ? undef : $self->{'missing'} );
1223              
1224 13         21 my $currentidx = $self->{'dataidx'};
1225 13         39 $self->rewind();
1226              
1227 13         29 my @fields = map {lc} @_; # turn all inputs lowercase
  22         72  
1228              
1229 13         33 foreach my $field (@fields) {
1230 21 100   130   107 if ( ( firstidx { $_ eq $field } @{ $self->{'actual_fields'} } ) < 0 ) {
  130         775  
  21         70  
1231 4 100 66 2   35 if ( !$self->{'options'}{'fill_ancillary_data'} || ( firstidx { $_ eq $field } keys( %{ $self->{'ancillary'} } ) ) < 0 ) {
  2         13  
  1         7  
1232 3         461 croak("Field $field does not exist");
1233             }
1234             }
1235             } ## end foreach my $field (@fields)
1236              
1237 10         65 my @ret = map { [] } @fields; # make return array of arrays
  17         42  
1238              
1239 10         34 while ( my $row = $self->next() ) {
1240 40 100       86 if ( $options{'delete_missing'} ) {
1241 20         32 my $has_all = 1;
1242 20         33 foreach my $field (@fields) {
1243 36 100 100     164 unless ( defined( $row->{$field} ) && ( !defined($missing) || $row->{$field} != $missing ) ) {
      100        
1244 4         9 $has_all = 0;
1245 4         5 last;
1246             }
1247             } ## end foreach my $field (@fields)
1248 20 100       43 unless ($has_all) {
1249 4         9 next;
1250             }
1251             } ## end if ($options{'delete_missing'...})
1252              
1253 36         88 for ( my $i = 0; $i <= $#fields; $i++ ) {
1254 60         79 push( @{ $ret[$i] }, $row->{ $fields[$i] } );
  60         202  
1255             }
1256             } ## end while (my $row = $self->next...)
1257              
1258 10         33 $self->data($currentidx);
1259              
1260 10 100       32 if ( $#_ == 0 ) {
    100          
1261 3 100       7 if (wantarray) {
1262 1         3 return @{ $ret[0] };
  1         6  
1263             } else {
1264 2         9 return $ret[0];
1265             }
1266             } elsif (wantarray) {
1267 4         17 return @ret;
1268             } else {
1269 3         13 return \@ret;
1270             }
1271             } ## end sub get_all
1272              
1273             =head2 remove_field($field_name [, ... ])
1274              
1275             Removes a field from the file. C is called to remove the field
1276             from cached rows. Any new rows grabbed will have the removed fields omitted,
1277             as well. A warning is issued if the field does not exist.
1278              
1279             =cut
1280              
1281             sub remove_field {
1282 8     8 1 44 my $self = shift;
1283 8 100       29 if ( !@_ ) {
1284 1         258 croak("Field(s) must be specified.");
1285             }
1286 7         17 foreach my $field_orig (@_) {
1287 9         25 my $field = lc($field_orig);
1288              
1289 9     30   78 my $field_idx = firstidx { $_ eq $field } @{ $self->{'actual_fields'} };
  30         192  
  9         38  
1290              
1291 9 100       52 if ( $field_idx < 0 ) {
1292 1         173 carp("Field $field does not exist.");
1293             } else {
1294 8         17 splice( @{ $self->{'actual_fields'} }, $field_idx, 1 );
  8         25  
1295 8         13 splice( @{ $self->{'actual_units'} }, $field_idx, 1 );
  8         19  
1296             }
1297             } ## end foreach my $field_orig (@_)
1298 7         183 $self->update_fields();
1299             } ## end sub remove_field
1300              
1301             =head2 add_field($field_name [, $unit [, $position]])
1302              
1303             Adds a field to the file. C is called to populate all cached
1304             rows. Any rows retrieved will have the new field set to undefined or /missing,
1305             depending on if the option C is set.
1306              
1307             If the unit is not specified, it is set to unitless.
1308              
1309             If the position is not specified, the field is added to the end.
1310              
1311             =cut
1312              
1313             sub add_field {
1314 25     25 1 158 my ( $self, $field, $unit, $position ) = @_;
1315 25 50       128 if ( !$self->{'options'}{'cache'} ) {
    100          
1316 0         0 croak("Caching must be enabled to write.");
1317             } elsif ( !$field ) {
1318 1         208 croak("Field must be specified.");
1319             }
1320 24         58 $field = lc($field);
1321              
1322 24     121   109 my $field_idx = firstidx { $_ eq $field } @{ $self->{'actual_fields'} };
  121         746  
  24         89  
1323 24 100       237 if ( $field_idx >= 0 ) {
1324 3         407 croak("Field already exists.");
1325             }
1326 21 100       62 if ( !defined($position) ) {
1327 15         45 $position = INSERT_END;
1328             }
1329 21   100     77 $unit ||= 'unitless';
1330 21         46 $unit = lc($unit);
1331              
1332 21 100       48 if ( $position == INSERT_END ) {
    100          
1333 17         34 push( @{ $self->{'actual_fields'} }, $field );
  17         47  
1334 17         37 push( @{ $self->{'actual_units'} }, $unit );
  17         44  
1335             } elsif ( $position == INSERT_BEGINNING ) {
1336 2         4 unshift( @{ $self->{'actual_fields'} }, $field );
  2         7  
1337 2         4 unshift( @{ $self->{'actual_units'} }, $unit );
  2         5  
1338             } else {
1339 2         4 splice( @{ $self->{'actual_fields'} }, $position, 0, $field );
  2         9  
1340 2         5 splice( @{ $self->{'actual_units'} }, $position, 0, $unit );
  2         7  
1341             }
1342 21         67 $self->update_fields();
1343             } ## end sub add_field
1344              
1345             =head2 find_fields($string | qr/match/ [, ... ])
1346              
1347             Finds fields matching the string or regex given. If given a string, it must
1348             match a field exactly and entirely to be found. To find a substring, use
1349             C. Fields are returned in the order that they will be output. This
1350             function takes into account fields that are added or removed. All fields are
1351             always lowercase, so all matches are case insensitive.
1352              
1353             Given one argument, returns an array of the fields found. An empty array is
1354             returned if no fields match.
1355              
1356             Given multiple arguments, returns an array/arrayref of arrays of fields found.
1357             IE: C would return something like
1358             C<[['lw510','lw550'],['es510','es550']]>. If no field is matched, the inner
1359             array will be empty. IE: C<[[],[]]>.
1360              
1361             =cut
1362              
1363             sub find_fields {
1364 10     10 1 5633 my $self = shift;
1365 10 50       33 if ( $#_ < 0 ) {
1366 0         0 croak("Input must be a string or regex object.");
1367             }
1368              
1369 10         20 my @ret;
1370              
1371 10         23 foreach my $find (@_) {
1372 13         22 my ( $regex, @matching );
1373 13 50       30 if ( defined($find) ) {
1374 13 100       40 if ( !ref($find) ) {
    50          
1375 7         102 $regex = lc(qr/^$find$/i);
1376             } elsif ( ref($find) eq 'Regexp' ) {
1377 6         28 $regex = lc(qr/$find/i);
1378             } else {
1379 0         0 croak("Input must be a string or regex object.");
1380             }
1381              
1382 13         31 foreach my $field ( @{ $self->{'actual_fields'} } ) {
  13         31  
1383 92 100       321 if ( $field =~ $regex ) {
1384 23         49 push( @matching, $field );
1385             }
1386             }
1387             }
1388 13         32 push( @ret, \@matching );
1389             } ## end foreach my $find (@_)
1390              
1391 10 100       28 if ( $#_ == 0 ) {
1392 7         12 return @{ $ret[0] };
  7         37  
1393             } else {
1394 3 100       9 if (wantarray) {
1395 2         8 return @ret;
1396             } else {
1397 1         4 return \@ret;
1398             }
1399             } ## end else [ if ($#_ == 0) ]
1400             } ## end sub find_fields
1401              
1402             =head2 add_comment(@comments)
1403              
1404             Adds comments to the output file, which are printed, in bulk, after C.
1405             Comments are trimmed before entry and !s are added, if required.
1406              
1407             =cut
1408              
1409             sub add_comment {
1410 2     2 1 4 my $self = shift;
1411 2         6 push(@{$self->{'comments'}}, map {
1412 2         4 my $c = $_;
  3         33  
1413 3         13 $c =~ s/^\s+|\s+$//g;
1414 3 100       12 if ($c =~ /^!/){
1415 1         4 $c
1416             } else {
1417 2         7 "! $c"
1418             }
1419             } @_);
1420             }
1421              
1422             =head2 get_comments([@indices])
1423              
1424             Returns a list of the comments at the given indices. If no indices are passed
1425             in, return them all.
1426              
1427             =cut
1428              
1429             sub get_comments {
1430 6     6 1 16 my $self = shift;
1431 6         13 my @ret;
1432 6 100       16 if (@_){
1433 2         5 @ret = map {$self->{'comments'}[$_]} @_;
  3         10  
1434             } else {
1435 4         6 @ret = @{$self->{'comments'}};
  4         12  
1436             }
1437 6 50       17 if (wantarray){
1438 0         0 return @ret;
1439             } else {
1440 6         36 return \@ret;
1441             }
1442             }
1443              
1444             =head2 set_comments(@comments)
1445              
1446             Overwrites all of the comments in the file. For now, this is the proper way
1447             to remove comments. Comments are trimmed before entry and !s are added, if
1448             required.
1449              
1450             =cut
1451              
1452             sub set_comments {
1453 1     1 1 3 my $self = shift;
1454             $self->{'comments'} = [map {
1455 1         3 my $c = $_;
  2         5  
1456 2         6 $c =~ s/^\s+|\s+$//g;
1457 2 100       8 if ($c =~ /^!/){
1458 1         4 $c
1459             } else {
1460 1         3 "! $c"
1461             }
1462             } @_];
1463             }
1464              
1465             =head2 write([$filename | $file_handle | \*GLOB])
1466              
1467             Outputs the current header and data to the given handle or glob. If no
1468             arguments are given, and a non-existent filename was given to C, the
1469             contents are output into that. If an output file was not given, C
1470             outputs to STDOUT.
1471              
1472             If C is enabled, the headers are checked for invalid headers and
1473             missing required headers and errors/warnings can be thrown accordingly.
1474              
1475             The headers are output in a somewhat-arbitrary but consistent order. If
1476             C is enabled, placeholders are added for every header that
1477             does not exist. A comment section is also added if one is not present.
1478              
1479             =cut
1480              
1481             sub write {
1482 15     15 1 11686 my ( $self, $write_to_h ) = @_;
1483              
1484 15         60 my $strict_write = $self->{'options'}{'strict'} & STRICT_WRITE;
1485 15 100       54 my $slash = ( $self->{'options'}{'keep_slashes'} ? '/' : '' );
1486 15         32 my $error = 0;
1487              
1488 15 100       49 if ($strict_write) {
1489 3         7 foreach my $header ( keys( %{ $self->{'headers'} } ) ) {
  3         18  
1490 65         338 ( my $header_no_slash = $header ) =~ s"^/"";
1491 65 50 33 0   233 if ( ( firstidx { $_ eq $header_no_slash } @ALL_HEADERS ) < 0 && ( firstidx { $_ eq $header_no_slash } @HIDDEN_HEADERS ) < 0 ) {
  1104         5804  
  0         0  
1492 0         0 carp("Invalid header: $header");
1493 0         0 $error = 1;
1494             }
1495             } ## end foreach my $header (keys(%{...}))
1496              
1497 3         38 foreach my $header (@REQUIRED_HEADERS) {
1498 66 100       188 if ( !exists( $self->{'headers'}{$header} ) ) {
1499 20         1230 carp("Missing required header: $header");
1500 20         770 $error = 1;
1501             }
1502             } ## end foreach my $header (@REQUIRED_HEADERS)
1503             } ## end if ($strict_write)
1504              
1505 15 100       60 if ( !$error ) {
1506 14         29 my $close_write_to = 0;
1507 14         55 my $old_fh = select();
1508              
1509 14 50 33     106 if ( !$write_to_h && exists( $self->{'default_write_to'} ) ) {
1510 0   0     0 $write_to_h ||= $self->{'default_write_to'};
1511             }
1512              
1513 14 50       79 if ( defined($write_to_h) ) {
1514 0 0       0 if ( ref($write_to_h) eq 'GLOB' ) {
    0          
1515 0         0 select($write_to_h);
1516             } elsif ( !ref($write_to_h) ) {
1517 0         0 my $write_to = $write_to_h;
1518 0         0 $write_to_h = undef;
1519 0 0       0 open( $write_to_h, ">", $write_to ) || croak("Invalid argument for write().");
1520 0         0 $close_write_to = 1;
1521 0         0 select($write_to_h);
1522             } else {
1523 0         0 croak("Invalid argument for write().");
1524             }
1525             } ## end if (defined($write_to_h...))
1526              
1527 14   50     67 $self->{'headers'}{"${slash}delimiter"} ||= 'comma';
1528 14         50 my $actual_delim = lc( $self->{'headers'}{"${slash}delimiter"} );
1529 14 100       83 if ( $actual_delim eq 'comma' ) {
    50          
    0          
    0          
1530 2         3 $actual_delim = ',';
1531             } elsif ( $actual_delim eq 'space' ) {
1532 12         27 $actual_delim = ' ';
1533             } elsif ( $actual_delim eq 'tab' ) {
1534 0         0 $actual_delim = "\t";
1535             } elsif ( $actual_delim eq 'semicolon' ) {
1536 0         0 $actual_delim = ';';
1537             } else {
1538 0         0 $actual_delim = ',';
1539 0         0 $self->{'headers'}{"${slash}delimiter"} = 'comma';
1540             }
1541              
1542 14 50       53 my $missing = ( exists( $self->{'missing'} ) ? $self->{'missing'} : $DEFAULT_MISSING );
1543 14 50       49 my $bdl = ( exists( $self->{'below_detection_limit'} ) ? $self->{'below_detection_limit'} : $DEFAULT_BDL );
1544 14 50       47 my $adl = ( exists( $self->{'above_detection_limit'} ) ? $self->{'above_detection_limit'} : $DEFAULT_ADL );
1545              
1546 14 100       40 if ( $self->{'options'}{'preserve_header'} ) {
1547 1         2 print join("\n", @{ $self->{'preserved_header'} }, '');
  1         60  
1548             } else {
1549 13 100       57 if ( !exists( $self->{'headers'}{"${slash}begin_header"} ) ) {
1550 3         149 print "/begin_header\n";
1551             }
1552              
1553 13         38 my $add_missing_headers = $self->{'options'}{'add_empty_headers'};
1554 13 100 66     60 if ( $add_missing_headers && $add_missing_headers eq '1' ) {
1555 1         5 $add_missing_headers = 'NA';
1556             }
1557 13 100       42 if ( !$self->{'options'}{'preserve_case'} ) {
1558 1   50     4 $add_missing_headers = lc( $add_missing_headers || '' );
1559             }
1560            
1561 13         25 my @headers_to_print;
1562            
1563 13         112 @headers_to_print = @ALL_HEADERS;
1564 13 100 66     86 if ($missing eq $adl || ($self->{'options'}{'missing_data_to_undef'} && !$self->{'options'}{'preserve_detection_limits'})){
      100        
1565 12         145 @headers_to_print = grep(!/above_detection_limit/i, @headers_to_print);
1566             }
1567 13 100 66     78 if ($missing eq $bdl || ($self->{'options'}{'missing_data_to_undef'} && !$self->{'options'}{'preserve_detection_limits'})){
      100        
1568 12         98 @headers_to_print = grep(!/below_detection_limit/i, @headers_to_print);
1569             }
1570            
1571 13 100       27 unless (grep($_ ne 'unitless', @{ $self->{'actual_units'} })){
  13         63  
1572 6         75 @headers_to_print = grep(!/units/i, @headers_to_print);
1573             }
1574              
1575 13         38 foreach my $header (@headers_to_print) {
1576 399 100       1530 if ( $header eq 'missing' ) {
    100          
    100          
    100          
    50          
1577 13         30 while ( my ( $h, $k ) = each( %{ $self->{'headers'} } ) ) {
  165         1214  
1578 152         324 ( my $header_no_slash = $h ) =~ s"^/"";
1579 152 50   3097   637 if ( ( firstidx { $_ eq $header_no_slash } @ALL_HEADERS ) < 0 ) {
  3097         17304  
1580 0         0 print "/$h=$k\n";
1581             }
1582             } ## end while (my ($h, $k) = each...)
1583              
1584 13         31 foreach my $comment ( @{ $self->{'comments'} } ) {
  13         42  
1585 18         202 print "$comment\n";
1586             }
1587 13 100 100     32 if ( !@{ $self->{'comments'} } && $add_missing_headers ) {
  13         94  
1588 1         47 print "! Comments: \n!\n";
1589             }
1590 13 100       64 if ( !exists( $self->{'headers'}{"$slash$header"} ) ) {
1591 3         70 print "/missing=$missing\n";
1592             } else {
1593 10         162 print '/', $header, '=', $self->{'headers'}{"$slash$header"}, "\n";
1594             }
1595             } elsif ( $header eq 'fields' ) {
1596 13         38 print "/$header=", join( ',', @{ $self->{'actual_fields'} } ), "\n";
  13         153  
1597             } elsif ( $header eq 'units' ) {
1598 7         31 print "/$header=", join( ',', @{ $self->{'actual_units'} } ), "\n";
  7         114  
1599             } elsif ( exists( $self->{'headers'}{"$slash$header"} ) ) {
1600 110 100       393 if ( $header =~ /_header/ ) {
    100          
    50          
1601 20         609 print "/$header\n";
1602             } elsif (length($self->{'headers'}{"$slash$header"})) {
1603 65         149 my $v = $self->{'headers'}{"$slash$header"};
1604 65 100       207 if ( $header =~ /_latitude|_longitude/ ) {
    100          
1605 8         60 print "/$header=$v\[deg]\n";
1606             } elsif ( $header =~ /_time/ ) {
1607 4         47 print "/$header=$v\[gmt]\n";
1608             } else {
1609 53         466 print "/$header=$v\n";
1610             }
1611             # print '/', $header, '=', $self->{'headers'}{"$slash$header"}, "\n";
1612             } elsif ($add_missing_headers) {
1613 25 100       67 if ( $header =~ /_latitude|_longitude/ ) {
    100          
1614 4         28 print "/$header=$add_missing_headers\[deg]\n";
1615             } elsif ( $header =~ /_time/ ) {
1616 2         15 print "/$header=$add_missing_headers\[gmt]\n";
1617             } else {
1618 19         117 print "/$header=$add_missing_headers\n";
1619             }
1620             }
1621             } elsif ($add_missing_headers) {
1622 0 0       0 if ( $header =~ /_latitude|_longitude/ ) {
    0          
1623 0         0 print "/$header=$add_missing_headers\[deg]\n";
1624             } elsif ( $header =~ /_time/ ) {
1625 0         0 print "/$header=$add_missing_headers\[gmt]\n";
1626             } else {
1627 0         0 print "/$header=$add_missing_headers\n";
1628             }
1629             } ## end elsif ($add_missing_headers)
1630             } ## end foreach my $header (@ALL_HEADERS)
1631              
1632 13 100       81 if ( !exists( $self->{'headers'}{"${slash}end_header"} ) ) {
1633 3         45 print "/end_header\n";
1634             }
1635             } ## end else [ if ($self->{'options'}...)]
1636              
1637 14         84 $self->rewind();
1638              
1639 14         55 while ( my $row = $self->next() ) {
1640 24         53 my @values;
1641 24         87 foreach my $field ( @{ $self->{'actual_fields'} } ) {
  24         71  
1642 118 100       346 push( @values, ( defined( $row->{$field} ) ? $row->{$field} : $missing ) );
1643             }
1644 24         498 print join( $actual_delim, @values ), "\n";
1645             } ## end while (my $row = $self->next...)
1646              
1647 14         71 select($old_fh);
1648 14 50       56 if ($close_write_to) {
1649 0         0 close($write_to_h);
1650             }
1651             } else {
1652 1         65 croak("Error(s) writing file");
1653             }
1654 14         91 return;
1655             } ## end sub write
1656              
1657             =head2 close()
1658              
1659             If a file handle is opened for reading, this function closes it. This is
1660             automatically called when the object is destroyed. This is useful to replace
1661             the file being read with the current changes.
1662              
1663             =cut
1664              
1665             sub close {
1666 138     138 1 367 my ($self) = @_;
1667 138 100       454 if ( $self->{'handle'} ) {
1668 128         608 my $ret = close( $self->{'handle'} );
1669 128         520 delete( $self->{'handle'} );
1670 128         2626 return $ret;
1671             } else {
1672 10         95 return;
1673             }
1674             } ## end sub close
1675              
1676             =head2 make_data_hash($line [,\@field_list])
1677              
1678             my %row = $sb_file->make_data_hash("1.5,2,2.5");
1679             my %row = $sb_file->make_data_hash("1.5,2,2.5", [qw(lat lon sal)]);
1680             my %row = $sb_file->make_data_hash("1.5,2,2.5", [$sb_file->fields()]);
1681             my %row = $sb_file->make_data_hash("1.5,2,2.5", [$sb_file->actual_fields()]);
1682              
1683             For mostly internal use. This function parses a data line. It first splits
1684             the data via the delimiter, assigns a field to each value, and returns a hash
1685             or hash reference.
1686              
1687             If C<@field_list> is not set, C<< $sb_file->fields() >> is used.
1688              
1689             If a delimiter is not set (a blank file was created, a file without a
1690             /delimiter header is read, etc), the delimiter is guessed and set using
1691             L.
1692              
1693             Cs if the delimiter could not be guessed or the number of fields the
1694             line is split into does not match up with the field list.
1695              
1696             =cut
1697              
1698             sub make_data_hash {
1699 266     266 1 690 my ( $self, $line, $field_list ) = @_;
1700 266 50 66     912 if ( !$self->{'delim'} && !$self->guess_delim($line) ) {
1701 0         0 croak("Need a delimiter");
1702             }
1703 266         2420 my @values = split( $self->{'delim'}, $line );
1704 266   33     1450 $field_list ||= $self->{'fields'};
1705              
1706 266         437 my ( $num_expected, $num_got ) = ( scalar( @{ $self->{'fields'} } ), scalar(@values) );
  266         653  
1707 266 50       740 if ( $num_expected != $num_got ) {
1708 0         0 croak("Incorrect number of fields or elements: got $num_got, expected $num_expected");
1709             }
1710              
1711 266         458 my %ret;
1712              
1713 266         1002 my $iterator = each_arrayref( $field_list, \@values );
1714 266         5859 while ( my ( $k, $v ) = $iterator->() ) {
1715 1869 100       17906 if ( $self->{'options'}{'missing_data_to_undef'} ) {
1716 958 100 100     15060 if ( $self->{'missing_is_number'} && looks_like_number($v) && $v == $self->{'missing'} ) {
    100 100        
    100 100        
    100 100        
      100        
      100        
      100        
      100        
      33        
      66        
      66        
1717 68         238 $ret{$k} = undef;
1718             } elsif ( !$self->{'options'}{'preserve_detection_limits'} && $self->{'adl_is_number'} && looks_like_number($v) && $v == $self->{'above_detection_limit'} ){
1719 1         3 $ret{$k} = undef;
1720             } elsif ( !$self->{'options'}{'preserve_detection_limits'} && $self->{'bdl_is_number'} && looks_like_number($v) && $v == $self->{'below_detection_limit'} ){
1721 1         3 $ret{$k} = undef;
1722             } elsif ($v eq $self->{'missing'} || (!$self->{'options'}{'preserve_detection_limits'} && ($v eq $self->{'below_detection_limit'} || $v eq $self->{'above_detection_limit'}))) {
1723 8         25 $ret{$k} = undef;
1724             } else {
1725 880         3059 $ret{$k} = $v;
1726             }
1727             } else {
1728 911         2449 $ret{$k} = $v;
1729             }
1730             } ## end while (my ($k, $v) = $iterator...)
1731 266         2170 $self->add_and_remove_fields( \%ret );
1732 266 50       626 if (wantarray) {
1733 0         0 return %ret;
1734             } else {
1735 266         1795 return \%ret;
1736             }
1737             } ## end sub make_data_hash
1738              
1739             =head2 AUTOLOAD
1740              
1741             print $sb_file->missing();
1742             print $sb_file->dataidx();
1743             print $sb_file->actual_fields();
1744             ...
1745              
1746             Returns a few internal variables. The accessor is read only, but some
1747             variables can be returned as a reference, and can be modified afterwards.
1748             Though, do it knowing this is a terrible idea.
1749              
1750             If the variable retrieved is an array or hash reference and this is called in a
1751             list context, the variable is dereferenced first.
1752              
1753             Here are a few "useful" variables:
1754              
1755             =over 4
1756              
1757             =item * dataidx
1758              
1759             The current row index.
1760              
1761             =item * max_dataidx
1762              
1763             The highest row index read so far.
1764              
1765             =item * fields
1766              
1767             An array of the original fields.
1768              
1769             =item * actual_fields
1770              
1771             An array of the current fields, as modified by C or C.
1772              
1773             =item * delim
1774              
1775             The regex used to split data lines.
1776              
1777             =item * missing
1778              
1779             The null/fill/missing value of the SeaBASS file.
1780              
1781             =item * delim
1782              
1783             The current line delimiter regex.
1784              
1785             =back
1786              
1787             =cut
1788              
1789             sub AUTOLOAD {
1790 130     130   307 my $self = shift;
1791 130 50       303 if ( !ref($self) ) {
1792 0         0 croak("$self is not an object");
1793             }
1794              
1795 130         235 my $name = $AUTOLOAD;
1796 130 50       309 if ($name) {
1797 130         449 $name =~ s/.*://;
1798 130         256 my $value = $self->{$name};
1799 130 100       298 if ( !defined($value) ) {
1800 1         7 return;
1801             }
1802 129 100 66     528 if ( ref($value) eq 'ARRAY' && wantarray ) {
    50 33        
1803 125         195 return @{$value};
  125         483  
1804             } elsif ( ref($value) eq 'HASH' && wantarray ) {
1805 0         0 return %{$value};
  0         0  
1806             }
1807 4         28 return $value;
1808             } ## end if ($name)
1809             } ## end sub AUTOLOAD
1810              
1811             sub DESTROY {
1812 138     138   85238 my $self = shift;
1813 138         540 $self->close();
1814             }
1815              
1816             =head1 INTERNAL METHODS
1817              
1818             =head2 check_options()
1819              
1820             For internal use only. This function is in charge of checking the options to
1821             make sure they are of the right type (array/hash reference where appropriate).
1822              
1823             If add_empty_headers is set, this function turns off C.
1824              
1825             Called by the object, accepts no arguments.
1826              
1827             =cut
1828              
1829             #<<< perltidy destroys this function
1830             sub check_options {
1831 137     137 1 296 my $self = shift;
1832 137         328 while (my ($k, $v) = each(%{$self->{'options'}})) {
  1885         11668  
1833 1752 100       5720 if (!exists($DEFAULT_OPTIONS{$k})) {
    100          
1834 2         208 croak("Option not understood: $k");
1835 2012     2012   16473 } elsif ((firstidx { $_ eq ref($v) } @{$OPTION_TYPES{$k}}) < 0) {
  1750         5216  
1836 2         11 my $expected_ref = join('/', @{$OPTION_TYPES{$k}});
  2         6  
1837 2 100       203 croak("Option $k not of the right type, expected: " . ($expected_ref ? "$expected_ref reference" : 'scalar'));
1838             } ## end elsif ((firstidx { $_ eq ...}))
1839             } ## end while (my ($k, $v) = each...)
1840 133 100       516 if ($self->{'options'}{'add_empty_headers'}) {
1841 1         5 $self->{'options'}{'strict'} &= STRICT_READ;
1842             }
1843             } ## end sub check_options
1844             #>>>
1845              
1846             =head2 create_blank_file()
1847              
1848             For internal use only. C populates the object with proper
1849             internal variables, as well as adding blank headers if C is
1850             set.
1851              
1852             By default, the missing value is set to C<$DEFAULT_MISSING> (C<-999>).
1853              
1854             This function turns on the C option, as C must be enabled to
1855             write.
1856              
1857             The delimiter is left undefined and will be guessed upon reading the first data
1858             line using the L function.
1859              
1860             Called by the object, accepts no arguments.
1861              
1862             =cut
1863              
1864             sub create_blank_file {
1865 4     4 1 11 my ($self) = @_;
1866 4         14 $self->{'actual_fields'} = [];
1867 4         14 $self->{'actual_units'} = [];
1868 4         13 $self->{'fields'} = [];
1869 4         11 $self->{'units'} = [];
1870 4         12 $self->{'headers'} = {};
1871 4         11 $self->{'comments'} = [];
1872 4         15 $self->{'data'} = [];
1873 4         12 $self->{'dataidx'} = -1;
1874 4         12 $self->{'max_dataidx'} = -1;
1875 4         12 $self->{'delim'} = undef;
1876 4         12 $self->{'missing'} = $DEFAULT_MISSING;
1877 4         12 $self->{'below_detection_limit'} = $DEFAULT_BDL;
1878 4         11 $self->{'above_detection_limit'} = $DEFAULT_ADL;
1879              
1880 4         10 $self->{'options'}{'cache'} = 1;
1881 4         11 $self->{'options'}{'fill_ancillary_data'} = 0;
1882              
1883 4 100       15 my $slash = ( $self->{'options'}{'keep_slashes'} ? '/' : '' );
1884 4 100       18 if ($self->{'options'}{'add_empty_headers'}) {
1885 1         5 foreach (@ALL_HEADERS) {
1886 33 50       81 if ( !exists( $self->{'headers'}{"${slash}$_"} ) ) {
1887 33         69 $self->{'headers'}{"${slash}$_"} = '';
1888 33 100       74 if ( $_ eq 'missing' ) {
1889 1         4 $self->{'headers'}{"${slash}missing"} = $DEFAULT_MISSING;
1890             }
1891             } ## end if (!exists($self->{'headers'...}))
1892             } ## end foreach (@ALL_HEADERS)
1893             } ## end if ($add_missing_headers)
1894              
1895 4         7 my $success = 1;
1896 4 50       13 if ( $self->{'options'}{'default_headers'} ) {
1897 4         23 $success &= $self->add_headers( $self->{'options'}{'default_headers'} );
1898             }
1899 4 50       15 if ( $self->{'options'}{'headers'} ) {
1900 4         13 $success &= $self->add_headers( $self->{'options'}{'headers'} );
1901             }
1902 4 50       18 unless ($success) {
1903 0         0 croak("Error creating blank file.");
1904             }
1905             } ## end sub create_blank_file
1906              
1907             =head2 read_headers()
1908              
1909             For internal use only. C reads the metadata at the beginning of
1910             a SeaBASS file.
1911              
1912             Called by the object, accepts no arguments.
1913              
1914             =cut
1915              
1916             sub read_headers {
1917 128     128 1 377 my $self = shift;
1918              
1919 128 50       623 if ( $self->{'headers'} ) {
1920 0         0 return;
1921             }
1922              
1923 128 100       438 my $slash = ( $self->{'options'}{'keep_slashes'} ? '/' : '' );
1924 128         255 my $success = 1;
1925 128         230 my @comments;
1926              
1927 128         326 $self->{'headers'} = {};
1928 128         359 $self->{'comments'} = [];
1929              
1930 128 50       454 if ( $self->{'options'}{'default_headers'} ) {
1931 128         509 $success &= $self->add_headers( $self->{'options'}{'default_headers'} );
1932             }
1933              
1934 128         365 my $handle = $self->{'handle'};
1935 128         275 my $position = my $line_number = 0;
1936 128         235 my @header_lines;
1937              
1938 128         272 my $strict = $self->{'options'}{'strict'};
1939              
1940 128         758 while ( my $line = <$handle> ) {
1941 3263         4932 $line_number++;
1942 3263         7193 strip($line);
1943 3263 50       7829 if ($line) {
1944 3263 100       11139 if ( $line =~ m'^(/end_header)\@?$'i ) {
    100          
    100          
1945 113         425 push( @header_lines, $1 );
1946 113         258 $position = tell($handle);
1947 113         279 last;
1948             } elsif ( $line =~ m"^/" ) {
1949 2439         4745 push( @header_lines, $line );
1950             } elsif ( $line =~ m"^!" ) {
1951 696         1318 push( @comments, $line );
1952 696 100       1711 if ( $self->{'options'}{'preserve_header'} ) {
1953 10         19 push( @header_lines, $line );
1954             }
1955             } else { #TODO: search ahead for more headers or comments (in case of merely comment missing !) and fail if READ_STRICT
1956 15         32 seek( $handle, $position, SEEK_SET );
1957 15 50       39 if ( $strict & STRICT_READ ) {
1958 0         0 carp("File missing /end_header or comment missing !, assuming data start: line #$line_number ($line)");
1959             }
1960 15         31 last;
1961             }
1962             } ## end if ($line)
1963 3135         9962 $position = tell($handle);
1964             } ## end while (my $line = <$handle>)
1965              
1966             # add_headers looks at STRICT_WRITE, not STRICT_READ
1967 128 100       404 if ( $strict & STRICT_READ ) {
1968 5         18 $self->{'options'}{'strict'} |= STRICT_WRITE;
1969             } else {
1970 123         394 $self->{'options'}{'strict'} = 0;
1971             }
1972              
1973 128 100       404 if ( $self->{'options'}{'preserve_header'} ) {
1974 1         5 $self->{'preserved_header'} = [@header_lines];
1975             }
1976              
1977 128         484 $success &= $self->add_headers( \@header_lines );
1978              
1979             # restore strictness
1980 128         330 $self->{'options'}{'strict'} = $strict;
1981              
1982 128 50       428 if ( $self->{'options'}{'headers'} ) {
1983 128         411 $success &= $self->add_headers( $self->{'options'}{'headers'} );
1984             }
1985              
1986 128         308 my %headers = %{ $self->{'headers'} };
  128         1686  
1987              
1988 128 100       579 if ( $self->{'options'}{'preserve_comments'} ) {
1989 127         230 push( @{ $self->{'comments'} }, @comments );
  127         524  
1990             }
1991              
1992 128   66     643 my $missing = $headers{"${slash}missing"} || $DEFAULT_MISSING;
1993 128 100       423 if ( $self->{'options'}{'delete_missing_headers'} ) {
1994 1         9 while ( my ( $k, $v ) = each(%headers) ) {
1995 31 100       222 if ( $k =~ m"/?(?:end|begin)_header$|^/?missing$" ) {
1996 3         17 next;
1997             }
1998 28 100 66     314 if ( !defined($v) || $v =~ m"^n/?a(?:\[.*?\])?$"i || lc($v) eq lc($missing) ) {
      66        
1999 7         41 delete( $headers{$k} );
2000             }
2001             } ## end while (my ($k, $v) = each...)
2002             } ## end if ($self->{'options'}...)
2003              
2004 128 100       369 if ( $strict & STRICT_READ ) {
2005 5         13 foreach (@REQUIRED_HEADERS) {
2006 110 100       3549 if ( !exists( $headers{"${slash}$_"} ) ) {
2007 58         96 $success = 0;
2008 58         4966 carp("Missing required header: $_");
2009             }
2010             } ## end foreach (@REQUIRED_HEADERS)
2011 5         35 while ( my ( $header, $value ) = each(%headers) ) {
2012 72 50       480 if ($slash) {
2013 0         0 $header =~ s"^/"";
2014             }
2015 72 50 33 504   278 if ( ( firstidx { $_ eq $header } @ALL_HEADERS ) < 0 && ( firstidx { $_ eq $header } @HIDDEN_HEADERS ) < 0 ) {
  1312         6772  
  0         0  
2016 0         0 $success = 0;
2017 0         0 carp("$header not a standard header.");
2018             }
2019             } ## end while (my ($header, $value...))
2020 5 50 33     78 if ( $headers{"${slash}begin_header"} || $headers{"${slash}end_header"} ) {
2021 0         0 $success = 0;
2022 0         0 carp("begin_ or end_header incorrect");
2023             }
2024             } ## end if ($strict & STRICT_READ)
2025 128         347 foreach (@ABSOLUTELY_REQUIRED_HEADERS) {
2026 128 100       570 if ( !exists( $headers{"${slash}$_"} ) ) {
2027 1         2 $success = 0;
2028 1 50       4 if ( $strict & STRICT_READ ) {
2029 1         94 carp("Missing absolutely required header: $_");
2030             }
2031             }
2032             } ## end foreach (@ABSOLUTELY_REQUIRED_HEADERS)
2033              
2034 128   100     1767 $self->{'fields'} = [ split( /\s*,\s*/, $headers{"${slash}fields"} || '' ) ];
2035 128   100     1398 $self->{'actual_fields'} = [ split( /\s*,\s*/, $headers{"${slash}fields"} || '' ) ];
2036              
2037 128 100       553 if ( $headers{"${slash}units"} ) {
2038 79         717 $self->{'units'} = [ split( /\s*,\s*/, $headers{"${slash}units"} ) ];
2039 79         798 $self->{'actual_units'} = [ split( /\s*,\s*/, $headers{"${slash}units"} ) ];
2040             } else {
2041 49         129 my (@new_units1);
2042 49         88 foreach ( @{ $self->{'fields'} } ) {
  49         124  
2043 345         625 push( @new_units1, 'unitless' );
2044             }
2045 49         183 my @new_units2 = @new_units1;
2046 49         125 $self->{'units'} = \@new_units1;
2047 49         126 $self->{'actual_units'} = \@new_units2;
2048 49         221 $headers{"${slash}units"} = join( ',', @new_units1 );
2049             } ## end else [ if ($headers{"${slash}units"...})]
2050              
2051 128 100       299 if ( @{$self->{'fields'}} != @{$self->{'units'}} ) {
  128         328  
  128         430  
2052 1 50       3 if ( $strict & STRICT_READ ) {
2053 1         82 carp("/fields and /units don't match up");
2054 1         53 $success = 0;
2055             } else {
2056 0         0 while (@{$self->{'fields'}} > @{$self->{'units'}}){
  0         0  
  0         0  
2057 0         0 push(@{$self->{'units'}}, 'unitless');
  0         0  
2058             }
2059 0         0 while (@{$self->{'fields'}} < @{$self->{'units'}}){
  0         0  
  0         0  
2060 0         0 pop(@{$self->{'units'}});
  0         0  
2061             }
2062             }
2063             }
2064              
2065 128 100       406 unless ($success) {
2066 4 50       21 if ( $strict & STRICT_READ ) {
2067 4         431 croak("Error(s) reading SeaBASS file");
2068             } else {
2069 0         0 return;
2070             }
2071             }
2072              
2073 124         269 $self->{'missing'} = $missing;
2074 124   66     797 $self->{'below_detection_limit'} = $headers{"${slash}below_detection_limit"} || $missing;
2075 124   66     700 $self->{'above_detection_limit'} = $headers{"${slash}above_detection_limit"} || $missing;
2076 124         301 $self->{'line_number'} = $line_number;
2077 124         286 $self->{'data_start_line'} = $line_number;
2078 124         359 $self->{'data_start_position'} = $position;
2079              
2080 124 100       364 if ( $self->{'options'}{'cache'} ) {
2081 114         329 $self->{'data'} = [];
2082             }
2083 124         348 $self->{'dataidx'} = -1;
2084 124         273 $self->{'max_dataidx'} = -1;
2085              
2086 124         287 $self->{'headers'} = \%headers;
2087              
2088 124 100       706 if ( $self->{'options'}{'fill_ancillary_data'} ) {
2089 16         31 my @fields_lc = map {lc} @{ $self->{'fields'} };
  110         234  
  16         80  
2090 16         43 $self->{'fields_lc'} = \@fields_lc;
2091 16         36 $self->{'ancillary'} = {};
2092              
2093 16         30 foreach my $field (@FILL_ANCILLARY_DATA) {
2094 208         412 $self->find_ancillaries($field);
2095             }
2096              
2097 16         43 $self->{'case_conversion'} = {};
2098              
2099 16         31 while ( my ( $field, $value ) = each( %{ $self->{'ancillary'} } ) ) {
  159         506  
2100 143     801   399 my $idx = firstidx { $_ eq $field } @{ $self->{'fields_lc'} };
  801         4584  
  143         373  
2101 143         545 my $new_field = $field;
2102 143 100       329 if ( $idx >= 0 ) {
2103 77         147 $new_field = $self->{'fields'}[$idx];
2104             }
2105              
2106 143 100       290 if ( ref($value) ) {
2107 29         70 for ( my $i = 1; $i < @$value; $i++ ) {
2108 52         99 my $new_arg = $value->[$i];
2109 52         184 for ( $value->[$i] =~ /\$(\{\w+\}|\w+)/g ) {
2110 37         125 ( my $variable = $_ ) =~ s/^\{|\}$//g;
2111              
2112 37     196   117 my $idx = firstidx { $_ eq $variable } @{ $self->{'fields_lc'} };
  196         1062  
  37         96  
2113 37         134 my $new_variable = $self->{'fields'}[$idx];
2114              
2115 37         327 $new_arg =~ s/\$$variable(\W|\b|$)/\$$new_variable$1/g;
2116 37         289 $new_arg =~ s/\$\{$variable\}/\$$new_variable/g;
2117             } ## end for ($value->[$i] =~ /\$(\{\w+\}|\w+)/g)
2118 52         179 $value->[$i] = $new_arg;
2119             } ## end for (my $i = 1; $i <= length...)
2120             } else {
2121 114         162 my $new_value = $value;
2122 114         456 for ( $value =~ /\$(\{\w+\}|\w+)/g ) {
2123 151         586 ( my $variable = $_ ) =~ s/^\{|\}$//g;
2124              
2125 151     740   433 my $idx = firstidx { $_ eq $variable } @{ $self->{'fields_lc'} };
  740         3980  
  151         385  
2126 151 100       600 if ( $idx >= 0 ) {
2127 145         253 my $new_variable = $self->{'fields'}[$idx];
2128              
2129 145         1349 $new_value =~ s/\$$variable(\W|\b|$)/\$$new_variable$1/g;
2130 145         1223 $new_value =~ s/\$\{$variable\}/\$$new_variable/g;
2131             } ## end if ($idx >= 0)
2132             } ## end for ($value =~ /\$(\{\w+\}|\w+)/g)
2133 114         270 $value = $new_value;
2134             } ## end else [ if (ref($value)) ]
2135 143 50       319 if ( $field ne $new_field ) {
2136 0         0 delete( $self->{'ancillary'}{$field} );
2137 0         0 $self->{'case_conversion'}{$field} = $new_field;
2138             }
2139 143         360 $self->{'ancillary'}{$new_field} = $value;
2140             } ## end while (my ($field, $value...))
2141              
2142 16         50 delete( $self->{'fields_lc'} );
2143             } ## end if ($self->{'options'}...)
2144              
2145 124         775 return 1;
2146             } ## end sub read_headers
2147              
2148             =head2 validate_header($header, $value, $strict)
2149              
2150             my ($k, $v, $string) = ('investigators','jason_lefler',0)
2151             $sb_file->validate_header($k, $v, $strict);
2152              
2153             For internal use only. C is in charge of properly formatting
2154             key/value pairs to add to the object. This function will modify the input
2155             variables in place to prepare them for use.
2156              
2157             Returns false if there was a problem with the inputs, such as C is set
2158             and an invalid header was passed in.
2159              
2160             C will set C to C<$DEFAULT_MISSING> (C<-999>) if it
2161             is blank or undefined.
2162              
2163             This function will also change the expected delimiter for rows that have not
2164             yet been cached.
2165              
2166             =cut
2167              
2168             sub validate_header {
2169 2566     2566 1 5045 my ( $self, $k, $v, $strict ) = @_;
2170              
2171 2566         3783 my $success = 1;
2172              
2173 2566 100       5082 if ( !defined($v) ) {
2174 201         412 $v = '';
2175             } else {
2176 2365         4512 strip($v);
2177             }
2178              
2179 2566         5768 strip($k);
2180              
2181 2566         5086 $k = lc($k);
2182              
2183 2566 50 66     7400 if ( length($v) == 0 && $k !~ /_header/ ) {
2184 0 0       0 if ($strict) {
2185 0         0 carp("$k missing value");
2186 0         0 $success = 0;
2187             } else {
2188 0         0 $v = "";
2189             }
2190             } ## end if (length($v) == 0 &&...)
2191              
2192 2566 100 100     11029 if ( !$self->{'options'}{'preserve_case'} || $k =~ /fields|units/ ) {
2193 1308         2300 $v = lc($v);
2194             }
2195              
2196 2566 100       5420 if ( $self->{'options'}{'keep_slashes'} ) {
2197 139 100       344 if ( $k =~ m"^[^/]" ) {
2198 2         6 $k = "/$k";
2199             }
2200              
2201 139 50 66 12   344 if ( $strict && ( firstidx { "/$_" eq $k } @ALL_HEADERS ) < 0 && ( firstidx { "/$_" eq $k } @HIDDEN_HEADERS ) < 0 ) {
  12   33     96  
  0         0  
2202 0         0 carp("Invalid header, $k");
2203 0         0 $success = 0;
2204             }
2205             } else {
2206 2427 100       7209 if ( $k =~ m"^/" ) {
2207 2425         6649 $k =~ s"^/"";
2208             }
2209              
2210 2427 50 66 520   6676 if ( $strict && ( firstidx { $_ eq $k } @ALL_HEADERS ) < 0 && ( firstidx { $_ eq $k } @HIDDEN_HEADERS ) < 0 ) {
  1328   33     7419  
  0         0  
2211 0         0 carp("Invalid header, $k");
2212 0         0 $success = 0;
2213             }
2214             } ## end else [ if ($self->{'options'}...)]
2215              
2216 2566 100       12089 if ( $k =~ /_latitude|_longitude/){
    100          
    100          
    100          
    100          
    100          
2217 308         883 $v =~ s/\[deg\]$//i;
2218             } elsif ( $k =~ /_time/){
2219 158         505 $v =~ s/\[gmt\]$//i;
2220             } elsif ( $k =~ m"^/?delimiter$" ) {
2221 90 100       364 unless ( $self->set_delim( $strict, $v ) ) {
2222 1 50       3 if ($strict) {
2223 0         0 $success = 0;
2224             }
2225             }
2226             } elsif ( $k =~ m"^/?missing$" ) {
2227 111 50       483 $self->{'missing'} = ( length($v) ? $v : $DEFAULT_MISSING );
2228 111         814 $self->{'missing_is_number'} = looks_like_number( $self->{'missing'} );
2229             } elsif ( $k =~ m"^/?above_detection_limit" ) {
2230 4 50       17 $self->{'above_detection_limit'} = ( length($v) ? $v : $self->{'missing'} );
2231 4         16 $self->{'adl_is_number'} = looks_like_number( $self->{'above_detection_limit'} );
2232             } elsif ( $k =~ m"^/?below_detection_limit" ) {
2233 4 50       14 $self->{'below_detection_limit'} = ( length($v) ? $v : $self->{'missing'} );
2234 4         16 $self->{'bdl_is_number'} = looks_like_number( $self->{'below_detection_limit'} );
2235             }
2236              
2237 2566         4633 $_[1] = $k;
2238 2566         4630 $_[2] = $v;
2239              
2240 2566         4826 return $success;
2241             } ## end sub validate_header
2242              
2243             =head2 set_delim($strict, $delim)
2244              
2245             Takes a string declaring the delim (IE: 'comma', 'space', etc) and updates the
2246             object's internal delimiter regex.
2247              
2248             =cut
2249              
2250             sub set_delim {
2251 90     90 1 211 my $self = shift;
2252 90         167 my $strict = shift;
2253 90   50     334 my $delim = shift || '';
2254 90 50       457 if ( $delim eq 'comma' ) {
    50          
    100          
    50          
    100          
2255 0         0 $delim = qr/\s*,\s*/;
2256             } elsif ( $delim eq 'semicolon' ) {
2257 0         0 $delim = qr/\s*;\s*/;
2258             } elsif ( $delim eq 'space' ) {
2259 88         619 $delim = qr/\s+/;
2260             } elsif ( $delim eq 'tab' ) {
2261 0         0 $delim = qr/\t/;
2262             } elsif ($strict) {
2263 1         184 carp("delimiter not understood");
2264             } else {
2265 1 50       5 my $slash = ( $self->{'options'}{'keep_slashes'} ? '/' : '' );
2266 1         3 $self->{'headers'}{"${slash}delimiter"} = 'comma';
2267 1         3 $delim = undef;
2268             }
2269 90         388 $self->{'delim'} = $delim;
2270 90 100       445 return ( $delim ? 1 : 0 );
2271             } ## end sub set_delim
2272              
2273             =head2 update_fields()
2274              
2275             C runs through the currently cached rows and calls
2276             C on each row. It then updates the /fields and /units
2277             headers in the header hash.
2278              
2279             =cut
2280              
2281             sub update_fields {
2282 28     28 1 65 my ($self) = @_;
2283 28 100 66     177 if ( $self->{'options'}{'cache'} && $self->{'max_dataidx'} >= 0 ) {
2284 4         10 foreach my $hash ( @{ $self->{'data'} } ) {
  4         19  
2285 9         22 $self->add_and_remove_fields($hash);
2286             }
2287             }
2288              
2289 28 100       102 my $slash = ( $self->{'options'}{'keep_slashes'} ? '/' : '' );
2290 28         54 $self->{'headers'}{"${slash}fields"} = join( ',', @{ $self->{'actual_fields'} } );
  28         126  
2291 28         55 $self->{'headers'}{"${slash}units"} = join( ',', @{ $self->{'actual_units'} } );
  28         123  
2292             } ## end sub update_fields
2293              
2294             =head2 add_and_remove_fields(\%row)
2295              
2296             Given a reference to a row, this function deletes any fields removed with
2297             C and adds an undefined or /missing value for each field added
2298             via C. If C is set, an undefined value is
2299             given, otherwise, it is filled with the /missing value.
2300              
2301             If C is set, this function adds missing date, time,
2302             date_time, lat, lon, and depth fields to the retrieved row from the header.
2303              
2304             Needlessly returns the hash reference passed in.
2305              
2306             =cut
2307              
2308             sub add_and_remove_fields {
2309 277     277 1 595 my ( $self, $hash ) = @_;
2310 277         1045 foreach my $field ( keys(%$hash) ) {
2311 1916 100   7814   9496 if ( ( firstidx { $_ eq $field } @{ $self->{'actual_fields'} } ) < 0 ) {
  7814         45961  
  1916         5241  
2312 21 50 33 0   136 unless ( $self->{'options'}{'fill_ancillary_data'} && ( firstidx { $_ eq $field } keys( %{ $self->{'ancillary'} } ) ) >= 0 ) {
  0         0  
  0         0  
2313 21         68 delete( $hash->{$field} );
2314             }
2315             }
2316             } ## end foreach my $field (keys(%$hash...))
2317              
2318 277 100       1692 my $missing = ( $self->{'options'}{'missing_data_to_undef'} ? undef : $self->{'missing'} );
2319 277         1103 while ( my ( $variable, $pad ) = each(%FIELD_FORMATTING) ) {
2320 2216   33     7208 my $case_var = $self->{'case_conversion'}{$variable} || $variable;
2321 2216 50 66     7892 if ( defined( $hash->{$case_var} ) && ( !defined($missing) || $hash->{$case_var} != $missing ) ) {
      100        
2322 57         245 $hash->{$case_var} = sprintf( $pad, $hash->{$case_var} );
2323             }
2324             } ## end while (my ($variable, $pad...))
2325              
2326 277 100       718 if ( defined($self->{'ancillary'}) ) {
2327 19         39 $self->{'ancillary_tmp'} = {};
2328 19         43 for my $variable (@FILL_ANCILLARY_DATA) {
2329 247 100       549 if ( defined($self->{'ancillary'}{$variable}) ) {
2330 173         351 my $value = $self->extrapolate_variables( $missing, $self->{'ancillary'}{$variable}, $hash );
2331 173 50       395 if ( defined($value) ) {
2332 173         391 $hash->{$variable} = $value;
2333             }
2334             } ## end if ($self->{'ancillary'...})
2335             } ## end for my $variable (@FILL_ANCILLARY_DATA)
2336             } ## end if ($self->{'ancillary'...})
2337              
2338 277         428 foreach my $field ( @{ $self->{'actual_fields'} } ) {
  277         645  
2339 1972 100       4434 if ( !exists( $hash->{$field} ) ) {
2340 61         166 $hash->{$field} = $missing;
2341             }
2342             }
2343              
2344 277         528 return $hash;
2345             } ## end sub add_and_remove_fields
2346              
2347             =head2 guess_delim($line)
2348              
2349             C is is used to guess the delimiter of a line. It is not very
2350             intelligent. If it sees any commas, it will assume the delimiter is a comma.
2351             Then, it checks for tabs, spaces, then semi-colons. Returns 1 on success. If
2352             it doesn't find any, it will throw a warning and return undef.
2353              
2354             =cut
2355              
2356             sub guess_delim {
2357 35     35 1 96 my ( $self, $line ) = @_;
2358 35         71 my $delim_string = '';
2359 35 100       253 if ( $line =~ /,/ ) {
    50          
    50          
    0          
2360 2         12 my $delim = qr/\s*,\s*/;
2361 2         5 $self->{'delim'} = $delim;
2362 2         4 $delim_string = 'comma';
2363             } elsif ( $line =~ /\t/ ) {
2364 0         0 my $delim = qr/\t/;
2365 0         0 $self->{'delim'} = $delim;
2366 0         0 $delim_string = 'tab';
2367             } elsif ( $line =~ /\s+/ ) {
2368 33         204 my $delim = qr/\s+/;
2369 33         87 $self->{'delim'} = $delim;
2370 33         93 $delim_string = 'space';
2371             } elsif ( $line =~ /;/ ) {
2372 0         0 my $delim = qr/\s*;\s*/;
2373 0         0 $self->{'delim'} = $delim;
2374 0         0 $delim_string = 'semicolon';
2375             } else {
2376 0         0 carp("No delimiter defined or can be guessed");
2377 0         0 return;
2378             }
2379 35 100       148 $self->{'headers'}{ ( $self->{'options'}{'keep_slashes'} ? '/' : '' ) . 'delimiter' } = $delim_string;
2380 35         160 return 1;
2381             } ## end sub guess_delim
2382              
2383             =head2 ingest_row(\%data_row | \@data_row | $data_row | %data_row)
2384              
2385             For mostly internal use, parses arguments for C, C, and C
2386             and returns a hash or hash reference of the data row. Given a hash reference,
2387             it will merely return it.
2388              
2389             Given an array or array reference, it will assume each element is a field as
2390             listed in either C or C. If the number of elements
2391             matches C, it uses assumes it's that. If it doesn't match, it is
2392             tried to match against C. If it doesn't match either, a warning is
2393             issued and the return is undefined.
2394              
2395             Given a non-reference scalar, it will split the scalar based on the current
2396             delimiter. If one is not defined, it is guessed. If it cannot be guessed, the
2397             return is undefined.
2398              
2399             If the inputs are successfully parsed, all keys are turned lowercase.
2400              
2401             =cut
2402              
2403             sub ingest_row {
2404 27     27 1 54 my $self = shift;
2405 27         63 my %new_row;
2406 27 100       99 if ( $#_ < 0 ) {
2407 1         177 carp("Incorrect number of arguments to ingest_row()");
2408 1         138 return;
2409             }
2410 26         87 my $arrayref;
2411 26 100       110 if ( ref( $_[0] ) eq 'HASH' ) {
    100          
    50          
2412 14         39 %new_row = %{ shift(@_) };
  14         78  
2413             } elsif ( ref( $_[0] ) eq 'ARRAY' ) {
2414 3         6 $arrayref = $_[0];
2415             } elsif ( !ref( $_[0] ) ) {
2416 9 100       33 if ( $#_ == 0 ) {
    100          
2417 7 50 66     34 if ( !$self->{'delim'} && !$self->guess_delim( $_[0] ) ) {
2418 0         0 return;
2419             }
2420 7         67 $arrayref = [ split( $self->{'delim'}, $_[0] ) ];
2421             } elsif ( $#_ % 2 == 1 ) {
2422 1         5 %new_row = @_;
2423             } else {
2424 1         164 carp('Even sized list, scalar, or hash/array reference expected');
2425 1         146 return;
2426             }
2427             } else {
2428 0         0 carp("Arguments to ingest_row() not understood.");
2429 0         0 return;
2430             }
2431              
2432 25 100       94 if ($arrayref) {
2433 10         17 my $iterator;
2434 10 100       18 if ( scalar( @{ $self->{'actual_fields'} } ) == scalar( @{$arrayref} ) ) {
  10 50       21  
  10         30  
2435 8         32 $iterator = each_arrayref( $self->{'actual_fields'}, $arrayref );
2436 2         4 } elsif ( scalar( @{ $self->{'fields'} } ) == scalar( @{$arrayref} ) ) {
  2         5  
2437 2         7 $iterator = each_arrayref( $self->{'fields'}, $arrayref );
2438 2         43 $self->add_and_remove_fields( \%new_row );
2439             } else {
2440 0         0 my $actual_fields = scalar( @{ $self->{'actual_fields'} } );
  0         0  
2441 0         0 my $fields = scalar( @{ $self->{'fields'} } );
  0         0  
2442 0 0       0 if ( $actual_fields == $fields ) {
2443 0         0 carp("Invalid number of elements, expected $fields");
2444             } else {
2445 0         0 carp("Invalid number of elements, expected $actual_fields or $fields");
2446             }
2447 0         0 return;
2448             } ## end else [ if (scalar(@{$self->{'actual_fields'...}}))]
2449 10         185 while ( my ( $k, $v ) = $iterator->() ) {
2450 52         521 $new_row{$k} = $v;
2451             }
2452             } ## end if ($arrayref)
2453              
2454 25         174 %new_row = map { lc($_) => $new_row{$_} } keys %new_row;
  71         252  
2455              
2456 25 50       85 if (wantarray) {
2457 0         0 return %new_row;
2458             } else {
2459 25         75 return \%new_row;
2460             }
2461             } ## end sub ingest_row
2462              
2463             =head2 find_ancillaries($field_name)
2464              
2465             Used by C to traverse through a field's possible
2466             substitutes in C<%ANCILLARY> and try to find the most suitable replacement.
2467             Values of fields in C<%ANCILLARY> are array references, where each element is
2468             either:
2469              
2470             =over 4
2471              
2472             =item * a string of existing field names used to create the value
2473              
2474             =item * an array reference of the form [converter function, parsing regex
2475             (optional), arguments to converter, ... ]
2476              
2477             =item * a hash reference of the form { header => qr/parsing_regex/ }
2478              
2479             =back
2480              
2481             If the element is an array reference and an argument requires a field from the
2482             file, all arguments are parsed and the variables within them extrapolated, then
2483             the array is put into C<< $self->{'ancillary'} >>.
2484              
2485             If no value can be ascertained, it will not be added to the data rows.
2486              
2487             The value found is stored in C<< $self->{'ancillary'} >>. Returns 1 on
2488             success, 0 if the field cannot be filled in.
2489              
2490             =cut
2491              
2492             sub find_ancillaries {
2493 406     406 1 671 my ( $self, $field ) = @_;
2494 406 100       888 if ( $self->{'ancillary'}{$field} ) {
2495 99         170 return 1;
2496             }
2497 307     1833   819 my $idx = firstidx { $_ eq $field } @{ $self->{'fields_lc'} };
  1833         9922  
  307         883  
2498 307 100       1496 if ( $idx >= 0 ) {
2499 77         209 $self->{'ancillary'}{$field} = "\$\{$field\}";
2500 77         150 return 1;
2501             }
2502              
2503 230 50       562 my $slash = ( $self->{'options'}{'keep_slashes'} ? '/' : '' );
2504 230 100       301 foreach my $attempt ( @{ $ANCILLARY{$field} || [] } ) {
  230         618  
2505 416 100       1034 if ( ref($attempt) eq 'HASH' ) {
    100          
    50          
2506 284         381 keys( %{$attempt} ); #reset each() iterator between calls
  284         434  
2507 284         497 while ( my ( $where, $regex ) = each( %{$attempt} ) ) {
  541         1907  
2508 284 100 66     1178 if ( $where =~ /^\$/ ) {
    100 66        
2509 122 100   690   626 if ( ( firstidx { "\$$_" eq $where } $self->fields() ) >= 0 ) {
  690         3892  
2510 15     33   98 $self->{'ancillary'}{$field} = [ sub { return shift; }, $regex, $where ];
  33         69  
2511 15         55 return 1;
2512             }
2513             } elsif ( defined( $self->{'headers'}{"$slash$where"} ) && $self->{'headers'}{"$slash$where"} =~ $regex && lc($1) ne 'na' ) {
2514 12         34 $self->{'ancillary'}{$field} = $1;
2515 12         37 return 1;
2516             }
2517             } ## end while (my ($where, $regex...))
2518             } elsif ( ref($attempt) eq 'ARRAY' ) {
2519 89         192 my @attempt = @$attempt;
2520 89         138 my $function = shift(@attempt);
2521 89         123 my $regex;
2522 89 100       208 if ( ref( $attempt[0] ) eq 'Regexp' ) {
2523 41         57 $regex = shift(@attempt);
2524             }
2525 89         122 my $success = 1;
2526 89         124 my @args;
2527 89         135 foreach my $argument (@attempt) {
2528 89         133 my $tmparg = $argument;
2529 89         427 for ( $argument =~ /\$(\{\w+\}|\w+)/g ) {
2530 115         321 ( my $variable = $_ ) =~ s/^\{|\}$//g;
2531 115         250 $success &= $self->find_ancillaries($variable);
2532 115 100       219 if ($success) {
2533 40 50       84 if ( ref( $self->{'ancillary'}{$variable} ) ) {
2534 0         0 $tmparg =~ s/\$$variable(\W|\b|$)/\$\{$variable\}$1/g;
2535             } else {
2536 40         67 my $value = $self->{'ancillary'}{$variable};
2537 40         500 $tmparg =~ s/\$$variable(\W|\b|$)/$value$1/g;
2538 40         282 $tmparg =~ s/\$\{$variable\}/$value/g;
2539             }
2540             } else {
2541 75         120 last;
2542             }
2543             } ## end for ($argument =~ /\$(\{\w+\}|\w+)/g)
2544 89         210 push( @args, $tmparg );
2545             } ## end foreach my $argument (@attempt)
2546 89 100       225 if ($success) {
2547 14 100       32 if ($regex) {
2548 8         18 unshift( @args, $regex );
2549             }
2550 14         44 $self->{'ancillary'}{$field} = [ $function, @args ];
2551 14         39 return 1;
2552             } ## end if ($success)
2553             } elsif ( !ref($attempt) ) {
2554 43         67 my $success = 1;
2555 43         63 my $tmparg = $attempt;
2556 43         213 for ( $attempt =~ /\$(\{\w+\}|\w+)/g ) {
2557 83         241 ( my $variable = $_ ) =~ s/^\{|\}$//g;
2558 83         177 $success &= $self->find_ancillaries($variable);
2559 83 100       208 if ($success) {
2560 65 100       141 if ( ref( $self->{'ancillary'}{$variable} ) ) {
2561 6         50 $tmparg =~ s/\$$variable(\W|\b|$)/\$\{$variable\}$1/g;
2562             } else {
2563 59         95 my $value = $self->{'ancillary'}{$variable};
2564 59         896 $tmparg =~ s/\$$variable(\W|\b|$)/$value$1/g;
2565 59         528 $tmparg =~ s/\$\{$variable\}/$value/g;
2566             }
2567             } else {
2568 18         27 last;
2569             }
2570             } ## end for ($attempt =~ /\$(\{\w+\}|\w+)/g)
2571 43 100       113 if ($success) {
2572 25         62 $self->{'ancillary'}{$field} = $tmparg;
2573 25         61 return 1;
2574             }
2575             } ## end elsif (!ref($attempt))
2576             } ## end foreach my $attempt (@{$ANCILLARY...})
2577              
2578 164         284 return 0;
2579             } ## end sub find_ancillaries
2580              
2581             =head2 extrapolate_variables($missing, $expression, \%row)
2582              
2583             Used by C to convert a parsed ancillary string, such as
2584             C<'$year$month$day'>, into a real value using the fields from the C<\%row>.
2585             C<$expression>s are strings figured out by C and stored in
2586             C<< $self->{'ancillary'} >>.
2587              
2588             The return is undefined if a value cannot be created (IE: a required field is
2589             missing).
2590              
2591             =cut
2592              
2593             sub extrapolate_variables {
2594 220     220 1 410 my ( $self, $missing, $expression, $row ) = @_;
2595              
2596 220 100       418 if ( ref($expression) ) {
2597 47         100 return $self->extrapolate_function( $missing, $expression, $row );
2598             } else {
2599 173         256 my $tmpexpr = $expression;
2600 173         651 for ( $expression =~ /\$(\{\w+\}|\w+)/g ) {
2601 221         593 ( my $variable = $_ ) =~ s/^\{|\}$//g;
2602 221         321 my $value;
2603 221 100 66     746 if ( $self->{'ancillary_tmp'}{$variable} ) {
    50 33        
    0          
2604 123         210 $value = $self->{'ancillary_tmp'}{$variable};
2605             } elsif ( defined( $row->{$variable} ) && ( !defined($missing) || $row->{$variable} != $missing ) ) {
2606 98         173 $value = $row->{$variable};
2607 98         188 $self->{'ancillary_tmp'}{$variable} = $value;
2608             } elsif ( ref( $self->{'ancillary'}{$variable} ) ) {
2609 0         0 $value = $self->extrapolate_function( $missing, $self->{'ancillary'}{$variable}, $row );
2610 0 0       0 if ( !defined($value) ) {
2611 0         0 return;
2612             }
2613             } else {
2614 0         0 return;
2615             }
2616              
2617 221         2304 $tmpexpr =~ s/\$$variable(\W|\b|$)/$value$1/g;
2618 221         1466 $tmpexpr =~ s/\$\{$variable\}/$value/g;
2619             } ## end for ($expression =~ /\$(\{\w+\}|\w+)/g)
2620 173         466 return $tmpexpr;
2621             } ## end else [ if (ref($expression)) ]
2622             } ## end sub extrapolate_variables
2623              
2624             =head2 extrapolate_function($missing, $expression, \%row)
2625              
2626             If the value stored in C<< $self->{'ancillary'} >> is an array reference, this
2627             function uses the array to create an actual value. See
2628             L for an explanation of the
2629             array.
2630              
2631             =cut
2632              
2633             sub extrapolate_function {
2634 47     47 1 83 my ( $self, $missing, $expression, $row ) = @_;
2635 47         75 my $value;
2636 47         114 my ( $function, @args ) = @$expression;
2637 47         65 my $regex;
2638              
2639 47 100       119 if ( ref( $args[0] ) eq 'Regexp' ) {
2640 41         72 $regex = shift(@args);
2641             }
2642 47         83 for (@args) {
2643 47         106 $_ = $self->extrapolate_variables( $missing, $_, $row );
2644 47 50       128 if ( !defined($_) ) {
2645 0         0 return;
2646             }
2647             } ## end for (@args)
2648 47         108 $value = &$function(@args);
2649 47 100       113 if ($regex) {
2650 41 50       204 if ( $value =~ $regex ) {
2651 41         96 $value = $1;
2652             }
2653             }
2654              
2655 47         122 return $value;
2656             } ## end sub extrapolate_function
2657              
2658             =head1 STATIC METHODS
2659              
2660             =head2 strip(@list)
2661              
2662             my @space_filled_lines = (' line1 ', ' line2', 'line3 ', 'line4');
2663             strip(@space_filled_lines);
2664             print @space_filled_lines; #line1line2line3line4
2665              
2666             Runs through the list and removes leading and trailing whitespace. All changes
2667             are made in place.
2668              
2669             It is literally this:
2670              
2671             sub strip {
2672             s/^\s+|\s+$//g for @_;
2673             }
2674              
2675             =cut
2676              
2677             # Not an object method!
2678             sub strip {
2679 8460     8460 1 40951 s/^\s+|\s+$//g for @_;
2680             }
2681              
2682             =head2 julian_to_greg($yyyyjjj)
2683              
2684             Converts a date in the day of year format YYYYJJJ into YYYYMMDD. Returns the
2685             newly formatted string or undefined if the input does not match the required
2686             format.
2687              
2688             This uses the C function from C to do the heavy
2689             lifting.
2690              
2691             =cut
2692              
2693             # Not an object method!
2694             sub julian_to_greg {
2695 14     14 1 27 my ($yyyyjjj) = @_;
2696 14 50       53 if ( $yyyyjjj =~ /^(\d{4})(\d{3})$/ ) {
2697 14         83 my ( $y, $m, $d ) = Add_Delta_Days( $1, 1, 1, $2 - 1 );
2698 14         57 return sprintf( '%04d%02d%02d', $y, $m, $d );
2699             }
2700 0           return;
2701             } ## end sub julian_to_greg
2702              
2703             =head1 CAVEATS/ODDITIES
2704              
2705             =head2 Duplicate Fields
2706              
2707             This class will not allow a field to be added to the object if a field of the
2708             same name already exists. If a file being read has duplicate field names, only
2709             the B one is used. No warning is issued. If C is used to
2710             remove it, only the first instance will be deleted. To delete all instances,
2711             use C<< $sb_file->remove_field($sb_file->find_fields('chl')) >>. This may
2712             change in future releases.
2713              
2714             =head2 Changing Delimiter or Missing Value
2715              
2716             Modifying the delimiter header on a file that is being read will cause any
2717             non-cached rows to be split by the new delimiter, which should break most/all
2718             files. If the delimiter must be changed, call C to cache all the rows,
2719             then change it. This will obviously not work if caching is turned off. The
2720             same is true for setting the missing value, but only really applies when the
2721             C option is used (same goes to below detection limit).
2722              
2723             =head2 Below Detection Limit
2724              
2725             Below detection limit is only partially supported. If C is
2726             used, fields equal to C will be set to C, as
2727             well. Files modified while using C will have all data
2728             equal to C written out set to the missing value instead
2729             of the below detection limit value. If the below detection limit value is equal
2730             to the missing value or C is used, the
2731             C header will not be written.
2732              
2733             =head1 AUTHOR
2734              
2735             Jason Lefler, C<< >>
2736              
2737             =head1 BUGS
2738              
2739             Please report any bugs or feature requests to C
2740             rt.cpan.org>, or through the web interface at
2741             L. I will be
2742             notified, and then you'll automatically be notified of progress on your bug as
2743             I make changes.
2744              
2745             =head1 SUPPORT
2746              
2747             You can find documentation for this module with the perldoc command.
2748              
2749             perldoc SeaBASS::File
2750              
2751             You can also look for information at:
2752              
2753             =over 4
2754              
2755             =item * RT: CPAN's request tracker (report bugs here)
2756              
2757             L
2758              
2759             =item * AnnoCPAN: Annotated CPAN documentation
2760              
2761             L
2762              
2763             =item * CPAN Ratings
2764              
2765             L
2766              
2767             =item * Search CPAN
2768              
2769             L
2770              
2771             =back
2772              
2773             =head1 LICENSE AND COPYRIGHT
2774              
2775             Copyright 2014 Jason Lefler.
2776              
2777             This program is free software; you can redistribute it and/or modify it under
2778             the terms of either: the GNU General Public License as published by the Free
2779             Software Foundation; or the Artistic License.
2780              
2781             See http://dev.perl.org/licenses/ for more information.
2782              
2783             =cut
2784              
2785             1; # End of SeaBASS::File