File Coverage

blib/lib/Data/SeaBASS.pm
Criterion Covered Total %
statement 907 1040 87.2
branch 471 592 79.5
condition 132 192 68.7
subroutine 69 75 92.0
pod 48 48 100.0
total 1627 1947 83.5


line stmt bran cond sub pod time code
1             package Data::SeaBASS;
2              
3 25     25   2249342 use strict;
  25         202  
  25         659  
4 25     25   125 use warnings;
  25         44  
  25         1449  
5              
6             =head1 NAME
7              
8             Data::SeaBASS - Object-oriented interface for reading/writing SeaBASS files
9              
10             =head1 VERSION
11              
12             version 0.172890
13              
14             =cut
15              
16             our $VERSION = '0.172890'; # VERSION
17              
18             =head1 SYNOPSIS
19              
20             To read SeaBASS files:
21              
22             use Data::SeaBASS qw(STRICT_READ STRICT_WRITE INSERT_BEGINNING INSERT_END);
23              
24             my $sb_file = Data::SeaBASS->new("input.txt");
25              
26             # Calculate the average chlorophyll value using next
27             my $chl_total = 0;
28             my $measurements = 0;
29            
30             while (my $row = $sb_file->next()){
31             if (defined($row->{'chl'})){
32             $chl_total += $row->{'chl'};
33             $measurements++;
34             }
35             }
36             if ($measurements){
37             print $chl_total/$measurements;
38             } else {
39             print "No chl values.";
40             }
41            
42             #alternatively:
43             $sb_file->rewind();
44             while (my %row = $sb_file->next()){
45             if (defined($row{'chl'})){
46             $chl_total += $row{'chl'};
47             $measurements++;
48             }
49             }
50              
51             # Calculate the average chlorophyll value using where
52             my $chl_total2 = 0;
53             my $measurements2 = 0;
54             $sb_file->where(sub {
55             if (defined($_->{'chl'})){
56             $chl_total2 += $_->{'chl'};
57             $measurements2++;
58             }
59             });
60             if ($measurements2){
61             print $chl_total2/$measurements2;
62             } else {
63             print "No chl values.";
64             }
65            
66             Or to modify SeaBASS files:
67              
68             use Data::SeaBASS qw(STRICT_READ STRICT_WRITE INSERT_BEGINNING INSERT_END);
69              
70             my $sb_file = Data::SeaBASS->new("input.txt");
71              
72             # Add a one degree bias to water temperature
73             while (my $row = $sb_file->next()){
74             $row->{'wt'} += 1;
75             $sb_file->update($row);
76             }
77            
78             $sb_file->write(); # to STDOUT
79              
80             # Remove the one degree bias to water temperature
81             $sb_file->where(sub {
82             $_->{'wt'} -= 1;
83             });
84            
85             $sb_file->write("output_file.txt");
86            
87             Or to start a SeaBASS file from scratch:
88              
89             use Data::SeaBASS qw(STRICT_READ STRICT_WRITE INSERT_BEGINNING INSERT_END);
90              
91             my $sb_file = Data::SeaBASS->new({strict => 0, add_empty_headers => 1});
92             $sb_file->add_field('lat','degrees');
93             $sb_file->add_field('lon','degrees');
94             $sb_file->append({'lat' => 1, 'lon' => 2});
95             $sb_file->append("3,4"); # or if you're reading from a CSV file
96             $sb_file->write();
97              
98             =head1 DESCRIPTION
99              
100             C provides an easy to use, object-oriented interface for
101             reading, writing, and modifying SeaBASS data files.
102              
103             =head2 What is SeaBASS?
104              
105             L Bio-optical Archive and Storage
106             System housed at Goddard Space Flight Center.
107             L provides the permanent public
108             repository for data collected under the auspices of the NASA Ocean Biology and
109             Biogeochemistry Program. It also houses data collected by participants in the
110             NASA Sensor Intercomparision and Merger for Biological and Oceanic
111             Interdisciplinary Studies (SIMBIOS) Program. SeaBASS includes marine
112             bio-optical, biogeochemical, and (some) atmospheric data.
113              
114             =head2 SeaBASS File Format
115              
116             SeaBASS files are plain ASCII files with a special header and a matrix of
117             values.
118              
119             =head3 Header
120              
121             The SeaBASS header block consists of many lines of header-keyword pairs. Some
122             headers are optional but most, although technically not required for reading,
123             are required to be ingested into the system. More detailed information is
124             available in the SeaBASS L
125             article|http://seabass.gsfc.nasa.gov/wiki/article.cgi?article=metadataheaders>.
126             The only absolutely required header for this module to work is the /fields
127             line. This module turns fields and units lowercase at all times.
128              
129             /begin_header
130             /delimiter=space
131             /missing=-999
132             /fields=date,time,lat,lon,depth,wt,sal
133             /end_header
134            
135             =head3 Body
136              
137             The SeaBASS body is a matrix of data values, organized much like a spreadsheet.
138             Each column is separated by the value presented in the /delimiter header.
139             Likewise, missing values are indicated by the value presented in the /missing
140             header. The /fields header identifies the geophysical parameter presented in
141             each column.
142              
143             /begin_header
144             /delimiter=space
145             /missing=-999
146             /fields=date,time,lat,lon,depth,wt,sal
147             /end_header
148             19920109 16:30:00 31.389 -64.702 3.4 20.7320 -999
149             19920109 16:30:00 31.389 -64.702 19.1 20.7350 -999
150             19920109 16:30:00 31.389 -64.702 38.3 20.7400 -999
151             19920109 16:30:00 31.389 -64.702 59.6 20.7450 -999
152              
153             =head3 Strictly Speaking
154              
155             SeaBASS files are run through a program called
156             L before
157             they are submitted and before they are ingested into a NASA relational database
158             management system. Some of the things it checks for are required
159             L
160             and proper L
161             names|http://seabass.gsfc.nasa.gov/wiki/article.cgi?article=stdfields>. All
162             data must always have an associated depth, time, and location, though these
163             fields may be placed in the header and are not always required in the data.
164             Just because this module writes the files does not mean they will pass FCHECK.
165              
166             Files are case-INsensitive. Headers are not allowed to have any whitespace.
167              
168             =cut
169              
170 25     25   139 use Carp qw(:DEFAULT);
  25         45  
  25         3325  
171 25     25   156 use Fcntl qw(SEEK_SET);
  25         42  
  25         1104  
172 25     25   4757 use List::MoreUtils qw(firstidx each_arrayref);
  25         129155  
  25         150  
173 25     25   25961 use Date::Calc qw(Add_Delta_Days);
  25         115299  
  25         1882  
174 25     25   176 use Scalar::Util qw(looks_like_number);
  25         50  
  25         264949  
175              
176             require Exporter;
177             our @ISA = qw(Exporter);
178             our @EXPORT_OK = qw(STRICT_READ STRICT_WRITE STRICT_ALL INSERT_BEGINNING INSERT_END);
179             our @EXPORT = qw();
180              
181             our $AUTOLOAD;
182              
183             =head1 EXPORT
184              
185             This module does not export anything by default.
186              
187             =head2 STRICT_READ
188              
189             C is used with the C option, enabling error messages when
190             reading header lines and inserting header data.
191              
192             =head2 STRICT_WRITE
193              
194             C is used with the C option, enabling error messages when
195             writing the data to a file/stream.
196              
197             =head2 STRICT_ALL
198              
199             C is used with the C option, enabling C and
200             C.
201              
202             =head2 INSERT_BEGINNING
203              
204             C is used with L
205             \@data_row | $data_row | %data_row)"> or L
206             $unit [, $position]])"> to insert a data row or field at the beginning of their
207             respective lists.
208              
209             =head2 INSERT_END
210              
211             C is used with L
212             $data_row | %data_row)"> or L
213             $position]])"> to insert a data row or field at the end of their respective
214             lists.
215              
216             =cut
217              
218 288     288 1 7749 sub STRICT_READ {1}
219 454     454 1 962 sub STRICT_WRITE {2}
220 1     1 1 1899 sub STRICT_ALL {3}
221 22     22 1 69 sub INSERT_BEGINNING {0}
222 104     104 1 216 sub INSERT_END {-1}
223              
224             my %DEFAULT_OPTIONS = (
225             default_headers => {},
226             headers => {},
227             preserve_case => 1,
228             keep_slashes => 0,
229             cache => 1,
230             delete_missing_headers => 0,
231             missing_data_to_undef => 1,
232             preserve_comments => 1,
233             add_empty_headers => 0,
234             strict => STRICT_WRITE,
235             fill_ancillary_data => 0,
236             preserve_header => 0,
237             preserve_detection_limits => 0,
238             optional_warnings => 1,
239             );
240              
241             #return values for ref()
242             my %OPTION_TYPES = (
243             default_headers => [ 'ARRAY', 'HASH' ],
244             headers => [ 'ARRAY', 'HASH' ],
245             preserve_case => [''],
246             keep_slashes => [''],
247             cache => [''],
248             delete_missing_headers => [''],
249             missing_data_to_undef => [''],
250             preserve_comments => [''],
251             add_empty_headers => [''],
252             strict => [''],
253             fill_ancillary_data => [''],
254             preserve_header => [''],
255             preserve_detection_limits => [''],
256             optional_warnings => [''],
257             );
258              
259             # All headers required by STRICT_READ and STRICT_WRITE
260             my @REQUIRED_HEADERS = qw(
261             begin_header
262             investigators
263             affiliations
264             contact
265             experiment
266             cruise
267             data_file_name
268             documents
269             calibration_files
270             data_type
271             start_date
272             end_date
273             start_time
274             end_time
275             north_latitude
276             south_latitude
277             east_longitude
278             west_longitude
279             missing
280             delimiter
281             units
282             end_header
283             );
284              
285             # Headers that must be specified, regardless of strictness.
286             my @ABSOLUTELY_REQUIRED_HEADERS = qw(
287             fields
288             );
289              
290             # Valid headers used for STRICT_READ and STRICT_WRITE
291             our @ALL_HEADERS = qw(
292             begin_header
293             investigators
294             affiliations
295             contact
296             experiment
297             cruise
298             station
299             data_file_name
300             documents
301             calibration_files
302             data_type
303             data_status
304             start_date
305             end_date
306             start_time
307             end_time
308             north_latitude
309             south_latitude
310             east_longitude
311             west_longitude
312             cloud_percent
313             measurement_depth
314             secchi_depth
315             optical_depth_warning
316             water_depth
317             wave_height
318             wind_speed
319             missing
320             below_detection_limit
321             above_detection_limit
322             delimiter
323             fields
324             units
325             end_header
326             );
327              
328             our %HEADER_DEFAULTS = (
329             optical_depth_warning => 'false',
330             );
331              
332             our %OMIT_EMPTY_HEADERS = (
333             optical_depth_warning => 1,
334             );
335              
336             # headers that are allowed but are not to be added during add_empty_headers
337             my @HIDDEN_HEADERS = qw(
338             received
339             );
340              
341             # what to set the missing value to if it's not defined
342             our $DEFAULT_MISSING = -999;
343             our $DEFAULT_BDL = -888;
344             our $DEFAULT_ADL = -777;
345              
346             # overly complex data structure only understandable by idiots
347             # your IQ is the percentage chance that it won't make sense
348             # IE: my IQ = 40, I have a 60% chance of it making sense to me
349             my %ANCILLARY = (
350             'lat' => [ { 'north_latitude' => qr/^(.*?)$/ }, { 'south_latitude' => qr/^(.*?)$/ } ],
351             'lon' => [ { 'east_longitude' => qr/^(.*?)$/ }, { 'west_longitude' => qr/^(.*?)$/ } ],
352             'depth' => [ { 'measurement_depth' => qr/^(.*?)$/ }, ],
353             'date_time' => [ '$date $time', ],
354             'date' => [ [ \&julian_to_greg, '$year$julian' ], [ \&julian_to_greg, '$year$jd' ], [ \&julian_to_greg, '$year$sdy' ], '$year$month$day', ],
355             'year' => [ { '$date' => qr/^(\d{4})/ }, { 'start_date' => qr/^(\d{4})/ }, ],
356             'month' => [ { '$date' => qr/^\d{4}(\d{2})/ }, [ \&julian_to_greg, qr/^\d{4}(\d{2})\d{2}$/, '$year$julian' ], [ \&julian_to_greg, qr/^\d{4}(\d{2})\d{2}$/, '$year$jd' ], [ \&julian_to_greg, qr/^\d{4}(\d{2})\d{2}$/, '$year$sdy' ], { 'start_date' => qr/^\d{4}(\d{2})/ }, ],
357             'day' => [ { '$date' => qr/^\d{6}(\d{2})/ }, [ \&julian_to_greg, qr/^\d{4}\d{2}(\d{2})$/, '$year$julian' ], [ \&julian_to_greg, qr/^\d{4}\d{2}(\d{2})$/, '$year$jd' ], [ \&julian_to_greg, qr/^\d{4}\d{2}(\d{2})$/, '$year$sdy' ], { 'start_date' => qr/^\d{6}(\d{2})/ }, ],
358             'time' => [ '$hour:$minute:$second', ],
359             'hour' => [ { '$time' => qr/^(\d+):/ }, { 'start_time' => qr/^(\d+):/ }, ],
360             'minute' => [ { '$time' => qr/:(\d+):/ }, { 'start_time' => qr/:(\d+):/ }, ],
361             'second' => [ { '$time' => qr/:(\d+)(?:[^:\d]|$)/ }, { 'start_time' => qr/:(\d+)(?:[^:\d]|$)/ }, ],
362             'station' => [ { 'station' => qr/^(.*?)$/ }, ],
363             );
364              
365             # what fill_ancillary_data adds to each row
366             #my @FILL_ANCILLARY_DATA = qw(date time date_time lat lon depth);
367             my @FILL_ANCILLARY_DATA = keys(%ANCILLARY);
368              
369             my %FIELD_FORMATTING = (
370             'year' => '%04d',
371             'month' => '%02d',
372             'day' => '%02d',
373             'julian' => '%03d',
374             'sdy' => '%03d',
375             'hour' => '%02d',
376             'minute' => '%02d',
377             'second' => '%02d',
378             );
379              
380             =head1 CONSTRUCTOR
381              
382             =head2 new([$filename,] [\%options])
383              
384             my $sb_file = Data::SeaBASS->new("input_file.txt");
385             my $sb_file = Data::SeaBASS->new("input_file.txt", { delete_missing_headers => 1 });
386             my $sb_file = Data::SeaBASS->new("output_file.txt", { add_empty_headers => 1 });
387             my $sb_file = Data::SeaBASS->new({ add_empty_headers => 1 });
388              
389             Creates a C object. If the file specified exists, the object
390             can be used to read the file. If the file specified does not exist, an empty
391             object is created and will be written to the specified file by default when
392             invoking C.
393              
394             Options should be given in a hash reference to ensure proper argument parsing.
395             If a file is specified, options can be given as a hash list.
396              
397             =over 4
398              
399             =item * default_headers
400              
401             =item * headers
402              
403             These two options accept either an array reference or a hash reference. They
404             are used to set or override header information. First, headers are read from
405             C, then from the data file itself, then are overridden by
406             whatever is in C.
407              
408             Arguments are an array reference of header lines, or a hash reference of
409             header/value pairs.
410              
411             my $sb_file = Data::SeaBASS->new({
412             default_headers => [
413             '/cruise=fake_cruise',
414             '/experiment=default_experiment',
415             ],
416             headers => {
417             'experiment' => 'real_experiment',
418             },
419             });
420            
421             B Modifying the delimiter or missing value will likely break the
422             object. Modifying these will change the expected format for all rows. Do so
423             with caution.
424              
425             =item * preserve_case
426              
427             C<1> or C<0>, default C<1>. Setting this to C<0> will change all values in the
428             header to lowercase. Header descriptors (the /header part) are always turned
429             to lowercase, as well as all fields and units.
430              
431             =item * keep_slashes
432              
433             C<1> or C<0>, default C<0>. Forces the object to keep the / in the beginning of
434             headers when accessed. If set to C<1>, when using the L
435             \%new_headers | \@get_headers | @get_headers ])"> function, they will be
436             returned with leading slash.
437              
438             =item * cache
439              
440             C<1> or C<0>, default C<1>. Enables caching data rows as they are read. This
441             speeds up re-reads and allows the data to be modified. This is required for
442             writing files.
443              
444             =item * delete_missing_headers
445              
446             C<1> or C<0>, default C<0>. Any headers that are equal to the /missing header,
447             NA, or are not defined (when using the C options) are
448             deleted. They cannot be retrieved using C and will not be written.
449              
450             =item * missing_data_to_undef
451              
452             C<1> or C<0>, default C<1>. If any values in the data block are equal to the
453             /missing, /above_detection_limit, /below_detection_limit headers, they are set
454             to undef when they are retrieved.
455              
456             =item * preserve_comments
457              
458             C<1> or C<0>, default C<1>. Setting this option to zero will discard any
459             comments found in the header.
460              
461             =item * add_empty_headers
462              
463             C<0>, C<1>, or a string. If set to a string, this will populate any missing
464             headers, including optional ones, and will set their value to the string given.
465             If set to 1, the string 'NA' is used. This option disables C.
466              
467             =item * strict
468              
469             my $sb_file = Data::SeaBASS->new("input_file.txt", {strict => STRICT_ALL});
470             my $sb_file = Data::SeaBASS->new("input_file.txt", {strict => (STRICT_READ | STRICT_WRITE)});
471             my $sb_file = Data::SeaBASS->new("input_file.txt", {strict => 0});
472             my $sb_file = Data::SeaBASS->new("input_file.txt", {strict => STRICT_WRITE}); #default
473              
474             =over 4
475              
476             =item * C
477              
478             C will throw errors when reading invalid headers, missing required
479             ones, or an invalid delimiter. This may change in future revisions.
480              
481             =item * C
482              
483             C will throw the same errors when writing the data to a file or
484             stream. C only checks for required headers and invalid headers,
485             but does not check their values to see if they are actually filled. This may
486             change in future revisions.
487              
488             =back
489              
490             =item * fill_ancillary_data
491              
492             C<0> or C<1>, default C<0>. Insert date, time, measurement depth, station, and
493             location values to the data rows from the headers. Values are not overridden
494             if they are already present. This option is only useful when reading files.
495              
496             B It is bad practice to include these fields in the data if they don't
497             change throughout the file. This option is used to remove the burden of
498             checking whether they are in the data or header.
499              
500             B This option will also combine individual date/time
501             parts in the data (year/month/day/etc) to create more uniform date/time fields.
502              
503             B If any part of a date/time is missing, the fields
504             dependent on it will not be added to the row.
505              
506             =item * preserve_header
507              
508             C<0> or C<1>, default C<0>. Preserves header and comment order. This option
509             disables modifying the header, as well, but will not error if you try -- it will
510             simply not be reflected in the output.
511              
512             =item * preserve_detection_limits
513              
514             C<0> or C<1>, default C<0>. Disables setting values equal to below_detection_limit
515             or above_detection_limit to null while reading files. This should only be used
516             during read-only operation, as there is no telling missing data from data
517             outside limits.
518              
519             =item * optional_warnings
520              
521             C<0> or C<1>, default C<1>. Determines whether or not to print warnings deemed
522             optional. For the moment, the only defined warning is for optically shallow data.
523              
524             =back
525              
526             =cut
527              
528             sub new {
529 141     141 1 209433 my ( $class, $file ) = ( shift, shift );
530              
531 141         390 my $self = bless( {}, $class );
532              
533 141         228 my %myoptions;
534              
535 141 100       552 if ( ref($file) eq 'HASH' ) {
    100          
    50          
536 4         34 %myoptions = ( %DEFAULT_OPTIONS, %$file );
537 4         10 $file = '';
538             } elsif ( ref( $_[0] ) eq 'HASH' ) {
539 127         518 %myoptions = ( %DEFAULT_OPTIONS, %{ $_[0] } );
  127         806  
540             } elsif ( !ref( $_[0] ) ) {
541 10 100       34 if ( $#_ % 2 == 1 ) {
542 9         78 %myoptions = ( %DEFAULT_OPTIONS, @_ );
543             } else {
544 1         150 croak('Even sized list expected');
545             }
546             } else {
547 0         0 croak("Arguments not understood.");
548             }
549              
550 140         521 $self->{'options'} = \%myoptions;
551 140         446 $self->check_options();
552              
553 136 50       433 if ( ref($file) eq 'GLOB' ) {
    100          
    100          
554 0         0 $self->{'handle'} = $file;
555             } elsif ( ref($file) eq 'SCALAR' ) {
556 130         1189 open( my $fh, "<", $file );
557 130         362 $self->{'handle'} = $fh;
558             } elsif ($file) {
559 2 100       5 if ( !ref($file) ) {
560 1 50       17 if ( -r $file ) {
    0          
561 1         19 open( my $fh, "<", $file );
562 1         5 $self->{'handle'} = $fh;
563             } elsif ( $self->{'options'}{'strict'} & STRICT_READ ) {
564 0         0 croak("Strict read set, but input file not found or unreadable.");
565             } else {
566 0         0 $self->{'default_write_to'} = $file;
567 0         0 $file = '';
568             }
569             } else {
570 1         90 croak("Invalid parameter, expected file path or file handle.");
571             }
572             } ## end elsif ($file)
573 135 100       291 if ($file) {
574 131 50       327 unless ( $self->read_headers() ) {
575 0 0       0 unless ( $self->{'options'}{'strict'} & STRICT_READ ) {
576 0         0 return;
577             }
578             }
579             } else {
580 4         10 $self->create_blank_file();
581             }
582 131         386 return $self;
583             } ## end sub new
584              
585             =head1 OBJECT METHODS
586              
587             =head2 add_headers(\%headers | \@header_lines | @header_lines)
588              
589             $sb_file->add_headers({'investigators' => 'jason_lefler'});
590             $sb_file->add_headers(['/investigators=jason_lefler']);
591             $sb_file->add_headers('/investigators=jason_lefler');
592            
593             C is used to add or override metadata for a C, as
594             well as add comments.
595              
596             This function can not be used to change fields/units, see
597             L and
598             L for that.
599              
600             B Modifying the delimiter or missing value halfway through
601             reading/writing will likely break the object. Modifying these will change the
602             expected format for any new or non-cached rows. Do so with caution.
603              
604             =cut
605              
606             sub add_headers {
607 406     406 1 580 my $self = shift;
608 406         511 my $success = 1;
609 406         788 my $strict = $self->{'options'}{'strict'} & STRICT_WRITE;
610 406 100       1020 if ( ref( $_[0] ) eq 'HASH' ) {
    100          
    50          
611 265         364 while ( my ( $k, $v ) = each( %{ $_[0] } ) ) {
  273         880  
612 8         15 $success &= $self->validate_header( $k, $v, $strict );
613 8         20 $self->{'headers'}{$k} = $v;
614             }
615             } elsif ( ref( $_[0] ) eq 'ARRAY' ) {
616 140         210 foreach ( @{ $_[0] } ) {
  140         289  
617 2594 100 100     6108 if ( $_ =~ /^\s*!/ ) {
    100          
618 11         15 push( @{ $self->{'comments'} }, $_ );
  11         22  
619             } elsif ( $strict && $_ !~ m"^/" ) {
620 2         288 carp("Invalid header line: $_");
621 2         174 $success = 0;
622             } else {
623 2581         5692 my ( $k, $v ) = split( /=/, $_, 2 );
624 2581         4451 $success &= $self->validate_header( $k, $v, $strict );
625 2581         5700 $self->{'headers'}{$k} = $v;
626             }
627             } ## end foreach (@{$_[0]})
628             } elsif ( !ref( $_[0] ) ) {
629 1         2 foreach (@_) {
630 1 50 33     10 if ( $_ =~ /^\s*!/ ) {
    50          
631 0         0 push( @{ $self->{'comments'} }, $_ );
  0         0  
632             } elsif ( $strict && $_ !~ m"^/" ) {
633 0         0 carp("Invalid header line: $_");
634 0         0 $success = 0;
635             } else {
636 1         4 my ( $k, $v ) = split( /=/, $_, 2 );
637 1         2 $success &= $self->validate_header( $k, $v, $strict );
638 1         3 $self->{'headers'}{$k} = $v;
639             }
640             } ## end foreach (@_)
641             } else {
642 0         0 $success = 0;
643             }
644 406         686 return $success;
645             } ## end sub add_headers
646              
647             =head2 headers([ \%new_headers | \@get_headers | @get_headers ])
648              
649             =head2 head
650              
651             =head2 h
652              
653             my %headers = $sb_file->headers(['investigators']);
654             print Dumper(\%headers); # { investigators => 'jason_lefler' }
655            
656             my ($inv) = $sb_file->headers('investigators');
657             print $inv; # jason_lefler
658            
659             $sb_file->headers({investigators => 'jason_lefler'});
660            
661             $sb_file->headers()->{'investigators'} = 'jason_lefler';
662            
663             C is used to read or modify header values. Given an array reference
664             of header names, it will return a hash/hash reference with header/value pairs.
665             Given a plain list of header names, it will return an array/array reference of
666             the given header values. Given a hash reference, this function is a proxy for
667             L.
668              
669             If C is set, then headers will be returned as such, IE: C<<
670             {'/investigators' => 'jason_lefler'} >>.
671              
672             This function can also be used to set header values without going through the
673             normal validation.
674              
675             C and C are aliases to C.
676              
677             =cut
678              
679 0     0 1 0 sub head { shift->headers(@_); }
680 22     22 1 1376 sub h { shift->headers(@_); }
681              
682             sub headers {
683 25     25 1 508 my $self = shift;
684 25 100       57 if ( !@_ ) {
    100          
    100          
685 21         110 return $self->{'headers'};
686             } elsif ( ref( $_[0] ) eq 'HASH' ) {
687 1         4 return $self->add_headers(@_);
688             } elsif ( ref( $_[0] ) eq 'ARRAY' ) {
689 1         2 my %ret;
690 1         1 for my $header ( @{ $_[0] } ) {
  1         3  
691 2         5 $ret{$header} = $self->{'headers'}{ lc($header) };
692             }
693 1 50       13 if (wantarray) {
694 1         5 return %ret;
695             } else {
696 0         0 return \%ret;
697             }
698             } else {
699 2         3 my @ret;
700 2         4 foreach (@_) {
701 4 50       8 if ( !ref ) {
702 4         9 my $value = $self->{'headers'}{ lc($_) };
703 4 50       9 push( @ret, defined($value) ? $value : undef );
704             } else {
705 0         0 croak("Argument not understood: $_");
706             }
707             } ## end foreach (@_)
708 2 100       6 if (wantarray) {
    50          
709 1         4 return @ret;
710             } elsif ( $#ret == 0 ) {
711 0         0 return $ret[0];
712             } else {
713 1         4 return \@ret;
714             }
715             } ## end else [ if (!@_) ]
716              
717             } ## end sub headers
718              
719             =head2 data([$index])
720              
721             =head2 d
722              
723             =head2 body
724              
725             =head2 b
726              
727             =head2 all
728              
729             my $row = $sb_file->data(1);
730             my @rows = $sb_file->all();
731              
732             C is responsible for returning either a data line via an index or all of
733             the data lines at once.
734              
735             Data is returned as C<< field => value >> pairs.
736              
737             If given an index: in list context, returns the hash of the row; in scalar
738             context, returns a reference to the row.
739              
740             If not given an index: in list context, returns an array of the rows; in scalar
741             context, returns a reference to an array of the rows.
742              
743             If given an index out of range, returns undef. If given a negative index,
744             Cs the file, then returns undef.
745              
746             If C is enabled and the row has already been read, it is retrieved from
747             the cache. If it has not already be read, all rows leading up to the desired
748             row will be read and cached, and the desired row returned.
749              
750             If C is disabled and either all rows are retrieved or a previously
751             retrieved row is called again, the file will C, then seek to the
752             desired row.
753              
754             C, C, C, and C are all aliases to C. (Yes, that means
755             C can be used with arguments, it would just look silly.)
756              
757             =cut
758              
759 0     0 1 0 sub body { shift->data(@_); }
760 0     0 1 0 sub b { shift->data(@_); }
761 2     2 1 1012 sub d { shift->data(@_); }
762 42     42 1 6250 sub all { shift->data(@_); }
763              
764             sub data {
765 133     133 1 5626 my ( $self, $index ) = @_;
766 133 100       286 if ( defined($index) ) {
767 81 100       192 if ( $index < 0 ) {
768 12         25 $self->rewind();
769 12         14 return;
770             }
771 69 100       148 if ( $self->{'options'}{'cache'} ) {
772 63 100       136 if ( $index > $self->{'max_dataidx'} ) {
773 39         64 my $startidx = $self->{'dataidx'};
774 39         89 for ( my $i = 0; $i < ( $index - $startidx ); $i++ ) {
775 73 100       159 if ( !$self->next() ) {
776 4         11 return;
777             }
778             }
779             } ## end if ($index > $self->{'max_dataidx'...})
780              
781 59         90 $self->{'dataidx'} = $index;
782              
783 59 50       110 if (wantarray) {
784 0         0 return %{ $self->{'data'}[$index] };
  0         0  
785             } else {
786 59         292 return $self->{'data'}[$index];
787             }
788             } else {
789 6 100       17 if ( $index <= $self->{'dataidx'} ) {
790 3         9 $self->rewind();
791             }
792 6         11 my $startidx = $self->{'dataidx'};
793 6         16 for ( my $i = 0; $i < ( $index - $startidx - 1 ); $i++ ) {
794 7 50       12 if ( !$self->next() ) {
795 0         0 return;
796             }
797             }
798 6         14 return $self->next();
799             } ## end else [ if ($self->{'options'}...)]
800             } else {
801 52 100       122 if ( $self->{'options'}{'cache'} ) {
802 45         110 while ( $self->next() ) {
803             # noop
804             }
805 45 100       96 if (wantarray) {
806 7         12 return @{ $self->{'data'} };
  7         35  
807             } else {
808 38         139 return $self->{'data'};
809             }
810             } else {
811 7         23 $self->rewind();
812 7         8 my @data_rows;
813 7         23 while ( my $data = $self->next() ) {
814 28         63 push( @data_rows, $data );
815             }
816 7 100       14 if (wantarray) {
817 6         27 return @data_rows;
818             } else {
819 1         4 return \@data_rows;
820             }
821             } ## end else [ if ($self->{'options'}...)]
822             } ## end else [ if (defined($index)) ]
823             } ## end sub data
824              
825             =head2 next()
826              
827             while (my $row = $sb_file->next()){
828             print $row->{'lat'};
829             ...
830             }
831             while (my %row = $sb_file->next()){
832             print $row{'lat'};
833             ...
834             }
835            
836             Returns the next data row in the file, returning C when it runs out of
837             rows.
838              
839             Data is returned as C<< field => value >> pairs.
840              
841             In list context, returns a hash of the row. In scalar context, returns a
842             reference to the hash of a row.
843              
844             After a C, C will return the very first data hash, then each row
845             in turn. If the row has been cached, it's retrieved from the cache instead of
846             rereading from the file.
847              
848             =cut
849              
850             sub next {
851 429     429 1 16664 my $self = shift;
852 429 50       787 if (@_) {
853 0         0 croak("invalid number of arguments on next(), expected 0.");
854             }
855              
856 429 100 100     1610 if ( $self->{'options'}{'cache'} && $self->{'dataidx'} < $self->{'max_dataidx'} ) {
    100          
857 73         110 $self->{'dataidx'}++;
858              
859 73 50       112 if (wantarray) {
860 0         0 return %{ $self->{'data'}[ $self->{'dataidx'} ] };
  0         0  
861             } else {
862 73         222 return $self->{'data'}[ $self->{'dataidx'} ];
863             }
864             } elsif ( $self->{'handle'} ) {
865 344         500 my $handle = $self->{'handle'};
866 344         441 my $line_number = $self->{'line_number'};
867              
868 344         1102 while ( my $line = <$handle> ) {
869 266         351 $line_number++;
870 266         571 strip($line);
871 266 50       523 if ($line) {
872 266         581 my $data_row = $self->make_data_hash($line);
873 266         445 $self->{'line_number'} = $line_number;
874 266 100       501 if ( $self->{'options'}{'cache'} ) {
875 204         254 push( @{ $self->{'data'} }, $data_row );
  204         367  
876             }
877 266         360 $self->{'dataidx'}++;
878 266 100       535 if ( $self->{'dataidx'} > $self->{'max_dataidx'} ) {
879 238         341 $self->{'max_dataidx'} = $self->{'dataidx'};
880             }
881 266 50       413 if (wantarray) {
882 0         0 return %{$data_row};
  0         0  
883             } else {
884 266         850 return $data_row;
885             }
886             } ## end if ($line)
887             } ## end while (my $line = <$handle>)
888             } ## end elsif ($self->{'handle'})
889 90         207 return;
890             } ## end sub next
891              
892             =head2 rewind()
893              
894             C seeks to the start of the data. The next C will return the
895             very first row (or C). If caching is enabled, it will not actually
896             perform a seek, it will merely reset the index interator. If caching is
897             disabled, a seek is performed on the file handle to return to the start of the
898             data.
899              
900             =cut
901              
902             sub rewind {
903 61     61 1 1268 my ($self) = @_;
904 61 100       145 if ( $self->{'dataidx'} != -1 ) {
905 31 100       72 if ( !$self->{'options'}{'cache'} ) {
906 9         25 seek( $self->{'handle'}, $self->{'data_start_position'}, SEEK_SET );
907             }
908 31         49 $self->{'line_number'} = $self->{'data_start_line'};
909 31         54 $self->{'dataidx'} = -1;
910             } ## end if ($self->{'dataidx'}...)
911             } ## end sub rewind
912              
913             =head2 update(\%data_row | \@data_row | $data_row | %data_row)
914              
915             while (my %row = $sb_file->next()){
916             if ($row{'depth'} == -999){
917             $row{'depth'} = 0;
918             }
919             $sb_file->update(\%row);
920             }
921            
922             # Less useful for update():
923             print join(',',@{$sb_file->actual_fields()}); #lat,lon,depth,chl
924            
925             while (my %row = $sb_file->next()){
926             if ($row{'depth'} == -999){
927             $row{'depth'} = 0;
928             }
929             $sb_file->update(@row{'lat','lon','depth','chl'});
930             # or
931             $sb_file->update([@row{'lat','lon','depth','chl'}]);
932             }
933              
934             C replaces the last row read (using C) with the input.
935              
936             Caching must be enabled to use C, C, or C.
937              
938             =cut
939              
940             sub update {
941 12     12 1 48 my $self = shift;
942 12 100       29 if ( !$self->{'options'}{'cache'} ) {
    100          
943 1         85 croak("Caching must be enabled to write.");
944             } elsif ( $self->{'dataidx'} == -1 ) {
945 1         134 croak("No rows read yet.");
946             }
947 10         21 my $new_row = $self->ingest_row(@_);
948 10 100       21 unless ( defined($new_row) ) {
949 2         128 croak("Error parsing inputs");
950             }
951 8         27 $self->{'data'}[ $self->{'dataidx'} ] = $new_row;
952             } ## end sub update
953              
954             =head2 set($index, \%data_row | \@data_row | $data_row | %data_row)
955              
956             my %row = (lat => 1, lon => 2, chl => 1);
957             $sb_file->set(0, \%row);
958            
959             print join(',',@{$sb_file->actual_fields()}); #lat,lon,chl
960             $sb_file->set(0, [1, 2, 1]);
961            
962              
963             C replaces the row at the given index with the input. Seeks to the
964             given index if it has not been read to yet. Cs if the file does not go
965             up to the index specified.
966              
967             Caching must be enabled to use C, C, or C.
968              
969             =cut
970              
971             sub set {
972 3     3 1 20 my $self = shift;
973 3         4 my $index = shift;
974 3 50       10 if ( !$self->{'options'}{'cache'} ) {
975 0         0 croak("Caching must be enabled to write");
976             }
977 3 100       8 if ( $index < 0 ) {
978 1         138 croak("Index must be positive integer");
979             }
980 2         8 my $new_row = $self->ingest_row(@_);
981 2 50       7 unless ( defined($new_row) ) {
982 0         0 croak("Error parsing inputs");
983             }
984              
985 2 50       9 if ( $index > $self->{'max_dataidx'} ) {
986 2         11 my $current_idx = $self->{'dataidx'};
987 2         9 $self->data($index);
988 2         3 $self->{'dataidx'} = $current_idx;
989              
990 2 100       5 if ( $index > $self->{'max_dataidx'} ) {
991 1         121 croak("Index out of bounds.");
992             }
993             } ## end if ($index > $self->{'max_dataidx'...})
994              
995 1         5 $self->{'data'}[$index] = $new_row;
996             } ## end sub set
997              
998             =head2 insert($index, \%data_row | \@data_row | $data_row | %data_row)
999              
1000             use Data::SeaBASS qw(INSERT_BEGINNING INSERT_END);
1001             ...
1002            
1003             my %row = (lat => 1, lon => 2, chl => 1);
1004             $sb_file->insert(INSERT_BEGINNING, \%row);
1005            
1006             print join(',',@{$sb_file->actual_fields()}); #lat,lon,chl
1007            
1008             $sb_file->insert(1, [1, 2, 1]);
1009             $sb_file->insert(INSERT_END, [1, 2, 1]);
1010            
1011             Inserts the row into the given position. C inserts a new row
1012             at the start of the data, C inserts one at the end of the data
1013             block.
1014              
1015             The index must be a positive integer, C, or C.
1016              
1017             If a row is inserted at the end, the entire data block is read from the file to
1018             cache every row, the row is appended to the end, and the current position is
1019             reset to the original position, so C will still return the real next
1020             row from the data.
1021              
1022             If a row is inserted before the current position, the current position is
1023             shifted accordingly and will still return the C real row.
1024              
1025             Caching must be enabled to use C, C, or C.
1026              
1027             =cut
1028              
1029             sub insert {
1030 16     16 1 34 my $self = shift;
1031 16         24 my $index = shift;
1032 16 50       34 if ( !$self->{'options'}{'cache'} ) {
1033 0         0 croak("Caching must be enabled to write.");
1034             }
1035 16 100       26 if ( $index < INSERT_END ) {
1036 1         124 croak("Index must be positive integer, or INSERT_BEGINNING (beginning), or INSERT_END (end)");
1037             }
1038 15         51 my $new_row = $self->ingest_row(@_);
1039 15 50       35 unless ( defined($new_row) ) {
1040 0         0 croak("Error parsing inputs");
1041             }
1042              
1043 15 100       22 if ( $index == INSERT_END ) {
    100          
1044 10         13 my $current_idx = $self->{'dataidx'};
1045 10         25 $self->data();
1046 10         14 $self->{'dataidx'} = $current_idx;
1047             } elsif ( $index > $self->{'max_dataidx'} ) {
1048 2         5 my $current_idx = $self->{'dataidx'};
1049 2         7 $self->data($index);
1050 2         3 $self->{'dataidx'} = $current_idx;
1051              
1052 2 50       9 if ( $index == $self->{'max_dataidx'} + 1 ) {
    100          
1053 0         0 $index = INSERT_END;
1054             } elsif ( $index > $self->{'max_dataidx'} ) {
1055 1         78 croak("Index out of bounds.");
1056             }
1057             } ## end elsif ($index > $self->{'max_dataidx'...})
1058              
1059 14 100 100     40 if ( $index <= $self->{'dataidx'} && $index != INSERT_END ) {
1060 3         4 $self->{'dataidx'}++;
1061             }
1062              
1063 14         23 $self->{'max_dataidx'}++;
1064              
1065 14 100       23 if ( $index == INSERT_BEGINNING ) {
    100          
1066 2         4 unshift( @{ $self->{'data'} }, $new_row );
  2         7  
1067             } elsif ( $index == INSERT_END ) {
1068 10         13 push( @{ $self->{'data'} }, $new_row );
  10         24  
1069             } else {
1070 2         4 splice( @{ $self->{'data'} }, $index, 0, $new_row );
  2         9  
1071             }
1072             } ## end sub insert
1073              
1074             =head2 prepend(\%data_row | \@data_row | $data_row | %data_row)
1075              
1076             C is short for C.
1077              
1078             =cut
1079              
1080             sub prepend {
1081 1     1 1 2 my $self = shift;
1082 1         3 $self->insert( INSERT_BEGINNING, @_ );
1083             }
1084              
1085             =head2 append(\%data_row | \@data_row | $data_row | %data_row)
1086              
1087             C is short for C.
1088              
1089             =cut
1090              
1091             sub append {
1092 9     9 1 36 my $self = shift;
1093 9         14 $self->insert( INSERT_END, @_ );
1094             }
1095              
1096             =head2 remove([$index])
1097              
1098             If index is specified, it deletes the desired index. If it is omitted, the
1099             last row read is deleted. The current position is modified accordingly.
1100              
1101             =cut
1102              
1103             sub remove {
1104 6     6 1 19 my ( $self, $index ) = @_;
1105              
1106 6 100 66     29 if ( !$self->{'options'}{'cache'} ) {
    50          
1107 1         162 croak("Caching must be enabled to write.");
1108             } elsif ( !defined($index) && $self->{'dataidx'} < 0 ) {
1109 0         0 croak("No rows read yet.");
1110             }
1111              
1112 5 100       15 if ( !defined($index) ) {
1113 2         5 $index = $self->{'dataidx'};
1114             }
1115              
1116 5 50       14 if ( $index < 0 ) {
    100          
1117 0         0 croak("Index must be positive integer");
1118             } elsif ( $index > $self->{'max_dataidx'} ) {
1119 2         3 my $current_idx = $self->{'dataidx'};
1120 2         5 $self->data($index);
1121 2         3 $self->{'dataidx'} = $current_idx;
1122              
1123 2 100       4 if ( $index > $self->{'max_dataidx'} ) {
1124 1         130 croak("Index out of bounds.");
1125             }
1126             } ## end elsif ($index > $self->{'max_dataidx'...})
1127              
1128 4 100       14 if ( $index <= $self->{'dataidx'} ) {
1129 3         5 $self->{'dataidx'}--;
1130             }
1131 4         6 $self->{'max_dataidx'}--;
1132              
1133 4         6 splice( @{ $self->{'data'} }, $index, 1 );
  4         14  
1134             } ## end sub remove
1135              
1136             =head2 where(\&function)
1137              
1138             # Find all rows with depth greater than 10 meters
1139             my @ret = $sb_file->where(sub {
1140             if ($_->{'depth'} > 10){
1141             return $_;
1142             } else {
1143             return undef;
1144             }
1145             });
1146              
1147             # Delete all measurements with depth less than 10 meters
1148             $sb_file->where(sub {
1149             if ($_->{'depth'} < 10){
1150             $_ = undef;
1151             }
1152             });
1153            
1154             # Calculate the average chlorophyll value
1155             my $chl_total = 0;
1156             my $measurements = 0;
1157             $sb_file->where(sub {
1158             if (defined($_->{'chl'})){
1159             $chl_total += $_->{'chl'};
1160             $measurements++;
1161             }
1162             });
1163             if ($measurements){
1164             print $chl_total/$measurements;
1165             } else {
1166             print "No chl values.";
1167             }
1168              
1169              
1170             Traverses through each data line, running the given function on each row.
1171             C<$_> is set to the current row. If C<$_> is set to undefined, C is
1172             called. Any changes in C<$_> will be reflected in the data.
1173              
1174             Any defined value returned is added to the return array. If nothing is
1175             returned, a 0 is added.
1176              
1177             =cut
1178              
1179             sub where {
1180 7     7 1 73 my ( $self, $function ) = ( shift, shift );
1181 7 100       24 if ( ref($function) ne 'CODE' ) {
1182 1         166 croak("Invalid arguments.");
1183             }
1184 6         14 my $currentidx = $self->{'dataidx'};
1185 6         16 $self->rewind();
1186              
1187 6         7 my @new_rows;
1188              
1189 6         17 while ( my $row = $self->next() ) {
1190 19         57 local *_ = \$row;
1191 19         42 my $ret = $function->();
1192 19 100 100     153 if ( defined($ret) && defined(wantarray) ) {
1193 12         18 push( @new_rows, $ret );
1194             }
1195 19 100       55 if ( !defined($row) ) {
1196 2 100       7 if ( $self->{'dataidx'} <= $currentidx ) {
1197 1         1 $currentidx--;
1198             }
1199 2         12 $self->remove();
1200             } ## end if (!defined($row))
1201             } ## end while (my $row = $self->next...)
1202              
1203 5         18 $self->data($currentidx);
1204              
1205 5         17 return @new_rows;
1206             } ## end sub where
1207              
1208             =head2 get_all($field_name [, ... ] [, \%options])
1209              
1210             Returns an array/arrayref of all the values matching each given field name.
1211             This function errors out if no field names are passed in or a non-existent
1212             field is requested.
1213              
1214             Available options are:
1215              
1216             =over 4
1217              
1218             =item * delete_missing
1219              
1220             If any of the fields are missing, the row will not be added to any of the
1221             return arrays. (Useful for plotting or statistics that don't work well with
1222             bad values.)
1223              
1224             =back
1225              
1226             =cut
1227              
1228             sub get_all {
1229 13     13 1 4157 my $self = shift;
1230 13         25 my %options = ( 'delete_missing' => 0 );
1231 13 100       33 if ( ref( $_[$#_] ) eq 'HASH' ) {
1232 5         8 %options = %{ pop(@_) };
  5         13  
1233             }
1234 13 50       25 if ( !@_ ) {
1235 0         0 croak("get_all must be called with at least one field name");
1236             }
1237              
1238 13 100       28 my $missing = ( $self->{'options'}{'missing_data_to_undef'} ? undef : $self->{'missing'} );
1239              
1240 13         17 my $currentidx = $self->{'dataidx'};
1241 13         26 $self->rewind();
1242              
1243 13         22 my @fields = map {lc} @_; # turn all inputs lowercase
  22         51  
1244              
1245 13         22 foreach my $field (@fields) {
1246 21 100   130   55 if ( ( firstidx { $_ eq $field } @{ $self->{'actual_fields'} } ) < 0 ) {
  130         173  
  21         47  
1247 4 100 66 2   10 if ( !$self->{'options'}{'fill_ancillary_data'} || ( firstidx { $_ eq $field } keys( %{ $self->{'ancillary'} } ) ) < 0 ) {
  2         7  
  1         5  
1248 3         272 croak("Field $field does not exist");
1249             }
1250             }
1251             } ## end foreach my $field (@fields)
1252              
1253 10         13 my @ret = map { [] } @fields; # make return array of arrays
  17         28  
1254              
1255 10         24 while ( my $row = $self->next() ) {
1256 40 100       71 if ( $options{'delete_missing'} ) {
1257 20         24 my $has_all = 1;
1258 20         24 foreach my $field (@fields) {
1259 36 100 100     123 unless ( defined( $row->{$field} ) && ( !defined($missing) || $row->{$field} != $missing ) ) {
      100        
1260 4         6 $has_all = 0;
1261 4         6 last;
1262             }
1263             } ## end foreach my $field (@fields)
1264 20 100       31 unless ($has_all) {
1265 4         8 next;
1266             }
1267             } ## end if ($options{'delete_missing'...})
1268              
1269 36         57 for ( my $i = 0; $i <= $#fields; $i++ ) {
1270 60         69 push( @{ $ret[$i] }, $row->{ $fields[$i] } );
  60         154  
1271             }
1272             } ## end while (my $row = $self->next...)
1273              
1274 10         21 $self->data($currentidx);
1275              
1276 10 100       21 if ( $#_ == 0 ) {
    100          
1277 3 100       4 if (wantarray) {
1278 1         10 return @{ $ret[0] };
  1         6  
1279             } else {
1280 2         8 return $ret[0];
1281             }
1282             } elsif (wantarray) {
1283 4         15 return @ret;
1284             } else {
1285 3         12 return \@ret;
1286             }
1287             } ## end sub get_all
1288              
1289             =head2 remove_field($field_name [, ... ])
1290              
1291             Removes a field from the file. C is called to remove the field
1292             from cached rows. Any new rows grabbed will have the removed fields omitted,
1293             as well. A warning is issued if the field does not exist.
1294              
1295             =cut
1296              
1297             sub remove_field {
1298 8     8 1 27 my $self = shift;
1299 8 100       17 if ( !@_ ) {
1300 1         134 croak("Field(s) must be specified.");
1301             }
1302 7         15 foreach my $field_orig (@_) {
1303 9         15 my $field = lc($field_orig);
1304              
1305 9     30   29 my $field_idx = firstidx { $_ eq $field } @{ $self->{'actual_fields'} };
  30         40  
  9         26  
1306              
1307 9 100       29 if ( $field_idx < 0 ) {
1308 1         86 carp("Field $field does not exist.");
1309             } else {
1310 8         12 splice( @{ $self->{'actual_fields'} }, $field_idx, 1 );
  8         17  
1311 8         13 splice( @{ $self->{'actual_units'} }, $field_idx, 1 );
  8         17  
1312             }
1313             } ## end foreach my $field_orig (@_)
1314 7         103 $self->update_fields();
1315             } ## end sub remove_field
1316              
1317             =head2 add_field($field_name [, $unit [, $position]])
1318              
1319             Adds a field to the file. C is called to populate all cached
1320             rows. Any rows retrieved will have the new field set to undefined or /missing,
1321             depending on if the option C is set.
1322              
1323             If the unit is not specified, it is set to unitless.
1324              
1325             If the position is not specified, the field is added to the end.
1326              
1327             =cut
1328              
1329             sub add_field {
1330 25     25 1 111 my ( $self, $field, $unit, $position ) = @_;
1331 25 50       75 if ( !$self->{'options'}{'cache'} ) {
    100          
1332 0         0 croak("Caching must be enabled to write.");
1333             } elsif ( !$field ) {
1334 1         74 croak("Field must be specified.");
1335             }
1336 24         42 $field = lc($field);
1337              
1338 24     121   77 my $field_idx = firstidx { $_ eq $field } @{ $self->{'actual_fields'} };
  121         148  
  24         66  
1339 24 100       76 if ( $field_idx >= 0 ) {
1340 3         505 croak("Field already exists.");
1341             }
1342 21 100       39 if ( !defined($position) ) {
1343 15         31 $position = INSERT_END;
1344             }
1345 21   100     55 $unit ||= 'unitless';
1346 21         29 $unit = lc($unit);
1347              
1348 21 100       37 if ( $position == INSERT_END ) {
    100          
1349 17         21 push( @{ $self->{'actual_fields'} }, $field );
  17         36  
1350 17         24 push( @{ $self->{'actual_units'} }, $unit );
  17         29  
1351             } elsif ( $position == INSERT_BEGINNING ) {
1352 2         3 unshift( @{ $self->{'actual_fields'} }, $field );
  2         5  
1353 2         4 unshift( @{ $self->{'actual_units'} }, $unit );
  2         3  
1354             } else {
1355 2         3 splice( @{ $self->{'actual_fields'} }, $position, 0, $field );
  2         5  
1356 2         3 splice( @{ $self->{'actual_units'} }, $position, 0, $unit );
  2         4  
1357             }
1358 21         42 $self->update_fields();
1359             } ## end sub add_field
1360              
1361             =head2 find_fields($string | qr/match/ [, ... ])
1362              
1363             Finds fields matching the string or regex given. If given a string, it must
1364             match a field exactly and entirely to be found. To find a substring, use
1365             C. Fields are returned in the order that they will be output. This
1366             function takes into account fields that are added or removed. All fields are
1367             always lowercase, so all matches are case insensitive.
1368              
1369             Given one argument, returns an array of the fields found. An empty array is
1370             returned if no fields match.
1371              
1372             Given multiple arguments, returns an array/arrayref of arrays of fields found.
1373             IE: C would return something like
1374             C<[['lw510','lw550'],['es510','es550']]>. If no field is matched, the inner
1375             array will be empty. IE: C<[[],[]]>.
1376              
1377             =cut
1378              
1379             sub find_fields {
1380 10     10 1 4439 my $self = shift;
1381 10 50       27 if ( $#_ < 0 ) {
1382 0         0 croak("Input must be a string or regex object.");
1383             }
1384              
1385 10         15 my @ret;
1386              
1387 10         16 foreach my $find (@_) {
1388 13         19 my ( $regex, @matching );
1389 13 50       24 if ( defined($find) ) {
1390 13 100       31 if ( !ref($find) ) {
    50          
1391 7         70 $regex = lc(qr/^$find$/i);
1392             } elsif ( ref($find) eq 'Regexp' ) {
1393 6         24 $regex = lc(qr/$find/i);
1394             } else {
1395 0         0 croak("Input must be a string or regex object.");
1396             }
1397              
1398 13         24 foreach my $field ( @{ $self->{'actual_fields'} } ) {
  13         25  
1399 92 100       242 if ( $field =~ $regex ) {
1400 23         42 push( @matching, $field );
1401             }
1402             }
1403             }
1404 13         26 push( @ret, \@matching );
1405             } ## end foreach my $find (@_)
1406              
1407 10 100       20 if ( $#_ == 0 ) {
1408 7         11 return @{ $ret[0] };
  7         28  
1409             } else {
1410 3 100       7 if (wantarray) {
1411 2         6 return @ret;
1412             } else {
1413 1         3 return \@ret;
1414             }
1415             } ## end else [ if ($#_ == 0) ]
1416             } ## end sub find_fields
1417              
1418             =head2 add_comment(@comments)
1419              
1420             Adds comments to the output file, which are printed, in bulk, after C.
1421             Comments are trimmed before entry and !s are added, if required.
1422              
1423             =cut
1424              
1425             sub add_comment {
1426 2     2 1 3 my $self = shift;
1427 2         5 push(@{$self->{'comments'}}, map {
1428 2         4 my $c = $_;
  3         4  
1429 3         14 $c =~ s/^\s+|\s+$//g;
1430 3 100       8 if ($c =~ /^!/){
1431 1         4 $c
1432             } else {
1433 2         8 "! $c"
1434             }
1435             } @_);
1436             }
1437              
1438             =head2 get_comments([@indices])
1439              
1440             Returns a list of the comments at the given indices. If no indices are passed
1441             in, return them all.
1442              
1443             =cut
1444              
1445             sub get_comments {
1446 6     6 1 14 my $self = shift;
1447 6         8 my @ret;
1448 6 100       11 if (@_){
1449 2         4 @ret = map {$self->{'comments'}[$_]} @_;
  3         8  
1450             } else {
1451 4         5 @ret = @{$self->{'comments'}};
  4         10  
1452             }
1453 6 50       13 if (wantarray){
1454 0         0 return @ret;
1455             } else {
1456 6         25 return \@ret;
1457             }
1458             }
1459              
1460             =head2 set_comments(@comments)
1461              
1462             Overwrites all of the comments in the file. For now, this is the proper way
1463             to remove comments. Comments are trimmed before entry and !s are added, if
1464             required.
1465              
1466             =cut
1467              
1468             sub set_comments {
1469 1     1 1 3 my $self = shift;
1470             $self->{'comments'} = [map {
1471 1         2 my $c = $_;
  2         3  
1472 2         5 $c =~ s/^\s+|\s+$//g;
1473 2 100       7 if ($c =~ /^!/){
1474 1         4 $c
1475             } else {
1476 1         3 "! $c"
1477             }
1478             } @_];
1479             }
1480              
1481             =head2 write([$filename | $file_handle | \*GLOB])
1482              
1483             Outputs the current header and data to the given handle or glob. If no
1484             arguments are given, and a non-existent filename was given to C, the
1485             contents are output into that. If an output file was not given, C
1486             outputs to STDOUT.
1487              
1488             If C is enabled, the headers are checked for invalid headers and
1489             missing required headers and errors/warnings can be thrown accordingly.
1490              
1491             The headers are output in a somewhat-arbitrary but consistent order. If
1492             C is enabled, placeholders are added for every header that
1493             does not exist. A comment section is also added if one is not present.
1494              
1495             =cut
1496              
1497             sub write {
1498 15     15 1 10810 my ( $self, $write_to_h ) = @_;
1499              
1500 15         38 my $strict_write = $self->{'options'}{'strict'} & STRICT_WRITE;
1501 15 100       38 my $slash = ( $self->{'options'}{'keep_slashes'} ? '/' : '' );
1502 15         23 my $error = 0;
1503              
1504 15 100       32 if ($strict_write) {
1505 3         4 foreach my $header ( keys( %{ $self->{'headers'} } ) ) {
  3         15  
1506 65         96 ( my $header_no_slash = $header ) =~ s"^/"";
1507 65 50 33 512   169 if ( ( firstidx { $_ eq $header_no_slash } @ALL_HEADERS ) < 0 && ( firstidx { $_ eq $header_no_slash } @HIDDEN_HEADERS ) < 0 ) {
  1123         1240  
  0         0  
1508 0         0 carp("Invalid header: $header");
1509 0         0 $error = 1;
1510             }
1511             } ## end foreach my $header (keys(%{...}))
1512              
1513 3         11 foreach my $header (@REQUIRED_HEADERS) {
1514 66 100       136 if ( !exists( $self->{'headers'}{$header} ) ) {
1515 20         1221 carp("Missing required header: $header");
1516 20         675 $error = 1;
1517             }
1518             } ## end foreach my $header (@REQUIRED_HEADERS)
1519             } ## end if ($strict_write)
1520              
1521 15 100       30 if ( !$error ) {
1522 14         21 my $close_write_to = 0;
1523 14         33 my $old_fh = select();
1524              
1525 14 50 33     59 if ( !$write_to_h && exists( $self->{'default_write_to'} ) ) {
1526 0   0     0 $write_to_h ||= $self->{'default_write_to'};
1527             }
1528              
1529 14 50       40 if ( defined($write_to_h) ) {
1530 0 0       0 if ( ref($write_to_h) eq 'GLOB' ) {
    0          
1531 0         0 select($write_to_h);
1532             } elsif ( !ref($write_to_h) ) {
1533 0         0 my $write_to = $write_to_h;
1534 0         0 $write_to_h = undef;
1535 0 0       0 open( $write_to_h, ">", $write_to ) || croak("Invalid argument for write().");
1536 0         0 $close_write_to = 1;
1537 0         0 select($write_to_h);
1538             } else {
1539 0         0 croak("Invalid argument for write().");
1540             }
1541             } ## end if (defined($write_to_h...))
1542              
1543 14   50     42 $self->{'headers'}{"${slash}delimiter"} ||= 'comma';
1544 14         33 my $actual_delim = lc( $self->{'headers'}{"${slash}delimiter"} );
1545 14 100       42 if ( $actual_delim eq 'comma' ) {
    50          
    0          
    0          
1546 2         5 $actual_delim = ',';
1547             } elsif ( $actual_delim eq 'space' ) {
1548 12         18 $actual_delim = ' ';
1549             } elsif ( $actual_delim eq 'tab' ) {
1550 0         0 $actual_delim = "\t";
1551             } elsif ( $actual_delim eq 'semicolon' ) {
1552 0         0 $actual_delim = ';';
1553             } else {
1554 0         0 $actual_delim = ',';
1555 0         0 $self->{'headers'}{"${slash}delimiter"} = 'comma';
1556             }
1557              
1558 14 50       38 my $missing = ( exists( $self->{'missing'} ) ? $self->{'missing'} : $DEFAULT_MISSING );
1559 14 50       30 my $bdl = ( exists( $self->{'below_detection_limit'} ) ? $self->{'below_detection_limit'} : $DEFAULT_BDL );
1560 14 50       32 my $adl = ( exists( $self->{'above_detection_limit'} ) ? $self->{'above_detection_limit'} : $DEFAULT_ADL );
1561              
1562 14 100       40 if ( $self->{'options'}{'preserve_header'} ) {
1563 1         3 print join("\n", @{ $self->{'preserved_header'} }, '');
  1         53  
1564             } else {
1565 13 100       32 if ( !exists( $self->{'headers'}{"${slash}begin_header"} ) ) {
1566 3         75 print "/begin_header\n";
1567             }
1568              
1569 13         22 my $add_missing_headers = $self->{'options'}{'add_empty_headers'};
1570 13 100 66     35 if ( $add_missing_headers && $add_missing_headers eq '1' ) {
1571 1         2 $add_missing_headers = 'NA';
1572             }
1573 13 100       28 if ( !$self->{'options'}{'preserve_case'} ) {
1574 1   50     3 $add_missing_headers = lc( $add_missing_headers || '' );
1575             }
1576            
1577 13         14 my @headers_to_print;
1578            
1579 13         65 @headers_to_print = @ALL_HEADERS;
1580 13 100 66     58 if ($missing eq $adl || ($self->{'options'}{'missing_data_to_undef'} && !$self->{'options'}{'preserve_detection_limits'})){
      100        
1581 12         120 @headers_to_print = grep(!/above_detection_limit/i, @headers_to_print);
1582             }
1583 13 100 66     50 if ($missing eq $bdl || ($self->{'options'}{'missing_data_to_undef'} && !$self->{'options'}{'preserve_detection_limits'})){
      100        
1584 12         86 @headers_to_print = grep(!/below_detection_limit/i, @headers_to_print);
1585             }
1586            
1587 13 100       17 unless (grep($_ ne 'unitless', @{ $self->{'actual_units'} })){
  13         43  
1588 6         58 @headers_to_print = grep(!/units/i, @headers_to_print);
1589             }
1590              
1591 13         25 foreach my $header (@headers_to_print) {
1592 412 100       1061 if ( $header eq 'missing' ) {
    100          
    100          
    100          
    50          
1593 13         18 while ( my ( $h, $k ) = each( %{ $self->{'headers'} } ) ) {
  166         411  
1594 153         210 ( my $header_no_slash = $h ) =~ s"^/"";
1595 153 50   3197   390 if ( ( firstidx { $_ eq $header_no_slash } @ALL_HEADERS ) < 0 ) {
  3197         3410  
1596 0         0 print "/$h=$k\n";
1597             }
1598             } ## end while (my ($h, $k) = each...)
1599              
1600 13         17 foreach my $comment ( @{ $self->{'comments'} } ) {
  13         26  
1601 18         92 print "$comment\n";
1602             }
1603 13 100 100     18 if ( !@{ $self->{'comments'} } && $add_missing_headers ) {
  13         46  
1604 1         8 print "! Comments: \n!\n";
1605             }
1606 13 100       34 if ( !exists( $self->{'headers'}{"$slash$header"} ) ) {
1607 3         25 print "/missing=$missing\n";
1608             } else {
1609 10         74 print '/', $header, '=', $self->{'headers'}{"$slash$header"}, "\n";
1610             }
1611             } elsif ( $header eq 'fields' ) {
1612 13         23 print "/$header=", join( ',', @{ $self->{'actual_fields'} } ), "\n";
  13         76  
1613             } elsif ( $header eq 'units' ) {
1614 7         10 print "/$header=", join( ',', @{ $self->{'actual_units'} } ), "\n";
  7         31  
1615             } elsif ( exists( $self->{'headers'}{"$slash$header"} ) ) {
1616 111 100       267 if ( $header =~ /_header/ ) {
    100          
    50          
1617 20         387 print "/$header\n";
1618             } elsif (length($self->{'headers'}{"$slash$header"})) {
1619 65         107 my $v = $self->{'headers'}{"$slash$header"};
1620 65 100       134 if ( $header =~ /_latitude|_longitude/ ) {
    100          
1621 8         33 print "/$header=$v\[deg]\n";
1622             } elsif ( $header =~ /_time/ ) {
1623 4         15 print "/$header=$v\[gmt]\n";
1624             } else {
1625 53         239 print "/$header=$v\n";
1626             }
1627             # print '/', $header, '=', $self->{'headers'}{"$slash$header"}, "\n";
1628             } elsif ($add_missing_headers) {
1629 26 100       49 next if ($OMIT_EMPTY_HEADERS{$header});
1630              
1631 25   33     58 my $value = $HEADER_DEFAULTS{$header} || $add_missing_headers;
1632 25 100       53 if ( $header =~ /_latitude|_longitude/ ) {
    100          
1633 4         19 print "/$header=$value\[deg]\n";
1634             } elsif ( $header =~ /_time/ ) {
1635 2         8 print "/$header=$value\[gmt]\n";
1636             } else {
1637 19         102 print "/$header=$value\n";
1638             }
1639             }
1640             } elsif ($add_missing_headers) {
1641 0 0       0 next if ($OMIT_EMPTY_HEADERS{$header});
1642              
1643 0   0     0 my $value = $HEADER_DEFAULTS{$header} || $add_missing_headers;
1644 0 0       0 if ( $header =~ /_latitude|_longitude/ ) {
    0          
1645 0         0 print "/$header=$value\[deg]\n";
1646             } elsif ( $header =~ /_time/ ) {
1647 0         0 print "/$header=$value\[gmt]\n";
1648             } else {
1649 0         0 print "/$header=$value\n";
1650             }
1651             } ## end elsif ($add_missing_headers)
1652             } ## end foreach my $header (@ALL_HEADERS)
1653              
1654 13 100       54 if ( !exists( $self->{'headers'}{"${slash}end_header"} ) ) {
1655 3         13 print "/end_header\n";
1656             }
1657             } ## end else [ if ($self->{'options'}...)]
1658              
1659 14         49 $self->rewind();
1660              
1661 14         39 while ( my $row = $self->next() ) {
1662 24         34 my @values;
1663 24         30 foreach my $field ( @{ $self->{'actual_fields'} } ) {
  24         47  
1664 118 100       237 push( @values, ( defined( $row->{$field} ) ? $row->{$field} : $missing ) );
1665             }
1666 24         215 print join( $actual_delim, @values ), "\n";
1667             } ## end while (my $row = $self->next...)
1668              
1669 14         49 select($old_fh);
1670 14 50       36 if ($close_write_to) {
1671 0         0 close($write_to_h);
1672             }
1673             } else {
1674 1         54 croak("Error(s) writing file");
1675             }
1676 14         47 return;
1677             } ## end sub write
1678              
1679             =head2 close()
1680              
1681             If a file handle is opened for reading, this function closes it. This is
1682             automatically called when the object is destroyed. This is useful to replace
1683             the file being read with the current changes.
1684              
1685             =cut
1686              
1687             sub close {
1688 141     141 1 256 my ($self) = @_;
1689 141 100       331 if ( $self->{'handle'} ) {
1690 131         425 my $ret = close( $self->{'handle'} );
1691 131         374 delete( $self->{'handle'} );
1692 131         1946 return $ret;
1693             } else {
1694 10         64 return;
1695             }
1696             } ## end sub close
1697              
1698             =head2 make_data_hash($line [,\@field_list])
1699              
1700             my %row = $sb_file->make_data_hash("1.5,2,2.5");
1701             my %row = $sb_file->make_data_hash("1.5,2,2.5", [qw(lat lon sal)]);
1702             my %row = $sb_file->make_data_hash("1.5,2,2.5", [$sb_file->fields()]);
1703             my %row = $sb_file->make_data_hash("1.5,2,2.5", [$sb_file->actual_fields()]);
1704              
1705             For mostly internal use. This function parses a data line. It first splits
1706             the data via the delimiter, assigns a field to each value, and returns a hash
1707             or hash reference.
1708              
1709             If C<@field_list> is not set, C<< $sb_file->fields() >> is used.
1710              
1711             If a delimiter is not set (a blank file was created, a file without a
1712             /delimiter header is read, etc), the delimiter is guessed and set using
1713             L.
1714              
1715             Cs if the delimiter could not be guessed or the number of fields the
1716             line is split into does not match up with the field list.
1717              
1718             =cut
1719              
1720             sub make_data_hash {
1721 266     266 1 544 my ( $self, $line, $field_list ) = @_;
1722 266 50 66     591 if ( !$self->{'delim'} && !$self->guess_delim($line) ) {
1723 0         0 croak("Need a delimiter");
1724             }
1725 266         1744 my @values = split( $self->{'delim'}, $line );
1726 266   33     1069 $field_list ||= $self->{'fields'};
1727              
1728 266         313 my ( $num_expected, $num_got ) = ( scalar( @{ $self->{'fields'} } ), scalar(@values) );
  266         531  
1729 266 50       521 if ( $num_expected != $num_got ) {
1730 0         0 croak("Incorrect number of fields or elements: got $num_got, expected $num_expected");
1731             }
1732              
1733 266         337 my %ret;
1734              
1735 266         1351 my $iterator = each_arrayref( $field_list, \@values );
1736 266         1053 while ( my ( $k, $v ) = $iterator->() ) {
1737 1869 100       2898 if ( $self->{'options'}{'missing_data_to_undef'} ) {
1738 958 100 100     8706 if ( $self->{'missing_is_number'} && looks_like_number($v) && $v == $self->{'missing'} ) {
    100 100        
    100 100        
    100 100        
      100        
      100        
      100        
      100        
      33        
      66        
      66        
1739 68         217 $ret{$k} = undef;
1740             } elsif ( !$self->{'options'}{'preserve_detection_limits'} && $self->{'adl_is_number'} && looks_like_number($v) && $v == $self->{'above_detection_limit'} ){
1741 1         3 $ret{$k} = undef;
1742             } elsif ( !$self->{'options'}{'preserve_detection_limits'} && $self->{'bdl_is_number'} && looks_like_number($v) && $v == $self->{'below_detection_limit'} ){
1743 1         4 $ret{$k} = undef;
1744             } elsif ($v eq $self->{'missing'} || (!$self->{'options'}{'preserve_detection_limits'} && ($v eq $self->{'below_detection_limit'} || $v eq $self->{'above_detection_limit'}))) {
1745 8         31 $ret{$k} = undef;
1746             } else {
1747 880         3113 $ret{$k} = $v;
1748             }
1749             } else {
1750 911         2584 $ret{$k} = $v;
1751             }
1752             } ## end while (my ($k, $v) = $iterator...)
1753 266         695 $self->add_and_remove_fields( \%ret );
1754 266 50       453 if (wantarray) {
1755 0         0 return %ret;
1756             } else {
1757 266         1201 return \%ret;
1758             }
1759             } ## end sub make_data_hash
1760              
1761             =head2 AUTOLOAD
1762              
1763             print $sb_file->missing();
1764             print $sb_file->dataidx();
1765             print $sb_file->actual_fields();
1766             ...
1767              
1768             Returns a few internal variables. The accessor is read only, but some
1769             variables can be returned as a reference, and can be modified afterwards.
1770             Though, do it knowing this is a terrible idea.
1771              
1772             If the variable retrieved is an array or hash reference and this is called in a
1773             list context, the variable is dereferenced first.
1774              
1775             Here are a few "useful" variables:
1776              
1777             =over 4
1778              
1779             =item * dataidx
1780              
1781             The current row index.
1782              
1783             =item * max_dataidx
1784              
1785             The highest row index read so far.
1786              
1787             =item * fields
1788              
1789             An array of the original fields.
1790              
1791             =item * actual_fields
1792              
1793             An array of the current fields, as modified by C or C.
1794              
1795             =item * delim
1796              
1797             The regex used to split data lines.
1798              
1799             =item * missing
1800              
1801             The null/fill/missing value of the SeaBASS file.
1802              
1803             =item * delim
1804              
1805             The current line delimiter regex.
1806              
1807             =back
1808              
1809             =cut
1810              
1811             sub AUTOLOAD {
1812 130     130   242 my $self = shift;
1813 130 50       209 if ( !ref($self) ) {
1814 0         0 croak("$self is not an object");
1815             }
1816              
1817 130         165 my $name = $AUTOLOAD;
1818 130 50       242 if ($name) {
1819 130         368 $name =~ s/.*://;
1820 130         209 my $value = $self->{$name};
1821 130 100       203 if ( !defined($value) ) {
1822 1         5 return;
1823             }
1824 129 100 66     388 if ( ref($value) eq 'ARRAY' && wantarray ) {
    50 33        
1825 125         140 return @{$value};
  125         404  
1826             } elsif ( ref($value) eq 'HASH' && wantarray ) {
1827 0         0 return %{$value};
  0         0  
1828             }
1829 4         17 return $value;
1830             } ## end if ($name)
1831             } ## end sub AUTOLOAD
1832              
1833             sub DESTROY {
1834 141     141   67258 my $self = shift;
1835 141         362 $self->close();
1836             }
1837              
1838             =head1 INTERNAL METHODS
1839              
1840             =head2 check_options()
1841              
1842             For internal use only. This function is in charge of checking the options to
1843             make sure they are of the right type (array/hash reference where appropriate).
1844              
1845             If add_empty_headers is set, this function turns off C.
1846              
1847             Called by the object, accepts no arguments.
1848              
1849             =cut
1850              
1851             #<<< perltidy destroys this function
1852             sub check_options {
1853 140     140 1 212 my $self = shift;
1854 140         217 while (my ($k, $v) = each(%{$self->{'options'}})) {
  2077         4811  
1855 1941 100       4553 if (!exists($DEFAULT_OPTIONS{$k})) {
    100          
1856 2         214 croak("Option not understood: $k");
1857 2209     2209   5663 } elsif ((firstidx { $_ eq ref($v) } @{$OPTION_TYPES{$k}}) < 0) {
  1939         4129  
1858 2         4 my $expected_ref = join('/', @{$OPTION_TYPES{$k}});
  2         5  
1859 2 100       192 croak("Option $k not of the right type, expected: " . ($expected_ref ? "$expected_ref reference" : 'scalar'));
1860             } ## end elsif ((firstidx { $_ eq ...}))
1861             } ## end while (my ($k, $v) = each...)
1862 136 100       361 if ($self->{'options'}{'add_empty_headers'}) {
1863 1         5 $self->{'options'}{'strict'} &= STRICT_READ;
1864             }
1865             } ## end sub check_options
1866             #>>>
1867              
1868             =head2 create_blank_file()
1869              
1870             For internal use only. C populates the object with proper
1871             internal variables, as well as adding blank headers if C is
1872             set.
1873              
1874             By default, the missing value is set to C<$DEFAULT_MISSING> (C<-999>).
1875              
1876             This function turns on the C option, as C must be enabled to
1877             write.
1878              
1879             The delimiter is left undefined and will be guessed upon reading the first data
1880             line using the L function.
1881              
1882             Called by the object, accepts no arguments.
1883              
1884             =cut
1885              
1886             sub create_blank_file {
1887 4     4 1 7 my ($self) = @_;
1888 4         7 $self->{'actual_fields'} = [];
1889 4         7 $self->{'actual_units'} = [];
1890 4         7 $self->{'fields'} = [];
1891 4         6 $self->{'units'} = [];
1892 4         8 $self->{'headers'} = {};
1893 4         5 $self->{'comments'} = [];
1894 4         11 $self->{'data'} = [];
1895 4         5 $self->{'dataidx'} = -1;
1896 4         8 $self->{'max_dataidx'} = -1;
1897 4         6 $self->{'delim'} = undef;
1898 4         5 $self->{'missing'} = $DEFAULT_MISSING;
1899 4         8 $self->{'below_detection_limit'} = $DEFAULT_BDL;
1900 4         5 $self->{'above_detection_limit'} = $DEFAULT_ADL;
1901              
1902 4         5 $self->{'options'}{'cache'} = 1;
1903 4         4 $self->{'options'}{'fill_ancillary_data'} = 0;
1904              
1905 4 100       10 my $slash = ( $self->{'options'}{'keep_slashes'} ? '/' : '' );
1906 4 100       8 if ($self->{'options'}{'add_empty_headers'}) {
1907 1         4 foreach (@ALL_HEADERS) {
1908 34 50       56 if ( !exists( $self->{'headers'}{"${slash}$_"} ) ) {
1909 34         54 $self->{'headers'}{"${slash}$_"} = '';
1910 34 100       51 if ( $_ eq 'missing' ) {
1911 1         3 $self->{'headers'}{"${slash}missing"} = $DEFAULT_MISSING;
1912             }
1913             } ## end if (!exists($self->{'headers'...}))
1914             } ## end foreach (@ALL_HEADERS)
1915             } ## end if ($add_missing_headers)
1916              
1917 4         6 my $success = 1;
1918 4 50       8 if ( $self->{'options'}{'default_headers'} ) {
1919 4         9 $success &= $self->add_headers( $self->{'options'}{'default_headers'} );
1920             }
1921 4 50       11 if ( $self->{'options'}{'headers'} ) {
1922 4         9 $success &= $self->add_headers( $self->{'options'}{'headers'} );
1923             }
1924 4 50       12 unless ($success) {
1925 0         0 croak("Error creating blank file.");
1926             }
1927             } ## end sub create_blank_file
1928              
1929             =head2 read_headers()
1930              
1931             For internal use only. C reads the metadata at the beginning of
1932             a SeaBASS file.
1933              
1934             Called by the object, accepts no arguments.
1935              
1936             =cut
1937              
1938             sub read_headers {
1939 131     131 1 207 my $self = shift;
1940              
1941 131 50       304 if ( $self->{'headers'} ) {
1942 0         0 return;
1943             }
1944              
1945 131 100       310 my $slash = ( $self->{'options'}{'keep_slashes'} ? '/' : '' );
1946 131         197 my $success = 1;
1947 131         192 my @comments;
1948              
1949 131         228 $self->{'headers'} = {};
1950 131         268 $self->{'comments'} = [];
1951              
1952 131 50       285 if ( $self->{'options'}{'default_headers'} ) {
1953 131         344 $success &= $self->add_headers( $self->{'options'}{'default_headers'} );
1954             }
1955              
1956 131         206 my $handle = $self->{'handle'};
1957 131         194 my $position = my $line_number = 0;
1958 131         170 my @header_lines;
1959              
1960 131         210 my $strict = $self->{'options'}{'strict'};
1961              
1962 131         565 while ( my $line = <$handle> ) {
1963 3287         3802 $line_number++;
1964 3287         5249 strip($line);
1965 3287 50       5345 if ($line) {
1966 3287 100       8196 if ( $line =~ m'^(/end_header)\@?$'i ) {
    100          
    100          
1967 116         339 push( @header_lines, $1 );
1968 116         183 $position = tell($handle);
1969 116         228 last;
1970             } elsif ( $line =~ m"^/" ) {
1971 2460         3564 push( @header_lines, $line );
1972             } elsif ( $line =~ m"^!" ) {
1973 696         963 push( @comments, $line );
1974 696 100       1112 if ( $self->{'options'}{'preserve_header'} ) {
1975 10         14 push( @header_lines, $line );
1976             }
1977             } else { #TODO: search ahead for more headers or comments (in case of merely comment missing !) and fail if READ_STRICT
1978 15         37 seek( $handle, $position, SEEK_SET );
1979 15 50       34 if ( $strict & STRICT_READ ) {
1980 0         0 carp("File missing /end_header or comment missing !, assuming data start: line #$line_number ($line)");
1981             }
1982 15         25 last;
1983             }
1984             } ## end if ($line)
1985 3156         7355 $position = tell($handle);
1986             } ## end while (my $line = <$handle>)
1987              
1988             # add_headers looks at STRICT_WRITE, not STRICT_READ
1989 131 100       273 if ( $strict & STRICT_READ ) {
1990 5         12 $self->{'options'}{'strict'} |= STRICT_WRITE;
1991             } else {
1992 126         272 $self->{'options'}{'strict'} = 0;
1993             }
1994              
1995 131 100       300 if ( $self->{'options'}{'preserve_header'} ) {
1996 1         6 $self->{'preserved_header'} = [@header_lines];
1997             }
1998              
1999 131         310 $success &= $self->add_headers( \@header_lines );
2000              
2001             # restore strictness
2002 131         254 $self->{'options'}{'strict'} = $strict;
2003              
2004 131 50       307 if ( $self->{'options'}{'headers'} ) {
2005 131         304 $success &= $self->add_headers( $self->{'options'}{'headers'} );
2006             }
2007              
2008 131         197 my %headers = %{ $self->{'headers'} };
  131         1307  
2009              
2010 131 100       446 if ( $self->{'options'}{'preserve_comments'} ) {
2011 130         192 push( @{ $self->{'comments'} }, @comments );
  130         392  
2012             }
2013              
2014 131   66     441 my $missing = $headers{"${slash}missing"} || $DEFAULT_MISSING;
2015 131 100       316 if ( $self->{'options'}{'delete_missing_headers'} ) {
2016 1         6 while ( my ( $k, $v ) = each(%headers) ) {
2017 31 100       84 if ( $k =~ m"/?(?:end|begin)_header$|^/?missing$" ) {
2018 3         9 next;
2019             }
2020 28 100 66     132 if ( !defined($v) || $v =~ m"^n/?a(?:\[.*?\])?$"i || lc($v) eq lc($missing) ) {
      66        
2021 7         19 delete( $headers{$k} );
2022             }
2023             } ## end while (my ($k, $v) = each...)
2024             } ## end if ($self->{'options'}...)
2025              
2026 131 100       287 if ( $strict & STRICT_READ ) {
2027 5         15 foreach (@REQUIRED_HEADERS) {
2028 110 100       3508 if ( !exists( $headers{"${slash}$_"} ) ) {
2029 58         90 $success = 0;
2030 58         5317 carp("Missing required header: $_");
2031             }
2032             } ## end foreach (@REQUIRED_HEADERS)
2033 5         28 while ( my ( $header, $value ) = each(%headers) ) {
2034 72 50       132 if ($slash) {
2035 0         0 $header =~ s"^/"";
2036             }
2037 72 50 33 1338   214 if ( ( firstidx { $_ eq $header } @ALL_HEADERS ) < 0 && ( firstidx { $_ eq $header } @HIDDEN_HEADERS ) < 0 ) {
  1338         1721  
  0         0  
2038 0         0 $success = 0;
2039 0         0 carp("$header not a standard header.");
2040             }
2041             } ## end while (my ($header, $value...))
2042 5 50 33     37 if ( $headers{"${slash}begin_header"} || $headers{"${slash}end_header"} ) {
2043 0         0 $success = 0;
2044 0         0 carp("begin_ or end_header incorrect");
2045             }
2046             } ## end if ($strict & STRICT_READ)
2047 131         268 foreach (@ABSOLUTELY_REQUIRED_HEADERS) {
2048 131 100       415 if ( !exists( $headers{"${slash}$_"} ) ) {
2049 1         2 $success = 0;
2050 1 50       3 if ( $strict & STRICT_READ ) {
2051 1         93 carp("Missing absolutely required header: $_");
2052             }
2053             }
2054             } ## end foreach (@ABSOLUTELY_REQUIRED_HEADERS)
2055              
2056 131   100     1457 $self->{'fields'} = [ split( /\s*,\s*/, $headers{"${slash}fields"} || '' ) ];
2057 131   100     1160 $self->{'actual_fields'} = [ split( /\s*,\s*/, $headers{"${slash}fields"} || '' ) ];
2058              
2059 131 100       422 if ( $headers{"${slash}units"} ) {
2060 79         546 $self->{'units'} = [ split( /\s*,\s*/, $headers{"${slash}units"} ) ];
2061 79         557 $self->{'actual_units'} = [ split( /\s*,\s*/, $headers{"${slash}units"} ) ];
2062             } else {
2063 52         82 my (@new_units1);
2064 52         73 foreach ( @{ $self->{'fields'} } ) {
  52         104  
2065 366         495 push( @new_units1, 'unitless' );
2066             }
2067 52         140 my @new_units2 = @new_units1;
2068 52         98 $self->{'units'} = \@new_units1;
2069 52         103 $self->{'actual_units'} = \@new_units2;
2070 52         194 $headers{"${slash}units"} = join( ',', @new_units1 );
2071             } ## end else [ if ($headers{"${slash}units"...})]
2072              
2073 131 100       223 if ( @{$self->{'fields'}} != @{$self->{'units'}} ) {
  131         249  
  131         320  
2074 1 50       2 if ( $strict & STRICT_READ ) {
2075 1         77 carp("/fields and /units don't match up");
2076 1         47 $success = 0;
2077             } else {
2078 0         0 while (@{$self->{'fields'}} > @{$self->{'units'}}){
  0         0  
  0         0  
2079 0         0 push(@{$self->{'units'}}, 'unitless');
  0         0  
2080             }
2081 0         0 while (@{$self->{'fields'}} < @{$self->{'units'}}){
  0         0  
  0         0  
2082 0         0 pop(@{$self->{'units'}});
  0         0  
2083             }
2084             }
2085             }
2086              
2087 131 100       309 unless ($success) {
2088 4 50       10 if ( $strict & STRICT_READ ) {
2089 4         365 croak("Error(s) reading SeaBASS file");
2090             } else {
2091 0         0 return;
2092             }
2093             }
2094              
2095 127         239 $self->{'missing'} = $missing;
2096 127   66     547 $self->{'below_detection_limit'} = $headers{"${slash}below_detection_limit"} || $missing;
2097 127   66     518 $self->{'above_detection_limit'} = $headers{"${slash}above_detection_limit"} || $missing;
2098 127         337 $self->{'line_number'} = $line_number;
2099 127         238 $self->{'data_start_line'} = $line_number;
2100 127         302 $self->{'data_start_position'} = $position;
2101              
2102 127 100       383 if ( $self->{'options'}{'cache'} ) {
2103 117         245 $self->{'data'} = [];
2104             }
2105 127         254 $self->{'dataidx'} = -1;
2106 127         202 $self->{'max_dataidx'} = -1;
2107              
2108 127         497 $self->{'headers'} = \%headers;
2109              
2110 127 100       318 if ( $self->{'options'}{'fill_ancillary_data'} ) {
2111 16         27 my @fields_lc = map {lc} @{ $self->{'fields'} };
  110         184  
  16         32  
2112 16         38 $self->{'fields_lc'} = \@fields_lc;
2113 16         28 $self->{'ancillary'} = {};
2114              
2115 16         31 foreach my $field (@FILL_ANCILLARY_DATA) {
2116 208         316 $self->find_ancillaries($field);
2117             }
2118              
2119 16         30 $self->{'case_conversion'} = {};
2120              
2121 16         28 while ( my ( $field, $value ) = each( %{ $self->{'ancillary'} } ) ) {
  159         430  
2122 143     801   325 my $idx = firstidx { $_ eq $field } @{ $self->{'fields_lc'} };
  801         851  
  143         280  
2123 143         278 my $new_field = $field;
2124 143 100       246 if ( $idx >= 0 ) {
2125 77         116 $new_field = $self->{'fields'}[$idx];
2126             }
2127              
2128 143 100       219 if ( ref($value) ) {
2129 29         57 for ( my $i = 1; $i < @$value; $i++ ) {
2130 52         70 my $new_arg = $value->[$i];
2131 52         160 for ( $value->[$i] =~ /\$(\{\w+\}|\w+)/g ) {
2132 37         123 ( my $variable = $_ ) =~ s/^\{|\}$//g;
2133              
2134 37     196   87 my $idx = firstidx { $_ eq $variable } @{ $self->{'fields_lc'} };
  196         210  
  37         79  
2135 37         82 my $new_variable = $self->{'fields'}[$idx];
2136              
2137 37         315 $new_arg =~ s/\$$variable(\W|\b|$)/\$$new_variable$1/g;
2138 37         232 $new_arg =~ s/\$\{$variable\}/\$$new_variable/g;
2139             } ## end for ($value->[$i] =~ /\$(\{\w+\}|\w+)/g)
2140 52         141 $value->[$i] = $new_arg;
2141             } ## end for (my $i = 1; $i <= length...)
2142             } else {
2143 114         126 my $new_value = $value;
2144 114         410 for ( $value =~ /\$(\{\w+\}|\w+)/g ) {
2145 151         527 ( my $variable = $_ ) =~ s/^\{|\}$//g;
2146              
2147 151     740   350 my $idx = firstidx { $_ eq $variable } @{ $self->{'fields_lc'} };
  740         819  
  151         312  
2148 151 100       330 if ( $idx >= 0 ) {
2149 145         252 my $new_variable = $self->{'fields'}[$idx];
2150              
2151 145         1221 $new_value =~ s/\$$variable(\W|\b|$)/\$$new_variable$1/g;
2152 145         1118 $new_value =~ s/\$\{$variable\}/\$$new_variable/g;
2153             } ## end if ($idx >= 0)
2154             } ## end for ($value =~ /\$(\{\w+\}|\w+)/g)
2155 114         235 $value = $new_value;
2156             } ## end else [ if (ref($value)) ]
2157 143 50       255 if ( $field ne $new_field ) {
2158 0         0 delete( $self->{'ancillary'}{$field} );
2159 0         0 $self->{'case_conversion'}{$field} = $new_field;
2160             }
2161 143         295 $self->{'ancillary'}{$new_field} = $value;
2162             } ## end while (my ($field, $value...))
2163              
2164 16         39 delete( $self->{'fields_lc'} );
2165             } ## end if ($self->{'options'}...)
2166              
2167 127 100 100     597 if ($self->{'options'}{'optional_warnings'} && $headers{"${slash}optical_depth_warning"} && $headers{"${slash}optical_depth_warning"} =~ /true/){
      100        
2168 1         157 carp("/optical_depth_warning=true, indicating optically shallow water. Use caution when using this data.");
2169             }
2170              
2171 127         647 return 1;
2172             } ## end sub read_headers
2173              
2174             =head2 validate_header($header, $value, $strict)
2175              
2176             my ($k, $v, $string) = ('investigators','jason_lefler',0)
2177             $sb_file->validate_header($k, $v, $strict);
2178              
2179             For internal use only. C is in charge of properly formatting
2180             key/value pairs to add to the object. This function will modify the input
2181             variables in place to prepare them for use.
2182              
2183             Returns false if there was a problem with the inputs, such as C is set
2184             and an invalid header was passed in.
2185              
2186             C will set C to C<$DEFAULT_MISSING> (C<-999>) if it
2187             is blank or undefined.
2188              
2189             This function will also change the expected delimiter for rows that have not
2190             yet been cached.
2191              
2192             =cut
2193              
2194             sub validate_header {
2195 2590     2590 1 4526 my ( $self, $k, $v, $strict ) = @_;
2196              
2197 2590         2940 my $success = 1;
2198              
2199 2590 100       3597 if ( !defined($v) ) {
2200 207         290 $v = '';
2201             } else {
2202 2383         3106 strip($v);
2203             }
2204              
2205 2590         4337 strip($k);
2206              
2207 2590         3823 $k = lc($k);
2208              
2209 2590 50 66     4923 if ( length($v) == 0 && $k !~ /_header/ ) {
2210 0 0       0 if ($strict) {
2211 0         0 carp("$k missing value");
2212 0         0 $success = 0;
2213             } else {
2214 0         0 $v = "";
2215             }
2216             } ## end if (length($v) == 0 &&...)
2217              
2218 2590 100 100     7569 if ( !$self->{'options'}{'preserve_case'} || $k =~ /fields|units/ ) {
2219 1311         1710 $v = lc($v);
2220             }
2221              
2222 2590 100       3916 if ( $self->{'options'}{'keep_slashes'} ) {
2223 139 100       231 if ( $k =~ m"^[^/]" ) {
2224 2         5 $k = "/$k";
2225             }
2226              
2227 139 50 66 0   231 if ( $strict && ( firstidx { "/$_" eq $k } @ALL_HEADERS ) < 0 && ( firstidx { "/$_" eq $k } @HIDDEN_HEADERS ) < 0 ) {
  12   33     34  
  0         0  
2228 0         0 carp("Invalid header, $k");
2229 0         0 $success = 0;
2230             }
2231             } else {
2232 2451 100       4900 if ( $k =~ m"^/" ) {
2233 2449         5076 $k =~ s"^/"";
2234             }
2235              
2236 2451 50 66 0   4568 if ( $strict && ( firstidx { $_ eq $k } @ALL_HEADERS ) < 0 && ( firstidx { $_ eq $k } @HIDDEN_HEADERS ) < 0 ) {
  1354   33     1586  
  0         0  
2237 0         0 carp("Invalid header, $k");
2238 0         0 $success = 0;
2239             }
2240             } ## end else [ if ($self->{'options'}...)]
2241              
2242 2590 100       8029 if ( $k =~ /_latitude|_longitude/){
    100          
    100          
    100          
    100          
    100          
2243 308         627 $v =~ s/\[deg\]$//i;
2244             } elsif ( $k =~ /_time/){
2245 158         392 $v =~ s/\[gmt\]$//i;
2246             } elsif ( $k =~ m"^/?delimiter$" ) {
2247 93 100       254 unless ( $self->set_delim( $strict, $v ) ) {
2248 1 50       3 if ($strict) {
2249 0         0 $success = 0;
2250             }
2251             }
2252             } elsif ( $k =~ m"^/?missing$" ) {
2253 114 50       330 $self->{'missing'} = ( length($v) ? $v : $DEFAULT_MISSING );
2254 114         528 $self->{'missing_is_number'} = looks_like_number( $self->{'missing'} );
2255             } elsif ( $k =~ m"^/?above_detection_limit" ) {
2256 7 50       19 $self->{'above_detection_limit'} = ( length($v) ? $v : $self->{'missing'} );
2257 7         24 $self->{'adl_is_number'} = looks_like_number( $self->{'above_detection_limit'} );
2258             } elsif ( $k =~ m"^/?below_detection_limit" ) {
2259 7 50       20 $self->{'below_detection_limit'} = ( length($v) ? $v : $self->{'missing'} );
2260 7         27 $self->{'bdl_is_number'} = looks_like_number( $self->{'below_detection_limit'} );
2261             }
2262              
2263 2590         3441 $_[1] = $k;
2264 2590         2982 $_[2] = $v;
2265              
2266 2590         3553 return $success;
2267             } ## end sub validate_header
2268              
2269             =head2 set_delim($strict, $delim)
2270              
2271             Takes a string declaring the delim (IE: 'comma', 'space', etc) and updates the
2272             object's internal delimiter regex.
2273              
2274             =cut
2275              
2276             sub set_delim {
2277 93     93 1 143 my $self = shift;
2278 93         129 my $strict = shift;
2279 93   50     214 my $delim = shift || '';
2280 93 50       338 if ( $delim eq 'comma' ) {
    50          
    100          
    50          
    100          
2281 0         0 $delim = qr/\s*,\s*/;
2282             } elsif ( $delim eq 'semicolon' ) {
2283 0         0 $delim = qr/\s*;\s*/;
2284             } elsif ( $delim eq 'space' ) {
2285 91         446 $delim = qr/\s+/;
2286             } elsif ( $delim eq 'tab' ) {
2287 0         0 $delim = qr/\t/;
2288             } elsif ($strict) {
2289 1         107 carp("delimiter not understood");
2290             } else {
2291 1 50       3 my $slash = ( $self->{'options'}{'keep_slashes'} ? '/' : '' );
2292 1         3 $self->{'headers'}{"${slash}delimiter"} = 'comma';
2293 1         3 $delim = undef;
2294             }
2295 93         261 $self->{'delim'} = $delim;
2296 93 100       316 return ( $delim ? 1 : 0 );
2297             } ## end sub set_delim
2298              
2299             =head2 update_fields()
2300              
2301             C runs through the currently cached rows and calls
2302             C on each row. It then updates the /fields and /units
2303             headers in the header hash.
2304              
2305             =cut
2306              
2307             sub update_fields {
2308 28     28 1 48 my ($self) = @_;
2309 28 100 66     116 if ( $self->{'options'}{'cache'} && $self->{'max_dataidx'} >= 0 ) {
2310 4         7 foreach my $hash ( @{ $self->{'data'} } ) {
  4         8  
2311 9         24 $self->add_and_remove_fields($hash);
2312             }
2313             }
2314              
2315 28 100       64 my $slash = ( $self->{'options'}{'keep_slashes'} ? '/' : '' );
2316 28         35 $self->{'headers'}{"${slash}fields"} = join( ',', @{ $self->{'actual_fields'} } );
  28         93  
2317 28         54 $self->{'headers'}{"${slash}units"} = join( ',', @{ $self->{'actual_units'} } );
  28         92  
2318             } ## end sub update_fields
2319              
2320             =head2 add_and_remove_fields(\%row)
2321              
2322             Given a reference to a row, this function deletes any fields removed with
2323             C and adds an undefined or /missing value for each field added
2324             via C. If C is set, an undefined value is
2325             given, otherwise, it is filled with the /missing value.
2326              
2327             If C is set, this function adds missing date, time,
2328             date_time, lat, lon, and depth fields to the retrieved row from the header.
2329              
2330             Needlessly returns the hash reference passed in.
2331              
2332             =cut
2333              
2334             sub add_and_remove_fields {
2335 277     277 1 413 my ( $self, $hash ) = @_;
2336 277         863 foreach my $field ( keys(%$hash) ) {
2337 1916 100   7814   3818 if ( ( firstidx { $_ eq $field } @{ $self->{'actual_fields'} } ) < 0 ) {
  7814         10838  
  1916         3439  
2338 21 50 33 0   45 unless ( $self->{'options'}{'fill_ancillary_data'} && ( firstidx { $_ eq $field } keys( %{ $self->{'ancillary'} } ) ) >= 0 ) {
  0         0  
  0         0  
2339 21         50 delete( $hash->{$field} );
2340             }
2341             }
2342             } ## end foreach my $field (keys(%$hash...))
2343              
2344 277 100       715 my $missing = ( $self->{'options'}{'missing_data_to_undef'} ? undef : $self->{'missing'} );
2345 277         788 while ( my ( $variable, $pad ) = each(%FIELD_FORMATTING) ) {
2346 2216   33     4641 my $case_var = $self->{'case_conversion'}{$variable} || $variable;
2347 2216 50 66     5365 if ( defined( $hash->{$case_var} ) && ( !defined($missing) || $hash->{$case_var} != $missing ) ) {
      100        
2348 57         197 $hash->{$case_var} = sprintf( $pad, $hash->{$case_var} );
2349             }
2350             } ## end while (my ($variable, $pad...))
2351              
2352 277 100       539 if ( defined($self->{'ancillary'}) ) {
2353 19         42 $self->{'ancillary_tmp'} = {};
2354 19         33 for my $variable (@FILL_ANCILLARY_DATA) {
2355 247 100       403 if ( defined($self->{'ancillary'}{$variable}) ) {
2356 173         296 my $value = $self->extrapolate_variables( $missing, $self->{'ancillary'}{$variable}, $hash );
2357 173 50       293 if ( defined($value) ) {
2358 173         327 $hash->{$variable} = $value;
2359             }
2360             } ## end if ($self->{'ancillary'...})
2361             } ## end for my $variable (@FILL_ANCILLARY_DATA)
2362             } ## end if ($self->{'ancillary'...})
2363              
2364 277         330 foreach my $field ( @{ $self->{'actual_fields'} } ) {
  277         462  
2365 1972 100       3020 if ( !exists( $hash->{$field} ) ) {
2366 61         122 $hash->{$field} = $missing;
2367             }
2368             }
2369              
2370 277         409 return $hash;
2371             } ## end sub add_and_remove_fields
2372              
2373             =head2 guess_delim($line)
2374              
2375             C is is used to guess the delimiter of a line. It is not very
2376             intelligent. If it sees any commas, it will assume the delimiter is a comma.
2377             Then, it checks for tabs, spaces, then semi-colons. Returns 1 on success. If
2378             it doesn't find any, it will throw a warning and return undef.
2379              
2380             =cut
2381              
2382             sub guess_delim {
2383 35     35 1 65 my ( $self, $line ) = @_;
2384 35         55 my $delim_string = '';
2385 35 100       187 if ( $line =~ /,/ ) {
    50          
    50          
    0          
2386 2         37 my $delim = qr/\s*,\s*/;
2387 2         3 $self->{'delim'} = $delim;
2388 2         4 $delim_string = 'comma';
2389             } elsif ( $line =~ /\t/ ) {
2390 0         0 my $delim = qr/\t/;
2391 0         0 $self->{'delim'} = $delim;
2392 0         0 $delim_string = 'tab';
2393             } elsif ( $line =~ /\s+/ ) {
2394 33         136 my $delim = qr/\s+/;
2395 33         66 $self->{'delim'} = $delim;
2396 33         56 $delim_string = 'space';
2397             } elsif ( $line =~ /;/ ) {
2398 0         0 my $delim = qr/\s*;\s*/;
2399 0         0 $self->{'delim'} = $delim;
2400 0         0 $delim_string = 'semicolon';
2401             } else {
2402 0         0 carp("No delimiter defined or can be guessed");
2403 0         0 return;
2404             }
2405 35 100       122 $self->{'headers'}{ ( $self->{'options'}{'keep_slashes'} ? '/' : '' ) . 'delimiter' } = $delim_string;
2406 35         128 return 1;
2407             } ## end sub guess_delim
2408              
2409             =head2 ingest_row(\%data_row | \@data_row | $data_row | %data_row)
2410              
2411             For mostly internal use, parses arguments for C, C, and C
2412             and returns a hash or hash reference of the data row. Given a hash reference,
2413             it will merely return it.
2414              
2415             Given an array or array reference, it will assume each element is a field as
2416             listed in either C or C. If the number of elements
2417             matches C, it uses assumes it's that. If it doesn't match, it is
2418             tried to match against C. If it doesn't match either, a warning is
2419             issued and the return is undefined.
2420              
2421             Given a non-reference scalar, it will split the scalar based on the current
2422             delimiter. If one is not defined, it is guessed. If it cannot be guessed, the
2423             return is undefined.
2424              
2425             If the inputs are successfully parsed, all keys are turned lowercase.
2426              
2427             =cut
2428              
2429             sub ingest_row {
2430 27     27 1 35 my $self = shift;
2431 27         34 my %new_row;
2432 27 100       68 if ( $#_ < 0 ) {
2433 1         89 carp("Incorrect number of arguments to ingest_row()");
2434 1         75 return;
2435             }
2436 26         36 my $arrayref;
2437 26 100       76 if ( ref( $_[0] ) eq 'HASH' ) {
    100          
    50          
2438 14         19 %new_row = %{ shift(@_) };
  14         52  
2439             } elsif ( ref( $_[0] ) eq 'ARRAY' ) {
2440 3         4 $arrayref = $_[0];
2441             } elsif ( !ref( $_[0] ) ) {
2442 9 100       21 if ( $#_ == 0 ) {
    100          
2443 7 50 66     23 if ( !$self->{'delim'} && !$self->guess_delim( $_[0] ) ) {
2444 0         0 return;
2445             }
2446 7         49 $arrayref = [ split( $self->{'delim'}, $_[0] ) ];
2447             } elsif ( $#_ % 2 == 1 ) {
2448 1         3 %new_row = @_;
2449             } else {
2450 1         91 carp('Even sized list, scalar, or hash/array reference expected');
2451 1         78 return;
2452             }
2453             } else {
2454 0         0 carp("Arguments to ingest_row() not understood.");
2455 0         0 return;
2456             }
2457              
2458 25 100       63 if ($arrayref) {
2459 10         12 my $iterator;
2460 10 100       11 if ( scalar( @{ $self->{'actual_fields'} } ) == scalar( @{$arrayref} ) ) {
  10 50       14  
  10         19  
2461 8         38 $iterator = each_arrayref( $self->{'actual_fields'}, $arrayref );
2462 2         4 } elsif ( scalar( @{ $self->{'fields'} } ) == scalar( @{$arrayref} ) ) {
  2         5  
2463 2         6 $iterator = each_arrayref( $self->{'fields'}, $arrayref );
2464 2         4 $self->add_and_remove_fields( \%new_row );
2465             } else {
2466 0         0 my $actual_fields = scalar( @{ $self->{'actual_fields'} } );
  0         0  
2467 0         0 my $fields = scalar( @{ $self->{'fields'} } );
  0         0  
2468 0 0       0 if ( $actual_fields == $fields ) {
2469 0         0 carp("Invalid number of elements, expected $fields");
2470             } else {
2471 0         0 carp("Invalid number of elements, expected $actual_fields or $fields");
2472             }
2473 0         0 return;
2474             } ## end else [ if (scalar(@{$self->{'actual_fields'...}}))]
2475 10         43 while ( my ( $k, $v ) = $iterator->() ) {
2476 52         163 $new_row{$k} = $v;
2477             }
2478             } ## end if ($arrayref)
2479              
2480 25         66 %new_row = map { lc($_) => $new_row{$_} } keys %new_row;
  71         178  
2481              
2482 25 50       67 if (wantarray) {
2483 0         0 return %new_row;
2484             } else {
2485 25         60 return \%new_row;
2486             }
2487             } ## end sub ingest_row
2488              
2489             =head2 find_ancillaries($field_name)
2490              
2491             Used by C to traverse through a field's possible
2492             substitutes in C<%ANCILLARY> and try to find the most suitable replacement.
2493             Values of fields in C<%ANCILLARY> are array references, where each element is
2494             either:
2495              
2496             =over 4
2497              
2498             =item * a string of existing field names used to create the value
2499              
2500             =item * an array reference of the form [converter function, parsing regex
2501             (optional), arguments to converter, ... ]
2502              
2503             =item * a hash reference of the form { header => qr/parsing_regex/ }
2504              
2505             =back
2506              
2507             If the element is an array reference and an argument requires a field from the
2508             file, all arguments are parsed and the variables within them extrapolated, then
2509             the array is put into C<< $self->{'ancillary'} >>.
2510              
2511             If no value can be ascertained, it will not be added to the data rows.
2512              
2513             The value found is stored in C<< $self->{'ancillary'} >>. Returns 1 on
2514             success, 0 if the field cannot be filled in.
2515              
2516             =cut
2517              
2518             sub find_ancillaries {
2519 406     406 1 564 my ( $self, $field ) = @_;
2520 406 100       668 if ( $self->{'ancillary'}{$field} ) {
2521 99         160 return 1;
2522             }
2523 307     1833   746 my $idx = firstidx { $_ eq $field } @{ $self->{'fields_lc'} };
  1833         1908  
  307         622  
2524 307 100       715 if ( $idx >= 0 ) {
2525 77         175 $self->{'ancillary'}{$field} = "\$\{$field\}";
2526 77         140 return 1;
2527             }
2528              
2529 230 50       420 my $slash = ( $self->{'options'}{'keep_slashes'} ? '/' : '' );
2530 230 100       255 foreach my $attempt ( @{ $ANCILLARY{$field} || [] } ) {
  230         472  
2531 416 100       715 if ( ref($attempt) eq 'HASH' ) {
    100          
    50          
2532 284         318 keys( %{$attempt} ); #reset each() iterator between calls
  284         360  
2533 284         336 while ( my ( $where, $regex ) = each( %{$attempt} ) ) {
  541         1188  
2534 284 100 66     909 if ( $where =~ /^\$/ ) {
    100 66        
2535 122 100   690   517 if ( ( firstidx { "\$$_" eq $where } $self->fields() ) >= 0 ) {
  690         1058  
2536 15     33   54 $self->{'ancillary'}{$field} = [ sub { return shift; }, $regex, $where ];
  33         52  
2537 15         52 return 1;
2538             }
2539             } elsif ( defined( $self->{'headers'}{"$slash$where"} ) && $self->{'headers'}{"$slash$where"} =~ $regex && lc($1) ne 'na' ) {
2540 12         32 $self->{'ancillary'}{$field} = $1;
2541 12         29 return 1;
2542             }
2543             } ## end while (my ($where, $regex...))
2544             } elsif ( ref($attempt) eq 'ARRAY' ) {
2545 89         152 my @attempt = @$attempt;
2546 89         121 my $function = shift(@attempt);
2547 89         99 my $regex;
2548 89 100       144 if ( ref( $attempt[0] ) eq 'Regexp' ) {
2549 41         51 $regex = shift(@attempt);
2550             }
2551 89         99 my $success = 1;
2552 89         91 my @args;
2553 89         105 foreach my $argument (@attempt) {
2554 89         103 my $tmparg = $argument;
2555 89         363 for ( $argument =~ /\$(\{\w+\}|\w+)/g ) {
2556 115         284 ( my $variable = $_ ) =~ s/^\{|\}$//g;
2557 115         199 $success &= $self->find_ancillaries($variable);
2558 115 100       161 if ($success) {
2559 40 50       64 if ( ref( $self->{'ancillary'}{$variable} ) ) {
2560 0         0 $tmparg =~ s/\$$variable(\W|\b|$)/\$\{$variable\}$1/g;
2561             } else {
2562 40         57 my $value = $self->{'ancillary'}{$variable};
2563 40         396 $tmparg =~ s/\$$variable(\W|\b|$)/$value$1/g;
2564 40         251 $tmparg =~ s/\$\{$variable\}/$value/g;
2565             }
2566             } else {
2567 75         92 last;
2568             }
2569             } ## end for ($argument =~ /\$(\{\w+\}|\w+)/g)
2570 89         162 push( @args, $tmparg );
2571             } ## end foreach my $argument (@attempt)
2572 89 100       172 if ($success) {
2573 14 100       23 if ($regex) {
2574 8         12 unshift( @args, $regex );
2575             }
2576 14         41 $self->{'ancillary'}{$field} = [ $function, @args ];
2577 14         37 return 1;
2578             } ## end if ($success)
2579             } elsif ( !ref($attempt) ) {
2580 43         48 my $success = 1;
2581 43         55 my $tmparg = $attempt;
2582 43         218 for ( $attempt =~ /\$(\{\w+\}|\w+)/g ) {
2583 83         501 ( my $variable = $_ ) =~ s/^\{|\}$//g;
2584 83         175 $success &= $self->find_ancillaries($variable);
2585 83 100       141 if ($success) {
2586 65 100       112 if ( ref( $self->{'ancillary'}{$variable} ) ) {
2587 6         44 $tmparg =~ s/\$$variable(\W|\b|$)/\$\{$variable\}$1/g;
2588             } else {
2589 59         78 my $value = $self->{'ancillary'}{$variable};
2590 59         886 $tmparg =~ s/\$$variable(\W|\b|$)/$value$1/g;
2591 59         505 $tmparg =~ s/\$\{$variable\}/$value/g;
2592             }
2593             } else {
2594 18         17 last;
2595             }
2596             } ## end for ($attempt =~ /\$(\{\w+\}|\w+)/g)
2597 43 100       96 if ($success) {
2598 25         55 $self->{'ancillary'}{$field} = $tmparg;
2599 25         88 return 1;
2600             }
2601             } ## end elsif (!ref($attempt))
2602             } ## end foreach my $attempt (@{$ANCILLARY...})
2603              
2604 164         257 return 0;
2605             } ## end sub find_ancillaries
2606              
2607             =head2 extrapolate_variables($missing, $expression, \%row)
2608              
2609             Used by C to convert a parsed ancillary string, such as
2610             C<'$year$month$day'>, into a real value using the fields from the C<\%row>.
2611             C<$expression>s are strings figured out by C and stored in
2612             C<< $self->{'ancillary'} >>.
2613              
2614             The return is undefined if a value cannot be created (IE: a required field is
2615             missing).
2616              
2617             =cut
2618              
2619             sub extrapolate_variables {
2620 220     220 1 333 my ( $self, $missing, $expression, $row ) = @_;
2621              
2622 220 100       338 if ( ref($expression) ) {
2623 47         79 return $self->extrapolate_function( $missing, $expression, $row );
2624             } else {
2625 173         205 my $tmpexpr = $expression;
2626 173         598 for ( $expression =~ /\$(\{\w+\}|\w+)/g ) {
2627 221         574 ( my $variable = $_ ) =~ s/^\{|\}$//g;
2628 221         237 my $value;
2629 221 100 66     580 if ( $self->{'ancillary_tmp'}{$variable} ) {
    50 33        
    0          
2630 123         171 $value = $self->{'ancillary_tmp'}{$variable};
2631             } elsif ( defined( $row->{$variable} ) && ( !defined($missing) || $row->{$variable} != $missing ) ) {
2632 98         128 $value = $row->{$variable};
2633 98         165 $self->{'ancillary_tmp'}{$variable} = $value;
2634             } elsif ( ref( $self->{'ancillary'}{$variable} ) ) {
2635 0         0 $value = $self->extrapolate_function( $missing, $self->{'ancillary'}{$variable}, $row );
2636 0 0       0 if ( !defined($value) ) {
2637 0         0 return;
2638             }
2639             } else {
2640 0         0 return;
2641             }
2642              
2643 221         2355 $tmpexpr =~ s/\$$variable(\W|\b|$)/$value$1/g;
2644 221         1413 $tmpexpr =~ s/\$\{$variable\}/$value/g;
2645             } ## end for ($expression =~ /\$(\{\w+\}|\w+)/g)
2646 173         432 return $tmpexpr;
2647             } ## end else [ if (ref($expression)) ]
2648             } ## end sub extrapolate_variables
2649              
2650             =head2 extrapolate_function($missing, $expression, \%row)
2651              
2652             If the value stored in C<< $self->{'ancillary'} >> is an array reference, this
2653             function uses the array to create an actual value. See
2654             L for an explanation of the
2655             array.
2656              
2657             =cut
2658              
2659             sub extrapolate_function {
2660 47     47 1 80 my ( $self, $missing, $expression, $row ) = @_;
2661 47         53 my $value;
2662 47         92 my ( $function, @args ) = @$expression;
2663 47         53 my $regex;
2664              
2665 47 100       92 if ( ref( $args[0] ) eq 'Regexp' ) {
2666 41         54 $regex = shift(@args);
2667             }
2668 47         74 for (@args) {
2669 47         78 $_ = $self->extrapolate_variables( $missing, $_, $row );
2670 47 50       98 if ( !defined($_) ) {
2671 0         0 return;
2672             }
2673             } ## end for (@args)
2674 47         99 $value = &$function(@args);
2675 47 100       85 if ($regex) {
2676 41 50       165 if ( $value =~ $regex ) {
2677 41         95 $value = $1;
2678             }
2679             }
2680              
2681 47         106 return $value;
2682             } ## end sub extrapolate_function
2683              
2684             =head1 STATIC METHODS
2685              
2686             =head2 strip(@list)
2687              
2688             my @space_filled_lines = (' line1 ', ' line2', 'line3 ', 'line4');
2689             strip(@space_filled_lines);
2690             print @space_filled_lines; #line1line2line3line4
2691              
2692             Runs through the list and removes leading and trailing whitespace. All changes
2693             are made in place.
2694              
2695             It is literally this:
2696              
2697             sub strip {
2698             s/^\s+|\s+$//g for @_;
2699             }
2700              
2701             =cut
2702              
2703             # Not an object method!
2704             sub strip {
2705 8526     8526 1 35449 s/^\s+|\s+$//g for @_;
2706             }
2707              
2708             =head2 julian_to_greg($yyyyjjj)
2709              
2710             Converts a date in the day of year format YYYYJJJ into YYYYMMDD. Returns the
2711             newly formatted string or undefined if the input does not match the required
2712             format.
2713              
2714             This uses the C function from C to do the heavy
2715             lifting.
2716              
2717             =cut
2718              
2719             # Not an object method!
2720             sub julian_to_greg {
2721 14     14 1 24 my ($yyyyjjj) = @_;
2722 14 50       44 if ( $yyyyjjj =~ /^(\d{4})(\d{3})$/ ) {
2723 14         76 my ( $y, $m, $d ) = Add_Delta_Days( $1, 1, 1, $2 - 1 );
2724 14         53 return sprintf( '%04d%02d%02d', $y, $m, $d );
2725             }
2726 0           return;
2727             } ## end sub julian_to_greg
2728              
2729             =head1 CAVEATS/ODDITIES
2730              
2731             =head2 Duplicate Fields
2732              
2733             This class will not allow a field to be added to the object if a field of the
2734             same name already exists. If a file being read has duplicate field names, only
2735             the B one is used. No warning is issued. If C is used to
2736             remove it, only the first instance will be deleted. To delete all instances,
2737             use C<< $sb_file->remove_field($sb_file->find_fields('chl')) >>. This may
2738             change in future releases.
2739              
2740             =head2 Changing Delimiter or Missing Value
2741              
2742             Modifying the delimiter header on a file that is being read will cause any
2743             non-cached rows to be split by the new delimiter, which should break most/all
2744             files. If the delimiter must be changed, call C to cache all the rows,
2745             then change it. This will obviously not work if caching is turned off. The
2746             same is true for setting the missing value, but only really applies when the
2747             C option is used (same goes to below detection limit).
2748              
2749             =head2 Below Detection Limit
2750              
2751             Below detection limit is only partially supported. If C is
2752             used, fields equal to C will be set to C, as
2753             well. Files modified while using C will have all data
2754             equal to C written out set to the missing value instead
2755             of the below detection limit value. If the below detection limit value is equal
2756             to the missing value or C is used, the
2757             C header will not be written.
2758              
2759             =head1 AUTHOR
2760              
2761             Jason Lefler, C<< >>
2762              
2763             =head1 BUGS
2764              
2765             Please report any bugs or feature requests to C
2766             rt.cpan.org>, or through the web interface at
2767             L. I will be
2768             notified, and then you'll automatically be notified of progress on your bug as
2769             I make changes.
2770              
2771             =head1 SUPPORT
2772              
2773             You can find documentation for this module with the perldoc command.
2774              
2775             perldoc Data::SeaBASS
2776              
2777             You can also look for information at:
2778              
2779             =over 4
2780              
2781             =item * RT: CPAN's request tracker (report bugs here)
2782              
2783             L
2784              
2785             =item * AnnoCPAN: Annotated CPAN documentation
2786              
2787             L
2788              
2789             =item * CPAN Ratings
2790              
2791             L
2792              
2793             =item * Search CPAN
2794              
2795             L
2796              
2797             =back
2798              
2799             =head1 LICENSE AND COPYRIGHT
2800              
2801             Copyright 2014 Jason Lefler.
2802              
2803             This program is free software; you can redistribute it and/or modify it under
2804             the terms of either: the GNU General Public License as published by the Free
2805             Software Foundation; or the Artistic License.
2806              
2807             See http://dev.perl.org/licenses/ for more information.
2808              
2809             =cut
2810              
2811             1; # End of Data::SeaBASS