File Coverage

blib/lib/Data/SeaBASS.pm
Criterion Covered Total %
statement 908 1042 87.1
branch 472 594 79.4
condition 135 198 68.1
subroutine 71 75 94.6
pod 48 48 100.0
total 1634 1957 83.5


line stmt bran cond sub pod time code
1             package Data::SeaBASS;
2              
3 25     25   2713960 use strict;
  25         251  
  25         783  
4 25     25   153 use warnings;
  25         50  
  25         1786  
5              
6             =head1 NAME
7              
8             Data::SeaBASS - Object-oriented interface for reading/writing SeaBASS files
9              
10             =head1 VERSION
11              
12             version 0.173030
13              
14             =cut
15              
16             our $VERSION = '0.173030'; # VERSION
17              
18             =head1 SYNOPSIS
19              
20             To read SeaBASS files:
21              
22             use Data::SeaBASS qw(STRICT_READ STRICT_WRITE INSERT_BEGINNING INSERT_END);
23              
24             my $sb_file = Data::SeaBASS->new("input.txt");
25              
26             # Calculate the average chlorophyll value using next
27             my $chl_total = 0;
28             my $measurements = 0;
29            
30             while (my $row = $sb_file->next()){
31             if (defined($row->{'chl'})){
32             $chl_total += $row->{'chl'};
33             $measurements++;
34             }
35             }
36             if ($measurements){
37             print $chl_total/$measurements;
38             } else {
39             print "No chl values.";
40             }
41            
42             #alternatively:
43             $sb_file->rewind();
44             while (my %row = $sb_file->next()){
45             if (defined($row{'chl'})){
46             $chl_total += $row{'chl'};
47             $measurements++;
48             }
49             }
50              
51             # Calculate the average chlorophyll value using where
52             my $chl_total2 = 0;
53             my $measurements2 = 0;
54             $sb_file->where(sub {
55             if (defined($_->{'chl'})){
56             $chl_total2 += $_->{'chl'};
57             $measurements2++;
58             }
59             });
60             if ($measurements2){
61             print $chl_total2/$measurements2;
62             } else {
63             print "No chl values.";
64             }
65            
66             Or to modify SeaBASS files:
67              
68             use Data::SeaBASS qw(STRICT_READ STRICT_WRITE INSERT_BEGINNING INSERT_END);
69              
70             my $sb_file = Data::SeaBASS->new("input.txt");
71              
72             # Add a one degree bias to water temperature
73             while (my $row = $sb_file->next()){
74             $row->{'wt'} += 1;
75             $sb_file->update($row);
76             }
77            
78             $sb_file->write(); # to STDOUT
79              
80             # Remove the one degree bias to water temperature
81             $sb_file->where(sub {
82             $_->{'wt'} -= 1;
83             });
84            
85             $sb_file->write("output_file.txt");
86            
87             Or to start a SeaBASS file from scratch:
88              
89             use Data::SeaBASS qw(STRICT_READ STRICT_WRITE INSERT_BEGINNING INSERT_END);
90              
91             my $sb_file = Data::SeaBASS->new({strict => 0, add_empty_headers => 1});
92             $sb_file->add_field('lat','degrees');
93             $sb_file->add_field('lon','degrees');
94             $sb_file->append({'lat' => 1, 'lon' => 2});
95             $sb_file->append("3,4"); # or if you're reading from a CSV file
96             $sb_file->write();
97              
98             =head1 DESCRIPTION
99              
100             C provides an easy to use, object-oriented interface for
101             reading, writing, and modifying SeaBASS data files.
102              
103             =head2 What is SeaBASS?
104              
105             L Bio-optical Archive and Storage
106             System housed at Goddard Space Flight Center.
107             L provides the permanent public
108             repository for data collected under the auspices of the NASA Ocean Biology and
109             Biogeochemistry Program. It also houses data collected by participants in the
110             NASA Sensor Intercomparision and Merger for Biological and Oceanic
111             Interdisciplinary Studies (SIMBIOS) Program. SeaBASS includes marine
112             bio-optical, biogeochemical, and (some) atmospheric data.
113              
114             =head2 SeaBASS File Format
115              
116             SeaBASS files are plain ASCII files with a special header and a matrix of
117             values.
118              
119             =head3 Header
120              
121             The SeaBASS header block consists of many lines of header-keyword pairs. Some
122             headers are optional but most, although technically not required for reading,
123             are required to be ingested into the system. More detailed information is
124             available in the SeaBASS L
125             article|http://seabass.gsfc.nasa.gov/wiki/article.cgi?article=metadataheaders>.
126             The only absolutely required header for this module to work is the /fields
127             line. This module turns fields and units lowercase at all times.
128              
129             /begin_header
130             /delimiter=space
131             /missing=-999
132             /fields=date,time,lat,lon,depth,wt,sal
133             /end_header
134            
135             =head3 Body
136              
137             The SeaBASS body is a matrix of data values, organized much like a spreadsheet.
138             Each column is separated by the value presented in the /delimiter header.
139             Likewise, missing values are indicated by the value presented in the /missing
140             header. The /fields header identifies the geophysical parameter presented in
141             each column.
142              
143             /begin_header
144             /delimiter=space
145             /missing=-999
146             /fields=date,time,lat,lon,depth,wt,sal
147             /end_header
148             19920109 16:30:00 31.389 -64.702 3.4 20.7320 -999
149             19920109 16:30:00 31.389 -64.702 19.1 20.7350 -999
150             19920109 16:30:00 31.389 -64.702 38.3 20.7400 -999
151             19920109 16:30:00 31.389 -64.702 59.6 20.7450 -999
152              
153             =head3 Strictly Speaking
154              
155             SeaBASS files are run through a program called
156             L before
157             they are submitted and before they are ingested into a NASA relational database
158             management system. Some of the things it checks for are required
159             L
160             and proper L
161             names|http://seabass.gsfc.nasa.gov/wiki/article.cgi?article=stdfields>. All
162             data must always have an associated depth, time, and location, though these
163             fields may be placed in the header and are not always required in the data.
164             Just because this module writes the files does not mean they will pass FCHECK.
165              
166             Files are case-INsensitive. Headers are not allowed to have any whitespace.
167              
168             =cut
169              
170 25     25   176 use Carp qw(:DEFAULT);
  25         55  
  25         4144  
171 25     25   177 use Fcntl qw(SEEK_SET);
  25         62  
  25         1306  
172 25     25   5662 use List::MoreUtils qw(firstidx each_arrayref);
  25         152733  
  25         206  
173 25     25   31695 use Date::Calc qw(Add_Delta_Days);
  25         133781  
  25         2401  
174 25     25   239 use Scalar::Util qw(looks_like_number);
  25         58  
  25         305703  
175              
176             require Exporter;
177             our @ISA = qw(Exporter);
178             our @EXPORT_OK = qw(STRICT_READ STRICT_WRITE STRICT_ALL INSERT_BEGINNING INSERT_END);
179             our @EXPORT = qw();
180              
181             our $AUTOLOAD;
182              
183             =head1 EXPORT
184              
185             This module does not export anything by default.
186              
187             =head2 STRICT_READ
188              
189             C is used with the C option, enabling error messages when
190             reading header lines and inserting header data.
191              
192             =head2 STRICT_WRITE
193              
194             C is used with the C option, enabling error messages when
195             writing the data to a file/stream.
196              
197             =head2 STRICT_ALL
198              
199             C is used with the C option, enabling C and
200             C.
201              
202             =head2 INSERT_BEGINNING
203              
204             C is used with L
205             \@data_row | $data_row | %data_row)"> or L
206             $unit [, $position]])"> to insert a data row or field at the beginning of their
207             respective lists.
208              
209             =head2 INSERT_END
210              
211             C is used with L
212             $data_row | %data_row)"> or L
213             $position]])"> to insert a data row or field at the end of their respective
214             lists.
215              
216             =cut
217              
218 288     288 1 7430 sub STRICT_READ {1}
219 454     454 1 1023 sub STRICT_WRITE {2}
220 1     1 1 2375 sub STRICT_ALL {3}
221 22     22 1 86 sub INSERT_BEGINNING {0}
222 104     104 1 308 sub INSERT_END {-1}
223              
224             my %DEFAULT_OPTIONS = (
225             default_headers => {},
226             headers => {},
227             preserve_case => 1,
228             keep_slashes => 0,
229             cache => 1,
230             delete_missing_headers => 0,
231             missing_data_to_undef => 1,
232             preserve_comments => 1,
233             add_empty_headers => 0,
234             strict => STRICT_WRITE,
235             fill_ancillary_data => 0,
236             preserve_header => 0,
237             preserve_detection_limits => 0,
238             optional_warnings => 1,
239             );
240              
241             #return values for ref()
242             my %OPTION_TYPES = (
243             default_headers => [ 'ARRAY', 'HASH' ],
244             headers => [ 'ARRAY', 'HASH' ],
245             preserve_case => [''],
246             keep_slashes => [''],
247             cache => [''],
248             delete_missing_headers => [''],
249             missing_data_to_undef => [''],
250             preserve_comments => [''],
251             add_empty_headers => [''],
252             strict => [''],
253             fill_ancillary_data => [''],
254             preserve_header => [''],
255             preserve_detection_limits => [''],
256             optional_warnings => [''],
257             );
258              
259             # All headers required by STRICT_READ and STRICT_WRITE
260             my @REQUIRED_HEADERS = qw(
261             begin_header
262             investigators
263             affiliations
264             contact
265             experiment
266             cruise
267             data_file_name
268             documents
269             calibration_files
270             data_type
271             start_date
272             end_date
273             start_time
274             end_time
275             north_latitude
276             south_latitude
277             east_longitude
278             west_longitude
279             missing
280             delimiter
281             units
282             end_header
283             );
284              
285             # Headers that must be specified, regardless of strictness.
286             my @ABSOLUTELY_REQUIRED_HEADERS = qw(
287             fields
288             );
289              
290             # Valid headers used for STRICT_READ and STRICT_WRITE
291             our @ALL_HEADERS = qw(
292             begin_header
293             investigators
294             affiliations
295             contact
296             experiment
297             cruise
298             station
299             data_file_name
300             documents
301             calibration_files
302             data_type
303             data_status
304             start_date
305             end_date
306             start_time
307             end_time
308             north_latitude
309             south_latitude
310             east_longitude
311             west_longitude
312             cloud_percent
313             measurement_depth
314             secchi_depth
315             optical_depth_warning
316             water_depth
317             wave_height
318             wind_speed
319             missing
320             below_detection_limit
321             above_detection_limit
322             delimiter
323             fields
324             units
325             end_header
326             );
327              
328             our %HEADER_DEFAULTS = (
329             optical_depth_warning => 'false',
330             );
331              
332             our %OMIT_EMPTY_HEADERS = (
333             optical_depth_warning => 1,
334             );
335              
336             # headers that are allowed but are not to be added during add_empty_headers
337             my @HIDDEN_HEADERS = qw(
338             received
339             );
340              
341             # what to set the missing value to if it's not defined
342             our $DEFAULT_MISSING = -999;
343             our $DEFAULT_BDL = -888;
344             our $DEFAULT_ADL = -777;
345              
346             # overly complex data structure only understandable by idiots
347             # your IQ is the percentage chance that it won't make sense
348             # IE: my IQ = 40, I have a 60% chance of it making sense to me
349             my %ANCILLARY = (
350             'lat' => [ { 'north_latitude' => qr/^(.*?)$/ }, { 'south_latitude' => qr/^(.*?)$/ } ],
351             'lon' => [ { 'east_longitude' => qr/^(.*?)$/ }, { 'west_longitude' => qr/^(.*?)$/ } ],
352             'depth' => [ { 'measurement_depth' => qr/^(.*?)$/ }, ],
353             'date_time' => [ '$date $time', ],
354             'date' => [ [ \&julian_to_greg, '$year$julian' ], [ \&julian_to_greg, '$year$jd' ], [ \&julian_to_greg, '$year$sdy' ], '$year$month$day', ],
355             'year' => [ { '$date' => qr/^(\d{4})/ }, { 'start_date' => qr/^(\d{4})/ }, ],
356             'month' => [ { '$date' => qr/^\d{4}(\d{2})/ }, [ \&julian_to_greg, qr/^\d{4}(\d{2})\d{2}$/, '$year$julian' ], [ \&julian_to_greg, qr/^\d{4}(\d{2})\d{2}$/, '$year$jd' ], [ \&julian_to_greg, qr/^\d{4}(\d{2})\d{2}$/, '$year$sdy' ], { 'start_date' => qr/^\d{4}(\d{2})/ }, ],
357             'day' => [ { '$date' => qr/^\d{6}(\d{2})/ }, [ \&julian_to_greg, qr/^\d{4}\d{2}(\d{2})$/, '$year$julian' ], [ \&julian_to_greg, qr/^\d{4}\d{2}(\d{2})$/, '$year$jd' ], [ \&julian_to_greg, qr/^\d{4}\d{2}(\d{2})$/, '$year$sdy' ], { 'start_date' => qr/^\d{6}(\d{2})/ }, ],
358             'time' => [ '$hour:$minute:$second', ],
359             'hour' => [ { '$time' => qr/^(\d+):/ }, { 'start_time' => qr/^(\d+):/ }, ],
360             'minute' => [ { '$time' => qr/:(\d+):/ }, { 'start_time' => qr/:(\d+):/ }, ],
361             'second' => [ { '$time' => qr/:(\d+)(?:[^:\d]|$)/ }, { 'start_time' => qr/:(\d+)(?:[^:\d]|$)/ }, ],
362             'station' => [ { 'station' => qr/^(.*?)$/ }, ],
363             );
364              
365             # what fill_ancillary_data adds to each row
366             #my @FILL_ANCILLARY_DATA = qw(date time date_time lat lon depth);
367             my @FILL_ANCILLARY_DATA = keys(%ANCILLARY);
368              
369             my %FIELD_FORMATTING = (
370             'year' => '%04d',
371             'month' => '%02d',
372             'day' => '%02d',
373             'julian' => '%03d',
374             'sdy' => '%03d',
375             'hour' => '%02d',
376             'minute' => '%02d',
377             'second' => '%02d',
378             );
379              
380             =head1 CONSTRUCTOR
381              
382             =head2 new([$filename,] [\%options])
383              
384             my $sb_file = Data::SeaBASS->new("input_file.txt");
385             my $sb_file = Data::SeaBASS->new("input_file.txt", { delete_missing_headers => 1 });
386             my $sb_file = Data::SeaBASS->new("output_file.txt", { add_empty_headers => 1 });
387             my $sb_file = Data::SeaBASS->new({ add_empty_headers => 1 });
388              
389             Creates a C object. If the file specified exists, the object
390             can be used to read the file. If the file specified does not exist, an empty
391             object is created and will be written to the specified file by default when
392             invoking C.
393              
394             Options should be given in a hash reference to ensure proper argument parsing.
395             If a file is specified, options can be given as a hash list.
396              
397             =over 4
398              
399             =item * default_headers
400              
401             =item * headers
402              
403             These two options accept either an array reference or a hash reference. They
404             are used to set or override header information. First, headers are read from
405             C, then from the data file itself, then are overridden by
406             whatever is in C.
407              
408             Arguments are an array reference of header lines, or a hash reference of
409             header/value pairs.
410              
411             my $sb_file = Data::SeaBASS->new({
412             default_headers => [
413             '/cruise=fake_cruise',
414             '/experiment=default_experiment',
415             ],
416             headers => {
417             'experiment' => 'real_experiment',
418             },
419             });
420            
421             B Modifying the delimiter or missing value will likely break the
422             object. Modifying these will change the expected format for all rows. Do so
423             with caution.
424              
425             =item * preserve_case
426              
427             C<1> or C<0>, default C<1>. Setting this to C<0> will change all values in the
428             header to lowercase. Header descriptors (the /header part) are always turned
429             to lowercase, as well as all fields and units.
430              
431             =item * keep_slashes
432              
433             C<1> or C<0>, default C<0>. Forces the object to keep the / in the beginning of
434             headers when accessed. If set to C<1>, when using the L
435             \%new_headers | \@get_headers | @get_headers ])"> function, they will be
436             returned with leading slash.
437              
438             =item * cache
439              
440             C<1> or C<0>, default C<1>. Enables caching data rows as they are read. This
441             speeds up re-reads and allows the data to be modified. This is required for
442             writing files.
443              
444             =item * delete_missing_headers
445              
446             C<1> or C<0>, default C<0>. Any headers that are equal to the /missing header,
447             NA, or are not defined (when using the C options) are
448             deleted. They cannot be retrieved using C and will not be written.
449              
450             =item * missing_data_to_undef
451              
452             C<1> or C<0>, default C<1>. If any values in the data block are equal to the
453             /missing, /above_detection_limit, /below_detection_limit headers, they are set
454             to undef when they are retrieved.
455              
456             =item * preserve_comments
457              
458             C<1> or C<0>, default C<1>. Setting this option to zero will discard any
459             comments found in the header.
460              
461             =item * add_empty_headers
462              
463             C<0>, C<1>, or a string. If set to a string, this will populate any missing
464             headers, including optional ones, and will set their value to the string given.
465             If set to 1, the string 'NA' is used. This option disables C.
466              
467             =item * strict
468              
469             my $sb_file = Data::SeaBASS->new("input_file.txt", {strict => STRICT_ALL});
470             my $sb_file = Data::SeaBASS->new("input_file.txt", {strict => (STRICT_READ | STRICT_WRITE)});
471             my $sb_file = Data::SeaBASS->new("input_file.txt", {strict => 0});
472             my $sb_file = Data::SeaBASS->new("input_file.txt", {strict => STRICT_WRITE}); #default
473              
474             =over 4
475              
476             =item * C
477              
478             C will throw errors when reading invalid headers, missing required
479             ones, or an invalid delimiter. This may change in future revisions.
480              
481             =item * C
482              
483             C will throw the same errors when writing the data to a file or
484             stream. C only checks for required headers and invalid headers,
485             but does not check their values to see if they are actually filled. This may
486             change in future revisions.
487              
488             =back
489              
490             =item * fill_ancillary_data
491              
492             C<0> or C<1>, default C<0>. Insert date, time, measurement depth, station, and
493             location values to the data rows from the headers. Values are not overridden
494             if they are already present. This option is only useful when reading files.
495              
496             B It is bad practice to include these fields in the data if they don't
497             change throughout the file. This option is used to remove the burden of
498             checking whether they are in the data or header.
499              
500             B This option will also combine individual date/time
501             parts in the data (year/month/day/etc) to create more uniform date/time fields.
502              
503             B If any part of a date/time is missing, the fields
504             dependent on it will not be added to the row.
505              
506             =item * preserve_header
507              
508             C<0> or C<1>, default C<0>. Preserves header and comment order. This option
509             disables modifying the header, as well, but will not error if you try -- it will
510             simply not be reflected in the output.
511              
512             =item * preserve_detection_limits
513              
514             C<0> or C<1>, default C<0>. Disables setting values equal to below_detection_limit
515             or above_detection_limit to null while reading files. This should only be used
516             during read-only operation, as there is no telling missing data from data
517             outside limits.
518              
519             =item * optional_warnings
520              
521             C<0> or C<1>, default C<1>. Determines whether or not to print warnings deemed
522             optional. For the moment, the only defined warning is for optically shallow data.
523              
524             =back
525              
526             =cut
527              
528             sub new {
529 141     141 1 545454 my ( $class, $file ) = ( shift, shift );
530              
531 141         390 my $self = bless( {}, $class );
532              
533 141         279 my %myoptions;
534              
535 141 100       687 if ( ref($file) eq 'HASH' ) {
    100          
    50          
536 4         51 %myoptions = ( %DEFAULT_OPTIONS, %$file );
537 4         14 $file = '';
538             } elsif ( ref( $_[0] ) eq 'HASH' ) {
539 127         675 %myoptions = ( %DEFAULT_OPTIONS, %{ $_[0] } );
  127         1102  
540             } elsif ( !ref( $_[0] ) ) {
541 10 100       43 if ( $#_ % 2 == 1 ) {
542 9         99 %myoptions = ( %DEFAULT_OPTIONS, @_ );
543             } else {
544 1         263 croak('Even sized list expected');
545             }
546             } else {
547 0         0 croak("Arguments not understood.");
548             }
549              
550 140         639 $self->{'options'} = \%myoptions;
551 140         590 $self->check_options();
552              
553 136 50       524 if ( ref($file) eq 'GLOB' ) {
    100          
    100          
554 0         0 $self->{'handle'} = $file;
555             } elsif ( ref($file) eq 'SCALAR' ) {
556 130         1453 open( my $fh, "<", $file );
557 130         471 $self->{'handle'} = $fh;
558             } elsif ($file) {
559 2 100       10 if ( !ref($file) ) {
560 1 50       15 if ( -r $file ) {
    0          
561 1         19 open( my $fh, "<", $file );
562 1         4 $self->{'handle'} = $fh;
563             } elsif ( $self->{'options'}{'strict'} & STRICT_READ ) {
564 0         0 croak("Strict read set, but input file not found or unreadable.");
565             } else {
566 0         0 $self->{'default_write_to'} = $file;
567 0         0 $file = '';
568             }
569             } else {
570 1         164 croak("Invalid parameter, expected file path or file handle.");
571             }
572             } ## end elsif ($file)
573 135 100       379 if ($file) {
574 131 50       414 unless ( $self->read_headers() ) {
575 0 0       0 unless ( $self->{'options'}{'strict'} & STRICT_READ ) {
576 0         0 return;
577             }
578             }
579             } else {
580 4         16 $self->create_blank_file();
581             }
582 131         474 return $self;
583             } ## end sub new
584              
585             =head1 OBJECT METHODS
586              
587             =head2 add_headers(\%headers | \@header_lines | @header_lines)
588              
589             $sb_file->add_headers({'investigators' => 'jason_lefler'});
590             $sb_file->add_headers(['/investigators=jason_lefler']);
591             $sb_file->add_headers('/investigators=jason_lefler');
592            
593             C is used to add or override metadata for a C, as
594             well as add comments.
595              
596             This function can not be used to change fields/units, see
597             L and
598             L for that.
599              
600             B Modifying the delimiter or missing value halfway through
601             reading/writing will likely break the object. Modifying these will change the
602             expected format for any new or non-cached rows. Do so with caution.
603              
604             =cut
605              
606             sub add_headers {
607 406     406 1 626 my $self = shift;
608 406         637 my $success = 1;
609 406         980 my $strict = $self->{'options'}{'strict'} & STRICT_WRITE;
610 406 100       1195 if ( ref( $_[0] ) eq 'HASH' ) {
    100          
    50          
611 265         391 while ( my ( $k, $v ) = each( %{ $_[0] } ) ) {
  273         1010  
612 8         22 $success &= $self->validate_header( $k, $v, $strict );
613 8         26 $self->{'headers'}{$k} = $v;
614             }
615             } elsif ( ref( $_[0] ) eq 'ARRAY' ) {
616 140         212 foreach ( @{ $_[0] } ) {
  140         360  
617 2594 100 100     7120 if ( $_ =~ /^\s*!/ ) {
    100          
618 11         20 push( @{ $self->{'comments'} }, $_ );
  11         30  
619             } elsif ( $strict && $_ !~ m"^/" ) {
620 2         356 carp("Invalid header line: $_");
621 2         233 $success = 0;
622             } else {
623 2581         7055 my ( $k, $v ) = split( /=/, $_, 2 );
624 2581         5502 $success &= $self->validate_header( $k, $v, $strict );
625 2581         7151 $self->{'headers'}{$k} = $v;
626             }
627             } ## end foreach (@{$_[0]})
628             } elsif ( !ref( $_[0] ) ) {
629 1         3 foreach (@_) {
630 1 50 33     11 if ( $_ =~ /^\s*!/ ) {
    50          
631 0         0 push( @{ $self->{'comments'} }, $_ );
  0         0  
632             } elsif ( $strict && $_ !~ m"^/" ) {
633 0         0 carp("Invalid header line: $_");
634 0         0 $success = 0;
635             } else {
636 1         4 my ( $k, $v ) = split( /=/, $_, 2 );
637 1         5 $success &= $self->validate_header( $k, $v, $strict );
638 1         4 $self->{'headers'}{$k} = $v;
639             }
640             } ## end foreach (@_)
641             } else {
642 0         0 $success = 0;
643             }
644 406         754 return $success;
645             } ## end sub add_headers
646              
647             =head2 headers([ \%new_headers | \@get_headers | @get_headers ])
648              
649             =head2 head
650              
651             =head2 h
652              
653             my %headers = $sb_file->headers(['investigators']);
654             print Dumper(\%headers); # { investigators => 'jason_lefler' }
655            
656             my ($inv) = $sb_file->headers('investigators');
657             print $inv; # jason_lefler
658            
659             $sb_file->headers({investigators => 'jason_lefler'});
660            
661             $sb_file->headers()->{'investigators'} = 'jason_lefler';
662            
663             C is used to read or modify header values. Given an array reference
664             of header names, it will return a hash/hash reference with header/value pairs.
665             Given a plain list of header names, it will return an array/array reference of
666             the given header values. Given a hash reference, this function is a proxy for
667             L.
668              
669             If C is set, then headers will be returned as such, IE: C<<
670             {'/investigators' => 'jason_lefler'} >>.
671              
672             This function can also be used to set header values without going through the
673             normal validation.
674              
675             C and C are aliases to C.
676              
677             =cut
678              
679 0     0 1 0 sub head { shift->headers(@_); }
680 22     22 1 2240 sub h { shift->headers(@_); }
681              
682             sub headers {
683 25     25 1 1215 my $self = shift;
684 25 100       76 if ( !@_ ) {
    100          
    100          
685 21         127 return $self->{'headers'};
686             } elsif ( ref( $_[0] ) eq 'HASH' ) {
687 1         7 return $self->add_headers(@_);
688             } elsif ( ref( $_[0] ) eq 'ARRAY' ) {
689 1         3 my %ret;
690 1         3 for my $header ( @{ $_[0] } ) {
  1         4  
691 2         9 $ret{$header} = $self->{'headers'}{ lc($header) };
692             }
693 1 50       5 if (wantarray) {
694 1         10 return %ret;
695             } else {
696 0         0 return \%ret;
697             }
698             } else {
699 2         6 my @ret;
700 2         7 foreach (@_) {
701 4 50       16 if ( !ref ) {
702 4         16 my $value = $self->{'headers'}{ lc($_) };
703 4 50       17 push( @ret, defined($value) ? $value : undef );
704             } else {
705 0         0 croak("Argument not understood: $_");
706             }
707             } ## end foreach (@_)
708 2 100       11 if (wantarray) {
    50          
709 1         6 return @ret;
710             } elsif ( $#ret == 0 ) {
711 0         0 return $ret[0];
712             } else {
713 1         6 return \@ret;
714             }
715             } ## end else [ if (!@_) ]
716              
717             } ## end sub headers
718              
719             =head2 data([$index])
720              
721             =head2 d
722              
723             =head2 body
724              
725             =head2 b
726              
727             =head2 all
728              
729             my $row = $sb_file->data(1);
730             my @rows = $sb_file->all();
731              
732             C is responsible for returning either a data line via an index or all of
733             the data lines at once.
734              
735             Data is returned as C<< field => value >> pairs.
736              
737             If given an index: in list context, returns the hash of the row; in scalar
738             context, returns a reference to the row.
739              
740             If not given an index: in list context, returns an array of the rows; in scalar
741             context, returns a reference to an array of the rows.
742              
743             If given an index out of range, returns undef. If given a negative index,
744             Cs the file, then returns undef.
745              
746             If C is enabled and the row has already been read, it is retrieved from
747             the cache. If it has not already be read, all rows leading up to the desired
748             row will be read and cached, and the desired row returned.
749              
750             If C is disabled and either all rows are retrieved or a previously
751             retrieved row is called again, the file will C, then seek to the
752             desired row.
753              
754             C, C, C, and C are all aliases to C. (Yes, that means
755             C can be used with arguments, it would just look silly.)
756              
757             =cut
758              
759 0     0 1 0 sub body { shift->data(@_); }
760 0     0 1 0 sub b { shift->data(@_); }
761 2     2 1 1006 sub d { shift->data(@_); }
762 42     42 1 5887 sub all { shift->data(@_); }
763              
764             sub data {
765 133     133 1 4734 my ( $self, $index ) = @_;
766 133 100       329 if ( defined($index) ) {
767 81 100       219 if ( $index < 0 ) {
768 12         29 $self->rewind();
769 12         18 return;
770             }
771 69 100       186 if ( $self->{'options'}{'cache'} ) {
772 63 100       180 if ( $index > $self->{'max_dataidx'} ) {
773 39         75 my $startidx = $self->{'dataidx'};
774 39         130 for ( my $i = 0; $i < ( $index - $startidx ); $i++ ) {
775 73 100       191 if ( !$self->next() ) {
776 4         12 return;
777             }
778             }
779             } ## end if ($index > $self->{'max_dataidx'...})
780              
781 59         97 $self->{'dataidx'} = $index;
782              
783 59 50       128 if (wantarray) {
784 0         0 return %{ $self->{'data'}[$index] };
  0         0  
785             } else {
786 59         353 return $self->{'data'}[$index];
787             }
788             } else {
789 6 100       13 if ( $index <= $self->{'dataidx'} ) {
790 3         7 $self->rewind();
791             }
792 6         9 my $startidx = $self->{'dataidx'};
793 6         17 for ( my $i = 0; $i < ( $index - $startidx - 1 ); $i++ ) {
794 7 50       13 if ( !$self->next() ) {
795 0         0 return;
796             }
797             }
798 6         15 return $self->next();
799             } ## end else [ if ($self->{'options'}...)]
800             } else {
801 52 100       148 if ( $self->{'options'}{'cache'} ) {
802 45         125 while ( $self->next() ) {
803             # noop
804             }
805 45 100       117 if (wantarray) {
806 7         11 return @{ $self->{'data'} };
  7         33  
807             } else {
808 38         170 return $self->{'data'};
809             }
810             } else {
811 7         28 $self->rewind();
812 7         10 my @data_rows;
813 7         19 while ( my $data = $self->next() ) {
814 28         72 push( @data_rows, $data );
815             }
816 7 100       18 if (wantarray) {
817 6         29 return @data_rows;
818             } else {
819 1         5 return \@data_rows;
820             }
821             } ## end else [ if ($self->{'options'}...)]
822             } ## end else [ if (defined($index)) ]
823             } ## end sub data
824              
825             =head2 next()
826              
827             while (my $row = $sb_file->next()){
828             print $row->{'lat'};
829             ...
830             }
831             while (my %row = $sb_file->next()){
832             print $row{'lat'};
833             ...
834             }
835            
836             Returns the next data row in the file, returning C when it runs out of
837             rows.
838              
839             Data is returned as C<< field => value >> pairs.
840              
841             In list context, returns a hash of the row. In scalar context, returns a
842             reference to the hash of a row.
843              
844             After a C, C will return the very first data hash, then each row
845             in turn. If the row has been cached, it's retrieved from the cache instead of
846             rereading from the file.
847              
848             =cut
849              
850             sub next {
851 429     429 1 15190 my $self = shift;
852 429 50       843 if (@_) {
853 0         0 croak("invalid number of arguments on next(), expected 0.");
854             }
855              
856 429 100 100     1760 if ( $self->{'options'}{'cache'} && $self->{'dataidx'} < $self->{'max_dataidx'} ) {
    100          
857 73         114 $self->{'dataidx'}++;
858              
859 73 50       129 if (wantarray) {
860 0         0 return %{ $self->{'data'}[ $self->{'dataidx'} ] };
  0         0  
861             } else {
862 73         251 return $self->{'data'}[ $self->{'dataidx'} ];
863             }
864             } elsif ( $self->{'handle'} ) {
865 344         531 my $handle = $self->{'handle'};
866 344         483 my $line_number = $self->{'line_number'};
867              
868 344         1267 while ( my $line = <$handle> ) {
869 266         377 $line_number++;
870 266         630 strip($line);
871 266 50       608 if ($line) {
872 266         589 my $data_row = $self->make_data_hash($line);
873 266         535 $self->{'line_number'} = $line_number;
874 266 100       610 if ( $self->{'options'}{'cache'} ) {
875 204         298 push( @{ $self->{'data'} }, $data_row );
  204         427  
876             }
877 266         418 $self->{'dataidx'}++;
878 266 100       590 if ( $self->{'dataidx'} > $self->{'max_dataidx'} ) {
879 238         367 $self->{'max_dataidx'} = $self->{'dataidx'};
880             }
881 266 50       476 if (wantarray) {
882 0         0 return %{$data_row};
  0         0  
883             } else {
884 266         961 return $data_row;
885             }
886             } ## end if ($line)
887             } ## end while (my $line = <$handle>)
888             } ## end elsif ($self->{'handle'})
889 90         255 return;
890             } ## end sub next
891              
892             =head2 rewind()
893              
894             C seeks to the start of the data. The next C will return the
895             very first row (or C). If caching is enabled, it will not actually
896             perform a seek, it will merely reset the index interator. If caching is
897             disabled, a seek is performed on the file handle to return to the start of the
898             data.
899              
900             =cut
901              
902             sub rewind {
903 61     61 1 1155 my ($self) = @_;
904 61 100       169 if ( $self->{'dataidx'} != -1 ) {
905 31 100       94 if ( !$self->{'options'}{'cache'} ) {
906 9         27 seek( $self->{'handle'}, $self->{'data_start_position'}, SEEK_SET );
907             }
908 31         52 $self->{'line_number'} = $self->{'data_start_line'};
909 31         60 $self->{'dataidx'} = -1;
910             } ## end if ($self->{'dataidx'}...)
911             } ## end sub rewind
912              
913             =head2 update(\%data_row | \@data_row | $data_row | %data_row)
914              
915             while (my %row = $sb_file->next()){
916             if ($row{'depth'} == -999){
917             $row{'depth'} = 0;
918             }
919             $sb_file->update(\%row);
920             }
921            
922             # Less useful for update():
923             print join(',',@{$sb_file->actual_fields()}); #lat,lon,depth,chl
924            
925             while (my %row = $sb_file->next()){
926             if ($row{'depth'} == -999){
927             $row{'depth'} = 0;
928             }
929             $sb_file->update(@row{'lat','lon','depth','chl'});
930             # or
931             $sb_file->update([@row{'lat','lon','depth','chl'}]);
932             }
933              
934             C replaces the last row read (using C) with the input.
935              
936             Caching must be enabled to use C, C, or C.
937              
938             =cut
939              
940             sub update {
941 12     12 1 57 my $self = shift;
942 12 100       33 if ( !$self->{'options'}{'cache'} ) {
    100          
943 1         96 croak("Caching must be enabled to write.");
944             } elsif ( $self->{'dataidx'} == -1 ) {
945 1         161 croak("No rows read yet.");
946             }
947 10         23 my $new_row = $self->ingest_row(@_);
948 10 100       28 unless ( defined($new_row) ) {
949 2         138 croak("Error parsing inputs");
950             }
951 8         37 $self->{'data'}[ $self->{'dataidx'} ] = $new_row;
952             } ## end sub update
953              
954             =head2 set($index, \%data_row | \@data_row | $data_row | %data_row)
955              
956             my %row = (lat => 1, lon => 2, chl => 1);
957             $sb_file->set(0, \%row);
958            
959             print join(',',@{$sb_file->actual_fields()}); #lat,lon,chl
960             $sb_file->set(0, [1, 2, 1]);
961            
962              
963             C replaces the row at the given index with the input. Seeks to the
964             given index if it has not been read to yet. Cs if the file does not go
965             up to the index specified.
966              
967             Caching must be enabled to use C, C, or C.
968              
969             =cut
970              
971             sub set {
972 3     3 1 17 my $self = shift;
973 3         4 my $index = shift;
974 3 50       8 if ( !$self->{'options'}{'cache'} ) {
975 0         0 croak("Caching must be enabled to write");
976             }
977 3 100       7 if ( $index < 0 ) {
978 1         146 croak("Index must be positive integer");
979             }
980 2         7 my $new_row = $self->ingest_row(@_);
981 2 50       6 unless ( defined($new_row) ) {
982 0         0 croak("Error parsing inputs");
983             }
984              
985 2 50       5 if ( $index > $self->{'max_dataidx'} ) {
986 2         4 my $current_idx = $self->{'dataidx'};
987 2         6 $self->data($index);
988 2         3 $self->{'dataidx'} = $current_idx;
989              
990 2 100       5 if ( $index > $self->{'max_dataidx'} ) {
991 1         110 croak("Index out of bounds.");
992             }
993             } ## end if ($index > $self->{'max_dataidx'...})
994              
995 1         3 $self->{'data'}[$index] = $new_row;
996             } ## end sub set
997              
998             =head2 insert($index, \%data_row | \@data_row | $data_row | %data_row)
999              
1000             use Data::SeaBASS qw(INSERT_BEGINNING INSERT_END);
1001             ...
1002            
1003             my %row = (lat => 1, lon => 2, chl => 1);
1004             $sb_file->insert(INSERT_BEGINNING, \%row);
1005            
1006             print join(',',@{$sb_file->actual_fields()}); #lat,lon,chl
1007            
1008             $sb_file->insert(1, [1, 2, 1]);
1009             $sb_file->insert(INSERT_END, [1, 2, 1]);
1010            
1011             Inserts the row into the given position. C inserts a new row
1012             at the start of the data, C inserts one at the end of the data
1013             block.
1014              
1015             The index must be a positive integer, C, or C.
1016              
1017             If a row is inserted at the end, the entire data block is read from the file to
1018             cache every row, the row is appended to the end, and the current position is
1019             reset to the original position, so C will still return the real next
1020             row from the data.
1021              
1022             If a row is inserted before the current position, the current position is
1023             shifted accordingly and will still return the C real row.
1024              
1025             Caching must be enabled to use C, C, or C.
1026              
1027             =cut
1028              
1029             sub insert {
1030 16     16 1 52 my $self = shift;
1031 16         24 my $index = shift;
1032 16 50       48 if ( !$self->{'options'}{'cache'} ) {
1033 0         0 croak("Caching must be enabled to write.");
1034             }
1035 16 100       43 if ( $index < INSERT_END ) {
1036 1         162 croak("Index must be positive integer, or INSERT_BEGINNING (beginning), or INSERT_END (end)");
1037             }
1038 15         59 my $new_row = $self->ingest_row(@_);
1039 15 50       48 unless ( defined($new_row) ) {
1040 0         0 croak("Error parsing inputs");
1041             }
1042              
1043 15 100       40 if ( $index == INSERT_END ) {
    100          
1044 10         22 my $current_idx = $self->{'dataidx'};
1045 10         31 $self->data();
1046 10         20 $self->{'dataidx'} = $current_idx;
1047             } elsif ( $index > $self->{'max_dataidx'} ) {
1048 2         4 my $current_idx = $self->{'dataidx'};
1049 2         9 $self->data($index);
1050 2         5 $self->{'dataidx'} = $current_idx;
1051              
1052 2 50       10 if ( $index == $self->{'max_dataidx'} + 1 ) {
    100          
1053 0         0 $index = INSERT_END;
1054             } elsif ( $index > $self->{'max_dataidx'} ) {
1055 1         205 croak("Index out of bounds.");
1056             }
1057             } ## end elsif ($index > $self->{'max_dataidx'...})
1058              
1059 14 100 100     54 if ( $index <= $self->{'dataidx'} && $index != INSERT_END ) {
1060 3         7 $self->{'dataidx'}++;
1061             }
1062              
1063 14         32 $self->{'max_dataidx'}++;
1064              
1065 14 100       35 if ( $index == INSERT_BEGINNING ) {
    100          
1066 2         4 unshift( @{ $self->{'data'} }, $new_row );
  2         10  
1067             } elsif ( $index == INSERT_END ) {
1068 10         12 push( @{ $self->{'data'} }, $new_row );
  10         31  
1069             } else {
1070 2         4 splice( @{ $self->{'data'} }, $index, 0, $new_row );
  2         6  
1071             }
1072             } ## end sub insert
1073              
1074             =head2 prepend(\%data_row | \@data_row | $data_row | %data_row)
1075              
1076             C is short for C.
1077              
1078             =cut
1079              
1080             sub prepend {
1081 1     1 1 4 my $self = shift;
1082 1         5 $self->insert( INSERT_BEGINNING, @_ );
1083             }
1084              
1085             =head2 append(\%data_row | \@data_row | $data_row | %data_row)
1086              
1087             C is short for C.
1088              
1089             =cut
1090              
1091             sub append {
1092 9     9 1 61 my $self = shift;
1093 9         17 $self->insert( INSERT_END, @_ );
1094             }
1095              
1096             =head2 remove([$index])
1097              
1098             If index is specified, it deletes the desired index. If it is omitted, the
1099             last row read is deleted. The current position is modified accordingly.
1100              
1101             =cut
1102              
1103             sub remove {
1104 6     6 1 27 my ( $self, $index ) = @_;
1105              
1106 6 100 66     34 if ( !$self->{'options'}{'cache'} ) {
    50          
1107 1         146 croak("Caching must be enabled to write.");
1108             } elsif ( !defined($index) && $self->{'dataidx'} < 0 ) {
1109 0         0 croak("No rows read yet.");
1110             }
1111              
1112 5 100       13 if ( !defined($index) ) {
1113 2         4 $index = $self->{'dataidx'};
1114             }
1115              
1116 5 50       22 if ( $index < 0 ) {
    100          
1117 0         0 croak("Index must be positive integer");
1118             } elsif ( $index > $self->{'max_dataidx'} ) {
1119 2         4 my $current_idx = $self->{'dataidx'};
1120 2         8 $self->data($index);
1121 2         4 $self->{'dataidx'} = $current_idx;
1122              
1123 2 100       5 if ( $index > $self->{'max_dataidx'} ) {
1124 1         254 croak("Index out of bounds.");
1125             }
1126             } ## end elsif ($index > $self->{'max_dataidx'...})
1127              
1128 4 100       9 if ( $index <= $self->{'dataidx'} ) {
1129 3         6 $self->{'dataidx'}--;
1130             }
1131 4         6 $self->{'max_dataidx'}--;
1132              
1133 4         5 splice( @{ $self->{'data'} }, $index, 1 );
  4         14  
1134             } ## end sub remove
1135              
1136             =head2 where(\&function)
1137              
1138             # Find all rows with depth greater than 10 meters
1139             my @ret = $sb_file->where(sub {
1140             if ($_->{'depth'} > 10){
1141             return $_;
1142             } else {
1143             return undef;
1144             }
1145             });
1146              
1147             # Delete all measurements with depth less than 10 meters
1148             $sb_file->where(sub {
1149             if ($_->{'depth'} < 10){
1150             $_ = undef;
1151             }
1152             });
1153            
1154             # Calculate the average chlorophyll value
1155             my $chl_total = 0;
1156             my $measurements = 0;
1157             $sb_file->where(sub {
1158             if (defined($_->{'chl'})){
1159             $chl_total += $_->{'chl'};
1160             $measurements++;
1161             }
1162             });
1163             if ($measurements){
1164             print $chl_total/$measurements;
1165             } else {
1166             print "No chl values.";
1167             }
1168              
1169              
1170             Traverses through each data line, running the given function on each row.
1171             C<$_> is set to the current row. If C<$_> is set to undefined, C is
1172             called. Any changes in C<$_> will be reflected in the data.
1173              
1174             Any defined value returned is added to the return array. If nothing is
1175             returned, a 0 is added.
1176              
1177             =cut
1178              
1179             sub where {
1180 7     7 1 44 my ( $self, $function ) = ( shift, shift );
1181 7 100       21 if ( ref($function) ne 'CODE' ) {
1182 1         155 croak("Invalid arguments.");
1183             }
1184 6         13 my $currentidx = $self->{'dataidx'};
1185 6         18 $self->rewind();
1186              
1187 6         7 my @new_rows;
1188              
1189 6         18 while ( my $row = $self->next() ) {
1190 19         34 local *_ = \$row;
1191 19         42 my $ret = $function->();
1192 19 100 100     127 if ( defined($ret) && defined(wantarray) ) {
1193 12         17 push( @new_rows, $ret );
1194             }
1195 19 100       53 if ( !defined($row) ) {
1196 2 100       6 if ( $self->{'dataidx'} <= $currentidx ) {
1197 1         2 $currentidx--;
1198             }
1199 2         6 $self->remove();
1200             } ## end if (!defined($row))
1201             } ## end while (my $row = $self->next...)
1202              
1203 5         17 $self->data($currentidx);
1204              
1205 5         14 return @new_rows;
1206             } ## end sub where
1207              
1208             =head2 get_all($field_name [, ... ] [, \%options])
1209              
1210             Returns an array/arrayref of all the values matching each given field name.
1211             This function errors out if no field names are passed in or a non-existent
1212             field is requested.
1213              
1214             Available options are:
1215              
1216             =over 4
1217              
1218             =item * delete_missing
1219              
1220             If any of the fields are missing, the row will not be added to any of the
1221             return arrays. (Useful for plotting or statistics that don't work well with
1222             bad values.)
1223              
1224             =back
1225              
1226             =cut
1227              
1228             sub get_all {
1229 13     13 1 5471 my $self = shift;
1230 13         31 my %options = ( 'delete_missing' => 0 );
1231 13 100       41 if ( ref( $_[$#_] ) eq 'HASH' ) {
1232 5         8 %options = %{ pop(@_) };
  5         16  
1233             }
1234 13 50       30 if ( !@_ ) {
1235 0         0 croak("get_all must be called with at least one field name");
1236             }
1237              
1238 13 100       34 my $missing = ( $self->{'options'}{'missing_data_to_undef'} ? undef : $self->{'missing'} );
1239              
1240 13         22 my $currentidx = $self->{'dataidx'};
1241 13         36 $self->rewind();
1242              
1243 13         25 my @fields = map {lc} @_; # turn all inputs lowercase
  22         60  
1244              
1245 13         27 foreach my $field (@fields) {
1246 21 100   130   94 if ( ( firstidx { $_ eq $field } @{ $self->{'actual_fields'} } ) < 0 ) {
  130         186  
  21         63  
1247 4 100 66 3   17 if ( !$self->{'options'}{'fill_ancillary_data'} || ( firstidx { $_ eq $field } keys( %{ $self->{'ancillary'} } ) ) < 0 ) {
  3         9  
  1         6  
1248 3         441 croak("Field $field does not exist");
1249             }
1250             }
1251             } ## end foreach my $field (@fields)
1252              
1253 10         19 my @ret = map { [] } @fields; # make return array of arrays
  17         31  
1254              
1255 10         28 while ( my $row = $self->next() ) {
1256 40 100       77 if ( $options{'delete_missing'} ) {
1257 20         26 my $has_all = 1;
1258 20         49 foreach my $field (@fields) {
1259 36 100 100     148 unless ( defined( $row->{$field} ) && ( !defined($missing) || $row->{$field} != $missing ) ) {
      100        
1260 4         6 $has_all = 0;
1261 4         6 last;
1262             }
1263             } ## end foreach my $field (@fields)
1264 20 100       36 unless ($has_all) {
1265 4         8 next;
1266             }
1267             } ## end if ($options{'delete_missing'...})
1268              
1269 36         75 for ( my $i = 0; $i <= $#fields; $i++ ) {
1270 60         84 push( @{ $ret[$i] }, $row->{ $fields[$i] } );
  60         195  
1271             }
1272             } ## end while (my $row = $self->next...)
1273              
1274 10         33 $self->data($currentidx);
1275              
1276 10 100       26 if ( $#_ == 0 ) {
    100          
1277 3 100       6 if (wantarray) {
1278 1         2 return @{ $ret[0] };
  1         7  
1279             } else {
1280 2         8 return $ret[0];
1281             }
1282             } elsif (wantarray) {
1283 4         16 return @ret;
1284             } else {
1285 3         12 return \@ret;
1286             }
1287             } ## end sub get_all
1288              
1289             =head2 remove_field($field_name [, ... ])
1290              
1291             Removes a field from the file. C is called to remove the field
1292             from cached rows. Any new rows grabbed will have the removed fields omitted,
1293             as well. A warning is issued if the field does not exist.
1294              
1295             =cut
1296              
1297             sub remove_field {
1298 8     8 1 28 my $self = shift;
1299 8 100       22 if ( !@_ ) {
1300 1         186 croak("Field(s) must be specified.");
1301             }
1302 7         16 foreach my $field_orig (@_) {
1303 9         18 my $field = lc($field_orig);
1304              
1305 9     30   30 my $field_idx = firstidx { $_ eq $field } @{ $self->{'actual_fields'} };
  30         40  
  9         27  
1306              
1307 9 100       32 if ( $field_idx < 0 ) {
1308 1         87 carp("Field $field does not exist.");
1309             } else {
1310 8         9 splice( @{ $self->{'actual_fields'} }, $field_idx, 1 );
  8         17  
1311 8         12 splice( @{ $self->{'actual_units'} }, $field_idx, 1 );
  8         19  
1312             }
1313             } ## end foreach my $field_orig (@_)
1314 7         143 $self->update_fields();
1315             } ## end sub remove_field
1316              
1317             =head2 add_field($field_name [, $unit [, $position]])
1318              
1319             Adds a field to the file. C is called to populate all cached
1320             rows. Any rows retrieved will have the new field set to undefined or /missing,
1321             depending on if the option C is set.
1322              
1323             If the unit is not specified, it is set to unitless.
1324              
1325             If the position is not specified, the field is added to the end.
1326              
1327             =cut
1328              
1329             sub add_field {
1330 25     25 1 127 my ( $self, $field, $unit, $position ) = @_;
1331 25 50       82 if ( !$self->{'options'}{'cache'} ) {
    100          
1332 0         0 croak("Caching must be enabled to write.");
1333             } elsif ( !$field ) {
1334 1         135 croak("Field must be specified.");
1335             }
1336 24         47 $field = lc($field);
1337              
1338 24     121   81 my $field_idx = firstidx { $_ eq $field } @{ $self->{'actual_fields'} };
  121         148  
  24         71  
1339 24 100       86 if ( $field_idx >= 0 ) {
1340 3         419 croak("Field already exists.");
1341             }
1342 21 100       47 if ( !defined($position) ) {
1343 15         40 $position = INSERT_END;
1344             }
1345 21   100     61 $unit ||= 'unitless';
1346 21         40 $unit = lc($unit);
1347              
1348 21 100       39 if ( $position == INSERT_END ) {
    100          
1349 17         30 push( @{ $self->{'actual_fields'} }, $field );
  17         46  
1350 17         30 push( @{ $self->{'actual_units'} }, $unit );
  17         36  
1351             } elsif ( $position == INSERT_BEGINNING ) {
1352 2         3 unshift( @{ $self->{'actual_fields'} }, $field );
  2         7  
1353 2         4 unshift( @{ $self->{'actual_units'} }, $unit );
  2         5  
1354             } else {
1355 2         4 splice( @{ $self->{'actual_fields'} }, $position, 0, $field );
  2         6  
1356 2         3 splice( @{ $self->{'actual_units'} }, $position, 0, $unit );
  2         4  
1357             }
1358 21         71 $self->update_fields();
1359             } ## end sub add_field
1360              
1361             =head2 find_fields($string | qr/match/ [, ... ])
1362              
1363             Finds fields matching the string or regex given. If given a string, it must
1364             match a field exactly and entirely to be found. To find a substring, use
1365             C. Fields are returned in the order that they will be output. This
1366             function takes into account fields that are added or removed. All fields are
1367             always lowercase, so all matches are case insensitive.
1368              
1369             Given one argument, returns an array of the fields found. An empty array is
1370             returned if no fields match.
1371              
1372             Given multiple arguments, returns an array/arrayref of arrays of fields found.
1373             IE: C would return something like
1374             C<[['lw510','lw550'],['es510','es550']]>. If no field is matched, the inner
1375             array will be empty. IE: C<[[],[]]>.
1376              
1377             =cut
1378              
1379             sub find_fields {
1380 10     10 1 4830 my $self = shift;
1381 10 50       27 if ( $#_ < 0 ) {
1382 0         0 croak("Input must be a string or regex object.");
1383             }
1384              
1385 10         18 my @ret;
1386              
1387 10         17 foreach my $find (@_) {
1388 13         20 my ( $regex, @matching );
1389 13 50       23 if ( defined($find) ) {
1390 13 100       35 if ( !ref($find) ) {
    50          
1391 7         79 $regex = lc(qr/^$find$/i);
1392             } elsif ( ref($find) eq 'Regexp' ) {
1393 6         26 $regex = lc(qr/$find/i);
1394             } else {
1395 0         0 croak("Input must be a string or regex object.");
1396             }
1397              
1398 13         28 foreach my $field ( @{ $self->{'actual_fields'} } ) {
  13         25  
1399 92 100       259 if ( $field =~ $regex ) {
1400 23         40 push( @matching, $field );
1401             }
1402             }
1403             }
1404 13         30 push( @ret, \@matching );
1405             } ## end foreach my $find (@_)
1406              
1407 10 100       22 if ( $#_ == 0 ) {
1408 7         10 return @{ $ret[0] };
  7         30  
1409             } else {
1410 3 100       8 if (wantarray) {
1411 2         7 return @ret;
1412             } else {
1413 1         4 return \@ret;
1414             }
1415             } ## end else [ if ($#_ == 0) ]
1416             } ## end sub find_fields
1417              
1418             =head2 add_comment(@comments)
1419              
1420             Adds comments to the output file, which are printed, in bulk, after C.
1421             Comments are trimmed before entry and !s are added, if required.
1422              
1423             =cut
1424              
1425             sub add_comment {
1426 2     2 1 6 my $self = shift;
1427 2         7 push(@{$self->{'comments'}}, map {
1428 2         4 my $c = $_;
  3         7  
1429 3         23 $c =~ s/^\s+|\s+$//g;
1430 3 100       12 if ($c =~ /^!/){
1431 1         5 $c
1432             } else {
1433 2         8 "! $c"
1434             }
1435             } @_);
1436             }
1437              
1438             =head2 get_comments([@indices])
1439              
1440             Returns a list of the comments at the given indices. If no indices are passed
1441             in, return them all.
1442              
1443             =cut
1444              
1445             sub get_comments {
1446 6     6 1 17 my $self = shift;
1447 6         8 my @ret;
1448 6 100       14 if (@_){
1449 2         4 @ret = map {$self->{'comments'}[$_]} @_;
  3         8  
1450             } else {
1451 4         7 @ret = @{$self->{'comments'}};
  4         13  
1452             }
1453 6 50       15 if (wantarray){
1454 0         0 return @ret;
1455             } else {
1456 6         37 return \@ret;
1457             }
1458             }
1459              
1460             =head2 set_comments(@comments)
1461              
1462             Overwrites all of the comments in the file. For now, this is the proper way
1463             to remove comments. Comments are trimmed before entry and !s are added, if
1464             required.
1465              
1466             =cut
1467              
1468             sub set_comments {
1469 1     1 1 3 my $self = shift;
1470             $self->{'comments'} = [map {
1471 1         4 my $c = $_;
  2         5  
1472 2         9 $c =~ s/^\s+|\s+$//g;
1473 2 100       11 if ($c =~ /^!/){
1474 1         9 $c
1475             } else {
1476 1         5 "! $c"
1477             }
1478             } @_];
1479             }
1480              
1481             =head2 write([$filename | $file_handle | \*GLOB])
1482              
1483             Outputs the current header and data to the given handle or glob. If no
1484             arguments are given, and a non-existent filename was given to C, the
1485             contents are output into that. If an output file was not given, C
1486             outputs to STDOUT.
1487              
1488             If C is enabled, the headers are checked for invalid headers and
1489             missing required headers and errors/warnings can be thrown accordingly.
1490              
1491             The headers are output in a somewhat-arbitrary but consistent order. If
1492             C is enabled, placeholders are added for every header that
1493             does not exist. A comment section is also added if one is not present.
1494              
1495             =cut
1496              
1497             sub write {
1498 15     15 1 11383 my ( $self, $write_to_h ) = @_;
1499              
1500 15         47 my $strict_write = $self->{'options'}{'strict'} & STRICT_WRITE;
1501 15 100       57 my $slash = ( $self->{'options'}{'keep_slashes'} ? '/' : '' );
1502 15         22 my $error = 0;
1503              
1504 15 100       37 if ($strict_write) {
1505 3         4 foreach my $header ( keys( %{ $self->{'headers'} } ) ) {
  3         17  
1506 65         87 ( my $header_no_slash = $header ) =~ s"^/"";
1507 65 50 33 611   152 if ( ( firstidx { $_ eq $header_no_slash } @ALL_HEADERS ) < 0 && ( firstidx { $_ eq $header_no_slash } @HIDDEN_HEADERS ) < 0 ) {
  1123         1219  
  0         0  
1508 0         0 carp("Invalid header: $header");
1509 0         0 $error = 1;
1510             }
1511             } ## end foreach my $header (keys(%{...}))
1512              
1513 3         12 foreach my $header (@REQUIRED_HEADERS) {
1514 66 100       153 if ( !exists( $self->{'headers'}{$header} ) ) {
1515 20         1243 carp("Missing required header: $header");
1516 20         734 $error = 1;
1517             }
1518             } ## end foreach my $header (@REQUIRED_HEADERS)
1519             } ## end if ($strict_write)
1520              
1521 15 100       39 if ( !$error ) {
1522 14         24 my $close_write_to = 0;
1523 14         39 my $old_fh = select();
1524              
1525 14 50 33     76 if ( !$write_to_h && exists( $self->{'default_write_to'} ) ) {
1526 0   0     0 $write_to_h ||= $self->{'default_write_to'};
1527             }
1528              
1529 14 50       48 if ( defined($write_to_h) ) {
1530 0 0       0 if ( ref($write_to_h) eq 'GLOB' ) {
    0          
1531 0         0 select($write_to_h);
1532             } elsif ( !ref($write_to_h) ) {
1533 0         0 my $write_to = $write_to_h;
1534 0         0 $write_to_h = undef;
1535 0 0       0 open( $write_to_h, ">", $write_to ) || croak("Invalid argument for write().");
1536 0         0 $close_write_to = 1;
1537 0         0 select($write_to_h);
1538             } else {
1539 0         0 croak("Invalid argument for write().");
1540             }
1541             } ## end if (defined($write_to_h...))
1542              
1543 14   50     54 $self->{'headers'}{"${slash}delimiter"} ||= 'comma';
1544 14         43 my $actual_delim = lc( $self->{'headers'}{"${slash}delimiter"} );
1545 14 100       49 if ( $actual_delim eq 'comma' ) {
    50          
    0          
    0          
1546 2         4 $actual_delim = ',';
1547             } elsif ( $actual_delim eq 'space' ) {
1548 12         24 $actual_delim = ' ';
1549             } elsif ( $actual_delim eq 'tab' ) {
1550 0         0 $actual_delim = "\t";
1551             } elsif ( $actual_delim eq 'semicolon' ) {
1552 0         0 $actual_delim = ';';
1553             } else {
1554 0         0 $actual_delim = ',';
1555 0         0 $self->{'headers'}{"${slash}delimiter"} = 'comma';
1556             }
1557              
1558 14 50       46 my $missing = ( exists( $self->{'missing'} ) ? $self->{'missing'} : $DEFAULT_MISSING );
1559 14 50       36 my $bdl = ( exists( $self->{'below_detection_limit'} ) ? $self->{'below_detection_limit'} : $DEFAULT_BDL );
1560 14 50       39 my $adl = ( exists( $self->{'above_detection_limit'} ) ? $self->{'above_detection_limit'} : $DEFAULT_ADL );
1561              
1562 14 100       51 if ( $self->{'options'}{'preserve_header'} ) {
1563 1         2 print join("\n", @{ $self->{'preserved_header'} }, '');
  1         45  
1564             } else {
1565 13 100       47 if ( !exists( $self->{'headers'}{"${slash}begin_header"} ) ) {
1566 3         120 print "/begin_header\n";
1567             }
1568              
1569 13         28 my $add_missing_headers = $self->{'options'}{'add_empty_headers'};
1570 13 100 66     37 if ( $add_missing_headers && $add_missing_headers eq '1' ) {
1571 1         2 $add_missing_headers = 'NA';
1572             }
1573 13 100       36 if ( !$self->{'options'}{'preserve_case'} ) {
1574 1   50     9 $add_missing_headers = lc( $add_missing_headers || '' );
1575             }
1576            
1577 13         21 my @headers_to_print;
1578            
1579 13         88 @headers_to_print = @ALL_HEADERS;
1580 13 100 66     70 if ($missing eq $adl || ($self->{'options'}{'missing_data_to_undef'} && !$self->{'options'}{'preserve_detection_limits'})){
      100        
1581 12         149 @headers_to_print = grep(!/above_detection_limit/i, @headers_to_print);
1582             }
1583 13 100 66     65 if ($missing eq $bdl || ($self->{'options'}{'missing_data_to_undef'} && !$self->{'options'}{'preserve_detection_limits'})){
      100        
1584 12         97 @headers_to_print = grep(!/below_detection_limit/i, @headers_to_print);
1585             }
1586            
1587 13 100       21 unless (grep($_ ne 'unitless', @{ $self->{'actual_units'} })){
  13         56  
1588 6         57 @headers_to_print = grep(!/units/i, @headers_to_print);
1589             }
1590              
1591 13         34 foreach my $header (@headers_to_print) {
1592 412 100       1161 if ( $header eq 'missing' ) {
    100          
    100          
    100          
    50          
1593 13         20 while ( my ( $h, $k ) = each( %{ $self->{'headers'} } ) ) {
  166         475  
1594 153         244 ( my $header_no_slash = $h ) =~ s"^/"";
1595 153 50   3197   445 if ( ( firstidx { $_ eq $header_no_slash } @ALL_HEADERS ) < 0 ) {
  3197         11742  
1596 0         0 print "/$h=$k\n";
1597             }
1598             } ## end while (my ($h, $k) = each...)
1599              
1600 13         23 foreach my $comment ( @{ $self->{'comments'} } ) {
  13         36  
1601 18         100 print "$comment\n";
1602             }
1603 13 100 100     22 if ( !@{ $self->{'comments'} } && $add_missing_headers ) {
  13         69  
1604 1         7 print "! Comments: \n!\n";
1605             }
1606 13 100       46 if ( !exists( $self->{'headers'}{"$slash$header"} ) ) {
1607 3         45 print "/missing=$missing\n";
1608             } else {
1609 10         94 print '/', $header, '=', $self->{'headers'}{"$slash$header"}, "\n";
1610             }
1611             } elsif ( $header eq 'fields' ) {
1612 13         36 print "/$header=", join( ',', @{ $self->{'actual_fields'} } ), "\n";
  13         92  
1613             } elsif ( $header eq 'units' ) {
1614 7         16 print "/$header=", join( ',', @{ $self->{'actual_units'} } ), "\n";
  7         33  
1615             } elsif ( exists( $self->{'headers'}{"$slash$header"} ) ) {
1616 111 100       312 if ( $header =~ /_header/ ) {
    100          
    50          
1617 20         385 print "/$header\n";
1618             } elsif (length($self->{'headers'}{"$slash$header"})) {
1619 65         135 my $v = $self->{'headers'}{"$slash$header"};
1620 65 100       161 if ( $header =~ /_latitude|_longitude/ ) {
    100          
1621 8         36 print "/$header=$v\[deg]\n";
1622             } elsif ( $header =~ /_time/ ) {
1623 4         19 print "/$header=$v\[gmt]\n";
1624             } else {
1625 53         294 print "/$header=$v\n";
1626             }
1627             # print '/', $header, '=', $self->{'headers'}{"$slash$header"}, "\n";
1628             } elsif ($add_missing_headers) {
1629 26 100       57 next if ($OMIT_EMPTY_HEADERS{$header});
1630              
1631 25   33     83 my $value = $HEADER_DEFAULTS{$header} || $add_missing_headers;
1632 25 100       70 if ( $header =~ /_latitude|_longitude/ ) {
    100          
1633 4         19 print "/$header=$value\[deg]\n";
1634             } elsif ( $header =~ /_time/ ) {
1635 2         16 print "/$header=$value\[gmt]\n";
1636             } else {
1637 19         146 print "/$header=$value\n";
1638             }
1639             }
1640             } elsif ($add_missing_headers) {
1641 0 0       0 next if ($OMIT_EMPTY_HEADERS{$header});
1642              
1643 0   0     0 my $value = $HEADER_DEFAULTS{$header} || $add_missing_headers;
1644 0 0       0 if ( $header =~ /_latitude|_longitude/ ) {
    0          
1645 0         0 print "/$header=$value\[deg]\n";
1646             } elsif ( $header =~ /_time/ ) {
1647 0         0 print "/$header=$value\[gmt]\n";
1648             } else {
1649 0         0 print "/$header=$value\n";
1650             }
1651             } ## end elsif ($add_missing_headers)
1652             } ## end foreach my $header (@ALL_HEADERS)
1653              
1654 13 100       63 if ( !exists( $self->{'headers'}{"${slash}end_header"} ) ) {
1655 3         17 print "/end_header\n";
1656             }
1657             } ## end else [ if ($self->{'options'}...)]
1658              
1659 14         58 $self->rewind();
1660              
1661 14         39 while ( my $row = $self->next() ) {
1662 24         34 my @values;
1663 24         46 foreach my $field ( @{ $self->{'actual_fields'} } ) {
  24         50  
1664 118 100       257 push( @values, ( defined( $row->{$field} ) ? $row->{$field} : $missing ) );
1665             }
1666 24         235 print join( $actual_delim, @values ), "\n";
1667             } ## end while (my $row = $self->next...)
1668              
1669 14         51 select($old_fh);
1670 14 50       40 if ($close_write_to) {
1671 0         0 close($write_to_h);
1672             }
1673             } else {
1674 1         56 croak("Error(s) writing file");
1675             }
1676 14         60 return;
1677             } ## end sub write
1678              
1679             =head2 close()
1680              
1681             If a file handle is opened for reading, this function closes it. This is
1682             automatically called when the object is destroyed. This is useful to replace
1683             the file being read with the current changes.
1684              
1685             =cut
1686              
1687             sub close {
1688 141     141 1 314 my ($self) = @_;
1689 141 100       464 if ( $self->{'handle'} ) {
1690 131         530 my $ret = close( $self->{'handle'} );
1691 131         442 delete( $self->{'handle'} );
1692 131         2287 return $ret;
1693             } else {
1694 10         144 return;
1695             }
1696             } ## end sub close
1697              
1698             =head2 make_data_hash($line [,\@field_list])
1699              
1700             my %row = $sb_file->make_data_hash("1.5,2,2.5");
1701             my %row = $sb_file->make_data_hash("1.5,2,2.5", [qw(lat lon sal)]);
1702             my %row = $sb_file->make_data_hash("1.5,2,2.5", [$sb_file->fields()]);
1703             my %row = $sb_file->make_data_hash("1.5,2,2.5", [$sb_file->actual_fields()]);
1704              
1705             For mostly internal use. This function parses a data line. It first splits
1706             the data via the delimiter, assigns a field to each value, and returns a hash
1707             or hash reference.
1708              
1709             If C<@field_list> is not set, C<< $sb_file->fields() >> is used.
1710              
1711             If a delimiter is not set (a blank file was created, a file without a
1712             /delimiter header is read, etc), the delimiter is guessed and set using
1713             L.
1714              
1715             Cs if the delimiter could not be guessed or the number of fields the
1716             line is split into does not match up with the field list.
1717              
1718             =cut
1719              
1720             sub make_data_hash {
1721 266     266 1 581 my ( $self, $line, $field_list ) = @_;
1722 266 50 66     711 if ( !$self->{'delim'} && !$self->guess_delim($line) ) {
1723 0         0 croak("Need a delimiter");
1724             }
1725 266         1951 my @values = split( $self->{'delim'}, $line );
1726 266   33     1142 $field_list ||= $self->{'fields'};
1727              
1728 266         363 my ( $num_expected, $num_got ) = ( scalar( @{ $self->{'fields'} } ), scalar(@values) );
  266         591  
1729 266 50       591 if ( $num_expected != $num_got ) {
1730 0         0 croak("Incorrect number of fields or elements: got $num_got, expected $num_expected");
1731             }
1732              
1733 266         352 my %ret;
1734              
1735 266         1601 my $iterator = each_arrayref( $field_list, \@values );
1736 266         1214 while ( my ( $k, $v ) = $iterator->() ) {
1737 1869 100       3169 if ( $self->{'options'}{'missing_data_to_undef'} ) {
1738 958 50 100     10012 if (!length($v)){
    100 100        
    100 100        
    100 100        
    100 100        
      100        
      100        
      100        
      33        
      66        
      66        
1739 0         0 $ret{$k} = undef;
1740             } elsif ( $self->{'missing_is_number'} && looks_like_number($v) && $v == $self->{'missing'} ) {
1741 68         242 $ret{$k} = undef;
1742             } elsif ( !$self->{'options'}{'preserve_detection_limits'} && $self->{'adl_is_number'} && looks_like_number($v) && $v == $self->{'above_detection_limit'} ){
1743 1         4 $ret{$k} = undef;
1744             } elsif ( !$self->{'options'}{'preserve_detection_limits'} && $self->{'bdl_is_number'} && looks_like_number($v) && $v == $self->{'below_detection_limit'} ){
1745 1         3 $ret{$k} = undef;
1746             } elsif ($v eq $self->{'missing'} || (!$self->{'options'}{'preserve_detection_limits'} && ($v eq $self->{'below_detection_limit'} || $v eq $self->{'above_detection_limit'}))) {
1747 8         37 $ret{$k} = undef;
1748             } else {
1749 880         3462 $ret{$k} = $v;
1750             }
1751             } else {
1752 911         3075 $ret{$k} = $v;
1753             }
1754             } ## end while (my ($k, $v) = $iterator...)
1755 266         829 $self->add_and_remove_fields( \%ret );
1756 266 50       537 if (wantarray) {
1757 0         0 return %ret;
1758             } else {
1759 266         1463 return \%ret;
1760             }
1761             } ## end sub make_data_hash
1762              
1763             =head2 AUTOLOAD
1764              
1765             print $sb_file->missing();
1766             print $sb_file->dataidx();
1767             print $sb_file->actual_fields();
1768             ...
1769              
1770             Returns a few internal variables. The accessor is read only, but some
1771             variables can be returned as a reference, and can be modified afterwards.
1772             Though, do it knowing this is a terrible idea.
1773              
1774             If the variable retrieved is an array or hash reference and this is called in a
1775             list context, the variable is dereferenced first.
1776              
1777             Here are a few "useful" variables:
1778              
1779             =over 4
1780              
1781             =item * dataidx
1782              
1783             The current row index.
1784              
1785             =item * max_dataidx
1786              
1787             The highest row index read so far.
1788              
1789             =item * fields
1790              
1791             An array of the original fields.
1792              
1793             =item * actual_fields
1794              
1795             An array of the current fields, as modified by C or C.
1796              
1797             =item * delim
1798              
1799             The regex used to split data lines.
1800              
1801             =item * missing
1802              
1803             The null/fill/missing value of the SeaBASS file.
1804              
1805             =item * delim
1806              
1807             The current line delimiter regex.
1808              
1809             =back
1810              
1811             =cut
1812              
1813             sub AUTOLOAD {
1814 130     130   274 my $self = shift;
1815 130 50       244 if ( !ref($self) ) {
1816 0         0 croak("$self is not an object");
1817             }
1818              
1819 130         189 my $name = $AUTOLOAD;
1820 130 50       289 if ($name) {
1821 130         414 $name =~ s/.*://;
1822 130         227 my $value = $self->{$name};
1823 130 100       234 if ( !defined($value) ) {
1824 1         5 return;
1825             }
1826 129 100 66     390 if ( ref($value) eq 'ARRAY' && wantarray ) {
    50 33        
1827 125         148 return @{$value};
  125         469  
1828             } elsif ( ref($value) eq 'HASH' && wantarray ) {
1829 0         0 return %{$value};
  0         0  
1830             }
1831 4         25 return $value;
1832             } ## end if ($name)
1833             } ## end sub AUTOLOAD
1834              
1835             sub DESTROY {
1836 141     141   77480 my $self = shift;
1837 141         485 $self->close();
1838             }
1839              
1840             =head1 INTERNAL METHODS
1841              
1842             =head2 check_options()
1843              
1844             For internal use only. This function is in charge of checking the options to
1845             make sure they are of the right type (array/hash reference where appropriate).
1846              
1847             If add_empty_headers is set, this function turns off C.
1848              
1849             Called by the object, accepts no arguments.
1850              
1851             =cut
1852              
1853             #<<< perltidy destroys this function
1854             sub check_options {
1855 140     140 1 273 my $self = shift;
1856 140         254 while (my ($k, $v) = each(%{$self->{'options'}})) {
  2071         5562  
1857 1935 100       5347 if (!exists($DEFAULT_OPTIONS{$k})) {
    100          
1858 2         437 croak("Option not understood: $k");
1859 2203     2203   6507 } elsif ((firstidx { $_ eq ref($v) } @{$OPTION_TYPES{$k}}) < 0) {
  1933         4692  
1860 2         6 my $expected_ref = join('/', @{$OPTION_TYPES{$k}});
  2         10  
1861 2 100       530 croak("Option $k not of the right type, expected: " . ($expected_ref ? "$expected_ref reference" : 'scalar'));
1862             } ## end elsif ((firstidx { $_ eq ...}))
1863             } ## end while (my ($k, $v) = each...)
1864 136 100       488 if ($self->{'options'}{'add_empty_headers'}) {
1865 1         4 $self->{'options'}{'strict'} &= STRICT_READ;
1866             }
1867             } ## end sub check_options
1868             #>>>
1869              
1870             =head2 create_blank_file()
1871              
1872             For internal use only. C populates the object with proper
1873             internal variables, as well as adding blank headers if C is
1874             set.
1875              
1876             By default, the missing value is set to C<$DEFAULT_MISSING> (C<-999>).
1877              
1878             This function turns on the C option, as C must be enabled to
1879             write.
1880              
1881             The delimiter is left undefined and will be guessed upon reading the first data
1882             line using the L function.
1883              
1884             Called by the object, accepts no arguments.
1885              
1886             =cut
1887              
1888             sub create_blank_file {
1889 4     4 1 7 my ($self) = @_;
1890 4         15 $self->{'actual_fields'} = [];
1891 4         9 $self->{'actual_units'} = [];
1892 4         11 $self->{'fields'} = [];
1893 4         8 $self->{'units'} = [];
1894 4         9 $self->{'headers'} = {};
1895 4         8 $self->{'comments'} = [];
1896 4         55 $self->{'data'} = [];
1897 4         13 $self->{'dataidx'} = -1;
1898 4         13 $self->{'max_dataidx'} = -1;
1899 4         11 $self->{'delim'} = undef;
1900 4         13 $self->{'missing'} = $DEFAULT_MISSING;
1901 4         11 $self->{'below_detection_limit'} = $DEFAULT_BDL;
1902 4         13 $self->{'above_detection_limit'} = $DEFAULT_ADL;
1903              
1904 4         12 $self->{'options'}{'cache'} = 1;
1905 4         11 $self->{'options'}{'fill_ancillary_data'} = 0;
1906              
1907 4 100       17 my $slash = ( $self->{'options'}{'keep_slashes'} ? '/' : '' );
1908 4 100       17 if ($self->{'options'}{'add_empty_headers'}) {
1909 1         4 foreach (@ALL_HEADERS) {
1910 34 50       54 if ( !exists( $self->{'headers'}{"${slash}$_"} ) ) {
1911 34         60 $self->{'headers'}{"${slash}$_"} = '';
1912 34 100       49 if ( $_ eq 'missing' ) {
1913 1         3 $self->{'headers'}{"${slash}missing"} = $DEFAULT_MISSING;
1914             }
1915             } ## end if (!exists($self->{'headers'...}))
1916             } ## end foreach (@ALL_HEADERS)
1917             } ## end if ($add_missing_headers)
1918              
1919 4         9 my $success = 1;
1920 4 50       15 if ( $self->{'options'}{'default_headers'} ) {
1921 4         19 $success &= $self->add_headers( $self->{'options'}{'default_headers'} );
1922             }
1923 4 50       13 if ( $self->{'options'}{'headers'} ) {
1924 4         9 $success &= $self->add_headers( $self->{'options'}{'headers'} );
1925             }
1926 4 50       14 unless ($success) {
1927 0         0 croak("Error creating blank file.");
1928             }
1929             } ## end sub create_blank_file
1930              
1931             =head2 read_headers()
1932              
1933             For internal use only. C reads the metadata at the beginning of
1934             a SeaBASS file.
1935              
1936             Called by the object, accepts no arguments.
1937              
1938             =cut
1939              
1940             sub read_headers {
1941 131     131 1 258 my $self = shift;
1942              
1943 131 50       389 if ( $self->{'headers'} ) {
1944 0         0 return;
1945             }
1946              
1947 131 100       389 my $slash = ( $self->{'options'}{'keep_slashes'} ? '/' : '' );
1948 131         249 my $success = 1;
1949 131         340 my @comments;
1950              
1951 131         285 $self->{'headers'} = {};
1952 131         318 $self->{'comments'} = [];
1953              
1954 131 50       386 if ( $self->{'options'}{'default_headers'} ) {
1955 131         443 $success &= $self->add_headers( $self->{'options'}{'default_headers'} );
1956             }
1957              
1958 131         265 my $handle = $self->{'handle'};
1959 131         252 my $position = my $line_number = 0;
1960 131         189 my @header_lines;
1961              
1962 131         238 my $strict = $self->{'options'}{'strict'};
1963              
1964 131         712 while ( my $line = <$handle> ) {
1965 3287         4457 $line_number++;
1966 3287         6184 strip($line);
1967 3287 50       6153 if ($line) {
1968 3287 100       10122 if ( $line =~ m'^(/end_header)\@?$'i ) {
    100          
    100          
1969 116         385 push( @header_lines, $1 );
1970 116         229 $position = tell($handle);
1971 116         246 last;
1972             } elsif ( $line =~ m"^/" ) {
1973 2460         4348 push( @header_lines, $line );
1974             } elsif ( $line =~ m"^!" ) {
1975 696         1176 push( @comments, $line );
1976 696 100       1520 if ( $self->{'options'}{'preserve_header'} ) {
1977 10         24 push( @header_lines, $line );
1978             }
1979             } else { #TODO: search ahead for more headers or comments (in case of merely comment missing !) and fail if READ_STRICT
1980 15         48 seek( $handle, $position, SEEK_SET );
1981 15 50       47 if ( $strict & STRICT_READ ) {
1982 0         0 carp("File missing /end_header or comment missing !, assuming data start: line #$line_number ($line)");
1983             }
1984 15         41 last;
1985             }
1986             } ## end if ($line)
1987 3156         9153 $position = tell($handle);
1988             } ## end while (my $line = <$handle>)
1989              
1990             # add_headers looks at STRICT_WRITE, not STRICT_READ
1991 131 100       351 if ( $strict & STRICT_READ ) {
1992 5         16 $self->{'options'}{'strict'} |= STRICT_WRITE;
1993             } else {
1994 126         283 $self->{'options'}{'strict'} = 0;
1995             }
1996              
1997 131 100       359 if ( $self->{'options'}{'preserve_header'} ) {
1998 1         12 $self->{'preserved_header'} = [@header_lines];
1999             }
2000              
2001 131         379 $success &= $self->add_headers( \@header_lines );
2002              
2003             # restore strictness
2004 131         276 $self->{'options'}{'strict'} = $strict;
2005              
2006 131 50       360 if ( $self->{'options'}{'headers'} ) {
2007 131         386 $success &= $self->add_headers( $self->{'options'}{'headers'} );
2008             }
2009              
2010 131         214 my %headers = %{ $self->{'headers'} };
  131         1506  
2011              
2012 131 100       513 if ( $self->{'options'}{'preserve_comments'} ) {
2013 130         215 push( @{ $self->{'comments'} }, @comments );
  130         496  
2014             }
2015              
2016 131   66     541 my $missing = $headers{"${slash}missing"} || $DEFAULT_MISSING;
2017 131 100       393 if ( $self->{'options'}{'delete_missing_headers'} ) {
2018 1         10 while ( my ( $k, $v ) = each(%headers) ) {
2019 31 100       128 if ( $k =~ m"/?(?:end|begin)_header$|^/?missing$" ) {
2020 3         14 next;
2021             }
2022 28 100 66     292 if ( !defined($v) || $v =~ m"^n/?a(?:\[.*?\])?$"i || lc($v) eq lc($missing) ) {
      66        
2023 7         34 delete( $headers{$k} );
2024             }
2025             } ## end while (my ($k, $v) = each...)
2026             } ## end if ($self->{'options'}...)
2027              
2028 131 100       315 if ( $strict & STRICT_READ ) {
2029 5         11 foreach (@REQUIRED_HEADERS) {
2030 110 100       3186 if ( !exists( $headers{"${slash}$_"} ) ) {
2031 58         81 $success = 0;
2032 58         5168 carp("Missing required header: $_");
2033             }
2034             } ## end foreach (@REQUIRED_HEADERS)
2035 5         26 while ( my ( $header, $value ) = each(%headers) ) {
2036 72 50       120 if ($slash) {
2037 0         0 $header =~ s"^/"";
2038             }
2039 72 50 33 731   188 if ( ( firstidx { $_ eq $header } @ALL_HEADERS ) < 0 && ( firstidx { $_ eq $header } @HIDDEN_HEADERS ) < 0 ) {
  1338         1567  
  0         0  
2040 0         0 $success = 0;
2041 0         0 carp("$header not a standard header.");
2042             }
2043             } ## end while (my ($header, $value...))
2044 5 50 33     32 if ( $headers{"${slash}begin_header"} || $headers{"${slash}end_header"} ) {
2045 0         0 $success = 0;
2046 0         0 carp("begin_ or end_header incorrect");
2047             }
2048             } ## end if ($strict & STRICT_READ)
2049 131         334 foreach (@ABSOLUTELY_REQUIRED_HEADERS) {
2050 131 100       497 if ( !exists( $headers{"${slash}$_"} ) ) {
2051 1         2 $success = 0;
2052 1 50       4 if ( $strict & STRICT_READ ) {
2053 1         80 carp("Missing absolutely required header: $_");
2054             }
2055             }
2056             } ## end foreach (@ABSOLUTELY_REQUIRED_HEADERS)
2057              
2058 131   100     1674 $self->{'fields'} = [ split( /\s*,\s*/, $headers{"${slash}fields"} || '' ) ];
2059 131   100     1468 $self->{'actual_fields'} = [ split( /\s*,\s*/, $headers{"${slash}fields"} || '' ) ];
2060              
2061 131 100       527 if ( $headers{"${slash}units"} ) {
2062 79         667 $self->{'units'} = [ split( /\s*,\s*/, $headers{"${slash}units"} ) ];
2063 79         709 $self->{'actual_units'} = [ split( /\s*,\s*/, $headers{"${slash}units"} ) ];
2064             } else {
2065 52         89 my (@new_units1);
2066 52         85 foreach ( @{ $self->{'fields'} } ) {
  52         119  
2067 366         571 push( @new_units1, 'unitless' );
2068             }
2069 52         159 my @new_units2 = @new_units1;
2070 52         136 $self->{'units'} = \@new_units1;
2071 52         114 $self->{'actual_units'} = \@new_units2;
2072 52         232 $headers{"${slash}units"} = join( ',', @new_units1 );
2073             } ## end else [ if ($headers{"${slash}units"...})]
2074              
2075 131 100       266 if ( @{$self->{'fields'}} != @{$self->{'units'}} ) {
  131         317  
  131         393  
2076 1 50       2 if ( $strict & STRICT_READ ) {
2077 1         79 carp("/fields and /units don't match up");
2078 1         47 $success = 0;
2079             } else {
2080 0         0 while (@{$self->{'fields'}} > @{$self->{'units'}}){
  0         0  
  0         0  
2081 0         0 push(@{$self->{'units'}}, 'unitless');
  0         0  
2082             }
2083 0         0 while (@{$self->{'fields'}} < @{$self->{'units'}}){
  0         0  
  0         0  
2084 0         0 pop(@{$self->{'units'}});
  0         0  
2085             }
2086             }
2087             }
2088              
2089 131 100       370 unless ($success) {
2090 4 50       12 if ( $strict & STRICT_READ ) {
2091 4         379 croak("Error(s) reading SeaBASS file");
2092             } else {
2093 0         0 return;
2094             }
2095             }
2096              
2097 127         588 $self->{'missing'} = $missing;
2098 127   66     672 $self->{'below_detection_limit'} = $headers{"${slash}below_detection_limit"} || $missing;
2099 127   66     636 $self->{'above_detection_limit'} = $headers{"${slash}above_detection_limit"} || $missing;
2100 127         376 $self->{'line_number'} = $line_number;
2101 127         297 $self->{'data_start_line'} = $line_number;
2102 127         342 $self->{'data_start_position'} = $position;
2103              
2104 127 100       424 if ( $self->{'options'}{'cache'} ) {
2105 117         328 $self->{'data'} = [];
2106             }
2107 127         294 $self->{'dataidx'} = -1;
2108 127         247 $self->{'max_dataidx'} = -1;
2109              
2110 127         585 $self->{'headers'} = \%headers;
2111              
2112 127 100       398 if ( $self->{'options'}{'fill_ancillary_data'} ) {
2113 16         25 my @fields_lc = map {lc} @{ $self->{'fields'} };
  110         189  
  16         37  
2114 16         48 $self->{'fields_lc'} = \@fields_lc;
2115 16         32 $self->{'ancillary'} = {};
2116              
2117 16         38 foreach my $field (@FILL_ANCILLARY_DATA) {
2118 208         347 $self->find_ancillaries($field);
2119             }
2120              
2121 16         38 $self->{'case_conversion'} = {};
2122              
2123 16         30 while ( my ( $field, $value ) = each( %{ $self->{'ancillary'} } ) ) {
  159         460  
2124 143     801   345 my $idx = firstidx { $_ eq $field } @{ $self->{'fields_lc'} };
  801         883  
  143         295  
2125 143         286 my $new_field = $field;
2126 143 100       243 if ( $idx >= 0 ) {
2127 77         126 $new_field = $self->{'fields'}[$idx];
2128             }
2129              
2130 143 100       226 if ( ref($value) ) {
2131 29         66 for ( my $i = 1; $i < @$value; $i++ ) {
2132 52         73 my $new_arg = $value->[$i];
2133 52         228 for ( $value->[$i] =~ /\$(\{\w+\}|\w+)/g ) {
2134 37         125 ( my $variable = $_ ) =~ s/^\{|\}$//g;
2135              
2136 37     196   98 my $idx = firstidx { $_ eq $variable } @{ $self->{'fields_lc'} };
  196         228  
  37         87  
2137 37         93 my $new_variable = $self->{'fields'}[$idx];
2138              
2139 37         315 $new_arg =~ s/\$$variable(\W|\b|$)/\$$new_variable$1/g;
2140 37         246 $new_arg =~ s/\$\{$variable\}/\$$new_variable/g;
2141             } ## end for ($value->[$i] =~ /\$(\{\w+\}|\w+)/g)
2142 52         153 $value->[$i] = $new_arg;
2143             } ## end for (my $i = 1; $i <= length...)
2144             } else {
2145 114         140 my $new_value = $value;
2146 114         436 for ( $value =~ /\$(\{\w+\}|\w+)/g ) {
2147 151         571 ( my $variable = $_ ) =~ s/^\{|\}$//g;
2148              
2149 151     740   374 my $idx = firstidx { $_ eq $variable } @{ $self->{'fields_lc'} };
  740         799  
  151         314  
2150 151 100       354 if ( $idx >= 0 ) {
2151 145         234 my $new_variable = $self->{'fields'}[$idx];
2152              
2153 145         1292 $new_value =~ s/\$$variable(\W|\b|$)/\$$new_variable$1/g;
2154 145         1163 $new_value =~ s/\$\{$variable\}/\$$new_variable/g;
2155             } ## end if ($idx >= 0)
2156             } ## end for ($value =~ /\$(\{\w+\}|\w+)/g)
2157 114         240 $value = $new_value;
2158             } ## end else [ if (ref($value)) ]
2159 143 50       238 if ( $field ne $new_field ) {
2160 0         0 delete( $self->{'ancillary'}{$field} );
2161 0         0 $self->{'case_conversion'}{$field} = $new_field;
2162             }
2163 143         336 $self->{'ancillary'}{$new_field} = $value;
2164             } ## end while (my ($field, $value...))
2165              
2166 16         48 delete( $self->{'fields_lc'} );
2167             } ## end if ($self->{'options'}...)
2168              
2169 127 100 100     838 if ($self->{'options'}{'optional_warnings'} && $headers{"${slash}optical_depth_warning"} && $headers{"${slash}optical_depth_warning"} =~ /true/){
      100        
2170 1         278 carp("/optical_depth_warning=true, indicating optically shallow water. Use caution when using this data.");
2171             }
2172              
2173 127         826 return 1;
2174             } ## end sub read_headers
2175              
2176             =head2 validate_header($header, $value, $strict)
2177              
2178             my ($k, $v, $string) = ('investigators','jason_lefler',0)
2179             $sb_file->validate_header($k, $v, $strict);
2180              
2181             For internal use only. C is in charge of properly formatting
2182             key/value pairs to add to the object. This function will modify the input
2183             variables in place to prepare them for use.
2184              
2185             Returns false if there was a problem with the inputs, such as C is set
2186             and an invalid header was passed in.
2187              
2188             C will set C to C<$DEFAULT_MISSING> (C<-999>) if it
2189             is blank or undefined.
2190              
2191             This function will also change the expected delimiter for rows that have not
2192             yet been cached.
2193              
2194             =cut
2195              
2196             sub validate_header {
2197 2590     2590 1 4652 my ( $self, $k, $v, $strict ) = @_;
2198              
2199 2590         3327 my $success = 1;
2200              
2201 2590 100       4108 if ( !defined($v) ) {
2202 207         316 $v = '';
2203             } else {
2204 2383         3792 strip($v);
2205             }
2206              
2207 2590         5154 strip($k);
2208              
2209 2590         4526 $k = lc($k);
2210              
2211 2590 50 66     5741 if ( length($v) == 0 && $k !~ /_header/ ) {
2212 0 0       0 if ($strict) {
2213 0         0 carp("$k missing value");
2214 0         0 $success = 0;
2215             } else {
2216 0         0 $v = "";
2217             }
2218             } ## end if (length($v) == 0 &&...)
2219              
2220 2590 100 100     8811 if ( !$self->{'options'}{'preserve_case'} || $k =~ /fields|units/ ) {
2221 1311         2224 $v = lc($v);
2222             }
2223              
2224 2590 100       4663 if ( $self->{'options'}{'keep_slashes'} ) {
2225 139 100       316 if ( $k =~ m"^[^/]" ) {
2226 2         7 $k = "/$k";
2227             }
2228              
2229 139 50 66 12   296 if ( $strict && ( firstidx { "/$_" eq $k } @ALL_HEADERS ) < 0 && ( firstidx { "/$_" eq $k } @HIDDEN_HEADERS ) < 0 ) {
  12   33     36  
  0         0  
2230 0         0 carp("Invalid header, $k");
2231 0         0 $success = 0;
2232             }
2233             } else {
2234 2451 100       5913 if ( $k =~ m"^/" ) {
2235 2449         6219 $k =~ s"^/"";
2236             }
2237              
2238 2451 50 66 1259   5373 if ( $strict && ( firstidx { $_ eq $k } @ALL_HEADERS ) < 0 && ( firstidx { $_ eq $k } @HIDDEN_HEADERS ) < 0 ) {
  1354   33     1571  
  0         0  
2239 0         0 carp("Invalid header, $k");
2240 0         0 $success = 0;
2241             }
2242             } ## end else [ if ($self->{'options'}...)]
2243              
2244 2590 100       9423 if ( $k =~ /_latitude|_longitude/){
    100          
    100          
    100          
    100          
    100          
2245 308         814 $v =~ s/\[deg\]$//i;
2246             } elsif ( $k =~ /_time/){
2247 158         486 $v =~ s/\[gmt\]$//i;
2248             } elsif ( $k =~ m"^/?delimiter$" ) {
2249 93 100       315 unless ( $self->set_delim( $strict, $v ) ) {
2250 1 50       3 if ($strict) {
2251 0         0 $success = 0;
2252             }
2253             }
2254             } elsif ( $k =~ m"^/?missing$" ) {
2255 114 50       403 $self->{'missing'} = ( length($v) ? $v : $DEFAULT_MISSING );
2256 114         578 $self->{'missing_is_number'} = looks_like_number( $self->{'missing'} );
2257             } elsif ( $k =~ m"^/?above_detection_limit" ) {
2258 7 50       30 $self->{'above_detection_limit'} = ( length($v) ? $v : $self->{'missing'} );
2259 7         24 $self->{'adl_is_number'} = looks_like_number( $self->{'above_detection_limit'} );
2260             } elsif ( $k =~ m"^/?below_detection_limit" ) {
2261 7 50       26 $self->{'below_detection_limit'} = ( length($v) ? $v : $self->{'missing'} );
2262 7         24 $self->{'bdl_is_number'} = looks_like_number( $self->{'below_detection_limit'} );
2263             }
2264              
2265 2590         3901 $_[1] = $k;
2266 2590         3353 $_[2] = $v;
2267              
2268 2590         4254 return $success;
2269             } ## end sub validate_header
2270              
2271             =head2 set_delim($strict, $delim)
2272              
2273             Takes a string declaring the delim (IE: 'comma', 'space', etc) and updates the
2274             object's internal delimiter regex.
2275              
2276             =cut
2277              
2278             sub set_delim {
2279 93     93 1 181 my $self = shift;
2280 93         152 my $strict = shift;
2281 93   50     276 my $delim = shift || '';
2282 93 50       442 if ( $delim eq 'comma' ) {
    50          
    100          
    50          
    100          
2283 0         0 $delim = qr/\s*,\s*/;
2284             } elsif ( $delim eq 'semicolon' ) {
2285 0         0 $delim = qr/\s*;\s*/;
2286             } elsif ( $delim eq 'space' ) {
2287 91         522 $delim = qr/\s+/;
2288             } elsif ( $delim eq 'tab' ) {
2289 0         0 $delim = qr/\t/;
2290             } elsif ($strict) {
2291 1         124 carp("delimiter not understood");
2292             } else {
2293 1 50       3 my $slash = ( $self->{'options'}{'keep_slashes'} ? '/' : '' );
2294 1         4 $self->{'headers'}{"${slash}delimiter"} = 'comma';
2295 1         2 $delim = undef;
2296             }
2297 93         322 $self->{'delim'} = $delim;
2298 93 100       368 return ( $delim ? 1 : 0 );
2299             } ## end sub set_delim
2300              
2301             =head2 update_fields()
2302              
2303             C runs through the currently cached rows and calls
2304             C on each row. It then updates the /fields and /units
2305             headers in the header hash.
2306              
2307             =cut
2308              
2309             sub update_fields {
2310 28     28 1 59 my ($self) = @_;
2311 28 100 66     137 if ( $self->{'options'}{'cache'} && $self->{'max_dataidx'} >= 0 ) {
2312 4         7 foreach my $hash ( @{ $self->{'data'} } ) {
  4         11  
2313 9         28 $self->add_and_remove_fields($hash);
2314             }
2315             }
2316              
2317 28 100       73 my $slash = ( $self->{'options'}{'keep_slashes'} ? '/' : '' );
2318 28         40 $self->{'headers'}{"${slash}fields"} = join( ',', @{ $self->{'actual_fields'} } );
  28         113  
2319 28         52 $self->{'headers'}{"${slash}units"} = join( ',', @{ $self->{'actual_units'} } );
  28         111  
2320             } ## end sub update_fields
2321              
2322             =head2 add_and_remove_fields(\%row)
2323              
2324             Given a reference to a row, this function deletes any fields removed with
2325             C and adds an undefined or /missing value for each field added
2326             via C. If C is set, an undefined value is
2327             given, otherwise, it is filled with the /missing value.
2328              
2329             If C is set, this function adds missing date, time,
2330             date_time, lat, lon, and depth fields to the retrieved row from the header.
2331              
2332             Needlessly returns the hash reference passed in.
2333              
2334             =cut
2335              
2336             sub add_and_remove_fields {
2337 277     277 1 480 my ( $self, $hash ) = @_;
2338 277         975 foreach my $field ( keys(%$hash) ) {
2339 1916 100   7814   4326 if ( ( firstidx { $_ eq $field } @{ $self->{'actual_fields'} } ) < 0 ) {
  7814         11799  
  1916         3793  
2340 21 50 33 0   51 unless ( $self->{'options'}{'fill_ancillary_data'} && ( firstidx { $_ eq $field } keys( %{ $self->{'ancillary'} } ) ) >= 0 ) {
  0         0  
  0         0  
2341 21         46 delete( $hash->{$field} );
2342             }
2343             }
2344             } ## end foreach my $field (keys(%$hash...))
2345              
2346 277 100       850 my $missing = ( $self->{'options'}{'missing_data_to_undef'} ? undef : $self->{'missing'} );
2347 277         883 while ( my ( $variable, $pad ) = each(%FIELD_FORMATTING) ) {
2348 2216   33     5105 my $case_var = $self->{'case_conversion'}{$variable} || $variable;
2349 2216         2803 my $v = $hash->{$case_var};
2350 2216 50 66     8763 if ( defined($v) && length($v) && (!defined($missing) || $v != $missing)) {
      66        
      100        
2351 57         265 $hash->{$case_var} = sprintf($pad, $v);
2352             }
2353             } ## end while (my ($variable, $pad...))
2354              
2355 277 100       595 if ( defined($self->{'ancillary'}) ) {
2356 19         43 $self->{'ancillary_tmp'} = {};
2357 19         34 for my $variable (@FILL_ANCILLARY_DATA) {
2358 247 100       447 if ( defined($self->{'ancillary'}{$variable}) ) {
2359 173         295 my $value = $self->extrapolate_variables( $missing, $self->{'ancillary'}{$variable}, $hash );
2360 173 50       353 if ( defined($value) ) {
2361 173         344 $hash->{$variable} = $value;
2362             }
2363             } ## end if ($self->{'ancillary'...})
2364             } ## end for my $variable (@FILL_ANCILLARY_DATA)
2365             } ## end if ($self->{'ancillary'...})
2366              
2367 277         383 foreach my $field ( @{ $self->{'actual_fields'} } ) {
  277         564  
2368 1972 100       3334 if ( !exists( $hash->{$field} ) ) {
2369 61         174 $hash->{$field} = $missing;
2370             }
2371             }
2372              
2373 277         472 return $hash;
2374             } ## end sub add_and_remove_fields
2375              
2376             =head2 guess_delim($line)
2377              
2378             C is is used to guess the delimiter of a line. It is not very
2379             intelligent. If it sees any commas, it will assume the delimiter is a comma.
2380             Then, it checks for tabs, spaces, then semi-colons. Returns 1 on success. If
2381             it doesn't find any, it will throw a warning and return undef.
2382              
2383             =cut
2384              
2385             sub guess_delim {
2386 35     35 1 87 my ( $self, $line ) = @_;
2387 35         68 my $delim_string = '';
2388 35 100       266 if ( $line =~ /,/ ) {
    50          
    50          
    0          
2389 2         16 my $delim = qr/\s*,\s*/;
2390 2         6 $self->{'delim'} = $delim;
2391 2         5 $delim_string = 'comma';
2392             } elsif ( $line =~ /\t/ ) {
2393 0         0 my $delim = qr/\t/;
2394 0         0 $self->{'delim'} = $delim;
2395 0         0 $delim_string = 'tab';
2396             } elsif ( $line =~ /\s+/ ) {
2397 33         168 my $delim = qr/\s+/;
2398 33         80 $self->{'delim'} = $delim;
2399 33         69 $delim_string = 'space';
2400             } elsif ( $line =~ /;/ ) {
2401 0         0 my $delim = qr/\s*;\s*/;
2402 0         0 $self->{'delim'} = $delim;
2403 0         0 $delim_string = 'semicolon';
2404             } else {
2405 0         0 carp("No delimiter defined or can be guessed");
2406 0         0 return;
2407             }
2408 35 100       175 $self->{'headers'}{ ( $self->{'options'}{'keep_slashes'} ? '/' : '' ) . 'delimiter' } = $delim_string;
2409 35         130 return 1;
2410             } ## end sub guess_delim
2411              
2412             =head2 ingest_row(\%data_row | \@data_row | $data_row | %data_row)
2413              
2414             For mostly internal use, parses arguments for C, C, and C
2415             and returns a hash or hash reference of the data row. Given a hash reference,
2416             it will merely return it.
2417              
2418             Given an array or array reference, it will assume each element is a field as
2419             listed in either C or C. If the number of elements
2420             matches C, it uses assumes it's that. If it doesn't match, it is
2421             tried to match against C. If it doesn't match either, a warning is
2422             issued and the return is undefined.
2423              
2424             Given a non-reference scalar, it will split the scalar based on the current
2425             delimiter. If one is not defined, it is guessed. If it cannot be guessed, the
2426             return is undefined.
2427              
2428             If the inputs are successfully parsed, all keys are turned lowercase.
2429              
2430             =cut
2431              
2432             sub ingest_row {
2433 27     27 1 43 my $self = shift;
2434 27         44 my %new_row;
2435 27 100       75 if ( $#_ < 0 ) {
2436 1         139 carp("Incorrect number of arguments to ingest_row()");
2437 1         95 return;
2438             }
2439 26         41 my $arrayref;
2440 26 100       98 if ( ref( $_[0] ) eq 'HASH' ) {
    100          
    50          
2441 14         27 %new_row = %{ shift(@_) };
  14         61  
2442             } elsif ( ref( $_[0] ) eq 'ARRAY' ) {
2443 3         4 $arrayref = $_[0];
2444             } elsif ( !ref( $_[0] ) ) {
2445 9 100       27 if ( $#_ == 0 ) {
    100          
2446 7 50 66     30 if ( !$self->{'delim'} && !$self->guess_delim( $_[0] ) ) {
2447 0         0 return;
2448             }
2449 7         65 $arrayref = [ split( $self->{'delim'}, $_[0] ) ];
2450             } elsif ( $#_ % 2 == 1 ) {
2451 1         3 %new_row = @_;
2452             } else {
2453 1         99 carp('Even sized list, scalar, or hash/array reference expected');
2454 1         89 return;
2455             }
2456             } else {
2457 0         0 carp("Arguments to ingest_row() not understood.");
2458 0         0 return;
2459             }
2460              
2461 25 100       71 if ($arrayref) {
2462 10         17 my $iterator;
2463 10 100       12 if ( scalar( @{ $self->{'actual_fields'} } ) == scalar( @{$arrayref} ) ) {
  10 50       24  
  10         23  
2464 8         64 $iterator = each_arrayref( $self->{'actual_fields'}, $arrayref );
2465 2         3 } elsif ( scalar( @{ $self->{'fields'} } ) == scalar( @{$arrayref} ) ) {
  2         5  
2466 2         9 $iterator = each_arrayref( $self->{'fields'}, $arrayref );
2467 2         7 $self->add_and_remove_fields( \%new_row );
2468             } else {
2469 0         0 my $actual_fields = scalar( @{ $self->{'actual_fields'} } );
  0         0  
2470 0         0 my $fields = scalar( @{ $self->{'fields'} } );
  0         0  
2471 0 0       0 if ( $actual_fields == $fields ) {
2472 0         0 carp("Invalid number of elements, expected $fields");
2473             } else {
2474 0         0 carp("Invalid number of elements, expected $actual_fields or $fields");
2475             }
2476 0         0 return;
2477             } ## end else [ if (scalar(@{$self->{'actual_fields'...}}))]
2478 10         62 while ( my ( $k, $v ) = $iterator->() ) {
2479 52         204 $new_row{$k} = $v;
2480             }
2481             } ## end if ($arrayref)
2482              
2483 25         84 %new_row = map { lc($_) => $new_row{$_} } keys %new_row;
  71         238  
2484              
2485 25 50       83 if (wantarray) {
2486 0         0 return %new_row;
2487             } else {
2488 25         80 return \%new_row;
2489             }
2490             } ## end sub ingest_row
2491              
2492             =head2 find_ancillaries($field_name)
2493              
2494             Used by C to traverse through a field's possible
2495             substitutes in C<%ANCILLARY> and try to find the most suitable replacement.
2496             Values of fields in C<%ANCILLARY> are array references, where each element is
2497             either:
2498              
2499             =over 4
2500              
2501             =item * a string of existing field names used to create the value
2502              
2503             =item * an array reference of the form [converter function, parsing regex
2504             (optional), arguments to converter, ... ]
2505              
2506             =item * a hash reference of the form { header => qr/parsing_regex/ }
2507              
2508             =back
2509              
2510             If the element is an array reference and an argument requires a field from the
2511             file, all arguments are parsed and the variables within them extrapolated, then
2512             the array is put into C<< $self->{'ancillary'} >>.
2513              
2514             If no value can be ascertained, it will not be added to the data rows.
2515              
2516             The value found is stored in C<< $self->{'ancillary'} >>. Returns 1 on
2517             success, 0 if the field cannot be filled in.
2518              
2519             =cut
2520              
2521             sub find_ancillaries {
2522 406     406 1 622 my ( $self, $field ) = @_;
2523 406 100       682 if ( $self->{'ancillary'}{$field} ) {
2524 99         148 return 1;
2525             }
2526 307     1833   732 my $idx = firstidx { $_ eq $field } @{ $self->{'fields_lc'} };
  1833         2055  
  307         705  
2527 307 100       761 if ( $idx >= 0 ) {
2528 77         204 $self->{'ancillary'}{$field} = "\$\{$field\}";
2529 77         141 return 1;
2530             }
2531              
2532 230 50       419 my $slash = ( $self->{'options'}{'keep_slashes'} ? '/' : '' );
2533 230 100       270 foreach my $attempt ( @{ $ANCILLARY{$field} || [] } ) {
  230         529  
2534 416 100       776 if ( ref($attempt) eq 'HASH' ) {
    100          
    50          
2535 284         310 keys( %{$attempt} ); #reset each() iterator between calls
  284         372  
2536 284         348 while ( my ( $where, $regex ) = each( %{$attempt} ) ) {
  541         1272  
2537 284 100 66     1051 if ( $where =~ /^\$/ ) {
    100 66        
2538 122 100   690   588 if ( ( firstidx { "\$$_" eq $where } $self->fields() ) >= 0 ) {
  690         1098  
2539 15     33   61 $self->{'ancillary'}{$field} = [ sub { return shift; }, $regex, $where ];
  33         47  
2540 15         56 return 1;
2541             }
2542             } elsif ( defined( $self->{'headers'}{"$slash$where"} ) && $self->{'headers'}{"$slash$where"} =~ $regex && lc($1) ne 'na' ) {
2543 12         37 $self->{'ancillary'}{$field} = $1;
2544 12         37 return 1;
2545             }
2546             } ## end while (my ($where, $regex...))
2547             } elsif ( ref($attempt) eq 'ARRAY' ) {
2548 89         157 my @attempt = @$attempt;
2549 89         110 my $function = shift(@attempt);
2550 89         114 my $regex;
2551 89 100       158 if ( ref( $attempt[0] ) eq 'Regexp' ) {
2552 41         55 $regex = shift(@attempt);
2553             }
2554 89         103 my $success = 1;
2555 89         94 my @args;
2556 89         110 foreach my $argument (@attempt) {
2557 89         100 my $tmparg = $argument;
2558 89         398 for ( $argument =~ /\$(\{\w+\}|\w+)/g ) {
2559 115         283 ( my $variable = $_ ) =~ s/^\{|\}$//g;
2560 115         206 $success &= $self->find_ancillaries($variable);
2561 115 100       185 if ($success) {
2562 40 50       65 if ( ref( $self->{'ancillary'}{$variable} ) ) {
2563 0         0 $tmparg =~ s/\$$variable(\W|\b|$)/\$\{$variable\}$1/g;
2564             } else {
2565 40         53 my $value = $self->{'ancillary'}{$variable};
2566 40         490 $tmparg =~ s/\$$variable(\W|\b|$)/$value$1/g;
2567 40         268 $tmparg =~ s/\$\{$variable\}/$value/g;
2568             }
2569             } else {
2570 75         112 last;
2571             }
2572             } ## end for ($argument =~ /\$(\{\w+\}|\w+)/g)
2573 89         184 push( @args, $tmparg );
2574             } ## end foreach my $argument (@attempt)
2575 89 100       186 if ($success) {
2576 14 100       27 if ($regex) {
2577 8         12 unshift( @args, $regex );
2578             }
2579 14         35 $self->{'ancillary'}{$field} = [ $function, @args ];
2580 14         42 return 1;
2581             } ## end if ($success)
2582             } elsif ( !ref($attempt) ) {
2583 43         58 my $success = 1;
2584 43         60 my $tmparg = $attempt;
2585 43         246 for ( $attempt =~ /\$(\{\w+\}|\w+)/g ) {
2586 83         254 ( my $variable = $_ ) =~ s/^\{|\}$//g;
2587 83         207 $success &= $self->find_ancillaries($variable);
2588 83 100       155 if ($success) {
2589 65 100       121 if ( ref( $self->{'ancillary'}{$variable} ) ) {
2590 6         54 $tmparg =~ s/\$$variable(\W|\b|$)/\$\{$variable\}$1/g;
2591             } else {
2592 59         102 my $value = $self->{'ancillary'}{$variable};
2593 59         947 $tmparg =~ s/\$$variable(\W|\b|$)/$value$1/g;
2594 59         557 $tmparg =~ s/\$\{$variable\}/$value/g;
2595             }
2596             } else {
2597 18         25 last;
2598             }
2599             } ## end for ($attempt =~ /\$(\{\w+\}|\w+)/g)
2600 43 100       112 if ($success) {
2601 25         62 $self->{'ancillary'}{$field} = $tmparg;
2602 25         95 return 1;
2603             }
2604             } ## end elsif (!ref($attempt))
2605             } ## end foreach my $attempt (@{$ANCILLARY...})
2606              
2607 164         275 return 0;
2608             } ## end sub find_ancillaries
2609              
2610             =head2 extrapolate_variables($missing, $expression, \%row)
2611              
2612             Used by C to convert a parsed ancillary string, such as
2613             C<'$year$month$day'>, into a real value using the fields from the C<\%row>.
2614             C<$expression>s are strings figured out by C and stored in
2615             C<< $self->{'ancillary'} >>.
2616              
2617             The return is undefined if a value cannot be created (IE: a required field is
2618             missing).
2619              
2620             =cut
2621              
2622             sub extrapolate_variables {
2623 220     220 1 379 my ( $self, $missing, $expression, $row ) = @_;
2624              
2625 220 100       340 if ( ref($expression) ) {
2626 47         85 return $self->extrapolate_function( $missing, $expression, $row );
2627             } else {
2628 173         203 my $tmpexpr = $expression;
2629 173         617 for ( $expression =~ /\$(\{\w+\}|\w+)/g ) {
2630 221         581 ( my $variable = $_ ) =~ s/^\{|\}$//g;
2631 221         265 my $value;
2632 221 100 33     727 if ( $self->{'ancillary_tmp'}{$variable} ) {
    50 66        
    0 33        
2633 123         169 $value = $self->{'ancillary_tmp'}{$variable};
2634             } elsif ( defined($row->{$variable}) && length($row->{$variable}) && (!defined($missing) || $row->{$variable} != $missing)) {
2635 98         161 $value = $row->{$variable};
2636 98         170 $self->{'ancillary_tmp'}{$variable} = $value;
2637             } elsif ( ref( $self->{'ancillary'}{$variable} ) ) {
2638 0         0 $value = $self->extrapolate_function($missing, $self->{'ancillary'}{$variable}, $row);
2639 0 0       0 if ( !defined($value) ) {
2640 0         0 return;
2641             }
2642             } else {
2643 0         0 return;
2644             }
2645              
2646 221         2415 $tmpexpr =~ s/\$$variable(\W|\b|$)/$value$1/g;
2647 221         1412 $tmpexpr =~ s/\$\{$variable\}/$value/g;
2648             } ## end for ($expression =~ /\$(\{\w+\}|\w+)/g)
2649 173         446 return $tmpexpr;
2650             } ## end else [ if (ref($expression)) ]
2651             } ## end sub extrapolate_variables
2652              
2653             =head2 extrapolate_function($missing, $expression, \%row)
2654              
2655             If the value stored in C<< $self->{'ancillary'} >> is an array reference, this
2656             function uses the array to create an actual value. See
2657             L for an explanation of the
2658             array.
2659              
2660             =cut
2661              
2662             sub extrapolate_function {
2663 47     47 1 73 my ( $self, $missing, $expression, $row ) = @_;
2664 47         53 my $value;
2665 47         89 my ( $function, @args ) = @$expression;
2666 47         57 my $regex;
2667              
2668 47 100       100 if ( ref( $args[0] ) eq 'Regexp' ) {
2669 41         54 $regex = shift(@args);
2670             }
2671 47         71 for (@args) {
2672 47         90 $_ = $self->extrapolate_variables( $missing, $_, $row );
2673 47 50       133 if ( !defined($_) ) {
2674 0         0 return;
2675             }
2676             } ## end for (@args)
2677 47         81 $value = &$function(@args);
2678 47 100       87 if ($regex) {
2679 41 50       202 if ( $value =~ $regex ) {
2680 41         82 $value = $1;
2681             }
2682             }
2683              
2684 47         105 return $value;
2685             } ## end sub extrapolate_function
2686              
2687             =head1 STATIC METHODS
2688              
2689             =head2 strip(@list)
2690              
2691             my @space_filled_lines = (' line1 ', ' line2', 'line3 ', 'line4');
2692             strip(@space_filled_lines);
2693             print @space_filled_lines; #line1line2line3line4
2694              
2695             Runs through the list and removes leading and trailing whitespace. All changes
2696             are made in place.
2697              
2698             It is literally this:
2699              
2700             sub strip {
2701             s/^\s+|\s+$//g for @_;
2702             }
2703              
2704             =cut
2705              
2706             # Not an object method!
2707             sub strip {
2708 8526     8526 1 42550 s/^\s+|\s+$//g for @_;
2709             }
2710              
2711             =head2 julian_to_greg($yyyyjjj)
2712              
2713             Converts a date in the day of year format YYYYJJJ into YYYYMMDD. Returns the
2714             newly formatted string or undefined if the input does not match the required
2715             format.
2716              
2717             This uses the C function from C to do the heavy
2718             lifting.
2719              
2720             =cut
2721              
2722             # Not an object method!
2723             sub julian_to_greg {
2724 14     14 1 25 my ($yyyyjjj) = @_;
2725 14 50       50 if ( $yyyyjjj =~ /^(\d{4})(\d{3})$/ ) {
2726 14         86 my ( $y, $m, $d ) = Add_Delta_Days( $1, 1, 1, $2 - 1 );
2727 14         67 return sprintf( '%04d%02d%02d', $y, $m, $d );
2728             }
2729 0           return;
2730             } ## end sub julian_to_greg
2731              
2732             =head1 CAVEATS/ODDITIES
2733              
2734             =head2 Duplicate Fields
2735              
2736             This class will not allow a field to be added to the object if a field of the
2737             same name already exists. If a file being read has duplicate field names, only
2738             the B one is used. No warning is issued. If C is used to
2739             remove it, only the first instance will be deleted. To delete all instances,
2740             use C<< $sb_file->remove_field($sb_file->find_fields('chl')) >>. This may
2741             change in future releases.
2742              
2743             =head2 Changing Delimiter or Missing Value
2744              
2745             Modifying the delimiter header on a file that is being read will cause any
2746             non-cached rows to be split by the new delimiter, which should break most/all
2747             files. If the delimiter must be changed, call C to cache all the rows,
2748             then change it. This will obviously not work if caching is turned off. The
2749             same is true for setting the missing value, but only really applies when the
2750             C option is used (same goes to below detection limit).
2751              
2752             =head2 Below Detection Limit
2753              
2754             Below detection limit is only partially supported. If C is
2755             used, fields equal to C will be set to C, as
2756             well. Files modified while using C will have all data
2757             equal to C written out set to the missing value instead
2758             of the below detection limit value. If the below detection limit value is equal
2759             to the missing value or C is used, the
2760             C header will not be written.
2761              
2762             =head1 AUTHOR
2763              
2764             Jason Lefler, C<< >>
2765              
2766             =head1 BUGS
2767              
2768             Please report any bugs or feature requests to C
2769             rt.cpan.org>, or through the web interface at
2770             L. I will be
2771             notified, and then you'll automatically be notified of progress on your bug as
2772             I make changes.
2773              
2774             =head1 SUPPORT
2775              
2776             You can find documentation for this module with the perldoc command.
2777              
2778             perldoc Data::SeaBASS
2779              
2780             You can also look for information at:
2781              
2782             =over 4
2783              
2784             =item * RT: CPAN's request tracker (report bugs here)
2785              
2786             L
2787              
2788             =item * AnnoCPAN: Annotated CPAN documentation
2789              
2790             L
2791              
2792             =item * CPAN Ratings
2793              
2794             L
2795              
2796             =item * Search CPAN
2797              
2798             L
2799              
2800             =back
2801              
2802             =head1 LICENSE AND COPYRIGHT
2803              
2804             Copyright 2014 Jason Lefler.
2805              
2806             This program is free software; you can redistribute it and/or modify it under
2807             the terms of either: the GNU General Public License as published by the Free
2808             Software Foundation; or the Artistic License.
2809              
2810             See http://dev.perl.org/licenses/ for more information.
2811              
2812             =cut
2813              
2814             1; # End of Data::SeaBASS