File Coverage

blib/lib/Data/SeaBASS.pm
Criterion Covered Total %
statement 915 1047 87.3
branch 480 600 80.0
condition 139 204 68.1
subroutine 69 75 92.0
pod 48 48 100.0
total 1651 1974 83.6


line stmt bran cond sub pod time code
1             package Data::SeaBASS;
2              
3 25     25   2774966 use strict;
  25         209  
  25         665  
4 25     25   111 use warnings;
  25         39  
  25         1412  
5              
6             =head1 NAME
7              
8             Data::SeaBASS - Object-oriented interface for reading/writing SeaBASS files
9              
10             =head1 VERSION
11              
12             version 0.192600
13              
14             =cut
15              
16             our $VERSION = '0.192600'; # VERSION
17              
18             =head1 SYNOPSIS
19              
20             To read SeaBASS files:
21              
22             use Data::SeaBASS qw(STRICT_READ STRICT_WRITE INSERT_BEGINNING INSERT_END);
23              
24             my $sb_file = Data::SeaBASS->new("input.txt");
25              
26             # Calculate the average chlorophyll value using next
27             my $chl_total = 0;
28             my $measurements = 0;
29            
30             while (my $row = $sb_file->next()){
31             if (defined($row->{'chl'})){
32             $chl_total += $row->{'chl'};
33             $measurements++;
34             }
35             }
36             if ($measurements){
37             print $chl_total/$measurements;
38             } else {
39             print "No chl values.";
40             }
41            
42             #alternatively:
43             $sb_file->rewind();
44             while (my %row = $sb_file->next()){
45             if (defined($row{'chl'})){
46             $chl_total += $row{'chl'};
47             $measurements++;
48             }
49             }
50              
51             # Calculate the average chlorophyll value using where
52             my $chl_total2 = 0;
53             my $measurements2 = 0;
54             $sb_file->where(sub {
55             if (defined($_->{'chl'})){
56             $chl_total2 += $_->{'chl'};
57             $measurements2++;
58             }
59             });
60             if ($measurements2){
61             print $chl_total2/$measurements2;
62             } else {
63             print "No chl values.";
64             }
65            
66             Or to modify SeaBASS files:
67              
68             use Data::SeaBASS qw(STRICT_READ STRICT_WRITE INSERT_BEGINNING INSERT_END);
69              
70             my $sb_file = Data::SeaBASS->new("input.txt");
71              
72             # Add a one degree bias to water temperature
73             while (my $row = $sb_file->next()){
74             $row->{'wt'} += 1;
75             $sb_file->update($row);
76             }
77            
78             $sb_file->write(); # to STDOUT
79              
80             # Remove the one degree bias to water temperature
81             $sb_file->where(sub {
82             $_->{'wt'} -= 1;
83             });
84            
85             $sb_file->write("output_file.txt");
86            
87             Or to start a SeaBASS file from scratch:
88              
89             use Data::SeaBASS qw(STRICT_READ STRICT_WRITE INSERT_BEGINNING INSERT_END);
90              
91             my $sb_file = Data::SeaBASS->new({strict => 0, add_empty_headers => 1});
92             $sb_file->add_field('lat','degrees');
93             $sb_file->add_field('lon','degrees');
94             $sb_file->append({'lat' => 1, 'lon' => 2});
95             $sb_file->append("3,4"); # or if you're reading from a CSV file
96             $sb_file->write();
97              
98             =head1 DESCRIPTION
99              
100             C provides an easy to use, object-oriented interface for
101             reading, writing, and modifying SeaBASS data files.
102              
103             =head2 What is SeaBASS?
104              
105             L Bio-optical Archive and Storage
106             System housed at Goddard Space Flight Center.
107             L provides the permanent public
108             repository for data collected under the auspices of the NASA Ocean Biology and
109             Biogeochemistry Program. It also houses data collected by participants in the
110             NASA Sensor Intercomparision and Merger for Biological and Oceanic
111             Interdisciplinary Studies (SIMBIOS) Program. SeaBASS includes marine
112             bio-optical, biogeochemical, and (some) atmospheric data.
113              
114             =head2 SeaBASS File Format
115              
116             SeaBASS files are plain ASCII files with a special header and a matrix of
117             values.
118              
119             =head3 Header
120              
121             The SeaBASS header block consists of many lines of header-keyword pairs. Some
122             headers are optional but most, although technically not required for reading,
123             are required to be ingested into the system. More detailed information is
124             available in the SeaBASS L
125             article|http://seabass.gsfc.nasa.gov/wiki/article.cgi?article=metadataheaders>.
126             The only absolutely required header for this module to work is the /fields
127             line. This module turns fields and units lowercase at all times.
128              
129             /begin_header
130             /delimiter=space
131             /missing=-999
132             /fields=date,time,lat,lon,depth,wt,sal
133             /end_header
134            
135             =head3 Body
136              
137             The SeaBASS body is a matrix of data values, organized much like a spreadsheet.
138             Each column is separated by the value presented in the /delimiter header.
139             Likewise, missing values are indicated by the value presented in the /missing
140             header. The /fields header identifies the geophysical parameter presented in
141             each column.
142              
143             /begin_header
144             /delimiter=space
145             /missing=-999
146             /fields=date,time,lat,lon,depth,wt,sal
147             /end_header
148             19920109 16:30:00 31.389 -64.702 3.4 20.7320 -999
149             19920109 16:30:00 31.389 -64.702 19.1 20.7350 -999
150             19920109 16:30:00 31.389 -64.702 38.3 20.7400 -999
151             19920109 16:30:00 31.389 -64.702 59.6 20.7450 -999
152              
153             =head3 Strictly Speaking
154              
155             SeaBASS files are run through a program called
156             L before
157             they are submitted and before they are ingested into a NASA relational database
158             management system. Some of the things it checks for are required
159             L
160             and proper L
161             names|http://seabass.gsfc.nasa.gov/wiki/article.cgi?article=stdfields>. All
162             data must always have an associated depth, time, and location, though these
163             fields may be placed in the header and are not always required in the data.
164             Just because this module writes the files does not mean they will pass FCHECK.
165              
166             Files are case-INsensitive. Headers are not allowed to have any whitespace.
167              
168             =cut
169              
170 25     25   123 use Carp qw(:DEFAULT);
  25         37  
  25         3563  
171 25     25   147 use Fcntl qw(SEEK_SET);
  25         54  
  25         1000  
172 25     25   6863 use List::MoreUtils qw(firstidx each_arrayref);
  25         152091  
  25         137  
173 25     25   28893 use Date::Calc qw(Add_Delta_Days);
  25         132987  
  25         1807  
174 25     25   164 use Scalar::Util qw(looks_like_number);
  25         46  
  25         308087  
175              
176             require Exporter;
177             our @ISA = qw(Exporter);
178             our @EXPORT_OK = qw(STRICT_READ STRICT_WRITE STRICT_ALL INSERT_BEGINNING INSERT_END);
179             our @EXPORT = qw();
180              
181             our $AUTOLOAD;
182              
183             =head1 EXPORT
184              
185             This module does not export anything by default.
186              
187             =head2 STRICT_READ
188              
189             C is used with the C option, enabling error messages when
190             reading header lines and inserting header data.
191              
192             =head2 STRICT_WRITE
193              
194             C is used with the C option, enabling error messages when
195             writing the data to a file/stream.
196              
197             =head2 STRICT_ALL
198              
199             C is used with the C option, enabling C and
200             C.
201              
202             =head2 INSERT_BEGINNING
203              
204             C is used with L
205             \@data_row | $data_row | %data_row)"> or L
206             $unit [, $position]])"> to insert a data row or field at the beginning of their
207             respective lists.
208              
209             =head2 INSERT_END
210              
211             C is used with L
212             $data_row | %data_row)"> or L
213             $position]])"> to insert a data row or field at the end of their respective
214             lists.
215              
216             =cut
217              
218 294     294 1 7464 sub STRICT_READ {1}
219 460     460 1 1092 sub STRICT_WRITE {2}
220 1     1 1 2679 sub STRICT_ALL {3}
221 22     22 1 64 sub INSERT_BEGINNING {0}
222 104     104 1 205 sub INSERT_END {-1}
223              
224             my %DEFAULT_OPTIONS = (
225             default_headers => {},
226             headers => {},
227             preserve_case => 1,
228             keep_slashes => 0,
229             cache => 1,
230             delete_missing_headers => 0,
231             missing_data_to_undef => 1,
232             preserve_comments => 1,
233             add_empty_headers => 0,
234             strict => STRICT_WRITE,
235             fill_ancillary_data => 0,
236             preserve_header => 0,
237             preserve_detection_limits => 0,
238             optional_warnings => 1,
239             );
240              
241             #return values for ref()
242             my %OPTION_TYPES = (
243             default_headers => [ 'ARRAY', 'HASH' ],
244             headers => [ 'ARRAY', 'HASH' ],
245             preserve_case => [''],
246             keep_slashes => [''],
247             cache => [''],
248             delete_missing_headers => [''],
249             missing_data_to_undef => [''],
250             preserve_comments => [''],
251             add_empty_headers => [''],
252             strict => [''],
253             fill_ancillary_data => [''],
254             preserve_header => [''],
255             preserve_detection_limits => [''],
256             optional_warnings => [''],
257             );
258              
259             # All headers required by STRICT_READ and STRICT_WRITE
260             my @REQUIRED_HEADERS = qw(
261             begin_header
262             investigators
263             affiliations
264             contact
265             experiment
266             cruise
267             data_file_name
268             documents
269             calibration_files
270             data_type
271             start_date
272             end_date
273             start_time
274             end_time
275             north_latitude
276             south_latitude
277             east_longitude
278             west_longitude
279             missing
280             delimiter
281             units
282             end_header
283             );
284              
285             # Headers that must be specified, regardless of strictness.
286             my @ABSOLUTELY_REQUIRED_HEADERS = qw(
287             fields
288             );
289              
290             # Valid headers used for STRICT_READ and STRICT_WRITE
291             our @ALL_HEADERS = qw(
292             begin_header
293             investigators
294             affiliations
295             contact
296             experiment
297             cruise
298             station
299             data_file_name
300             documents
301             calibration_files
302             data_type
303             data_status
304             start_date
305             end_date
306             start_time
307             end_time
308             north_latitude
309             south_latitude
310             east_longitude
311             west_longitude
312             cloud_percent
313             measurement_depth
314             secchi_depth
315             data_use_warning
316             water_depth
317             wave_height
318             wind_speed
319             missing
320             below_detection_limit
321             above_detection_limit
322             delimiter
323             fields
324             units
325             end_header
326             );
327              
328             our %HEADER_DEFAULTS = (
329             data_use_warning => '',
330             );
331              
332             our %OMIT_EMPTY_HEADERS = (
333             data_use_warning => 1,
334             );
335              
336             # headers that are allowed but are not to be added during add_empty_headers
337             my @HIDDEN_HEADERS = qw(
338             received
339             );
340              
341             # what to set the missing value to if it's not defined
342             our $DEFAULT_MISSING = -999;
343             our $DEFAULT_BDL = -888;
344             our $DEFAULT_ADL = -777;
345              
346             # overly complex data structure only understandable by idiots
347             # your IQ is the percentage chance that it won't make sense
348             # IE: my IQ = 40, I have a 60% chance of it making sense to me
349             my %ANCILLARY = (
350             'lat' => [ { 'north_latitude' => qr/^(.*?)$/ }, { 'south_latitude' => qr/^(.*?)$/ } ],
351             'lon' => [ { 'east_longitude' => qr/^(.*?)$/ }, { 'west_longitude' => qr/^(.*?)$/ } ],
352             'depth' => [ { 'measurement_depth' => qr/^(.*?)$/ }, ],
353             'date_time' => [ '$date $time', ],
354             'date' => [ [ \&julian_to_greg, '$year$julian' ], [ \&julian_to_greg, '$year$jd' ], [ \&julian_to_greg, '$year$sdy' ], '$year$month$day', ],
355             'year' => [ { '$date' => qr/^(\d{4})/ }, { 'start_date' => qr/^(\d{4})/ }, ],
356             'month' => [ { '$date' => qr/^\d{4}(\d{2})/ }, [ \&julian_to_greg, qr/^\d{4}(\d{2})\d{2}$/, '$year$julian' ], [ \&julian_to_greg, qr/^\d{4}(\d{2})\d{2}$/, '$year$jd' ], [ \&julian_to_greg, qr/^\d{4}(\d{2})\d{2}$/, '$year$sdy' ], { 'start_date' => qr/^\d{4}(\d{2})/ }, ],
357             'day' => [ { '$date' => qr/^\d{6}(\d{2})/ }, [ \&julian_to_greg, qr/^\d{4}\d{2}(\d{2})$/, '$year$julian' ], [ \&julian_to_greg, qr/^\d{4}\d{2}(\d{2})$/, '$year$jd' ], [ \&julian_to_greg, qr/^\d{4}\d{2}(\d{2})$/, '$year$sdy' ], { 'start_date' => qr/^\d{6}(\d{2})/ }, ],
358             'time' => [ '$hour:$minute:$second', ],
359             'hour' => [ { '$time' => qr/^(\d+):/ }, { 'start_time' => qr/^(\d+):/ }, ],
360             'minute' => [ { '$time' => qr/:(\d+):/ }, { 'start_time' => qr/:(\d+):/ }, ],
361             'second' => [ { '$time' => qr/:(\d+(?:\.\d*)?)(?:[^:\d]|$)/ }, { 'start_time' => qr/:(\d+(?:\.\d*)?)(?:[^:\d]|$)/ }, ],
362             'station' => [ { 'station' => qr/^(.*?)$/ }, ],
363             );
364              
365             # what fill_ancillary_data adds to each row
366             #my @FILL_ANCILLARY_DATA = qw(date time date_time lat lon depth);
367             my @FILL_ANCILLARY_DATA = keys(%ANCILLARY);
368              
369             my %FIELD_FORMATTING = (
370             'year' => '%04d',
371             'month' => '%02d',
372             'day' => '%02d',
373             'julian' => '%03d',
374             'sdy' => '%03d',
375             'hour' => '%02d',
376             'minute' => '%02d',
377             'second' => '%02d', # this is actually overridden in the function checking this for milliseconds, etc
378             );
379              
380             =head1 CONSTRUCTOR
381              
382             =head2 new([$filename,] [\%options])
383              
384             my $sb_file = Data::SeaBASS->new("input_file.txt");
385             my $sb_file = Data::SeaBASS->new("input_file.txt", { delete_missing_headers => 1 });
386             my $sb_file = Data::SeaBASS->new("output_file.txt", { add_empty_headers => 1 });
387             my $sb_file = Data::SeaBASS->new({ add_empty_headers => 1 });
388              
389             Creates a C object. If the file specified exists, the object
390             can be used to read the file. If the file specified does not exist, an empty
391             object is created and will be written to the specified file by default when
392             invoking C.
393              
394             Options should be given in a hash reference to ensure proper argument parsing.
395             If a file is specified, options can be given as a hash list.
396              
397             =over 4
398              
399             =item * default_headers
400              
401             =item * headers
402              
403             These two options accept either an array reference or a hash reference. They
404             are used to set or override header information. First, headers are read from
405             C, then from the data file itself, then are overridden by
406             whatever is in C.
407              
408             Arguments are an array reference of header lines, or a hash reference of
409             header/value pairs.
410              
411             my $sb_file = Data::SeaBASS->new({
412             default_headers => [
413             '/cruise=fake_cruise',
414             '/experiment=default_experiment',
415             ],
416             headers => {
417             'experiment' => 'real_experiment',
418             },
419             });
420            
421             B Modifying the delimiter or missing value will likely break the
422             object. Modifying these will change the expected format for all rows. Do so
423             with caution.
424              
425             =item * preserve_case
426              
427             C<1> or C<0>, default C<1>. Setting this to C<0> will change all values in the
428             header to lowercase. Header descriptors (the /header part) are always turned
429             to lowercase, as well as all fields and units.
430              
431             =item * keep_slashes
432              
433             C<1> or C<0>, default C<0>. Forces the object to keep the / in the beginning of
434             headers when accessed. If set to C<1>, when using the L
435             \%new_headers | \@get_headers | @get_headers ])"> function, they will be
436             returned with leading slash.
437              
438             =item * cache
439              
440             C<1> or C<0>, default C<1>. Enables caching data rows as they are read. This
441             speeds up re-reads and allows the data to be modified. This is required for
442             writing files.
443              
444             =item * delete_missing_headers
445              
446             C<1> or C<0>, default C<0>. Any headers that are equal to the /missing header,
447             NA, or are not defined (when using the C options) are
448             deleted. They cannot be retrieved using C and will not be written.
449              
450             =item * missing_data_to_undef
451              
452             C<1> or C<0>, default C<1>. If any values in the data block are equal to the
453             /missing, /above_detection_limit, /below_detection_limit headers, they are set
454             to undef when they are retrieved.
455              
456             =item * preserve_comments
457              
458             C<1> or C<0>, default C<1>. Setting this option to zero will discard any
459             comments found in the header.
460              
461             =item * add_empty_headers
462              
463             C<0>, C<1>, or a string. If set to a string, this will populate any missing
464             headers, including optional ones, and will set their value to the string given.
465             If set to 1, the string 'NA' is used. This option disables C.
466              
467             =item * strict
468              
469             my $sb_file = Data::SeaBASS->new("input_file.txt", {strict => STRICT_ALL});
470             my $sb_file = Data::SeaBASS->new("input_file.txt", {strict => (STRICT_READ | STRICT_WRITE)});
471             my $sb_file = Data::SeaBASS->new("input_file.txt", {strict => 0});
472             my $sb_file = Data::SeaBASS->new("input_file.txt", {strict => STRICT_WRITE}); #default
473              
474             =over 4
475              
476             =item * C
477              
478             C will throw errors when reading invalid headers, missing required
479             ones, or an invalid delimiter. This may change in future revisions.
480              
481             =item * C
482              
483             C will throw the same errors when writing the data to a file or
484             stream. C only checks for required headers and invalid headers,
485             but does not check their values to see if they are actually filled. This may
486             change in future revisions.
487              
488             =back
489              
490             =item * fill_ancillary_data
491              
492             C<0> or C<1>, default C<0>. Insert date, time, measurement depth, station, and
493             location values to the data rows from the headers. Values are not overridden
494             if they are already present. This option is only useful when reading files.
495              
496             B It is bad practice to include these fields in the data if they don't
497             change throughout the file. This option is used to remove the burden of
498             checking whether they are in the data or header.
499              
500             B This option will also combine individual date/time
501             parts in the data (year/month/day/etc) to create more uniform date/time fields.
502              
503             B If any part of a date/time is missing, the fields
504             dependent on it will not be added to the row.
505              
506             =item * preserve_header
507              
508             C<0> or C<1>, default C<0>. Preserves header and comment order. This option
509             disables modifying the header, as well, but will not error if you try -- it will
510             simply not be reflected in the output.
511              
512             =item * preserve_detection_limits
513              
514             C<0> or C<1>, default C<0>. Disables setting values equal to below_detection_limit
515             or above_detection_limit to null while reading files. This should only be used
516             during read-only operation, as there is no telling missing data from data
517             outside limits.
518              
519             =item * optional_warnings
520              
521             C<0> or C<1>, default C<1>. Determines whether or not to print warnings deemed
522             optional. For the moment, the only defined warning is for optically shallow data.
523              
524             =back
525              
526             =cut
527              
528             sub new {
529 143     143 1 272283 my ( $class, $file ) = ( shift, shift );
530              
531 143         318 my $self = bless( {}, $class );
532              
533 143         696 my %myoptions;
534              
535 143 100       721 if ( ref($file) eq 'HASH' ) {
    100          
    50          
536 4         31 %myoptions = ( %DEFAULT_OPTIONS, %$file );
537 4         10 $file = '';
538             } elsif ( ref( $_[0] ) eq 'HASH' ) {
539 129         634 %myoptions = ( %DEFAULT_OPTIONS, %{ $_[0] } );
  129         850  
540             } elsif ( !ref( $_[0] ) ) {
541 10 100       35 if ( $#_ % 2 == 1 ) {
542 9         85 %myoptions = ( %DEFAULT_OPTIONS, @_ );
543             } else {
544 1         204 croak('Even sized list expected');
545             }
546             } else {
547 0         0 croak("Arguments not understood.");
548             }
549              
550 142         476 $self->{'options'} = \%myoptions;
551 142         434 $self->check_options();
552              
553 138 50       460 if ( ref($file) eq 'GLOB' ) {
    100          
    100          
554 0         0 $self->{'handle'} = $file;
555             } elsif ( ref($file) eq 'SCALAR' ) {
556 132         1272 open( my $fh, "<", $file );
557 132         343 $self->{'handle'} = $fh;
558             } elsif ($file) {
559 2 100       6 if ( !ref($file) ) {
560 1 50       20 if ( -r $file ) {
    0          
561 1         30 open( my $fh, "<", $file );
562 1         6 $self->{'handle'} = $fh;
563             } elsif ( $self->{'options'}{'strict'} & STRICT_READ ) {
564 0         0 croak("Strict read set, but input file not found or unreadable.");
565             } else {
566 0         0 $self->{'default_write_to'} = $file;
567 0         0 $file = '';
568             }
569             } else {
570 1         92 croak("Invalid parameter, expected file path or file handle.");
571             }
572             } ## end elsif ($file)
573 137 100       430 if ($file) {
574 133 50       346 unless ( $self->read_headers() ) {
575 0 0       0 unless ( $self->{'options'}{'strict'} & STRICT_READ ) {
576 0         0 return;
577             }
578             }
579             } else {
580 4         12 $self->create_blank_file();
581             }
582 133         400 return $self;
583             } ## end sub new
584              
585             =head1 OBJECT METHODS
586              
587             =head2 add_headers(\%headers | \@header_lines | @header_lines)
588              
589             $sb_file->add_headers({'investigators' => 'jason_lefler'});
590             $sb_file->add_headers(['/investigators=jason_lefler']);
591             $sb_file->add_headers('/investigators=jason_lefler');
592            
593             C is used to add or override metadata for a C, as
594             well as add comments.
595              
596             This function can not be used to change fields/units, see
597             L and
598             L for that.
599              
600             B Modifying the delimiter or missing value halfway through
601             reading/writing will likely break the object. Modifying these will change the
602             expected format for any new or non-cached rows. Do so with caution.
603              
604             =cut
605              
606             sub add_headers {
607 412     412 1 535 my $self = shift;
608 412         554 my $success = 1;
609 412         824 my $strict = $self->{'options'}{'strict'} & STRICT_WRITE;
610 412 100       1036 if ( ref( $_[0] ) eq 'HASH' ) {
    100          
    50          
611 269         341 while ( my ( $k, $v ) = each( %{ $_[0] } ) ) {
  277         986  
612 8         17 $success &= $self->validate_header( $k, $v, $strict );
613 8         370 $self->{'headers'}{$k} = $v;
614             }
615             } elsif ( ref( $_[0] ) eq 'ARRAY' ) {
616 142         322 foreach ( @{ $_[0] } ) {
  142         307  
617 2599 100 100     6443 if ( $_ =~ /^\s*!/ ) {
    100          
618 11         13 push( @{ $self->{'comments'} }, $_ );
  11         19  
619             } elsif ( $strict && $_ !~ m"^/" ) {
620 2         453 carp("Invalid header line: $_");
621 2         220 $success = 0;
622             } else {
623 2586         6264 my ( $k, $v ) = split( /=/, $_, 2 );
624 2586         5249 $success &= $self->validate_header( $k, $v, $strict );
625 2586         6781 $self->{'headers'}{$k} = $v;
626             }
627             } ## end foreach (@{$_[0]})
628             } elsif ( !ref( $_[0] ) ) {
629 1         3 foreach (@_) {
630 1 50 33     10 if ( $_ =~ /^\s*!/ ) {
    50          
631 0         0 push( @{ $self->{'comments'} }, $_ );
  0         0  
632             } elsif ( $strict && $_ !~ m"^/" ) {
633 0         0 carp("Invalid header line: $_");
634 0         0 $success = 0;
635             } else {
636 1         5 my ( $k, $v ) = split( /=/, $_, 2 );
637 1         3 $success &= $self->validate_header( $k, $v, $strict );
638 1         3 $self->{'headers'}{$k} = $v;
639             }
640             } ## end foreach (@_)
641             } else {
642 0         0 $success = 0;
643             }
644 412         652 return $success;
645             } ## end sub add_headers
646              
647             =head2 headers([ \%new_headers | \@get_headers | @get_headers ])
648              
649             =head2 head
650              
651             =head2 h
652              
653             my %headers = $sb_file->headers(['investigators']);
654             print Dumper(\%headers); # { investigators => 'jason_lefler' }
655            
656             my ($inv) = $sb_file->headers('investigators');
657             print $inv; # jason_lefler
658            
659             $sb_file->headers({investigators => 'jason_lefler'});
660            
661             $sb_file->headers()->{'investigators'} = 'jason_lefler';
662            
663             C is used to read or modify header values. Given an array reference
664             of header names, it will return a hash/hash reference with header/value pairs.
665             Given a plain list of header names, it will return an array/array reference of
666             the given header values. Given a hash reference, this function is a proxy for
667             L.
668              
669             If C is set, then headers will be returned as such, IE: C<<
670             {'/investigators' => 'jason_lefler'} >>.
671              
672             This function can also be used to set header values without going through the
673             normal validation.
674              
675             C and C are aliases to C.
676              
677             =cut
678              
679 0     0 1 0 sub head { shift->headers(@_); }
680 22     22 1 1334 sub h { shift->headers(@_); }
681              
682             sub headers {
683 25     25 1 788 my $self = shift;
684 25 100       58 if ( !@_ ) {
    100          
    100          
685 21         151 return $self->{'headers'};
686             } elsif ( ref( $_[0] ) eq 'HASH' ) {
687 1         4 return $self->add_headers(@_);
688             } elsif ( ref( $_[0] ) eq 'ARRAY' ) {
689 1         2 my %ret;
690 1         2 for my $header ( @{ $_[0] } ) {
  1         3  
691 2         6 $ret{$header} = $self->{'headers'}{ lc($header) };
692             }
693 1 50       3 if (wantarray) {
694 1         6 return %ret;
695             } else {
696 0         0 return \%ret;
697             }
698             } else {
699 2         3 my @ret;
700 2         5 foreach (@_) {
701 4 50       8 if ( !ref ) {
702 4         8 my $value = $self->{'headers'}{ lc($_) };
703 4 50       11 push( @ret, defined($value) ? $value : undef );
704             } else {
705 0         0 croak("Argument not understood: $_");
706             }
707             } ## end foreach (@_)
708 2 100       6 if (wantarray) {
    50          
709 1         4 return @ret;
710             } elsif ( $#ret == 0 ) {
711 0         0 return $ret[0];
712             } else {
713 1         3 return \@ret;
714             }
715             } ## end else [ if (!@_) ]
716              
717             } ## end sub headers
718              
719             =head2 data([$index])
720              
721             =head2 d
722              
723             =head2 body
724              
725             =head2 b
726              
727             =head2 all
728              
729             my $row = $sb_file->data(1);
730             my @rows = $sb_file->all();
731              
732             C is responsible for returning either a data line via an index or all of
733             the data lines at once.
734              
735             Data is returned as C<< field => value >> pairs.
736              
737             If given an index: in list context, returns the hash of the row; in scalar
738             context, returns a reference to the row.
739              
740             If not given an index: in list context, returns an array of the rows; in scalar
741             context, returns a reference to an array of the rows.
742              
743             If given an index out of range, returns undef. If given a negative index,
744             Cs the file, then returns undef.
745              
746             If C is enabled and the row has already been read, it is retrieved from
747             the cache. If it has not already be read, all rows leading up to the desired
748             row will be read and cached, and the desired row returned.
749              
750             If C is disabled and either all rows are retrieved or a previously
751             retrieved row is called again, the file will C, then seek to the
752             desired row.
753              
754             C, C, C, and C are all aliases to C. (Yes, that means
755             C can be used with arguments, it would just look silly.)
756              
757             =cut
758              
759 0     0 1 0 sub body { shift->data(@_); }
760 0     0 1 0 sub b { shift->data(@_); }
761 2     2 1 1612 sub d { shift->data(@_); }
762 42     42 1 6328 sub all { shift->data(@_); }
763              
764             sub data {
765 135     135 1 5858 my ( $self, $index ) = @_;
766 135 100       289 if ( defined($index) ) {
767 83 100       188 if ( $index < 0 ) {
768 12         26 $self->rewind();
769 12         17 return;
770             }
771 71 100       146 if ( $self->{'options'}{'cache'} ) {
772 65 100       150 if ( $index > $self->{'max_dataidx'} ) {
773 41         69 my $startidx = $self->{'dataidx'};
774 41         108 for ( my $i = 0; $i < ( $index - $startidx ); $i++ ) {
775 75 100       133 if ( !$self->next() ) {
776 4         10 return;
777             }
778             }
779             } ## end if ($index > $self->{'max_dataidx'...})
780              
781 61         420 $self->{'dataidx'} = $index;
782              
783 61 50       101 if (wantarray) {
784 0         0 return %{ $self->{'data'}[$index] };
  0         0  
785             } else {
786 61         304 return $self->{'data'}[$index];
787             }
788             } else {
789 6 100       15 if ( $index <= $self->{'dataidx'} ) {
790 3         10 $self->rewind();
791             }
792 6         10 my $startidx = $self->{'dataidx'};
793 6         17 for ( my $i = 0; $i < ( $index - $startidx - 1 ); $i++ ) {
794 7 50       15 if ( !$self->next() ) {
795 0         0 return;
796             }
797             }
798 6         87 return $self->next();
799             } ## end else [ if ($self->{'options'}...)]
800             } else {
801 52 100       113 if ( $self->{'options'}{'cache'} ) {
802 45         100 while ( $self->next() ) {
803             # noop
804             }
805 45 100       91 if (wantarray) {
806 7         10 return @{ $self->{'data'} };
  7         25  
807             } else {
808 38         135 return $self->{'data'};
809             }
810             } else {
811 7         21 $self->rewind();
812 7         9 my @data_rows;
813 7         16 while ( my $data = $self->next() ) {
814 28         62 push( @data_rows, $data );
815             }
816 7 100       16 if (wantarray) {
817 6         22 return @data_rows;
818             } else {
819 1         5 return \@data_rows;
820             }
821             } ## end else [ if ($self->{'options'}...)]
822             } ## end else [ if (defined($index)) ]
823             } ## end sub data
824              
825             =head2 next()
826              
827             while (my $row = $sb_file->next()){
828             print $row->{'lat'};
829             ...
830             }
831             while (my %row = $sb_file->next()){
832             print $row{'lat'};
833             ...
834             }
835            
836             Returns the next data row in the file, returning C when it runs out of
837             rows.
838              
839             Data is returned as C<< field => value >> pairs.
840              
841             In list context, returns a hash of the row. In scalar context, returns a
842             reference to the hash of a row.
843              
844             After a C, C will return the very first data hash, then each row
845             in turn. If the row has been cached, it's retrieved from the cache instead of
846             rereading from the file.
847              
848             =cut
849              
850             sub next {
851 431     431 1 15578 my $self = shift;
852 431 50       810 if (@_) {
853 0         0 croak("invalid number of arguments on next(), expected 0.");
854             }
855              
856 431 100 100     1665 if ( $self->{'options'}{'cache'} && $self->{'dataidx'} < $self->{'max_dataidx'} ) {
    100          
857 73         91 $self->{'dataidx'}++;
858              
859 73 50       118 if (wantarray) {
860 0         0 return %{ $self->{'data'}[ $self->{'dataidx'} ] };
  0         0  
861             } else {
862 73         224 return $self->{'data'}[ $self->{'dataidx'} ];
863             }
864             } elsif ( $self->{'handle'} ) {
865 346         528 my $handle = $self->{'handle'};
866 346         452 my $line_number = $self->{'line_number'};
867              
868 346         1085 while ( my $line = <$handle> ) {
869 268         331 $line_number++;
870 268         571 strip($line);
871 268 50       567 if ($line) {
872 268         596 my $data_row = $self->make_data_hash($line);
873 268         507 $self->{'line_number'} = $line_number;
874 268 100       504 if ( $self->{'options'}{'cache'} ) {
875 206         234 push( @{ $self->{'data'} }, $data_row );
  206         361  
876             }
877 268         355 $self->{'dataidx'}++;
878 268 100       612 if ( $self->{'dataidx'} > $self->{'max_dataidx'} ) {
879 240         335 $self->{'max_dataidx'} = $self->{'dataidx'};
880             }
881 268 50       419 if (wantarray) {
882 0         0 return %{$data_row};
  0         0  
883             } else {
884 268         1004 return $data_row;
885             }
886             } ## end if ($line)
887             } ## end while (my $line = <$handle>)
888             } ## end elsif ($self->{'handle'})
889 90         209 return;
890             } ## end sub next
891              
892             =head2 rewind()
893              
894             C seeks to the start of the data. The next C will return the
895             very first row (or C). If caching is enabled, it will not actually
896             perform a seek, it will merely reset the index interator. If caching is
897             disabled, a seek is performed on the file handle to return to the start of the
898             data.
899              
900             =cut
901              
902             sub rewind {
903 61     61 1 1575 my ($self) = @_;
904 61 100       159 if ( $self->{'dataidx'} != -1 ) {
905 31 100       82 if ( !$self->{'options'}{'cache'} ) {
906 9         24 seek( $self->{'handle'}, $self->{'data_start_position'}, SEEK_SET );
907             }
908 31         53 $self->{'line_number'} = $self->{'data_start_line'};
909 31         47 $self->{'dataidx'} = -1;
910             } ## end if ($self->{'dataidx'}...)
911             } ## end sub rewind
912              
913             =head2 update(\%data_row | \@data_row | $data_row | %data_row)
914              
915             while (my %row = $sb_file->next()){
916             if ($row{'depth'} == -999){
917             $row{'depth'} = 0;
918             }
919             $sb_file->update(\%row);
920             }
921            
922             # Less useful for update():
923             print join(',',@{$sb_file->actual_fields()}); #lat,lon,depth,chl
924            
925             while (my %row = $sb_file->next()){
926             if ($row{'depth'} == -999){
927             $row{'depth'} = 0;
928             }
929             $sb_file->update(@row{'lat','lon','depth','chl'});
930             # or
931             $sb_file->update([@row{'lat','lon','depth','chl'}]);
932             }
933              
934             C replaces the last row read (using C) with the input.
935              
936             Caching must be enabled to use C, C, or C.
937              
938             =cut
939              
940             sub update {
941 12     12 1 59 my $self = shift;
942 12 100       152 if ( !$self->{'options'}{'cache'} ) {
    100          
943 1         100 croak("Caching must be enabled to write.");
944             } elsif ( $self->{'dataidx'} == -1 ) {
945 1         156 croak("No rows read yet.");
946             }
947 10         25 my $new_row = $self->ingest_row(@_);
948 10 100       25 unless ( defined($new_row) ) {
949 2         138 croak("Error parsing inputs");
950             }
951 8         28 $self->{'data'}[ $self->{'dataidx'} ] = $new_row;
952             } ## end sub update
953              
954             =head2 set($index, \%data_row | \@data_row | $data_row | %data_row)
955              
956             my %row = (lat => 1, lon => 2, chl => 1);
957             $sb_file->set(0, \%row);
958            
959             print join(',',@{$sb_file->actual_fields()}); #lat,lon,chl
960             $sb_file->set(0, [1, 2, 1]);
961            
962              
963             C replaces the row at the given index with the input. Seeks to the
964             given index if it has not been read to yet. Cs if the file does not go
965             up to the index specified.
966              
967             Caching must be enabled to use C, C, or C.
968              
969             =cut
970              
971             sub set {
972 3     3 1 18 my $self = shift;
973 3         4 my $index = shift;
974 3 50       7 if ( !$self->{'options'}{'cache'} ) {
975 0         0 croak("Caching must be enabled to write");
976             }
977 3 100       6 if ( $index < 0 ) {
978 1         159 croak("Index must be positive integer");
979             }
980 2         7 my $new_row = $self->ingest_row(@_);
981 2 50       5 unless ( defined($new_row) ) {
982 0         0 croak("Error parsing inputs");
983             }
984              
985 2 50       4 if ( $index > $self->{'max_dataidx'} ) {
986 2         4 my $current_idx = $self->{'dataidx'};
987 2         6 $self->data($index);
988 2         3 $self->{'dataidx'} = $current_idx;
989              
990 2 100       5 if ( $index > $self->{'max_dataidx'} ) {
991 1         101 croak("Index out of bounds.");
992             }
993             } ## end if ($index > $self->{'max_dataidx'...})
994              
995 1         3 $self->{'data'}[$index] = $new_row;
996             } ## end sub set
997              
998             =head2 insert($index, \%data_row | \@data_row | $data_row | %data_row)
999              
1000             use Data::SeaBASS qw(INSERT_BEGINNING INSERT_END);
1001             ...
1002            
1003             my %row = (lat => 1, lon => 2, chl => 1);
1004             $sb_file->insert(INSERT_BEGINNING, \%row);
1005            
1006             print join(',',@{$sb_file->actual_fields()}); #lat,lon,chl
1007            
1008             $sb_file->insert(1, [1, 2, 1]);
1009             $sb_file->insert(INSERT_END, [1, 2, 1]);
1010            
1011             Inserts the row into the given position. C inserts a new row
1012             at the start of the data, C inserts one at the end of the data
1013             block.
1014              
1015             The index must be a positive integer, C, or C.
1016              
1017             If a row is inserted at the end, the entire data block is read from the file to
1018             cache every row, the row is appended to the end, and the current position is
1019             reset to the original position, so C will still return the real next
1020             row from the data.
1021              
1022             If a row is inserted before the current position, the current position is
1023             shifted accordingly and will still return the C real row.
1024              
1025             Caching must be enabled to use C, C, or C.
1026              
1027             =cut
1028              
1029             sub insert {
1030 16     16 1 36 my $self = shift;
1031 16         21 my $index = shift;
1032 16 50       38 if ( !$self->{'options'}{'cache'} ) {
1033 0         0 croak("Caching must be enabled to write.");
1034             }
1035 16 100       32 if ( $index < INSERT_END ) {
1036 1         162 croak("Index must be positive integer, or INSERT_BEGINNING (beginning), or INSERT_END (end)");
1037             }
1038 15         41 my $new_row = $self->ingest_row(@_);
1039 15 50       32 unless ( defined($new_row) ) {
1040 0         0 croak("Error parsing inputs");
1041             }
1042              
1043 15 100       36 if ( $index == INSERT_END ) {
    100          
1044 10         21 my $current_idx = $self->{'dataidx'};
1045 10         25 $self->data();
1046 10         16 $self->{'dataidx'} = $current_idx;
1047             } elsif ( $index > $self->{'max_dataidx'} ) {
1048 2         4 my $current_idx = $self->{'dataidx'};
1049 2         8 $self->data($index);
1050 2         4 $self->{'dataidx'} = $current_idx;
1051              
1052 2 50       6 if ( $index == $self->{'max_dataidx'} + 1 ) {
    100          
1053 0         0 $index = INSERT_END;
1054             } elsif ( $index > $self->{'max_dataidx'} ) {
1055 1         107 croak("Index out of bounds.");
1056             }
1057             } ## end elsif ($index > $self->{'max_dataidx'...})
1058              
1059 14 100 100     39 if ( $index <= $self->{'dataidx'} && $index != INSERT_END ) {
1060 3         5 $self->{'dataidx'}++;
1061             }
1062              
1063 14         21 $self->{'max_dataidx'}++;
1064              
1065 14 100       30 if ( $index == INSERT_BEGINNING ) {
    100          
1066 2         3 unshift( @{ $self->{'data'} }, $new_row );
  2         9  
1067             } elsif ( $index == INSERT_END ) {
1068 10         13 push( @{ $self->{'data'} }, $new_row );
  10         26  
1069             } else {
1070 2         3 splice( @{ $self->{'data'} }, $index, 0, $new_row );
  2         8  
1071             }
1072             } ## end sub insert
1073              
1074             =head2 prepend(\%data_row | \@data_row | $data_row | %data_row)
1075              
1076             C is short for C.
1077              
1078             =cut
1079              
1080             sub prepend {
1081 1     1 1 3 my $self = shift;
1082 1         3 $self->insert( INSERT_BEGINNING, @_ );
1083             }
1084              
1085             =head2 append(\%data_row | \@data_row | $data_row | %data_row)
1086              
1087             C is short for C.
1088              
1089             =cut
1090              
1091             sub append {
1092 9     9 1 36 my $self = shift;
1093 9         29 $self->insert( INSERT_END, @_ );
1094             }
1095              
1096             =head2 remove([$index])
1097              
1098             If index is specified, it deletes the desired index. If it is omitted, the
1099             last row read is deleted. The current position is modified accordingly.
1100              
1101             =cut
1102              
1103             sub remove {
1104 6     6 1 31 my ( $self, $index ) = @_;
1105              
1106 6 100 66     75 if ( !$self->{'options'}{'cache'} ) {
    50          
1107 1         135 croak("Caching must be enabled to write.");
1108             } elsif ( !defined($index) && $self->{'dataidx'} < 0 ) {
1109 0         0 croak("No rows read yet.");
1110             }
1111              
1112 5 100       13 if ( !defined($index) ) {
1113 2         4 $index = $self->{'dataidx'};
1114             }
1115              
1116 5 50       35 if ( $index < 0 ) {
    100          
1117 0         0 croak("Index must be positive integer");
1118             } elsif ( $index > $self->{'max_dataidx'} ) {
1119 2         4 my $current_idx = $self->{'dataidx'};
1120 2         5 $self->data($index);
1121 2         3 $self->{'dataidx'} = $current_idx;
1122              
1123 2 100       5 if ( $index > $self->{'max_dataidx'} ) {
1124 1         262 croak("Index out of bounds.");
1125             }
1126             } ## end elsif ($index > $self->{'max_dataidx'...})
1127              
1128 4 100       13 if ( $index <= $self->{'dataidx'} ) {
1129 3         5 $self->{'dataidx'}--;
1130             }
1131 4         6 $self->{'max_dataidx'}--;
1132              
1133 4         5 splice( @{ $self->{'data'} }, $index, 1 );
  4         17  
1134             } ## end sub remove
1135              
1136             =head2 where(\&function)
1137              
1138             # Find all rows with depth greater than 10 meters
1139             my @ret = $sb_file->where(sub {
1140             if ($_->{'depth'} > 10){
1141             return $_;
1142             } else {
1143             return undef;
1144             }
1145             });
1146              
1147             # Delete all measurements with depth less than 10 meters
1148             $sb_file->where(sub {
1149             if ($_->{'depth'} < 10){
1150             $_ = undef;
1151             }
1152             });
1153            
1154             # Calculate the average chlorophyll value
1155             my $chl_total = 0;
1156             my $measurements = 0;
1157             $sb_file->where(sub {
1158             if (defined($_->{'chl'})){
1159             $chl_total += $_->{'chl'};
1160             $measurements++;
1161             }
1162             });
1163             if ($measurements){
1164             print $chl_total/$measurements;
1165             } else {
1166             print "No chl values.";
1167             }
1168              
1169              
1170             Traverses through each data line, running the given function on each row.
1171             C<$_> is set to the current row. If C<$_> is set to undefined, C is
1172             called. Any changes in C<$_> will be reflected in the data.
1173              
1174             Any defined value returned is added to the return array. If nothing is
1175             returned, a 0 is added.
1176              
1177             =cut
1178              
1179             sub where {
1180 7     7 1 40 my ( $self, $function ) = ( shift, shift );
1181 7 100       20 if ( ref($function) ne 'CODE' ) {
1182 1         159 croak("Invalid arguments.");
1183             }
1184 6         11 my $currentidx = $self->{'dataidx'};
1185 6         17 $self->rewind();
1186              
1187 6         9 my @new_rows;
1188              
1189 6         13 while ( my $row = $self->next() ) {
1190 19         38 local *_ = \$row;
1191 19         41 my $ret = $function->();
1192 19 100 100     126 if ( defined($ret) && defined(wantarray) ) {
1193 12         14 push( @new_rows, $ret );
1194             }
1195 19 100       271 if ( !defined($row) ) {
1196 2 100       6 if ( $self->{'dataidx'} <= $currentidx ) {
1197 1         2 $currentidx--;
1198             }
1199 2         6 $self->remove();
1200             } ## end if (!defined($row))
1201             } ## end while (my $row = $self->next...)
1202              
1203 5         16 $self->data($currentidx);
1204              
1205 5         14 return @new_rows;
1206             } ## end sub where
1207              
1208             =head2 get_all($field_name [, ... ] [, \%options])
1209              
1210             Returns an array/arrayref of all the values matching each given field name.
1211             This function errors out if no field names are passed in or a non-existent
1212             field is requested.
1213              
1214             Available options are:
1215              
1216             =over 4
1217              
1218             =item * delete_missing
1219              
1220             If any of the fields are missing, the row will not be added to any of the
1221             return arrays. (Useful for plotting or statistics that don't work well with
1222             bad values.)
1223              
1224             =back
1225              
1226             =cut
1227              
1228             sub get_all {
1229 13     13 1 4034 my $self = shift;
1230 13         26 my %options = ( 'delete_missing' => 0 );
1231 13 100       31 if ( ref( $_[$#_] ) eq 'HASH' ) {
1232 5         7 %options = %{ pop(@_) };
  5         14  
1233             }
1234 13 50       25 if ( !@_ ) {
1235 0         0 croak("get_all must be called with at least one field name");
1236             }
1237              
1238 13 100       32 my $missing = ( $self->{'options'}{'missing_data_to_undef'} ? undef : $self->{'missing'} );
1239              
1240 13         17 my $currentidx = $self->{'dataidx'};
1241 13         28 $self->rewind();
1242              
1243 13         23 my @fields = map {lc} @_; # turn all inputs lowercase
  22         51  
1244              
1245 13         20 foreach my $field (@fields) {
1246 21 100   130   56 if ( ( firstidx { $_ eq $field } @{ $self->{'actual_fields'} } ) < 0 ) {
  130         168  
  21         47  
1247 4 100 66 8   12 if ( !$self->{'options'}{'fill_ancillary_data'} || ( firstidx { $_ eq $field } keys( %{ $self->{'ancillary'} } ) ) < 0 ) {
  8         23  
  1         6  
1248 3         311 croak("Field $field does not exist");
1249             }
1250             }
1251             } ## end foreach my $field (@fields)
1252              
1253 10         19 my @ret = map { [] } @fields; # make return array of arrays
  17         27  
1254              
1255 10         21 while ( my $row = $self->next() ) {
1256 40 100       60 if ( $options{'delete_missing'} ) {
1257 20         21 my $has_all = 1;
1258 20         23 foreach my $field (@fields) {
1259 36 100 100     121 unless ( defined( $row->{$field} ) && ( !defined($missing) || $row->{$field} != $missing ) ) {
      100        
1260 4         5 $has_all = 0;
1261 4         6 last;
1262             }
1263             } ## end foreach my $field (@fields)
1264 20 100       33 unless ($has_all) {
1265 4         6 next;
1266             }
1267             } ## end if ($options{'delete_missing'...})
1268              
1269 36         63 for ( my $i = 0; $i <= $#fields; $i++ ) {
1270 60         64 push( @{ $ret[$i] }, $row->{ $fields[$i] } );
  60         172  
1271             }
1272             } ## end while (my $row = $self->next...)
1273              
1274 10         33 $self->data($currentidx);
1275              
1276 10 100       22 if ( $#_ == 0 ) {
    100          
1277 3 100       6 if (wantarray) {
1278 1         2 return @{ $ret[0] };
  1         5  
1279             } else {
1280 2         7 return $ret[0];
1281             }
1282             } elsif (wantarray) {
1283 4         15 return @ret;
1284             } else {
1285 3         11 return \@ret;
1286             }
1287             } ## end sub get_all
1288              
1289             =head2 remove_field($field_name [, ... ])
1290              
1291             Removes a field from the file. C is called to remove the field
1292             from cached rows. Any new rows grabbed will have the removed fields omitted,
1293             as well. A warning is issued if the field does not exist.
1294              
1295             =cut
1296              
1297             sub remove_field {
1298 8     8 1 28 my $self = shift;
1299 8 100       19 if ( !@_ ) {
1300 1         152 croak("Field(s) must be specified.");
1301             }
1302 7         15 foreach my $field_orig (@_) {
1303 9         14 my $field = lc($field_orig);
1304              
1305 9     30   29 my $field_idx = firstidx { $_ eq $field } @{ $self->{'actual_fields'} };
  30         36  
  9         23  
1306              
1307 9 100       26 if ( $field_idx < 0 ) {
1308 1         105 carp("Field $field does not exist.");
1309             } else {
1310 8         12 splice( @{ $self->{'actual_fields'} }, $field_idx, 1 );
  8         16  
1311 8         11 splice( @{ $self->{'actual_units'} }, $field_idx, 1 );
  8         17  
1312             }
1313             } ## end foreach my $field_orig (@_)
1314 7         121 $self->update_fields();
1315             } ## end sub remove_field
1316              
1317             =head2 add_field($field_name [, $unit [, $position]])
1318              
1319             Adds a field to the file. C is called to populate all cached
1320             rows. Any rows retrieved will have the new field set to undefined or /missing,
1321             depending on if the option C is set.
1322              
1323             If the unit is not specified, it is set to unitless.
1324              
1325             If the position is not specified, the field is added to the end.
1326              
1327             =cut
1328              
1329             sub add_field {
1330 25     25 1 103 my ( $self, $field, $unit, $position ) = @_;
1331 25 50       69 if ( !$self->{'options'}{'cache'} ) {
    100          
1332 0         0 croak("Caching must be enabled to write.");
1333             } elsif ( !$field ) {
1334 1         75 croak("Field must be specified.");
1335             }
1336 24         39 $field = lc($field);
1337              
1338 24     121   66 my $field_idx = firstidx { $_ eq $field } @{ $self->{'actual_fields'} };
  121         131  
  24         62  
1339 24 100       71 if ( $field_idx >= 0 ) {
1340 3         361 croak("Field already exists.");
1341             }
1342 21 100       45 if ( !defined($position) ) {
1343 15         30 $position = INSERT_END;
1344             }
1345 21   100     52 $unit ||= 'unitless';
1346 21         34 $unit = lc($unit);
1347              
1348 21 100       32 if ( $position == INSERT_END ) {
    100          
1349 17         21 push( @{ $self->{'actual_fields'} }, $field );
  17         33  
1350 17         23 push( @{ $self->{'actual_units'} }, $unit );
  17         26  
1351             } elsif ( $position == INSERT_BEGINNING ) {
1352 2         4 unshift( @{ $self->{'actual_fields'} }, $field );
  2         6  
1353 2         3 unshift( @{ $self->{'actual_units'} }, $unit );
  2         4  
1354             } else {
1355 2         2 splice( @{ $self->{'actual_fields'} }, $position, 0, $field );
  2         4  
1356 2         3 splice( @{ $self->{'actual_units'} }, $position, 0, $unit );
  2         3  
1357             }
1358 21         40 $self->update_fields();
1359             } ## end sub add_field
1360              
1361             =head2 find_fields($string | qr/match/ [, ... ])
1362              
1363             Finds fields matching the string or regex given. If given a string, it must
1364             match a field exactly and entirely to be found. To find a substring, use
1365             C. Fields are returned in the order that they will be output. This
1366             function takes into account fields that are added or removed. All fields are
1367             always lowercase, so all matches are case insensitive.
1368              
1369             Given one argument, returns an array of the fields found. An empty array is
1370             returned if no fields match.
1371              
1372             Given multiple arguments, returns an array/arrayref of arrays of fields found.
1373             IE: C would return something like
1374             C<[['lw510','lw550'],['es510','es550']]>. If no field is matched, the inner
1375             array will be empty. IE: C<[[],[]]>.
1376              
1377             =cut
1378              
1379             sub find_fields {
1380 10     10 1 3823 my $self = shift;
1381 10 50       25 if ( $#_ < 0 ) {
1382 0         0 croak("Input must be a string or regex object.");
1383             }
1384              
1385 10         16 my @ret;
1386              
1387 10         18 foreach my $find (@_) {
1388 13         17 my ( $regex, @matching );
1389 13 50       24 if ( defined($find) ) {
1390 13 100       29 if ( !ref($find) ) {
    50          
1391 7         77 $regex = lc(qr/^$find$/i);
1392             } elsif ( ref($find) eq 'Regexp' ) {
1393 6         24 $regex = lc(qr/$find/i);
1394             } else {
1395 0         0 croak("Input must be a string or regex object.");
1396             }
1397              
1398 13         24 foreach my $field ( @{ $self->{'actual_fields'} } ) {
  13         27  
1399 92 100       250 if ( $field =~ $regex ) {
1400 23         51 push( @matching, $field );
1401             }
1402             }
1403             }
1404 13         27 push( @ret, \@matching );
1405             } ## end foreach my $find (@_)
1406              
1407 10 100       23 if ( $#_ == 0 ) {
1408 7         8 return @{ $ret[0] };
  7         27  
1409             } else {
1410 3 100       6 if (wantarray) {
1411 2         7 return @ret;
1412             } else {
1413 1         3 return \@ret;
1414             }
1415             } ## end else [ if ($#_ == 0) ]
1416             } ## end sub find_fields
1417              
1418             =head2 add_comment(@comments)
1419              
1420             Adds comments to the output file, which are printed, in bulk, after C.
1421             Comments are trimmed before entry and !s are added, if required.
1422              
1423             =cut
1424              
1425             sub add_comment {
1426 2     2 1 4 my $self = shift;
1427 2         4 push(@{$self->{'comments'}}, map {
1428 2         3 my $c = $_;
  3         4  
1429 3         13 $c =~ s/^\s+|\s+$//g;
1430 3 100       8 if ($c =~ /^!/){
1431 1         3 $c
1432             } else {
1433 2         6 "! $c"
1434             }
1435             } @_);
1436             }
1437              
1438             =head2 get_comments([@indices])
1439              
1440             Returns a list of the comments at the given indices. If no indices are passed
1441             in, return them all.
1442              
1443             =cut
1444              
1445             sub get_comments {
1446 6     6 1 14 my $self = shift;
1447 6         7 my @ret;
1448 6 100       11 if (@_){
1449 2         5 @ret = map {$self->{'comments'}[$_]} @_;
  3         7  
1450             } else {
1451 4         5 @ret = @{$self->{'comments'}};
  4         9  
1452             }
1453 6 50       12 if (wantarray){
1454 0         0 return @ret;
1455             } else {
1456 6         26 return \@ret;
1457             }
1458             }
1459              
1460             =head2 set_comments(@comments)
1461              
1462             Overwrites all of the comments in the file. For now, this is the proper way
1463             to remove comments. Comments are trimmed before entry and !s are added, if
1464             required.
1465              
1466             =cut
1467              
1468             sub set_comments {
1469 1     1 1 2 my $self = shift;
1470             $self->{'comments'} = [map {
1471 1         2 my $c = $_;
  2         3  
1472 2         6 $c =~ s/^\s+|\s+$//g;
1473 2 100       5 if ($c =~ /^!/){
1474 1         4 $c
1475             } else {
1476 1         3 "! $c"
1477             }
1478             } @_];
1479             }
1480              
1481             =head2 write([$filename | $file_handle | \*GLOB])
1482              
1483             Outputs the current header and data to the given handle or glob. If no
1484             arguments are given, and a non-existent filename was given to C, the
1485             contents are output into that. If an output file was not given, C
1486             outputs to STDOUT.
1487              
1488             If C is enabled, the headers are checked for invalid headers and
1489             missing required headers and errors/warnings can be thrown accordingly.
1490              
1491             The headers are output in a somewhat-arbitrary but consistent order. If
1492             C is enabled, placeholders are added for every header that
1493             does not exist. A comment section is also added if one is not present.
1494              
1495             =cut
1496              
1497             sub write {
1498 15     15 1 13144 my ( $self, $write_to_h ) = @_;
1499              
1500 15         41 my $strict_write = $self->{'options'}{'strict'} & STRICT_WRITE;
1501 15 100       38 my $slash = ( $self->{'options'}{'keep_slashes'} ? '/' : '' );
1502 15         23 my $error = 0;
1503              
1504 15 100       31 if ($strict_write) {
1505 3         4 foreach my $header ( keys( %{ $self->{'headers'} } ) ) {
  3         14  
1506 65         80 ( my $header_no_slash = $header ) =~ s"^/"";
1507 65 50 33 0   154 if ( ( firstidx { $_ eq $header_no_slash } @ALL_HEADERS ) < 0 && ( firstidx { $_ eq $header_no_slash } @HIDDEN_HEADERS ) < 0 ) {
  1123         1143  
  0         0  
1508 0         0 carp("Invalid header: $header");
1509 0         0 $error = 1;
1510             }
1511             } ## end foreach my $header (keys(%{...}))
1512              
1513 3         10 foreach my $header (@REQUIRED_HEADERS) {
1514 66 100       134 if ( !exists( $self->{'headers'}{$header} ) ) {
1515 20         1267 carp("Missing required header: $header");
1516 20         791 $error = 1;
1517             }
1518             } ## end foreach my $header (@REQUIRED_HEADERS)
1519             } ## end if ($strict_write)
1520              
1521 15 100       46 if ( !$error ) {
1522 14         20 my $close_write_to = 0;
1523 14         51 my $old_fh = select();
1524              
1525 14 50 33     69 if ( !$write_to_h && exists( $self->{'default_write_to'} ) ) {
1526 0   0     0 $write_to_h ||= $self->{'default_write_to'};
1527             }
1528              
1529 14 50       27 if ( defined($write_to_h) ) {
1530 0 0       0 if ( ref($write_to_h) eq 'GLOB' ) {
    0          
1531 0         0 select($write_to_h);
1532             } elsif ( !ref($write_to_h) ) {
1533 0         0 my $write_to = $write_to_h;
1534 0         0 $write_to_h = undef;
1535 0 0       0 open( $write_to_h, ">", $write_to ) || croak("Invalid argument for write().");
1536 0         0 $close_write_to = 1;
1537 0         0 select($write_to_h);
1538             } else {
1539 0         0 croak("Invalid argument for write().");
1540             }
1541             } ## end if (defined($write_to_h...))
1542              
1543 14   50     90 $self->{'headers'}{"${slash}delimiter"} ||= 'comma';
1544 14         52 my $actual_delim = lc( $self->{'headers'}{"${slash}delimiter"} );
1545 14 100       42 if ( $actual_delim eq 'comma' ) {
    50          
    0          
    0          
1546 2         12 $actual_delim = ',';
1547             } elsif ( $actual_delim eq 'space' ) {
1548 12         17 $actual_delim = ' ';
1549             } elsif ( $actual_delim eq 'tab' ) {
1550 0         0 $actual_delim = "\t";
1551             } elsif ( $actual_delim eq 'semicolon' ) {
1552 0         0 $actual_delim = ';';
1553             } else {
1554 0         0 $actual_delim = ',';
1555 0         0 $self->{'headers'}{"${slash}delimiter"} = 'comma';
1556             }
1557              
1558 14 50       39 my $missing = ( exists( $self->{'missing'} ) ? $self->{'missing'} : $DEFAULT_MISSING );
1559 14 50       31 my $bdl = ( exists( $self->{'below_detection_limit'} ) ? $self->{'below_detection_limit'} : $DEFAULT_BDL );
1560 14 50       28 my $adl = ( exists( $self->{'above_detection_limit'} ) ? $self->{'above_detection_limit'} : $DEFAULT_ADL );
1561              
1562 14 100       32 if ( $self->{'options'}{'preserve_header'} ) {
1563 1         2 print join("\n", @{ $self->{'preserved_header'} }, '');
  1         52  
1564             } else {
1565 13 100       35 if ( !exists( $self->{'headers'}{"${slash}begin_header"} ) ) {
1566 3         73 print "/begin_header\n";
1567             }
1568              
1569 13         27 my $add_missing_headers = $self->{'options'}{'add_empty_headers'};
1570 13 100 66     37 if ( $add_missing_headers && $add_missing_headers eq '1' ) {
1571 1         2 $add_missing_headers = 'NA';
1572             }
1573 13 100       28 if ( !$self->{'options'}{'preserve_case'} ) {
1574 1   50     3 $add_missing_headers = lc( $add_missing_headers || '' );
1575             }
1576            
1577 13         24 my @headers_to_print;
1578            
1579 13         68 @headers_to_print = @ALL_HEADERS;
1580 13 100 66     58 if ($missing eq $adl || ($self->{'options'}{'missing_data_to_undef'} && !$self->{'options'}{'preserve_detection_limits'})){
      100        
1581 12         135 @headers_to_print = grep(!/above_detection_limit/i, @headers_to_print);
1582             }
1583 13 100 66     48 if ($missing eq $bdl || ($self->{'options'}{'missing_data_to_undef'} && !$self->{'options'}{'preserve_detection_limits'})){
      100        
1584 12         84 @headers_to_print = grep(!/below_detection_limit/i, @headers_to_print);
1585             }
1586            
1587 13 100       21 unless (grep($_ ne 'unitless', @{ $self->{'actual_units'} })){
  13         47  
1588 6         56 @headers_to_print = grep(!/units/i, @headers_to_print);
1589             }
1590              
1591 13         24 foreach my $header (@headers_to_print) {
1592 412 100       1369 if ( $header eq 'missing' ) {
    100          
    100          
    100          
    50          
1593 13         18 while ( my ( $h, $k ) = each( %{ $self->{'headers'} } ) ) {
  166         392  
1594 153         215 ( my $header_no_slash = $h ) =~ s"^/"";
1595 153 50   3197   378 if ( ( firstidx { $_ eq $header_no_slash } @ALL_HEADERS ) < 0 ) {
  3197         3252  
1596 0         0 print "/$h=$k\n";
1597             }
1598             } ## end while (my ($h, $k) = each...)
1599              
1600 13         17 foreach my $comment ( @{ $self->{'comments'} } ) {
  13         27  
1601 18         157 print "$comment\n";
1602             }
1603 13 100 100     17 if ( !@{ $self->{'comments'} } && $add_missing_headers ) {
  13         50  
1604 1         11 print "! Comments: \n!\n";
1605             }
1606 13 100       51 if ( !exists( $self->{'headers'}{"$slash$header"} ) ) {
1607 3         41 print "/missing=$missing\n";
1608             } else {
1609 10         131 print '/', $header, '=', $self->{'headers'}{"$slash$header"}, "\n";
1610             }
1611             } elsif ( $header eq 'fields' ) {
1612 13         27 print "/$header=", join( ',', @{ $self->{'actual_fields'} } ), "\n";
  13         128  
1613             } elsif ( $header eq 'units' ) {
1614 7         13 print "/$header=", join( ',', @{ $self->{'actual_units'} } ), "\n";
  7         64  
1615             } elsif ( exists( $self->{'headers'}{"$slash$header"} ) ) {
1616 111 100       317 if ( $header =~ /_header/ ) {
    100          
    50          
1617 20         493 print "/$header\n";
1618             } elsif (length($self->{'headers'}{"$slash$header"})) {
1619 65         110 my $v = $self->{'headers'}{"$slash$header"};
1620 65 100       136 if ( $header =~ /_latitude|_longitude/ ) {
    100          
1621 8         69 print "/$header=$v\[deg]\n";
1622             } elsif ( $header =~ /_time/ ) {
1623 4         37 print "/$header=$v\[gmt]\n";
1624             } else {
1625 53         458 print "/$header=$v\n";
1626             }
1627             # print '/', $header, '=', $self->{'headers'}{"$slash$header"}, "\n";
1628             } elsif ($add_missing_headers) {
1629 26 100       44 next if ($OMIT_EMPTY_HEADERS{$header});
1630              
1631 25   33     66 my $value = $HEADER_DEFAULTS{$header} || $add_missing_headers;
1632 25 100       54 if ( $header =~ /_latitude|_longitude/ ) {
    100          
1633 4         44 print "/$header=$value\[deg]\n";
1634             } elsif ( $header =~ /_time/ ) {
1635 2         19 print "/$header=$value\[gmt]\n";
1636             } else {
1637 19         162 print "/$header=$value\n";
1638             }
1639             }
1640             } elsif ($add_missing_headers) {
1641 0 0       0 next if ($OMIT_EMPTY_HEADERS{$header});
1642              
1643 0   0     0 my $value = $HEADER_DEFAULTS{$header} || $add_missing_headers;
1644 0 0       0 if ( $header =~ /_latitude|_longitude/ ) {
    0          
1645 0         0 print "/$header=$value\[deg]\n";
1646             } elsif ( $header =~ /_time/ ) {
1647 0         0 print "/$header=$value\[gmt]\n";
1648             } else {
1649 0         0 print "/$header=$value\n";
1650             }
1651             } ## end elsif ($add_missing_headers)
1652             } ## end foreach my $header (@ALL_HEADERS)
1653              
1654 13 100       64 if ( !exists( $self->{'headers'}{"${slash}end_header"} ) ) {
1655 3         27 print "/end_header\n";
1656             }
1657             } ## end else [ if ($self->{'options'}...)]
1658              
1659 14         81 $self->rewind();
1660              
1661 14         39 while ( my $row = $self->next() ) {
1662 24         28 my @values;
1663 24         28 foreach my $field ( @{ $self->{'actual_fields'} } ) {
  24         43  
1664 118 100       252 push( @values, ( defined( $row->{$field} ) ? $row->{$field} : $missing ) );
1665             }
1666 24         324 print join( $actual_delim, @values ), "\n";
1667             } ## end while (my $row = $self->next...)
1668              
1669 14         47 select($old_fh);
1670 14 50       43 if ($close_write_to) {
1671 0         0 close($write_to_h);
1672             }
1673             } else {
1674 1         58 croak("Error(s) writing file");
1675             }
1676 14         46 return;
1677             } ## end sub write
1678              
1679             =head2 close()
1680              
1681             If a file handle is opened for reading, this function closes it. This is
1682             automatically called when the object is destroyed. This is useful to replace
1683             the file being read with the current changes.
1684              
1685             =cut
1686              
1687             sub close {
1688 143     143 1 302 my ($self) = @_;
1689 143 100       367 if ( $self->{'handle'} ) {
1690 133         419 my $ret = close( $self->{'handle'} );
1691 133         367 delete( $self->{'handle'} );
1692 133         2404 return $ret;
1693             } else {
1694 10         60 return;
1695             }
1696             } ## end sub close
1697              
1698             =head2 make_data_hash($line [,\@field_list])
1699              
1700             my %row = $sb_file->make_data_hash("1.5,2,2.5");
1701             my %row = $sb_file->make_data_hash("1.5,2,2.5", [qw(lat lon sal)]);
1702             my %row = $sb_file->make_data_hash("1.5,2,2.5", [$sb_file->fields()]);
1703             my %row = $sb_file->make_data_hash("1.5,2,2.5", [$sb_file->actual_fields()]);
1704              
1705             For mostly internal use. This function parses a data line. It first splits
1706             the data via the delimiter, assigns a field to each value, and returns a hash
1707             or hash reference.
1708              
1709             If C<@field_list> is not set, C<< $sb_file->fields() >> is used.
1710              
1711             If a delimiter is not set (a blank file was created, a file without a
1712             /delimiter header is read, etc), the delimiter is guessed and set using
1713             L.
1714              
1715             Cs if the delimiter could not be guessed or the number of fields the
1716             line is split into does not match up with the field list.
1717              
1718             =cut
1719              
1720             sub make_data_hash {
1721 268     268 1 490 my ( $self, $line, $field_list ) = @_;
1722 268 50 66     598 if ( !$self->{'delim'} && !$self->guess_delim($line) ) {
1723 0         0 croak("Need a delimiter");
1724             }
1725 268         1855 my @values = split( $self->{'delim'}, $line );
1726 268   33     1025 $field_list ||= $self->{'fields'};
1727              
1728 268         316 my ( $num_expected, $num_got ) = ( scalar( @{ $self->{'fields'} } ), scalar(@values) );
  268         570  
1729 268 50       563 if ( $num_expected != $num_got ) {
1730 0         0 croak("Incorrect number of fields or elements: got $num_got, expected $num_expected");
1731             }
1732              
1733 268         320 my %ret;
1734              
1735 268         1793 my $iterator = each_arrayref( $field_list, \@values );
1736 268         2302 while ( my ( $k, $v ) = $iterator->() ) {
1737 1879 100       3799 if ( $self->{'options'}{'missing_data_to_undef'} ) {
1738 968 50 100     10132 if (!length($v)){
    100 100        
    100 100        
    100 100        
    100 100        
      100        
      100        
      100        
      33        
      66        
      66        
1739 0         0 $ret{$k} = undef;
1740             } elsif ( $self->{'missing_is_number'} && looks_like_number($v) && $v == $self->{'missing'} ) {
1741 68         218 $ret{$k} = undef;
1742             } elsif ( !$self->{'options'}{'preserve_detection_limits'} && $self->{'adl_is_number'} && looks_like_number($v) && $v == $self->{'above_detection_limit'} ){
1743 1         3 $ret{$k} = undef;
1744             } elsif ( !$self->{'options'}{'preserve_detection_limits'} && $self->{'bdl_is_number'} && looks_like_number($v) && $v == $self->{'below_detection_limit'} ){
1745 1         4 $ret{$k} = undef;
1746             } elsif ($v eq $self->{'missing'} || (!$self->{'options'}{'preserve_detection_limits'} && ($v eq $self->{'below_detection_limit'} || $v eq $self->{'above_detection_limit'}))) {
1747 10         34 $ret{$k} = undef;
1748             } else {
1749 888         9822 $ret{$k} = $v;
1750             }
1751             } else {
1752 911         2925 $ret{$k} = $v;
1753             }
1754             } ## end while (my ($k, $v) = $iterator...)
1755 268         752 $self->add_and_remove_fields( \%ret );
1756 268 50       441 if (wantarray) {
1757 0         0 return %ret;
1758             } else {
1759 268         1226 return \%ret;
1760             }
1761             } ## end sub make_data_hash
1762              
1763             =head2 AUTOLOAD
1764              
1765             print $sb_file->missing();
1766             print $sb_file->dataidx();
1767             print $sb_file->actual_fields();
1768             ...
1769              
1770             Returns a few internal variables. The accessor is read only, but some
1771             variables can be returned as a reference, and can be modified afterwards.
1772             Though, do it knowing this is a terrible idea.
1773              
1774             If the variable retrieved is an array or hash reference and this is called in a
1775             list context, the variable is dereferenced first.
1776              
1777             Here are a few "useful" variables:
1778              
1779             =over 4
1780              
1781             =item * dataidx
1782              
1783             The current row index.
1784              
1785             =item * max_dataidx
1786              
1787             The highest row index read so far.
1788              
1789             =item * fields
1790              
1791             An array of the original fields.
1792              
1793             =item * actual_fields
1794              
1795             An array of the current fields, as modified by C or C.
1796              
1797             =item * delim
1798              
1799             The regex used to split data lines.
1800              
1801             =item * missing
1802              
1803             The null/fill/missing value of the SeaBASS file.
1804              
1805             =item * delim
1806              
1807             The current line delimiter regex.
1808              
1809             =back
1810              
1811             =cut
1812              
1813             sub AUTOLOAD {
1814 138     138   231 my $self = shift;
1815 138 50       250 if ( !ref($self) ) {
1816 0         0 croak("$self is not an object");
1817             }
1818              
1819 138         222 my $name = $AUTOLOAD;
1820 138 50       337 if ($name) {
1821 138         418 $name =~ s/.*://;
1822 138         220 my $value = $self->{$name};
1823 138 100       218 if ( !defined($value) ) {
1824 1         4 return;
1825             }
1826 137 100 66     403 if ( ref($value) eq 'ARRAY' && wantarray ) {
    50 33        
1827 133         149 return @{$value};
  133         447  
1828             } elsif ( ref($value) eq 'HASH' && wantarray ) {
1829 0         0 return %{$value};
  0         0  
1830             }
1831 4         17 return $value;
1832             } ## end if ($name)
1833             } ## end sub AUTOLOAD
1834              
1835             sub DESTROY {
1836 143     143   67673 my $self = shift;
1837 143         519 $self->close();
1838             }
1839              
1840             =head1 INTERNAL METHODS
1841              
1842             =head2 check_options()
1843              
1844             For internal use only. This function is in charge of checking the options to
1845             make sure they are of the right type (array/hash reference where appropriate).
1846              
1847             If add_empty_headers is set, this function turns off C.
1848              
1849             Called by the object, accepts no arguments.
1850              
1851             =cut
1852              
1853             #<<< perltidy destroys this function
1854             sub check_options {
1855 142     142 1 221 my $self = shift;
1856 142         219 while (my ($k, $v) = each(%{$self->{'options'}})) {
  2108         5015  
1857 1970 100       4642 if (!exists($DEFAULT_OPTIONS{$k})) {
    100          
1858 2         198 croak("Option not understood: $k");
1859 2242     2242   5825 } elsif ((firstidx { $_ eq ref($v) } @{$OPTION_TYPES{$k}}) < 0) {
  1968         4059  
1860 2         3 my $expected_ref = join('/', @{$OPTION_TYPES{$k}});
  2         5  
1861 2 100       209 croak("Option $k not of the right type, expected: " . ($expected_ref ? "$expected_ref reference" : 'scalar'));
1862             } ## end elsif ((firstidx { $_ eq ...}))
1863             } ## end while (my ($k, $v) = each...)
1864 138 100       357 if ($self->{'options'}{'add_empty_headers'}) {
1865 1         3 $self->{'options'}{'strict'} &= STRICT_READ;
1866             }
1867             } ## end sub check_options
1868             #>>>
1869              
1870             =head2 create_blank_file()
1871              
1872             For internal use only. C populates the object with proper
1873             internal variables, as well as adding blank headers if C is
1874             set.
1875              
1876             By default, the missing value is set to C<$DEFAULT_MISSING> (C<-999>).
1877              
1878             This function turns on the C option, as C must be enabled to
1879             write.
1880              
1881             The delimiter is left undefined and will be guessed upon reading the first data
1882             line using the L function.
1883              
1884             Called by the object, accepts no arguments.
1885              
1886             =cut
1887              
1888             sub create_blank_file {
1889 4     4 1 7 my ($self) = @_;
1890 4         7 $self->{'actual_fields'} = [];
1891 4         9 $self->{'actual_units'} = [];
1892 4         5 $self->{'fields'} = [];
1893 4         7 $self->{'units'} = [];
1894 4         6 $self->{'headers'} = {};
1895 4         7 $self->{'comments'} = [];
1896 4         9 $self->{'data'} = [];
1897 4         7 $self->{'dataidx'} = -1;
1898 4         6 $self->{'max_dataidx'} = -1;
1899 4         5 $self->{'delim'} = undef;
1900 4         7 $self->{'missing'} = $DEFAULT_MISSING;
1901 4         6 $self->{'below_detection_limit'} = $DEFAULT_BDL;
1902 4         6 $self->{'above_detection_limit'} = $DEFAULT_ADL;
1903              
1904 4         6 $self->{'options'}{'cache'} = 1;
1905 4         5 $self->{'options'}{'fill_ancillary_data'} = 0;
1906              
1907 4 100       9 my $slash = ( $self->{'options'}{'keep_slashes'} ? '/' : '' );
1908 4 100       9 if ($self->{'options'}{'add_empty_headers'}) {
1909 1         3 foreach (@ALL_HEADERS) {
1910 34 50       56 if ( !exists( $self->{'headers'}{"${slash}$_"} ) ) {
1911 34         52 $self->{'headers'}{"${slash}$_"} = '';
1912 34 100       47 if ( $_ eq 'missing' ) {
1913 1         4 $self->{'headers'}{"${slash}missing"} = $DEFAULT_MISSING;
1914             }
1915             } ## end if (!exists($self->{'headers'...}))
1916             } ## end foreach (@ALL_HEADERS)
1917             } ## end if ($add_missing_headers)
1918              
1919 4         6 my $success = 1;
1920 4 50       9 if ( $self->{'options'}{'default_headers'} ) {
1921 4         11 $success &= $self->add_headers( $self->{'options'}{'default_headers'} );
1922             }
1923 4 50       9 if ( $self->{'options'}{'headers'} ) {
1924 4         7 $success &= $self->add_headers( $self->{'options'}{'headers'} );
1925             }
1926 4 50       10 unless ($success) {
1927 0         0 croak("Error creating blank file.");
1928             }
1929             } ## end sub create_blank_file
1930              
1931             =head2 read_headers()
1932              
1933             For internal use only. C reads the metadata at the beginning of
1934             a SeaBASS file.
1935              
1936             Called by the object, accepts no arguments.
1937              
1938             =cut
1939              
1940             sub read_headers {
1941 133     133 1 328 my $self = shift;
1942              
1943 133 50       477 if ( $self->{'headers'} ) {
1944 0         0 return;
1945             }
1946              
1947 133 100       593 my $slash = ( $self->{'options'}{'keep_slashes'} ? '/' : '' );
1948 133         218 my $success = 1;
1949 133         171 my @comments;
1950              
1951 133         589 $self->{'headers'} = {};
1952 133         266 $self->{'comments'} = [];
1953              
1954 133 50       320 if ( $self->{'options'}{'default_headers'} ) {
1955 133         742 $success &= $self->add_headers( $self->{'options'}{'default_headers'} );
1956             }
1957              
1958 133         301 my $handle = $self->{'handle'};
1959 133         190 my $position = my $line_number = 0;
1960 133         176 my @header_lines;
1961              
1962 133         214 my $strict = $self->{'options'}{'strict'};
1963              
1964 133         611 while ( my $line = <$handle> ) {
1965 3294         4490 $line_number++;
1966 3294         6225 strip($line);
1967 3294 50       5810 if ($line) {
1968 3294 100       9334 if ( $line =~ m'^(/end_header)\@?$'i ) {
    100          
    100          
1969 116         449 push( @header_lines, $1 );
1970 116         180 $position = tell($handle);
1971 116         205 last;
1972             } elsif ( $line =~ m"^/" ) {
1973 2465         3674 push( @header_lines, $line );
1974             } elsif ( $line =~ m"^!" ) {
1975 696         998 push( @comments, $line );
1976 696 100       1387 if ( $self->{'options'}{'preserve_header'} ) {
1977 10         15 push( @header_lines, $line );
1978             }
1979             } else { #TODO: search ahead for more headers or comments (in case of merely comment missing !) and fail if READ_STRICT
1980 17         34 seek( $handle, $position, SEEK_SET );
1981 17 50       45 if ( $strict & STRICT_READ ) {
1982 0         0 carp("File missing /end_header or comment missing !, assuming data start: line #$line_number ($line)");
1983             }
1984 17         32 last;
1985             }
1986             } ## end if ($line)
1987 3161         7891 $position = tell($handle);
1988             } ## end while (my $line = <$handle>)
1989              
1990             # add_headers looks at STRICT_WRITE, not STRICT_READ
1991 133 100       287 if ( $strict & STRICT_READ ) {
1992 5         12 $self->{'options'}{'strict'} |= STRICT_WRITE;
1993             } else {
1994 128         321 $self->{'options'}{'strict'} = 0;
1995             }
1996              
1997 133 100       312 if ( $self->{'options'}{'preserve_header'} ) {
1998 1         6 $self->{'preserved_header'} = [@header_lines];
1999             }
2000              
2001 133         317 $success &= $self->add_headers( \@header_lines );
2002              
2003             # restore strictness
2004 133         279 $self->{'options'}{'strict'} = $strict;
2005              
2006 133 50       310 if ( $self->{'options'}{'headers'} ) {
2007 133         329 $success &= $self->add_headers( $self->{'options'}{'headers'} );
2008             }
2009              
2010 133         204 my %headers = %{ $self->{'headers'} };
  133         1632  
2011              
2012 133 100       466 if ( $self->{'options'}{'preserve_comments'} ) {
2013 132         193 push( @{ $self->{'comments'} }, @comments );
  132         401  
2014             }
2015              
2016 133   66     487 my $missing = $headers{"${slash}missing"} || $DEFAULT_MISSING;
2017 133 100       325 if ( $self->{'options'}{'delete_missing_headers'} ) {
2018 1         5 while ( my ( $k, $v ) = each(%headers) ) {
2019 31 100       79 if ( $k =~ m"/?(?:end|begin)_header$|^/?missing$" ) {
2020 3         8 next;
2021             }
2022 28 100 66     122 if ( !defined($v) || $v =~ m"^n/?a(?:\[.*?\])?$"i || lc($v) eq lc($missing) ) {
      66        
2023 7         21 delete( $headers{$k} );
2024             }
2025             } ## end while (my ($k, $v) = each...)
2026             } ## end if ($self->{'options'}...)
2027              
2028 133 100       281 if ( $strict & STRICT_READ ) {
2029 5         12 foreach (@REQUIRED_HEADERS) {
2030 110 100       3395 if ( !exists( $headers{"${slash}$_"} ) ) {
2031 58         72 $success = 0;
2032 58         4754 carp("Missing required header: $_");
2033             }
2034             } ## end foreach (@REQUIRED_HEADERS)
2035 5         24 while ( my ( $header, $value ) = each(%headers) ) {
2036 72 50       102 if ($slash) {
2037 0         0 $header =~ s"^/"";
2038             }
2039 72 50 33 607   178 if ( ( firstidx { $_ eq $header } @ALL_HEADERS ) < 0 && ( firstidx { $_ eq $header } @HIDDEN_HEADERS ) < 0 ) {
  1338         1534  
  0         0  
2040 0         0 $success = 0;
2041 0         0 carp("$header not a standard header.");
2042             }
2043             } ## end while (my ($header, $value...))
2044 5 50 33     32 if ( $headers{"${slash}begin_header"} || $headers{"${slash}end_header"} ) {
2045 0         0 $success = 0;
2046 0         0 carp("begin_ or end_header incorrect");
2047             }
2048             } ## end if ($strict & STRICT_READ)
2049 133         274 foreach (@ABSOLUTELY_REQUIRED_HEADERS) {
2050 133 100       440 if ( !exists( $headers{"${slash}$_"} ) ) {
2051 1         3 $success = 0;
2052 1 50       3 if ( $strict & STRICT_READ ) {
2053 1         83 carp("Missing absolutely required header: $_");
2054             }
2055             }
2056             } ## end foreach (@ABSOLUTELY_REQUIRED_HEADERS)
2057              
2058 133   100     1523 $self->{'fields'} = [ split( /\s*,\s*/, $headers{"${slash}fields"} || '' ) ];
2059 133   100     1468 $self->{'actual_fields'} = [ split( /\s*,\s*/, $headers{"${slash}fields"} || '' ) ];
2060              
2061 133 100       589 if ( $headers{"${slash}units"} ) {
2062 79         965 $self->{'units'} = [ split( /\s*,\s*/, $headers{"${slash}units"} ) ];
2063 79         754 $self->{'actual_units'} = [ split( /\s*,\s*/, $headers{"${slash}units"} ) ];
2064             } else {
2065 54         79 my (@new_units1);
2066 54         68 foreach ( @{ $self->{'fields'} } ) {
  54         105  
2067 376         504 push( @new_units1, 'unitless' );
2068             }
2069 54         144 my @new_units2 = @new_units1;
2070 54         104 $self->{'units'} = \@new_units1;
2071 54         101 $self->{'actual_units'} = \@new_units2;
2072 54         209 $headers{"${slash}units"} = join( ',', @new_units1 );
2073             } ## end else [ if ($headers{"${slash}units"...})]
2074              
2075 133 100       257 if ( @{$self->{'fields'}} != @{$self->{'units'}} ) {
  133         248  
  133         354  
2076 1 50       3 if ( $strict & STRICT_READ ) {
2077 1         80 carp("/fields and /units don't match up");
2078 1         52 $success = 0;
2079             } else {
2080 0         0 while (@{$self->{'fields'}} > @{$self->{'units'}}){
  0         0  
  0         0  
2081 0         0 push(@{$self->{'units'}}, 'unitless');
  0         0  
2082             }
2083 0         0 while (@{$self->{'fields'}} < @{$self->{'units'}}){
  0         0  
  0         0  
2084 0         0 pop(@{$self->{'units'}});
  0         0  
2085             }
2086             }
2087             }
2088              
2089 133 100       334 unless ($success) {
2090 4 50       12 if ( $strict & STRICT_READ ) {
2091 4         385 croak("Error(s) reading SeaBASS file");
2092             } else {
2093 0         0 return;
2094             }
2095             }
2096              
2097 129         300 $self->{'missing'} = $missing;
2098 129   66     564 $self->{'below_detection_limit'} = $headers{"${slash}below_detection_limit"} || $missing;
2099 129   66     536 $self->{'above_detection_limit'} = $headers{"${slash}above_detection_limit"} || $missing;
2100 129         434 $self->{'line_number'} = $line_number;
2101 129         272 $self->{'data_start_line'} = $line_number;
2102 129         317 $self->{'data_start_position'} = $position;
2103              
2104 129 100       401 if ( $self->{'options'}{'cache'} ) {
2105 119         260 $self->{'data'} = [];
2106             }
2107 129         271 $self->{'dataidx'} = -1;
2108 129         232 $self->{'max_dataidx'} = -1;
2109              
2110 129         492 $self->{'headers'} = \%headers;
2111              
2112 129 100       664 if ( $self->{'options'}{'fill_ancillary_data'} ) {
2113 18         27 my @fields_lc = map {lc} @{ $self->{'fields'} };
  120         195  
  18         36  
2114 18         43 $self->{'fields_lc'} = \@fields_lc;
2115 18         34 $self->{'ancillary'} = {};
2116              
2117 18         29 foreach my $field (@FILL_ANCILLARY_DATA) {
2118 234         331 $self->find_ancillaries($field);
2119             }
2120              
2121 18         34 $self->{'case_conversion'} = {};
2122              
2123 18         31 while ( my ( $field, $value ) = each( %{ $self->{'ancillary'} } ) ) {
  181         498  
2124 163     887   353 my $idx = firstidx { $_ eq $field } @{ $self->{'fields_lc'} };
  887         941  
  163         445  
2125 163         303 my $new_field = $field;
2126 163 100       257 if ( $idx >= 0 ) {
2127 83         133 $new_field = $self->{'fields'}[$idx];
2128             }
2129              
2130 163 100       243 if ( ref($value) ) {
2131 29         55 for ( my $i = 1; $i < @$value; $i++ ) {
2132 52         64 my $new_arg = $value->[$i];
2133 52         174 for ( $value->[$i] =~ /\$(\{\w+\}|\w+)/g ) {
2134 37         113 ( my $variable = $_ ) =~ s/^\{|\}$//g;
2135              
2136 37     196   88 my $idx = firstidx { $_ eq $variable } @{ $self->{'fields_lc'} };
  196         204  
  37         76  
2137 37         77 my $new_variable = $self->{'fields'}[$idx];
2138              
2139 37         297 $new_arg =~ s/\$$variable(\W|\b|$)/\$$new_variable$1/g;
2140 37         228 $new_arg =~ s/\$\{$variable\}/\$$new_variable/g;
2141             } ## end for ($value->[$i] =~ /\$(\{\w+\}|\w+)/g)
2142 52         128 $value->[$i] = $new_arg;
2143             } ## end for (my $i = 1; $i <= length...)
2144             } else {
2145 134         155 my $new_value = $value;
2146 134         571 for ( $value =~ /\$(\{\w+\}|\w+)/g ) {
2147 165         576 ( my $variable = $_ ) =~ s/^\{|\}$//g;
2148              
2149 165     808   367 my $idx = firstidx { $_ eq $variable } @{ $self->{'fields_lc'} };
  808         810  
  165         370  
2150 165 100       386 if ( $idx >= 0 ) {
2151 159         221 my $new_variable = $self->{'fields'}[$idx];
2152              
2153 159         1253 $new_value =~ s/\$$variable(\W|\b|$)/\$$new_variable$1/g;
2154 159         1155 $new_value =~ s/\$\{$variable\}/\$$new_variable/g;
2155             } ## end if ($idx >= 0)
2156             } ## end for ($value =~ /\$(\{\w+\}|\w+)/g)
2157 134         235 $value = $new_value;
2158             } ## end else [ if (ref($value)) ]
2159 163 50       280 if ( $field ne $new_field ) {
2160 0         0 delete( $self->{'ancillary'}{$field} );
2161 0         0 $self->{'case_conversion'}{$field} = $new_field;
2162             }
2163 163         309 $self->{'ancillary'}{$new_field} = $value;
2164             } ## end while (my ($field, $value...))
2165              
2166 18         45 delete( $self->{'fields_lc'} );
2167             } ## end if ($self->{'options'}...)
2168 129         290 my $data_use_header = "${slash}data_use_warning";
2169 129 100 100     512 if ($self->{'options'}{'optional_warnings'} && $headers{$data_use_header}){
2170 1         196 carp("/data_use_warning=$headers{$data_use_header}!\nUse caution when using this data.");
2171             }
2172              
2173 129         671 return 1;
2174             } ## end sub read_headers
2175              
2176             =head2 validate_header($header, $value, $strict)
2177              
2178             my ($k, $v, $string) = ('investigators','jason_lefler',0)
2179             $sb_file->validate_header($k, $v, $strict);
2180              
2181             For internal use only. C is in charge of properly formatting
2182             key/value pairs to add to the object. This function will modify the input
2183             variables in place to prepare them for use.
2184              
2185             Returns false if there was a problem with the inputs, such as C is set
2186             and an invalid header was passed in.
2187              
2188             C will set C to C<$DEFAULT_MISSING> (C<-999>) if it
2189             is blank or undefined.
2190              
2191             This function will also change the expected delimiter for rows that have not
2192             yet been cached.
2193              
2194             =cut
2195              
2196             sub validate_header {
2197 2595     2595 1 5334 my ( $self, $k, $v, $strict ) = @_;
2198              
2199 2595         3035 my $success = 1;
2200              
2201 2595 100       3832 if ( !defined($v) ) {
2202 207         343 $v = '';
2203             } else {
2204 2388         3265 strip($v);
2205             }
2206              
2207 2595         4682 strip($k);
2208              
2209 2595         4200 $k = lc($k);
2210              
2211 2595 100 100     15100 if ( length($v) == 0 && $k !~ /_header/ ) {
2212 1 50       3 if ($strict) {
2213 0         0 carp("$k missing value");
2214 0         0 $success = 0;
2215             } else {
2216 1         1 $v = "";
2217             }
2218             } ## end if (length($v) == 0 &&...)
2219              
2220 2595 100 100     8545 if ( !$self->{'options'}{'preserve_case'} || $k =~ /fields|units/ ) {
2221 1313         1824 $v = lc($v);
2222             }
2223              
2224 2595 100       3980 if ( $self->{'options'}{'keep_slashes'} ) {
2225 139 100       237 if ( $k =~ m"^[^/]" ) {
2226 2         5 $k = "/$k";
2227             }
2228              
2229 139 50 66 12   238 if ( $strict && ( firstidx { "/$_" eq $k } @ALL_HEADERS ) < 0 && ( firstidx { "/$_" eq $k } @HIDDEN_HEADERS ) < 0 ) {
  12   33     34  
  0         0  
2230 0         0 carp("Invalid header, $k");
2231 0         0 $success = 0;
2232             }
2233             } else {
2234 2456 100       5542 if ( $k =~ m"^/" ) {
2235 2454         5572 $k =~ s"^/"";
2236             }
2237              
2238 2456 50 66 0   4750 if ( $strict && ( firstidx { $_ eq $k } @ALL_HEADERS ) < 0 && ( firstidx { $_ eq $k } @HIDDEN_HEADERS ) < 0 ) {
  1354   33     1477  
  0         0  
2239 0         0 carp("Invalid header, $k");
2240 0         0 $success = 0;
2241             }
2242             } ## end else [ if ($self->{'options'}...)]
2243              
2244 2595 100       8104 if ( $k =~ /_latitude|_longitude/){
    100          
    100          
    100          
    100          
    100          
2245 308         608 $v =~ s/\[deg\]$//i;
2246             } elsif ( $k =~ /_time/){
2247 159         408 $v =~ s/\[gmt\]$//i;
2248             } elsif ( $k =~ m"^/?delimiter$" ) {
2249 93 100       293 unless ( $self->set_delim( $strict, $v ) ) {
2250 1 50       3 if ($strict) {
2251 0         0 $success = 0;
2252             }
2253             }
2254             } elsif ( $k =~ m"^/?missing$" ) {
2255 114 50       360 $self->{'missing'} = ( length($v) ? $v : $DEFAULT_MISSING );
2256 114         479 $self->{'missing_is_number'} = looks_like_number( $self->{'missing'} );
2257             } elsif ( $k =~ m"^/?above_detection_limit" ) {
2258 7 50       29 $self->{'above_detection_limit'} = ( length($v) ? $v : $self->{'missing'} );
2259 7         21 $self->{'adl_is_number'} = looks_like_number( $self->{'above_detection_limit'} );
2260             } elsif ( $k =~ m"^/?below_detection_limit" ) {
2261 7 50       20 $self->{'below_detection_limit'} = ( length($v) ? $v : $self->{'missing'} );
2262 7         22 $self->{'bdl_is_number'} = looks_like_number( $self->{'below_detection_limit'} );
2263             }
2264              
2265 2595         3664 $_[1] = $k;
2266 2595         2957 $_[2] = $v;
2267              
2268 2595         3822 return $success;
2269             } ## end sub validate_header
2270              
2271             =head2 set_delim($strict, $delim)
2272              
2273             Takes a string declaring the delim (IE: 'comma', 'space', etc) and updates the
2274             object's internal delimiter regex.
2275              
2276             =cut
2277              
2278             sub set_delim {
2279 93     93 1 185 my $self = shift;
2280 93         125 my $strict = shift;
2281 93   50     231 my $delim = shift || '';
2282 93 50       399 if ( $delim eq 'comma' ) {
    50          
    100          
    50          
    100          
2283 0         0 $delim = qr/\s*,\s*/;
2284             } elsif ( $delim eq 'semicolon' ) {
2285 0         0 $delim = qr/\s*;\s*/;
2286             } elsif ( $delim eq 'space' ) {
2287 91         433 $delim = qr/\s+/;
2288             } elsif ( $delim eq 'tab' ) {
2289 0         0 $delim = qr/\t/;
2290             } elsif ($strict) {
2291 1         118 carp("delimiter not understood");
2292             } else {
2293 1 50       3 my $slash = ( $self->{'options'}{'keep_slashes'} ? '/' : '' );
2294 1         2 $self->{'headers'}{"${slash}delimiter"} = 'comma';
2295 1         3 $delim = undef;
2296             }
2297 93         290 $self->{'delim'} = $delim;
2298 93 100       308 return ( $delim ? 1 : 0 );
2299             } ## end sub set_delim
2300              
2301             =head2 update_fields()
2302              
2303             C runs through the currently cached rows and calls
2304             C on each row. It then updates the /fields and /units
2305             headers in the header hash.
2306              
2307             =cut
2308              
2309             sub update_fields {
2310 28     28 1 45 my ($self) = @_;
2311 28 100 66     102 if ( $self->{'options'}{'cache'} && $self->{'max_dataidx'} >= 0 ) {
2312 4         6 foreach my $hash ( @{ $self->{'data'} } ) {
  4         9  
2313 9         16 $self->add_and_remove_fields($hash);
2314             }
2315             }
2316              
2317 28 100       55 my $slash = ( $self->{'options'}{'keep_slashes'} ? '/' : '' );
2318 28         35 $self->{'headers'}{"${slash}fields"} = join( ',', @{ $self->{'actual_fields'} } );
  28         106  
2319 28         40 $self->{'headers'}{"${slash}units"} = join( ',', @{ $self->{'actual_units'} } );
  28         88  
2320             } ## end sub update_fields
2321              
2322             =head2 add_and_remove_fields(\%row)
2323              
2324             Given a reference to a row, this function deletes any fields removed with
2325             C and adds an undefined or /missing value for each field added
2326             via C. If C is set, an undefined value is
2327             given, otherwise, it is filled with the /missing value.
2328              
2329             If C is set, this function adds missing date, time,
2330             date_time, lat, lon, and depth fields to the retrieved row from the header.
2331              
2332             Needlessly returns the hash reference passed in.
2333              
2334             =cut
2335              
2336             sub add_and_remove_fields {
2337 279     279 1 484 my ( $self, $hash ) = @_;
2338 279         907 foreach my $field ( keys(%$hash) ) {
2339 1926 100   7848   3953 if ( ( firstidx { $_ eq $field } @{ $self->{'actual_fields'} } ) < 0 ) {
  7848         11153  
  1926         3269  
2340 21 50 33 0   41 unless ( $self->{'options'}{'fill_ancillary_data'} && ( firstidx { $_ eq $field } keys( %{ $self->{'ancillary'} } ) ) >= 0 ) {
  0         0  
  0         0  
2341 21         48 delete( $hash->{$field} );
2342             }
2343             }
2344             } ## end foreach my $field (keys(%$hash...))
2345              
2346 279 100       939 my $missing = ( $self->{'options'}{'missing_data_to_undef'} ? undef : $self->{'missing'} );
2347 279         888 while ( my ( $variable, $pad ) = each(%FIELD_FORMATTING) ) {
2348 2232   33     4806 my $case_var = $self->{'case_conversion'}{$variable} || $variable;
2349 2232         2487 my $v = $hash->{$case_var};
2350 2232 100 66     6611 if ($case_var eq 'second'){
    100 66        
      66        
2351 279 50 66     885 if ( defined($v) && length($v) && (!defined($missing) || $v != $missing)) {
      66        
      100        
2352 11 100       40 if ($v =~ /\D/){
2353 1         11 $hash->{$case_var} = sprintf('%02.3f', $v);
2354             } else {
2355 10         56 $hash->{$case_var} = sprintf('%02d', $v);
2356             }
2357             }
2358             } elsif ( defined($v) && length($v) && (!defined($missing) || $v != $missing)) {
2359 50         154 $hash->{$case_var} = sprintf($pad, $v);
2360             }
2361             } ## end while (my ($variable, $pad...))
2362              
2363 279 100       515 if ( defined($self->{'ancillary'}) ) {
2364 21         40 $self->{'ancillary_tmp'} = {};
2365 21         31 for my $variable (@FILL_ANCILLARY_DATA) {
2366 273 100       463 if ( defined($self->{'ancillary'}{$variable}) ) {
2367 193         354 my $value = $self->extrapolate_variables( $missing, $self->{'ancillary'}{$variable}, $hash );
2368 193 50       365 if ( defined($value) ) {
2369 193         338 $hash->{$variable} = $value;
2370             }
2371             } ## end if ($self->{'ancillary'...})
2372             } ## end for my $variable (@FILL_ANCILLARY_DATA)
2373             } ## end if ($self->{'ancillary'...})
2374              
2375 279         360 foreach my $field ( @{ $self->{'actual_fields'} } ) {
  279         499  
2376 1982 100       3519 if ( !exists( $hash->{$field} ) ) {
2377 61         132 $hash->{$field} = $missing;
2378             }
2379             }
2380              
2381 279         392 return $hash;
2382             } ## end sub add_and_remove_fields
2383              
2384             =head2 guess_delim($line)
2385              
2386             C is is used to guess the delimiter of a line. It is not very
2387             intelligent. If it sees any commas, it will assume the delimiter is a comma.
2388             Then, it checks for tabs, spaces, then semi-colons. Returns 1 on success. If
2389             it doesn't find any, it will throw a warning and return undef.
2390              
2391             =cut
2392              
2393             sub guess_delim {
2394 37     37 1 102 my ( $self, $line ) = @_;
2395 37         59 my $delim_string = '';
2396 37 100       189 if ( $line =~ /,/ ) {
    50          
    50          
    0          
2397 2         9 my $delim = qr/\s*,\s*/;
2398 2         28 $self->{'delim'} = $delim;
2399 2         6 $delim_string = 'comma';
2400             } elsif ( $line =~ /\t/ ) {
2401 0         0 my $delim = qr/\t/;
2402 0         0 $self->{'delim'} = $delim;
2403 0         0 $delim_string = 'tab';
2404             } elsif ( $line =~ /\s+/ ) {
2405 35         146 my $delim = qr/\s+/;
2406 35         72 $self->{'delim'} = $delim;
2407 35         59 $delim_string = 'space';
2408             } elsif ( $line =~ /;/ ) {
2409 0         0 my $delim = qr/\s*;\s*/;
2410 0         0 $self->{'delim'} = $delim;
2411 0         0 $delim_string = 'semicolon';
2412             } else {
2413 0         0 carp("No delimiter defined or can be guessed");
2414 0         0 return;
2415             }
2416 37 100       131 $self->{'headers'}{ ( $self->{'options'}{'keep_slashes'} ? '/' : '' ) . 'delimiter' } = $delim_string;
2417 37         113 return 1;
2418             } ## end sub guess_delim
2419              
2420             =head2 ingest_row(\%data_row | \@data_row | $data_row | %data_row)
2421              
2422             For mostly internal use, parses arguments for C, C, and C
2423             and returns a hash or hash reference of the data row. Given a hash reference,
2424             it will merely return it.
2425              
2426             Given an array or array reference, it will assume each element is a field as
2427             listed in either C or C. If the number of elements
2428             matches C, it uses assumes it's that. If it doesn't match, it is
2429             tried to match against C. If it doesn't match either, a warning is
2430             issued and the return is undefined.
2431              
2432             Given a non-reference scalar, it will split the scalar based on the current
2433             delimiter. If one is not defined, it is guessed. If it cannot be guessed, the
2434             return is undefined.
2435              
2436             If the inputs are successfully parsed, all keys are turned lowercase.
2437              
2438             =cut
2439              
2440             sub ingest_row {
2441 27     27 1 41 my $self = shift;
2442 27         34 my %new_row;
2443 27 100       55 if ( $#_ < 0 ) {
2444 1         126 carp("Incorrect number of arguments to ingest_row()");
2445 1         78 return;
2446             }
2447 26         38 my $arrayref;
2448 26 100       157 if ( ref( $_[0] ) eq 'HASH' ) {
    100          
    50          
2449 14         21 %new_row = %{ shift(@_) };
  14         49  
2450             } elsif ( ref( $_[0] ) eq 'ARRAY' ) {
2451 3         5 $arrayref = $_[0];
2452             } elsif ( !ref( $_[0] ) ) {
2453 9 100       25 if ( $#_ == 0 ) {
    100          
2454 7 50 66     30 if ( !$self->{'delim'} && !$self->guess_delim( $_[0] ) ) {
2455 0         0 return;
2456             }
2457 7         59 $arrayref = [ split( $self->{'delim'}, $_[0] ) ];
2458             } elsif ( $#_ % 2 == 1 ) {
2459 1         4 %new_row = @_;
2460             } else {
2461 1         129 carp('Even sized list, scalar, or hash/array reference expected');
2462 1         109 return;
2463             }
2464             } else {
2465 0         0 carp("Arguments to ingest_row() not understood.");
2466 0         0 return;
2467             }
2468              
2469 25 100       58 if ($arrayref) {
2470 10         12 my $iterator;
2471 10 100       12 if ( scalar( @{ $self->{'actual_fields'} } ) == scalar( @{$arrayref} ) ) {
  10 50       17  
  10         19  
2472 8         40 $iterator = each_arrayref( $self->{'actual_fields'}, $arrayref );
2473 2         15 } elsif ( scalar( @{ $self->{'fields'} } ) == scalar( @{$arrayref} ) ) {
  2         6  
2474 2         10 $iterator = each_arrayref( $self->{'fields'}, $arrayref );
2475 2         6 $self->add_and_remove_fields( \%new_row );
2476             } else {
2477 0         0 my $actual_fields = scalar( @{ $self->{'actual_fields'} } );
  0         0  
2478 0         0 my $fields = scalar( @{ $self->{'fields'} } );
  0         0  
2479 0 0       0 if ( $actual_fields == $fields ) {
2480 0         0 carp("Invalid number of elements, expected $fields");
2481             } else {
2482 0         0 carp("Invalid number of elements, expected $actual_fields or $fields");
2483             }
2484 0         0 return;
2485             } ## end else [ if (scalar(@{$self->{'actual_fields'...}}))]
2486 10         45 while ( my ( $k, $v ) = $iterator->() ) {
2487 52         155 $new_row{$k} = $v;
2488             }
2489             } ## end if ($arrayref)
2490              
2491 25         63 %new_row = map { lc($_) => $new_row{$_} } keys %new_row;
  71         166  
2492              
2493 25 50       59 if (wantarray) {
2494 0         0 return %new_row;
2495             } else {
2496 25         60 return \%new_row;
2497             }
2498             } ## end sub ingest_row
2499              
2500             =head2 find_ancillaries($field_name)
2501              
2502             Used by C to traverse through a field's possible
2503             substitutes in C<%ANCILLARY> and try to find the most suitable replacement.
2504             Values of fields in C<%ANCILLARY> are array references, where each element is
2505             either:
2506              
2507             =over 4
2508              
2509             =item * a string of existing field names used to create the value
2510              
2511             =item * an array reference of the form [converter function, parsing regex
2512             (optional), arguments to converter, ... ]
2513              
2514             =item * a hash reference of the form { header => qr/parsing_regex/ }
2515              
2516             =back
2517              
2518             If the element is an array reference and an argument requires a field from the
2519             file, all arguments are parsed and the variables within them extrapolated, then
2520             the array is put into C<< $self->{'ancillary'} >>.
2521              
2522             If no value can be ascertained, it will not be added to the data rows.
2523              
2524             The value found is stored in C<< $self->{'ancillary'} >>. Returns 1 on
2525             success, 0 if the field cannot be filled in.
2526              
2527             =cut
2528              
2529             sub find_ancillaries {
2530 478     478 1 687 my ( $self, $field ) = @_;
2531 478 100       736 if ( $self->{'ancillary'}{$field} ) {
2532 130         210 return 1;
2533             }
2534 348     2018   727 my $idx = firstidx { $_ eq $field } @{ $self->{'fields_lc'} };
  2018         2071  
  348         697  
2535 348 100       756 if ( $idx >= 0 ) {
2536 83         199 $self->{'ancillary'}{$field} = "\$\{$field\}";
2537 83         138 return 1;
2538             }
2539              
2540 265 50       445 my $slash = ( $self->{'options'}{'keep_slashes'} ? '/' : '' );
2541 265 100       269 foreach my $attempt ( @{ $ANCILLARY{$field} || [] } ) {
  265         536  
2542 461 100       792 if ( ref($attempt) eq 'HASH' ) {
    100          
    50          
2543 308         310 keys( %{$attempt} ); #reset each() iterator between calls
  308         364  
2544 308         335 while ( my ( $where, $regex ) = each( %{$attempt} ) ) {
  581         1203  
2545 308 100 66     1037 if ( $where =~ /^\$/ ) {
    100 66        
2546 130 100   722   567 if ( ( firstidx { "\$$_" eq $where } $self->fields() ) >= 0 ) {
  722         1093  
2547 15     33   71 $self->{'ancillary'}{$field} = [ sub { return shift; }, $regex, $where ];
  33         70  
2548 15         53 return 1;
2549             }
2550             } elsif ( defined( $self->{'headers'}{"$slash$where"} ) && $self->{'headers'}{"$slash$where"} =~ $regex && lc($1) ne 'na' ) {
2551 20         52 $self->{'ancillary'}{$field} = $1;
2552 20         50 return 1;
2553             }
2554             } ## end while (my ($where, $regex...))
2555             } elsif ( ref($attempt) eq 'ARRAY' ) {
2556 104         195 my @attempt = @$attempt;
2557 104         126 my $function = shift(@attempt);
2558 104         110 my $regex;
2559 104 100       169 if ( ref( $attempt[0] ) eq 'Regexp' ) {
2560 50         68 $regex = shift(@attempt);
2561             }
2562 104         110 my $success = 1;
2563 104         106 my @args;
2564 104         125 foreach my $argument (@attempt) {
2565 104         122 my $tmparg = $argument;
2566 104         411 for ( $argument =~ /\$(\{\w+\}|\w+)/g ) {
2567 145         343 ( my $variable = $_ ) =~ s/^\{|\}$//g;
2568 145         244 $success &= $self->find_ancillaries($variable);
2569 145 100       193 if ($success) {
2570 55 50       94 if ( ref( $self->{'ancillary'}{$variable} ) ) {
2571 0         0 $tmparg =~ s/\$$variable(\W|\b|$)/\$\{$variable\}$1/g;
2572             } else {
2573 55         63 my $value = $self->{'ancillary'}{$variable};
2574 55         532 $tmparg =~ s/\$$variable(\W|\b|$)/$value$1/g;
2575 55         302 $tmparg =~ s/\$\{$variable\}/$value/g;
2576             }
2577             } else {
2578 90         118 last;
2579             }
2580             } ## end for ($argument =~ /\$(\{\w+\}|\w+)/g)
2581 104         195 push( @args, $tmparg );
2582             } ## end foreach my $argument (@attempt)
2583 104 100       192 if ($success) {
2584 14 100       24 if ($regex) {
2585 8         12 unshift( @args, $regex );
2586             }
2587 14         32 $self->{'ancillary'}{$field} = [ $function, @args ];
2588 14         37 return 1;
2589             } ## end if ($success)
2590             } elsif ( !ref($attempt) ) {
2591 49         60 my $success = 1;
2592 49         57 my $tmparg = $attempt;
2593 49         217 for ( $attempt =~ /\$(\{\w+\}|\w+)/g ) {
2594 99         248 ( my $variable = $_ ) =~ s/^\{|\}$//g;
2595 99         163 $success &= $self->find_ancillaries($variable);
2596 99 100       147 if ($success) {
2597 81 100       130 if ( ref( $self->{'ancillary'}{$variable} ) ) {
2598 6         45 $tmparg =~ s/\$$variable(\W|\b|$)/\$\{$variable\}$1/g;
2599             } else {
2600 75         117 my $value = $self->{'ancillary'}{$variable};
2601 75         935 $tmparg =~ s/\$$variable(\W|\b|$)/$value$1/g;
2602 75         576 $tmparg =~ s/\$\{$variable\}/$value/g;
2603             }
2604             } else {
2605 18         23 last;
2606             }
2607             } ## end for ($attempt =~ /\$(\{\w+\}|\w+)/g)
2608 49 100       96 if ($success) {
2609 31         66 $self->{'ancillary'}{$field} = $tmparg;
2610 31         68 return 1;
2611             }
2612             } ## end elsif (!ref($attempt))
2613             } ## end foreach my $attempt (@{$ANCILLARY...})
2614              
2615 185         258 return 0;
2616             } ## end sub find_ancillaries
2617              
2618             =head2 extrapolate_variables($missing, $expression, \%row)
2619              
2620             Used by C to convert a parsed ancillary string, such as
2621             C<'$year$month$day'>, into a real value using the fields from the C<\%row>.
2622             C<$expression>s are strings figured out by C and stored in
2623             C<< $self->{'ancillary'} >>.
2624              
2625             The return is undefined if a value cannot be created (IE: a required field is
2626             missing).
2627              
2628             =cut
2629              
2630             sub extrapolate_variables {
2631 240     240 1 391 my ( $self, $missing, $expression, $row ) = @_;
2632              
2633 240 100       341 if ( ref($expression) ) {
2634 47         81 return $self->extrapolate_function( $missing, $expression, $row );
2635             } else {
2636 193         237 my $tmpexpr = $expression;
2637 193         678 for ( $expression =~ /\$(\{\w+\}|\w+)/g ) {
2638 235         1314 ( my $variable = $_ ) =~ s/^\{|\}$//g;
2639 235         267 my $value;
2640 235 100 33     1123 if ( $self->{'ancillary_tmp'}{$variable} ) {
    50 66        
    0 33        
2641 131         168 $value = $self->{'ancillary_tmp'}{$variable};
2642             } elsif ( defined($row->{$variable}) && length($row->{$variable}) && (!defined($missing) || $row->{$variable} != $missing)) {
2643 104         157 $value = $row->{$variable};
2644 104         161 $self->{'ancillary_tmp'}{$variable} = $value;
2645             } elsif ( ref( $self->{'ancillary'}{$variable} ) ) {
2646 0         0 $value = $self->extrapolate_function($missing, $self->{'ancillary'}{$variable}, $row);
2647 0 0       0 if ( !defined($value) ) {
2648 0         0 return;
2649             }
2650             } else {
2651 0         0 return;
2652             }
2653              
2654 235         2623 $tmpexpr =~ s/\$$variable(\W|\b|$)/$value$1/g;
2655 235         1550 $tmpexpr =~ s/\$\{$variable\}/$value/g;
2656             } ## end for ($expression =~ /\$(\{\w+\}|\w+)/g)
2657 193         466 return $tmpexpr;
2658             } ## end else [ if (ref($expression)) ]
2659             } ## end sub extrapolate_variables
2660              
2661             =head2 extrapolate_function($missing, $expression, \%row)
2662              
2663             If the value stored in C<< $self->{'ancillary'} >> is an array reference, this
2664             function uses the array to create an actual value. See
2665             L for an explanation of the
2666             array.
2667              
2668             =cut
2669              
2670             sub extrapolate_function {
2671 47     47 1 68 my ( $self, $missing, $expression, $row ) = @_;
2672 47         48 my $value;
2673 47         100 my ( $function, @args ) = @$expression;
2674 47         101 my $regex;
2675              
2676 47 100       96 if ( ref( $args[0] ) eq 'Regexp' ) {
2677 41         50 $regex = shift(@args);
2678             }
2679 47         65 for (@args) {
2680 47         87 $_ = $self->extrapolate_variables( $missing, $_, $row );
2681 47 50       106 if ( !defined($_) ) {
2682 0         0 return;
2683             }
2684             } ## end for (@args)
2685 47         78 $value = &$function(@args);
2686 47 100       119 if ($regex) {
2687 41 50       189 if ( $value =~ $regex ) {
2688 41         81 $value = $1;
2689             }
2690             }
2691              
2692 47         102 return $value;
2693             } ## end sub extrapolate_function
2694              
2695             =head1 STATIC METHODS
2696              
2697             =head2 strip(@list)
2698              
2699             my @space_filled_lines = (' line1 ', ' line2', 'line3 ', 'line4');
2700             strip(@space_filled_lines);
2701             print @space_filled_lines; #line1line2line3line4
2702              
2703             Runs through the list and removes leading and trailing whitespace. All changes
2704             are made in place.
2705              
2706             It is literally this:
2707              
2708             sub strip {
2709             s/^\s+|\s+$//g for @_;
2710             }
2711              
2712             =cut
2713              
2714             # Not an object method!
2715             sub strip {
2716 8545     8545 1 43191 s/^\s+|\s+$//g for @_;
2717             }
2718              
2719             =head2 julian_to_greg($yyyyjjj)
2720              
2721             Converts a date in the day of year format YYYYJJJ into YYYYMMDD. Returns the
2722             newly formatted string or undefined if the input does not match the required
2723             format.
2724              
2725             This uses the C function from C to do the heavy
2726             lifting.
2727              
2728             =cut
2729              
2730             # Not an object method!
2731             sub julian_to_greg {
2732 14     14 1 100 my ($yyyyjjj) = @_;
2733 14 50       46 if ( $yyyyjjj =~ /^(\d{4})(\d{3})$/ ) {
2734 14         80 my ( $y, $m, $d ) = Add_Delta_Days( $1, 1, 1, $2 - 1 );
2735 14         56 return sprintf( '%04d%02d%02d', $y, $m, $d );
2736             }
2737 0           return;
2738             } ## end sub julian_to_greg
2739              
2740             =head1 CAVEATS/ODDITIES
2741              
2742             =head2 Duplicate Fields
2743              
2744             This class will not allow a field to be added to the object if a field of the
2745             same name already exists. If a file being read has duplicate field names, only
2746             the B one is used. No warning is issued. If C is used to
2747             remove it, only the first instance will be deleted. To delete all instances,
2748             use C<< $sb_file->remove_field($sb_file->find_fields('chl')) >>. This may
2749             change in future releases.
2750              
2751             =head2 Changing Delimiter or Missing Value
2752              
2753             Modifying the delimiter header on a file that is being read will cause any
2754             non-cached rows to be split by the new delimiter, which should break most/all
2755             files. If the delimiter must be changed, call C to cache all the rows,
2756             then change it. This will obviously not work if caching is turned off. The
2757             same is true for setting the missing value, but only really applies when the
2758             C option is used (same goes to below detection limit).
2759              
2760             =head2 Below Detection Limit
2761              
2762             Below detection limit is only partially supported. If C is
2763             used, fields equal to C will be set to C, as
2764             well. Files modified while using C will have all data
2765             equal to C written out set to the missing value instead
2766             of the below detection limit value. If the below detection limit value is equal
2767             to the missing value or C is used, the
2768             C header will not be written.
2769              
2770             =head1 AUTHOR
2771              
2772             Jason Lefler, C<< >>
2773              
2774             =head1 BUGS
2775              
2776             Please report any bugs or feature requests to C
2777             rt.cpan.org>, or through the web interface at
2778             L. I will be
2779             notified, and then you'll automatically be notified of progress on your bug as
2780             I make changes.
2781              
2782             =head1 SUPPORT
2783              
2784             You can find documentation for this module with the perldoc command.
2785              
2786             perldoc Data::SeaBASS
2787              
2788             You can also look for information at:
2789              
2790             =over 4
2791              
2792             =item * RT: CPAN's request tracker (report bugs here)
2793              
2794             L
2795              
2796             =item * AnnoCPAN: Annotated CPAN documentation
2797              
2798             L
2799              
2800             =item * CPAN Ratings
2801              
2802             L
2803              
2804             =item * Search CPAN
2805              
2806             L
2807              
2808             =back
2809              
2810             =head1 LICENSE AND COPYRIGHT
2811              
2812             Copyright 2014 Jason Lefler.
2813              
2814             This program is free software; you can redistribute it and/or modify it under
2815             the terms of either: the GNU General Public License as published by the Free
2816             Software Foundation; or the Artistic License.
2817              
2818             See http://dev.perl.org/licenses/ for more information.
2819              
2820             =cut
2821              
2822             1; # End of Data::SeaBASS