File Coverage

lib/Weather/GHCN/StationTable.pm
Criterion Covered Total %
statement 942 993 94.8
branch 295 350 84.0
condition 140 175 80.0
subroutine 81 89 91.0
pod 31 33 93.9
total 1489 1640 90.7


line stmt bran cond sub pod time code
1             # Weather::GHCN::StationTable.pm - class for collecting station objects and weather data
2            
3             ## no critic (Documentation::RequirePodAtEnd)
4            
5             =head1 NAME
6            
7             Weather::GHCN::StationTable - collect station objects and weather data
8              
9             =head1 VERSION
10              
11             version v0.0.010
12            
13             =head1 SYNOPSIS
14            
15             use Weather::GHCN::StationTable;
16            
17             my $ghcn = Weather::GHCN::StationTable->new;
18            
19             my ($opt, @errors) = $ghcn->set_options(
20             user_options => {
21             country => 'US',
22             state => 'NY',
23             location => 'New York',
24             report => 'yearly',
25             },
26             );
27             die @errors if @errors;
28            
29             $ghcn->load_stations;
30            
31             # generate a list of the stations that were selected
32             say $ghcn->get_stations( kept => 1 );
33            
34             if ($opt->report) {
35             say $ghcn->get_header;
36            
37             $ghcn->load_data();
38             $ghcn->summarize_data;
39            
40             say $ghcn->get_summary_data;
41             say $ghcn->get_footer;
42             }
43            
44            
45             =head1 DESCRIPTION
46            
47             The B module provides a class that is used to
48             fetch stations information from the NOAA Global Historical Climatology
49             Network database, along with temperature and/or precipitation records
50             from the daily historical records.
51            
52             For a more comprehensive example than the above Synopsis, see the
53             section EXAMPLE PROGRAM.
54            
55             Caveat emptor: incompatible interface changes may occur on releases
56             prior to v1.00.000. (See VERSIONING and COMPATIBILITY.)
57            
58             The module is primarily for use by modules Weather::GHCN::Fetch.
59            
60             =cut
61            
62             # these are needed because perlcritic fails to detect that Object::Pad handles these things
63             ## no critic [ValuesAndExpressions::ProhibitVersionStrings]
64            
65 3     3   7632 use v5.18; # minimum for Object::Pad
  3         23  
66 3     3   18 use warnings;
  3         6  
  3         117  
67 3     3   1241 use Object::Pad 0.66 qw( :experimental(init_expr) );
  3         24563  
  3         17  
68            
69             package Weather::GHCN::StationTable;
70             class Weather::GHCN::StationTable;
71            
72             our $VERSION = 'v0.0.010';
73            
74             # directly used by this module
75 3     3   4882 use Carp qw( carp croak );
  3         7  
  3         176  
76 3     3   854 use Const::Fast;
  3         5423  
  3         16  
77 3     3   1690 use HTML::Entities;
  3         17910  
  3         239  
78 3     3   1429 use Devel::Size;
  3         1522  
  3         157  
79 3     3   21 use FindBin;
  3         6  
  3         114  
80 3     3   1500 use Math::Trig;
  3         48191  
  3         498  
81 3     3   1984 use Path::Tiny;
  3         27477  
  3         183  
82 3     3   1104 use Try::Tiny;
  3         4273  
  3         193  
83 3     3   932 use Weather::GHCN::Common qw( :all );
  3         9  
  3         434  
84 3     3   1333 use Weather::GHCN::TimingStats;
  3         9  
  3         131  
85             #use Hash::Wrap {-lvalue => 1, -defined => 1, -as => '_wrap_hash' };
86            
87             # included so consumers of this module don't have to include these
88 3     3   1378 use Weather::GHCN::CacheURI;
  3         9  
  3         141  
89 3     3   1571 use Weather::GHCN::CountryCodes;
  3         7  
  3         246  
90 3     3   1242 use Weather::GHCN::Measures;
  3         7  
  3         119  
91 3     3   1474 use Weather::GHCN::Options;
  3         11  
  3         140  
92 3     3   1347 use Weather::GHCN::Station;
  3         8  
  3         22864  
93            
94             const my $EMPTY => q(); # empty string
95             const my $SPACE => q( ); # space character
96             const my $DASH => q(-); # dash character
97             const my $TAB => qq(\t); # tab character
98             const my $NL => qq(\n); # perl platform-universal newline
99             const my $TRUE => 1; # perl's usual TRUE
100             const my $FALSE => not $TRUE; # a dual-var consisting of '' and 0
101            
102             const my $DEFAULT_PROFILE_FILE => '~/.ghcn_fetch.yaml';
103            
104             const my %MMM_TO_MM => (
105             Jan=>1, Feb=>2, Mar=>3, Apr=>4, May=>5, Jun=>6,
106             Jul=>7, Aug=>8, Sep=>9, Oct=>10, Nov=>11, Dec=>12,
107             );
108            
109             const my $GHCN_DATA => 'https://www1.ncdc.noaa.gov/pub/data/ghcn/daily/all/';
110             const my $GHCN_STN_LIST_URL => 'https://www1.ncdc.noaa.gov/pub/data/ghcn/daily/ghcnd-stations.txt';
111             const my $GHCN_STN_INVEN_URL => 'https://www1.ncdc.noaa.gov/pub/data/ghcn/daily/ghcnd-inventory.txt';
112            
113             const my $STN_ID_RE => qr{ [[:upper:]]{2} [[:alnum:]\_\-]{9} }xms;
114             const my $STN_LIST_RE => qr{ \A $STN_ID_RE ( [,] $STN_ID_RE )+ \Z }xms;
115            
116             # station errors (rejected)
117             const my $ERR_FETCH => 1;
118             const my $ERR_METRICS => 2;
119             const my $ERR_NOT_ACTIVE => 3;
120             const my $ERR_RANGE_FULL => 4;
121             const my $ERR_NOT_PARTIAL => 5;
122             const my $ERR_NOACTIVE => 6;
123             const my $ERR_INSUFF => 7;
124             const my $ERR_NOT_GSN => 8;
125             const my $ERR_NO_GIS => 9;
126            
127             # station warnings (not rejected)
128             const my $WARN_MISS_YA => 51;
129             const my $WARN_MISS_YF => 52;
130             const my $WARN_MISS_MO => 53;
131             const my $WARN_MISS_DY => 54;
132             const my $WARN_ANOM => 55;
133            
134             my %NoteMsg = ( ## no critic [ProhibitMagicNumbers]
135             1 => q|*E* unable to fetch data from URL|,
136             2 => q|*E* station doesn't have any of the required measurements|,
137             3 => q|*E* station active range isn't a subset of the -active range|,
138             4 => q|*E* station active range doesn't fully intersect -range|,
139             5 => q|*E* station active range doesn't partially intersect -active|,
140             6 => q|*E* option -range or -active specified but station has no active range in the inventory|,
141             7 => q|*E* insufficient data to meet quality threshold|,
142             8 => q|*E* not a GSN station|,
143             9 => q|*E* -gps specified but station is without GIS coordinates|,
144            
145             51 => q|*W* station missing data in years of the active range|,
146             52 => q|*W* station missing data in years of the filter range|,
147             53 => q|*W* station missing one or more entire months of data in a year|,
148             54 => q|*W* station missing one or more days of data in a year|,
149             55 => q|*W* station has insufficient baseline years - anomalies not calculated|,
150             );
151            
152             # Global lexical option object, created by new()
153             my $Opt;
154            
155             # private fields that are intialized by set_options()
156             field $_ghcn_opt_obj; # [0] Weather::GHCN::Options object providing filtering and other options
157            
158             # private fields that are intialized by load()
159             field $_measures_obj; # [1] Measures object
160            
161             # private fields that are intialized by get_header()
162             field $_measure_begin_idx {0}; # [2] column index where measures will go in the output
163            
164             # private fields that are intialized by new() i.e. automatically or within sub BUILD
165             field %_hstats; # [3] Hash for capturing data field hash statistics
166             field $_tstats; # [4] TimingStats object (or undef)
167            
168             # data fields
169             field %_station; # [5] loaded station objects, key station_id
170             field $_aggregate_href { {} } ; # [6] hashref of aggregated (summarized0 daily data
171             field $_flag_cnts_href { {} } ; # [7] hashref of flag counts
172             field $_daily_href { {} } ; # [8] hashref of most recent daily data loaded
173             field $_baseline_href { {} } ; # [9] hashref of baseline data
174            
175             # readable fields that are populated by set_options
176 0     0 1 0 field $_opt_obj :reader; # [10] $ghcn_opt_obj->opt_obj (a Hash::Wrap of $ghcn_opt_obj->opt_href)
  0         0  
177 0     0 1 0 field $_opt_href :reader; # [11] $ghcn_opt_obj->opt_href
  0         0  
178 0     0 0 0 field $_cache_obj :reader; # [12] cache object
  0         0  
179 3     3 0 11 field $_cachedir :reader; # [13] cache directory
  3         46  
180 4     4 1 800 field $_profile_file :reader; # [14]
  4         48  
181 0     0 1 0 field $_profile_href :reader; # [15] hash reference containing cache and alias options
  0         0  
182            
183             # other fields with read methods
184 5     5 1 20 field $_stn_count :reader; # [16]
  5         618  
185 5     5 1 34 field $_stn_selected_count :reader; # [17]
  5         106  
186 25     25 1 63 field $_stn_filtered_count :reader; # [18]
  25         201  
187 0     0 1 0 field $_missing_href :reader { {} }; # [19]
  0         0  
188            
189             # fields for API use that are readable and writable
190 3 100   3 1 1790 field $_stnid_filter_href :accessor; # [20] hashref of station id's to be loaded, or empty hash
  3         23  
191 0 0   0 1 0 field $_return_list :accessor; # [21] return method results as tsv if true, a list if false
  0         0  
192            
193             =head1 FIELDS (read-only)
194            
195             =over 4
196            
197             =item opt_obj
198            
199             Returns a reference to the Options object created by set_options.
200            
201             =item opt_href
202            
203             Returns a reference to a hash of the Options created by set_options.
204            
205             =item profile_file
206            
207             Returns the name of the profile file, if one was passed to
208             set_options.
209            
210             =item profile_href
211            
212             Returns a reference to a hash containing the profile options
213             set by set_options (if any).
214            
215             =item stn_count
216            
217             Returns a count of the total number of stations found in the station
218             list.
219            
220             =item stn_selected_count
221            
222             Returns a count of the number of stations that were selected for
223             processng.
224            
225             =item stn_filtered_count
226            
227             Returns a count of the number of stations that were selected for
228             processing, excluding those rejected due to errors or other criteria.
229            
230             =item missing_href
231            
232             Returns a hash of the missing months and days for the selected
233             data.
234            
235             =back
236            
237             =head1 FIELDS (read or write)
238            
239             =over 4
240            
241             =item return_list()
242            
243             For API use. By default, get methods return a tab-separated string
244             of results. If return_list is set to a perl true value, then these
245             methods will return a list (or list of lists). If no argument is
246             given, the current value of return_list is returned.
247            
248             =item stnid_filter_href(\%stnid_filter)
249            
250             For API use. With no argument, the current value is returned. If an
251             argument is given, it must be a hash reference the keys of which are
252             the specific station id's you want to fetch and process. When this is
253             used, many filtering options set via set_options will be overridden;
254             e.g. country, state, location etc.
255            
256             =back
257            
258             =cut
259            
260             =head1 METHODS
261            
262             =head2 new ()
263            
264             Create a new StationTable object.
265            
266             =cut
267            
268             BUILD {
269             %_hstats = ();
270             $_tstats = Weather::GHCN::TimingStats->new();
271             }
272            
273             =head2 flag_counts ()
274            
275             The load_stations() and load_data() methods may reject a station or a
276             particular data entry due to quality or other issues. These
277             decisions are kept in a hash field, and a reference to that hash is
278             returned by this method. The caller can then report the values.
279            
280             =cut
281            
282 1     1 1 693 method flag_counts () {
  1         4  
  1         4  
283 1         3 return $_flag_cnts_href;
284             }
285            
286             =head2 get_flag_statistics ( list => 0, no_header => 0 )
287            
288             Gets a header row and summary table of data points that were kept and rejected, along
289             with counts of QFLAGS (quality flags). Returns tab-separated
290             text, or a list if the list argument is true. A heading line
291             is provided unless no_header is true.
292            
293             =over 4
294            
295             =item argument: list =>
296            
297             If the arguments include the 'list' keyword and a true value, then a
298             list is returned rather than tab-separated lines of text. Defaults
299             to false.
300            
301             =item argument: no_header =>
302            
303             If the arguments include the 'no_header' keyword and a true value,
304             then the return value will not include a header line. Default is
305             false.
306            
307             =back
308            
309             =cut
310            
311 3     3 1 4781 method get_flag_statistics ( %args ) {
  3         9  
  3         8  
  3         6  
312 3   66     38 my $return_list = $args{list} // $_return_list;
313            
314 3         5 my @output;
315            
316             # print summary of data points kept and rejected, with qflag counts
317             push @output, ['Values', 'Kept', 'Rejected', 'Flags']
318 3 50       18 unless $args{no_header};
319 3         21 foreach my $elem ( sort keys $_flag_cnts_href->%* ) {
320 9 100       42 next if $elem =~ m{ [[:lower:]] }xms;
321 6 50       18 next if $elem =~ m{ \A A_ }xms;
322 6         26 my $flag_info = _qflags_as_string( $_flag_cnts_href->{$elem}->{QFLAGS} );
323            
324             push @output, [
325             $elem,
326             ( $_flag_cnts_href->{$elem}->{KEPT} // 0 ),
327 6   50     45 ( $_flag_cnts_href->{$elem}->{REJECTED} // 0 ),
      50        
328             $flag_info,
329             ];
330             }
331            
332 3 100       26 return $return_list ? @output : tsv(\@output);
333             }
334            
335             =head2 get_footer( list => 0 )
336            
337             Get a footing section with explanatory notes about the output data
338             produced by detail and summary reports.
339            
340             =over 4
341            
342             =item argument: list =>
343            
344             If the arguments include the 'list' keyword and a true value, then a
345             list is returned rather than tab-separated lines of text. Defaults
346             to false.
347            
348             =back
349            
350             =cut
351            
352 3     3 1 5781 method get_footer ( %args ) {
  3         8  
  3         8  
  3         8  
353 3   66     14 my $return_list = $args{list} // $_return_list;
354            
355 3         7 my @output;
356            
357 3         10 push @output, 'Notes:';
358 3         7 push @output, ' 1. Data is obtained from the GHCN GHCN repository, specifically:';
359 3         13 push @output, $TAB . $GHCN_STN_LIST_URL;
360 3         10 push @output, $TAB . $GHCN_STN_INVEN_URL;
361 3         9 push @output, $TAB . $GHCN_DATA;
362 3         7 push @output, ' 2. Temperatures are in Celsius, precipitation in mm and snowfall/depth in cm.';
363 3         9 push @output, ' 3. TAVG is a daily average computed at each station; Tavg is the average of TMAX and TMIN.';
364 3         6 push @output, ' 4. Data is averaged at the daily level across multiple stations.';
365 3         9 push @output, ' 5. Data is summarized at the monthly or yearly level using different rules depending on the measure:';
366 3         10 push @output, $TAB . '- TMAX is aggregated by max(); TMIN is aggregated by min().';
367 3         8 push @output, $TAB . '- TAVG and Tavg are aggregated by average().';
368 3         8 push @output, $TAB . '- PRCP and SNOW are aggregated by sum().';
369 3         9 push @output, $TAB . '- SNWD is aggregated by max().';
370 3         6 push @output, ' 6. Decades begin on Jan 1 in calendar years ending in zero.';
371 3         6 push @output, ' 7. Seasonal decades/year/quarters begin Dec 1 of the previous calendar year.';
372            
373 3 100       22 return $return_list ? @output : tsv(\@output);
374             }
375            
376             =head2 get_hash_stats ( list => 0, no_header => 0 )
377            
378             Gets the hash sizes collected during the execution of StationTable
379             methods, notably load_stations and load_data, as tab-separated
380             lines of text.
381            
382             =over 4
383            
384             =item argument: list =>
385            
386             If the arguments include the 'list' keyword and a true value, then a
387             list is returned rather than tab-separated lines of text. Defaults
388             to false.
389            
390             =item argument: no_header =>
391            
392             If the arguments include the 'no_header' keyword and a true value,
393             then the return value will not include a header line. Default is
394             false.
395            
396             =back
397            
398             =cut
399            
400 4     4 1 2252 method get_hash_stats ( %args ) {
  4         11  
  4         13  
  4         8  
401 4   66     18 my $return_list = $args{list} // $_return_list;
402            
403 4         9 my @output;
404            
405 4         31 my @keys = sort keys %_hstats;
406            
407 4 100 66     27 if ( @keys and not $args{no_header} ) {
408 3         10 push @output, [ 'Hash sizes:' ];
409 3         12 push @output, [ qw(Hash Subject Iteration Size) ];
410             }
411            
412 4         14 foreach my $hash ( sort @keys ) {
413 16         49 foreach my $subject ( sort keys $_hstats{$hash}->%* ) {
414 20         54 foreach my $iter ( sort keys $_hstats{$hash}->{$subject}->%* ) {
415 20         56 my $sz = commify( $_hstats{$hash}->{$subject}->{$iter} );
416 20         80 push @output, [ $hash, $subject, $iter, $sz ];
417             }
418             }
419             }
420            
421 4 100       36 return $return_list ? @output : tsv(\@output);
422             }
423            
424             =head2 get_header ( list => 0 )
425            
426             The weather data obtained by the laod_data() method is essentially a
427             table. Which columns are returned depends on various options. For
428             example, if report => monthly is given, then the key columns will be
429             year and month -- no day. If the precip option is given, then
430             extra columns are included for precipitation values.
431            
432             This variabiliy makes it difficult for a consumer of these modules
433             to emit a heading that matches the underlying columns. The purpose of
434             this method is to return a set of column headings that will match
435             the data. The value returned is a tab-separated string.
436            
437             =over 4
438            
439             =item argument: list =>
440            
441             If the arguments include the 'list' keyword and a true value, then a
442             list is returned rather than tab-separated lines of text. Defaults
443             to false.
444            
445             =back
446            
447             =cut
448            
449 19     19 1 6070 method get_header ( %args ) {
  19         44  
  19         53  
  19         36  
450 19   66     84 my $return_list = $args{list} // $_return_list;
451            
452             # if this is a summary report, then alter the measure column labels
453 19 100       464 if ( not $Opt->report eq 'detail' ) {
454 4         113 foreach my $label ($_measures_obj->measures) {
455 12         44 $label =~ s{ \A TMAX }{TMAX max}xms;
456 12         32 $label =~ s{ \A TMIN }{TMIN min}xms;
457            
458 12         28 $label =~ s{ \A TAVG }{TAVG avg}xms;
459 12         30 $label =~ s{ \A Tavg }{Tavg avg}xms;
460            
461 12         24 $label =~ s{ \A PRCP }{PRCP sum}xms;
462 12         23 $label =~ s{ \A SNOW }{SNOW sum}xms;
463            
464 12         20 $label =~ s{ \A SNWD }{SNWD max}xms;
465            
466 12         30 $label =~ s{ \A A_ (\w+) }{$1 anom}xms;
467             }
468             }
469            
470 19   100     542 my $includes_month =
471             $Opt->report eq 'detail' ||
472             $Opt->report eq 'daily' ||
473             $Opt->report eq 'monthly';
474            
475            
476             # generate and print the header row
477 19         486 my @output;
478 19         60 push @output, 'Year';
479 19 100       61 push @output, 'Month' if $includes_month;
480 19 100 100     312 push @output, 'Day' if $Opt->report eq 'detail' or $Opt->report eq 'daily';
481 19         387 push @output, 'Decade';
482 19 100       68 push @output, 'S_Decade' if $includes_month;
483 19 100       60 push @output, 'S_Year' if $includes_month;
484 19 100       63 push @output, 'S_Qtr' if $includes_month;
485            
486 19         41 $_measure_begin_idx = @output;
487 19         68 push @output, $_measures_obj->measures;
488            
489 19 100       329 push @output, 'QFLAGS' if $Opt->report eq 'detail';
490 19 100       495 push @output, 'StationId' if $Opt->report eq 'detail';
491 19 100       559 push @output, 'Location' if $Opt->report eq 'detail';
492 19 100       504 push @output, 'StnIdx' if $Opt->report eq 'detail';
493 19 100       509 push @output, 'Grid' if $Opt->report eq 'detail';
494            
495 19 100       452 return $return_list ? @output : join $TAB, @output;
496             }
497            
498             =head2 get_missing_data_ranges( list => 0, no_header => 0 )
499            
500             Gets a list, by station id and year, of any months or day ranges
501             when data was found to be missing. Missing data can lead to incorrect
502             interpretation and can cause a station to be rejected if the percent
503             of found data does not meet the -quality threshold (normally 90%).
504            
505             Returns a heading line followed by lines of tab-separated strings.
506            
507             =over 4
508            
509             =item argument: list =>
510            
511             If the arguments include the 'list' keyword and a true value, then a
512             list of lists (stations containing years) is returned rather than
513             tab-separated lines of text. Defaults to false.
514            
515             =item argument: no_header =>
516            
517             If the arguments include the 'no_header' keyword and a true value,
518             then the return value will not include a header line. Default is
519             false.
520            
521             =item option: report
522            
523             Determines the number and content of heading values.
524            
525             =back
526            
527             =cut
528            
529 3     3 1 974 method get_missing_data_ranges ( %args ) {
  3         7  
  3         8  
  3         7  
530 3   66     18 my $return_list = $args{list} // $_return_list;
531            
532 3         6 my @output;
533            
534             push @output, ['Missing year, months and days by station id and year (for selected stations):']
535 3 50       17 unless $args{no_header};
536            
537 3         18 foreach my $stnid ( sort keys $_missing_href->%* ) {
538 5         10 my $stnobj = $_station{$stnid};
539 5 50       22 next if $stnobj->error_count > 0;
540 5         14 my $yyyy_href = $_missing_href->{$stnid};
541            
542 5         22 foreach my $yyyy ( sort keys $yyyy_href->%* ) {
543 9         17 my $values_href = $yyyy_href->{$yyyy};
544 9         22 foreach my $v ( keys $values_href->%* ) {
545 9         31 push @output, [ $stnid, $yyyy, $v ];
546             }
547             }
548             }
549            
550 3 100       23 return $return_list ? @output : tsv(\@output);
551             }
552            
553             =head2 datarow_as_hash ( $row_aref )
554            
555             This is a convenience method that may be used to convert table rows
556             returned by the row_sub callback subroutine of load_data from a perl
557             list into a hash. It automatically calls get_header to get the
558             headers for the table data. When you pass it a reference to a data
559             row (obtained vis the row_sub callback routine given to load_data)
560             it combines the elements of the data row list with the column headings
561             and returns a hash.
562            
563             =cut
564            
565 9     9 1 1844 method datarow_as_hash ( $row_aref ) {
  9         17  
  9         13  
  9         18  
566 9         30 my @header = $self->get_header( list => 1 );
567 9         19 my %h;
568            
569             ## no critic [ProhibitDoubleSigils]
570 9         94 @h{@header} = $row_aref->@*;
571            
572 9         121 return %h;
573             }
574            
575             =head2 get_missing_rows( list => 0 )
576            
577             In support of a -nogaps option, to generate detail output that does
578             not have any gaps due to missing data, this method gets a list of
579             rows for the months and days that had missing data for a given
580             station id in a given year.
581            
582             Returns lines of tab-separated strings.
583            
584             =over 4
585            
586             =item argument: list =>
587            
588             If the arguments include the 'list' keyword and a true value, then a
589             list is returned rather than tab-separated lines of text. Defaults
590             to false.
591            
592             =item option: nogaps
593            
594             Emits extra rows after the detail data rows to make up for missing
595             months or days. This is primarily so that if the data is charted
596             by date, then the x-axis will have all the dates from start to finish.
597             Otherwise, the chart and any trends that are projected on it will
598             be distorted by the missing data.
599            
600             =back
601            
602             =cut
603            
604 2     2 1 2742 method get_missing_rows ( %args ) {
  2         6  
  2         4  
  2         5  
605 2   33     22 my $return_list = $args{list} // $_return_list;
606            
607 2         4 my @output;
608            
609             my %loc;
610 2         7 map { $loc{$_} = $_station{$_}->name } keys %_station;
  4         13  
611            
612             ## no critic [ProhibitDoubleSigils]
613             ## no critic [ProhibitMagicNumbers]
614 2         9 foreach my $stnid ( sort keys $_missing_href->%* ) {
615 4         10 my $yyyy_href = $_missing_href->{$stnid};
616 4         13 foreach my $yyyy ( sort keys $yyyy_href->%* ) {
617 8         18 my $values_href = $yyyy_href->{$yyyy};
618 8         18 foreach my $v ( keys $values_href->%* ) {
619 8         21 my ($months_aref, $mmdd_aref) = _parse_missing_text($v);
620 8         31 foreach my $mm ( $months_aref->@* ) {
621 0         0 my $ndays = _days_in_month($yyyy, $mm);
622 0         0 foreach my $day (1..$ndays) {
623 0         0 push @output, [ $yyyy, $mm, $day, ($EMPTY) x 8, $stnid, $loc{$stnid} ];
624             }
625             }
626 8         17 foreach my $mmdd_aref ( $mmdd_aref->@* ) {
627 98         165 my ($mm,$dd) = $mmdd_aref->@*;
628 98         390 push @output, [ $yyyy, $mm, $dd, ($EMPTY) x 8, $stnid, $loc{$stnid} ];
629             }
630             }
631             }
632             }
633            
634 2 100       19 return $return_list ? @output : tsv(\@output);
635             }
636            
637             =head2 get_options ( list => 0, no_header => 0 )
638            
639             Get text which shows the options that were in effect for this
640             processing run, in a Getopt style. Includes a heading and a
641             footing with explanatory notes. If argument 'list' is true, returns
642             the lines as a list. Line [1] contains the options string.
643            
644             =over 4
645            
646             =item argument: list =>
647            
648             If the arguments include the 'list' keyword and a true value, then a
649             list is returned rather than tab-separated lines of text. Defaults
650             to false.
651            
652             =item argument: no_header =>
653            
654             If the arguments include the 'no_header' keyword and a true value,
655             then the return value will not include a header line or the explanatory
656             footing notes. Default is false.
657            
658             =back
659            
660             =cut
661            
662 5     5 1 3370 method get_options ( %args ) {
  5         13  
  5         12  
  5         8  
663 5   66     35 my $return_list = $args{list} // $_return_list;
664            
665 5         11 my @output;
666            
667             push @output, 'Options:'
668 5 50       26 unless $args{no_header};
669            
670 5         31 push @output, $TAB . $_ghcn_opt_obj->options_as_string;
671            
672 5 50       26 if ( not $args{no_header} ) {
673 5         11 push @output, $EMPTY;
674 5         21 push @output, $TAB . 'Note that quality is a percentage; radius in km';
675             }
676            
677 5 100       33 return $return_list ? @output : tsv(\@output);
678             }
679            
680             =head2 get_stations ( list => 0, kept => 1, no_header => 0 )
681            
682             Return lines of text with tab-separated columns describing each of
683             the stations for stations that were found to meet the filtering
684             criteria specified in the user options.
685            
686             =over 4
687            
688             =item argument: kept =>
689            
690             If the argument kept => 0 is specified, and load_data has already
691             been invoked, then the stations which were rejected due to quality flags
692             or missing data will be returned. If kept => 1 is specified, then
693             the stations that were kept will be returned.
694            
695             =item argument: list =>
696            
697             If the arguments include the 'list' keyword and a true value, then a
698             list is returned rather than tab-separated lines of text. Defaults
699             to false.
700            
701             =item argument: no_header =>
702            
703             If the arguments include the 'no_header' keyword and a true value,
704             then the return value will not include a header line. Default is
705             false.
706            
707             =back
708            
709             =cut
710            
711 15     15 1 123 method get_stations ( %args ) {
  15         38  
  15         81  
  15         33  
712 15   66     76 my $return_list = $args{list} // $_return_list;
713 15         39 my $kept = $args{kept};
714 15         42 my $no_header = $args{no_header};
715            
716 15         34 my @output;
717            
718             # no stations, so just return empty
719 15 50       70 return $return_list ? @output : tsv(\@output)
    100          
720             if $self->stn_filtered_count == 0;
721            
722 14 100       74 push @output, scalar Weather::GHCN::Station::Headings
723             unless $no_header;
724            
725 14         33 my $ii = 0;
726 14         1365 foreach my $id ( sort keys %_station ) {
727 1829         3430 my $stn = $_station{$id};
728 1829         2441 $ii++;
729 1829 100 100     5224 if (not defined $kept
      100        
      100        
      100        
730             or $kept and $stn->error_count == 0
731             or not $kept and $stn->error_count > 0)
732             {
733 46         157 push @output, scalar $stn->row;
734             }
735             }
736            
737 14 50       282 croak '*E* get_stations called before load_stations'
738             if $ii == 0;
739            
740 14 100       112 return $return_list ? @output : tsv(\@output);
741             }
742            
743             =head2 get_station_note_list ()
744            
745             Return a list consisting of tab-separated code/description pairs that
746             rejected stations were flagged with; i.e. the reasons for their
747             rejection.
748            
749             =cut
750            
751             # TODO: consider removing this as it is no longer used anywere
752 1     1 1 899 method get_station_note_list () {
  1         3  
  1         3  
753 1         4 my @stn_notes;
754            
755 1         7 my $notes_nrs = rng_new();
756            
757 1         6 foreach my $id ( sort keys %_station ) {
758 2         175 my $stn = $_station{$id};
759             # combine all the notes
760 2         9 $notes_nrs->add( $stn->note_nrs->as_array );
761             }
762            
763 1 50       123 if ( not $notes_nrs->is_empty ) {
764 1         12 foreach my $note_code ($notes_nrs->as_array) {
765 1         37 push @stn_notes, join $TAB, $note_code, $NoteMsg{$note_code};
766             }
767             }
768            
769 1         5 return @stn_notes;
770             }
771            
772             =head2 get_summary_data ( list => 0 )
773            
774             Gets a list of summarized the temperature or precipitation data
775             by day, month or year depending on the report option.
776            
777             Returns undef if the report option is 'detail'.
778            
779             The actual columns that are returned is dictated by the report option
780             and by the tavg and precip options provided when the object was
781             instantiated by new().
782            
783             =over 4
784            
785             =item argument: list =>
786            
787             If the arguments include the 'list' keyword and a true value, then a
788             list is returned rather than tab-separated lines of text. Defaults
789             to false.
790            
791             =item option: report
792            
793             Determines the level of summarization.
794            
795             =item option: range
796            
797             If the range option is provided, the output rows are restricted to
798             those years that are within the specified range(s).
799            
800             =back
801            
802             =cut
803            
804 6     6 1 66 method get_summary_data ( %args ) {
  6         19  
  6         20  
  6         14  
805 6   66     34 my $return_list = $args{list} // $_return_list;
806            
807             # when an 'detail' report is requested, we generate detail data only
808             # so there is no summary data to print
809 6 50       151 return if $Opt->report eq 'detail';
810            
811 6         101 my @output;
812            
813             # build hash of measure names and indices so measures can be
814             # inserted into the correct columns
815             my %measure_idx;
816 6         17 my $ii = 0;
817 6         41 foreach my $m ( $_measures_obj->measures ) {
818 21         57 $measure_idx{$m} = $_measure_begin_idx + $ii++;
819             }
820            
821 6         133 my $opt_range_nrs = rng_new($Opt->range);
822            
823             # generate and print the data rows
824 6         277 foreach my $key ( sort keys $_aggregate_href->%* ) {
825 433         1642 my ($year, $month, $day) = unpack 'A4 A2 A2', $key;
826            
827 433 50 33     8258 next if $Opt->range and not $opt_range_nrs->contains($year);
828            
829 433         14585 my $row = $_aggregate_href->{$key};
830            
831 433         609 my @row;
832 433         903 push @row, $year;
833 433 100 100     7199 push @row, $month if $Opt->report eq 'daily' or $Opt->report eq 'monthly';
834 433 100       11619 push @row, $day if $Opt->report eq 'daily';
835 433         4638 push @row, int($year / 10) * 10; ## no critic [ProhibitMagicNumbers]
836 433 100       6609 if ( not $Opt->report eq 'yearly' ) {
837 431         4443 push @row, _seasonal_decade($year, $month);
838 431         929 push @row, _seasonal_year($year, $month);
839 431         791 push @row, _seasonal_qtr($year, $month);
840             }
841            
842 433         1149 foreach my $elem ( $_measures_obj->measures ) {
843 1302         2893 my $sum = $row->{$elem}->[0];
844 1302         1927 my $cnt = $row->{$elem}->[1];
845 1302 100 50     6792 $row[ $measure_idx{$elem} ] = $cnt ? sprintf '%.2f', ($sum // 0) / $cnt : $EMPTY;
846             }
847            
848 433         1240 push @output, \@row;
849             }
850            
851 6 100       104 return $return_list ? @output : tsv(\@output);
852             }
853            
854            
855             =head2 get_timing_stats ( list => 0 )
856            
857             Get a list of the timers, with durations and notes, in alphabetical
858             order by timer label.
859            
860             =over 4
861            
862             =item argument: list =>
863            
864             If the arguments include the 'list' keyword and a true value, then a
865             list is returned rather than tab-separated lines of text. Defaults
866             to false.
867            
868             =back
869            
870             =cut
871            
872 3     3 1 794 method get_timing_stats ( %args ) {
  3         7  
  3         9  
  3         7  
873 3   66     14 my $return_list = $args{list} // $_return_list;
874            
875 3         7 my @output;
876            
877 3         13 foreach my $k ( $_tstats->get_timers() ) {
878 38   66     89 my $note = $_tstats->get_note($k) // $EMPTY;
879             ## no critic [ProhibitMagicNumbers]
880 38         87 my $dur = sprintf '%.1f', $_tstats->get_duration($k) * 1000.0;
881 38         86 my $label = $k . $SPACE . $note;
882             push @output, $args{rows_as_tsv}
883 38 50       137 ? [ $dur, $label ]
884             : join $TAB, $dur, $label;
885             }
886            
887 3 100       23 return $return_list ? @output : tsv(\@output);
888             }
889            
890             =head2 has_missing_data ()
891            
892             Returns true if any missing data was detected amongst the stations
893             that were processed. The calling script can use this to decide
894             whether to issue a warning to the user. A list of missing data
895             specifics can be sent to the output by calling method
896             get_missing_data_ranges.
897            
898             =cut
899            
900 4     4 1 24 method has_missing_data () {
  4         9  
  4         9  
901 4         16 my $keycount = ( keys $_missing_href->%* );
902 4 100       173 return $keycount ? $TRUE : $FALSE;
903             }
904            
905             =head2 load_data ( progress_sub => undef, row_sub => sub { say @_ } )
906            
907             Load the daily weather data for each of the stations that are were
908             loaded into the collection. Print the data if option report detail is
909             given. Otherwise cache the data so it can be aggregated at a later
910             step.
911            
912             =over 4
913            
914             =item argument: progress_sub => undef
915            
916             As fetching and parsing each daily data page can take some time, an
917             optional callback hook is provided so the caller can emit a
918             progress message before each station's data is loaded; e.g.
919             progress => sub{ say {STDERR} @_ }.
920            
921             =item argument: row_sub => sub { say @_ }
922            
923             Optional callback hook to allow the caller to provide their own
924             subroutine for printing (or collecting in a list, or both) the
925             row-level station data that is fetched when the report option is 'detail'.
926             Defaults to printing via the 'say' operator.
927            
928             =item option: report
929            
930             When report detail is specified, the weather data for each station is
931             printed immediately (via the row_sub callback hook).
932            
933             For all other report options, the data is fetched from each station
934             and kept in a cache so that it can be aggregated by invoking
935             summarize_data(). The row_sub hook is not invoked.
936            
937             =back
938            
939             =cut
940            
941 12     12 1 7798 method load_data ( %args ) {
  12         34  
  12         41  
  12         29  
942 12         39 my $progress_callback = $args{progress_sub};
943 12         35 my $row_callback = $args{row_sub};
944            
945             my @station_objs =
946 3         22 sort { $a->id cmp $b->id }
947 12         47 grep { $_->error_count == 0 } values %_station;
  15         63  
948            
949 12         45 my $stn_count = @station_objs;
950            
951 12         44 my $ii = 0;
952 12         49 foreach my $stn ( @station_objs ) {
953 15         73 my $daily_url = $GHCN_DATA . $stn->id . '.dly';
954 15         99 my $content = $self->_fetch_url($daily_url, 'URI::Fetch_daily');
955            
956 15 100       86 if ($progress_callback) {
957 3     3   30 no strict 'refs'; ## no critic [ProhibitNoStrict]
  3         7  
  3         46022  
958 4         23 my $msg = sprintf 'processing station %d/%d %s %s', ++$ii, $stn_count, $stn->id, $stn->name;;
959 4         23 $progress_callback->($msg);
960             }
961            
962 15 50       77 if (not $content) {
963 0         0 $stn->add_note($ERR_FETCH, $stn->id . $TAB . 'fetch daily URL failed');
964 0         0 next;
965             }
966            
967 15         80 my $insufficient_quality = $self->_load_daily_data($stn, $content);
968            
969 15 100       349 if ( $Opt->report eq 'detail' ) {
970 4         100 $self->_print_detail_data( $_measure_begin_idx, $stn, $row_callback );
971             } else {
972 11 100       264 $self->_aggregate_station_data($stn)
973             unless $insufficient_quality;
974             }
975            
976 15         109 $self->_clear_daily_data();
977            
978 15         114 $self->_capture_data_hash_stats($stn->id, $ii);
979             }
980            
981 12         77 return;
982             }
983            
984             =head2 load_stations ()
985            
986             Read the GHCN stations list and the stations inventory list and create
987             a hash of Station objects, keyed on station id, filtered according
988             to the options provided in set_options().
989            
990             Returns a hash of Weather::GHCN::Station objects, keyed on station id.
991            
992             =over 4
993            
994             =item option: country
995            
996             Selects only those stations that match the 2-digit GEC (formerly
997             FIPS) country code or that uniquely match the name or partial name
998             given in .
999            
1000             =item option: state
1001            
1002             Selects only those stations that match a US state or Canadian provinc
1003             code.
1004            
1005             =item option: location
1006            
1007             Selects only those stations with a name that matches the specified
1008             pattern, which can be either a station id, or a comma-separated
1009             list of station id's, or a regex. If a regex, then it is anchored
1010             on the left and whitespace is NOT ignored.
1011            
1012             =item option: gps
1013            
1014             This option selects stations within a certain radius of the designated
1015             latitude and longitude, expressed as positive and negative numbers
1016             (not using N, S, W, E designators).
1017            
1018             =item option: radius
1019            
1020             In conjunction the gps options, determines the radius in kilometers
1021             for the search area. Defaults to 25 km.
1022            
1023             =item option: gsn
1024            
1025             Select only GCOS Surface Network stations, which is a baseline
1026             network comprising a subset of about 1000 stations chosen mainly to
1027             give a fairly uniform spatial coverage from places where there is a
1028             good length and quality of data record. See
1029             L
1030             cos-surface-network-gsn-program-overview>
1031            
1032             =back
1033            
1034             =cut
1035            
1036 19     19 1 13758 method load_stations () {
  19         53  
  19         35  
1037            
1038 19         103 my $stations_content = $self->_fetch_url( $GHCN_STN_LIST_URL, 'URI::Fetch_stn');
1039            
1040 19 50       22691 if ( $stations_content =~ m{(.*?)}xms ) {
1041 0         0 croak '*E* unable to fetch data from ' . $GHCN_STN_LIST_URL . ': ' . $1;
1042             }
1043            
1044             ## no critic [InputOutput::RequireBriefOpen]
1045 19 50   2   351208 open my $stn_fh, '<', \$stations_content
  2         28  
  2         5  
  2         20  
1046             or croak '*E* unable to open stations_content string';
1047            
1048 19         42621 $_tstats->start('Parse_stn');
1049            
1050 19 100       129 my $is_stnid_filter = keys $_stnid_filter_href->%*
1051             if $_stnid_filter_href;
1052            
1053 19         69 my %stnidx;
1054            
1055             # Scan the station table
1056             # - filtering on country, state, location and GIS distance according to options
1057 19         270 while ( my $line = <$stn_fh> ) {
1058 2318893         13225552 $_stn_count++; # increment the stn count in the object
1059            
1060             # |--- 0---|--- 10---|--- 20---|--- 30---|--- 40---|--- 50---|--- 60---
1061             # |123456789|123456789|123456789|123456789|123456789|123456789|123456789
1062             # (stationid).(latitu).(longitu).(elev).st.--name-----------------------
1063             # ACW00011604 17.1167 -61.7833 10.1 ST JOHNS COOLIDGE FLD
1064            
1065             ## no critic [ProhibitDoubleSigils]
1066             ## no critic [ProhibitMagicNumbers]
1067 2318893         4144469 my $id = substr $line, 0, 11;
1068            
1069 2318893 100 100     5360200 next if $is_stnid_filter and not $_stnid_filter_href->{$id};
1070            
1071 1952758         5007268 my $lat = 0 + substr $line, 12, 8; # coerce to number
1072 1952758         3810323 my $long = 0 + substr $line, 21, 9; # coerce to number
1073 1952758         3755601 my $elev = 0 + substr $line, 31, 6; # coerce to number
1074 1952758         3173635 my $state = substr $line, 38, 2;
1075 1952758         3160412 my $name = substr $line, 41, 30;
1076 1952758         2911928 my $gsn_flag = substr $line, 72, 3;
1077             # my $hcr_crn_flag = substr $line, 76, 3;
1078             # my $wmo_id = substr $line, 80, 5;
1079            
1080 1952758         2803075 my $country = substr $id, 0, 2;
1081 1952758         7041239 $name =~ s{ \s+ \Z }{}xms;
1082            
1083 1952758 100       4480259 my $gsn = $gsn_flag eq 'GSN' ? 'GSN' : $EMPTY;
1084            
1085             ## use critic [ProhibitMagicNumbers]
1086            
1087 1952758 100       3817081 if (not $is_stnid_filter) {
1088             ## no critic [RequireExtendedFormatting]
1089 1952752         33234700 my $opt_country = $Opt->country;
1090 1952752 100 100     45409551 next if $Opt->country and $country !~ m{\A$opt_country}msi;
1091            
1092 1685072         40334868 my $opt_state = $Opt->state;
1093 1685072 100 100     38165896 next if $Opt->state and $state !~ m{\A$opt_state}msi;
1094            
1095             ## use critic [RequireExtendedFormatting]
1096 1351232 100 100     31222010 next if $Opt->location and not _match_location($id, $name, $Opt->location);
1097            
1098 123845 100       3019537 if ( $Opt->gps ) {
1099 122047         2885924 my ($opt_lat, $opt_long) = split m{[,;\s]}xms, $Opt->gps;
1100 122047         1456233 my $distance = _gis_distance($opt_lat, $opt_long, $lat, $long);
1101 122047 100       2197611 next if $distance > $Opt->radius;
1102             }
1103            
1104 1818 50 33     43966 next if $Opt->gsn and not $gsn;
1105             }
1106            
1107 1824         28669 $_station{$id} = Weather::GHCN::Station->new(
1108             id => $id,
1109             country => $country,
1110             state => $state,
1111             active => $EMPTY,
1112             lat => $lat,
1113             long => $long,
1114             elev => $elev,
1115             name => $name,
1116             gsn => $gsn
1117             );
1118            
1119 1824         6615 $stnidx{$_station{$id}->coordinates}++;
1120             }
1121 19 50       334 close $stn_fh or croak '*E* unable to close stations_content string';
1122            
1123 19         186 $_tstats->stop('Parse_stn');
1124            
1125 19         103 $_stn_selected_count = keys %_station;
1126            
1127             # assign a unique index to each station with matching coordinates
1128 19         43 my $ii = 0;
1129 19         1446 foreach my $coord (sort keys %stnidx) {
1130 1805         3129 $stnidx{$coord} = ++$ii;
1131             }
1132            
1133 19         1320 foreach my $stnid ( sort keys %_station ) {
1134 1824         2905 my $stn = $_station{$stnid};
1135 1824         3845 $stn->idx = $stnidx{$stn->coordinates};
1136             }
1137            
1138 19         243 $_stn_filtered_count = $self->_load_station_inventories();
1139            
1140 19         844 return \%_station;
1141             }
1142            
1143             =head2 report_kml( list => 0 )
1144            
1145             Output the coordinates of the station collection in KML format, for
1146             import into Google Earth as placemarks. The active range of each
1147             station will be included as timespans so that you can view the
1148             placemarks across time.
1149            
1150             =over 4
1151            
1152             =item argument: list
1153            
1154             If the argument list contains the 'list' keyword and a true value,
1155             then a perl list is returned. Otherwise, a string consisting of lines
1156             of text is returned.
1157            
1158             =item option: kml
1159            
1160             Print KML on stdout.
1161            
1162             =item option: kmlcolor
1163            
1164             A color name, one of blue, green, azure, purple, red, white or yellow.
1165             Only the first character is recognized, so 'b' and 'bob' both result
1166             in blue. All colors are given an opacity of 50 (the range is 00 to ff).
1167            
1168             =back
1169            
1170             =cut
1171            
1172 2     2 1 1295 method report_kml ( %arg ) {
  2         5  
  2         6  
  2         4  
1173 2   66     12 my $return_list = $arg{list} // $_return_list;
1174            
1175 2         50 my $kml_color = _get_kml_color( $Opt->kmlcolor );
1176 2         7 my @output;
1177            
1178 2         9 push @output, '';
1179 2         6 push @output, '';
1180 2         5 push @output, '';
1181 2         6 push @output, ' ';
1190            
1191 2         8 foreach my $stn ( values %_station ) {
1192 13 100       36 next if $stn->error_count;
1193             # TODO: use ->sets to get a list of spans and use the first span instead of splitting run_list
1194 4         24 my ($start, $end) = split m{ [-] }xms, $stn->active;
1195            
1196 4         20 my $desc = $stn->description();
1197            
1198 4         14 push @output, ' ';
1199 4         10 push @output, ' #mypin';
1200 4         18 push @output, sprintf ' %s', encode_entities($stn->name);
1201 4         97 push @output, sprintf ' %s', encode_entities($desc);
1202 4         68 push @output, ' ';
1203 4         23 push @output, sprintf ' %s-01-01T00:00:00Z', $start;
1204 4         141 push @output, sprintf ' %s-12-31T23:59:59Z', $end;
1205 4         14 push @output, ' ';
1206 4         18 push @output, sprintf ' %f, %f, %f', $stn->long, $stn->lat, $stn->elev;
1207 4         18 push @output, ' ';
1208             }
1209            
1210 2         8 push @output, '';
1211 2         6 push @output, '';
1212            
1213 2 100       24 return $return_list ? @output : tsv(\@output);
1214             }
1215            
1216             =head2 report_urls( list => 0, curl => 0 )
1217            
1218             Output the URL of the .dly (daily weather data) file for each of the
1219             stations that meet the selection criteria.
1220            
1221             =over 4
1222            
1223             =item argument: list
1224            
1225             If the argument list contains the 'list' keyword and a true value,
1226             then a perl list is returned. Otherwise, a string consisting of lines
1227             of text is returned.
1228            
1229             =item argument: curl
1230            
1231             If the argument list contains the 'curl' keyword and a true value,
1232             then the output will be a set of lines that can be saved in a file
1233             for subsequent input to the B program using the B<-K> option.
1234             This facilitates bulk fetching of .dly files into the cache.
1235            
1236             =back
1237            
1238             =cut
1239            
1240 0     0 1 0 method report_urls ( %arg ) {
  0         0  
  0         0  
  0         0  
1241 0   0     0 my $return_list = $arg{list} // $_return_list;
1242            
1243 0         0 my @output;
1244            
1245             push @output, "# Use curl -K to download these URL's"
1246 0 0       0 if $arg{curl};
1247            
1248 0         0 foreach my $stn ( values %_station ) {
1249 0 0       0 next if $stn->error_count;
1250             # TODO: use ->sets to get a list of spans and use the first span instead of splitting run_list
1251 0         0 my ($start, $end) = split m{ [-] }xms, $stn->active;
1252            
1253 0 0       0 if ( $arg{curl} ) {
1254 0         0 my @parts = split '/', $stn->url;
1255 0         0 push @output, 'output = ' . $parts[-1];
1256 0         0 push @output, 'url = ' . $stn->url;
1257             } else {
1258 0         0 push @output, $stn->url;
1259             }
1260             }
1261            
1262 0 0       0 return $return_list ? @output : tsv(\@output);
1263             }
1264            
1265             =head2 ($opt, @errors) = set_options ( %args )
1266            
1267             Set various options for this StationTable instance. These options
1268             will affect the processing and output by subsequent method calls.
1269            
1270             Returns an Option object and a list of errors. It is advised you
1271             check @errors after calling set_options to report the errors and to
1272             cease processing if there are any; e.g. I.
1273            
1274             You may want to set up a file-scoped lexical variable to hold the
1275             options object. That way it is accessible throughout your code.
1276             The typical calling pattern would look like this:
1277            
1278             my $Opt; # a file-scope lexical
1279            
1280             sub run (@ARGV) {
1281             my $ghcn = Weather::GHCN::StationTable->new;
1282            
1283             my @errors;
1284             ($Opt, @errors) = set_options(...);
1285             die @errors if @errors;
1286             ...
1287             }
1288            
1289             =over 4
1290            
1291             =item timing_stats => $TimingStats_obj
1292            
1293             This optional argument should point to a TimingStats object that was
1294             created by the caller and will be used to collect timing statistics.
1295            
1296             =item hash_stats => \%hash_stats
1297            
1298             This optional argument should be a reference to a hash that was
1299             created by the caller and will be used to collect performance and
1300             memory statistics.
1301            
1302             =back
1303            
1304             =cut
1305            
1306 22     22 1 9503 method set_options (%user_options) {
  22         64  
  22         149  
  22         45  
1307            
1308 22 100       134 if ( defined $user_options{'profile'} ) {
1309             # save the expanded profile file path in the object
1310 7         47 $_profile_file = path( $user_options{'profile'} )->absolute()->stringify;
1311 7         568 $_profile_href = _get_profile_options($_profile_file);
1312             }
1313            
1314 22   66     265 $_ghcn_opt_obj //= Weather::GHCN::Options->new();
1315             # combine user-specified options with the defaults
1316 22         132 ($_opt_href, $_opt_obj) = $_ghcn_opt_obj->combine_options(\%user_options, $_profile_href);
1317            
1318 22 50       113 if ( $_opt_href->{'cachedir'} ) {
1319 22         171 $_cachedir = path( $_opt_href->{'cachedir'} )->absolute()->stringify;
1320 22         2828 $_cache_obj = Weather::GHCN::CacheURI->new($_cachedir, $_opt_obj->refresh);
1321             } else {
1322 0         0 $_cache_obj = Weather::GHCN::CacheURI->new($EMPTY, $_opt_obj->refresh);
1323             }
1324            
1325            
1326 22         168 my @errors = $_ghcn_opt_obj->validate();
1327            
1328 22 100 100     168 if ( defined $_opt_href->{'aliases'} and defined $_opt_href->{'location'} ) {
1329             # if the location matches an aliases, then pull the list of
1330             # stations from the alias definition and assign it to the
1331             # $_stnid_filter
1332 6   66     35 my $stnid_string = $_opt_href->{'aliases'}->{ $_opt_href->{'location'} } // $EMPTY;
1333 6 100       21 if ($stnid_string) {
1334 4         31 my @stns = split m{ [,;\s] }xms, $stnid_string;
1335 4         9 my %stnid_filter;
1336 4         17 foreach my $stnid (@stns) {
1337 8         21 $stnid_filter{$stnid} = $TRUE;
1338             }
1339 4         11 $_stnid_filter_href = \%stnid_filter;
1340 4         15 $_opt_href->{'location'} = undef;
1341             }
1342             }
1343            
1344             # update the combined options hash in the Options object
1345 22         113 $_ghcn_opt_obj->opt_href = $_opt_href;
1346             # update the combined options object in the Options object
1347 22         97 $_ghcn_opt_obj->opt_obj = $_opt_obj;
1348             # save the combined option object in a file-scoped lexical for use throughout this code
1349 22         118 $Opt = $_opt_obj;
1350            
1351 22         235 $_measures_obj = Weather::GHCN::Measures->new($_opt_href);
1352            
1353 22         146 return ($_opt_obj, @errors);
1354             }
1355            
1356             =head2 summarize_data ()
1357            
1358             Aggregate the daily weather data for the stations that were loaded,
1359             according to the report option.
1360            
1361             =over 4
1362            
1363             =item option: report => 'daily|monthly|yearly'
1364            
1365             When the report option is 'detail', no summarization is needed and the
1366             method immediately returns undef.
1367            
1368             =back
1369            
1370            
1371             =cut
1372            
1373 8     8 1 63 method summarize_data () {
  8         19  
  8         18  
1374            
1375             # when an 'detail' report is requested, we generate detail data only
1376             # so there is no need to summarize data.
1377 8 50       202 return if $Opt->report eq 'detail';
1378            
1379             # We'll be replacing $_aggregate_href with this hash after we're
1380             # done, but we can't loop over $_aggregate_href and be changing
1381             # it within the loop. Hence the need for another hash.
1382 8         147 my %summary;
1383            
1384 8         40 $_tstats->start('Summarize_data');
1385            
1386 8         60 while ( my ($k,$href) = each $_aggregate_href->%* ) {
1387             ## no critic [ProhibitMagicNumbers]
1388 3311         6921 my $year = substr $k, 0, 4;
1389 3311         5044 my $month = substr $k, 4, 2;
1390 3311         4952 my $day = substr $k, 6, 2;
1391             ## use critic [ProhibitMagicNumbers]
1392            
1393 3311         4735 my $key = $year;
1394 3311 100 100     63221 $key .= $month if $Opt->report eq 'monthly' or $Opt->report eq 'daily';
1395 3311 100       133227 $key .= $day if $Opt->report eq 'daily';
1396            
1397 3311         38350 foreach my $elem ( keys $href->%* ) {
1398 9989         18541 my $a = $_aggregate_href->{$k}->{$elem};
1399            
1400 9989         21242 my $v = _ddivide( $a->[0], $a->[1] );
1401            
1402 9989   100     25131 my $s = $summary{$key}{$elem} //= [undef, undef];
1403            
1404 9989 100 66     31734 if ($elem eq 'TMIN') {
    100 100        
    100          
1405             # for TMIN we keep the minimum value
1406 3270         6159 $s->[0] = _dmin($s->[0], $v);
1407 3270         9969 $s->[1] = 1.0;
1408             }
1409             elsif ( $elem eq 'TMAX' or $elem eq 'SNWD' ) {
1410             # For TMAX SNOW SNWD PRCP we keep the maximum value
1411 3293         6107 $s->[0] = _dmax($s->[0], $v);
1412 3293         8483 $s->[1] = 1.0;
1413             }
1414             elsif ( $elem eq 'PRCP' or $elem eq 'SNOW' ) {
1415             # for PRCP and SNOW, sum and use a count of 1 so we sum across time
1416 174         332 $s->[0] = _dsum($s->[0], $v);
1417 174         467 $s->[1] = 1.0;
1418             }
1419             else {
1420             # for TAVG and Tavg and the anomaly values we keep the sum and count,
1421             # so we can calculate an average of them across time
1422 3252         6334 $s->[0] = _dsum($s->[0], $v);
1423 3252         6442 $s->[1] = _dcount($s->[1], $v);
1424             }
1425             }
1426             }
1427            
1428 8         216 $_tstats->stop('Summarize_data', _memsize(\%summary, $Opt->performance));
1429            
1430 8         3088 $_aggregate_href = \%summary;
1431            
1432 8         141 return;
1433             }
1434            
1435             =head2 tstats ()
1436            
1437             Provides access to the TimingStats object so the caller can start
1438             and stop script-level timers.
1439            
1440             =cut
1441            
1442 18     18 1 781 method tstats () {
  18         35  
  18         30  
1443 18         89 return $_tstats;
1444             }
1445            
1446             =head2 DOES
1447            
1448             Defined by Object::Pad. Included for POD::Coverage.
1449            
1450             =head2 META
1451            
1452             Defined by Object::Pad. Included for POD::Coverage.
1453            
1454             =cut
1455            
1456             #---------------------------------------------------------------------
1457             # Private Methods
1458             #---------------------------------------------------------------------
1459            
1460 10     10   46 method _aggregate_station_data ($stn) {
  10         25  
  10         30  
  10         21  
1461            
1462 10         45 $_tstats->start('Aggregate_station_data');
1463            
1464            
1465 10         72 while ( my ($yyyymmdd,$href) = each $_daily_href->%* ) {
1466 3997         11214 while ( my ($elem,$v) = each $href->%* ) {
1467             # autovivify the slot, initialize the array, and get a reference to the
1468             # slot to avoid the overhead of multiple key lookups later
1469 11953   100     40579 my $slot_href = $_aggregate_href->{$yyyymmdd}->{$elem} //= [undef, undef];
1470            
1471             # We accumulate into two array elements. The first contains
1472             # the sum and the second the count so we can compute avg = sum/count.
1473             # _dsum and _dcount return the second arg if the first arg is undef
1474             # otherwise, _dsum returns the sum of the two args and _dcount returns the
1475             # value of the first arg incremented by 1
1476 11953         21238 $slot_href->[0] = _dsum($slot_href->[0], $v);
1477 11953         19556 $slot_href->[1] = _dcount($slot_href->[1], $v);
1478             }
1479             }
1480            
1481 10         470 $_tstats->stop('Aggregate_station_data', _memsize($_aggregate_href, $Opt->performance));
1482            
1483 10         38 return;
1484             }
1485            
1486 15     15   65 method _capture_data_hash_stats ($subject, $iter) {
  15         32  
  15         35  
  15         40  
  15         25  
1487            
1488 15         4762 $_hstats{'aggregate'}{$subject}{$iter} = Devel::Size::total_size( $_aggregate_href );
1489 15         343 $_hstats{'flag_cnts'}{$subject}{$iter} = Devel::Size::total_size( $_flag_cnts_href );
1490 15         157 $_hstats{'daily'}{$subject}{$iter} = Devel::Size::total_size( $_daily_href );
1491 15         152 $_hstats{'baseline'}{$subject}{$iter} = Devel::Size::total_size( $_baseline_href );
1492            
1493 15         90 return;
1494             }
1495            
1496 15     15   59 method _clear_daily_data () {
  15         30  
  15         36  
1497            
1498 15         2307 $_daily_href = { };
1499            
1500 15         52 return;
1501             }
1502            
1503 1     1   4 method _compute_anomalies () {
  1         3  
  1         4  
1504            
1505 1         5 $_tstats->start('Compute_anomalies');
1506            
1507 1         10 while ( my ($k,$href) = each $_daily_href->%* ) {
1508 266         659 my ($year,$month,$day) = unpack 'A4 A2 A2', $k;
1509            
1510 266         519 my $base_href = $_baseline_href->{ $month . $day };
1511            
1512 266         652 foreach my $elem ( keys $href->%* ) {
1513 784 50       1358 next if not exists $base_href->{$elem};
1514 784         1988 $href->{'A_' . $elem} = $href->{$elem} - $base_href->{$elem};
1515             }
1516             }
1517            
1518 1         8 $_tstats->stop('Compute_anomalies');
1519            
1520 1         4 return;
1521             }
1522            
1523 1     1   22 method _compute_baseline ($stn, $baseline) {
  1         4  
  1         2  
  1         3  
  1         2  
1524            
1525 1         7 $_tstats->start('Compute_baseline');
1526            
1527 1         6 my $baseline_nrs = rng_new($baseline);
1528            
1529 1         6 my %baseline_data;
1530            
1531 1         6 my $gap_nrs = rng_new( $baseline_nrs->as_string );
1532            
1533 1         9 while ( my ($k,$href) = each $_daily_href->%* ) {
1534 266         6588 my ($year,$month,$day) = unpack 'A4 A2 A2', $k;
1535            
1536 266 100       559 next unless $baseline_nrs->contains($year);
1537            
1538 4 50       98 $gap_nrs->remove($year) if $baseline_nrs->contains($year);
1539            
1540 4         569 while ( my ($elem,$v) = each $href->%* ) {
1541 12   50     42 $baseline_data{ $month . $day }->{$elem}->[0] += $v // 0;
1542 12         48 $baseline_data{ $month . $day }->{$elem}->[1] ++;
1543             }
1544             }
1545            
1546 1 50       31 if ($gap_nrs->cardinality > 0) {
1547 0         0 my $msg = sprintf "%s\tanomalies not calculated\tmissing %d years from the baseline (%s)",
1548             $stn->id, $gap_nrs->cardinality, $gap_nrs->as_string;
1549 0         0 $stn->add_note($WARN_ANOM, $msg);
1550 0         0 $_baseline_href = { };
1551 0         0 return;
1552             }
1553            
1554             ## no critic [ProhibitDoubleSigils]
1555 1         32 while ( my ($md,$elem_href) = each %baseline_data ) {
1556 2         10 while ( my ($elem, $aref) = each $elem_href->%* ) {
1557 6         15 my ($sum, $count) = $aref->@*;
1558 6         23 $baseline_data{$md}->{$elem} = $sum / $count;
1559             }
1560             }
1561            
1562 1         5 $_baseline_href = \%baseline_data;
1563            
1564 1         7 $_tstats->stop('Compute_baseline');
1565            
1566 1         5 return;
1567             }
1568            
1569 15     15   52 method _compute_mean_and_counts ($stn) {
  15         32  
  15         35  
  15         29  
1570            
1571 15         61 $_tstats->start('Compute_mean');
1572            
1573             # In this subroutine we loop through the daily min and max values that
1574             # were captured from the daily page and replace them with the calculated
1575             # mean. We replace in order to save on the memory and performance overhead
1576             # of creating a new hash table. Because the daily page has separate rows for
1577             # max and min, we have to scan the entire page in order to collect the max and
1578             # min for each day. It's possible there may be a day with one or both entries
1579             # missing, in which case we undef the corresponding entry since it's impossible
1580             # to calculate the mean in that case.
1581            
1582 15         121 while ( my ($yyyymmdd, $href) = each $_daily_href->%* ) {
1583            
1584 5426         9587 my $max = $href->{TMAX};
1585 5426         7976 my $min = $href->{TMIN};
1586            
1587 5426 100 100     14442 if (defined $max and defined $min) {
1588 5290         17065 $href->{Tavg} = ($max + $min) / 2.0;
1589             }
1590             }
1591            
1592 15         106 $_tstats->stop('Compute_mean');
1593            
1594 15         42 return;
1595             }
1596            
1597 16     16   1876 method _compute_quality ($stn, $context_msg, $day_count, $range, $quality) {
  16         47  
  16         36  
  16         41  
  16         40  
  16         36  
  16         42  
  16         27  
1598            
1599 16         82 $_tstats->start('Compute_quality');
1600            
1601 16         44 my $expected_days = 0;
1602 16         47 my $insufficient_quality = 0;
1603            
1604 16         62 map { $expected_days += _days_in_year($_) } rng_new($range)->as_array();
  24         669  
1605            
1606             ## no critic [ProhibitMagicNumbers]
1607 16         122 my $data_quality = int(($day_count / $expected_days) * 1000 + 0.5) / 10.0;
1608            
1609 16 100       63 if ( $data_quality < $quality ) {
1610 1         6 my $msg = sprintf "%s\tinsufficient data\tstation only has %d days in %s and needs %d (%0.1f%% < %d%%)",
1611             $stn->id, $day_count, $context_msg, $expected_days, $data_quality, $quality;
1612 1         8 $stn->add_note($ERR_INSUFF, $msg);
1613 1         3 $insufficient_quality++;
1614             }
1615            
1616 16         71 $_tstats->stop('Compute_quality');
1617            
1618 16         409 return $insufficient_quality;
1619             }
1620            
1621 53     53   204 method _fetch_url ($url, $timer_label=$EMPTY) {
  53         134  
  53         151  
  53         149  
  53         93  
1622            
1623 53 50       429 $_tstats->start($timer_label) if $timer_label;
1624            
1625 53         444 my ($from_cache, $content) = $_cache_obj->fetch($url);
1626            
1627 53 50       332 carp '*I* cache refresh is set to ' . $_cache_obj->refresh
1628             unless $content;
1629 53 50       233 croak '*E* unable to fetch data from ' . $url
1630             unless $content;
1631            
1632 53 50       676 $_tstats->stop($timer_label) if $timer_label;
1633            
1634 53         16308 return $content;
1635             }
1636            
1637            
1638 19     19   101 method _filter_stations ($stations_href) {
  19         46  
  19         39  
  19         52  
1639 19         128 $_tstats->start('Filter_stn');
1640            
1641 19         1338 my $opt_range_nrs = rng_new( $Opt->range );
1642 19         510 my $opt_active_nrs = rng_new( $Opt->active );
1643            
1644             ## no critic [ProhibitDoubleSigils]
1645 19         237 foreach my $stn (values $stations_href->%*) {
1646 1824         25335 my $stn_active_nrs = rng_new( $stn->active );
1647            
1648             ##debug '=== station ', $stn->id, ' ', $stn->name;
1649            
1650 1824 100       5997 $stn->add_note($ERR_METRICS)
1651             if not $stn->elems_href->%*;
1652            
1653 1824 50 66     40026 $stn->add_note($ERR_RANGE_FULL)
1654             if $Opt->range and not $opt_range_nrs->subset($stn_active_nrs);
1655            
1656 1824 50 66     55255 $stn->add_note($ERR_NO_GIS)
1657             if $Opt->gps and $stn->coordinates eq $EMPTY;
1658            
1659 1824 100 100     44391 $stn->add_note($ERR_NOACTIVE)
1660             if $Opt->active and not $stn->active;
1661            
1662 1824 100       4549 if ($stn->active) {
1663 617 100       10180 if ($Opt->partial) {
1664 548         6620 my $s = $opt_active_nrs->intersection($stn_active_nrs);
1665            
1666 548 100 66     228796 $stn->add_note($ERR_NOT_PARTIAL)
1667             if $Opt->active and $s->is_empty;
1668             } else {
1669 69 100 100     1767 $stn->add_note($ERR_NOT_ACTIVE)
1670             if $Opt->active and not $opt_active_nrs->subset($stn_active_nrs);
1671             }
1672            
1673             } else {
1674 1207 50       24410 $stn->add_note($ERR_NOACTIVE)
1675             if $Opt->range;
1676             }
1677             }
1678            
1679 19         1204 $_tstats->stop('Filter_stn');
1680            
1681 19         271 my $count = grep { $_->error_count == 0 } values $stations_href->%*;
  1824         3867  
1682            
1683 19         95 return $count;
1684             }
1685            
1686 15     15   59 method _initialize_flag_cnts () {
  15         45  
  15         33  
1687            
1688             # initialize KEPT flags for all metrics, so all get printed
1689 15         93 foreach my $elem ( $_measures_obj->measures ) {
1690 55   100     271 $_flag_cnts_href->{$elem}->{KEPT} //= 0;
1691             }
1692            
1693 15         54 return;
1694             }
1695            
1696 15     15   59 method _load_daily_data ($stn, $stn_content) {
  15         32  
  15         35  
  15         50  
  15         32  
1697            
1698 15         42 my %gaps;
1699             my %baseline_days;
1700            
1701 15         497 my $opt_range_nrs = rng_new($Opt->range);
1702 15         419 my $opt_baseline_nrs = rng_new($Opt->baseline);
1703 15         402 my $opt_fmonth_nrs = rng_new($Opt->fmonth);
1704 15         401 my $opt_fday_nrs = rng_new($Opt->fday);
1705            
1706 15         155 $self->_initialize_flag_cnts();
1707            
1708             ## no critic [ProhibitDoubleSigils]
1709             ## no critic [ProhibitMagicNumbers]
1710            
1711 15         86 $_tstats->start('Load_daily_data');
1712            
1713             ## no critic [InputOutput::RequireBriefOpen]
1714 15 50       50638 open my $stn_fh, '<', \$stn_content
1715             or croak '*E* unable to open station daily content';
1716            
1717 15         181 while ( my $line = <$stn_fh> ) {
1718            
1719             # |--- 0---|--- 10---|--- 20---|--- 30---|--- 40---|--- 50---|
1720             # |123456789|123456789|123456789|123456789|123456789|123456789|
1721             # iiiiiiiiiiiyyyymmeeee(dddddd)(dddddd)(dddddd)(dddddd)(dddddd) ...
1722             # CA006105976188911TMAX 39 C 122 C 144 C 83 C 28 C ...
1723            
1724 91191         1430253 my $id = substr $line, 0, 11;
1725 91191         176455 my $year = 0 + substr $line, 11, 4; # coerce to number
1726 91191         145704 my $month = 0 + substr $line, 15, 2; # coerce to number
1727 91191         144193 my $element = substr $line, 17, 4;
1728 91191         138438 my $data = substr $line, 21 ;
1729            
1730 91191 100       192990 next unless $element =~ $_measures_obj->re;
1731            
1732 47761 100 100     827434 next if $Opt->fmonth and not $opt_fmonth_nrs->contains($month);
1733            
1734 34836 100       880702 my $need_baseline = $Opt->anomalies and $opt_baseline_nrs->contains($year);
1735 34836 50       835587 if ( $Opt->range ) {
1736 34836 100 100     374295 next unless $opt_range_nrs->contains($year)
1737             or $need_baseline;
1738             }
1739            
1740 638         15208 foreach my $ii ( 0 .. 30 ) {
1741 19778         241550 my $day = $ii + 1;
1742            
1743 19778 100 100     322866 next if $Opt->fday and not $opt_fday_nrs->contains($day);
1744            
1745             # |--- 0---|--- 10---|--- 20---|--- 30---|--- 40---|--- 50
1746             # |123456789|123456789|123456789|123456789|123456789|123456
1747             # 39 C 122 C 144 C 83 C 28 C 67 C 89 C ...
1748 11948         140797 my $daily = substr $data, $ii * 8, 8;
1749            
1750 11948         24106 my $value = 0 + substr $daily, 0, 5;
1751             # my $mflag = substr $daily, 5, 1;
1752 11948         18077 my $qflag = substr $daily, 6, 1;
1753             # my $sflag = substr $daily, 7, 1;
1754            
1755 11948         35200 $qflag =~ s{ \s+ \Z }{}xms;
1756            
1757 11948         34113 my $key = sprintf '%04d%02d%02d', $year, $month, $day;
1758            
1759             # keep track of all baseline days
1760 11948 100       23812 $baseline_days{$key}++
1761             if $need_baseline;
1762            
1763             # skip over values that have quality flags
1764 11948 50       21689 if ( $qflag ne $EMPTY ) {
1765 0         0 $_flag_cnts_href->{$element}->{QFLAGS}->{$qflag}++;
1766 0         0 $_flag_cnts_href->{$element}->{REJECTED}++;
1767 0         0 $_daily_href->{$key}->{QFLAGS}->{$element}->{$qflag}++;
1768 0         0 next;
1769             }
1770            
1771             # skip missing values
1772 11948 100       21452 next if $value == -9999;
1773            
1774 11556         19614 $_flag_cnts_href->{$element}->{KEPT}++;
1775            
1776 11556 50       31121 if ( $element =~ m{ \A ( TMAX | TMIN | TAVG | SNOW | SNWD | PRCP ) \Z }xms ) {
1777             # values are stored in 10th of a unit, so we divide by 10 to scale them to get:
1778             # - temperatures in °C
1779             # - PRCP in mm
1780             # - SNOW and SNWD in cm
1781 11556 100       40466 $_daily_href->{$key}->{$element} = $value / 10.0 if $value;
1782 11556   100     26925 $gaps{$year}->{$month} //= pack 'b30', 0;
1783 11556         39567 vec( $gaps{$year}->{$month}, $ii, 1) = 1;
1784             }
1785             }
1786             }
1787 15 50       169 close $stn_fh or croak '*E* unable to close';
1788            
1789 15         135 $_tstats->stop('Load_daily_data');
1790            
1791             # warn about missing years, months or days
1792 15         121 $self->_report_gaps( $stn, \%gaps );
1793            
1794 15         45 my $insufficient_quality = 0;
1795            
1796             # determine whether there's sufficient data within the -range, based on the -quality threshold
1797 15 50       375 if ( $Opt->range ) {
1798 15         286 my $day_count = keys $_daily_href->%*;
1799 15         267 $insufficient_quality += $self->_compute_quality( $stn, 'range', $day_count, $Opt->range, $Opt->quality );
1800             }
1801            
1802             # determine whether there's sufficient data within the -baseline, based on the -quality threshold
1803 15 100       415 if ( $Opt->anomalies ) {
1804 1         15 my $day_count = keys %baseline_days;
1805 1         21 $insufficient_quality += $self->_compute_quality( $stn, 'baseline', $day_count, $Opt->baseline, $Opt->quality );
1806             }
1807            
1808 15         326 $self->_compute_mean_and_counts( $stn );
1809            
1810 15 100 66     382 if ($Opt->anomalies and $insufficient_quality == 0) {
1811 1         41 $self->_compute_baseline( $stn, $Opt->baseline );
1812 1         8 $self->_compute_anomalies();
1813             }
1814            
1815 15         593 return $insufficient_quality;
1816             }
1817            
1818 19     19   91 method _load_station_inventories () {
  19         44  
  19         40  
1819            
1820 19         127 my $inv_content = $self->_fetch_url($GHCN_STN_INVEN_URL, 'URI::Fetch_inv');
1821            
1822             # Now scan the station inventory list, to get the active range for each station
1823             # - note there are multiple records, one for each element and active range combo
1824            
1825 19         161 $_tstats->start('Parse_inv');
1826            
1827             ## no critic [InputOutput::RequireBriefOpen]
1828 19 50       1258415 open my $inv_fh, '<', \$inv_content
1829             or croak '*E* unable to open inventory content';
1830            
1831 19         471 while (my $inv = <$inv_fh>) {
1832             # |--- 0---|--- 10---|--- 20---|--- 30---|--- 40---
1833             # |123456789|123456789|123456789|123456789|123456789
1834             # (stationid)....................elem.from.(to)
1835             # ACW00011604 17.1167 -61.7833 TMAX 1949 1949
1836            
1837             ## no critic [ProhibitMagicNumbers]
1838 13796185         22776462 my $id = substr $inv, 0, 11;
1839             # my $lat = substr $inc, 12, 8;
1840             # my $long = substr $inc, 21, 9;
1841 13796185         18852707 my $elem = substr $inv, 31, 4;
1842 13796185         18963157 my $firstyear = substr $inv, 36, 4;
1843 13796185         18862629 my $lastyear = substr $inv, 41, 4;
1844             ## use critic [ProhibitMagicNumbers]
1845            
1846             # in case the inventory list contains a station id that isn't in
1847             # the station list, we'll skip it
1848 13796185 100       36474513 next if not exists $_station{$id};
1849            
1850 17023 100       35817 next unless $elem =~ $_measures_obj->re;
1851            
1852 1208         2935 my $stn = $_station{$id};
1853            
1854             # combine the inventory active range set with the station active range
1855 1208         3230 $stn->active = rng_new( $stn->active, $firstyear . $DASH . $lastyear )->as_string();
1856            
1857 1208 50       4163 $stn->elems_href->{$elem}++
1858             if $elem =~ $_measures_obj->re;
1859             }
1860 19 50       189 close $inv_fh or croak;
1861            
1862 19         265 $_tstats->stop('Parse_inv');
1863            
1864 19         169 my $count = $self->_filter_stations(\%_station);
1865            
1866 19         199 return $count;
1867             }
1868            
1869             # TODO: consider ways to let the caller decide on how to output the result
1870            
1871 4     4   20 method _print_detail_data ($measure_begin_idx, $stn, $row_sub) {
  4         12  
  4         10  
  4         9  
  4         12  
  4         14  
1872            
1873 4 50       18 return if not defined $row_sub;
1874            
1875 4         25 $_tstats->start('Printing');
1876            
1877             # build has of measure names and indices so measures can be
1878             # inserted into the correct columns
1879 4         10 my %measure_idx;
1880 4         14 my $ii = 0;
1881 4         26 foreach my $m ( $_measures_obj->measures ) {
1882 19         51 $measure_idx{$m} = $measure_begin_idx + $ii++;
1883             }
1884            
1885 4         87 my $opt_range_nrs = rng_new($Opt->range);
1886            
1887             # generate and print the data rows
1888 4         493 foreach my $key ( sort keys $_daily_href->%* ) {
1889 998         13425 my ($year, $month, $day) = unpack 'A4 A2 A2', $key;
1890 998         1824 my $flags = $EMPTY;
1891            
1892 998 50       19745 next unless ( $Opt->range ? $opt_range_nrs->contains($year) : $TRUE );
    100          
1893            
1894 740         23828 my $row = $_daily_href->{$key};
1895            
1896             $flags = _qflags_as_string( $row->{QFLAGS} )
1897 740 50       1603 if exists $row->{QFLAGS};
1898            
1899 740         1019 my @row;
1900 740         1615 push @row, $year;
1901 740         1357 push @row, $month;
1902 740         1158 push @row, $day;
1903 740         1467 push @row, int($year / 10) * 10; ## no critic [ProhibitMagicNumbers]
1904 740         1497 push @row, _seasonal_decade($year, $month);
1905 740         1538 push @row, _seasonal_year($year, $month);
1906 740         1463 push @row, _seasonal_qtr($year, $month);
1907 740         2003 foreach my $elem ( $_measures_obj->measures ) {
1908             $row[ $measure_idx{$elem} ] = sprintf '%.2f', $row->{$elem}
1909 2252 100       11451 if defined $row->{$elem};
1910 2252   66     5100 $row[ $measure_idx{$elem} ] //= $EMPTY;
1911             }
1912 740         1819 push @row, $flags;
1913 740         2190 push @row, $stn->id;
1914 740         1787 push @row, $stn->name;
1915 740         1760 push @row, $stn->idx;
1916 740         1991 push @row, $stn->grid;
1917            
1918 740         1888 $row_sub->( \@row );
1919             }
1920            
1921 4         117 $_tstats->stop('Printing');
1922            
1923 4         25 return;
1924             }
1925            
1926 15     15   109 method _report_gaps ($stn, $gaps_href) {
  15         38  
  15         32  
  15         35  
  15         35  
1927            
1928 15         99 $_tstats->start('Report_gaps');
1929            
1930             ## no critic [ProhibitDoubleSigils]
1931 15         234 my @years = sort keys $gaps_href->%*;
1932            
1933 15 100       398 if ( $Opt->active ) {
1934 4         120 my $active_nrs = rng_new( $Opt->active );
1935 4         17 my $years_nrs = rng_new( @years );
1936 4         42 my $gap_nrs = $active_nrs->diff( $years_nrs );
1937 4 50       2114 if ($gap_nrs->cardinality) {
1938 0         0 my $msg = sprintf "%s\tmissing data in the active range\tyears %s", $stn->id, $gap_nrs->as_string;
1939 0         0 $stn->add_note($WARN_MISS_YA, $msg, $Opt->verbose);
1940 0         0 my $iter = $gap_nrs->iterate_runs();
1941 0         0 while (my ( $from, $to ) = $iter->()) {
1942 0         0 foreach my $yyyy ($from .. $to) {
1943 0         0 $_missing_href->{$stn->id}{$yyyy}{$EMPTY}++;
1944             }
1945             }
1946             }
1947             }
1948            
1949 15         557 my $opt_range_nrs = rng_new($Opt->range);
1950 15         381 my $opt_baseline_nrs = rng_new($Opt->baseline);
1951            
1952 15 50       367 if ( $Opt->range ) {
1953 15         279 my $years_nrs = rng_new( @years );
1954 15         105 my $gap_nrs = $opt_range_nrs->diff( $years_nrs );
1955 15 100       6197 if ($gap_nrs->cardinality) {
1956 1         31 my $msg = sprintf "%s\tmissing data in the filter range\tyears %s", $stn->id, $gap_nrs->as_string;
1957 1         62 $stn->add_note($WARN_MISS_YF, $msg, $Opt->verbose);
1958 1         7 my $iter = $gap_nrs->iterate_runs();
1959 1         13 while (my ( $from, $to ) = $iter->()) {
1960 1         13 foreach my $yyyy ($from .. $to) {
1961 1         4 $_missing_href->{$stn->id}{$yyyy}{$EMPTY}++;
1962             }
1963             }
1964             }
1965             }
1966            
1967 15         1349 my (undef, undef, undef , $mday, $mon, $year) = localtime time;
1968             ## no critic [ValuesAndExpressions::ProhibitMagicNumbers]
1969 15         96 my ($this_yyyy, $this_mm) = ($year+1900, $mon+1);
1970            
1971 15         58 foreach my $yyyy ( @years ) {
1972             # don't report gaps for years that aren't within -range (or -baseline if -anomalies)
1973 150 100 100     6992 next if $Opt->range and
      66        
1974             ( $opt_range_nrs and not $opt_range_nrs->contains($yyyy)
1975             or
1976             $Opt->anomalies and not $opt_baseline_nrs->contains($yyyy) );
1977            
1978             ## no critic [ProhibitDoubleSigils]
1979             ## no critic [ProhibitMagicNumbers]
1980 19 50       1571 my $end_month = $yyyy == $this_yyyy
1981             ? $this_mm
1982             : 12;
1983            
1984 19 100       395 my $month_gap_nrs = $Opt->fmonth
1985             ? rng_new( $Opt->fmonth )
1986             : rng_new( 1 .. $end_month );
1987            
1988 19         279 my @months = sort {$a<=>$b} keys $gaps_href->{$yyyy}->%*;
  429         702  
1989            
1990 19         129 $month_gap_nrs->remove( @months );
1991            
1992 19 100       3168 if ($month_gap_nrs->cardinality) {
1993 1         35 my $gap_months = join $SPACE, _month_names($month_gap_nrs->as_array);
1994 1         9 my $msg = sprintf "%s\tmissing data: year %d months %s", $stn->id, $yyyy, $gap_months;
1995 1         56 $stn->add_note($WARN_MISS_MO, $msg, $Opt->verbose);
1996 1         5 $_missing_href->{$stn->id}{$yyyy}{$gap_months}++;
1997             }
1998            
1999 19         868 my $opt_fday_nrs = rng_new($Opt->fday);
2000 19         456 my $opt_fmonth_nrs = rng_new($Opt->fmonth);
2001            
2002             ## no critic [ProhibitMagicNumbers]
2003 19         92 my $gap_text = $EMPTY;
2004 19         77 foreach my $mm ( 1 .. $end_month ) {
2005             # skip months that don't match -fmonth when -fmonth was given
2006 228 100 100     7750 next if $Opt->fmonth and not $opt_fmonth_nrs->contains($mm);
2007            
2008 184         2952 my $day_vec = $gaps_href->{$yyyy}->{$mm};
2009            
2010 184 100       428 next if not $day_vec;
2011            
2012 175         284 my @days_with_data;
2013 175         426 my $mdays = _days_in_month($yyyy,$mm);
2014            
2015 175         465 foreach my $day ( 1..31 ) {
2016             # skip days that don't match -fday when -fday was given
2017 5425 100 100     84117 next if $Opt->fday and not $opt_fday_nrs->contains($day);
2018            
2019 5338 100       54751 next if $day > $mdays;
2020            
2021 5239 100       12362 push @days_with_data, $day
2022             if vec($day_vec, $day - 1, 1) == 1;
2023             }
2024            
2025 175 100       2848 my $days_in_month_nrs = $Opt->fday
2026             ? $opt_fday_nrs
2027             : rng_new( 1 .. $mdays );
2028            
2029 175         1086 my $days_nrs = rng_new( @days_with_data );
2030            
2031 175         650 my $day_gap_nrs = $days_in_month_nrs->diff($days_nrs);
2032            
2033 175 100       66123 $gap_text .= $SPACE . _month_names($mm) . '[' . $day_gap_nrs->as_string . ']'
2034             unless $day_gap_nrs->is_empty;
2035             }
2036            
2037 19 100       542 if ( $gap_text !~ m{\A \s* \Z}xms ) {
2038 6         53 my $msg = sprintf "%s\tmissing data: %d days %s", $stn->id, $yyyy, $gap_text;
2039 6         182 $stn->add_note($WARN_MISS_DY, $msg, $Opt->verbose);
2040 6         37 $gap_text =~ s{\A \s+ }{}xms;
2041 6         29 $_missing_href->{$stn->id}{$yyyy}{$gap_text}++;
2042             }
2043             }
2044            
2045 15         145 $_tstats->stop('Report_gaps');
2046            
2047 15         84 return;
2048             }
2049            
2050             #----------------------------------------------------------------------
2051             # Configuration Helper functions
2052             #----------------------------------------------------------------------
2053            
2054 10     10   6555 sub _get_profile_options ($profile=$EMPTY) {
  10         22  
  10         18  
2055            
2056             #debug# use DDP;
2057             #debug# use Log::Dispatch;
2058             #debug# my $log = Log::Dispatch->new(
2059             #debug# outputs => [
2060             #debug# [ 'File', min_level => 'debug', filename => 'c:/sandbox/log.log' ],
2061             #debug# [ 'Screen', min_level => 'debug' ],
2062             #debug#
2063             #debug# ]
2064             #debug# );
2065            
2066 10         26 my $profile_href = {};
2067            
2068             # passing undef will result in an empty config
2069 10 100       45 return $profile_href if not defined $profile;
2070            
2071             #debug# use FindBin;
2072             #debug# open my $fh, '>>', 'c:/sandbox/log.log' or die;
2073             #debug# $log->debug( 'program ' . $0 );
2074             #debug# $log->debug( 'caller ' . join(' | ', caller) );
2075             #debug# $log->debug( 'received profile_file: ' . $_profile );
2076            
2077 9         33 my $profile_filespec = _get_profile_filespec($profile);
2078            
2079 9         23 my $yaml_struct;
2080 9         22 my $msg = $EMPTY;
2081            
2082             # uncoverable branch false
2083 9 100       49 if (-e $profile_filespec) {
2084             # uncoverable branch false
2085             try {
2086 8     8   808 $yaml_struct = YAML::Tiny->read($profile_filespec);
2087             } catch {
2088 0     0   0 $msg = '*W* no cache or aliases: failed reading YAML in ' . $profile_filespec;
2089 0         0 carp $msg;
2090             }
2091 8         336 } else {
2092 1         82 return $profile_href;
2093             }
2094            
2095 8 50       11062 $profile_href = $yaml_struct->[0]
2096             if $yaml_struct;
2097            
2098             #debug# $log->( 'yaml_struct length = ' . length $yaml_struct );
2099             #debug# $log->( "\n" );
2100             #debug# $log->( 'profile_filespec: ' . $profile_filespec );
2101             #debug# $log->( 'carp ' . $msg );
2102             #debug# $log->( 'FindBin::Bin ' . $FindBin::Bin );
2103             #debug# $log->( "\n");
2104             #debug# $log->( 'profile_href ' . np($profile_href) );
2105             #debug# $log->( "\n" );
2106             #debug# $log->( "================" );
2107             #debug# $log->( "\n" );
2108             #debug# close $fh;
2109            
2110 8         50 return $profile_href;
2111             }
2112            
2113 10     10   22 sub _get_profile_filespec ($profile) {
  10         19  
  10         20  
2114            
2115             # an EMPTY arg will default to ~/.ghcn_fetch.yaml
2116 10   66     71 $profile ||= $DEFAULT_PROFILE_FILE;
2117            
2118             # Path::Tiny::path will replace ~ or ~username with the corresponding path
2119 10         45 my $profile_filespec = path($profile);
2120             #debug# say {$fh} 'profile_filespec (canon): ', $profile_filespec;
2121            
2122 10         512 return $profile_filespec;
2123             }
2124            
2125             #----------------------------------------------------------------------
2126             # -kml Helper Functions
2127             #----------------------------------------------------------------------
2128            
2129             # TODO: allow any KML hex colour code, format
2130            
2131 17     17   9027 sub _get_kml_color ($color_opt) {
  17         35  
  17         27  
2132            
2133             # From https://developers.google.com/kml/documentation/kmlreference#colorstyle
2134            
2135             # Color and opacity (alpha) values are expressed in hexadecimal notation.
2136             # The range of values for any one color is 0 to 255 (00 to ff). For
2137             # alpha, 00 is fully transparent and ff is fully opaque. The order of
2138             # expression is aabbggrr, where:
2139             #
2140             # aa=alpha (00 to ff)
2141             # bb=blue (00 to ff)
2142             # gg=green (00 to ff)
2143             # rr=red (00 to ff).
2144             #
2145             # For example, if you want to apply a blue color with 50 percent opacity
2146             # to an overlay, you would specify the following:
2147             # 7fff0000,
2148             # where alpha=0x7f, blue=0xff, green=0x00, and red=0x00.
2149            
2150 17         156 my %kml_colors = (
2151             b => [ 'blue', 'ff780000' ],
2152             g => [ 'grn', 'ff147800' ],
2153             a => [ 'ltblu', 'ffF06414' ], # 'a' for azure
2154             p => [ 'purple','ff780078' ],
2155             r => [ 'red', 'ff1400FF' ],
2156             w => [ 'wht', 'ffFFFFFF' ],
2157             y => [ 'ylw', 'ff14F0FF' ],
2158             );
2159            
2160             # just use the first character of whatever string we're given
2161 17         45 my $c = substr $color_opt, 0, 1;
2162            
2163 17 100       61 return unless $kml_colors{$c};
2164            
2165 16         92 return $kml_colors{$c}->[1];
2166             }
2167            
2168             #----------------------------------------------------------------------
2169             # -gps Helper Functions
2170             #----------------------------------------------------------------------
2171            
2172             # Calculate geographic distances in kilometers between coordinates in
2173             # geodetic WGS84 format using the Haversine formula.
2174            
2175 122047     122047   193963 sub _gis_distance ($lat1, $lon1, $lat2, $lon2) {
  122047         196131  
  122047         199824  
  122047         187661  
  122047         175042  
  122047         171159  
2176 122047         314362 $lon1 = deg2rad($lon1);
2177 122047         1197671 $lat1 = deg2rad($lat1);
2178 122047         882335 $lon2 = deg2rad($lon2);
2179 122047         813204 $lat2 = deg2rad($lat2);
2180            
2181             ## no critic [ProhibitParensWithBuiltins]
2182             ## no critic [ProhibitMagicNumbers]
2183            
2184 122047         774832 my $dlon = $lon2 - $lon1;
2185 122047         192732 my $dlat = $lat2 - $lat1;
2186 122047         394772 my $a = (sin($dlat/2)) ** 2 + cos($lat1) * cos($lat2) * (sin($dlon/2)) ** 2;
2187 122047         293582 my $c = 2 * atan2(sqrt($a), sqrt(1-$a));
2188            
2189 122047         243220 return 6_371_640 * $c / 1000.0;
2190             }
2191            
2192             #----------------------------------------------------------------------
2193             # -location Helper Functions
2194             #----------------------------------------------------------------------
2195            
2196             # Match -location to the station id or name provided in the call.
2197             # If the pattern looks like a station id (e.g. 'CA006105887') then it matches
2198             # to the stn_id parameter; if it looks like a comma-separated list of station
2199             # id's, it returns success if any of them match the stn_id parameter. Otherwise,
2200             # it matches the to the start of the stn_name parameter.
2201 1227449     1227449   38165518 sub _match_location ($stn_id, $stn_name, $pattern) {
  1227449         1910619  
  1227449         1736404  
  1227449         1676928  
  1227449         1624202  
2202            
2203 1227449         1759870 my $result = $FALSE;
2204            
2205 1227449 100       6044668 if ($pattern =~ m{ \A $STN_ID_RE \Z }xms) {
    100          
2206 854330         1795830 $result = $stn_id eq $pattern;
2207             }
2208             elsif ($pattern =~ m{ \A $STN_ID_RE ( [,] $STN_ID_RE )+ \Z }xms) {
2209 244096         724042 my @patterns = split m{ [,] }xms, $pattern;
2210 244096         617789 my $multi_pattern = '\A(' . join(q(|), @patterns) . ')\Z';
2211            
2212 244096         605702 $result = $stn_id =~ $multi_pattern;
2213             }
2214             else {
2215             ## no critic [RequireExtendedFormatting]
2216 129023         368432 $result = $stn_name =~ m{\A$pattern}msi;
2217             }
2218            
2219 1227449         6993429 return $result;
2220             }
2221            
2222             #----------------------------------------------------------------------
2223             # -nogaps Helper Functions
2224             #----------------------------------------------------------------------
2225            
2226             # parse the missing values text, which looks like this:
2227             # month names: Jan Feb Mar Apr May Jun Jul Aug Sep Oct
2228             # or day ranges: May[2] Oct[3,11] Nov[1] Dec[2,5]
2229            
2230 10     10   4934 sub _parse_missing_text ( $s ) {
  10         22  
  10         17  
2231 10         18 my @months;
2232             my @mmdd;
2233 10         63 my @f = split m{ \s }xms, $s;
2234            
2235 10         37 my $mmm_re = qr{ [[:upper:]][[:lower:]][[:lower:]] }xms;
2236 10         26 my $nbr_rng = qr{ \d+ ( [-] \d+)? }xms;
2237 10         109 my $rng_list = qr{ $nbr_rng (, $nbr_rng)* }xms;
2238            
2239 10         26 foreach my $tok (@f) {
2240 36 100       214 if ( $tok =~ m{ \A $mmm_re \Z }xms ) {
2241 10         30 push @months, $MMM_TO_MM{$tok};
2242             }
2243 36 100       366 if ( $tok =~ m{ \A ($mmm_re) \[ ($rng_list) \] \Z }xms ) {
2244 26         93 my $mm = $MMM_TO_MM{$1};
2245 26         72 my @days = rng_new($2)->as_array;
2246 26         825 foreach my $day (@days) {
2247 104         215 push @mmdd, [$mm, $day];
2248             }
2249             }
2250             }
2251 10         57 return \@months, \@mmdd;
2252             }
2253            
2254             #----------------------------------------------------------------------
2255             # Misc Functions
2256             #----------------------------------------------------------------------
2257            
2258             # format qflags like this: I:1, N:9, S:4
2259 9     9   5054 sub _qflags_as_string ( $qflags_href ) {
  9         22  
  9         13  
2260 9 100       33 return $EMPTY if not $qflags_href;
2261            
2262 2         3 my @r;
2263 2         11 foreach my $qflag ( sort keys $qflags_href->%* ) {
2264 3         12 push @r, sprintf '%s:%d', $qflag, $qflags_href->{$qflag};
2265             }
2266            
2267 2         10 return join ', ', @r;
2268             }
2269             #----------------------------------------------------------------------
2270             # Undef-safe Functions
2271             #----------------------------------------------------------------------
2272            
2273             # defined max - return the maximum of the two arguments,
2274             # or the defined argument if one of the arguments is undef
2275 3299     3299   4472 sub _dmax ($x, $y) {
  3299         4769  
  3299         4524  
  3299         4360  
2276            
2277 3299 100 100     6705 return if not defined $x and not defined $y;
2278 3298 100       6300 return $y if not defined $x;
2279 2864 100       5215 return $x if not defined $y;
2280            
2281 2863 100       6461 return $x > $y ? $x : $y;
2282             }
2283            
2284             # defined min - return the minimum of the two arguments,
2285             # or the defined argument if one of the arguments is undef
2286 3276     3276   4457 sub _dmin ($x, $y) {
  3276         4845  
  3276         4694  
  3276         4398  
2287            
2288 3276 100 100     7071 return if not defined $x and not defined $y;
2289 3275 100       6043 return $y if not defined $x;
2290 2844 100       5044 return $x if not defined $y;
2291            
2292 2843 100       6241 return $x < $y ? $x : $y;
2293             }
2294            
2295             # defined sum - return the sum of the two arguments,
2296             # or the defined argument if one of the arguments is undef
2297 15385     15385   20469 sub _dsum ($x, $y) {
  15385         21177  
  15385         21103  
  15385         19568  
2298            
2299 15385 100 100     37175 return if not defined $x and not defined $y;
2300 15384 100       29094 return $y if not defined $x;
2301 4965 100       9105 return $x if not defined $y;
2302            
2303 4964         9771 return $x + $y;
2304             }
2305            
2306             # defined count - increment the value of the first argument by 1,
2307             # or return it unchanged if the second argument is undef
2308 15211     15211   21116 sub _dcount ($x, $y) {
  15211         20339  
  15211         19731  
  15211         19564  
2309            
2310 15211 100       25693 return $x if not defined $y;
2311            
2312 15209 100       50097 return defined $x ? $x + 1 : 1;
2313             }
2314            
2315             # defined divide - returns undef if either argument is undef
2316             # or if the denominator is zero
2317 9996     9996   13363 sub _ddivide ($x, $y) {
  9996         13922  
  9996         13684  
  9996         13257  
2318            
2319 9996 100       17699 return if not defined $x;
2320 9994 100       16810 return if not defined $y;
2321 9993 100       17361 return if not $y;
2322            
2323 9992         18974 return $x / $y;
2324             }
2325            
2326             #----------------------------------------------------------------------
2327             # Date and Time Helper Functions
2328             #----------------------------------------------------------------------
2329            
2330 189     189   317 sub _days_in_month ($year, $month) {
  189         317  
  189         346  
  189         272  
2331            
2332             # coerce to numbers
2333 189         306 $year += 0;
2334 189         273 $month += 0;
2335            
2336             ## no critic [ProhibitMagicNumbers]
2337 189         499 my @mdays = (0, 31,28,31,30,31,30,31,31,30,31,30,31);
2338            
2339 189 100       550 if ($month == 2) {
2340 18         130 return $mdays[$month] + _is_leap_year($year);
2341             }
2342            
2343 171         466 return $mdays[$month];
2344             }
2345            
2346 31     31   5390 sub _days_in_year ($year) {
  31         56  
  31         56  
2347             ## no critic [ProhibitMagicNumbers]
2348 31         92 return 365 + _is_leap_year( $year+0 );
2349             }
2350            
2351 58     58   99 sub _is_leap_year ($year) {
  58         112  
  58         91  
2352            
2353 58         160 $year += 0; # coerce to number
2354            
2355             ## no critic [ProhibitMagicNumbers]
2356 58 100       233 return 0 if $year % 4;
2357 32 100       113 return 1 if $year % 100;
2358 24 100       117 return 0 if $year % 400;
2359 8         31 return 1;
2360             }
2361            
2362 23     23   5935 sub _month_names (@mm) {
  23         55  
  23         41  
2363            
2364 23         89 my @mnames = qw(xxx Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
2365            
2366 23         47 my @result = ();
2367 23 100       71 return (@result) unless @mm;
2368            
2369             ## no critic [ProhibitMagicNumbers]
2370 22         55 foreach my $mm (@mm) {
2371 41 100 100     280 if ($mm !~ m{ \A \d\d? \Z }xms) {
    100          
2372 1         4 push @result, '???';
2373             }
2374             elsif ($mm > 0 and $mm < 13) {
2375 38         99 push @result, $mnames[$mm];
2376             }
2377             else {
2378 2         6 push @result, '???';
2379             }
2380             }
2381            
2382 22 100       148 return wantarray ? @result : shift @result;
2383             }
2384            
2385             # Seasonal decades are based on seasonal years.
2386             # However, decades are deemed to begin when the seasonal
2387             # year ends in 9. For example, seasonal decade 2010 begins
2388             # Dec 1 2009 and continues through Nov 30, 2019.
2389 1174     1174   1612 sub _seasonal_decade ($year, $month) {
  1174         2244  
  1174         1624  
  1174         1511  
2390             ## no critic [ProhibitMagicNumbers]
2391 1174 100       2484 $year += 1 if $month == 12;
2392 1174         2553 return int($year / 10) * 10;
2393             }
2394            
2395             # Seasonal quarter Q1 begins Dec 1 and ends Feb 28/29
2396             # For example, 2017 Q1 includes Dec 2017, Jan 2018 and Feb 2018.
2397 1176     1176   1604 sub _seasonal_qtr ($year, $month) {
  1176         1695  
  1176         1630  
  1176         1559  
2398 1176         4179 my @seasonal_quarter = qw( Q0 Q1 Q1 Q2 Q2 Q2 Q3 Q3 Q3 Q4 Q4 Q4 Q1 );
2399            
2400 1176         3387 return $seasonal_quarter[$month];
2401             }
2402            
2403             # Seasonal years begin Dec 1 and continue through end of November
2404             # the following year. For example, Dec 15 2017 is in Q1 2017;
2405             # Jan 15 2018 is in Q1 2017 (not 2018!!!); Nov 15 2018 is in Q4 2017.
2406             # This is consistent with definition used by weatherstats.ca.
2407 1176     1176   1635 sub _seasonal_year ($year, $month) {
  1176         1817  
  1176         1586  
  1176         1576  
2408             ## no critic [ProhibitMagicNumbers]
2409 1176 100       2600 $year -= 1 unless $month == 12;
2410 1176         2893 return $year;
2411             }
2412            
2413             #----------------------------------------------------------------------
2414             # Performance Helper Functions
2415             #----------------------------------------------------------------------
2416            
2417             # calculate the total size of a hash or other structure
2418 19     19   7726 sub _memsize ( $ref, $opt_performance ) {
  19         40  
  19         42  
  19         52  
2419 19 100       173 return $EMPTY unless $opt_performance;
2420 4         640 return sprintf ' [%s]', commify( Devel::Size::total_size( $ref ) );
2421             }
2422            
2423             =head1 EXAMPLE PROGRAM
2424            
2425             use Weather::GHCN::StationTable;
2426            
2427             my $ghcn = Weather::GHCN::StationTable->new;
2428            
2429             my ($opt, @errors) = $ghcn->set_options(
2430             cachedir => 'c:/ghcn_cache',
2431             refresh => 'always',
2432             country => 'US',
2433             state => 'NY',
2434             location => 'New York',
2435             active => '2000-2022',
2436             report => 'yearly',
2437             );
2438            
2439             die @errors if @errors;
2440            
2441             $ghcn->load_stations;
2442            
2443             my @rows;
2444             if ($opt->report) {
2445             say $ghcn->get_header;
2446            
2447             # this also prints detailed station data if $opt->report eq 'detail'
2448             $ghcn->load_data(
2449             # set a callback routine for printing progress messages
2450             progress_sub => sub { say {*STDERR} @_ },
2451             # set a callback routine for capturing data rows when report => 'detail'
2452             row_sub => sub { push @rows, $_[0] },
2453             );
2454            
2455             # these only do something when $opt->report ne 'detail'
2456             $ghcn->summarize_data;
2457             say $ghcn->get_summary_data;
2458            
2459             say '';
2460             say $ghcn->get_footer;
2461            
2462             say '';
2463             say $ghcn->get_flag_statistics;
2464             }
2465            
2466             # print data rows collected by row_sub callback (when report => 'detail')
2467             foreach my $row_aref (@rows) {
2468             say join "\t", $row_aref->@*;
2469             }
2470            
2471             say '';
2472             say $ghcn->get_stations( kept => 1 );
2473            
2474             say '';
2475             say 'Stations that failed to meet range or quality criteria:';
2476             say $ghcn->get_stations( kept => 0, no_header => 1 );
2477            
2478             if ( $ghcn->has_missing_data ) {
2479             warn '*W* some data was missing for the stations and date range processed' . $NL;
2480             say '';
2481             say $ghcn->get_missing_data_ranges;
2482             }
2483            
2484             say $ghcn->get_options;
2485            
2486             say $ghcn->get_timing_stats;
2487            
2488             say $ghcn->get_hash_stats;
2489            
2490             $ghcn->export_kml if $opt->kml;
2491            
2492             =head1 OPTIONS
2493            
2494             StationTable supports almost all the options documented in
2495             B. The only options not supported are ones that
2496             are listed in the Command-Line Only Options section of the POD, namely:
2497             -help, -usage, -readme, -gui, -optfile, and -outclip.
2498            
2499             Options can be passed directly to the API via B.
2500            
2501             Options can also be defined in a file (called a B) that will
2502             be loaded at runtime and merged with the options passed to B.
2503            
2504             Options passed to B must be defined as a perl hash
2505             structure. See B for a list of all options in
2506             Getopts::Long format. Simply translate the option to a hash
2507             key/value pair. For example, B<-report detail> becomes B
2508             'detail'>.
2509            
2510             Options defined in a B file must be defined using
2511             YAML (see below).
2512            
2513             =head2 Aliases
2514            
2515             Aliases are a convenience feature that allow you to define mnemonic
2516             shortcuts for specific stations. GHCN station id's (like CA006106000)
2517             are difficult to remember and type, as can GHCN station names.
2518             Frequently-used station id's can be given easier alias names that
2519             can be used in the -location option for precise and reliable data
2520             retrieval.
2521            
2522             The entries within the aliases hash are simply keyword/value pairs
2523             that represent the mnemonic alias name and the station id (or id's)
2524             that are to be retrieved when that alias is used in -location.
2525            
2526             Aliases can only be defined via the B API or in a B
2527             file. There is no command-line option for defining them.
2528            
2529             =head2 YAML Example
2530            
2531             This is what the YAML content for a typical profile file would
2532             look like:
2533            
2534             ---
2535             cachedir: C:/ghcn_cache_new
2536            
2537             aliases:
2538             yow: CA006106000,CA006106001 # Ottawa airport
2539             cda: CA006105976,CA006105978 # Ottawa (CDA and CDA RCS)
2540            
2541             =head2 Hash Example
2542            
2543             Here's what the options would look like as a hash passed to the
2544             B API call:
2545            
2546             %options = (
2547             cachedir => 'C:/ghcn_cache',
2548             aliases => {
2549             yow => 'CA006106000,CA006106001', # Ottawa airport
2550             cda => 'CA006105976,CA006105978', # Ottawa (CDA and CDA RCS)
2551             }
2552             );
2553            
2554             my $ghcn->Weather::GHCN::StationTable->new();
2555             $ghcn->set_options(%options);
2556            
2557             =head1 VERSIONING and COMPATIBILITY
2558            
2559             The version number scheme used for this module consists of a 3-part
2560             dot-delimited string such as v0.0.003. This format was chosen for
2561             compatibility with Dist::Zilla version support, so that all modules
2562             in GHCN will get the same version number upon release. See also
2563             L.
2564            
2565             The first digit of the string is a major release numbers, and the
2566             second is the minor release number. With the exception of v0.0
2567             releases, which should be considered experimental pre-production
2568             versions, the interface is intended to be upward compatible within a
2569             set of releases sharing the same major release number. If an
2570             incompatible change becomes necessary, the major release number will
2571             be incremented.
2572            
2573             An increment to the minor release number indicates significant new
2574             functionality, which usually mean new API's and options. But, it should
2575             be upward compatible with the prior release.
2576            
2577             =head1 AUTHOR
2578            
2579             Gary Puckering (jgpuckering@rogers.com)
2580            
2581             =head1 LICENSE AND COPYRIGHT
2582            
2583             Copyright 2022, Gary Puckering
2584            
2585             =cut
2586            
2587             1;