File Coverage

lib/Weather/GHCN/StationTable.pm
Criterion Covered Total %
statement 941 982 95.8
branch 291 344 84.5
condition 141 178 79.2
subroutine 79 85 92.9
pod 31 33 93.9
total 1483 1622 91.4


line stmt bran cond sub pod time code
1             # Weather::GHCN::StationTable.pm - class for collecting station objects and weather data
2            
3             ## no critic (Documentation::RequirePodAtEnd)
4            
5             =head1 NAME
6            
7             Weather::GHCN::StationTable - collect station objects and weather data
8              
9             =head1 VERSION
10              
11             version v0.0.011
12            
13             =head1 SYNOPSIS
14            
15             use Weather::GHCN::StationTable;
16            
17             my $ghcn = Weather::GHCN::StationTable->new;
18            
19             my ($opt, @errors) = $ghcn->set_options(
20             country => 'US',
21             state => 'NY',
22             location => 'New York',
23             report => 'yearly',
24             );
25             die @errors if @errors;
26            
27             $ghcn->load_stations;
28            
29             # generate a list of the stations that were selected
30             say $ghcn->get_stations( kept => 1 );
31            
32             if ($opt->report) {
33             say $ghcn->get_header;
34            
35             $ghcn->load_data();
36             $ghcn->summarize_data;
37            
38             say $ghcn->get_summary_data;
39             say $ghcn->get_footer;
40             }
41            
42            
43             =head1 DESCRIPTION
44            
45             The B module provides a class that is used to
46             fetch stations information from the NOAA Global Historical Climatology
47             Network database, along with temperature and/or precipitation records
48             from the daily historical records.
49            
50             For a more comprehensive example than the above Synopsis, see the
51             section EXAMPLE PROGRAM.
52            
53             Caveat emptor: incompatible interface changes may occur on releases
54             prior to v1.00.000. (See VERSIONING and COMPATIBILITY.)
55            
56             The module is primarily for use by modules Weather::GHCN::Fetch.
57            
58             =cut
59            
60             # these are needed because perlcritic fails to detect that Object::Pad handles these things
61             ## no critic [ValuesAndExpressions::ProhibitVersionStrings]
62            
63 4     4   7499 use v5.18; # minimum for Object::Pad
  4         26  
64 4     4   23 use warnings;
  4         8  
  4         142  
65 4     4   1128 use Object::Pad 0.66 qw( :experimental(init_expr) );
  4         23944  
  4         30  
66            
67             package Weather::GHCN::StationTable;
68             class Weather::GHCN::StationTable;
69            
70             our $VERSION = 'v0.0.011';
71            
72             # directly used by this module
73 4     4   5061 use Carp qw( carp croak );
  4         9  
  4         228  
74 4     4   866 use Const::Fast;
  4         5197  
  4         33  
75 4     4   2409 use HTML::Entities;
  4         25718  
  4         362  
76 4     4   1965 use Devel::Size;
  4         2028  
  4         213  
77 4     4   28 use FindBin;
  4         8  
  4         154  
78 4     4   2077 use Math::Trig;
  4         63339  
  4         703  
79 4     4   2013 use Path::Tiny;
  4         27981  
  4         285  
80 4     4   1154 use Weather::GHCN::Common qw( :all );
  4         11  
  4         656  
81 4     4   2006 use Weather::GHCN::TimingStats;
  4         12  
  4         176  
82            
83             # included so consumers of this module don't have to include these
84 4     4   1815 use Weather::GHCN::CacheURI;
  4         12  
  4         210  
85 4     4   2085 use Weather::GHCN::CountryCodes;
  4         11  
  4         354  
86 4     4   1898 use Weather::GHCN::Measures;
  4         10  
  4         167  
87 4     4   2065 use Weather::GHCN::Options;
  4         13  
  4         197  
88 4     4   1419 use Weather::GHCN::Station;
  4         12  
  4         30814  
89            
90             const my $EMPTY => q(); # empty string
91             const my $SPACE => q( ); # space character
92             const my $DASH => q(-); # dash character
93             const my $TAB => qq(\t); # tab character
94             const my $NL => qq(\n); # perl platform-universal newline
95             const my $TRUE => 1; # perl's usual TRUE
96             const my $FALSE => not $TRUE; # a dual-var consisting of '' and 0
97            
98             const my %MMM_TO_MM => (
99             Jan=>1, Feb=>2, Mar=>3, Apr=>4, May=>5, Jun=>6,
100             Jul=>7, Aug=>8, Sep=>9, Oct=>10, Nov=>11, Dec=>12,
101             );
102            
103             const my $GHCN_DATA => 'https://www1.ncdc.noaa.gov/pub/data/ghcn/daily/all/';
104             const my $GHCN_STN_LIST_URL => 'https://www1.ncdc.noaa.gov/pub/data/ghcn/daily/ghcnd-stations.txt';
105             const my $GHCN_STN_INVEN_URL => 'https://www1.ncdc.noaa.gov/pub/data/ghcn/daily/ghcnd-inventory.txt';
106            
107             const my $STN_ID_RE => qr{ [[:upper:]]{2} [[:alnum:]\_\-]{9} }xms;
108             const my $STN_LIST_RE => qr{ \A $STN_ID_RE ( [,] $STN_ID_RE )+ \Z }xms;
109            
110             # station errors (rejected)
111             const my $ERR_FETCH => 1;
112             const my $ERR_METRICS => 2;
113             const my $ERR_NOT_ACTIVE => 3;
114             const my $ERR_RANGE_FULL => 4;
115             const my $ERR_NOT_PARTIAL => 5;
116             const my $ERR_NOACTIVE => 6;
117             const my $ERR_INSUFF => 7;
118             const my $ERR_NOT_GSN => 8;
119             const my $ERR_NO_GIS => 9;
120            
121             # station warnings (not rejected)
122             const my $WARN_MISS_YA => 51;
123             const my $WARN_MISS_YF => 52;
124             const my $WARN_MISS_MO => 53;
125             const my $WARN_MISS_DY => 54;
126             const my $WARN_ANOM => 55;
127            
128             my %NoteMsg = ( ## no critic [ProhibitMagicNumbers]
129             1 => q|*E* unable to fetch data from URL|,
130             2 => q|*E* station doesn't have any of the required measurements|,
131             3 => q|*E* station active range isn't a subset of the -active range|,
132             4 => q|*E* station active range doesn't fully intersect -range|,
133             5 => q|*E* station active range doesn't partially intersect -active|,
134             6 => q|*E* option -range or -active specified but station has no active range in the inventory|,
135             7 => q|*E* insufficient data to meet quality threshold|,
136             8 => q|*E* not a GSN station|,
137             9 => q|*E* -gps specified but station is without GIS coordinates|,
138            
139             51 => q|*W* station missing data in years of the active range|,
140             52 => q|*W* station missing data in years of the filter range|,
141             53 => q|*W* station missing one or more entire months of data in a year|,
142             54 => q|*W* station missing one or more days of data in a year|,
143             55 => q|*W* station has insufficient baseline years - anomalies not calculated|,
144             );
145            
146             # Global lexical option object, created by new()
147             my $Opt;
148            
149             # private fields that are intialized by set_options()
150             field $_ghcn_opt_obj; # [0] Weather::GHCN::Options object providing filtering and other options
151            
152             # private fields that are intialized by load()
153             field $_measures_obj; # [1] Measures object
154            
155             # private fields that are intialized by get_header()
156             field $_measure_begin_idx {0}; # [2] column index where measures will go in the output
157            
158             # private fields that are intialized by new() i.e. automatically or within sub BUILD
159             field %_hstats; # [3] Hash for capturing data field hash statistics
160             field $_tstats; # [4] TimingStats object (or undef)
161            
162             # data fields
163             field %_station; # [5] loaded station objects, key station_id
164             field $_aggregate_href { {} } ; # [6] hashref of aggregated (summarized0 daily data
165             field $_flag_cnts_href { {} } ; # [7] hashref of flag counts
166             field $_daily_href { {} } ; # [8] hashref of most recent daily data loaded
167             field $_baseline_href { {} } ; # [9] hashref of baseline data
168            
169             # readable fields that are populated by set_options
170 0     0 1 0 field $_opt_obj :reader; # [10] $ghcn_opt_obj->opt_obj (a Hash::Wrap of $ghcn_opt_obj->opt_href)
  0         0  
171 0     0 1 0 field $_opt_href :reader; # [11] $ghcn_opt_obj->opt_href
  0         0  
172 0     0 0 0 field $_cache_obj :reader; # [12] cache object
  0         0  
173 5     5 0 4154 field $_cachedir :reader; # [13] cache directory
  5         96  
174 4     4 1 836 field $_profile_file :reader; # [14]
  4         57  
175 3     3 1 3489 field $_profile_href :reader; # [15] hash reference containing cache and alias options
  3         25  
176            
177             # other fields with read methods
178 5     5 1 48 field $_stn_count :reader; # [16]
  5         584  
179 5     5 1 18 field $_stn_selected_count :reader; # [17]
  5         102  
180 26     26 1 71 field $_stn_filtered_count :reader; # [18]
  26         186  
181 0     0 1 0 field $_missing_href :reader { {} }; # [19]
  0         0  
182            
183             # fields for API use that are readable and writable
184 4 100   4 1 1608 field $_stnid_filter_href :accessor; # [20] hashref of station id's to be loaded, or empty hash
  4         37  
185 0 0   0 1 0 field $_return_list :accessor; # [21] return method results as tsv if true, a list if false
  0         0  
186            
187             =head1 FIELDS (read-only)
188            
189             =over 4
190            
191             =item opt_obj
192            
193             Returns a reference to the Options object created by set_options.
194            
195             =item opt_href
196            
197             Returns a reference to a hash of the Options created by set_options.
198            
199             =item profile_file
200            
201             Returns the name of the profile file, if one was passed to
202             set_options.
203            
204             =item profile_href
205            
206             Returns a reference to a hash containing the profile options
207             set by set_options (if any).
208            
209             =item stn_count
210            
211             Returns a count of the total number of stations found in the station
212             list.
213            
214             =item stn_selected_count
215            
216             Returns a count of the number of stations that were selected for
217             processng.
218            
219             =item stn_filtered_count
220            
221             Returns a count of the number of stations that were selected for
222             processing, excluding those rejected due to errors or other criteria.
223            
224             =item missing_href
225            
226             Returns a hash of the missing months and days for the selected
227             data.
228            
229             =back
230            
231             =head1 FIELDS (read or write)
232            
233             =over 4
234            
235             =item return_list()
236            
237             For API use. By default, get methods return a tab-separated string
238             of results. If return_list is set to a perl true value, then these
239             methods will return a list (or list of lists). If no argument is
240             given, the current value of return_list is returned.
241            
242             =item stnid_filter_href(\%stnid_filter)
243            
244             For API use. With no argument, the current value is returned. If an
245             argument is given, it must be a hash reference the keys of which are
246             the specific station id's you want to fetch and process. When this is
247             used, many filtering options set via set_options will be overridden;
248             e.g. country, state, location etc.
249            
250             =back
251            
252             =cut
253            
254             =head1 METHODS
255            
256             =head2 new ()
257            
258             Create a new StationTable object.
259            
260             =cut
261            
262             BUILD {
263             %_hstats = ();
264             $_tstats = Weather::GHCN::TimingStats->new();
265             }
266            
267             =head2 flag_counts ()
268            
269             The load_stations() and load_data() methods may reject a station or a
270             particular data entry due to quality or other issues. These
271             decisions are kept in a hash field, and a reference to that hash is
272             returned by this method. The caller can then report the values.
273            
274             =cut
275            
276 1     1 1 692 method flag_counts () {
  1         5  
  1         3  
277 1         4 return $_flag_cnts_href;
278             }
279            
280             =head2 get_flag_statistics ( list => 0, no_header => 0 )
281            
282             Gets a header row and summary table of data points that were kept and rejected, along
283             with counts of QFLAGS (quality flags). Returns tab-separated
284             text, or a list if the list argument is true. A heading line
285             is provided unless no_header is true.
286            
287             =over 4
288            
289             =item argument: list =>
290            
291             If the arguments include the 'list' keyword and a true value, then a
292             list is returned rather than tab-separated lines of text. Defaults
293             to false.
294            
295             =item argument: no_header =>
296            
297             If the arguments include the 'no_header' keyword and a true value,
298             then the return value will not include a header line. Default is
299             false.
300            
301             =back
302            
303             =cut
304            
305 3     3 1 4878 method get_flag_statistics ( %args ) {
  3         7  
  3         8  
  3         6  
306 3   66     14 my $return_list = $args{list} // $_return_list;
307            
308 3         7 my @output;
309            
310             # print summary of data points kept and rejected, with qflag counts
311             push @output, ['Values', 'Kept', 'Rejected', 'Flags']
312 3 50       18 unless $args{no_header};
313 3         22 foreach my $elem ( sort keys $_flag_cnts_href->%* ) {
314 9 100       39 next if $elem =~ m{ [[:lower:]] }xms;
315 6 50       18 next if $elem =~ m{ \A A_ }xms;
316 6         25 my $flag_info = _qflags_as_string( $_flag_cnts_href->{$elem}->{QFLAGS} );
317            
318             push @output, [
319             $elem,
320             ( $_flag_cnts_href->{$elem}->{KEPT} // 0 ),
321 6   50     60 ( $_flag_cnts_href->{$elem}->{REJECTED} // 0 ),
      50        
322             $flag_info,
323             ];
324             }
325            
326 3 100       23 return $return_list ? @output : tsv(\@output);
327             }
328            
329             =head2 get_footer( list => 0 )
330            
331             Get a footing section with explanatory notes about the output data
332             produced by detail and summary reports.
333            
334             =over 4
335            
336             =item argument: list =>
337            
338             If the arguments include the 'list' keyword and a true value, then a
339             list is returned rather than tab-separated lines of text. Defaults
340             to false.
341            
342             =back
343            
344             =cut
345            
346 3     3 1 5922 method get_footer ( %args ) {
  3         9  
  3         9  
  3         7  
347 3   66     16 my $return_list = $args{list} // $_return_list;
348            
349 3         8 my @output;
350            
351 3         9 push @output, 'Notes:';
352 3         9 push @output, ' * Data is obtained from the GHCN GHCN repository, specifically:';
353 3         12 push @output, $TAB . $GHCN_STN_LIST_URL;
354 3         11 push @output, $TAB . $GHCN_STN_INVEN_URL;
355 3         9 push @output, $TAB . $GHCN_DATA;
356 3         8 push @output, ' * Temperatures are in Celsius, precipitation in mm and snowfall/depth in cm.';
357 3         9 push @output, ' * TAVG is a daily average computed at each station; Tavg is the average of TMAX and TMIN.';
358 3         9 push @output, ' * Data is averaged at the daily level across multiple stations.';
359 3         9 push @output, ' * Data is summarized at the monthly or yearly level using different rules depending on the measure:';
360 3         10 push @output, $TAB . '- TMAX is aggregated by max(); TMIN is aggregated by min().';
361 3         9 push @output, $TAB . '- TAVG and Tavg are aggregated by average().';
362 3         24 push @output, $TAB . '- PRCP and SNOW are aggregated by sum().';
363 3         9 push @output, $TAB . '- SNWD is aggregated by max().';
364 3         7 push @output, ' * Decades begin on Jan 1 in calendar years ending in zero.';
365 3         6 push @output, ' * Seasonal decades/year/quarters begin Dec 1 of the previous calendar year.';
366 3         9 push @output, ' * Quality is the ratio of non-missing data days to days in the year, as a %\'tage.';
367            
368 3 100       22 return $return_list ? @output : tsv(\@output);
369             }
370            
371             =head2 get_hash_stats ( list => 0, no_header => 0 )
372            
373             Gets the hash sizes collected during the execution of StationTable
374             methods, notably load_stations and load_data, as tab-separated
375             lines of text.
376            
377             =over 4
378            
379             =item argument: list =>
380            
381             If the arguments include the 'list' keyword and a true value, then a
382             list is returned rather than tab-separated lines of text. Defaults
383             to false.
384            
385             =item argument: no_header =>
386            
387             If the arguments include the 'no_header' keyword and a true value,
388             then the return value will not include a header line. Default is
389             false.
390            
391             =back
392            
393             =cut
394            
395 4     4 1 2281 method get_hash_stats ( %args ) {
  4         9  
  4         10  
  4         6  
396 4   66     18 my $return_list = $args{list} // $_return_list;
397            
398 4         10 my @output;
399            
400 4         28 my @keys = sort keys %_hstats;
401            
402 4 100 66     30 if ( @keys and not $args{no_header} ) {
403 3         11 push @output, [ 'Hash sizes:' ];
404 3         12 push @output, [ qw(Hash Subject Iteration Size) ];
405             }
406            
407 4         16 foreach my $hash ( sort @keys ) {
408 16         53 foreach my $subject ( sort keys $_hstats{$hash}->%* ) {
409 20         63 foreach my $iter ( sort keys $_hstats{$hash}->{$subject}->%* ) {
410 20         51 my $sz = commify( $_hstats{$hash}->{$subject}->{$iter} );
411 20         80 push @output, [ $hash, $subject, $iter, $sz ];
412             }
413             }
414             }
415            
416 4 100       27 return $return_list ? @output : tsv(\@output);
417             }
418            
419             =head2 get_header ( list => 0 )
420            
421             The weather data obtained by the laod_data() method is essentially a
422             table. Which columns are returned depends on various options. For
423             example, if report => monthly is given, then the key columns will be
424             year and month -- no day. If the precip option is given, then
425             extra columns are included for precipitation values.
426            
427             This variabiliy makes it difficult for a consumer of these modules
428             to emit a heading that matches the underlying columns. The purpose of
429             this method is to return a set of column headings that will match
430             the data. The value returned is a tab-separated string.
431            
432             =over 4
433            
434             =item argument: list =>
435            
436             If the arguments include the 'list' keyword and a true value, then a
437             list is returned rather than tab-separated lines of text. Defaults
438             to false.
439            
440             =back
441            
442             =cut
443            
444 19     19 1 5882 method get_header ( %args ) {
  19         44  
  19         56  
  19         31  
445 19   66     83 my $return_list = $args{list} // $_return_list;
446            
447             # if this is a summary report, then alter the measure column labels
448 19 100       449 if ( not $Opt->report eq 'detail' ) {
449 4         103 foreach my $label ($_measures_obj->measures) {
450 12         35 $label =~ s{ \A TMAX }{TMAX max}xms;
451 12         33 $label =~ s{ \A TMIN }{TMIN min}xms;
452            
453 12         24 $label =~ s{ \A TAVG }{TAVG avg}xms;
454 12         29 $label =~ s{ \A Tavg }{Tavg avg}xms;
455            
456 12         25 $label =~ s{ \A PRCP }{PRCP sum}xms;
457 12         21 $label =~ s{ \A SNOW }{SNOW sum}xms;
458            
459 12         23 $label =~ s{ \A SNWD }{SNWD max}xms;
460            
461 12         29 $label =~ s{ \A A_ (\w+) }{$1 anom}xms;
462             }
463             }
464            
465 19   100     496 my $includes_month =
466             $Opt->report eq 'detail' ||
467             $Opt->report eq 'daily' ||
468             $Opt->report eq 'monthly';
469            
470            
471             # generate and print the header row
472 19         436 my @output;
473 19         44 push @output, 'Year';
474 19 100       67 push @output, 'Month' if $includes_month;
475 19 100 100     307 push @output, 'Day' if $Opt->report eq 'detail' or $Opt->report eq 'daily';
476 19         368 push @output, 'Decade';
477 19 100       54 push @output, 'S_Decade' if $includes_month;
478 19 100       55 push @output, 'S_Year' if $includes_month;
479 19 100       51 push @output, 'S_Qtr' if $includes_month;
480            
481 19         43 $_measure_begin_idx = @output;
482 19         74 push @output, $_measures_obj->measures;
483            
484 19 100       370 push @output, 'QFLAGS' if $Opt->report eq 'detail';
485 19 100       571 push @output, 'StationId' if $Opt->report eq 'detail';
486 19 100       505 push @output, 'Location' if $Opt->report eq 'detail';
487 19 100       487 push @output, 'StnIdx' if $Opt->report eq 'detail';
488 19 100       477 push @output, 'Grid' if $Opt->report eq 'detail';
489            
490 19 100       399 return $return_list ? @output : join $TAB, @output;
491             }
492            
493             =head2 get_missing_data_ranges( list => 0, no_header => 0 )
494            
495             Gets a list, by station id and year, of any months or day ranges
496             when data was found to be missing. Missing data can lead to incorrect
497             interpretation and can cause a station to be rejected if the percent
498             of found data does not meet the -quality threshold (normally 90%).
499            
500             Returns a heading line followed by lines of tab-separated strings.
501            
502             =over 4
503            
504             =item argument: list =>
505            
506             If the arguments include the 'list' keyword and a true value, then a
507             list of lists (stations containing years) is returned rather than
508             tab-separated lines of text. Defaults to false.
509            
510             =item argument: no_header =>
511            
512             If the arguments include the 'no_header' keyword and a true value,
513             then the return value will not include a header line. Default is
514             false.
515            
516             =item option: report
517            
518             Determines the number and content of heading values.
519            
520             =back
521            
522             =cut
523            
524 3     3 1 843 method get_missing_data_ranges ( %args ) {
  3         6  
  3         9  
  3         5  
525 3   66     16 my $return_list = $args{list} // $_return_list;
526            
527 3         6 my @output;
528            
529             push @output, [ 'StnId', 'Year', 'Quality%', 'Missing months and days']
530 3 50       18 unless $args{no_header};
531            
532 3         19 foreach my $stnid ( sort keys $_missing_href->%* ) {
533 5         13 my $stnobj = $_station{$stnid};
534 5 50       21 next if $stnobj->error_count > 0;
535 5         13 my $yyyy_href = $_missing_href->{$stnid};
536            
537 5         23 foreach my $yyyy ( sort keys $yyyy_href->%* ) {
538 9         18 my $gaps_href = $yyyy_href->{$yyyy};
539 9         24 foreach my $key ( keys $gaps_href->%* ) {
540 9         14 my $gap_text = $key;
541 9         16 my $quality_pct = $gaps_href->{$key};
542 9         34 push @output, [ $stnid, $yyyy, $quality_pct, $gap_text ];
543             }
544             }
545             }
546            
547 3 100       20 return $return_list ? @output : tsv(\@output);
548             }
549            
550             =head2 datarow_as_hash ( $row_aref )
551            
552             This is a convenience method that may be used to convert table rows
553             returned by the row_sub callback subroutine of load_data from a perl
554             list into a hash. It automatically calls get_header to get the
555             headers for the table data. When you pass it a reference to a data
556             row (obtained vis the row_sub callback routine given to load_data)
557             it combines the elements of the data row list with the column headings
558             and returns a hash.
559            
560             =cut
561            
562 9     9 1 1743 method datarow_as_hash ( $row_aref ) {
  9         15  
  9         15  
  9         14  
563 9         28 my @header = $self->get_header( list => 1 );
564 9         17 my %h;
565            
566             ## no critic [ProhibitDoubleSigils]
567 9         93 @h{@header} = $row_aref->@*;
568            
569 9         123 return %h;
570             }
571            
572             =head2 get_missing_rows( list => 0 )
573            
574             In support of a -nogaps option, to generate detail output that does
575             not have any gaps due to missing data, this method gets a list of
576             rows for the months and days that had missing data for a given
577             station id in a given year.
578            
579             Returns lines of tab-separated strings.
580            
581             =over 4
582            
583             =item argument: list =>
584            
585             If the arguments include the 'list' keyword and a true value, then a
586             list is returned rather than tab-separated lines of text. Defaults
587             to false.
588            
589             =item option: nogaps
590            
591             Emits extra rows after the detail data rows to make up for missing
592             months or days. This is primarily so that if the data is charted
593             by date, then the x-axis will have all the dates from start to finish.
594             Otherwise, the chart and any trends that are projected on it will
595             be distorted by the missing data.
596            
597             =back
598            
599             =cut
600            
601 2     2 1 2810 method get_missing_rows ( %args ) {
  2         6  
  2         6  
  2         3  
602 2   33     9 my $return_list = $args{list} // $_return_list;
603            
604 2         5 my @output;
605            
606             my %loc;
607 2         8 map { $loc{$_} = $_station{$_}->name } keys %_station;
  4         16  
608            
609             ## no critic [ProhibitDoubleSigils]
610             ## no critic [ProhibitMagicNumbers]
611 2         12 foreach my $stnid ( sort keys $_missing_href->%* ) {
612 4         9 my $yyyy_href = $_missing_href->{$stnid};
613 4         13 foreach my $yyyy ( sort keys $yyyy_href->%* ) {
614 8         18 my $values_href = $yyyy_href->{$yyyy};
615 8         53 foreach my $v ( keys $values_href->%* ) {
616 8         22 my ($months_aref, $mmdd_aref) = _parse_missing_text($v);
617 8         22 foreach my $mm ( $months_aref->@* ) {
618 0         0 my $ndays = _days_in_month($yyyy, $mm);
619 0         0 foreach my $day (1..$ndays) {
620 0         0 push @output, [ $yyyy, $mm, $day, ($EMPTY) x 8, $stnid, $loc{$stnid} ];
621             }
622             }
623 8         15 foreach my $mmdd_aref ( $mmdd_aref->@* ) {
624 98         151 my ($mm,$dd) = $mmdd_aref->@*;
625 98         365 push @output, [ $yyyy, $mm, $dd, ($EMPTY) x 8, $stnid, $loc{$stnid} ];
626             }
627             }
628             }
629             }
630            
631 2 100       16 return $return_list ? @output : tsv(\@output);
632             }
633            
634             =head2 get_options ( list => 0, no_header => 0 )
635            
636             Get text which shows the options that were in effect for this
637             processing run, in a Getopt style. Includes a heading and a
638             footing with explanatory notes. If argument 'list' is true, returns
639             the lines as a list. Line [1] contains the options string.
640            
641             =over 4
642            
643             =item argument: list =>
644            
645             If the arguments include the 'list' keyword and a true value, then a
646             list is returned rather than tab-separated lines of text. Defaults
647             to false.
648            
649             =item argument: no_header =>
650            
651             If the arguments include the 'no_header' keyword and a true value,
652             then the return value will not include a header line or the explanatory
653             footing notes. Default is false.
654            
655             =back
656            
657             =cut
658            
659 5     5 1 3348 method get_options ( %args ) {
  5         13  
  5         14  
  5         8  
660 5   66     31 my $return_list = $args{list} // $_return_list;
661            
662 5         10 my @output;
663            
664             push @output, 'Options:'
665 5 50       26 unless $args{no_header};
666            
667 5         34 push @output, $TAB . $_ghcn_opt_obj->options_as_string;
668            
669 5 50       22 if ( not $args{no_header} ) {
670 5         14 push @output, $EMPTY;
671 5         19 push @output, $TAB . 'Note that quality is a percentage; radius in km';
672             }
673            
674 5 100       32 return $return_list ? @output : tsv(\@output);
675             }
676            
677             =head2 get_stations ( list => 0, kept => 1, no_header => 0 )
678            
679             Return lines of text with tab-separated columns describing each of
680             the stations for stations that were found to meet the filtering
681             criteria specified in the user options.
682            
683             =over 4
684            
685             =item argument: kept =>
686            
687             If the argument kept => 0 is specified, and load_data has already
688             been invoked, then the stations which were rejected due to quality flags
689             or missing data will be returned. If kept => 1 is specified, then
690             the stations that were kept will be returned.
691            
692             =item argument: list =>
693            
694             If the arguments include the 'list' keyword and a true value, then a
695             list is returned rather than tab-separated lines of text. Defaults
696             to false.
697            
698             =item argument: no_header =>
699            
700             If the arguments include the 'no_header' keyword and a true value,
701             then the return value will not include a header line. Default is
702             false.
703            
704             =back
705            
706             =cut
707            
708 16     16 1 125 method get_stations ( %args ) {
  16         38  
  16         74  
  16         33  
709 16   66     80 my $return_list = $args{list} // $_return_list;
710 16         35 my $kept = $args{kept};
711 16         42 my $no_header = $args{no_header};
712            
713 16         37 my @output;
714            
715             # no stations, so just return empty
716 16 50       68 return $return_list ? @output : tsv(\@output)
    100          
717             if $self->stn_filtered_count == 0;
718            
719 15 100       69 push @output, scalar Weather::GHCN::Station::Headings
720             unless $no_header;
721            
722 15         37 my $ii = 0;
723 15         1413 foreach my $id ( sort keys %_station ) {
724 1833         5055 my $stn = $_station{$id};
725 1833         2481 $ii++;
726 1833 100 100     5442 if (not defined $kept
      100        
      100        
      100        
727             or $kept and $stn->error_count == 0
728             or not $kept and $stn->error_count > 0)
729             {
730 50         153 push @output, scalar $stn->row;
731             }
732             }
733            
734 15 50       226 croak '*E* get_stations called before load_stations'
735             if $ii == 0;
736            
737 15 100       119 return $return_list ? @output : tsv(\@output);
738             }
739            
740             =head2 get_station_note_list ()
741            
742             Return a list consisting of tab-separated code/description pairs that
743             rejected stations were flagged with; i.e. the reasons for their
744             rejection.
745            
746             =cut
747            
748 2     2 1 849 method get_station_note_list () {
  2         5  
  2         4  
749 2         7 my @stn_notes;
750            
751 2         9 my $notes_nrs = rng_new();
752            
753 2         17 foreach my $id ( sort keys %_station ) {
754 13         1228 my $stn = $_station{$id};
755             # combine all the notes
756 13         40 $notes_nrs->add( $stn->note_nrs->as_array );
757             }
758            
759 2 50       235 if ( not $notes_nrs->is_empty ) {
760 2         22 foreach my $note_code ($notes_nrs->as_array) {
761 4         116 push @stn_notes, join $TAB, $note_code, $NoteMsg{$note_code};
762             }
763             }
764            
765 2         18 return @stn_notes;
766             }
767            
768             =head2 get_summary_data ( list => 0 )
769            
770             Gets a list of summarized the temperature or precipitation data
771             by day, month or year depending on the report option.
772            
773             Returns undef if the report option is 'detail'.
774            
775             The actual columns that are returned is dictated by the report option
776             and by the tavg and precip options provided when the object was
777             instantiated by new().
778            
779             =over 4
780            
781             =item argument: list =>
782            
783             If the arguments include the 'list' keyword and a true value, then a
784             list is returned rather than tab-separated lines of text. Defaults
785             to false.
786            
787             =item option: report
788            
789             Determines the level of summarization.
790            
791             =item option: range
792            
793             If the range option is provided, the output rows are restricted to
794             those years that are within the specified range(s).
795            
796             =back
797            
798             =cut
799            
800 6     6 1 85 method get_summary_data ( %args ) {
  6         19  
  6         17  
  6         11  
801 6   66     38 my $return_list = $args{list} // $_return_list;
802            
803             # when an 'detail' report is requested, we generate detail data only
804             # so there is no summary data to print
805 6 50       150 return if $Opt->report eq 'detail';
806            
807 6         108 my @output;
808            
809             # build hash of measure names and indices so measures can be
810             # inserted into the correct columns
811             my %measure_idx;
812 6         15 my $ii = 0;
813 6         47 foreach my $m ( $_measures_obj->measures ) {
814 21         55 $measure_idx{$m} = $_measure_begin_idx + $ii++;
815             }
816            
817 6         137 my $opt_range_nrs = rng_new($Opt->range);
818            
819             # generate and print the data rows
820 6         270 foreach my $key ( sort keys $_aggregate_href->%* ) {
821 433         1779 my ($year, $month, $day) = unpack 'A4 A2 A2', $key;
822            
823 433 50 33     8581 next if $Opt->range and not $opt_range_nrs->contains($year);
824            
825 433         14519 my $row = $_aggregate_href->{$key};
826            
827 433         626 my @row;
828 433         924 push @row, $year;
829 433 100 100     7286 push @row, $month if $Opt->report eq 'daily' or $Opt->report eq 'monthly';
830 433 100       11727 push @row, $day if $Opt->report eq 'daily';
831 433         4698 push @row, int($year / 10) * 10; ## no critic [ProhibitMagicNumbers]
832 433 100       6660 if ( not $Opt->report eq 'yearly' ) {
833 431         4444 push @row, _seasonal_decade($year, $month);
834 431         891 push @row, _seasonal_year($year, $month);
835 431         881 push @row, _seasonal_qtr($year, $month);
836             }
837            
838 433         1207 foreach my $elem ( $_measures_obj->measures ) {
839 1302         3178 my $sum = $row->{$elem}->[0];
840 1302         2069 my $cnt = $row->{$elem}->[1];
841 1302 100 50     6635 $row[ $measure_idx{$elem} ] = $cnt ? sprintf '%.2f', ($sum // 0) / $cnt : $EMPTY;
842             }
843            
844 433         1319 push @output, \@row;
845             }
846            
847 6 100       159 return $return_list ? @output : tsv(\@output);
848             }
849            
850            
851             =head2 get_timing_stats ( list => 0 )
852            
853             Get a list of the timers, with durations and notes, in alphabetical
854             order by timer label.
855            
856             =over 4
857            
858             =item argument: list =>
859            
860             If the arguments include the 'list' keyword and a true value, then a
861             list is returned rather than tab-separated lines of text. Defaults
862             to false.
863            
864             =back
865            
866             =cut
867            
868 3     3 1 795 method get_timing_stats ( %args ) {
  3         7  
  3         9  
  3         5  
869 3   66     17 my $return_list = $args{list} // $_return_list;
870            
871 3         8 my @output;
872            
873 3         18 foreach my $k ( $_tstats->get_timers() ) {
874 38   66     107 my $note = $_tstats->get_note($k) // $EMPTY;
875             ## no critic [ProhibitMagicNumbers]
876 38         90 my $dur = sprintf '%.1f', $_tstats->get_duration($k) * 1000.0;
877 38         82 my $label = $k . $SPACE . $note;
878             push @output, $args{rows_as_tsv}
879 38 50       131 ? [ $dur, $label ]
880             : join $TAB, $dur, $label;
881             }
882            
883 3 100       25 return $return_list ? @output : tsv(\@output);
884             }
885            
886             =head2 has_missing_data ()
887            
888             Returns true if any missing data was detected amongst the stations
889             that were processed. The calling script can use this to decide
890             whether to issue a warning to the user. A list of missing data
891             specifics can be sent to the output by calling method
892             get_missing_data_ranges.
893            
894             =cut
895            
896 4     4 1 23 method has_missing_data () {
  4         11  
  4         9  
897 4         19 my $keycount = ( keys $_missing_href->%* );
898 4 100       175 return $keycount ? $TRUE : $FALSE;
899             }
900            
901             =head2 load_data ( progress_sub => undef, row_sub => sub { say @_ } )
902            
903             Load the daily weather data for each of the stations that are were
904             loaded into the collection. Print the data if option report detail is
905             given. Otherwise cache the data so it can be aggregated at a later
906             step.
907            
908             =over 4
909            
910             =item argument: progress_sub => undef
911            
912             As fetching and parsing each daily data page can take some time, an
913             optional callback hook is provided so the caller can emit a
914             progress message before each station's data is loaded; e.g.
915             progress => sub{ say {STDERR} @_ }.
916            
917             =item argument: row_sub => sub { say @_ }
918            
919             Optional callback hook to allow the caller to provide their own
920             subroutine for printing (or collecting in a list, or both) the
921             row-level station data that is fetched when the report option is 'detail'.
922             Defaults to printing via the 'say' operator.
923            
924             =item option: report
925            
926             When report detail is specified, the weather data for each station is
927             printed immediately (via the row_sub callback hook).
928            
929             For all other report options, the data is fetched from each station
930             and kept in a cache so that it can be aggregated by invoking
931             summarize_data(). The row_sub hook is not invoked.
932            
933             =back
934            
935             =cut
936            
937 12     12 1 7494 method load_data ( %args ) {
  12         30  
  12         45  
  12         29  
938 12         34 my $progress_callback = $args{progress_sub};
939 12         38 my $row_callback = $args{row_sub};
940            
941             my @station_objs =
942 3         24 sort { $a->id cmp $b->id }
943 12         50 grep { $_->error_count == 0 } values %_station;
  15         62  
944            
945 12         43 my $stn_count = @station_objs;
946            
947 12         34 my $ii = 0;
948 12         39 foreach my $stn ( @station_objs ) {
949 15         64 my $daily_url = $GHCN_DATA . $stn->id . '.dly';
950 15         84 my $content = $self->_fetch_url($daily_url, 'URI::Fetch_daily');
951            
952 15 100       72 if ($progress_callback) {
953 4     4   40 no strict 'refs'; ## no critic [ProhibitNoStrict]
  4         11  
  4         62126  
954 4         20 my $msg = sprintf 'processing station %d/%d %s %s', ++$ii, $stn_count, $stn->id, $stn->name;;
955 4         24 $progress_callback->($msg);
956             }
957            
958 15 50       73 if (not $content) {
959 0         0 $stn->add_note($ERR_FETCH, $stn->id . $TAB . 'fetch daily URL failed');
960 0         0 next;
961             }
962            
963 15         79 my $insufficient_quality = $self->_load_daily_data($stn, $content);
964            
965 15 100       347 if ( $Opt->report eq 'detail' ) {
966 4         86 $self->_print_detail_data( $_measure_begin_idx, $stn, $row_callback );
967             } else {
968 11 100       239 $self->_aggregate_station_data($stn)
969             unless $insufficient_quality;
970             }
971            
972 15         111 $self->_clear_daily_data();
973            
974 15         122 $self->_capture_data_hash_stats($stn->id, $ii);
975             }
976            
977 12         73 return;
978             }
979            
980             =head2 load_stations ( content => undef )
981            
982             Read the GHCN stations list and the stations inventory list and create
983             a hash of Station objects, keyed on station id, filtered according
984             to the options provided in set_options().
985            
986             Returns a hash of Weather::GHCN::Station objects, keyed on station id.
987            
988             =over 4
989            
990             =item argument: content => undef
991            
992             Normally, load_stations will fetch the stations list from the cache,
993             or if not in the cache (or its stale) then from the GHCN web repository.
994             However, an API caller might want to obtain the station text by some
995             other means, in which case it can use the optional 'content' keyword
996             argument to just pass in a scalar containing the station text.
997            
998             =item option: country
999            
1000             Selects only those stations that match the 2-digit GEC (formerly
1001             FIPS) country code or that uniquely match the name or partial name
1002             given in .
1003            
1004             =item option: state
1005            
1006             Selects only those stations that match a US state or Canadian provinc
1007             code.
1008            
1009             =item option: location
1010            
1011             Selects only those stations with a name that matches the specified
1012             pattern, which can be either a station id, or a comma-separated
1013             list of station id's, or a regex. If a regex, then it is anchored
1014             on the left and whitespace is NOT ignored.
1015            
1016             =item option: gps
1017            
1018             This option selects stations within a certain radius of the designated
1019             latitude and longitude, expressed as positive and negative numbers
1020             (not using N, S, W, E designators).
1021            
1022             =item option: radius
1023            
1024             In conjunction the gps options, determines the radius in kilometers
1025             for the search area. Defaults to 25 km.
1026            
1027             =item option: gsn
1028            
1029             Select only GCOS Surface Network stations, which is a baseline
1030             network comprising a subset of about 1000 stations chosen mainly to
1031             give a fairly uniform spatial coverage from places where there is a
1032             good length and quality of data record. See
1033             L
1034             cos-surface-network-gsn-program-overview>
1035            
1036             =back
1037            
1038             =cut
1039            
1040 20     20 1 14182 method load_stations (%args) {
  20         49  
  20         53  
  20         40  
1041            
1042 20   66     155 my $content = $args{content} // $self->_fetch_url( $GHCN_STN_LIST_URL, 'URI::Fetch_stn');
1043            
1044 20 50       24843 if ( $content =~ m{(.*?)}xms ) {
1045 0         0 croak '*E* unable to fetch data from ' . $GHCN_STN_LIST_URL . ': ' . $1;
1046             }
1047            
1048             ## no critic [InputOutput::RequireBriefOpen]
1049 20 50   3   348754 open my $stn_fh, '<', \$content
  3         42  
  3         7  
  3         33  
1050             or croak '*E* unable to open stations_content string';
1051            
1052 20         44834 $_tstats->start('Parse_stn');
1053            
1054 20         46 my $is_stnid_filter;
1055 20 100       101 $is_stnid_filter = keys %{ $_stnid_filter_href }
  4         53  
1056             if $_stnid_filter_href;
1057            
1058 20         56 my %stnidx;
1059            
1060             # Scan the station table
1061             # - filtering on country, state, location and GIS distance according to options
1062 20         384 while ( my $line = <$stn_fh> ) {
1063 2440940         13124574 $_stn_count++; # increment the stn count in the object
1064            
1065             # |--- 0---|--- 10---|--- 20---|--- 30---|--- 40---|--- 50---|--- 60---
1066             # |123456789|123456789|123456789|123456789|123456789|123456789|123456789
1067             # (stationid).(latitu).(longitu).(elev).st.--name-----------------------
1068             # ACW00011604 17.1167 -61.7833 10.1 ST JOHNS COOLIDGE FLD
1069            
1070             ## no critic [ProhibitMagicNumbers]
1071 2440940         4321871 my $id = substr $line, 0, 11;
1072            
1073 2440940 100 100     5587833 next if $is_stnid_filter and not $_stnid_filter_href->{$id};
1074            
1075 1952762         4976443 my $lat = 0 + substr $line, 12, 8; # coerce to number
1076 1952762         3935013 my $long = 0 + substr $line, 21, 9; # coerce to number
1077 1952762         3572221 my $elev = 0 + substr $line, 31, 6; # coerce to number
1078 1952762         3134726 my $state = substr $line, 38, 2;
1079 1952762         3226756 my $name = substr $line, 41, 30;
1080 1952762         2896587 my $gsn_flag = substr $line, 72, 3;
1081             # my $hcr_crn_flag = substr $line, 76, 3;
1082             # my $wmo_id = substr $line, 80, 5;
1083            
1084 1952762         2804679 my $country = substr $id, 0, 2;
1085 1952762         6986273 $name =~ s{ \s+ \Z }{}xms;
1086            
1087 1952762 100       4612474 my $gsn = $gsn_flag eq 'GSN' ? 'GSN' : $EMPTY;
1088            
1089             ## use critic [ProhibitMagicNumbers]
1090            
1091 1952762 100       3821687 if (not $is_stnid_filter) {
1092             ## no critic [RequireExtendedFormatting]
1093 1952752         33051271 my $opt_country = $Opt->country;
1094 1952752 100 100     45447346 next if $Opt->country and $country !~ m{\A$opt_country}msi;
1095            
1096 1685072         40424899 my $opt_state = $Opt->state;
1097 1685072 100 100     38095332 next if $Opt->state and $state !~ m{\A$opt_state}msi;
1098            
1099             ## use critic [RequireExtendedFormatting]
1100 1351232 100 100     31107314 next if $Opt->location and not _match_location($id, $name, $Opt->location);
1101            
1102 123845 100       2964161 if ( $Opt->gps ) {
1103 122047         2870810 my ($opt_lat, $opt_long) = split m{[,;\s]}xms, $Opt->gps;
1104 122047         1454778 my $distance = _gis_distance($opt_lat, $opt_long, $lat, $long);
1105 122047 100       2231033 next if $distance > $Opt->radius;
1106             }
1107            
1108 1818 50 33     43757 next if $Opt->gsn and not $gsn;
1109             }
1110            
1111 1828         28857 $_station{$id} = Weather::GHCN::Station->new(
1112             id => $id,
1113             country => $country,
1114             state => $state,
1115             active => $EMPTY,
1116             lat => $lat,
1117             long => $long,
1118             elev => $elev,
1119             name => $name,
1120             gsn => $gsn
1121             );
1122            
1123 1828         6063 $stnidx{$_station{$id}->coordinates}++;
1124             }
1125 20 50       298 close $stn_fh or croak '*E* unable to close stations_content string';
1126            
1127 20         201 $_tstats->stop('Parse_stn');
1128            
1129 20         123 $_stn_selected_count = keys %_station;
1130            
1131             # assign a unique index to each station with matching coordinates
1132 20         66 my $ii = 0;
1133 20         1434 foreach my $coord (sort keys %stnidx) {
1134 1808         2997 $stnidx{$coord} = ++$ii;
1135             }
1136            
1137 20         1396 foreach my $stnid ( sort keys %_station ) {
1138 1828         3484 my $stn = $_station{$stnid};
1139 1828         3973 $stn->idx = $stnidx{$stn->coordinates};
1140             }
1141            
1142 20         279 $_stn_filtered_count = $self->_load_station_inventories();
1143            
1144 20         892 return \%_station;
1145             }
1146            
1147             =head2 report_kml( list => 0 )
1148            
1149             Output the coordinates of the station collection in KML format, for
1150             import into Google Earth as placemarks. The active range of each
1151             station will be included as timespans so that you can view the
1152             placemarks across time.
1153            
1154             =over 4
1155            
1156             =item argument: list
1157            
1158             If the argument list contains the 'list' keyword and a true value,
1159             then a perl list is returned. Otherwise, a string consisting of lines
1160             of text is returned.
1161            
1162             =item option: kml
1163            
1164             Print KML on stdout.
1165            
1166             =item option: kmlcolor
1167            
1168             A color name, one of blue, green, azure, purple, red, white or yellow.
1169             Only the first character is recognized, so 'b' and 'bob' both result
1170             in blue. All colors are given an opacity of 50 (the range is 00 to ff).
1171            
1172             =back
1173            
1174             =cut
1175            
1176 2     2 1 1168 method report_kml ( %arg ) {
  2         4  
  2         7  
  2         4  
1177 2   66     28 my $return_list = $arg{list} // $_return_list;
1178            
1179 2         98 my $kml_color = _get_kml_color( $Opt->kmlcolor );
1180 2         5 my @output;
1181            
1182 2         8 push @output, '';
1183 2         6 push @output, '';
1184 2         5 push @output, '';
1185 2         8 push @output, ' ';
1194            
1195 2         11 foreach my $stn ( values %_station ) {
1196 13 100       34 next if $stn->error_count;
1197             # TODO: use ->sets to get a list of spans and use the first span instead of splitting run_list
1198 4         20 my ($start, $end) = split m{ [-] }xms, $stn->active;
1199 4   33     28 $end //= $start;
1200            
1201 4         16 my $desc = $stn->description();
1202            
1203 4         11 push @output, ' ';
1204 4         21 push @output, ' #mypin';
1205 4         110 push @output, sprintf ' %s', encode_entities($stn->name);
1206 4         98 push @output, sprintf ' %s', encode_entities($desc);
1207 4         66 push @output, ' ';
1208 4         14 push @output, sprintf ' %s-01-01T00:00:00Z', $start;
1209 4         62 push @output, sprintf ' %s-12-31T23:59:59Z', $end;
1210 4         11 push @output, ' ';
1211 4         18 push @output, sprintf ' %f, %f, %f', $stn->long, $stn->lat, $stn->elev;
1212 4         15 push @output, ' ';
1213             }
1214            
1215 2         7 push @output, '';
1216 2         4 push @output, '';
1217            
1218 2 100       24 return $return_list ? @output : tsv(\@output);
1219             }
1220            
1221             =head2 report_urls( list => 0, curl => 0 )
1222            
1223             Output the URL of the .dly (daily weather data) file for each of the
1224             stations that meet the selection criteria.
1225            
1226             =over 4
1227            
1228             =item argument: list
1229            
1230             If the argument list contains the 'list' keyword and a true value,
1231             then a perl list is returned. Otherwise, a string consisting of lines
1232             of text is returned.
1233            
1234             =item argument: curl
1235            
1236             If the argument list contains the 'curl' keyword and a true value,
1237             then the output will be a set of lines that can be saved in a file
1238             for subsequent input to the B program using the B<-K> option.
1239             This facilitates bulk fetching of .dly files into the cache.
1240            
1241             =back
1242            
1243             =cut
1244            
1245 0     0 1 0 method report_urls ( %arg ) {
  0         0  
  0         0  
  0         0  
1246 0   0     0 my $return_list = $arg{list} // $_return_list;
1247            
1248 0         0 my @output;
1249            
1250             push @output, '# Use curl -K to download these URL\'s'
1251 0 0       0 if $arg{curl};
1252            
1253 0         0 foreach my $stn ( values %_station ) {
1254 0 0       0 next if $stn->error_count;
1255             # TODO: use ->sets to get a list of spans and use the first span instead of splitting run_list
1256 0         0 my ($start, $end) = split m{ [-] }xms, $stn->active;
1257            
1258 0 0       0 if ( $arg{curl} ) {
1259 0         0 my @parts = split m{ [/] }xms, $stn->url;
1260 0         0 push @output, 'output = ' . $parts[-1];
1261 0         0 push @output, 'url = ' . $stn->url;
1262             } else {
1263 0         0 push @output, $stn->url;
1264             }
1265             }
1266            
1267 0 0       0 return $return_list ? @output : tsv(\@output);
1268             }
1269            
1270             =head2 ($opt, @errors) = set_options ( %args )
1271            
1272             Set various options for this StationTable instance. These options
1273             will affect the processing and output by subsequent method calls.
1274            
1275             Returns an Option object and a list of errors. It is advised you
1276             check @errors after calling set_options to report the errors and to
1277             cease processing if there are any; e.g. I.
1278            
1279             You may want to set up a file-scoped lexical variable to hold the
1280             options object. That way it is accessible throughout your code.
1281             The typical calling pattern would look like this:
1282            
1283             my $Opt; # a file-scope lexical
1284            
1285             sub run (@ARGV) {
1286             my $ghcn = Weather::GHCN::StationTable->new;
1287            
1288             my @errors;
1289             ($Opt, @errors) = set_options(...);
1290             die @errors if @errors;
1291             ...
1292             }
1293            
1294             =over 4
1295            
1296             =item timing_stats => $TimingStats_obj
1297            
1298             This optional argument should point to a TimingStats object that was
1299             created by the caller and will be used to collect timing statistics.
1300            
1301             =item hash_stats => \%hash_stats
1302            
1303             This optional argument should be a reference to a hash that was
1304             created by the caller and will be used to collect performance and
1305             memory statistics.
1306            
1307             =back
1308            
1309             =cut
1310            
1311 23     23 1 9539 method set_options (%user_options) {
  23         54  
  23         131  
  23         44  
1312            
1313 23 100       126 if ( $user_options{'profile'} ) {
1314             # save the expanded profile file path in the object
1315 8         43 $_profile_file = path( $user_options{'profile'} )->absolute()->stringify;
1316 8         701 $_profile_href = Weather::GHCN::Options->get_profile_options($_profile_file);
1317             }
1318            
1319 23   66     283 $_ghcn_opt_obj //= Weather::GHCN::Options->new();
1320             # combine user-specified options with the defaults
1321 23         133 ($_opt_href, $_opt_obj) = $_ghcn_opt_obj->combine_options(\%user_options, $_profile_href);
1322            
1323 23 50       129 if ( $_opt_href->{'cachedir'} ) {
1324 23         146 $_cachedir = path( $_opt_href->{'cachedir'} )->absolute()->stringify;
1325 23         2873 $_cache_obj = Weather::GHCN::CacheURI->new($_cachedir, $_opt_obj->refresh);
1326             } else {
1327 0         0 $_cache_obj = Weather::GHCN::CacheURI->new($EMPTY, $_opt_obj->refresh);
1328             }
1329            
1330            
1331 23         149 my @errors = $_ghcn_opt_obj->validate();
1332            
1333 23 100 100     160 if ( defined $_opt_href->{'aliases'} and defined $_opt_href->{'location'} ) {
1334             # if the location matches an aliases, then pull the list of
1335             # stations from the alias definition and assign it to the
1336             # $_stnid_filter
1337 6   66     36 my $stnid_string = $_opt_href->{'aliases'}->{ $_opt_href->{'location'} } // $EMPTY;
1338 6 100       18 if ($stnid_string) {
1339 4         26 my @stns = split m{ [,;\s] }xms, $stnid_string;
1340 4         12 my %stnid_filter;
1341 4         13 foreach my $stnid (@stns) {
1342 8         24 $stnid_filter{$stnid} = $TRUE;
1343             }
1344 4         12 $_stnid_filter_href = \%stnid_filter;
1345 4         12 $_opt_href->{'location'} = undef;
1346             }
1347             }
1348            
1349             # update the combined options hash in the Options object
1350 23         122 $_ghcn_opt_obj->opt_href = $_opt_href;
1351             # update the combined options object in the Options object
1352 23         96 $_ghcn_opt_obj->opt_obj = $_opt_obj;
1353             # save the combined option object in a file-scoped lexical for use throughout this code
1354 23         125 $Opt = $_opt_obj;
1355            
1356 23         243 $_measures_obj = Weather::GHCN::Measures->new($_opt_href);
1357            
1358 23         136 return ($_opt_obj, @errors);
1359             }
1360            
1361             =head2 summarize_data ()
1362            
1363             Aggregate the daily weather data for the stations that were loaded,
1364             according to the report option.
1365            
1366             =over 4
1367            
1368             =item option: report => 'daily|monthly|yearly'
1369            
1370             When the report option is 'detail', no summarization is needed and the
1371             method immediately returns undef.
1372            
1373             =back
1374            
1375            
1376             =cut
1377            
1378 8     8 1 61 method summarize_data () {
  8         20  
  8         19  
1379            
1380             # when an 'detail' report is requested, we generate detail data only
1381             # so there is no need to summarize data.
1382 8 50       197 return if $Opt->report eq 'detail';
1383            
1384             # We'll be replacing $_aggregate_href with this hash after we're
1385             # done, but we can't loop over $_aggregate_href and be changing
1386             # it within the loop. Hence the need for another hash.
1387 8         145 my %summary;
1388            
1389 8         44 $_tstats->start('Summarize_data');
1390            
1391 8         58 while ( my ($k,$href) = each $_aggregate_href->%* ) {
1392             ## no critic [ProhibitMagicNumbers]
1393 3311         6866 my $year = substr $k, 0, 4;
1394 3311         5361 my $month = substr $k, 4, 2;
1395 3311         5019 my $day = substr $k, 6, 2;
1396             ## use critic [ProhibitMagicNumbers]
1397            
1398 3311         4784 my $key = $year;
1399 3311 100 100     64006 $key .= $month if $Opt->report eq 'monthly' or $Opt->report eq 'daily';
1400 3311 100       134298 $key .= $day if $Opt->report eq 'daily';
1401            
1402 3311         38207 foreach my $elem ( keys $href->%* ) {
1403 9989         17939 my $a = $_aggregate_href->{$k}->{$elem};
1404            
1405 9989         20840 my $v = _ddivide( $a->[0], $a->[1] );
1406            
1407 9989   100     24639 my $s = $summary{$key}{$elem} //= [undef, undef];
1408            
1409 9989 100 66     30848 if ($elem eq 'TMIN') {
    100 100        
    100          
1410             # for TMIN we keep the minimum value
1411 3270         5834 $s->[0] = _dmin($s->[0], $v);
1412 3270         9016 $s->[1] = 1.0;
1413             }
1414             elsif ( $elem eq 'TMAX' or $elem eq 'SNWD' ) {
1415             # For TMAX SNOW SNWD PRCP we keep the maximum value
1416 3293         5842 $s->[0] = _dmax($s->[0], $v);
1417 3293         10344 $s->[1] = 1.0;
1418             }
1419             elsif ( $elem eq 'PRCP' or $elem eq 'SNOW' ) {
1420             # for PRCP and SNOW, sum and use a count of 1 so we sum across time
1421 174         332 $s->[0] = _dsum($s->[0], $v);
1422 174         403 $s->[1] = 1.0;
1423             }
1424             else {
1425             # for TAVG and Tavg and the anomaly values we keep the sum and count,
1426             # so we can calculate an average of them across time
1427 3252         6336 $s->[0] = _dsum($s->[0], $v);
1428 3252         6087 $s->[1] = _dcount($s->[1], $v);
1429             }
1430             }
1431             }
1432            
1433 8         214 $_tstats->stop('Summarize_data', _memsize(\%summary, $Opt->performance));
1434            
1435 8         3404 $_aggregate_href = \%summary;
1436            
1437 8         141 return;
1438             }
1439            
1440             =head2 tstats ()
1441            
1442             Provides access to the TimingStats object so the caller can start
1443             and stop script-level timers.
1444            
1445             =cut
1446            
1447 18     18 1 824 method tstats () {
  18         36  
  18         37  
1448 18         90 return $_tstats;
1449             }
1450            
1451             =head2 DOES
1452            
1453             Defined by Object::Pad. Included for POD::Coverage.
1454            
1455             =head2 META
1456            
1457             Defined by Object::Pad. Included for POD::Coverage.
1458            
1459             =cut
1460            
1461             #---------------------------------------------------------------------
1462             # Private Methods
1463             #---------------------------------------------------------------------
1464            
1465 10     10   44 method _aggregate_station_data ($stn) {
  10         25  
  10         26  
  10         25  
1466            
1467 10         49 $_tstats->start('Aggregate_station_data');
1468            
1469            
1470 10         87 while ( my ($yyyymmdd,$href) = each $_daily_href->%* ) {
1471 3997         10944 while ( my ($elem,$v) = each $href->%* ) {
1472             # autovivify the slot, initialize the array, and get a reference to the
1473             # slot to avoid the overhead of multiple key lookups later
1474 11953   100     39100 my $slot_href = $_aggregate_href->{$yyyymmdd}->{$elem} //= [undef, undef];
1475            
1476             # We accumulate into two array elements. The first contains
1477             # the sum and the second the count so we can compute avg = sum/count.
1478             # _dsum and _dcount return the second arg if the first arg is undef
1479             # otherwise, _dsum returns the sum of the two args and _dcount returns the
1480             # value of the first arg incremented by 1
1481 11953         20981 $slot_href->[0] = _dsum($slot_href->[0], $v);
1482 11953         19000 $slot_href->[1] = _dcount($slot_href->[1], $v);
1483             }
1484             }
1485            
1486 10         466 $_tstats->stop('Aggregate_station_data', _memsize($_aggregate_href, $Opt->performance));
1487            
1488 10         34 return;
1489             }
1490            
1491 15     15   64 method _capture_data_hash_stats ($subject, $iter) {
  15         33  
  15         42  
  15         35  
  15         34  
1492            
1493 15         4461 $_hstats{'aggregate'}{$subject}{$iter} = Devel::Size::total_size( $_aggregate_href );
1494 15         307 $_hstats{'flag_cnts'}{$subject}{$iter} = Devel::Size::total_size( $_flag_cnts_href );
1495 15         174 $_hstats{'daily'}{$subject}{$iter} = Devel::Size::total_size( $_daily_href );
1496 15         159 $_hstats{'baseline'}{$subject}{$iter} = Devel::Size::total_size( $_baseline_href );
1497            
1498 15         86 return;
1499             }
1500            
1501 15     15   58 method _clear_daily_data () {
  15         31  
  15         34  
1502            
1503 15         2144 $_daily_href = { };
1504            
1505 15         54 return;
1506             }
1507            
1508 1     1   4 method _compute_anomalies () {
  1         4  
  1         4  
1509            
1510 1         6 $_tstats->start('Compute_anomalies');
1511            
1512 1         6 while ( my ($k,$href) = each $_daily_href->%* ) {
1513 266         667 my ($year,$month,$day) = unpack 'A4 A2 A2', $k;
1514            
1515 266         466 my $base_href = $_baseline_href->{ $month . $day };
1516            
1517 266         683 foreach my $elem ( keys $href->%* ) {
1518 784 50       1362 next if not exists $base_href->{$elem};
1519 784         1990 $href->{'A_' . $elem} = $href->{$elem} - $base_href->{$elem};
1520             }
1521             }
1522            
1523 1         7 $_tstats->stop('Compute_anomalies');
1524            
1525 1         3 return;
1526             }
1527            
1528 1     1   17 method _compute_baseline ($stn, $baseline) {
  1         2  
  1         3  
  1         3  
  1         2  
1529            
1530 1         5 $_tstats->start('Compute_baseline');
1531            
1532 1         6 my $baseline_nrs = rng_new($baseline);
1533            
1534 1         3 my %baseline_data;
1535            
1536 1         6 my $gap_nrs = rng_new( $baseline_nrs->as_string );
1537            
1538 1         8 while ( my ($k,$href) = each $_daily_href->%* ) {
1539 266         6314 my ($year,$month,$day) = unpack 'A4 A2 A2', $k;
1540            
1541 266 100       575 next unless $baseline_nrs->contains($year);
1542            
1543 4 50       94 $gap_nrs->remove($year) if $baseline_nrs->contains($year);
1544            
1545 4         581 while ( my ($elem,$v) = each $href->%* ) {
1546 12   50     42 $baseline_data{ $month . $day }->{$elem}->[0] += $v // 0;
1547 12         45 $baseline_data{ $month . $day }->{$elem}->[1] ++;
1548             }
1549             }
1550            
1551 1 50       28 if ($gap_nrs->cardinality > 0) {
1552 0         0 my $msg = sprintf "%s\tanomalies not calculated\tmissing %d years from the baseline (%s)",
1553             $stn->id, $gap_nrs->cardinality, $gap_nrs->as_string;
1554 0         0 $stn->add_note($WARN_ANOM, $msg);
1555 0         0 $_baseline_href = { };
1556 0         0 return;
1557             }
1558            
1559             ## no critic [ProhibitDoubleSigils]
1560 1         34 while ( my ($md,$elem_href) = each %baseline_data ) {
1561 2         7 while ( my ($elem, $aref) = each $elem_href->%* ) {
1562 6         13 my ($sum, $count) = $aref->@*;
1563 6         26 $baseline_data{$md}->{$elem} = $sum / $count;
1564             }
1565             }
1566            
1567 1         4 $_baseline_href = \%baseline_data;
1568            
1569 1         5 $_tstats->stop('Compute_baseline');
1570            
1571 1         7 return;
1572             }
1573            
1574 15     15   58 method _compute_mean_and_counts ($stn) {
  15         36  
  15         31  
  15         31  
1575            
1576 15         72 $_tstats->start('Compute_mean');
1577            
1578             # In this subroutine we loop through the daily min and max values that
1579             # were captured from the daily page and replace them with the calculated
1580             # mean. We replace in order to save on the memory and performance overhead
1581             # of creating a new hash table. Because the daily page has separate rows for
1582             # max and min, we have to scan the entire page in order to collect the max and
1583             # min for each day. It's possible there may be a day with one or both entries
1584             # missing, in which case we undef the corresponding entry since it's impossible
1585             # to calculate the mean in that case.
1586            
1587 15         107 while ( my ($yyyymmdd, $href) = each $_daily_href->%* ) {
1588            
1589 5426         9457 my $max = $href->{TMAX};
1590 5426         8358 my $min = $href->{TMIN};
1591            
1592 5426 100 100     14374 if (defined $max and defined $min) {
1593 5290         16566 $href->{Tavg} = ($max + $min) / 2.0;
1594             }
1595             }
1596            
1597 15         90 $_tstats->stop('Compute_mean');
1598            
1599 15         35 return;
1600             }
1601            
1602 16     16   1878 method _compute_quality ($stn, $context_msg, $day_count, $range, $quality) {
  16         42  
  16         33  
  16         43  
  16         48  
  16         36  
  16         65  
  16         26  
1603            
1604 16         83 $_tstats->start('Compute_quality');
1605            
1606 16         38 my $expected_days = 0;
1607 16         37 my $insufficient_quality = 0;
1608            
1609 16         60 map { $expected_days += _days_in_year($_) } rng_new($range)->as_array();
  24         638  
1610            
1611             ## no critic [ProhibitMagicNumbers]
1612 16         99 my $data_quality = int(($day_count / $expected_days) * 1000 + 0.5) / 10.0;
1613            
1614 16 100       69 if ( $data_quality < $quality ) {
1615 1         7 my $msg = sprintf "%s\tstation only has %d days in %s and needs %d (%0.1f%% < %d%%)",
1616             $stn->id, $day_count, $context_msg, $expected_days, $data_quality, $quality;
1617 1         9 $stn->add_note($ERR_INSUFF, $msg);
1618 1         4 $insufficient_quality++;
1619             }
1620            
1621 16         77 $_tstats->stop('Compute_quality');
1622            
1623 16         66 return $insufficient_quality;
1624             }
1625            
1626 54     54   208 method _fetch_url ($url, $timer_label=$EMPTY) {
  54         133  
  54         154  
  54         147  
  54         121  
1627            
1628 54 50       428 $_tstats->start($timer_label) if $timer_label;
1629            
1630 54         416 my ($from_cache, $content) = $_cache_obj->fetch($url);
1631            
1632 54 50       356 carp '*I* cache refresh is set to ' . $_cache_obj->refresh
1633             unless $content;
1634 54 50       239 croak '*E* unable to fetch data from ' . $url
1635             unless $content;
1636            
1637 54 50       606 $_tstats->stop($timer_label) if $timer_label;
1638            
1639 54         15966 return $content;
1640             }
1641            
1642            
1643 20     20   95 method _filter_stations ($stations_href) {
  20         55  
  20         43  
  20         39  
1644 20         307 $_tstats->start('Filter_stn');
1645            
1646 20         1364 my $opt_range_nrs = rng_new( $Opt->range );
1647 20         537 my $opt_active_nrs = rng_new( $Opt->active );
1648            
1649             ## no critic [ProhibitDoubleSigils]
1650 20         212 foreach my $stn (values $stations_href->%*) {
1651 1828         24652 my $stn_active_nrs = rng_new( $stn->active );
1652            
1653             ##debug '=== station ', $stn->id, ' ', $stn->name;
1654            
1655 1828 100       5971 $stn->add_note($ERR_METRICS)
1656             if not $stn->elems_href->%*;
1657            
1658 1828 50 66     38233 $stn->add_note($ERR_RANGE_FULL)
1659             if $Opt->range and not $opt_range_nrs->subset($stn_active_nrs);
1660            
1661 1828 50 66     54654 $stn->add_note($ERR_NO_GIS)
1662             if $Opt->gps and $stn->coordinates eq $EMPTY;
1663            
1664 1828 100 100     44468 $stn->add_note($ERR_NOACTIVE)
1665             if $Opt->active and not $stn->active;
1666            
1667 1828 100       4635 if ($stn->active) {
1668 621 100       10131 if ($Opt->partial) {
1669 548         6579 my $s = $opt_active_nrs->intersection($stn_active_nrs);
1670            
1671 548 100 66     229418 $stn->add_note($ERR_NOT_PARTIAL)
1672             if $Opt->active and $s->is_empty;
1673             } else {
1674 73 100 100     1916 $stn->add_note($ERR_NOT_ACTIVE)
1675             if $Opt->active and not $opt_active_nrs->subset($stn_active_nrs);
1676             }
1677            
1678             } else {
1679 1207 50       24022 $stn->add_note($ERR_NOACTIVE)
1680             if $Opt->range;
1681             }
1682             }
1683            
1684 20         1229 $_tstats->stop('Filter_stn');
1685            
1686 20         254 my $count = grep { $_->error_count == 0 } values $stations_href->%*;
  1828         4309  
1687            
1688 20         85 return $count;
1689             }
1690            
1691 19     19   301 method _find_gaps ($stn, $range, $type, $noteid, $years_aref) {
  19         41  
  19         40  
  19         50  
  19         44  
  19         41  
  19         41  
  19         36  
1692 19         113 my $nrs = rng_new( $range );
1693 19         96 my $years_nrs = rng_new( $years_aref->@* );
1694 19         115 my $gap_nrs = $nrs->diff( $years_nrs );
1695 19 100       7905 if ($gap_nrs->cardinality) {
1696 1         35 my $msg = sprintf "%s\tmissing data in the $type range\tyears %s", $stn->id, $gap_nrs->as_string;
1697 1         50 $stn->add_note($noteid, $msg, $TRUE);
1698 1         6 my $iter = $gap_nrs->iterate_runs();
1699 1         14 while (my ( $from, $to ) = $iter->()) {
1700 1         15 foreach my $yyyy ($from .. $to) {
1701 1         4 $_missing_href->{$stn->id}{$yyyy}{$EMPTY}++;
1702             }
1703             }
1704             }
1705             }
1706            
1707 15     15   69 method _initialize_flag_cnts () {
  15         44  
  15         31  
1708            
1709             # initialize KEPT flags for all metrics, so all get printed
1710 15         80 foreach my $elem ( $_measures_obj->measures ) {
1711 55   100     265 $_flag_cnts_href->{$elem}->{KEPT} //= 0;
1712             }
1713            
1714 15         44 return;
1715             }
1716            
1717 15     15   61 method _load_daily_data ($stn, $stn_content) {
  15         41  
  15         50  
  15         49  
  15         26  
1718            
1719 15         35 my %gaps;
1720             my %baseline_days;
1721            
1722 15         485 my $opt_range_nrs = rng_new($Opt->range);
1723 15         400 my $opt_baseline_nrs = rng_new($Opt->baseline);
1724 15         393 my $opt_fmonth_nrs = rng_new($Opt->fmonth);
1725 15         413 my $opt_fday_nrs = rng_new($Opt->fday);
1726            
1727 15         116 $self->_initialize_flag_cnts();
1728            
1729             ## no critic [ProhibitDoubleSigils]
1730             ## no critic [ProhibitMagicNumbers]
1731            
1732 15         73 $_tstats->start('Load_daily_data');
1733            
1734             ## no critic [InputOutput::RequireBriefOpen]
1735 15 50       50860 open my $stn_fh, '<', \$stn_content
1736             or croak '*E* unable to open station daily content';
1737            
1738 15         171 while ( my $line = <$stn_fh> ) {
1739            
1740             # |--- 0---|--- 10---|--- 20---|--- 30---|--- 40---|--- 50---|
1741             # |123456789|123456789|123456789|123456789|123456789|123456789|
1742             # iiiiiiiiiiiyyyymmeeee(dddddd)(dddddd)(dddddd)(dddddd)(dddddd) ...
1743             # CA006105976188911TMAX 39 C 122 C 144 C 83 C 28 C ...
1744            
1745 91191         1385030 my $id = substr $line, 0, 11;
1746 91191         183681 my $year = 0 + substr $line, 11, 4; # coerce to number
1747 91191         145583 my $month = 0 + substr $line, 15, 2; # coerce to number
1748 91191         148932 my $element = substr $line, 17, 4;
1749 91191         139059 my $data = substr $line, 21 ;
1750            
1751 91191 100       190667 next unless $element =~ $_measures_obj->re;
1752            
1753 47761 100 100     834383 next if $Opt->fmonth and not $opt_fmonth_nrs->contains($month);
1754            
1755 34836 100       872024 my $need_baseline = $Opt->anomalies and $opt_baseline_nrs->contains($year);
1756 34836 50       833309 if ( $Opt->range ) {
1757 34836 100 100     371370 next unless $opt_range_nrs->contains($year)
1758             or $need_baseline;
1759             }
1760            
1761 638         15241 foreach my $ii ( 0 .. 30 ) {
1762 19778         244331 my $day = $ii + 1;
1763            
1764 19778 100 100     328940 next if $Opt->fday and not $opt_fday_nrs->contains($day);
1765            
1766             # |--- 0---|--- 10---|--- 20---|--- 30---|--- 40---|--- 50
1767             # |123456789|123456789|123456789|123456789|123456789|123456
1768             # 39 C 122 C 144 C 83 C 28 C 67 C 89 C ...
1769 11948         140509 my $daily = substr $data, $ii * 8, 8;
1770            
1771 11948         24968 my $value = 0 + substr $daily, 0, 5;
1772             # my $mflag = substr $daily, 5, 1;
1773 11948         19375 my $qflag = substr $daily, 6, 1;
1774             # my $sflag = substr $daily, 7, 1;
1775            
1776 11948         35926 $qflag =~ s{ \s+ \Z }{}xms;
1777            
1778 11948         33505 my $key = sprintf '%04d%02d%02d', $year, $month, $day;
1779            
1780             # keep track of all baseline days
1781 11948 100       23637 $baseline_days{$key}++
1782             if $need_baseline;
1783            
1784             # skip over values that have quality flags
1785 11948 50       21679 if ( $qflag ne $EMPTY ) {
1786 0         0 $_flag_cnts_href->{$element}->{QFLAGS}->{$qflag}++;
1787 0         0 $_flag_cnts_href->{$element}->{REJECTED}++;
1788 0         0 $_daily_href->{$key}->{QFLAGS}->{$element}->{$qflag}++;
1789 0         0 next;
1790             }
1791            
1792             # skip missing values
1793 11948 100       21946 next if $value == -9999;
1794            
1795 11556         19229 $_flag_cnts_href->{$element}->{KEPT}++;
1796            
1797 11556 50       31361 if ( $element =~ m{ \A ( TMAX | TMIN | TAVG | SNOW | SNWD | PRCP ) \Z }xms ) {
1798             # values are stored in 10th of a unit, so we divide by 10 to scale them to get:
1799             # - temperatures in °C
1800             # - PRCP in mm
1801             # - SNOW and SNWD in cm
1802 11556 100       39926 $_daily_href->{$key}->{$element} = $value / 10.0 if $value;
1803 11556   100     25504 $gaps{$year}->{$month} //= pack 'b30', 0;
1804 11556         40737 vec( $gaps{$year}->{$month}, $ii, 1) = 1;
1805             }
1806             }
1807             }
1808 15 50       168 close $stn_fh or croak '*E* unable to close';
1809            
1810 15         119 $_tstats->stop('Load_daily_data');
1811            
1812             # warn about missing years, months or days
1813 15         145 $self->_report_gaps( $stn, \%gaps );
1814            
1815 15         47 my $insufficient_quality = 0;
1816            
1817             # determine whether there's sufficient data within the -range, based on the -quality threshold
1818 15 50       394 if ( $Opt->range ) {
1819 15         296 my $day_count = keys $_daily_href->%*;
1820 15         298 $insufficient_quality += $self->_compute_quality( $stn, 'range', $day_count, $Opt->range, $Opt->quality );
1821             }
1822            
1823             # determine whether there's sufficient data within the -baseline, based on the -quality threshold
1824 15 100       380 if ( $Opt->anomalies ) {
1825 1         15 my $day_count = keys %baseline_days;
1826 1         19 $insufficient_quality += $self->_compute_quality( $stn, 'baseline', $day_count, $Opt->baseline, $Opt->quality );
1827             }
1828            
1829 15         304 $self->_compute_mean_and_counts( $stn );
1830            
1831 15 100 66     401 if ($Opt->anomalies and $insufficient_quality == 0) {
1832 1         39 $self->_compute_baseline( $stn, $Opt->baseline );
1833 1         9 $self->_compute_anomalies();
1834             }
1835            
1836 15         544 return $insufficient_quality;
1837             }
1838            
1839 20     20   102 method _load_station_inventories () {
  20         48  
  20         42  
1840            
1841 20         111 my $inv_content = $self->_fetch_url($GHCN_STN_INVEN_URL, 'URI::Fetch_inv');
1842            
1843             # Now scan the station inventory list, to get the active range for each station
1844             # - note there are multiple records, one for each element and active range combo
1845            
1846 20         169 $_tstats->start('Parse_inv');
1847            
1848             ## no critic [InputOutput::RequireBriefOpen]
1849 20 50       1331120 open my $inv_fh, '<', \$inv_content
1850             or croak '*E* unable to open inventory content';
1851            
1852 20         508 while (my $inv = <$inv_fh>) {
1853             # |--- 0---|--- 10---|--- 20---|--- 30---|--- 40---
1854             # |123456789|123456789|123456789|123456789|123456789
1855             # (stationid)....................elem.from.(to)
1856             # ACW00011604 17.1167 -61.7833 TMAX 1949 1949
1857            
1858             ## no critic [ProhibitMagicNumbers]
1859 14522300         22058319 my $id = substr $inv, 0, 11;
1860             # my $lat = substr $inc, 12, 8;
1861             # my $long = substr $inc, 21, 9;
1862 14522300         19961827 my $elem = substr $inv, 31, 4;
1863 14522300         19718865 my $firstyear = substr $inv, 36, 4;
1864 14522300         20245699 my $lastyear = substr $inv, 41, 4;
1865             ## use critic [ProhibitMagicNumbers]
1866            
1867             # in case the inventory list contains a station id that isn't in
1868             # the station list, we'll skip it
1869 14522300 100       36317806 next if not exists $_station{$id};
1870            
1871 17104 100       34529 next unless $elem =~ $_measures_obj->re;
1872            
1873 1216         2821 my $stn = $_station{$id};
1874            
1875             # combine the inventory active range set with the station active range
1876 1216         3178 $stn->active = rng_new( $stn->active, $firstyear . $DASH . $lastyear )->as_string();
1877            
1878 1216 50       4091 $stn->elems_href->{$elem}++
1879             if $elem =~ $_measures_obj->re;
1880             }
1881 20 50       225 close $inv_fh or croak;
1882            
1883 20         316 $_tstats->stop('Parse_inv');
1884            
1885 20         178 my $count = $self->_filter_stations(\%_station);
1886            
1887 20         160 return $count;
1888             }
1889            
1890             # TODO: consider ways to let the caller decide on how to output the result
1891            
1892 4     4   19 method _print_detail_data ($measure_begin_idx, $stn, $row_sub) {
  4         8  
  4         12  
  4         10  
  4         8  
  4         8  
1893            
1894 4 50       17 return if not defined $row_sub;
1895            
1896 4         16 $_tstats->start('Printing');
1897            
1898             # build has of measure names and indices so measures can be
1899             # inserted into the correct columns
1900 4         9 my %measure_idx;
1901 4         11 my $ii = 0;
1902 4         23 foreach my $m ( $_measures_obj->measures ) {
1903 19         48 $measure_idx{$m} = $measure_begin_idx + $ii++;
1904             }
1905            
1906 4         82 my $opt_range_nrs = rng_new($Opt->range);
1907            
1908             # generate and print the data rows
1909 4         515 foreach my $key ( sort keys $_daily_href->%* ) {
1910 998         13386 my ($year, $month, $day) = unpack 'A4 A2 A2', $key;
1911 998         1760 my $flags = $EMPTY;
1912            
1913 998 50       19643 next unless ( $Opt->range ? $opt_range_nrs->contains($year) : $TRUE );
    100          
1914            
1915 740         24138 my $row = $_daily_href->{$key};
1916            
1917             $flags = _qflags_as_string( $row->{QFLAGS} )
1918 740 50       1619 if exists $row->{QFLAGS};
1919            
1920 740         1088 my @row;
1921 740         1672 push @row, $year;
1922 740         1177 push @row, $month;
1923 740         1173 push @row, $day;
1924 740         1352 push @row, int($year / 10) * 10; ## no critic [ProhibitMagicNumbers]
1925 740         1520 push @row, _seasonal_decade($year, $month);
1926 740         1537 push @row, _seasonal_year($year, $month);
1927 740         1421 push @row, _seasonal_qtr($year, $month);
1928 740         1936 foreach my $elem ( $_measures_obj->measures ) {
1929             $row[ $measure_idx{$elem} ] = sprintf '%.2f', $row->{$elem}
1930 2252 100       11565 if defined $row->{$elem};
1931 2252   66     5276 $row[ $measure_idx{$elem} ] //= $EMPTY;
1932             }
1933 740         1694 push @row, $flags;
1934 740         2105 push @row, $stn->id;
1935 740         1773 push @row, $stn->name;
1936 740         1666 push @row, $stn->idx;
1937 740         1665 push @row, $stn->grid;
1938            
1939 740         1893 $row_sub->( \@row );
1940             }
1941            
1942 4         152 $_tstats->stop('Printing');
1943            
1944 4         31 return;
1945             }
1946            
1947 15     15   67 method _report_gaps ($stn, $gaps_href) {
  15         41  
  15         34  
  15         37  
  15         24  
1948            
1949 15         91 $_tstats->start('Report_gaps');
1950            
1951             ## no critic [ProhibitDoubleSigils]
1952 15         218 my @years = sort keys $gaps_href->%*;
1953            
1954 15 100       377 if ( $Opt->active ) {
1955 4         132 $self->_find_gaps($stn, $Opt->active, 'active', $WARN_MISS_YA, \@years);
1956             }
1957            
1958 15 50       576 if ( $Opt->range ) {
1959 15         459 $self->_find_gaps($stn, $Opt->range, 'range', $WARN_MISS_YF, \@years);
1960             }
1961            
1962 15         724 my $opt_range_nrs = rng_new($Opt->range);
1963 15         418 my $opt_baseline_nrs = rng_new($Opt->baseline);
1964            
1965 15         1019 my (undef, undef, undef , $mday, $mon, $year) = localtime time;
1966             ## no critic [ValuesAndExpressions::ProhibitMagicNumbers]
1967 15         93 my ($this_yyyy, $this_mm) = ($year+1900, $mon+1);
1968            
1969 15         64 foreach my $yyyy ( @years ) {
1970             # don't report gaps for years that aren't within -range (or -baseline if -anomalies)
1971 150 100 100     7074 next if $Opt->range and
      66        
1972             ( $opt_range_nrs and not $opt_range_nrs->contains($yyyy)
1973             or
1974             $Opt->anomalies and not $opt_baseline_nrs->contains($yyyy) );
1975            
1976 19         1556 my $gap_months = $EMPTY;
1977 19         40 my $days_missing = 0;
1978            
1979            
1980             ## no critic [ProhibitDoubleSigils]
1981             ## no critic [ProhibitMagicNumbers]
1982 19 50       77 my $end_month = $yyyy == $this_yyyy
1983             ? $this_mm
1984             : 12;
1985            
1986 19 100       333 my $month_gap_nrs = $Opt->fmonth
1987             ? rng_new( $Opt->fmonth )
1988             : rng_new( 1 .. $end_month );
1989            
1990 19         242 my @months = sort {$a<=>$b} keys $gaps_href->{$yyyy}->%*;
  406         653  
1991            
1992 19         126 $month_gap_nrs->remove( @months );
1993            
1994 19 100       3140 if ($month_gap_nrs->cardinality) {
1995 1         49 foreach my $mm ($month_gap_nrs->as_array) {
1996 9         67 $days_missing += _days_in_month($yyyy,$mm);
1997             }
1998 1         5 $gap_months = join $SPACE, _month_names($month_gap_nrs->as_array);
1999             }
2000            
2001 19         848 my $opt_fday_nrs = rng_new($Opt->fday);
2002 19         420 my $opt_fmonth_nrs = rng_new($Opt->fmonth);
2003            
2004             ## no critic [ProhibitMagicNumbers]
2005 19         65 my $gap_text = $EMPTY;
2006 19         73 foreach my $mm ( 1 .. $end_month ) {
2007             # skip months that don't match -fmonth when -fmonth was given
2008 228 100 100     8205 next if $Opt->fmonth and not $opt_fmonth_nrs->contains($mm);
2009            
2010 184         2996 my $day_vec = $gaps_href->{$yyyy}->{$mm};
2011            
2012 184 100       444 next if not $day_vec;
2013            
2014 175         287 my @days_with_data;
2015 175         456 my $mdays = _days_in_month($yyyy,$mm);
2016            
2017 175         458 foreach my $day ( 1..31 ) {
2018             # skip days that don't match -fday when -fday was given
2019 5425 100 100     85017 next if $Opt->fday and not $opt_fday_nrs->contains($day);
2020            
2021 5338 100       54178 next if $day > $mdays;
2022            
2023 5239 100       13583 push @days_with_data, $day
2024             if vec($day_vec, $day - 1, 1) == 1;
2025             }
2026            
2027 175 100       2772 my $days_in_month_nrs = $Opt->fday
2028             ? $opt_fday_nrs
2029             : rng_new( 1 .. $mdays );
2030            
2031 175         720 my $days_nrs = rng_new( @days_with_data );
2032            
2033 175         558 my $day_gap_nrs = $days_in_month_nrs->diff($days_nrs);
2034            
2035 175         66438 $days_missing += $day_gap_nrs->cardinality;
2036            
2037 175 100       4017 $gap_text .= $SPACE . _month_names($mm) . '[' . $day_gap_nrs->as_string . ']'
2038             unless $day_gap_nrs->is_empty;
2039             }
2040            
2041 19 100       437 $gap_text = join $SPACE, $gap_months, $gap_text
2042             if $gap_months;
2043            
2044 19         75 $gap_text =~ s{ \A \s+ }{}xms; # trim leading whitespace
2045            
2046 19 100       159 if ( $gap_text !~ m{\A \s* \Z}xms ) {
2047 6         32 my $days_of_data = _days_in_year($yyyy) - $days_missing;
2048 6         27 my $quality = sprintf '%6.1f', 100 * ( $days_of_data / _days_in_year($yyyy) );
2049 6         57 my $msg = sprintf "%s\tmissing data: %d %s %s", $stn->id, $yyyy, $quality, $gap_text;
2050 6         197 $stn->add_note($WARN_MISS_DY, $msg, $Opt->verbose);
2051 6         26 $gap_text =~ s{\A \s+ }{}xms;
2052 6         26 $_missing_href->{$stn->id}{$yyyy}{$gap_text} = $quality;
2053             }
2054             }
2055            
2056 15         136 $_tstats->stop('Report_gaps');
2057            
2058 15         83 return;
2059             }
2060            
2061             #----------------------------------------------------------------------
2062             # -kml Helper Functions
2063             #----------------------------------------------------------------------
2064            
2065             # TODO: allow any KML hex colour code, format
2066            
2067 17     17   11543 sub _get_kml_color ($color_opt) {
  17         35  
  17         27  
2068            
2069             # From https://developers.google.com/kml/documentation/kmlreference#colorstyle
2070            
2071             # Color and opacity (alpha) values are expressed in hexadecimal notation.
2072             # The range of values for any one color is 0 to 255 (00 to ff). For
2073             # alpha, 00 is fully transparent and ff is fully opaque. The order of
2074             # expression is aabbggrr, where:
2075             #
2076             # aa=alpha (00 to ff)
2077             # bb=blue (00 to ff)
2078             # gg=green (00 to ff)
2079             # rr=red (00 to ff).
2080             #
2081             # For example, if you want to apply a blue color with 50 percent opacity
2082             # to an overlay, you would specify the following:
2083             # 7fff0000,
2084             # where alpha=0x7f, blue=0xff, green=0x00, and red=0x00.
2085            
2086 17         133 my %kml_colors = (
2087             b => [ 'blue', 'ff780000' ],
2088             g => [ 'grn', 'ff147800' ],
2089             a => [ 'ltblu', 'ffF06414' ], # 'a' for azure
2090             p => [ 'purple','ff780078' ],
2091             r => [ 'red', 'ff1400FF' ],
2092             w => [ 'wht', 'ffFFFFFF' ],
2093             y => [ 'ylw', 'ff14F0FF' ],
2094             );
2095            
2096             # just use the first character of whatever string we're given
2097 17         43 my $c = substr $color_opt, 0, 1;
2098            
2099 17 100       83 return unless $kml_colors{$c};
2100            
2101 16         94 return $kml_colors{$c}->[1];
2102             }
2103            
2104             #----------------------------------------------------------------------
2105             # -gps Helper Functions
2106             #----------------------------------------------------------------------
2107            
2108             # Calculate geographic distances in kilometers between coordinates in
2109             # geodetic WGS84 format using the Haversine formula.
2110            
2111 122047     122047   183708 sub _gis_distance ($lat1, $lon1, $lat2, $lon2) {
  122047         190118  
  122047         186592  
  122047         195595  
  122047         170244  
  122047         174735  
2112 122047         306854 $lon1 = deg2rad($lon1);
2113 122047         1171202 $lat1 = deg2rad($lat1);
2114 122047         899233 $lon2 = deg2rad($lon2);
2115 122047         797819 $lat2 = deg2rad($lat2);
2116            
2117             ## no critic [ProhibitParensWithBuiltins]
2118             ## no critic [ProhibitMagicNumbers]
2119            
2120 122047         764372 my $dlon = $lon2 - $lon1;
2121 122047         185940 my $dlat = $lat2 - $lat1;
2122 122047         361104 my $a = (sin($dlat/2)) ** 2 + cos($lat1) * cos($lat2) * (sin($dlon/2)) ** 2;
2123 122047         288275 my $c = 2 * atan2(sqrt($a), sqrt(1-$a));
2124            
2125 122047         242353 return 6_371_640 * $c / 1000.0;
2126             }
2127            
2128             #----------------------------------------------------------------------
2129             # -location Helper Functions
2130             #----------------------------------------------------------------------
2131            
2132             # Match -location to the station id or name provided in the call.
2133             # If the pattern looks like a station id (e.g. 'CA006105887') then it matches
2134             # to the stn_id parameter; if it looks like a comma-separated list of station
2135             # id's, it returns success if any of them match the stn_id parameter. Otherwise,
2136             # it matches the to the start of the stn_name parameter.
2137 1227449     1227449   38034381 sub _match_location ($stn_id, $stn_name, $pattern) {
  1227449         1948118  
  1227449         1777322  
  1227449         1652361  
  1227449         1622998  
2138            
2139 1227449         1829080 my $result = $FALSE;
2140            
2141 1227449 100       5944595 if ($pattern =~ m{ \A $STN_ID_RE \Z }xms) {
    100          
2142 854330         1659351 $result = $stn_id eq $pattern;
2143             }
2144             elsif ($pattern =~ m{ \A $STN_ID_RE ( [,] $STN_ID_RE )+ \Z }xms) {
2145 244096         690187 my @patterns = split m{ [,] }xms, $pattern;
2146 244096         633242 my $multi_pattern = '\A(' . join(q(|), @patterns) . ')\Z';
2147            
2148 244096         609589 $result = $stn_id =~ $multi_pattern;
2149             }
2150             else {
2151             ## no critic [RequireExtendedFormatting]
2152 129023         362595 $result = $stn_name =~ m{\A$pattern}msi;
2153             }
2154            
2155 1227449         7274201 return $result;
2156             }
2157            
2158             #----------------------------------------------------------------------
2159             # -nogaps Helper Functions
2160             #----------------------------------------------------------------------
2161            
2162             # parse the missing values text, which looks like this:
2163             # month names: Jan Feb Mar Apr May Jun Jul Aug Sep Oct
2164             # or day ranges: May[2] Oct[3,11] Nov[1] Dec[2,5]
2165            
2166 10     10   5051 sub _parse_missing_text ( $s ) {
  10         21  
  10         17  
2167 10         19 my @months;
2168             my @mmdd;
2169 10         69 my @f = split m{ \s }xms, $s;
2170            
2171 10         45 my $mmm_re = qr{ [[:upper:]][[:lower:]][[:lower:]] }xms;
2172 10         28 my $nbr_rng = qr{ \d+ ( [-] \d+)? }xms;
2173 10         111 my $rng_list = qr{ $nbr_rng (, $nbr_rng)* }xms;
2174            
2175 10         29 foreach my $tok (@f) {
2176 36 100       213 if ( $tok =~ m{ \A $mmm_re \Z }xms ) {
2177 10         38 push @months, $MMM_TO_MM{$tok};
2178             }
2179 36 100       404 if ( $tok =~ m{ \A ($mmm_re) \[ ($rng_list) \] \Z }xms ) {
2180 26         90 my $mm = $MMM_TO_MM{$1};
2181 26         75 my @days = rng_new($2)->as_array;
2182 26         839 foreach my $day (@days) {
2183 104         224 push @mmdd, [$mm, $day];
2184             }
2185             }
2186             }
2187 10         69 return \@months, \@mmdd;
2188             }
2189            
2190             #----------------------------------------------------------------------
2191             # Misc Functions
2192             #----------------------------------------------------------------------
2193            
2194             # format qflags like this: I:1, N:9, S:4
2195 9     9   5085 sub _qflags_as_string ( $qflags_href ) {
  9         23  
  9         15  
2196 9 100       30 return $EMPTY if not $qflags_href;
2197            
2198 2         4 my @r;
2199 2         12 foreach my $qflag ( sort keys $qflags_href->%* ) {
2200 3         12 push @r, sprintf '%s:%d', $qflag, $qflags_href->{$qflag};
2201             }
2202            
2203 2         10 return join ', ', @r;
2204             }
2205             #----------------------------------------------------------------------
2206             # Undef-safe Functions
2207             #----------------------------------------------------------------------
2208            
2209             # defined max - return the maximum of the two arguments,
2210             # or the defined argument if one of the arguments is undef
2211 3299     3299   4593 sub _dmax ($x, $y) {
  3299         5169  
  3299         4467  
  3299         4525  
2212            
2213 3299 100 100     6737 return if not defined $x and not defined $y;
2214 3298 100       6067 return $y if not defined $x;
2215 2864 100       5123 return $x if not defined $y;
2216            
2217 2863 100       6375 return $x > $y ? $x : $y;
2218             }
2219            
2220             # defined min - return the minimum of the two arguments,
2221             # or the defined argument if one of the arguments is undef
2222 3276     3276   4394 sub _dmin ($x, $y) {
  3276         4952  
  3276         4739  
  3276         4243  
2223            
2224 3276 100 100     6769 return if not defined $x and not defined $y;
2225 3275 100       6274 return $y if not defined $x;
2226 2844 100       5091 return $x if not defined $y;
2227            
2228 2843 100       6352 return $x < $y ? $x : $y;
2229             }
2230            
2231             # defined sum - return the sum of the two arguments,
2232             # or the defined argument if one of the arguments is undef
2233 15385     15385   19933 sub _dsum ($x, $y) {
  15385         21334  
  15385         19953  
  15385         20184  
2234            
2235 15385 100 100     36410 return if not defined $x and not defined $y;
2236 15384 100       29483 return $y if not defined $x;
2237 4965 100       8766 return $x if not defined $y;
2238            
2239 4964         9572 return $x + $y;
2240             }
2241            
2242             # defined count - increment the value of the first argument by 1,
2243             # or return it unchanged if the second argument is undef
2244 15211     15211   21239 sub _dcount ($x, $y) {
  15211         21321  
  15211         19523  
  15211         19370  
2245            
2246 15211 100       26045 return $x if not defined $y;
2247            
2248 15209 100       49932 return defined $x ? $x + 1 : 1;
2249             }
2250            
2251             # defined divide - returns undef if either argument is undef
2252             # or if the denominator is zero
2253 9996     9996   14142 sub _ddivide ($x, $y) {
  9996         13992  
  9996         13716  
  9996         13026  
2254            
2255 9996 100       18045 return if not defined $x;
2256 9994 100       16396 return if not defined $y;
2257 9993 100       16873 return if not $y;
2258            
2259 9992         18319 return $x / $y;
2260             }
2261            
2262             #----------------------------------------------------------------------
2263             # Date and Time Helper Functions
2264             #----------------------------------------------------------------------
2265            
2266 198     198   343 sub _days_in_month ($year, $month) {
  198         307  
  198         331  
  198         284  
2267            
2268             # coerce to numbers
2269 198         334 $year += 0;
2270 198         306 $month += 0;
2271            
2272             ## no critic [ProhibitMagicNumbers]
2273 198         510 my @mdays = (0, 31,28,31,30,31,30,31,31,30,31,30,31);
2274            
2275 198 100       471 if ($month == 2) {
2276 19         90 return $mdays[$month] + _is_leap_year($year);
2277             }
2278            
2279 179         470 return $mdays[$month];
2280             }
2281            
2282 43     43   5462 sub _days_in_year ($year) {
  43         91  
  43         77  
2283             ## no critic [ProhibitMagicNumbers]
2284 43         126 return 365 + _is_leap_year( $year+0 );
2285             }
2286            
2287 71     71   122 sub _is_leap_year ($year) {
  71         136  
  71         114  
2288            
2289 71         123 $year += 0; # coerce to number
2290            
2291             ## no critic [ProhibitMagicNumbers]
2292 71 100       387 return 0 if $year % 4;
2293 35 100       125 return 1 if $year % 100;
2294 27 100       111 return 0 if $year % 400;
2295 11         57 return 1;
2296             }
2297            
2298 23     23   5880 sub _month_names (@mm) {
  23         57  
  23         40  
2299            
2300 23         104 my @mnames = qw(xxx Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
2301            
2302 23         50 my @result = ();
2303 23 100       66 return (@result) unless @mm;
2304            
2305             ## no critic [ProhibitMagicNumbers]
2306 22         62 foreach my $mm (@mm) {
2307 41 100 100     292 if ($mm !~ m{ \A \d\d? \Z }xms) {
    100          
2308 1         3 push @result, '???';
2309             }
2310             elsif ($mm > 0 and $mm < 13) {
2311 38         95 push @result, $mnames[$mm];
2312             }
2313             else {
2314 2         7 push @result, '???';
2315             }
2316             }
2317            
2318 22 100       144 return wantarray ? @result : shift @result;
2319             }
2320            
2321             # Seasonal decades are based on seasonal years.
2322             # However, decades are deemed to begin when the seasonal
2323             # year ends in 9. For example, seasonal decade 2010 begins
2324             # Dec 1 2009 and continues through Nov 30, 2019.
2325 1174     1174   1726 sub _seasonal_decade ($year, $month) {
  1174         1860  
  1174         1725  
  1174         1561  
2326             ## no critic [ProhibitMagicNumbers]
2327 1174 100       2528 $year += 1 if $month == 12;
2328 1174         2690 return int($year / 10) * 10;
2329             }
2330            
2331             # Seasonal quarter Q1 begins Dec 1 and ends Feb 28/29
2332             # For example, 2017 Q1 includes Dec 2017, Jan 2018 and Feb 2018.
2333 1176     1176   1542 sub _seasonal_qtr ($year, $month) {
  1176         1695  
  1176         1754  
  1176         1591  
2334 1176         4093 my @seasonal_quarter = qw( Q0 Q1 Q1 Q2 Q2 Q2 Q3 Q3 Q3 Q4 Q4 Q4 Q1 );
2335            
2336 1176         3333 return $seasonal_quarter[$month];
2337             }
2338            
2339             # Seasonal years begin Dec 1 and continue through end of November
2340             # the following year. For example, Dec 15 2017 is in Q1 2017;
2341             # Jan 15 2018 is in Q1 2017 (not 2018!!!); Nov 15 2018 is in Q4 2017.
2342             # This is consistent with definition used by weatherstats.ca.
2343 1176     1176   1584 sub _seasonal_year ($year, $month) {
  1176         1769  
  1176         1632  
  1176         1541  
2344             ## no critic [ProhibitMagicNumbers]
2345 1176 100       2492 $year -= 1 unless $month == 12;
2346 1176         2968 return $year;
2347             }
2348            
2349             #----------------------------------------------------------------------
2350             # Performance Helper Functions
2351             #----------------------------------------------------------------------
2352            
2353             # calculate the total size of a hash or other structure
2354 19     19   7853 sub _memsize ( $ref, $opt_performance ) {
  19         48  
  19         37  
  19         42  
2355 19 100       176 return $EMPTY unless $opt_performance;
2356 4         619 return sprintf ' [%s]', commify( Devel::Size::total_size( $ref ) );
2357             }
2358            
2359             =head1 EXAMPLE PROGRAM
2360            
2361             use Weather::GHCN::StationTable;
2362            
2363             my $ghcn = Weather::GHCN::StationTable->new;
2364            
2365             my ($opt, @errors) = $ghcn->set_options(
2366             cachedir => 'c:/ghcn_cache',
2367             refresh => 'always',
2368             country => 'US',
2369             state => 'NY',
2370             location => 'New York',
2371             active => '2000-2022',
2372             report => 'yearly',
2373             );
2374            
2375             die @errors if @errors;
2376            
2377             $ghcn->load_stations;
2378            
2379             my @rows;
2380             if ($opt->report) {
2381             say $ghcn->get_header;
2382            
2383             # this also prints detailed station data if $opt->report eq 'detail'
2384             $ghcn->load_data(
2385             # set a callback routine for printing progress messages
2386             progress_sub => sub { say {*STDERR} @_ },
2387             # set a callback routine for capturing data rows when report => 'detail'
2388             row_sub => sub { push @rows, $_[0] },
2389             );
2390            
2391             # these only do something when $opt->report ne 'detail'
2392             $ghcn->summarize_data;
2393             say $ghcn->get_summary_data;
2394            
2395             say '';
2396             say $ghcn->get_footer;
2397            
2398             say '';
2399             say $ghcn->get_flag_statistics;
2400             }
2401            
2402             # print data rows collected by row_sub callback (when report => 'detail')
2403             foreach my $row_aref (@rows) {
2404             say join "\t", $row_aref->@*;
2405             }
2406            
2407             say '';
2408             say $ghcn->get_stations( kept => 1 );
2409            
2410             say '';
2411             say 'Stations that failed to meet range or quality criteria:';
2412             say $ghcn->get_stations( kept => 0, no_header => 1 );
2413            
2414             if ( $ghcn->has_missing_data ) {
2415             warn '*W* some data was missing for the stations and date range processed' . $NL;
2416             say '';
2417             say $ghcn->get_missing_data_ranges;
2418             }
2419            
2420             say $ghcn->get_options;
2421            
2422             say $ghcn->get_timing_stats;
2423            
2424             say $ghcn->get_hash_stats;
2425            
2426             $ghcn->export_kml if $opt->kml;
2427            
2428             =head1 OPTIONS
2429            
2430             StationTable supports almost all the options documented in
2431             B. The only options not supported are ones that
2432             are listed in the Command-Line Only Options section of the POD, namely:
2433             -help, -usage, -readme, -gui, -optfile, and -outclip.
2434            
2435             Options can be passed directly to the API via B.
2436            
2437             Options can also be defined in a file (called a B) that will
2438             be loaded at runtime and merged with the options passed to B.
2439            
2440             Options passed to B must be defined as a perl hash
2441             structure. See B for a list of all options in
2442             Getopts::Long format. Simply translate the option to a hash
2443             key/value pair. For example, B<-report detail> becomes B
2444             'detail'>.
2445            
2446             Options defined in a B file must be defined using
2447             YAML (see below).
2448            
2449             =head2 Aliases
2450            
2451             Aliases are a convenience feature that allow you to define mnemonic
2452             shortcuts for specific stations. GHCN station id's (like CA006106000)
2453             are difficult to remember and type, as can GHCN station names.
2454             Frequently-used station id's can be given easier alias names that
2455             can be used in the -location option for precise and reliable data
2456             retrieval.
2457            
2458             The entries within the aliases hash are simply keyword/value pairs
2459             that represent the mnemonic alias name and the station id (or id's)
2460             that are to be retrieved when that alias is used in -location.
2461            
2462             Aliases can only be defined via the B API or in a B
2463             file. There is no command-line option for defining them.
2464            
2465             =head2 YAML Example
2466            
2467             This is what the YAML content for a typical profile file would
2468             look like:
2469            
2470             ---
2471             cachedir: C:/ghcn_cache_new
2472            
2473             aliases:
2474             yow: CA006106000,CA006106001 # Ottawa airport
2475             cda: CA006105976,CA006105978 # Ottawa (CDA and CDA RCS)
2476            
2477             =head2 Hash Example
2478            
2479             Here's what the options would look like as a hash passed to the
2480             B API call:
2481            
2482             %options = (
2483             cachedir => 'C:/ghcn_cache',
2484             aliases => {
2485             yow => 'CA006106000,CA006106001', # Ottawa airport
2486             cda => 'CA006105976,CA006105978', # Ottawa (CDA and CDA RCS)
2487             }
2488             );
2489            
2490             my $ghcn->Weather::GHCN::StationTable->new();
2491             $ghcn->set_options(%options);
2492            
2493             =head1 VERSIONING and COMPATIBILITY
2494            
2495             The version number scheme used for this module consists of a 3-part
2496             dot-delimited string such as v0.0.003. This format was chosen for
2497             compatibility with Dist::Zilla version support, so that all modules
2498             in GHCN will get the same version number upon release. See also
2499             L.
2500            
2501             The first digit of the string is a major release numbers, and the
2502             second is the minor release number. With the exception of v0.0
2503             releases, which should be considered experimental pre-production
2504             versions, the interface is intended to be upward compatible within a
2505             set of releases sharing the same major release number. If an
2506             incompatible change becomes necessary, the major release number will
2507             be incremented.
2508            
2509             An increment to the minor release number indicates significant new
2510             functionality, which usually mean new API's and options. But, it should
2511             be upward compatible with the prior release.
2512            
2513             =head1 AUTHOR
2514            
2515             Gary Puckering (jgpuckering@rogers.com)
2516            
2517             =head1 LICENSE AND COPYRIGHT
2518            
2519             Copyright 2022, Gary Puckering
2520            
2521             =cut
2522            
2523             1;