File Coverage

lib/Weather/GHCN/App/Fetch.pm
Criterion Covered Total %
statement 175 224 78.1
branch 37 68 55.8
condition 13 39 33.3
subroutine 21 23 91.3
pod 4 4 100.0
total 250 358 70.1


line stmt bran cond sub pod time code
1             # Weather::GHCN::Fetch.pm - class for creating applications that fetch NOAA GHCN data
2            
3             ## no critic (Documentation::RequirePodAtEnd)
4            
5             =head1 NAME
6            
7             Weather::GHCN::App::Fetch - Fetch station and weather data from the NOAA GHCN repository
8              
9             =head1 VERSION
10              
11             version v0.0.010
12            
13             =head1 SYNOPSIS
14            
15             use Weather::GHCN::App::Fetch;
16            
17             Weather::GHCN::App::Fetch->run( \@ARGV );
18            
19             See ghcn_fetch -help for details.
20            
21             =cut
22            
23             # Testing notes:
24             #
25             # The quickest way to spot check results from this script is to compare them
26             # to those obtained from:
27             #
28             # https://ottawa.weatherstats.ca/charts/
29             #
30             # Run the script with parameters such as -prov ON -loc "Ottawa Int" -range
31             # 2017-2018 -precip -tavg -o first with the -daily option, then again with
32             # -monthly and -yearly. You can then compare results to various charts you
33             # generate using the above link by selecting Ottawa (Kanata - Orleans),
34             # which I've verified corresponds to station CA006105976 (Ottawa Int'l).
35             #
36             # Charts to use include Temperature (TMAX, TMIN, TAVG, Avg), Snowfall
37             # (SNOW), Snow on Ground (SNWD) and Total Precipitation (PRCP). Annual and
38             # monthly charts work well, but you may need daily charts and some
39             # investigation of the NOAA source data if there are anomalies. Sometimes
40             # the NOAA data has missing data; e.g. station CA006105976 (Ottawa Int'l)
41             # is missing days 6-28 for 2018-02.
42            
43             ########################################################################
44             # Pragmas
45             ########################################################################
46            
47             # these are needed because perlcritic fails to detect that Object::Pad handles these things
48             ## no critic [ProhibitVersionStrings]
49             ## no critic [RequireUseWarnings]
50            
51 1     1   111193 use v5.18; # minimum for Object::Pad
  1         14  
52            
53             package Weather::GHCN::App::Fetch;
54            
55             our $VERSION = 'v0.0.010';
56            
57 1     1   7 use feature 'signatures';
  1         2  
  1         145  
58 1     1   6 no warnings 'experimental::signatures';
  1         2  
  1         55  
59            
60             ########################################################################
61             # perlcritic rules
62             ########################################################################
63            
64             ## no critic [ProhibitSubroutinePrototypes]
65             ## no critic [ErrorHandling::RequireCarping]
66             ## no critic [Modules::ProhibitAutomaticExportation]
67             ## no critic [InputOutput::RequireBriefOpen]
68            
69             # due to subroutine signatures, perlcritic can't seem to handle disabling
70             # the following warnings on the subs where they occur
71             ## no critic [Subroutines::ProhibitExcessComplexity]
72            
73             # due to use of postfix dereferencing, we have to disable these warnings
74             ## no critic [References::ProhibitDoubleSigils]
75            
76             ########################################################################
77             # Export
78             ########################################################################
79            
80             require Exporter;
81            
82 1     1   5 use base 'Exporter';
  1         2  
  1         126  
83            
84             our @EXPORT = ( 'run' );
85            
86             ########################################################################
87             # Libraries and Features
88             ########################################################################
89 1     1   580 use Object::Pad 0.66 qw( :experimental(init_expr) );
  1         11200  
  1         5  
90            
91 1     1   969 use Getopt::Long;
  1         13491  
  1         5  
92 1     1   709 use Pod::Usage;
  1         58414  
  1         164  
93 1     1   616 use Const::Fast;
  1         2752  
  1         6  
94 1     1   590 use English qw( -no_match_vars );
  1         3820  
  1         8  
95            
96             # cpan modules
97 1     1   330 use FindBin qw($Bin);
  1         3  
  1         111  
98 1     1   526 use LWP::Simple;
  1         62854  
  1         7  
99 1     1   1233 use Path::Tiny;
  1         14148  
  1         70  
100 1     1   832 use Text::Abbrev;
  1         52  
  1         73  
101            
102             # modules for Windows only
103 1     1   702 use if $OSNAME eq 'MSWin32', 'Win32::Clipboard';
  1         14  
  1         7  
104            
105             # conditional modules
106 1     1   584 use Module::Load::Conditional qw( can_load check_install requires );
  1         27265  
  1         76  
107            
108             # custom modules
109 1     1   516 use Weather::GHCN::Common qw( :all );
  1         3  
  1         167  
110 1     1   681 use Weather::GHCN::StationTable;
  1         4  
  1         3494  
111            
112             ########################################################################
113             # Global delarations
114             ########################################################################
115            
116             # is it ok to use Tk?
117             our $TK_MODULES = {
118             'Tk' => undef,
119             'Tk::Getopt' => undef,
120             };
121            
122             # is it ok to use Win32::Clipboard?
123             our $USE_WINCLIP = $OSNAME eq 'MSWin32';
124             our $USE_TK = can_load( modules => $TK_MODULES );
125            
126             my $Opt; # options object, with property accessors for each user option
127            
128             # options that relate to script execution, not GHCN processing and output
129             my $Opt_savegui; # file in which to save options from GUI dialog
130             my $Opt_gui; # launch the GUI dialog
131             my $Opt_help; # display POD documentation
132             my $Opt_readme; # print the text of the GHCN readme file
133             my $Opt_usage; # display a synopsis of the command line syntax
134             my $Opt_outclip; # send report output to the Windows clipboard instead of STDOUT
135            
136             ########################################################################
137             # Constants
138             ########################################################################
139            
140             const my $EMPTY => q(); # empty string
141             const my $SPACE => q( ); # space character
142             const my $DASH => q(-); # dash character
143             const my $TAB => qq(\t); # tab character
144             const my $NL => qq(\n); # perl universal newline (any platform)
145             const my $TRUE => 1; # perl's usual TRUE
146             const my $FALSE => not $TRUE; # a dual-var consisting of '' and 0
147            
148             const my $PROFILE_FILE => '~/.ghcn_fetch.yaml';
149            
150             const my $STN_THRESHOLD => 100; # ask if number of selected stations exceeds this
151            
152             const my $STN_ID_RE => qr{ [[:upper:]]{2} [[:alnum:]\_\-]{9} }xms;
153            
154             ########################################################################
155             # Script Mainline
156             ########################################################################
157            
158             __PACKAGE__->run( \@ARGV ) unless caller;
159            
160             =head1 SUBROUTINES
161            
162             =head2 run ( \@ARGV, stdin => 0 )
163            
164             Invoke this subroutine, passing in a reference to @ARGV, in order to
165             fetch NOAA GHCN station data or daily weather data.
166            
167             See ghnc_fetch.pl -help for details.
168            
169             Stations are filtered by various options, such as -country and -location.
170             But Fetch->run can also receive a list of station id's via a pipe or
171             a file. To enable this feature, set the B parameter to 1 (true).
172            
173             When calling Fetch->run inside a test script, it's usually best to leave
174             this option disabled as some test harnesses may fool the algorithm used
175             to detect stdin from a file or pipe. This can be done by omitting
176             the stdin => parameter, or setting it to false.
177            
178            
179             =cut
180            
181 11     11 1 60053 sub run ($progname, $argv_aref, %args) {
  11         29  
  11         23  
  11         23  
  11         20  
182            
183 11         49 local @ARGV = $argv_aref->@*;
184            
185             # these persist across calls to run() in the unit tests, so we
186             # need to reset them each time
187 11         28 $Opt_savegui = $FALSE;
188 11         26 $Opt_gui = $FALSE;
189 11         15 $Opt_help = $FALSE;
190 11         25 $Opt_readme = $FALSE;
191 11         32 $Opt_usage = $FALSE;
192 11         21 $Opt_outclip = $FALSE;
193            
194 11         120 my $ghcn = Weather::GHCN::StationTable->new;
195            
196 11         49 $ghcn->tstats->start('_Overall');
197            
198 11         61 Getopt::Long::Configure ('pass_through');
199            
200             # If the first command line argument is a report_type, remove and save it
201 11         428 my $report_type;
202 11 100 66     99 if (@ARGV > 0 and $ARGV[0] =~ m{ \A [^-][[:alpha:]]+ \b }xms ) {
203 2         5 my $rt_arg = shift @ARGV;
204 2         21 my $rt = Weather::GHCN::Options->deabbrev_report_type( $rt_arg );
205 2   33     9 $report_type = $rt // $rt_arg;
206             }
207            
208             # record the number of command line arguments before they are removed by GetOptions
209 11         29 my $argv_count = @ARGV;
210            
211 11         83 my %script_args = (
212             'gui' => \$Opt_gui,
213             'outclip' => \$Opt_outclip,
214             'help' => \$Opt_help,
215             'usage|?' => \$Opt_usage,
216             'savegui:s' => \$Opt_savegui, # file for options load/save
217             'readme' => \$Opt_readme,
218             );
219            
220             # parse out the script options into $Opt_ fields, letting the rest
221             # pass through to get_user_options below
222 11         71 GetOptions( %script_args );
223            
224 11 50 33     9237 if ($Opt_outclip and not $USE_WINCLIP) {
225 0         0 die "*E* -outclip not available (needs Win32::Clipboard)\n";
226             }
227            
228 11         66 my $ghcn_fetch_pl = path($Bin, '..', 'bin', 'ghcn_fetch')->absolute->stringify;
229            
230 11 100       1203 if ( $Opt_help ) {
231 1         11 pod2usage( { -verbose => 2, -exitval => 'NOEXIT', -input => $ghcn_fetch_pl } );
232 1         186304 return;
233             }
234 10 100       35 if ( $Opt_usage ) {
235 2         29 pod2usage( { -verbose => 1, -exitval => 'NOEXIT', -input => $ghcn_fetch_pl } );
236 2         116860 return;
237             }
238            
239             # launch the default browser with the NOAA Daily readme.txt file content
240 8 100       27 if ( $Opt_readme ) {
241 1         3 my $readme_uri = 'https://www1.ncdc.noaa.gov/pub/data/ghcn/daily/readme.txt';
242 1         53 say 'Source: ', $readme_uri;
243 1         15 say $EMPTY;
244 1         10 getprint $readme_uri;
245 1         10439 return;
246             }
247            
248             # Default to -gui if no command line arguments were provided and
249             # we aren't taking input from a pipe or file.
250             # PBP recommends using IO::Interactive::is_interactive rather than -t
251             # because it better deals with ARGV magic; but here we just need to
252             # know if *STDIN is pointing at the terminal so we suppress the
253             # perlcritic warning.
254            
255             ## no critic [ProhibitInteractiveTest]
256             # uncoverable branch true
257 7 0 33     31 $Opt_gui = 1 if $USE_TK and $argv_count == 0 and -t *STDIN;
      33        
258            
259 7         25 my $user_opt_href = get_user_options($Opt_savegui);
260            
261 7 100       28 $user_opt_href->{report} = $report_type
262             if defined $report_type;
263            
264 7   33     32 $user_opt_href->{profile} //= $PROFILE_FILE;
265            
266 7 100       43 die '*E* unrecognized options: ' . join $SPACE, @ARGV
267             if @ARGV;
268            
269 6         12 my @errors;
270 6         55 ($Opt, @errors) = $ghcn->set_options( $user_opt_href->%* );
271            
272 6 50       54 die join qq(\n), @errors, qq(\n)
273             if @errors;
274            
275 6         18 my ( $output, $new_fh, $old_fh );
276 6 0 33     16 if ( $Opt_outclip and $USE_WINCLIP ) {
277 0 0       0 open $new_fh, '>', \$output
278             or die 'Unable to open buffer for write';
279 0         0 $old_fh = select $new_fh; ## no critic (ProhibitOneArgSelect)
280             }
281            
282             # get a list of station id's from stdin if it's a pipe or file
283             # (but not if stdin is pointing to the terminal)
284 6 50 33     65 if ( $args{'stdin'} && ( -p *STDIN || -f *STDIN ) ) {
      66        
285 2         7 my $ii;
286             my %f;
287 2         55 while (my $line = ) { ## no critic [ProhibitExplicitStdin]
288 2         9 chomp;
289 2         105 my @id_list = $line =~ m{ $STN_ID_RE }xmsg;
290 2         8 foreach my $id ( @id_list ) {
291 2         6 $f{$id}++;
292 2         18 $ii++;
293             }
294             }
295            
296 2 100       11 if ($ii == 0) {
297 1         25 die '*W* no station ids found in the input';
298             } else {
299 1         7 $ghcn->stnid_filter_href( \%f );
300             }
301             }
302            
303 5         28 $ghcn->load_stations;
304            
305 5         12 say {*STDERR} '*I* ', $ghcn->stn_count, ' stations found';
  5         46  
306 5         33 say {*STDERR} '*I* ', $ghcn->stn_selected_count, ' stations match location and GSN options';
  5         50  
307 5         22 say {*STDERR} '*I* ', $ghcn->stn_filtered_count, ' stations matched range and measurement options';
  5         44  
308            
309 5 50       31 if ($ghcn->stn_filtered_count > $STN_THRESHOLD ) {
310 0 0       0 if (-t *STDIN) {
311 0         0 print {*STDERR} ">>>> There are a lot of stations to process. Continue (y/n)?\n>>>> ";
  0         0  
312 0         0 my $reply = ;
313 0         0 chomp $reply;
314 0 0       0 exit if $reply =~ m{ \A ( n | no ) }xmsi;
315             } else {
316 0         0 die '*E* too many stations to process';
317             }
318             }
319            
320 5 100       156 if ( $Opt->report eq 'kml' ) {
    50          
    50          
    50          
    50          
321 1         25 say $ghcn->report_kml;
322 1         36 goto WRAP_UP;
323             }
324             elsif ( $Opt->report eq 'url' ) {
325 0         0 say $ghcn->report_urls;
326 0         0 goto WRAP_UP;
327             }
328             elsif ( $Opt->report eq 'curl' ) {
329 0         0 say $ghcn->report_urls( curl => 1 );
330 0         0 goto WRAP_UP;
331             }
332             elsif ( $Opt->report eq 'stn' ) {
333 0         0 say $ghcn->get_stations( kept => 1 );
334 0         0 goto WRAP_UP;
335             }
336             elsif ( $Opt->report eq 'id' ) {
337 0         0 my @stn_list = $ghcn->get_stations( list => 1, kept => 1, no_header => 1 );
338 0         0 my @id_list = map { $_->[0] } @stn_list;
  0         0  
339 0         0 say join $NL, @id_list;
340 0         0 goto WRAP_UP;
341             }
342            
343 4 100       596 if ($Opt->report) {
344 2         42 say $ghcn->get_header;
345            
346             # this prints detailed station data if $Opt->report eq 'detail'
347             $ghcn->load_data(
348             # set a callback routine for printing progress messages
349 4     4   9 progress_sub => sub { say {*STDERR} @_ },
  4         142  
350             # set a callback routine for printing rows when -report detail
351 0     0   0 row_sub => sub { say join "\t", @{ $_[0] } },
  0         0  
352 2         41 );
353            
354 2 50 33     64 if ($Opt->report eq 'detail' and $Opt->nogaps) {
355 0         0 say $ghcn->get_missing_rows;
356             }
357            
358             # these only do something when $Opt->report ne 'detail'
359 2         43 $ghcn->summarize_data;
360 2         16 say $ghcn->get_summary_data;
361 2         60 say $EMPTY;
362            
363 2 100       73 goto WRAP_UP if $Opt->dataonly;
364            
365 1         31 say $EMPTY;
366 1         10 say $ghcn->get_footer;
367            
368 1         14 say $EMPTY;
369 1         10 say $ghcn->get_flag_statistics;
370             }
371            
372 3         100 say $EMPTY;
373 3         33 say $ghcn->get_stations( kept => 1 );
374            
375 3         33 my @rejected = $ghcn->get_stations( list => 1, kept => 0, no_header => 1 );
376 3 100       14 if (@rejected) {
377 1         32 say $EMPTY;
378 1         15 say 'Stations that failed to meet range or quality criteria:';
379 1         7 say tsv(\@rejected);
380             }
381            
382 3 100       22 if ( $ghcn->has_missing_data ) {
383 1         20 warn '*W* some data was missing for the stations and date range processed' . $NL;
384 1         14 say $EMPTY;
385 1         9 say $ghcn->get_missing_data_ranges;
386             }
387            
388 3         25 $ghcn->tstats->stop('_Overall') ;
389 3         14 $ghcn->tstats->finish;
390            
391 3         50 say $EMPTY;
392 3         26 say $ghcn->get_options;
393            
394 3         43 say $EMPTY;
395 3         35 say 'Script:';
396 3         43 say $TAB, $PROGRAM_NAME;
397 3         42 say "\tWeather::GHCN::StationTable version " . $Weather::GHCN::StationTable::VERSION;
398 3         24 say $TAB, 'Cache directory: ' . $ghcn->cachedir;
399 3         25 say $TAB, 'Profile file: ' . $ghcn->profile_file;
400            
401 3 100       110 if ( $Opt->performance ) {
402 1         27 say $EMPTY;
403 1         12 say sprintf 'Timing statistics (ms) and memory [bytes]';
404 1         9 say $ghcn->get_timing_stats;
405            
406 1         16 say $EMPTY;
407 1         8 say $ghcn->get_hash_stats;
408             }
409            
410             WRAP_UP:
411             # send output to the Windows clipboard
412 5 0 33     1269 if ( $Opt_outclip and $USE_WINCLIP ) {
413 0         0 Win32::Clipboard->new()->Set( $output );
414 0         0 select $old_fh; ## no critic [ProhibitOneArgSelect]
415             }
416            
417 5         338 return;
418             }
419            
420             ########################################################################
421             # Subroutines
422             ########################################################################
423            
424             =head2 get_user_options ( $optfile=undef )
425            
426             Fetch.pm uses B to either get user options
427             via B -- if it is installed -- or via B.
428            
429             =cut
430            
431 7     7 1 13 sub get_user_options ( $optfile=undef ) {
  7         18  
  7         16  
432            
433 7 50       42 my $user_opt_href = $Opt_gui
434             ? get_user_options_tk($optfile)
435             : get_user_options_no_tk($optfile)
436             ;
437            
438 7         26 return $user_opt_href;
439             }
440            
441             =head2 get_user_options_no_tk ( $optfile=undef )
442            
443             This function obtains user options from @ARGV by calling B
444             B using a list of option definitions obtained by calling
445             Bget_getopt_list()>. The options (and their values)
446             are extracted from @ARGV and put in a hash, a reference to which is
447             then returned.
448            
449             This function is called when the GUI is not being used. The $optfile
450             argument, if provided, is assumed to be a file saved from a GUI
451             invocation and will be eval'd and used as the options list.
452            
453             =cut
454            
455 8     8 1 3593 sub get_user_options_no_tk ( $optfile=undef ) {
  8         21  
  8         15  
456            
457 8         68 my @options = ( Weather::GHCN::Options->get_getopt_list() );
458            
459 8 50       27 if ($optfile) {
460 0         0 my $saved_opt_perlsrc = join $SPACE, path($optfile)->lines( {chomp=>1} );
461 0         0 my $loadoptions;
462            
463             ## no critic [ProhibitStringyEval]
464             ## no critic [RequireCheckingReturnValueOfEval]
465 0         0 eval $saved_opt_perlsrc;
466            
467 0         0 return $loadoptions;
468             }
469            
470 8         18 my %opt;
471 8         52 GetOptions( \%opt, @options);
472            
473 8         17979 return \%opt;
474             }
475            
476             =head2 get_user_options_tk ( $optfile=undef )
477            
478             This function returns a reference to a hash of user options obtained
479             by calling B. This may launch a GUI dialog to collect
480             the options.
481            
482             The optional $optfile argument specifies a filename which
483             B can use to store or load options.
484            
485             =cut
486            
487 0     0 1   sub get_user_options_tk ( $optfile=undef ) {
  0            
  0            
488            
489 0 0         if (not $USE_TK) {
490 0           die '*E* -gui option unavailable -- try installing Tk and Tk::Getopt';
491             }
492            
493 0           my %opt;
494            
495 0           my @opttable = ( Weather::GHCN::Options->get_tk_options_table() );
496            
497 0           my $optobj = Tk::Getopt->new(
498             -opttable => \@opttable,
499             -options => \%opt,
500             -filename => $optfile);
501            
502 0           $optobj->set_defaults; # set default values
503            
504 0 0 0       $optobj->load_options # Tk:Getopt configuration file
505             if defined $optfile and -e $optfile;
506            
507 0           $optobj->get_options; # command line
508            
509 0           $optobj->process_options; # process callbacks, check restrictions ...
510            
511 0 0         if ($Opt_gui) {
512 0           my $top = MainWindow->new;
513 0           $top->geometry('500x300+300+200');
514 0           $top->title('GHCN Daily Parser');
515            
516 0           my $retval = $optobj->option_dialog(
517             $top,
518             -toplevel => 'Frame',
519             -buttons => [qw/ok cancel save/], # not using cancel apply undo save defaults
520             -statusbar => 1,
521             -wait => 1,
522             -pack => [-fill => 'both', -expand => 1],
523             );
524            
525 0 0 0       die "*I* action cancelled\n" if $retval and $retval eq 'cancel';
526             }
527            
528 0           return \%opt;
529             }
530            
531             =head1 AUTHOR
532            
533             Gary Puckering (jgpuckering@rogers.com)
534            
535             =head1 LICENSE AND COPYRIGHT
536            
537             Copyright 2022, Gary Puckering
538            
539             =cut
540            
541             1;