File Coverage

lib/Weather/GHCN/App/Fetch.pm
Criterion Covered Total %
statement 179 228 78.5
branch 37 68 55.8
condition 13 39 33.3
subroutine 21 23 91.3
pod 4 4 100.0
total 254 362 70.4


line stmt bran cond sub pod time code
1             # Weather::GHCN::Fetch.pm - class for creating applications that fetch NOAA GHCN data
2            
3             ## no critic (Documentation::RequirePodAtEnd)
4            
5             =head1 NAME
6            
7             Weather::GHCN::App::Fetch - Fetch station and weather data from the NOAA GHCN repository
8              
9             =head1 VERSION
10              
11             version v0.0.011
12            
13             =head1 SYNOPSIS
14            
15             use Weather::GHCN::App::Fetch;
16            
17             Weather::GHCN::App::Fetch->run( \@ARGV );
18            
19             See ghcn_fetch -help for details.
20            
21             =cut
22            
23             # Testing notes:
24             #
25             # The quickest way to spot check results from this script is to compare them
26             # to those obtained from:
27             #
28             # https://ottawa.weatherstats.ca/charts/
29             #
30             # Run the script with parameters such as -prov ON -loc "Ottawa Int" -range
31             # 2017-2018 -precip -tavg -o first with the -daily option, then again with
32             # -monthly and -yearly. You can then compare results to various charts you
33             # generate using the above link by selecting Ottawa (Kanata - Orleans),
34             # which I've verified corresponds to station CA006105976 (Ottawa Int'l).
35             #
36             # Charts to use include Temperature (TMAX, TMIN, TAVG, Avg), Snowfall
37             # (SNOW), Snow on Ground (SNWD) and Total Precipitation (PRCP). Annual and
38             # monthly charts work well, but you may need daily charts and some
39             # investigation of the NOAA source data if there are anomalies. Sometimes
40             # the NOAA data has missing data; e.g. station CA006105976 (Ottawa Int'l)
41             # is missing days 6-28 for 2018-02.
42            
43             ########################################################################
44             # Pragmas
45             ########################################################################
46            
47             # these are needed because perlcritic fails to detect that Object::Pad handles these things
48             ## no critic [ProhibitVersionStrings]
49             ## no critic [RequireUseWarnings]
50            
51 1     1   109665 use v5.18; # minimum for Object::Pad
  1         15  
52            
53             package Weather::GHCN::App::Fetch;
54            
55             our $VERSION = 'v0.0.011';
56            
57 1     1   6 use feature 'signatures';
  1         1  
  1         178  
58 1     1   6 no warnings 'experimental::signatures';
  1         2  
  1         52  
59            
60             ########################################################################
61             # perlcritic rules
62             ########################################################################
63            
64             ## no critic [ProhibitSubroutinePrototypes]
65             ## no critic [ErrorHandling::RequireCarping]
66             ## no critic [Modules::ProhibitAutomaticExportation]
67             ## no critic [InputOutput::RequireBriefOpen]
68            
69             # due to subroutine signatures, perlcritic can't seem to handle disabling
70             # the following warnings on the subs where they occur
71             ## no critic [Subroutines::ProhibitExcessComplexity]
72            
73             # due to use of postfix dereferencing, we have to disable these warnings
74             ## no critic [References::ProhibitDoubleSigils]
75            
76             ########################################################################
77             # Export
78             ########################################################################
79            
80             require Exporter;
81            
82 1     1   9 use base 'Exporter';
  1         2  
  1         128  
83            
84             our @EXPORT = ( 'run' );
85            
86             ########################################################################
87             # Libraries and Features
88             ########################################################################
89 1     1   639 use Object::Pad 0.66 qw( :experimental(init_expr) );
  1         11098  
  1         5  
90            
91 1     1   930 use Getopt::Long;
  1         13525  
  1         7  
92 1     1   686 use Pod::Usage;
  1         57974  
  1         135  
93 1     1   539 use Const::Fast;
  1         2599  
  1         6  
94 1     1   624 use English qw( -no_match_vars );
  1         3777  
  1         7  
95            
96             # cpan modules
97 1     1   354 use FindBin qw($Bin);
  1         3  
  1         96  
98 1     1   493 use LWP::Simple;
  1         64101  
  1         9  
99 1     1   1227 use Path::Tiny;
  1         13529  
  1         56  
100 1     1   719 use Text::Abbrev;
  1         47  
  1         67  
101            
102             # modules for Windows only
103 1     1   712 use if $OSNAME eq 'MSWin32', 'Win32::Clipboard';
  1         15  
  1         7  
104            
105             # conditional modules
106 1     1   571 use Module::Load::Conditional qw( can_load check_install requires );
  1         27010  
  1         93  
107            
108             # custom modules
109 1     1   531 use Weather::GHCN::Common qw( :all );
  1         3  
  1         168  
110 1     1   673 use Weather::GHCN::StationTable;
  1         5  
  1         3219  
111            
112             ########################################################################
113             # Global delarations
114             ########################################################################
115            
116             # is it ok to use Tk?
117             our $TK_MODULES = {
118             'Tk' => undef,
119             'Tk::Getopt' => undef,
120             };
121            
122             # is it ok to use Win32::Clipboard?
123             our $USE_WINCLIP = $OSNAME eq 'MSWin32';
124             our $USE_TK = can_load( modules => $TK_MODULES );
125            
126             my $Opt; # options object, with property accessors for each user option
127            
128             # options that relate to script execution, not GHCN processing and output
129             my $Opt_savegui; # file in which to save options from GUI dialog
130             my $Opt_gui; # launch the GUI dialog
131             my $Opt_help; # display POD documentation
132             my $Opt_readme; # print the text of the GHCN readme file
133             my $Opt_usage; # display a synopsis of the command line syntax
134             my $Opt_outclip; # send report output to the Windows clipboard instead of STDOUT
135            
136             ########################################################################
137             # Constants
138             ########################################################################
139            
140             const my $EMPTY => q(); # empty string
141             const my $SPACE => q( ); # space character
142             const my $DASH => q(-); # dash character
143             const my $TAB => qq(\t); # tab character
144             const my $NL => qq(\n); # perl universal newline (any platform)
145             const my $TRUE => 1; # perl's usual TRUE
146             const my $FALSE => not $TRUE; # a dual-var consisting of '' and 0
147            
148             const my $PROFILE_FILE => Weather::GHCN::Options->get_profile_filespec();
149            
150             const my $STN_THRESHOLD => 100; # ask if number of selected stations exceeds this
151            
152             const my $STN_ID_RE => qr{ [[:upper:]]{2} [[:alnum:]\_\-]{9} }xms;
153            
154             ########################################################################
155             # Script Mainline
156             ########################################################################
157            
158             __PACKAGE__->run( \@ARGV ) unless caller;
159            
160             =head1 SUBROUTINES
161            
162             =head2 run ( \@ARGV, stdin => 0 )
163            
164             Invoke this subroutine, passing in a reference to @ARGV, in order to
165             fetch NOAA GHCN station data or daily weather data.
166            
167             See ghnc_fetch.pl -help for details.
168            
169             Stations are filtered by various options, such as -country and -location.
170             But Fetch->run can also receive a list of station id's via a pipe or
171             a file. To enable this feature, set the B parameter to 1 (true).
172            
173             When calling Fetch->run inside a test script, it's usually best to leave
174             this option disabled as some test harnesses may fool the algorithm used
175             to detect stdin from a file or pipe. This can be done by omitting
176             the stdin => parameter, or setting it to false.
177            
178            
179             =cut
180            
181 11     11 1 62597 sub run ($progname, $argv_aref, %args) {
  11         29  
  11         21  
  11         32  
  11         22  
182            
183 11         53 local @ARGV = $argv_aref->@*;
184            
185             # these persist across calls to run() in the unit tests, so we
186             # need to reset them each time
187 11         30 $Opt_savegui = $FALSE;
188 11         25 $Opt_gui = $FALSE;
189 11         29 $Opt_help = $FALSE;
190 11         21 $Opt_readme = $FALSE;
191 11         25 $Opt_usage = $FALSE;
192 11         29 $Opt_outclip = $FALSE;
193            
194 11         146 my $ghcn = Weather::GHCN::StationTable->new;
195            
196 11         53 $ghcn->tstats->start('_Overall');
197            
198 11         75 Getopt::Long::Configure ('pass_through');
199            
200             # If the first command line argument is a report_type, remove and save it
201 11         453 my $report_type;
202 11 100 66     116 if (@ARGV > 0 and $ARGV[0] =~ m{ \A [^-][[:alpha:]]+ \b }xms ) {
203 2         7 my $rt_arg = shift @ARGV;
204 2         21 my $rt = Weather::GHCN::Options->deabbrev_report_type( $rt_arg );
205 2   33     12 $report_type = $rt // $rt_arg;
206             }
207            
208             # record the number of command line arguments before they are removed by GetOptions
209 11         29 my $argv_count = @ARGV;
210            
211 11         77 my %script_args = (
212             'gui' => \$Opt_gui,
213             'outclip' => \$Opt_outclip,
214             'help' => \$Opt_help,
215             'usage|?' => \$Opt_usage,
216             'savegui:s' => \$Opt_savegui, # file for options load/save
217             'readme' => \$Opt_readme,
218             );
219            
220             # parse out the script options into $Opt_ fields, letting the rest
221             # pass through to get_user_options below
222 11         102 GetOptions( %script_args );
223            
224 11 50 33     9487 if ($Opt_outclip and not $USE_WINCLIP) {
225 0         0 die "*E* -outclip not available (needs Win32::Clipboard)\n";
226             }
227            
228 11         71 my $ghcn_fetch_pl = path($Bin, '..', 'bin', 'ghcn_fetch')->absolute->stringify;
229            
230 11 100       1244 if ( $Opt_help ) {
231 1         10 pod2usage( { -verbose => 2, -exitval => 'NOEXIT', -input => $ghcn_fetch_pl } );
232 1         185033 return;
233             }
234 10 100       34 if ( $Opt_usage ) {
235 2         21 pod2usage( { -verbose => 1, -exitval => 'NOEXIT', -input => $ghcn_fetch_pl } );
236 2         120073 return;
237             }
238            
239             # launch the default browser with the NOAA Daily readme.txt file content
240 8 100       23 if ( $Opt_readme ) {
241 1         3 my $readme_uri = 'https://www1.ncdc.noaa.gov/pub/data/ghcn/daily/readme.txt';
242 1         54 say 'Source: ', $readme_uri;
243 1         16 say $EMPTY;
244 1         11 getprint $readme_uri;
245 1         10705 return;
246             }
247            
248             # Default to -gui if no command line arguments were provided and
249             # we aren't taking input from a pipe or file.
250             # PBP recommends using IO::Interactive::is_interactive rather than -t
251             # because it better deals with ARGV magic; but here we just need to
252             # know if *STDIN is pointing at the terminal so we suppress the
253             # perlcritic warning.
254            
255             ## no critic [ProhibitInteractiveTest]
256             # uncoverable branch true
257 7 0 33     29 $Opt_gui = 1 if $USE_TK and $argv_count == 0 and -t *STDIN;
      33        
258            
259 7         26 my $user_opt_href = get_user_options($Opt_savegui);
260            
261 7 100       29 $user_opt_href->{report} = $report_type
262             if defined $report_type;
263            
264 7   33     30 $user_opt_href->{profile} //= $PROFILE_FILE;
265            
266 7 100       50 die '*E* unrecognized options: ' . join $SPACE, @ARGV
267             if @ARGV;
268            
269 6         12 my @errors;
270 6         45 ($Opt, @errors) = $ghcn->set_options( $user_opt_href->%* );
271            
272 6 50       62 die join qq(\n), @errors, qq(\n)
273             if @errors;
274            
275 6         13 my ( $output, $new_fh, $old_fh );
276 6 0 33     19 if ( $Opt_outclip and $USE_WINCLIP ) {
277 0 0       0 open $new_fh, '>', \$output
278             or die 'Unable to open buffer for write';
279 0         0 $old_fh = select $new_fh; ## no critic (ProhibitOneArgSelect)
280             }
281            
282             # get a list of station id's from stdin if it's a pipe or file
283             # (but not if stdin is pointing to the terminal)
284 6 50 33     65 if ( $args{'stdin'} && ( -p *STDIN || -f *STDIN ) ) {
      66        
285 2         8 my $ii;
286             my %f;
287 2         57 while (my $line = ) { ## no critic [ProhibitExplicitStdin]
288 2         8 chomp;
289 2         107 my @id_list = $line =~ m{ $STN_ID_RE }xmsg;
290 2         9 foreach my $id ( @id_list ) {
291 2         8 $f{$id}++;
292 2         18 $ii++;
293             }
294             }
295            
296 2 100       10 if ($ii == 0) {
297 1         26 die '*W* no station ids found in the input';
298             } else {
299 1         9 $ghcn->stnid_filter_href( \%f );
300             }
301             }
302            
303 5         28 $ghcn->load_stations;
304            
305 5         12 say {*STDERR} '*I* ', $ghcn->stn_count, ' stations found';
  5         47  
306 5         30 say {*STDERR} '*I* ', $ghcn->stn_selected_count, ' stations match location and GSN options';
  5         37  
307 5         21 say {*STDERR} '*I* ', $ghcn->stn_filtered_count, ' stations matched range and measurement options';
  5         45  
308            
309 5 50       32 if ($ghcn->stn_filtered_count > $STN_THRESHOLD ) {
310 0 0       0 if (-t *STDIN) {
311 0         0 print {*STDERR} ">>>> There are a lot of stations to process. Continue (y/n)?\n>>>> ";
  0         0  
312 0         0 my $reply = ;
313 0         0 chomp $reply;
314 0 0       0 exit if $reply =~ m{ \A ( n | no ) }xmsi;
315             } else {
316 0         0 die '*E* too many stations to process';
317             }
318             }
319            
320 5 100       147 if ( $Opt->report eq 'kml' ) {
    50          
    50          
    50          
    50          
321 1         23 say $ghcn->report_kml;
322 1         31 goto WRAP_UP;
323             }
324             elsif ( $Opt->report eq 'url' ) {
325 0         0 say $ghcn->report_urls;
326 0         0 goto WRAP_UP;
327             }
328             elsif ( $Opt->report eq 'curl' ) {
329 0         0 say $ghcn->report_urls( curl => 1 );
330 0         0 goto WRAP_UP;
331             }
332             elsif ( $Opt->report eq 'stn' ) {
333 0         0 say $ghcn->get_stations( kept => 1 );
334 0         0 goto WRAP_UP;
335             }
336             elsif ( $Opt->report eq 'id' ) {
337 0         0 my @stn_list = $ghcn->get_stations( list => 1, kept => 1, no_header => 1 );
338 0         0 my @id_list = map { $_->[0] } @stn_list;
  0         0  
339 0         0 say join $NL, @id_list;
340 0         0 goto WRAP_UP;
341             }
342            
343 4 100       576 if ($Opt->report) {
344 2         44 say $ghcn->get_header;
345            
346             # this prints detailed station data if $Opt->report eq 'detail'
347             $ghcn->load_data(
348             # set a callback routine for printing progress messages
349 4     4   10 progress_sub => sub { say {*STDERR} @_ },
  4         151  
350             # set a callback routine for printing rows when -report detail
351 0     0   0 row_sub => sub { say join "\t", @{ $_[0] } },
  0         0  
352 2         49 );
353            
354 2 50 33     66 if ($Opt->report eq 'detail' and $Opt->nogaps) {
355 0         0 say $ghcn->get_missing_rows;
356             }
357            
358             # these only do something when $Opt->report ne 'detail'
359 2         81 $ghcn->summarize_data;
360 2         15 say $ghcn->get_summary_data;
361 2         64 say $EMPTY;
362            
363 2 100       148 goto WRAP_UP if $Opt->dataonly;
364            
365 1         32 say $EMPTY;
366 1         9 say $ghcn->get_footer;
367            
368 1         15 say $EMPTY;
369 1         9 say $ghcn->get_flag_statistics;
370             }
371            
372 3         101 say $EMPTY;
373 3         32 say $ghcn->get_stations( kept => 1 );
374            
375 3         33 my @rejected = $ghcn->get_stations( list => 1, kept => 0, no_header => 1 );
376 3 100       12 if (@rejected) {
377 1         21 say $EMPTY;
378 1         19 say 'Stations that failed to meet range or quality criteria:';
379 1         10 say tsv(\@rejected);
380 1         16 say $EMPTY;
381 1         16 say 'Reasons for rejection:';
382 1         14 my @notes = $ghcn->get_station_note_list;
383 1         6 say tsv(\@notes);
384             }
385            
386 3 100       25 if ( $ghcn->has_missing_data ) {
387 1         23 warn '*W* some data was missing for the stations and date range processed' . $NL;
388 1         15 say $EMPTY;
389 1         9 say $ghcn->get_missing_data_ranges;
390             }
391            
392 3         22 $ghcn->tstats->stop('_Overall') ;
393 3         15 $ghcn->tstats->finish;
394            
395 3         50 say $EMPTY;
396 3         23 say $ghcn->get_options;
397            
398 3         48 say $EMPTY;
399 3         38 say 'Script:';
400 3         41 say $TAB, $PROGRAM_NAME;
401 3         40 say "\tWeather::GHCN::StationTable version " . $Weather::GHCN::StationTable::VERSION;
402 3         31 say $TAB, 'Cache directory: ' . $ghcn->cachedir;
403 3         26 say $TAB, 'Profile file: ' . $ghcn->profile_file;
404            
405 3 100       120 if ( $Opt->performance ) {
406 1         30 say $EMPTY;
407 1         17 say sprintf 'Timing statistics (ms) and memory [bytes]';
408 1         8 say $ghcn->get_timing_stats;
409            
410 1         15 say $EMPTY;
411 1         9 say $ghcn->get_hash_stats;
412             }
413            
414             WRAP_UP:
415             # send output to the Windows clipboard
416 5 0 33     1308 if ( $Opt_outclip and $USE_WINCLIP ) {
417 0         0 Win32::Clipboard->new()->Set( $output );
418 0         0 select $old_fh; ## no critic [ProhibitOneArgSelect]
419             }
420            
421 5         344 return;
422             }
423            
424             ########################################################################
425             # Subroutines
426             ########################################################################
427            
428             =head2 get_user_options ( $optfile=undef )
429            
430             Fetch.pm uses B to either get user options
431             via B -- if it is installed -- or via B.
432            
433             =cut
434            
435 7     7 1 13 sub get_user_options ( $optfile=undef ) {
  7         17  
  7         11  
436            
437 7 50       36 my $user_opt_href = $Opt_gui
438             ? get_user_options_tk($optfile)
439             : get_user_options_no_tk($optfile)
440             ;
441            
442 7         25 return $user_opt_href;
443             }
444            
445             =head2 get_user_options_no_tk ( $optfile=undef )
446            
447             This function obtains user options from @ARGV by calling B
448             B using a list of option definitions obtained by calling
449             Bget_getopt_list()>. The options (and their values)
450             are extracted from @ARGV and put in a hash, a reference to which is
451             then returned.
452            
453             This function is called when the GUI is not being used. The $optfile
454             argument, if provided, is assumed to be a file saved from a GUI
455             invocation and will be eval'd and used as the options list.
456            
457             =cut
458            
459 8     8 1 3679 sub get_user_options_no_tk ( $optfile=undef ) {
  8         20  
  8         16  
460            
461 8         71 my @options = ( Weather::GHCN::Options->get_getopt_list() );
462            
463 8 50       28 if ($optfile) {
464 0         0 my $saved_opt_perlsrc = join $SPACE, path($optfile)->lines( {chomp=>1} );
465 0         0 my $loadoptions;
466            
467             ## no critic [ProhibitStringyEval]
468             ## no critic [RequireCheckingReturnValueOfEval]
469 0         0 eval $saved_opt_perlsrc;
470            
471 0         0 return $loadoptions;
472             }
473            
474 8         17 my %opt;
475 8         42 GetOptions( \%opt, @options);
476            
477 8         18392 return \%opt;
478             }
479            
480             =head2 get_user_options_tk ( $optfile=undef )
481            
482             This function returns a reference to a hash of user options obtained
483             by calling B. This may launch a GUI dialog to collect
484             the options.
485            
486             The optional $optfile argument specifies a filename which
487             B can use to store or load options.
488            
489             =cut
490            
491 0     0 1   sub get_user_options_tk ( $optfile=undef ) {
  0            
  0            
492            
493 0 0         if (not $USE_TK) {
494 0           die '*E* -gui option unavailable -- try installing Tk and Tk::Getopt';
495             }
496            
497 0           my %opt;
498            
499 0           my @opttable = ( Weather::GHCN::Options->get_tk_options_table() );
500            
501 0           my $optobj = Tk::Getopt->new(
502             -opttable => \@opttable,
503             -options => \%opt,
504             -filename => $optfile);
505            
506 0           $optobj->set_defaults; # set default values
507            
508 0 0 0       $optobj->load_options # Tk:Getopt configuration file
509             if defined $optfile and -e $optfile;
510            
511 0           $optobj->get_options; # command line
512            
513 0           $optobj->process_options; # process callbacks, check restrictions ...
514            
515 0 0         if ($Opt_gui) {
516 0           my $top = MainWindow->new;
517 0           $top->geometry('500x300+300+200');
518 0           $top->title('GHCN Daily Parser');
519            
520 0           my $retval = $optobj->option_dialog(
521             $top,
522             -toplevel => 'Frame',
523             -buttons => [qw/ok cancel save/], # not using cancel apply undo save defaults
524             -statusbar => 1,
525             -wait => 1,
526             -pack => [-fill => 'both', -expand => 1],
527             );
528            
529 0 0 0       die "*I* action cancelled\n" if $retval and $retval eq 'cancel';
530             }
531            
532 0           return \%opt;
533             }
534            
535             =head1 AUTHOR
536            
537             Gary Puckering (jgpuckering@rogers.com)
538            
539             =head1 LICENSE AND COPYRIGHT
540            
541             Copyright 2022, Gary Puckering
542            
543             =cut
544            
545             1;