File Coverage

lib/Weather/GHCN/App/CacheUtil.pm
Criterion Covered Total %
statement 168 220 76.3
branch 23 68 33.8
condition 4 21 19.0
subroutine 24 26 92.3
pod 10 10 100.0
total 229 345 66.3


line stmt bran cond sub pod time code
1             # Weather::GHCN::CacheUtil.pm - cache utility
2            
3             ## no critic (Documentation::RequirePodAtEnd)
4            
5             =head1 NAME
6            
7             Weather::GHCN::App::CacheUtil - Show or clean up cache content
8              
9             =head1 VERSION
10              
11             version v0.0.011
12            
13             =head1 SYNOPSIS
14            
15             use Weather::GHCN::App::CacheUtil;
16            
17             Weather::GHCN::App::CacheUtil->run( \@ARGV );
18            
19             See ghcn_cacheutil -help for details.
20            
21             =cut
22            
23             ########################################################################
24             # Pragmas
25             ########################################################################
26            
27             # these are needed because perlcritic fails to detect that Object::Pad handles these things
28             ## no critic [ValuesAndExpressions::ProhibitVersionStrings]
29            
30 1     1   110846 use v5.18;
  1         15  
31 1     1   6 use warnings;
  1         2  
  1         51  
32            
33             package Weather::GHCN::App::CacheUtil;
34            
35             our $VERSION = 'v0.0.011';
36            
37 1     1   6 use feature 'signatures';
  1         2  
  1         243  
38 1     1   7 no warnings 'experimental::signatures';
  1         3  
  1         55  
39            
40             ########################################################################
41             # perlcritic rules
42             ########################################################################
43            
44             ## no critic [Subroutines::ProhibitSubroutinePrototypes]
45             ## no critic [ErrorHandling::RequireCarping]
46             ## no critic [Modules::ProhibitAutomaticExportation]
47            
48             # due to use of postfix dereferencing, we have to disable these warnings
49             ## no critic [References::ProhibitDoubleSigils]
50            
51             ########################################################################
52             # Export
53             ########################################################################
54            
55             require Exporter;
56            
57 1     1   6 use base 'Exporter';
  1         2  
  1         129  
58            
59             our @EXPORT = ( 'run' );
60            
61             ########################################################################
62             # Libraries
63             ########################################################################
64 1     1   503 use English qw( -no_match_vars ) ;
  1         3544  
  1         5  
65 1     1   1041 use Getopt::Long qw( GetOptionsFromArray );
  1         12674  
  1         6  
66 1     1   672 use Pod::Usage;
  1         54468  
  1         144  
67 1     1   621 use Const::Fast;
  1         2812  
  1         8  
68 1     1   599 use Hash::Wrap {-lvalue => 1, -defined => 1, -as => '_wrap_hash'};
  1         3933  
  1         8  
69 1     1   2227 use Path::Tiny 0.122;
  1         12930  
  1         65  
70 1     1   541 use Weather::GHCN::Common qw(commify);
  1         3  
  1         72  
71 1     1   513 use Weather::GHCN::Station;
  1         4  
  1         50  
72 1     1   750 use Weather::GHCN::StationTable;
  1         3  
  1         76  
73            
74             # modules for Windows only
75 1     1   846 use if $OSNAME eq 'MSWin32', 'Win32::Clipboard';
  1         15  
  1         6  
76            
77             ########################################################################
78             # Global delarations
79             ########################################################################
80            
81             # is it ok to use Win32::Clipboard?
82             our $USE_WINCLIP = $OSNAME eq 'MSWin32';
83            
84             our $Opt; # declared as 'our' for r/w access from 94_ghcn_cacheutil.t
85            
86             ########################################################################
87             # Constants
88             ########################################################################
89            
90             const my $EMPTY => q(); # empty string
91             const my $SPACE => q( ); # space character
92             const my $COMMA => q(,); # comma character
93             const my $TAB => qq(\t); # tab character
94             const my $DASH => q(-); # dash character
95             const my $TRUE => 1; # perl's usual TRUE
96             const my $FALSE => not $TRUE; # a dual-var consisting of '' and 0
97            
98             const my $PROFILE_FILE => '~/.ghcn_fetch.yaml';
99            
100            
101             ########################################################################
102             # Script Mainline
103             ########################################################################
104            
105             __PACKAGE__->run( \@ARGV ) unless caller;
106            
107             #-----------------------------------------------------------------------
108             =head1 SUBROUTINES
109            
110             =head2 run ( \@ARGV )
111            
112             Invoke this subroutine, passing in a reference to @ARGV, in order to
113             get list of cache contents or remove cache content.
114            
115             See ghnc_cache.pl -help for details.
116            
117             =cut
118            
119 0     0 1 0 sub run ($progname, $argv_aref) {
  0         0  
  0         0  
  0         0  
120            
121 0         0 $Opt = get_options($argv_aref);
122            
123 0         0 my $ghcn = get_ghcn($Opt->profile, $Opt->cachedir);
124 0         0 my $cache_pto = path($ghcn->cachedir); # pto = Path::Tiny object
125            
126 0 0       0 if ($Opt->clean) {
127 0         0 my @errors = $ghcn->cache_obj->clean_cache();
128 0 0       0 if (@errors) {
129 0         0 say {*STDERR} join "\n", @errors;
  0         0  
130 0         0 exit 1;
131             }
132 0         0 return;
133             }
134            
135             # send print output to the Windows clipboard if requested and doable
136 0 0 0     0 outclip() if $Opt->outclip and $USE_WINCLIP;
137            
138 0         0 my $alias_href = get_alias_stnids($ghcn->profile_href);
139            
140 0         0 my $files_href = load_cached_files($ghcn, $cache_pto, $alias_href);
141            
142 0 0       0 if (keys $files_href->%* == 0) {
143 0         0 say {*STDERR} '*I* cache is empty';
  0         0  
144 0         0 return;
145             }
146            
147 0 0       0 if ($Opt->remove) {
148 0         0 foreach my $fileid (sort keys $files_href->%*) {
149 0         0 my $file = $files_href->{$fileid};
150 0 0       0 next unless $file->{INCLUDE};
151 0         0 say {*STDERR} 'Removing ', $file->{PathObj};
  0         0  
152 0         0 $file->{PathObj}->remove;
153             }
154 0         0 return;
155             }
156            
157 0         0 my $total_kb = report_daily_files($files_href);
158            
159 0         0 say '';
160 0         0 say "Total cache size: ", commify($total_kb);
161 0         0 say 'Cache location: ', $cache_pto;
162            
163             # restore print output to stdout
164 0 0 0     0 outclip() if $Opt->outclip and $USE_WINCLIP;
165            
166 0         0 return;
167             }
168            
169             =head2 filter_files ( \%files )
170            
171             Given a hash containing Path::Tiny objects representing the files
172             in the designed ghcn cache folder, apply the various filtering
173             criteria options and mark those objects which match the criteria by
174             inserting the key INCLUDE with value 1 in the %files entry for
175             that object.
176            
177             Modifies the content of %files. Void return.
178            
179             =cut
180            
181 1     1 1 4 sub filter_files ($files_href) {
  1         3  
  1         4  
182 1         12 foreach my $fileid (sort keys $files_href->%*) {
183 6         13 my $file = $files_href->{$fileid};
184 6         113 my $loc = $Opt->location;
185            
186 6 50       849 next unless match_type( $file->{Type}, $Opt->type );
187            
188 6 50 33     98 next if $Opt->country and $file->{Country} ne $Opt->country;
189 6 50 33     154 next if $Opt->state and $file->{State} ne $Opt->state;
190            
191 6         535 my $kb = round($file->{Size} / 1024);
192            
193 6 50       101 if (defined $Opt->size) {
194 0 0       0 next unless $Opt->size <= 0
    0          
195             ? $kb <= -$Opt->size
196             : $kb >= $Opt->size;
197             }
198            
199 6 50       638 if (defined $Opt->age) {
200             next unless $Opt->age <= 0
201             ? $file->{Age} <= -$Opt->age
202 0 0       0 : $file->{Age} >= $Opt->age;
    0          
203             }
204            
205 6 50       610 if ($Opt->invert) {
206 0 0 0     0 next if $Opt->location and $file->{Location} =~ m{$loc}msi;
207             } else {
208 6 50 33     672 next if $Opt->location and $file->{Location} !~ m{$loc}msi;
209             }
210            
211 6         78 $file->{INCLUDE} = 1;
212             }
213             }
214            
215             =head2 get_ghcn ($profile, $cachedir)
216            
217             Returns a Weather::GHCN::StationTable object initialized with a cache
218             location obtained from $cachedir or, if $cachdir is undefined, from
219             the cachedir option defined in the user profile specified by
220             $profile. If errors are encounterd, it dies and produces a list.
221            
222             =cut
223            
224 1     1 1 3595 sub get_ghcn ($profile, $cachedir) {
  1         3  
  1         3  
  1         2  
225 1         22 my $ghcn = Weather::GHCN::StationTable->new;
226            
227 1   33     4 $profile //= $PROFILE_FILE;
228            
229 1         6 my ($opt, @errors) = $ghcn->set_options(
230             cachedir => $cachedir,
231             profile => $profile,
232             );
233 1 50       4 die @errors if @errors;
234            
235 1         4 return $ghcn;
236             }
237            
238             =head2 get_options ( \@ARGV )
239            
240             B encapsulates everything we need to process command line
241             options, or to set options when invoking this script from a test script.
242            
243             Normally it's called by passing a reference to @ARGV; from a test script
244             you'd set up a local array variable to specify the options.
245            
246             By convention, you should set up a file-scoped lexical variable named
247             $Opt and set it in the mainline using the return value from this function.
248             Then all options can be accessed used $Opt->option notation.
249            
250             =cut
251            
252 1     1 1 3804 sub get_options ($argv_aref) {
  1         3  
  1         2  
253            
254 1         8 my @options = (
255             'country:s', # filter by country
256             'state|prov:s', # filter by state or province
257             'location:s', # filter by localtime
258             'remove', # remove cached daily files (except aliases)
259             'clean', # remove all files from the cache
260             'invert|v', # invert -location selection criteria
261             'size|kb:i', # select files by size in Kb
262             'age:i', # select file if >= age
263             'type:s', # select based on type
264             'cachedir:s', # cache location
265             'profile:s', # profile file
266             'outclip', # output data to the Windows clipboard
267             'help','usage|?', # help
268             );
269            
270 1         3 my %opt;
271            
272             # create a list of option key names by stripping the various adornments
273 1         3 my @keys = map { (split m{ [!+=:|] }xms)[0] } grep { !ref } @options;
  14         44  
  14         24  
274             # initialize all possible options to undef
275 1         23 @opt{ @keys } = ( undef ) x @keys;
276            
277 1 50       10 GetOptionsFromArray($argv_aref, \%opt, @options)
278             or pod2usage(2);
279            
280             # Make %opt into an object and name it the same as what we usually
281             # call the global options object. Note that this doesn't set the
282             # global -- the script will have to do that using the return value
283             # from this function. But, what this does is allow us to call
284             # $Opt->help and other option within this function using the same
285             # syntax as what we use in the script. This is handy if you need
286             # to rename option '-foo' to '-bar' because you can do a find/replace
287             # on '$Opt->foo' and you'll get any instances of it here as well as
288             # in the script.
289            
290             ## no critic [Capitalization]
291             ## no critic [ProhibitReusedNames]
292 1         1082 my $Opt = _wrap_hash \%opt;
293            
294 1 50       66 pod2usage(1) if $Opt->usage;
295 1 50       812 pod2usage(-verbose => 2) if $Opt->help;
296            
297 1         483 return $Opt;
298             }
299            
300             =head2 get_alias_stnids ( \%profile )
301            
302             Read the hash obtained from the user profile file and find the alias
303             definitions. Return a hash of station id's that have been aliased.
304            
305             =cut
306            
307 1     1 1 4 sub get_alias_stnids ($profile_href) {
  1         3  
  1         3  
308 1 50       6 return {} if not $profile_href;
309 1         4 my $aliases_href = $profile_href->{aliases};
310 1 50       4 return {} if not $aliases_href;
311 1         2 my %aliases;
312 1         5 foreach my $stn_str (values $aliases_href->%*) {
313 3         37 my @stns = split $COMMA, $stn_str;
314 3         10 foreach my $stn (@stns) {
315 5         15 $aliases{$stn} = 1;
316             }
317             }
318 1         3 return \%aliases;
319             }
320            
321             =head2 load_cached_files ($ghcn, $cache_pto, \%alias )
322            
323             Given a Weather::GHCN::StationTable object and a cache Path::Tiny
324             object, and a hash of which files correspond to aliased stations,
325             return a hash which combines the file information and the station
326             information (where applicable) and categorizes each entry by type:
327             D for daily data file, A for aliases station, and C for catalog files.
328            
329             =cut
330            
331 1     1 1 54 sub load_cached_files ($ghcn, $cache_pto, $alias_href) {
  1         2  
  1         3  
  1         2  
  1         2  
332            
333 1         26 my @files = $cache_pto->children;
334            
335 1 50       384 return {} if not @files;
336            
337 1         4 my @txtfiles;
338             my %filter;
339 1         5 foreach my $pto (@files) {
340 6         19 my $bname = $pto->basename;
341 6 100       156 if ( $bname =~ m{ [.]txt \Z}xms ) {
342 2         5 push @txtfiles, $pto;
343 2         6 next;
344             }
345 4         23 my $stnid = $pto->basename('.dly'); # removes the extension
346 4         99 $filter{$stnid} = 1;
347             }
348            
349            
350 1 50       5 if (@txtfiles == 0) {
351 0         0 say {*STDERR} '*W* no station catalog files (ghcnd-*.txt) in the cache - resorting to a simple file list';
  0         0  
352 0         0 say $_->basename for @files;
353 0         0 return {};
354             }
355            
356 1         4 my $stations_txt = path($cache_pto, 'ghcnd-stations.txt')->slurp;
357            
358 1         20093 $ghcn->stnid_filter_href( \%filter );
359 1         8 $ghcn->load_stations( content => $stations_txt );
360            
361 1         8 my @stations = $ghcn->get_stations(list => 1, no_header => 1);
362 1         12 my @hdr = Weather::GHCN::Station::Headings;
363            
364 1         3 my %files;
365 1         3 foreach my $stn_row (@stations) {
366 4         6 my %file;
367 4         50 @file{@hdr} = $stn_row->@*;
368            
369 4         12 my $fileid = $file{StationId};
370 4         21 my $pathobj = path($cache_pto, $fileid . '.dly');
371            
372 4 50       238 $file{Type} = $alias_href->{$fileid} ? 'A' : 'D';
373 4         14 $file{Size} = $pathobj->size;
374 4         133 $file{Age} = int -M $pathobj->stat;
375 4         783 $file{PathObj} = $pathobj;
376 4         20 $files{$file{StationId}} = \%file;
377             }
378            
379 1         3 foreach my $pto (@txtfiles) {
380 2         4 my %file;
381 2         9 my $fileid = $pto->basename('.txt');
382 2         77 $fileid =~ s{ \A ghcnd- }{}xms;
383 2         7 $file{StationId} = $fileid;
384 2         6 $file{Location} = $pto->basename;
385 2         21 $file{Type} = 'C';
386 2         7 $file{Size} = $pto->size;
387 2         68 $file{Age} = int -M $pto->stat;
388 2         313 $file{PathObj} = $pto;
389 2         11 $files{$file{StationId}} = \%file;
390             }
391            
392 1         13 filter_files(\%files);
393            
394 1         26 return \%files;
395             }
396            
397             =head2 match_type ($file_type, $match_types)
398            
399             Cache files are categorized by type: D for .dly files, A for .dly files
400             that correspond to user aliases, and C for .txt files. The user can
401             provide a -type option with a string to select based on type. The
402             string can contain any or all of the three letters. This function
403             is used to match the file type with the -type option. Returns true
404             if the $file_type letter (D, A or C) is found in the $match_types
405             string.
406            
407             =cut
408            
409 14     14 1 4395 sub match_type ($file_type, $match_types) {
  14         35  
  14         23  
  14         25  
410 14 100       44 return $TRUE if not $match_types;
411 7         23 my @types = split //, $match_types;
412 7         15 my $matched = 0;
413 7         15 foreach my $m (@types) {
414 10 100       31 $matched++ if uc $m eq uc $file_type
415             }
416 7         40 return $matched++
417             }
418            
419             =head2 outclip ()
420            
421             When called initially, it redirects STDOUT to local variable so that
422             printing is saved in memory. On the subsequent call, it writes the
423             content of the variable to the Windows Clipboard and resets STDOUT
424             to its original state (usually the terminal).
425            
426             Since Windows::Clipboard is platform specific, calls to this subroutine
427             should conditional. The following pattern is recommended:
428            
429             # modules for Windows only
430             use if $OSNAME eq 'MSWin32', 'Win32::Clipboard';
431            
432             # is it ok to use Win32::Clipboard?
433             our $USE_WINCLIP = $OSNAME eq 'MSWin32';
434            
435             # send print output to the Windows clipboard if requested and doable
436             outclip() if $Opt->outclip and $USE_WINCLIP;
437            
438             ... print stuff
439            
440             # restore print output to stdout
441             outclip() if $Opt->outclip and $USE_WINCLIP;
442            
443             This subroutine relies on state variables. It cannot be used in a
444             nested fashion. It is best confined to main:: (or the top-level
445             subroutine).
446            
447             =cut
448            
449 0     0 1 0 sub outclip () {
  0         0  
450 0         0 state $old_fh;
451 0         0 state $output;
452            
453 0 0       0 if ($old_fh) {
454 0         0 Win32::Clipboard->new()->Set( $output );
455 0         0 select $old_fh; ## no critic [ProhibitOneArgSelect]
456             } else {
457 0 0       0 open my $new_fh, '>', \$output
458             or die 'Unable to open buffer for write';
459 0         0 $old_fh = select $new_fh; ## no critic (ProhibitOneArgSelect)
460             }
461            
462 0         0 return;
463             }
464            
465             =head2 report_daily_files ($files_href)
466            
467             Given a hash of the cache file hash objects, each consisting of a
468             merger of file properties and station properties, this subroutine
469             will print a report listing those that were flagged for inclusion
470             by filter_files().
471            
472             Output is ordered by StationId. Catalog (.txt) files don't have a
473             station id, so short version of the filename is used. Since those
474             names are lowercase, they sort last in the the list.
475            
476             The Type of the file appears in the first column: D for daily weather
477             data files, A for daily weather data files that correspond to aliases
478             defined in the user profile, and C for catalog files.
479            
480             =cut
481            
482 1     1 1 6961 sub report_daily_files ($files_href) {
  1         2  
  1         3  
483            
484 1         69 printf "%s %-11s %2s %2s %-9s %6s %4s %s\n", qw(T StationId Co St Active Kb Age Location);
485            
486 1         7 my $total_kb = 0;
487            
488 1         8 foreach my $fileid (sort keys $files_href->%*) {
489 6         20 my $file = $files_href->{$fileid};
490 6 50       33 next unless $file->{INCLUDE};
491            
492 6         20 my $kb = round($file->{Size} / 1024);
493 6         12 $total_kb += $kb;
494            
495 1     1   2758 no warnings 'uninitialized';
  1         2  
  1         220  
496             printf "%s %-11s %2s %2s %9s %6s %4s %s\n",
497             $file->{Type},
498             $file->{StationId},
499             $file->{Country},
500             $file->{State},
501             $file->{Active},
502             sprintf('%6s', commify( $kb )),
503             $file->{Age},
504             $file->{Location},
505 6         32 ;
506             }
507            
508 1         8 return $total_kb;
509             }
510            
511             =head2 round ($v)
512            
513             Round $v using the half-adjust method. Returns an integer.
514            
515             =cut
516            
517 17     17 1 2041 sub round ($v) {
  17         28  
  17         24  
518 17         56 return int($v + .5);
519             }
520            
521             1;