File Coverage

blib/lib/WebFetch.pm
Criterion Covered Total %
statement 436 827 52.7
branch 145 410 35.3
condition 22 117 18.8
subroutine 76 106 71.7
pod 26 41 63.4
total 705 1501 46.9


line stmt bran cond sub pod time code
1             # WebFetch
2             # ABSTRACT: Perl module to download/fetch and save information from the Web
3             # This module hierarchy is infrastructure for downloading ("fetching") information from
4             # various sources around the Internet or the local system in order to
5             # present them for display, or to export local information to other sites
6             # on the Internet
7             #
8             # Copyright (c) 1998-2022 Ian Kluft. This program is free software; you can
9             # redistribute it and/or modify it under the terms of the GNU General Public
10             # License Version 3. See https://www.gnu.org/licenses/gpl-3.0-standalone.html
11              
12             # pragmas to silence some warnings from Perl::Critic
13             ## no critic (Modules::RequireExplicitPackage)
14             # This solves a catch-22 where parts of Perl::Critic want both package and use-strict to be first
15 5     5   470122 use strict;
  5         44  
  5         163  
16 5     5   25 use warnings;
  5         12  
  5         135  
17 5     5   692 use utf8;
  5         23  
  5         48  
18             ## use critic (Modules::RequireExplicitPackage)
19              
20             package WebFetch;
21             $WebFetch::VERSION = '0.15.9';
22              
23 5     5   251 use Carp qw(croak);
  5         9  
  5         280  
24 5     5   3980 use Getopt::Long;
  5         58184  
  5         25  
25 5     5   2579 use Readonly;
  5         12167  
  5         300  
26 5     5   45 use Scalar::Util qw(reftype);
  5         15  
  5         273  
27 5     5   3901 use LWP::UserAgent;
  5         263870  
  5         212  
28 5     5   44 use HTTP::Request;
  5         9  
  5         129  
29 5     5   5104 use DateTime;
  5         2794045  
  5         271  
30 5     5   3592 use DateTime::Format::ISO8601;
  5         2221244  
  5         316  
31 5     5   57 use DateTime::Locale;
  5         11  
  5         136  
32 5     5   3499 use Date::Calc;
  5         33581  
  5         305  
33 5     5   3540 use Paranoid::IO qw(pclose);
  5         95058  
  5         414  
34 5     5   2533 use Paranoid::IO::Lockfile qw(pexclock pshlock punlock);
  5         6343  
  5         418  
35 5     5   2656 use WebFetch::Data::Config;
  5         17  
  5         1016  
36              
37             #
38             # constants
39             #
40              
41             # defualt supported output formats
42             # more may be added by plugin modules
43             Readonly::Array my @WebFetch_formatters => qw( output:html output:xml output:wf );
44              
45             # defualy modules for input and output
46             Readonly::Hash my %default_modules => (
47             "input" => {
48             "rss" => "WebFetch::RSS",
49             "sitenews" => "WebFetch::Input::SiteNews",
50             "perlstruct" => "WebFetch::Input::PerlStruct",
51             "atom" => "WebFetch::Input::Atom",
52             "dump" => "WebFetch::Input::Dump",
53             },
54             "output" => {
55             "rss" => "WebFetch::RSS",
56             "atom" => "WebFetch::Atom",
57             "tt" => "WebFetch::Output:TT",
58             "perlstruct" => "WebFetch::Output::PerlStruct",
59             "dump" => "WebFetch::Output::Dump",
60             }
61             );
62              
63             # parameters which are redirected into a sub-hash
64             Readonly::Hash my %redirect_params => (
65             locale => "datetime_settings",
66             time_zone => "datetime_settings",
67             notable => "style",
68             para => "style",
69             ul => "style",
70             );
71              
72             # file paths and class names
73             Readonly::Scalar my $db_class => "DB_File";
74             Readonly::Array my @yaml_class => (
75             ( exists $ENV{WEBFETCH_YAML_CLASS} )
76             ? ( split " ", $ENV{WEBFETCH_YAML_CLASS} )
77             : qw(YAML::XS YAML::Syck YAML::PP YAML)
78             );
79             Readonly::Hash my %index_file => (
80             db => "id_index.db",
81             lock => "id_index.lock",
82             yaml => "id_index.yaml",
83             );
84              
85             #
86             # exceptions/errors
87             #
88 5     5   72 use Try::Tiny;
  5         19  
  5         1233  
89             use Exception::Class (
90 5         194 'WebFetch::Exception',
91             'WebFetch::TracedException' => {
92             isa => 'WebFetch::Exception',
93             },
94              
95             'WebFetch::Exception::DataWrongType' => {
96             isa => 'WebFetch::TracedException',
97             alias => 'throw_data_wrongtype',
98             description => "provided data must be a WebFetch::Data::Store",
99             },
100              
101             'WebFetch::Exception::IncompatibleClass' => {
102             isa => 'WebFetch::Exception',
103             alias => 'throw_incompatible_class',
104             description => "class method called for class outside WebFetch hierarchy",
105             },
106              
107             'WebFetch::Exception::GetoptError' => {
108             isa => 'WebFetch::Exception',
109             alias => 'throw_getopt_error',
110             description => "software error during command line processing",
111             },
112              
113             'WebFetch::Exception::Usage' => {
114             isa => 'WebFetch::Exception',
115             alias => 'throw_cli_usage',
116             description => "command line processing failed",
117             },
118              
119             'WebFetch::Exception::ParameterError' => {
120             isa => 'WebFetch::Exception',
121             alias => 'throw_param_error',
122             description => "parameter error",
123             },
124              
125             'WebFetch::Exception::Save' => {
126             isa => 'WebFetch::Exception',
127             alias => 'throw_save_error',
128             description => "an error occurred while saving the data",
129             },
130              
131             'WebFetch::Exception::NoSave' => {
132             isa => 'WebFetch::Exception',
133             alias => 'throw_no_save',
134             description => "unable to save: no data or nowhere to save it",
135             },
136              
137             'WebFetch::Exception::NoHandler' => {
138             isa => 'WebFetch::Exception',
139             alias => 'throw_no_handler',
140             description => "no handler was found",
141             },
142              
143             'WebFetch::Exception::MustOverride' => {
144             isa => 'WebFetch::TracedException',
145             alias => 'throw_abstract',
146             description => "A WebFetch function was called which is " . "supposed to be overridden by a subclass",
147             },
148              
149             'WebFetch::Exception::NetworkGet' => {
150             isa => 'WebFetch::Exception',
151             alias => 'throw_network_get',
152             fields => [qw( client )],
153             description => "Failed to access feed source",
154             },
155              
156             'WebFetch::Exception::ModLoadFailure' => {
157             isa => 'WebFetch::Exception',
158             alias => 'throw_mod_load_failure',
159             description => "failed to load a WebFetch Perl module",
160             },
161              
162             'WebFetch::Exception::ModRunFailure' => {
163             isa => 'WebFetch::Exception',
164             alias => 'throw_mod_run_failure',
165             description => "failed to run a WebFetch module",
166             },
167              
168             'WebFetch::Exception::ModNoRunModule' => {
169             isa => 'WebFetch::Exception',
170             alias => 'throw_no_run',
171             description => "no module was found to run the request",
172             },
173              
174             'WebFetch::Exception::AutoloadFailure' => {
175             isa => 'WebFetch::TracedException',
176             alias => 'throw_autoload_fail',
177             description => "AUTOLOAD failed to handle function call",
178             },
179              
180 5     5   39 );
  5         16  
181              
182             # initialize class variables
183             my %modules;
184             our $AUTOLOAD;
185              
186             sub debug_mode
187             {
188 736     736 0 3025 my @args = @_;
189              
190             # check if any arguments were provided
191             # counting parameters allows us to handle undef if provided as a value (can't do that with "defined" test)
192 736 100       1498 if ( scalar @args == 0 ) {
193              
194             # if no new value provided, return debug configuration value
195 727 100       1240 return WebFetch->config('debug') ? 1 : 0;
196             }
197              
198             # set debug mode from provided value
199 9 100       26 my $debug_mode = $args[0] ? 1 : 0;
200 9         35 WebFetch->config( debug => $debug_mode );
201 9         65 return $debug_mode;
202             }
203              
204             # print parameters to STDERR if debug mode is enabled
205             sub debug
206             {
207 722     722 0 57312 my @args = @_;
208 722         1125 my $debug_mode = debug_mode();
209 722 100       1309 if ($debug_mode) {
210 1         60 print STDERR "debug: " . join( " ", @args ) . "\n";
211             }
212 722         1165 return $debug_mode;
213             }
214              
215             # module registry read-accessor
216             # for testing and internal use only (inhibit critic warning because it is not unused - tests use it)
217             ## no critic (Subroutines::ProhibitUnusedPrivateSubroutines)
218             sub _module_registry
219             {
220 2     2   2219 my ( $class, $key ) = @_;
221 2 50       10 if ( not $class->isa("WebFetch") ) {
222 0         0 throw_incompatible_class("invalid _module_registry() call for '$class': not in the WebFetch hierarchy");
223             }
224 2 50       8 if ( exists $modules{$key} ) {
225 2         5 return $modules{$key};
226             }
227 0         0 return;
228             }
229             ## critic (Subroutines::ProhibitUnusedPrivateSubroutines)
230              
231             # load a YAML parser class from allowed list
232             sub _load_yaml
233             {
234 5     5   723 my $yaml_loaded;
235 5         27 foreach my $classname (@yaml_class) {
236             try {
237             ## no critic (BuiltinFunctions::ProhibitStringyEval)
238 5 50   5   168 eval "require $classname" or croak $@;
239 5         6164 $classname->import(qw(LoadFile DumpFile));
240 5         26 $yaml_loaded = $classname;
241 5         71 };
242 5 50       90 last if $yaml_loaded;
243             }
244 5         17 return $yaml_loaded;
245             }
246              
247             # return WebFetch (or subclass) version number
248             sub version
249             {
250 9     9 1 434 my $class = shift;
251              
252 9 50       24 if ( not defined $class ) {
253 0         0 throw_incompatible_class("invalid version() call on undefined value");
254             }
255 9 50       29 if ( not $class->isa("WebFetch") ) {
256 0         0 throw_incompatible_class("invalid version() call for '$class': not in the WebFetch hierarchy");
257             }
258             {
259             ## no critic (TestingAndDebugging::ProhibitNoStrict)
260 5     5   25524 no strict 'refs';
  5         12  
  5         8846  
  9         16  
261 9 50       14 if ( defined ${ $class . "::VERSION" } ) {
  9         32  
262 9         12 return ${ $class . "::VERSION" };
  9         127  
263             }
264             }
265 0 0       0 if ( defined $WebFetch::VERSION ) {
266 0         0 return $WebFetch::VERSION;
267             }
268 0         0 return "00-dev";
269             }
270              
271             # wrapper for WebFetch::Data::Config read/write accessor
272             sub config
273             {
274 749     749 1 3517 my ( $class, $key, $value ) = @_;
275 749 50       1821 if ( not $class->isa("WebFetch") ) {
276 0         0 throw_incompatible_class("invalid config() call for '$class': not in the WebFetch hierarchy");
277             }
278 749         1602 return WebFetch::Data::Config->accessor( $key, $value );
279             }
280              
281             # wrapper for WebFetch::Data::Config existence-test method
282             sub has_config
283             {
284 14     14 1 5484 my ( $class, $key ) = @_;
285 14 50       60 if ( not $class->isa("WebFetch") ) {
286 0         0 throw_incompatible_class("invalid has_config() call for '$class': not in the WebFetch hierarchy");
287             }
288 14         54 return WebFetch::Data::Config->contains($key);
289             }
290              
291             # wrapper for WebFetch::Data::Config existence-test method
292             sub del_config
293             {
294 4     4 1 2167 my ( $class, $key ) = @_;
295 4 50       22 if ( not $class->isa("WebFetch") ) {
296 0         0 throw_incompatible_class("invalid del_config() call for '$class': not in the WebFetch hierarchy");
297             }
298 4         16 return WebFetch::Data::Config->del($key);
299             }
300              
301             sub import_config
302             {
303 3     3 1 12 my ( $class, $hashref ) = @_;
304 3 50       28 if ( not $class->isa("WebFetch") ) {
305 0         0 throw_incompatible_class("invalid import_config() call for '$class': not in the WebFetch hierarchy");
306             }
307              
308             # import config entries
309 3         12 foreach my $key (%$hashref) {
310 18         59 WebFetch::Data::Config->accessor( $key, $hashref->{$key} );
311             }
312 3         7 return;
313             }
314              
315             sub keys_config
316             {
317 2     2 1 1308 my ($class) = @_;
318 2 50       12 if ( not $class->isa("WebFetch") ) {
319 0         0 throw_incompatible_class("invalid import_config() call for '$class': not in the WebFetch hierarchy");
320             }
321 2         8 my $instance = WebFetch::Data::Config->instance();
322 2         33 return keys %$instance;
323             }
324              
325             sub module_register
326             {
327 11     11 1 69 my ( $module, @capabilities ) = @_;
328              
329             # each string provided is a capability the module provides
330 11         80 foreach my $capability (@capabilities) {
331              
332             # import configuration entries if any entry in @capabilities is a hashref
333 26 100       82 if ( ref $capability eq 'HASH' ) {
334 3         14 WebFetch->import_config($capability);
335 3         7 next;
336             }
337              
338             # A ":" if present delimits a group of capabilities
339             # such as "input:rss" for and "input" capability of "rss"
340 23 100       132 if ( $capability =~ /([^:]+):([^:]+)/x ) {
341              
342             # A ":" was found so process a 2nd-level group entry
343 20         49 my $group = $1;
344 20         41 my $subcap = $2;
345 20 100       72 if ( not exists $modules{$group} ) {
346 8         17 $modules{$group} = {};
347             }
348 20 100       66 if ( not exists $modules{$group}{$subcap} ) {
349 17         39 $modules{$group}{$subcap} = [];
350             }
351 20         28 push @{ $modules{$group}{$subcap} }, $module;
  20         80  
352             } else {
353              
354             # just a simple capbility name so store it
355 3 50       10 if ( not exists $modules{$capability} ) {
356 3         11 $modules{$capability} = [];
357             }
358 3         4 push @{ $modules{$capability} }, $module;
  3         10  
359             }
360             }
361 11         46 return;
362             }
363              
364             # module selection - choose WebFetch module based on selected file format
365             # for WebFetch internal use only
366             sub module_select
367             {
368 0     0 0 0 my $capability = shift;
369 0         0 my $is_optional = shift;
370              
371 0         0 debug "module_select($capability,$is_optional)";
372              
373             # parse the capability string
374 0         0 my ( $group, $topic );
375 0 0       0 if ( $capability =~ /([^:]*):(.*)/x ) {
376 0         0 $group = $1;
377 0         0 $topic = $2;
378             } else {
379 0         0 $topic = $capability;
380             }
381              
382             # check for modules to handle the specified source_format
383 0         0 my ( @handlers, %handlers );
384              
385             # consider whether a group is in use (single or double-level scan)
386 0 0       0 if ($group) {
387              
388             # double-level scan
389              
390             # if the group exists, search in it
391 0 0 0     0 if ( ( exists $modules{$group}{$topic} )
    0          
392             and ( ref $modules{$group}{$topic} eq "ARRAY" ) )
393             {
394             # search group for topic
395 0         0 foreach my $handler ( @{ $modules{$group}{$topic} } ) {
  0         0  
396 0 0       0 if ( not exists $handlers{$handler} ) {
397 0         0 push @handlers, $handler;
398 0         0 $handlers{$handler} = 1;
399             }
400             }
401              
402             # otherwise check the defaults
403             } elsif ( exists $default_modules{$group}{$topic} ) {
404              
405             # check default handlers
406 0         0 my $def_handler = $default_modules{$group}{$topic};
407 0 0       0 if ( not exists $handlers{$def_handler} ) {
408 0         0 push @handlers, $def_handler;
409 0         0 $handlers{$def_handler} = 1;
410             }
411             }
412             } else {
413              
414             # single-level scan
415              
416             # if the topic exists, the search is a success
417 0 0 0     0 if ( ( exists $modules{$topic} )
418             and ( ref $modules{$topic} eq "ARRAY" ) )
419             {
420 0         0 @handlers = @{ $modules{$topic} };
  0         0  
421             }
422             }
423              
424             # check if any handlers were found for this format
425 0 0 0     0 if ( not @handlers and not $is_optional ) {
426 0         0 throw_no_handler("handler not found for $capability");
427             }
428              
429 0         0 debug "module_select: " . join( " ", @handlers );
430 0         0 return @handlers;
431             }
432              
433             # satisfy POD coverage test - but don't put this function in the user manual
434              
435             # if no input or output format was specified, but only 1 is registered, pick it
436             # $group parameter should be config group to search, i.e. "input" or "output"
437             # returns the format string which will be provided
438             sub singular_handler
439             {
440 0     0 0 0 my $group = shift;
441              
442 0         0 debug "singular_handler($group)";
443 0         0 my $count = 0;
444 0         0 my $last_entry;
445 0         0 foreach my $entry ( keys %{ $modules{$group} } ) {
  0         0  
446 0 0       0 if ( ref $modules{$group}{$entry} eq "ARRAY" ) {
447 0         0 my $entry_count = scalar @{ $modules{$group}{$entry} };
  0         0  
448 0         0 $count += $entry_count;
449 0 0       0 if ( $count > 1 ) {
450 0         0 return;
451             }
452 0 0       0 if ( $entry_count == 1 ) {
453 0         0 $last_entry = $entry;
454             }
455             }
456             }
457              
458             # if there's only one registered, that's the one to use
459 0         0 debug "singular_handler: count=$count last_entry=$last_entry";
460 0 0       0 return $count == 1 ? $last_entry : undef;
461             }
462              
463             # Find and run all the fetch_main functions in packages under WebFetch.
464             # This eliminates the need for the sub-packages to export their own
465             # fetch_main(), which users found conflicted with each other when
466             # loading more than one WebFetch-derived module.
467              
468             # fetch_main - try/catch wrapper for fetch_main2 to catch and display errors
469             sub main::fetch_main
470             {
471              
472             # run fetch_main2 in a try/catch wrapper to handle exceptions
473             try {
474 0     0   0 &WebFetch::fetch_main2;
475             } catch {
476              
477             # process any error/exception that we may have gotten
478 0     0   0 my $ex = $_;
479              
480             # determine if there's an error message available to display
481 0         0 my $pkg = __PACKAGE__;
482 0 0       0 if ( ref $ex ) {
483 0 0       0 if ( my $ex_cap = Exception::Class->caught("WebFetch::Exception") ) {
484 0 0       0 if ( $ex_cap->isa("WebFetch::TracedException") ) {
485 0         0 warn $ex_cap->trace->as_string, "\n";
486             }
487              
488 0         0 croak "$pkg: " . $ex_cap->error . "\n";
489             }
490 0 0       0 if ( $ex->can("stringify") ) {
    0          
491              
492             # Error.pm, possibly others
493 0         0 croak "$pkg: " . $ex->stringify . "\n";
494             } elsif ( $ex->can("as_string") ) {
495              
496             # generic - should work for many classes
497 0         0 croak "$pkg: " . $ex->as_string . "\n";
498             } else {
499 0         0 croak "$pkg: unknown exception of type " . ( ref $ex ) . "\n";
500             }
501             } else {
502 0         0 croak "pkg: $_\n";
503             }
504 0     0   0 };
505              
506             # success
507 0         0 return 0;
508             }
509              
510             # Search for modules which have registered "cmdline" capability.
511             # Collect command-line options and usage info from modules.
512             sub collect_cmdline
513             {
514 0     0 0 0 my ( @mod_options, @mod_usage );
515 0 0 0     0 if ( ( exists $modules{cmdline} ) and ( ref $modules{cmdline} eq "ARRAY" ) ) {
516 0         0 foreach my $cli_mod ( @{ $modules{cmdline} } ) {
  0         0  
517              
518             # obtain ref to module symbol table for backward compatibility with old @Options/$Usage interface
519 0         0 my $symtab;
520             {
521             ## no critic (TestingAndDebugging::ProhibitNoStrict)
522 5     5   59 no strict 'refs';
  5         21  
  5         44745  
  0         0  
523 0         0 $symtab = \%{ $cli_mod . "::" };
  0         0  
524             }
525              
526             # get command line options - try WebFetch config first (preferred), otherwise module symtab (deprecated)
527 0 0 0     0 if ( WebFetch->has_config("Options") ) {
    0          
528 0         0 push @mod_options, WebFetch->config("Options");
529             } elsif ( ( exists $symtab->{Options} )
530 0         0 and int @{ $symtab->{Options} } )
531             {
532 0         0 push @mod_options, @{ $symtab->{Options} };
  0         0  
533             }
534              
535             # get command line usage - try WebFetch config first (preferred), otherwise module symtab (deprecated)
536 0 0 0     0 if ( WebFetch->has_config("Usage") ) {
    0          
537 0         0 push @mod_usage, WebFetch->config("Usage");
538             } elsif ( ( exists $symtab->{Usage} )
539 0         0 and defined ${ $symtab->{Usage} } )
540             {
541 0         0 push @mod_usage, ${ $symtab->{Usage} };
  0         0  
542             }
543             }
544             }
545 0         0 return ( \@mod_options, \@mod_usage );
546             }
547              
548             # mainline which fetch_main() calls in an exception catching wrapper
549             sub fetch_main2
550             {
551              
552             # search for modules which have registered "cmdline" capability
553             # collect their command line options
554 0     0 0 0 my ( @mod_options, @mod_usage );
555             {
556 0         0 my ( $mod_options_ref, $mod_usage_ref ) = collect_cmdline();
  0         0  
557 0         0 @mod_options = @$mod_options_ref;
558 0         0 @mod_usage = $mod_usage_ref;
559             }
560              
561             # process command line
562 0         0 my ( $options_result, %options );
563             try {
564 0     0   0 $options_result = GetOptions(
565             \%options, "dir:s", "group:s", "mode:s", "source=s", "source_format:s",
566             "dest=s", "dest_format:s", "fetch_urls", "quiet", "debug", @mod_options
567             )
568             } catch {
569 0     0   0 throw_getopt_error("command line processing failed: $_");
570 0         0 };
571 0 0       0 if ( not $options_result ) {
572 0         0 throw_cli_usage( "usage: $0 --dir dirpath "
573             . "[--group group] [--mode mode] "
574             . "[--source file] [--source_format fmt-string] "
575             . "[--dest file] [--dest_format fmt-string] "
576             . "[--fetch_urls] [--quiet] "
577             . join( " ", @mod_usage ) );
578             }
579              
580             # set debugging mode
581 0 0 0     0 if ( ( exists $options{debug} ) and $options{debug} ) {
582 0         0 WebFetch::debug_mode(1);
583             }
584 0         0 debug "fetch_main2";
585              
586             # if either source/input or dest/output formats were not provided,
587             # check if only one handler is registered - if so that's the default
588 0 0       0 if ( not exists $options{source_format} ) {
589 0 0       0 if ( my $fmt = singular_handler("input") ) {
590 0         0 $options{source_format} = $fmt;
591             }
592             }
593 0 0       0 if ( not exists $options{dest_format} ) {
594 0 0       0 if ( my $fmt = singular_handler("output") ) {
595 0         0 $options{dest_format} = $fmt;
596             }
597             }
598              
599             # check for modules to handle the specified source_format
600 0         0 my ( @handlers, %handlers );
601 0 0 0     0 if ( ( exists $modules{input}{ $options{source_format} } )
602             and ( ref $modules{input}{ $options{source_format} } eq "ARRAY" ) )
603             {
604 0         0 foreach my $handler ( @{ $modules{input}{ $options{source_format} } } ) {
  0         0  
605 0 0       0 if ( not exists $handlers{$handler} ) {
606 0         0 push @handlers, $handler;
607 0         0 $handlers{$handler} = 1;
608             }
609             }
610             }
611 0 0       0 if ( exists $default_modules{ $options{source_format} } ) {
612 0         0 my $handler = $default_modules{ $options{source_format} };
613 0 0       0 if ( not exists $handlers{$handler} ) {
614 0         0 push @handlers, $handler;
615 0         0 $handlers{$handler} = 1;
616             }
617             }
618              
619             # check if any handlers were found for this input format
620 0 0       0 if ( not @handlers ) {
621 0         0 throw_no_handler( "input handler not found for " . $options{source_format} );
622             }
623              
624             # run the available handlers until one succeeds or none are left
625 0         0 my $run_count = 0;
626 0         0 foreach my $pkgname (@handlers) {
627 0         0 debug "running for $pkgname";
628             try {
629 0     0   0 &WebFetch::run( $pkgname, \%options )
630             } catch {
631 0     0   0 print STDERR "WebFetch: run exception: $_\n";
632             } finally {
633 0 0   0   0 if ( not @_ ) {
634 0         0 $run_count++;
635 0         0 last;
636             }
637 0         0 };
638             }
639 0 0       0 if ( $run_count == 0 ) {
640 0         0 throw_no_run( "no handlers were able or available to process " . " source format" );
641             }
642 0         0 return 1;
643             }
644              
645             # allocate a new object
646             sub new
647             {
648 3     3 1 16 my ( $class, @args ) = @_;
649 3         10 my $self = {};
650 3         7 bless $self, $class;
651              
652             # initialize the object parameters
653 3         20 $self->init(@args);
654              
655             # register WebFetch-provided formatters
656 3         20 WebFetch->module_register(@WebFetch_formatters);
657              
658             # go fetch the data
659             # this function must be provided by a derived module
660             # non-fetching modules (i.e. data) must define $self->{no_fetch}=1
661 3 50 33     17 if ( ( not exists $self->{no_fetch} ) or not $self->{no_fetch} ) {
662 3         575 require WebFetch::Data::Store;
663 3 50       14 if ( exists $self->{data} ) {
664 0 0       0 $self->{data}->isa("WebFetch::Data::Store")
665             or throw_data_wrongtype "object data must be " . "a WebFetch::Data::Store";
666             } else {
667 3         19 $self->{data} = WebFetch::Data::Store->new();
668             }
669 3         13 $self->fetch();
670             }
671              
672             # the object has been created
673 3         20 return $self;
674             }
675              
676             # initialize attributes of new objects
677             sub init
678             {
679 7     7 1 24 my ( $self, @args ) = @_;
680 7 100       23 return if not @args;
681              
682             # convert parameter list to hash
683 4         19 my %params = @args;
684              
685             # set parameters into $self with the set_param() method
686 4         21 foreach my $key ( keys %params ) {
687 27         58 $self->set_param( $key, $params{$key} );
688             }
689 4         27 return;
690             }
691              
692             sub set_param
693             {
694 37     37 1 83 my ( $self, $key, $value ) = @_;
695              
696 37 100       117 if ( exists $redirect_params{$key} ) {
697              
698             # reorganize parameters known to belong in a sub-hash
699             # configure this in %redirect_params constant
700 6         50 my $hash_name = $redirect_params{$key};
701              
702             # make sure we can move the parameter to the sub-hash
703 6 100       62 if ( not $self->{$hash_name} ) {
704 4         10 $self->{$hash_name} = {};
705             } else {
706 2 50       30 if ( reftype( $self->{$hash_name} ) ne "HASH" ) {
707 0         0 throw_param_error( "unable to redirect '$key' parameter into '$hash_name' "
708             . "because it already exists and is not a hash" );
709             }
710             }
711              
712             # set the value in the destination sub-hash
713 6         17 $self->{$hash_name}{$key} = $value;
714             } else {
715              
716             # if not intercepted, set the value directly to the key name
717 31         247 $self->{$key} = $value;
718             }
719 37         88 return;
720             }
721              
722             sub mod_load
723             {
724 3     3 1 7 my $pkg = shift;
725              
726             # make sure we have the run package loaded
727             ## no critic (BuiltinFunctions::ProhibitStringyEval)
728             try {
729 3 50   3   349 eval "require $pkg" or croak $@;
730             } catch {
731 0     0   0 throw_mod_load_failure("failed to load $pkg: $_");
732 3         26 };
733 3         78 return;
734             }
735              
736             # command-line handling for WebFetch-derived classes
737             sub run
738             {
739 3     3 1 223 my $run_pkg = shift;
740 3         9 my $options_ref = shift;
741 3         6 my $obj;
742              
743 3         18 debug "entered run for $run_pkg";
744             my $test_probe_ref =
745             ( ( exists $options_ref->{test_probe} ) and ( ref $options_ref->{test_probe} eq "HASH" ) )
746             ? $options_ref->{test_probe}
747 3 100 66     34 : undef;
748              
749             # make sure we have the run package loaded
750 3         11 mod_load $run_pkg;
751              
752             # Note: in order to add WebFetch-embedding capability, the fetch
753             # routine saves its raw data without any HTML/XML/etc formatting
754             # in @{$obj->{data}} and data-to-savable conversion routines in
755             # %{$obj->{actions}}, which contains several structures with key
756             # names matching software processing features. The purpose of
757             # this is to externalize the captured data so other software can
758             # use it too.
759              
760             # create the new object
761             # this also calls the $obj->fetch() routine for the module which
762             # has inherited from WebFetch to do this
763 3         10 debug "run before new";
764             try {
765 3     3   148 $obj = $run_pkg->new(%$options_ref);
766             } catch {
767 0     0   0 throw_mod_run_failure( "module run failure in $run_pkg: " . $_ );
768 3         44 };
769 3 100       61 if ($test_probe_ref) {
770 2         6 $test_probe_ref->{webfetch} = $obj;
771             }
772              
773             # if the object had data for the WebFetch-embedding API,
774             # then data processing is external to the fetch routine
775             # (This externalizes the data for other software to capture it.)
776 3         16 debug "run before output";
777 3         7 my $dest_format = $obj->{dest_format};
778 3 100       17 if ( not exists $obj->{actions} ) {
779 1         3 $obj->{actions} = {};
780             }
781 3 50       10 if ( ( exists $obj->{data} ) ) {
782 3 50       10 if ( exists $obj->{dest} ) {
783 3 50       11 if ( not exists $obj->{actions}{$dest_format} ) {
784 3         10 $obj->{actions}{$dest_format} = [];
785             }
786 3         4 push @{ $obj->{actions}{$dest_format} }, [ $obj->{dest} ];
  3         12  
787             }
788              
789             # perform requested actions on the data
790 3         15 $obj->do_actions();
791             } else {
792 0         0 throw_no_save("save failed: no data or nowhere to save it");
793             }
794              
795 3         12 debug "run before save";
796 3         27 my $result = $obj->save();
797 3 50       9 my $result_code = $result ? 0 : 1;
798 3 100       10 if ($test_probe_ref) {
799 2         5 $test_probe_ref->{result} = $result_code;
800             }
801              
802             # check for errors, throw exception to report errors per savable item
803 3 50       10 if ( not $result ) {
804 0         0 my @errors;
805 0         0 foreach my $savable ( @{ $obj->{savable} } ) {
  0         0  
806 0 0       0 ( ref $savable eq "HASH" ) or next;
807 0 0       0 if ( exists $savable->{error} ) {
808 0         0 push @errors, "file: " . $savable->{file} . "error: " . $savable->{error};
809             }
810             }
811 0 0       0 if ($test_probe_ref) {
812 0         0 $test_probe_ref->{errors} = \@errors;
813             }
814 0 0       0 if (@errors) {
815 0         0 throw_save_error( "error saving results in " . $obj->{dir} . "\n" . join( "\n", @errors ) . "\n" );
816             }
817             }
818              
819 3         17 return $result_code;
820             }
821              
822             sub do_actions
823             {
824 3     3 1 9 my ($self) = @_;
825 3         20 debug "in WebFetch::do_actions";
826              
827             # we *really* need the data and actions to be set!
828             # otherwise assume we're in WebFetch 0.09 compatibility mode and
829             # $self->fetch() better have created its own savables already
830 3 50 33     24 if ( ( not exists $self->{data} ) or ( not exists $self->{actions} ) ) {
831 0         0 return;
832             }
833              
834             # loop through all the actions
835 3         6 foreach my $action_spec ( keys %{ $self->{actions} } ) {
  3         18  
836 7         13 my $handler_ref;
837              
838             # check for modules to handle the specified dest_format
839 7         17 my $action_handler = "fmt_handler_" . $action_spec;
840 7 50       23 if ( exists $modules{output}{$action_spec} ) {
841 7         12 foreach my $class ( @{ $modules{output}{$action_spec} }, ref $self ) {
  7         22  
842 7 50       72 if ( my $func_ref = $class->can($action_handler) ) {
843 7         13 $handler_ref = $func_ref;
844 7         11 last;
845             }
846             }
847             }
848              
849 7 50       17 if ( defined $handler_ref ) {
850              
851             # loop through action spec entries (parameter lists)
852 7         13 foreach my $entry ( @{ $self->{actions}{$action_spec} } ) {
  7         16  
853              
854             # parameters must be in an ARRAY ref
855 7 50       23 if ( ref $entry ne "ARRAY" ) {
856 0         0 warn "warning: entry in action spec " . "\""
857             . $action_spec . "\""
858             . "expected to be ARRAY, found "
859             . ( ref $entry )
860             . " instead "
861             . "- ignored\n";
862 0         0 next;
863             }
864              
865             # everything looks OK - call the handler
866 7         35 &$handler_ref( $self, @$entry );
867              
868             # if there were errors, the handler should
869             # have created a savable entry which
870             # contains only the error entry so that
871             # it will be reported by $self->save()
872             }
873             } else {
874 0         0 warn "warning: action \"$action_spec\" specified but "
875             . "$action_handler}() method not accessible in "
876             . ( ref $self )
877             . " or output classes - ignored\n";
878             }
879             }
880 3         6 return;
881             }
882              
883             # placeholder for fetch routines by derived classes
884             sub fetch
885             {
886 0     0 1 0 throw_abstract "fetch is an abstract function and must be overridden by a subclass";
887             }
888              
889             # utility function to get the contents of a URL
890             sub get
891             {
892 0     0 1 0 my ( $self, $source ) = @_;
893              
894 0 0       0 if ( not defined $source ) {
895 0         0 $source = $self->{source};
896             }
897 0         0 debug "get(" . $source . ")\n";
898              
899             # send request, capture response
900 0         0 my $ua = LWP::UserAgent->new;
901 0         0 $ua->agent( "WebFetch/" . WebFetch->version() . " " . $ua->agent );
902 0         0 my $request = HTTP::Request->new( GET => $source );
903 0         0 my $response = $ua->request($request);
904              
905             # abort on failure
906 0 0       0 if ( $response->is_error ) {
907 0         0 WebFetch::Exception::NetworkGet->throw( "The request received an error: " . $response->as_string );
908             }
909              
910             # return the content
911 0         0 my $content = $response->content;
912 0         0 return \$content;
913             }
914              
915             # utility function to generate WebFetch Export format
916             # which WebFetch users can read with the WebFetch::General module
917             # wf_export() is grandfathered out of Subroutines::ProhibitManyArgs restriction since it predates perlcritic/PBP
918             ## no critic ( Subroutines::ProhibitManyArgs )
919             sub wf_export
920             {
921 0     0 1 0 my ( $self, $filename, $fields, $lines, $comment, $param ) = @_;
922 0         0 my @export_out;
923 0         0 my $delim = ""; # blank line is delimeter
924              
925 0         0 debug "entered wf_export, output to $filename\n";
926              
927             # validate parameters
928 0 0 0     0 if ( not ref $fields or ref $fields ne "ARRAY" ) {
929 0         0 die "WebFetch: export error: fields parameter is not an " . "array reference\n";
930             }
931 0 0 0     0 if ( not ref $lines or ref $lines ne "ARRAY" ) {
932 0         0 die "WebFetch: export error: lines parameter is not an " . "array reference\n";
933             }
934 0 0 0     0 if ( ( defined $param ) and ref $param ne "HASH" ) {
935 0         0 die "WebFetch: export error: param parameter is not an " . "hash reference\n";
936             }
937              
938             # generate output header
939 0         0 push @export_out, "[WebFetch export]";
940 0         0 push @export_out, "Version: " . WebFetch->version();
941 0         0 push @export_out, "# This was generated by the Perl5 WebFetch " . WebFetch->version() . " module.";
942 0         0 push @export_out, "# WebFetch info can be found at " . "http://www.webfetch.org/";
943 0 0       0 if ( defined $comment ) {
944 0         0 push @export_out, "#";
945 0         0 foreach my $c_line ( split( "\n", $comment ) ) {
946 0         0 push @export_out, "# $c_line";
947             }
948             }
949              
950             # generate contents, each field has items in RFC822-like style
951 0         0 foreach my $line (@$lines) {
952 0         0 push @export_out, $delim;
953 0         0 my ( $field, $item );
954 0         0 for ( $field = 0 ; $field <= $#{@$fields} ; $field++ ) {
  0         0  
955 0         0 $item = $line->[$field];
956 0 0       0 ( defined $item ) or last;
957 0         0 $item =~ s/\n\n+/\n/sgox; # remove blank lines
958 0         0 $item =~ s/^\n+//ox; # remove leading newlines
959 0         0 $item =~ s/\n+$//ox; # remove trailing newlines
960 0         0 $item =~ s/\n/\\\n /sgox; # escape newlines with "\"
961 0         0 push @export_out, $fields->[$field] . ": $item";
962             }
963             }
964              
965             # store contents
966 0         0 $self->raw_savable( $filename, join( "\n", @export_out ) . "\n" );
967 0         0 return;
968             }
969             ## critic ( Subroutines::ProhibitManyArgs )
970              
971             # accessors & utilities for use by html_gen
972 8 50   8   13 sub _style_para { my $self = shift; return ( exists $self->{style}{para} ) ? $self->{style}{para} : 0; }
  8         24  
973 4 50   4   5 sub _style_notable { my $self = shift; return ( exists $self->{style}{notable} ) ? $self->{style}{notable} : 0; }
  4         16  
974 14 50   14   21 sub _style_ul { my $self = shift; return ( exists $self->{style}{ul} ) ? $self->{style}{ul} : 0; }
  14         49  
975              
976             sub _style_bullet
977             {
978 10     10   14 my $self = shift;
979             return 1
980             if not exists $self->{style}{para}
981             and not exists $self->{style}{ul}
982 10 0 33     30 and not exists $self->{style}{bullet};
      0        
983 10 50       37 return ( exists $self->{style}{bullet} ) ? $self->{style}{bullet} : 0;
984             }
985              
986             sub _html_gen_tag
987             {
988 16     16   37 my ( $self, $tag, %params ) = @_;
989 16         22 my $close_tag = 0;
990 16 100       30 if ( exists $params{_close} ) {
991 8 50       22 $close_tag = $params{_close} ? 1 : 0;
992 8         15 delete $params{_close};
993             }
994 16 50       28 my $css_class = exists $self->{css_class} ? $self->{css_class} : "webfetch";
995             return
996             "<$tag class=\"$css_class-$tag\" "
997 16 100       82 . join( " ", grep { "$_=\"" . $params{$_} . "\"" } keys %params )
  2         10  
998             . ( $close_tag ? "/" : "" ) . ">";
999             }
1000 8     8   14 sub _html_gen_untag { my ( $self, $tag ) = @_; return "</$tag>"; }
  8         19  
1001              
1002             # utility function to generate HTML output
1003             sub html_gen
1004             {
1005 2     2 1 7 my ( $self, $filename, $format_func, $links ) = @_;
1006              
1007             # generate summary HTML links
1008 2         4 my $link_count = 0;
1009 2         4 my @result;
1010              
1011 2 50       9 if ( not $self->_style_notable() ) {
1012 2         9 push @result, $self->_html_gen_tag("center");
1013 2         6 push @result, $self->_html_gen_tag("table");
1014 2         10 push @result, $self->_html_gen_tag("tr");
1015 2         11 push @result, $self->_html_gen_tag( "td", valign => 'top' );
1016             }
1017 2 50       15 if ( $self->_style_ul() ) {
1018 0         0 push @result, $self->_html_gen_tag("ul");
1019             }
1020 2         13 $self->font_start( \@result );
1021 2 50       10 if ( @$links >= 0 ) {
1022 2         5 foreach my $entry (@$links) {
1023 10 50       22 push @result,
    50          
1024             (
1025             $self->_style_ul()
1026             ? $self->_html_gen_tag("li")
1027             : ( $self->_style_bullet() ? "&#149;&nbsp;" : "" )
1028             ) . &$format_func(@$entry);
1029 10 100       31 if ( ++$link_count >= $self->{num_links} ) {
1030 2         18 last;
1031             }
1032 8 50 33     28 if ( ( exists $self->{table_sections} )
      33        
      0        
1033             and not $self->_style_para()
1034             and not $self->_style_notable()
1035             and $link_count == int( ( $self->{num_links} + 1 ) / $self->{table_sections} ) )
1036             {
1037 0         0 $self->font_end( \@result );
1038 0         0 push @result, $self->_html_gen_untag("td");
1039 0         0 push @result, $self->_html_gen_tag( "td", width => '45%', valign => 'top' );
1040 0         0 $self->font_start( \@result );
1041             } else {
1042 8 50       22 if ( $self->_style_para() ) {
    0          
1043 8         18 push @result, $self->_html_gen_tag( "p", _close => 1 );
1044             } elsif ( $self->_style_bullet() ) {
1045 0         0 push @result, $self->_html_gen_tag( "br", _close => 1 );
1046             }
1047             }
1048             }
1049             } else {
1050 0         0 push @result,
1051             "<i>(There are technical difficulties with "
1052             . "this information source. "
1053             . "Please check again later.)</i>";
1054             }
1055 2         14 $self->font_end( \@result );
1056 2 50       6 if ( $self->_style_ul() ) {
1057 0         0 push @result, $self->_html_gen_untag("ul");
1058             }
1059 2 50       8 if ( not $self->_style_notable() ) {
1060 2         8 push @result, $self->_html_gen_untag("td");
1061 2         6 push @result, $self->_html_gen_untag("tr");
1062 2         6 push @result, $self->_html_gen_untag("table");
1063 2         15 push @result, $self->_html_gen_untag("center");
1064             }
1065              
1066 2         23 $self->html_savable( $filename, join( "\n", @result ) . "\n" );
1067 2         6 return;
1068             }
1069              
1070             # internal-use function font_start, used by html_gen
1071             sub font_start
1072             {
1073 2     2 0 7 my ( $self, $result ) = @_;
1074              
1075 2 50 33     13 if ( ( defined $self->{font_size} ) or ( defined $self->{font_face} ) ) {
1076             push @$result,
1077             "<font"
1078             . ( ( defined $self->{font_size} ) ? " size=" . $self->{font_size} : "" )
1079 0 0       0 . ( ( defined $self->{font_face} ) ? " face=\"" . $self->{font_face} . "\"" : "" ) . ">";
    0          
1080             }
1081 2         5 return;
1082             }
1083              
1084             # internal-use function font_end, used by html_gen
1085             sub font_end
1086             {
1087 2     2 0 7 my ( $self, $result ) = @_;
1088              
1089 2 50 33     13 if ( ( defined $self->{font_size} ) or ( defined $self->{font_face} ) ) {
1090 0         0 push @$result, "</font>";
1091             }
1092 2         4 return;
1093             }
1094              
1095             # utility function to make a savable record for HTML text
1096             sub html_savable
1097             {
1098 4     4 1 13 my ( $self, $filename, $content ) = @_;
1099              
1100 4         18 $self->raw_savable( $filename,
1101             "<!--- begin text generated by "
1102             . "Perl5 WebFetch "
1103             . WebFetch->version()
1104             . " - do not manually edit --->\n"
1105             . "<!--- WebFetch can be found at "
1106             . "http://www.webfetch.org/ --->\n"
1107             . $content
1108             . "<!--- end text generated by "
1109             . "Perl5 WebFetch "
1110             . WebFetch->version()
1111             . " - do not manually edit --->\n" );
1112 4         9 return;
1113             }
1114              
1115             # utility function to make a savable record for raw text
1116             sub raw_savable
1117             {
1118 5     5 1 243 my ( $self, $filename, $content ) = @_;
1119              
1120 5 100       19 if ( not exists $self->{savable} ) {
1121 3         12 $self->{savable} = [];
1122             }
1123             push(
1124 5         38 @{ $self->{savable} },
1125             {
1126             'file' => $filename,
1127             'content' => $content,
1128             ( ( exists $self->{group} ) ? ( 'group' => $self->{group} ) : () ),
1129 5 50       11 ( ( exists $self->{mode} ) ? ( 'mode' => $self->{mode} ) : () )
    50          
1130             }
1131             );
1132 5         12 return;
1133             }
1134              
1135             sub direct_fetch_savable
1136             {
1137 0     0 1 0 my ( $self, $url ) = @_;
1138              
1139 0 0       0 if ( not exists $self->{savable} ) {
1140 0         0 $self->{savable} = [];
1141             }
1142 0         0 my $filename = $url;
1143 0         0 $filename =~ s=[;?].*==x;
1144 0         0 $filename =~ s=^.*/==x;
1145             push(
1146 0         0 @{ $self->{savable} },
1147             {
1148             'url' => $url,
1149             'file' => $filename,
1150             'index' => 1,
1151             ( ( exists $self->{group} ) ? ( 'group' => $self->{group} ) : () ),
1152 0 0       0 ( ( exists $self->{mode} ) ? ( 'mode' => $self->{mode} ) : () )
    0          
1153             }
1154             );
1155 0         0 return;
1156             }
1157              
1158             sub no_savables_ok
1159             {
1160 2     2 1 5 my $self = shift;
1161              
1162             push(
1163 2         3 @{ $self->{savable} },
  2         9  
1164             {
1165             'ok_empty' => 1,
1166             }
1167             );
1168 2         4 return;
1169             }
1170              
1171             # check conditions are met to perform a save()
1172             # internal method used by save()
1173             sub _save_precheck
1174             {
1175 3     3   13 my $self = shift;
1176              
1177             # check if we have attributes needed to proceed
1178 3 50       12 if ( not exists $self->{"dir"} ) {
1179 0         0 croak "WebFetch: directory path missing - required for save\n";
1180             }
1181 3 50       13 if ( not exists $self->{savable} ) {
1182 0         0 croak "WebFetch: nothing to save\n";
1183             }
1184 3 50       13 if ( ref( $self->{savable} ) ne "ARRAY" ) {
1185 0         0 croak "WebFetch: cannot save - savable is not an array\n";
1186             }
1187 3         6 return;
1188             }
1189              
1190             # convert link fields to savables
1191             # internal method used by save()
1192             sub _save_fetch_urls
1193             {
1194 3     3   6 my $self = shift;
1195              
1196             # if fetch_urls is defined, turn link fields in the data to savables
1197 3 0 33     8 if ( ( exists $self->{fetch_urls} ) and $self->{fetch_urls} ) {
1198 0         0 my $entry;
1199 0         0 $self->data->reset_pos;
1200 0         0 while ( $entry = $self->data->next_record() ) {
1201 0         0 my $url = $entry->url;
1202 0 0       0 if ( defined $url ) {
1203 0         0 $self->direct_fetch_savable( $entry->url );
1204             }
1205             }
1206             }
1207 3         5 return;
1208             }
1209              
1210             # write new content for save operation
1211             # internal method used by save()
1212             sub _save_write_content
1213             {
1214 4     4   10 my ( $self, $savable, $new_content ) = @_;
1215              
1216             # write content to the "new content" file
1217             ## no critic (InputOutput::RequireBriefOpen)
1218 4         7 my $new_file;
1219 4 50   1   291 if ( not open( $new_file, ">:encoding(UTF-8)", "$new_content" ) ) {
  1         6  
  1         3  
  1         15  
1220 0         0 $savable->{error} = "cannot open $new_content: $!";
1221 0         0 return 0;
1222             }
1223 4 50       13678 if ( not print $new_file $savable->{content} ) {
1224 0         0 $savable->{error} = "failed to write to " . $new_content . ": $!";
1225 0         0 close $new_file;
1226 0         0 return 0;
1227             }
1228 4 50       150 if ( not close $new_file ) {
1229              
1230             # this can happen with NFS errors
1231 0         0 $savable->{error} = "failed to close " . $new_content . ": $!";
1232 0         0 return 0;
1233             }
1234 4         29 return 1;
1235             }
1236              
1237             # save previous main content as old backup
1238             # internal method used by save()
1239             sub _save_main_to_backup
1240             {
1241 4     4   15 my ( $self, $savable, $main_content, $old_content ) = @_;
1242              
1243             # move the main content to the old content - now it's a backup
1244 4 50       74 if ( -f $main_content ) {
1245 0 0       0 if ( not rename $main_content, $old_content ) {
1246 0         0 $savable->{error} = "cannot rename " . $main_content . " to " . $old_content . ": $!";
1247 0         0 return 0;
1248             }
1249             }
1250 4         25 return 1;
1251             }
1252              
1253             # chgrp and chmod the "new content" before final installation
1254             # internal method used by save()
1255             sub _save_file_mode
1256             {
1257 4     4   16 my ( $self, $savable, $new_content ) = @_;
1258              
1259             # chgrp the "new content" before final installation
1260 4 50       15 if ( exists $savable->{group} ) {
1261 0         0 my $gid = $savable->{group};
1262 0 0       0 if ( $gid !~ /^[0-9]+$/ox ) {
1263 0         0 $gid = ( getgrnam($gid) )[2];
1264 0 0       0 if ( not defined $gid ) {
1265 0         0 $savable->{error} = "cannot chgrp " . $new_content . ": " . $savable->{group} . " does not exist";
1266 0         0 return 0;
1267             }
1268             }
1269 0 0       0 if ( not chown $>, $gid, $new_content ) {
1270 0         0 $savable->{error} = "cannot chgrp " . $new_content . " to " . $savable->{group} . ": $!";
1271 0         0 return 0;
1272             }
1273             }
1274              
1275             # chmod the "new content" before final installation
1276 4 50       11 if ( exists $savable->{mode} ) {
1277 0 0       0 if ( not chmod oct( $savable->{mode} ), $new_content ) {
1278 0         0 $savable->{error} = "cannot chmod " . $new_content . " to " . $savable->{mode} . ": $!";
1279 0         0 return 0;
1280             }
1281             }
1282 4         11 return 1;
1283             }
1284              
1285             # index lookup via legacy DB file
1286             # returns 1 if item was found in index, 0 if it had to be added to index
1287             # internal method used by _save_check_index()
1288             sub _save_check_index_db
1289             {
1290 0     0   0 my ( $self, $savable ) = @_;
1291 0         0 my $was_in_index = 0;
1292 0         0 my $index_db_path = $self->{dir} . "/" . $index_file{db};
1293              
1294             # check if DB_File module is available
1295 0         0 my $db_available = 0;
1296             try {
1297             ## no critic (BuiltinFunctions::ProhibitStringyEval)
1298 0 0   0   0 eval "require $db_class" or croak $@;
1299 0         0 $db_available = 1;
1300 0         0 };
1301              
1302             # look up content in DB index
1303 0 0 0     0 if ( $db_available and ( exists $savable->{url} ) and ( exists $savable->{index} ) ) {
      0        
1304 0         0 tie my %id_index, 'DB_File', $index_db_path, &DB_File::O_CREAT | &DB_File::O_RDWR;
1305 0         0 my ( $timestamp, $filename );
1306 0 0       0 if ( exists $id_index{ $savable->{url} } ) {
1307             ( $timestamp, $filename ) =
1308 0         0 split /#/x, $id_index{ $savable->{url} };
1309 0         0 $was_in_index = 1;
1310             } else {
1311 0         0 $timestamp = time;
1312             $id_index{ $savable->{url} } =
1313 0         0 $timestamp . "#" . $savable->{file};
1314             }
1315 0         0 untie %id_index;
1316             }
1317 0         0 return $was_in_index;
1318             }
1319              
1320             # index lookup via YAML file
1321             # returns 1 if item was found in index, 0 if it had to be added to index
1322             # internal method used by _save_check_index()
1323             sub _save_check_index_yaml
1324             {
1325 4     4   14 my ( $self, $savable ) = @_;
1326 4         8 my $was_in_index = 0;
1327 4         14 my $index_lock_path = $self->{dir} . "/" . $index_file{lock};
1328 4         66 my $index_yaml_path = $self->{dir} . "/" . $index_file{yaml};
1329              
1330             # check if YAML module is available
1331 4         58 my $yaml_loaded = _load_yaml();
1332              
1333             # look up content in YAML index
1334 4 0 33     26 if ( $yaml_loaded and ( exists $savable->{url} ) and ( exists $savable->{index} ) ) {
      33        
1335 0         0 my $id_index_ref = {};
1336 0         0 my ( $timestamp, $filename );
1337              
1338             # lock and read index YAML if it exists
1339 0 0 0     0 if ( -f $index_lock_path and pshlock($index_lock_path) ) {
1340 0         0 ($id_index_ref) = LoadFile($index_yaml_path);
1341 0         0 punlock($index_lock_path);
1342 0         0 pclose($index_lock_path);
1343             }
1344 0 0       0 if ( exists $id_index_ref->{ $savable->{url} } ) {
1345             ( $timestamp, $filename ) =
1346 0         0 split /#/x, $id_index_ref->{ $savable->{url} };
1347 0         0 $was_in_index = 1;
1348             } else {
1349 0         0 $timestamp = time;
1350             $id_index_ref->{ $savable->{url} } =
1351 0         0 $timestamp . "#" . $savable->{file};
1352             }
1353              
1354             # save index if modified
1355 0 0 0     0 if ( not $was_in_index and pexclock($index_lock_path) ) {
1356 0         0 DumpFile( $index_yaml_path, $id_index_ref );
1357 0         0 punlock($index_lock_path);
1358 0         0 pclose($index_lock_path);
1359             }
1360             }
1361 4         17 return $was_in_index;
1362             }
1363              
1364             # check if content is already in index file
1365             # internal method used by save()
1366             sub _save_check_index
1367             {
1368 4     4   17 my ( $self, $savable ) = @_;
1369              
1370             # if a URL was provided and index flag is set, use index file
1371 4         9 my $was_in_index = 0;
1372 4         15 my $index_db_path = $self->{dir} . "/" . $index_file{db};
1373 4         91 my $index_yaml_path = $self->{dir} . "/" . $index_file{yaml};
1374              
1375             # use backward-compatible DB_File index if DB index file exists and YAML index does not
1376 4 50 33     112 if ( -f $index_db_path and not -f $index_yaml_path ) {
1377 0         0 $was_in_index = $self->_save_check_index_db($savable);
1378             }
1379              
1380             # handle YAML file
1381 4   33     30 $was_in_index = ( $self->_save_check_index_yaml($savable) or $was_in_index );
1382              
1383             # For now, we consider it done if the file was in the index.
1384             # Future options would be to check if URL was modified.
1385 4 50       23 return $was_in_index ? 0 : 1;
1386             }
1387              
1388             # if a URL was provided and no content, get content from URL
1389             # internal method used by save()
1390             sub _save_fill_empty_from_url
1391             {
1392 4     4   19 my ( $self, $savable ) = @_;
1393              
1394             # if a URL was provided and no content, get content from URL
1395 4 0 33     12 if ( ( not exists $savable->{content} ) and ( exists $savable->{url} ) ) {
1396             try {
1397 0     0   0 $savable->{content} = ${ $self->get( $savable->{url} ) };
  0         0  
1398             } catch {
1399 0     0   0 return 0;
1400 0         0 };
1401             }
1402 4         13 return 1;
1403             }
1404              
1405             # print errors from save operation
1406             # internal method used by save()
1407             sub _save_report_errors
1408             {
1409 3     3   9 my ($self) = @_;
1410              
1411             # loop through savable to report any errors
1412 3         7 my $err_count = 0;
1413 3         13 foreach my $savable ( @{ $self->{savable} } ) {
  3         15  
1414 7 50       18 if ( exists $savable->{error} ) {
1415 0         0 print STDERR "WebFetch: failed to save " . $savable->{file} . ": " . $savable->{error} . "\n";
1416 0         0 $err_count++;
1417             }
1418             }
1419 3 50       12 if ($err_count) {
1420 0         0 croak "WebFetch: $err_count errors - fetch/save failed\n";
1421             }
1422 3         7 return;
1423             }
1424              
1425             # file-save routines for all WebFetch-derived classes
1426             sub save
1427             {
1428 3     3 1 6 my $self = shift;
1429              
1430 3         8 debug "entering save()\n";
1431              
1432             # check if we have attributes needed to proceed
1433 3         17 $self->_save_precheck();
1434              
1435             # if fetch_urls is defined, turn link fields in the data to savables
1436 3         15 $self->_save_fetch_urls();
1437              
1438             # loop through "savable" (grouped content and filename destination)
1439 3         4 foreach my $savable ( @{ $self->{savable} } ) {
  3         10  
1440              
1441 6 100       22 if ( exists $savable->{file} ) {
1442 4         17 debug "saving " . $savable->{file} . "\n";
1443             }
1444              
1445             # an output module may have handled a more intricate operation
1446 6 100       23 last if ( exists $savable->{ok_empty} );
1447              
1448             # verify contents of savable record
1449 4 50       13 if ( not exists $savable->{file} ) {
1450 0         0 $savable->{error} = "missing file name - skipped";
1451 0         0 next;
1452             }
1453 4 0 33     29 if ( ( not exists $savable->{content} )
1454             and ( not exists $savable->{url} ) )
1455             {
1456 0         0 $savable->{error} = "missing content or URL - skipped";
1457 0         0 next;
1458             }
1459              
1460             # generate file names
1461 4         93 my $new_content = $self->{"dir"} . "/N" . $savable->{file};
1462 4         62 my $main_content = $self->{"dir"} . "/" . $savable->{file};
1463 4         35 my $old_content = $self->{"dir"} . "/O" . $savable->{file};
1464              
1465             # make sure the Nxx "new content" file does not exist yet
1466 4 50       164 if ( -f $new_content ) {
1467 0 0       0 if ( not unlink $new_content ) {
1468 0         0 $savable->{error} = "cannot unlink " . $new_content . ": $!";
1469 0         0 next;
1470             }
1471             }
1472              
1473             # if a URL was provided and index flag is set, use index file
1474 4 50       30 if ( not $self->_save_check_index($savable) ) {
1475              
1476             # done since it was found in the index
1477 0         0 next;
1478             }
1479              
1480             # if a URL was provided and no content, get content from URL
1481 4 50       23 if ( not $self->_save_fill_empty_from_url($savable) ) {
1482              
1483             # error occurred - available in $savable->{error}
1484 0         0 next;
1485             }
1486              
1487             # write content to the "new content" file
1488 4 50       15 if ( not $self->_save_write_content( $savable, $new_content ) ) {
1489              
1490             # error occurred - available in $savable->{error}
1491 0         0 next;
1492             }
1493              
1494             # remove the "old content" file to get it out of the way
1495 4 50       108 if ( -f $old_content ) {
1496 0 0       0 if ( not unlink $old_content ) {
1497 0         0 $savable->{error} = "cannot unlink " . $old_content . ": $!";
1498 0         0 next;
1499             }
1500             }
1501              
1502             # move the main content to the old content - now it's a backup
1503 4 50       32 if ( not $self->_save_main_to_backup( $savable, $main_content ), $old_content ) {
1504              
1505             # error occurred - available in $savable->{error}
1506 0         0 next;
1507             }
1508              
1509             # chgrp and chmod the "new content" before final installation
1510 4 50       25 if ( not $self->_save_file_mode( $savable, $new_content ) ) {
1511              
1512             # error occurred - available in $savable->{error}
1513 0         0 next;
1514             }
1515              
1516             # move the new content to the main content - final install
1517 4 50       54 if ( -f $new_content ) {
1518 4 50       155 if ( not rename $new_content, $main_content ) {
1519 0         0 $savable->{error} = "cannot rename " . $new_content . " to " . $main_content . ": $!";
1520 0         0 next;
1521             }
1522             }
1523             }
1524              
1525             # loop through savable to report any errors
1526 3         28 $self->_save_report_errors();
1527              
1528             # success if we got here
1529 3         7 return 1;
1530             }
1531              
1532             sub parse_date
1533             {
1534 79     79 1 68104 my @args = @_;
1535 79         171 my %opts;
1536 79 100       199 if ( ref $args[0] eq "HASH" ) {
1537 33         45 %opts = %{ shift @args };
  33         87  
1538             }
1539 79         136 my $stamp = shift @args;
1540 79         112 my $result;
1541              
1542             # use regular expressions to check simple date formats YYYY-MM-DD and YYYYMMDD
1543              
1544             # check YYYY-MM-DD date format
1545             # save it as a date-only array which can be fed to DateTime->new(), so gen_timestamp() will only use the date
1546 79 100       449 if ( $stamp =~ /^ (\d{4}) - (\d{2}) - (\d{2}) \s* $/x ) {
    50          
1547 6         47 $result = [ year => int($1), month => int($2), day => int($3), %opts ];
1548              
1549             # check YYYYMMDD format for backward compatibility: no longer ISO 8601 compliant since 2004 update
1550             # save it as a date-only array which can be fed to DateTime->new(), so gen_timestamp() will only use the date
1551             } elsif ( $stamp =~ /^ (\d{4}) (\d{2}) (\d{2}) \s* $/x ) {
1552 0         0 $result = [ year => int($1), month => int($2), day => int($3), %opts ];
1553             }
1554              
1555             # check ISO 8601
1556             # catch any exceptions thrown by the DateTime::Format::ISO8601 constructor and leave $result undefined
1557             # save it as a DateTime object
1558 79 100       191 if ( not defined $result ) {
1559             try {
1560 73     73   2035 $result = DateTime::Format::ISO8601->parse_datetime( $stamp, $opts{locale} );
1561 23 50 66     11565 if ( exists $opts{time_zone}
1562             and $result->time_zone() eq "floating" )
1563             {
1564 0         0 $result->set_time_zone( $opts{time_zone} );
1565             }
1566 73         401 };
1567             }
1568              
1569             # check Unix date format and other misc processing from Date::Calc's Parse_Date()
1570             # save it as a date-only array which can be fed to DateTime->new(), so gen_timestamp() will only use the date
1571 79 100       62992 if ( not defined $result ) {
1572 50         101 my @date;
1573             try {
1574 50     50   1294 @date = Date::Calc::Parse_Date( $stamp, $opts{locale} );
1575 50         260 };
1576 50 50       1394 if (@date) {
1577 0         0 $result =
1578             [ year => $date[0], month => $date[1], day => $date[2], %opts ];
1579             }
1580             }
1581              
1582             # return parsed result, or undef if all parsing methods failed
1583 79         242 return $result;
1584             }
1585              
1586             sub gen_timestamp
1587             {
1588 8     8 1 19 my @args = @_;
1589 8         10 my %opts;
1590 8 50       20 if ( ref $args[0] eq "HASH" ) {
1591 8         15 %opts = %{ shift @args };
  8         29  
1592             }
1593              
1594 8         15 my $datetime;
1595 8         11 my $date_only = 0; # boolean flag: true = use date only, false = full timestamp
1596 8 50       29 if ( ref $args[0] ) {
1597 8 50       70 if ( ref $args[0] eq "DateTime" ) {
    0          
1598 8         14 $datetime = $args[0];
1599 8 50       20 if ( exists $opts{locale} ) {
1600             try {
1601 8     8   222 $datetime->set_locale( $opts{locale} );
1602 8         42 };
1603             }
1604 8 50       825 if ( exists $opts{time_zone} ) {
1605             try {
1606 8     8   234 $datetime->set_time_zone( $opts{time_zone} );
1607 8         39 };
1608             }
1609             } elsif ( ref $args[0] eq "ARRAY" ) {
1610 0         0 my %dt_opts = @{ $args[0] };
  0         0  
1611 0         0 foreach my $key ( keys %opts ) {
1612              
1613             # if provided, use %opts as DateTime defaults for locale, time_zone and any other keys found
1614 0 0       0 if ( not exists $dt_opts{$key} ) {
1615 0         0 $dt_opts{$key} = $opts{$key};
1616             }
1617             }
1618 0         0 $datetime = DateTime->new(%dt_opts);
1619 0         0 $date_only = 1;
1620             }
1621             }
1622              
1623             # generate locale-specific timestamp string
1624 8         1243 my $dt_locale = $datetime->locale();
1625 8 50       53 if ($date_only) {
1626 0         0 return $datetime->format_cldr( $dt_locale->date_format_full );
1627             }
1628 8         35 return $datetime->format_cldr( $dt_locale->datetime_format_full );
1629             }
1630              
1631             sub anchor_timestr
1632             {
1633 21     21 1 1141 my @args = @_;
1634 21         35 my %opts;
1635 21 50       62 if ( ref $args[0] eq "HASH" ) {
1636 21         35 %opts = %{ shift @args };
  21         62  
1637             }
1638              
1639 21         36 my $datetime;
1640 21         31 my $date_only = 0; # boolean flag: true = use date only, false = full timestamp
1641 21 50       51 if ( ref $args[0] ) {
1642 21 100       57 if ( ref $args[0] eq "DateTime" ) {
    50          
1643 17         30 $datetime = $args[0];
1644 17 100       39 if ( exists $opts{time_zone} ) {
1645             try {
1646 8     8   204 $datetime->set_time_zone( $opts{time_zone} );
1647 8         41 };
1648             }
1649             } elsif ( ref $args[0] eq "ARRAY" ) {
1650 4         16 my %dt_opts = @{ $args[0] };
  4         16  
1651 4         12 foreach my $key ( keys %opts ) {
1652              
1653             # if provided, use %opts as DateTime defaults for locale, time_zone and any other keys found
1654 0 0       0 if ( not exists $dt_opts{$key} ) {
1655 0         0 $dt_opts{$key} = $opts{$key};
1656             }
1657             }
1658 4         25 $datetime = DateTime->new(%dt_opts);
1659 4         1454 $date_only = 1;
1660             }
1661             }
1662              
1663             # generate anchor timestamp string
1664 21 50       1190 return "undated" if not defined $datetime;
1665 21 100       49 if ($date_only) {
1666 4         13 return $datetime->ymd('-');
1667             }
1668 17         55 return $datetime->ymd('-') . "-" . $datetime->hms('-');
1669             }
1670              
1671             #
1672             # shortcuts to data object functions
1673             #
1674              
1675 37     37 1 66 sub data { my $self = shift; return $self->{data}; }
  37         134  
1676 0     0 0 0 sub wk2fname { my ( $self, @args ) = @_; return $self->{data}->wk2fname(@args) }
  0         0  
1677              
1678             sub fname2fnum
1679             {
1680 258     258 0 483 my ( $self, @args ) = @_;
1681 258         525 return $self->{data}->fname2fnum(@args);
1682             }
1683 0     0 0 0 sub wk2fnum { my ( $self, @args ) = @_; return $self->{data}->wk2fnum(@args) }
  0         0  
1684              
1685             #
1686             # format handler functions
1687             # these do not have their own POD docs, but are defined in the
1688             # $obj->do_actions() docs above
1689             #
1690              
1691             # HTML format handler
1692             sub fmt_handler_html
1693             {
1694 2     2 0 5 my ( $self, $filename, $params ) = @_;
1695 2         6 my $records = $self->{data}{records};
1696              
1697             # if we need to filter or sort, make a copy of the data records
1698 2 50 33     9 if ( ( defined $params->{filter_func} )
1699             or ( defined $params->{sort_func} ) )
1700             {
1701             # filter/select items in the table if filter function exists
1702 2         3 my $i;
1703 2 50 33     13 if ( ( defined $params->{filter_func} )
1704             and ref $params->{filter_func} eq "CODE" )
1705             {
1706             # create the new table
1707 2         6 $records = [];
1708              
1709 2         5 for ( $i = 0 ; $i < scalar( @{ $self->{data}{records} } ) ; $i++ ) {
  35         75  
1710 33 50       73 if ( &{ $params->{filter_func} }( @{ $self->{data}{records}[$i] } ) ) {
  33         65  
  33         57  
1711 33         76 unshift @$records, $self->{data}{records}[$i];
1712             }
1713             }
1714             } else {
1715              
1716             # copy all the references in the table over
1717             # don't mess with the data itself
1718 0         0 $records = [ @{ $self->{data}{records} } ];
  0         0  
1719             }
1720              
1721             # sort the table if sort/compare function is present
1722 2 50 33     22 if ( ( defined $params->{sort_func} )
1723             and ref $params->{sort_func} eq "CODE" )
1724             {
1725 2         10 $records = [ sort { &{ $params->{sort_func} }( $a, $b ) } @$records ];
  48         75  
  48         81  
1726             }
1727             }
1728              
1729 2 50 33     15 if ( ( defined $params->{format_func} )
1730             and ref $params->{format_func} eq "CODE" )
1731             {
1732 2         13 $self->html_gen( $filename, $params->{format_func}, $records );
1733 2         8 return;
1734             }
1735              
1736             # get local copies of the values from wk2fnum so that we can
1737             # take advantage of closure scoping to grab these values instead
1738             # of doing a table lookup for every value every time the format
1739             # function iterates over every data item
1740 0         0 my ( $title_fnum, $url_fnum, $date_fnum, $summary_fnum, $comments_fnum ) = (
1741             $self->wk2fnum("title"), $self->wk2fnum("url"), $self->wk2fnum("date"), $self->wk2fnum("summary"),
1742             $self->wk2fnum("comments"),
1743             );
1744              
1745             # generate the html and formatting function
1746             # This does a lot of conditional inclusion of well-known fields,
1747             # depending on their presence in a give data record.
1748             # The $_[...] notation is used to grab the data because this
1749             # anonymous function will be run once for every record in
1750             # @{$self->{data}{records}} with the data array/record passed
1751             # to it as the function's parameters.
1752             $self->html_gen(
1753             $filename,
1754             sub {
1755 0 0   0   0 return ""
    0          
    0          
    0          
    0          
1756             . (
1757             ( defined $title_fnum )
1758             ? (
1759             ( defined $url_fnum )
1760             ? "<a href=\"" . $_[$url_fnum] . "\">"
1761             : ""
1762             )
1763             . $_[$title_fnum]
1764             . (
1765             ( defined $url_fnum )
1766             ? "</a>"
1767             : ""
1768             )
1769             : (
1770             ( defined $summary_fnum )
1771             ? $_[$summary_fnum]
1772             : ""
1773             )
1774             )
1775             . (
1776             ( defined $comments_fnum )
1777             ? " (" . $_[$comments_fnum] . ")"
1778             : ""
1779             );
1780             },
1781 0         0 $records
1782             );
1783 0         0 return;
1784             }
1785              
1786             # XML format handler
1787             # This generates a "standalone" XML document with its own built-in DTD
1788             # to define the fields.
1789             # Note: we couldn't use XML::Writer because it only writes to a filehandle
1790             # and we need to do some more complicated stuff here.
1791             sub fmt_handler_xml
1792             {
1793 0     0 0 0 my ( $self, $filename ) = @_;
1794 0         0 my ( @xml, $field, $indent );
1795              
1796             # generate XML prolog/heading with a dynamically-generated XML DTD
1797 0         0 $indent = " " x 4;
1798 0         0 push @xml, "<?xml version=\"1.0\" standalone=\"yes\" ?>";
1799 0         0 push @xml, "";
1800 0         0 push @xml, "<!DOCTYPE webfetch_dynamic [";
1801 0         0 push @xml, $indent . "<!ELEMENT webfetch_dynamic (record*)>";
1802 0         0 push @xml, $indent . "<!ELEMENT record (" . join( ", ", @{ $self->{data}{fields} } ) . ")>";
  0         0  
1803 0         0 for ( $field = 0 ; $field < scalar @{ $self->{data}{fields} } ; $field++ ) {
  0         0  
1804 0         0 push @xml, $indent . "<!ELEMENT " . $self->{data}{fields}[$field] . "(#PCDATA)>";
1805             }
1806 0         0 push @xml, "]>";
1807 0         0 push @xml, "";
1808              
1809             # generate XML content
1810 0         0 push @xml, "<webfetch_dynamic>";
1811 0         0 foreach my $record ( @{ $self->{data}{records} } ) {
  0         0  
1812 0         0 push @xml, $indent . "<record>";
1813 0         0 for ( $field = 0 ; $field < scalar @{ $self->{data}{fields} } ; $field++ ) {
  0         0  
1814             push @xml,
1815             ( $indent x 2 ) . "<"
1816             . $self->{data}{fields}[$field] . ">"
1817             . $record->[$field] . "</"
1818 0         0 . $self->{data}{fields}[$field] . ">";
1819             }
1820 0         0 push @xml, $indent . "</record>";
1821             }
1822 0         0 push @xml, "</webfetch_dynamic>";
1823              
1824             # store the XML text as a savable
1825 0         0 $self->raw_savable( $filename, join( "\n", @xml ) . "\n" );
1826 0         0 return;
1827             }
1828              
1829             # WebFetch::General format handler
1830             sub fmt_handler_wf
1831             {
1832 0     0 0 0 my ( $self, $filename ) = @_;
1833              
1834             $self->wf_export(
1835             $filename,
1836             $self->{data}{fields},
1837             $self->{data}{records},
1838 0         0 "Exported from " . ( ref $self ) . "\n" . "fields are " . join( ", ", @{ $self->{data}{fields} } ) . "\n"
  0         0  
1839             );
1840 0         0 return;
1841             }
1842              
1843             # RDF format handler
1844             sub fmt_handler_rdf
1845             {
1846 0     0 0 0 my ( $self, $filename, $site_title, $site_link, $site_desc, $image_title, $image_url ) = @_;
1847              
1848             # get the field numbers for the well-known fields for title and url
1849 0         0 my ( $title_fnum, $url_fnum, );
1850 0         0 $title_fnum = $self->wk2fnum("title");
1851 0         0 $url_fnum = $self->wk2fnum("url");
1852              
1853             # if title or url is missing, we have to abort with an error message
1854 0 0 0     0 if ( ( not defined $title_fnum ) or ( not defined $url_fnum ) ) {
1855 0 0       0 my %savable = (
    0          
1856             "file" => $filename,
1857             "error" => "cannot RDF export with missing fields: "
1858             . ( ( not defined $title_fnum ) ? "title " : "" )
1859             . ( ( not defined $url_fnum ) ? "url " : "" )
1860             );
1861 0 0       0 if ( not defined $self->{savable} ) {
1862 0         0 $self->{savable} = [];
1863             }
1864 0         0 push @{ $self->{savable} }, \%savable;
  0         0  
1865 0         0 return;
1866             }
1867              
1868             # check if we can shortcut the array processing
1869 0         0 my $data;
1870 0 0 0     0 if ( $title_fnum == 0 and $url_fnum == 1 ) {
1871 0         0 $data = $self->{data}{records};
1872             } else {
1873              
1874             # oh well, the fields weren't in the right order
1875             # extract a copy that contains title and url fields
1876 0         0 $data = [];
1877 0         0 foreach my $entry ( @{ $self->{data}{records} } ) {
  0         0  
1878 0         0 push @$data, [ $entry->[$title_fnum], $entry->[$url_fnum] ];
1879             }
1880             }
1881 0         0 $self->ns_export( $filename, $data, $site_title, $site_link, $site_desc, $image_title, $image_url );
1882 0         0 return;
1883             }
1884              
1885             # autoloader catches calls to unknown functions
1886             # redirect to the class which made the call, if the function exists
1887             ## no critic (ClassHierarchies::ProhibitAutoloading)
1888             sub AUTOLOAD
1889             {
1890 5     5   6016 my ( $self, @args ) = @_;
1891 5         10 my $name = $AUTOLOAD;
1892 5 50       18 my $type = ref($self)
1893             or throw_autoload_fail "AUTOLOAD failed on $name: self is not an object";
1894              
1895 5         34 $name =~ s/.*://x; # strip fully-qualified portion, just want function
1896              
1897             # decline all-caps names - reserved for special Perl functions
1898 5         18 my ( $package, $filename, $line ) = caller;
1899 5 50       214 ( $name =~ /^[A-Z]+$/x ) and return;
1900 0         0 debug __PACKAGE__ . "::AUTOLOAD $name";
1901              
1902             # check for function in caller package
1903             # (WebFetch may hand an input module's object to an output module)
1904 0 0       0 if ( not $package->can($name) ) {
1905              
1906             # throw exception for unknown function/method
1907 0         0 throw_autoload_fail "function $name not found - called by $package ($filename line $line)";
1908             }
1909              
1910             # make an alias of the sub
1911             {
1912             ## no critic (TestingAndDebugging::ProhibitNoStrict)
1913 5     5   50 no strict 'refs';
  5         18  
  5         2033  
  0         0  
1914 0         0 *{ __PACKAGE__ . "::" . $name } = \&{ $package . "::" . $name };
  0         0  
  0         0  
1915             }
1916 0         0 my $retval;
1917             try {
1918 0     0   0 $retval = $self->$name(@args);
1919             } catch {
1920 0     0   0 my $e = Exception::Class->caught();
1921 0 0       0 ref $e
1922             ? $e->rethrow
1923             : throw_autoload_fail "failure in " . "autoloaded function: " . $e;
1924 0         0 };
1925 0         0 return $retval;
1926             }
1927             ## critic (ClassHierarchies::ProhibitAutoloading)
1928              
1929             1;
1930              
1931             =pod
1932              
1933             =encoding UTF-8
1934              
1935             =head1 NAME
1936              
1937             WebFetch - Perl module to download/fetch and save information from the Web
1938              
1939             =head1 VERSION
1940              
1941             version 0.15.9
1942              
1943             =head1 SYNOPSIS
1944              
1945             use WebFetch;
1946              
1947             =head1 DESCRIPTION
1948              
1949             The WebFetch module is a framework for downloading and saving
1950             information from the web, and for saving or re-displaying it.
1951             It provides a generalized interface for saving to a file
1952             while keeping the previous version as a backup.
1953             This is mainly intended for use in a cron-job to acquire
1954             periodically-updated information.
1955              
1956             WebFetch allows the user to specify a source and destination, and
1957             the input and output formats. It is possible to write new Perl modules
1958             to the WebFetch API in order to add more input and output formats.
1959              
1960             The currently-provided input formats are Atom, RSS, WebFetch "SiteNews" files
1961             and raw Perl data structures.
1962              
1963             The currently-provided output formats are RSS, WebFetch "SiteNews" files,
1964             the Perl Template Toolkit, and export into a TWiki site.
1965              
1966             Some modules which were specific to pre-RSS/Atom web syndication formats
1967             have been deprecated. Those modules can be found in the CPAN archive
1968             in WebFetch 0.10. Those modules are no longer compatible with changes
1969             in the current WebFetch API.
1970              
1971             =head1 INSTALLATION
1972              
1973             After unpacking and the module sources from the tar file, run
1974              
1975             C<perl Makefile.PL>
1976              
1977             C<make>
1978              
1979             C<make install>
1980              
1981             Or from a CPAN shell you can simply type "C<install WebFetch>"
1982             and it will download, build and install it for you.
1983              
1984             If you need help setting up a separate area to install the modules
1985             (i.e. if you don't have write permission where perl keeps its modules)
1986             then see the Perl FAQ.
1987              
1988             To begin using the WebFetch modules, you will need to test your
1989             fetch operations manually, put them into a crontab, and then
1990             use server-side include (SSI) or a similar server configuration to
1991             include the files in a live web page.
1992              
1993             =head2 MANUALLY TESTING A FETCH OPERATION
1994              
1995             Select a directory which will be the storage area for files created
1996             by WebFetch. This is an important administrative decision -
1997             keep the volatile automatically-generated files in their own directory
1998             so they'll be separated from manually-maintained files.
1999              
2000             Choose the specific WebFetch-derived modules that do the work you want.
2001             See their particular manual/web pages for details on command-line arguments.
2002             Test run them first before committing to a crontab.
2003              
2004             =head2 SETTING UP CRONTAB ENTRIES
2005              
2006             If needed, see the manual pages for crontab(1), crontab(5) and any
2007             web sites or books on Unix system administration.
2008              
2009             Since WebFetch command lines are usually very long, the user may prefer
2010             to make one or more scripts as front-ends so crontab entries aren't so big.
2011              
2012             Try not to run crontab entries too often - be aware if the site you're
2013             accessing has any resource constraints, and how often their information
2014             gets updated. If they request users not to access a feed more often
2015             than a certain interval, respect it. (It isn't hard to find violators
2016             in server logs.) If in doubt, try every 30 minutes until more information
2017             becomes available.
2018              
2019             =head1 WebFetch FUNCTIONS AND METHODS
2020              
2021             The following function definitions assume B<C<$obj>> is a blessed
2022             reference to a module that is derived from (inherits from) WebFetch.
2023              
2024             =over 4
2025              
2026             =item WebFetch->version()
2027              
2028             Return the version number of WebFetch, or for any subclass which inherits the method.
2029              
2030             When running code within a source-code development workspace, it returns "00-dev" to avoid warnings
2031             about undefined values.
2032             Release version numbers are assigned and added by the build system upon release,
2033             and are not available when running directly from a source code repository.
2034              
2035             =item WebFetch->config( $key, [$value])
2036              
2037             This class method is the read/write accessor to WebFetch's key/value configuration store.
2038             If $value is not provided (or is undefied) then this is a read accessor, returning the value of the
2039             configuration entry named by $key.
2040             If $value is defined then this is a write accessor, assigning $value to the configuration entry named by $key.
2041              
2042             =item WebFetch->has_config($key)
2043              
2044             This class method returns a boolean value which is true if the configuration entry named by $key exists
2045             in the WebFetch key/value configuration store. Otherwise it returns false.
2046              
2047             =item WebFetch->del_config($key)
2048              
2049             This class method deletes the configuration entry named by $key.
2050              
2051             =item WebFetch->import_config(\%hashref)
2052              
2053             This class method imports all the key/value pairs from %hashref into the WebFetch configuration.
2054              
2055             =item WebFetch->keys_config()
2056              
2057             This class method returns a list of the keys in the WebFetch configuration store.
2058             This method was made for testing purposes. That is currently its only foreseen use case.
2059              
2060             =item WebFetch::module_register( $module, @capabilities );
2061              
2062             This function allows a Perl module to register itself with the WebFetch API
2063             as able to perform various capabilities.
2064              
2065             For subclasses of WebFetch, it can be called as a class method.
2066             C<__PACKAGE__-&gt;module_register( @capabilities );>
2067              
2068             For the $module parameter, the Perl module should provide its own
2069             name, usually via the __PACKAGE__ string.
2070              
2071             @capabilities is an array of strings as needed to list the
2072             capabilities which the module performs for the WebFetch API.
2073              
2074             If any entry of @capabilities is a hash reference, its key/value
2075             pairs are all imported to the WebFetch configuration, and becomes accessible via
2076             the I<config()> method. For more readable code, a hashref parmeter should not be used more than once.
2077             Though that would work. Also for readability, it is recommended to make the hashref the first
2078             parameter when this feature is used.
2079              
2080             Except for the config hashref, parameters must be strings as follows.
2081              
2082             The currently-recognized capabilities are "cmdline", "input" and "output".
2083             "filter", "save" and "storage" are reserved for future use. The
2084             function will save all the capability names that the module provides, without
2085             checking whether any code will use it.
2086              
2087             For example, the WebFetch::Output::TT module registers itself like this:
2088             C<__PACKAGE__-&gt;module_register( "cmdline", "output:tt" );>
2089             meaning that it defines additional command-line options, and it provides an
2090             output format handler for the "tt" format, the Perl Template Toolkit.
2091              
2092             =item fetch_main
2093              
2094             This function is exported into the main package.
2095             For all modules which registered with an "input" capability for the requested
2096             file format at the time this is called, it will call the run() function on
2097             behalf of each of the packages.
2098              
2099             =item $obj = WebFetch::new( param => "value", [...] )
2100              
2101             Generally, the new function should be inherited and used from a derived
2102             class. However, WebFetch provides an AUTOLOAD function which will catch
2103             wayward function calls from a subclass, and redirect it to the appropriate
2104             function in the calling class, if it exists.
2105              
2106             The AUTOLOAD feature is needed because, for example, when an object is
2107             instantiated in a WebFetch::Input::* class, it will later be passed to
2108             a WebFetch::Output::* class, whose data method functions can be accessed
2109             this way as if the WebFetch object had become a member of that class.
2110              
2111             =item $obj->init( ... )
2112              
2113             This is called from the C<new> function that modules inherit from WebFetch.
2114             If subclasses override it, they should still call it before completion.
2115             It takes "name" => "value" pairs which are all placed verbatim as
2116             attributes in C<$obj>.
2117              
2118             =item $obj->set_param(key, value)
2119              
2120             This sets a value under the given key in the WebFetch object.
2121              
2122             Some keys are intercepted to be grouped into their own sub-hierarchy.
2123             The keys "locale" and "time_zone" are placed in a "datetime_settings" hash under the object.
2124              
2125             If the parameter is one of the intercepted values but the destination hierarchy already exists as a
2126             non-hash value, then it throws an exception.
2127              
2128             The method does not return a value. If it doens't throw an exception, other outcomes are success.
2129              
2130             =item WebFetch::mod_load ( $class )
2131              
2132             This specifies a WebFetch module (Perl class) which needs to be loaded.
2133             In case of an error, it throws an exception.
2134              
2135             =item WebFetch::run
2136              
2137             This function can be called by the C<main::fetch_main> function
2138             provided by WebFetch or by another user function.
2139             This handles command-line processing for some standard options,
2140             calling the module-specific fetch function and WebFetch's $obj->save
2141             function to save the contents to one or more files.
2142              
2143             The command-line processing for some standard options are as follows:
2144              
2145             =over 4
2146              
2147             =item --dir I<directory>
2148              
2149             (required) the directory in which to write output files
2150              
2151             =item --group I<group>
2152              
2153             (optional) the group ID to set the output file(s) to
2154              
2155             =item --mode I<mode>
2156              
2157             (optional) the file mode (permissions) to set the output file(s) to
2158              
2159             =item --save_file I<save-file-path>
2160              
2161             (optional) save a copy of the fetched info
2162             in the file named by this parameter.
2163             The contents of the file are determined by the C<--dest_format> parameter.
2164             If C<--dest_format> isn't defined but only one module has registered a
2165             file format for saving, then that will be used by default.
2166              
2167             =item --quiet
2168              
2169             (optional) suppress printed warnings for HTTP errors
2170             I<(applies only to modules which use the WebFetch::get() function)>
2171             in case they are not desired for cron outputs
2172              
2173             =item --debug
2174              
2175             (optional) print verbose debugging outputs,
2176             only useful for developers adding new WebFetch-based modules
2177             or finding/reporting a bug in an existing module
2178              
2179             =back
2180              
2181             Modules derived from WebFetch may add their own command-line options
2182             that WebFetch::run() will use by defining a WebFetch configuration entry
2183             called "Options",
2184             containing the name/value pairs defined in Perl's Getopts::Long module.
2185             Derived modules can also add to the command-line usage error message by
2186             defining a configuration entry called "Usage" with a string of the additional
2187             parameters, as they should appear in the usage message.
2188             See the WebFetch->module_register() and WebFetch->config() class methods
2189             for setting configuration entries.
2190              
2191             For backward compatibility, WebFetch also looks for @Options and $Usage
2192             in the calling module's symbol table if they aren't found in the WebFetch
2193             configuration. However this method is deprecated and should not be used in
2194             new code. Perl coding best practices have evolved to recommend against using
2195             package variables in the years since the API was first defined.
2196              
2197             =item $obj->do_actions
2198              
2199             I<C<do_actions> was added in WebFetch 0.10 as part of the
2200             WebFetch Embedding API.>
2201             Upon entry to this function, $obj must contain the following attributes:
2202              
2203             =over 4
2204              
2205             =item data
2206              
2207             is a reference to a hash containing the following three (required)
2208             keys:
2209              
2210             =over 4
2211              
2212             =item fields
2213              
2214             is a reference to an array containing the names of the fetched data fields
2215             in the order they appear in the records of the I<data> array.
2216             This is necessary to define what each field is called
2217             because any kind of data can be fetched from the web.
2218              
2219             =item wk_names
2220              
2221             is a reference to a hash which maps from
2222             a key string with a "well-known" (to WebFetch) field type
2223             to a field name used in this table.
2224             The well-known names are defined as follows:
2225              
2226             =over 4
2227              
2228             =item title
2229              
2230             a one-liner banner or title text
2231             (plain text, no HTML tags)
2232              
2233             =item url
2234              
2235             URL or file path (as appropriate) to the news source
2236              
2237             =item id
2238              
2239             unique identifier string for the entry
2240              
2241             =item date
2242              
2243             a date stamp (and optional timestamp),
2244             which must be program-readable as L<ISO 8601|https://en.wikipedia.org/wiki/ISO_8601>
2245             date/time format (via L<DateTime::Format::ISO8601>),
2246             Unix date command output (via L<Date::Calc>'s Parse_Date() function)
2247             or as "YYYY-MM-DD" date string format.
2248             For backward compatibility, "YYYYMMDD" format is also accepted,
2249             though technically that format was deprecated from ISO 8601 in 2004.
2250             If the date cannot be parsed by these methods,
2251             either translate it to ISO 8601 when your module captures it
2252             or do not define this well-known field.
2253              
2254             =item summary
2255              
2256             a paragraph of summary text in HTML
2257              
2258             =item comments
2259              
2260             number of comments/replies at the news site
2261             (plain text, no HTML tags)
2262              
2263             =item author
2264              
2265             a name, handle or login name representing the author of the news item
2266             (plain text, no HTML tags)
2267              
2268             =item category
2269              
2270             a word or short phrase representing the category, topic or department
2271             of the news item
2272             (plain text, no HTML tags)
2273              
2274             =item location
2275              
2276             a location associated with the news item
2277             (plain text, no HTML tags)
2278              
2279             =back
2280              
2281             The field names for this table are defined in the I<fields> array.
2282              
2283             The hash only maps for the fields available in the table.
2284             If no field representing a given well-known name is present
2285             in the data fields,
2286             that well-known name key must not be defined in this hash.
2287              
2288             =item records
2289              
2290             an array containing the data records.
2291             Each record is itself a reference to an array of strings which are
2292             the data fields.
2293             This is effectively a two-dimensional array or a table.
2294              
2295             Only one table-type set of data is permitted per fetch operation.
2296             If more are needed, they should be arranged as separate fetches
2297             with different parameters.
2298              
2299             =back
2300              
2301             =item actions
2302              
2303             is a reference to a hash.
2304             The hash keys are names for handler functions.
2305             The WebFetch core provides internal handler functions called
2306             I<fmt_handler_html> (for HTML output),
2307             I<fmt_handler_xml> (for XML output),
2308             I<fmt_handler_wf> (for WebFetch::General format),
2309             However, WebFetch modules may provide additional
2310             format handler functions of their own by prepending
2311             "fmt_handler_" to the key string used in the I<actions> array.
2312              
2313             The values are array references containing
2314             I<"action specs">,
2315             which are themselves arrays of parameters
2316             that will be passed to the handler functions
2317             for generating output in a specific format.
2318             There may be more than one entry for a given format if multiple outputs
2319             with different parameters are needed.
2320              
2321             The presence of values in this field mean that output is to be
2322             generated in the specified format.
2323             The presence of these would have been chosed by the WebFetch module that
2324             created them - possibly by default settings or by a command-line argument
2325             that directed a specific output format to be used.
2326              
2327             For each valid action spec,
2328             a separate "savable" (contents to be placed in a file)
2329             will be generated from the contents of the I<data> variable.
2330              
2331             The valid (but all optional) keys are
2332              
2333             =over 4
2334              
2335             =item html
2336              
2337             the value must be a reference to an array which specifies all the
2338             HTML generation (html_gen) operations that will take place upon the data.
2339             Each entry in the array is itself an array reference,
2340             containing the following parameters for a call to html_gen():
2341              
2342             =over 4
2343              
2344             =item filename
2345              
2346             a file name or path string
2347             (relative to the WebFetch output directory unless a full path is given)
2348             for output of HTML text.
2349              
2350             =item params
2351              
2352             a hash reference containing optional name/value parameters for the
2353             HTML format handler.
2354              
2355             =over 4
2356              
2357             =item filter_func
2358              
2359             (optional)
2360             a reference to code that, given a reference to an entry in
2361             @{$self->{data}{records}},
2362             returns true (1) or false (0) for whether it will be included in the
2363             HTML output.
2364             By default, all records are included.
2365              
2366             =item sort_func
2367              
2368             (optional)
2369             a reference to code that, given references to two entries in
2370             @{$self->{data}{records}},
2371             returns the sort comparison value for the order they should be in.
2372             By default, no sorting is done and all records (subject to filtering)
2373             are accepted in order.
2374              
2375             =item format_func
2376              
2377             (optional)
2378             a refernce to code that, given a reference to an entry in
2379             @{$self->{data}{records}},
2380             stores a savable representation of the string.
2381              
2382             =back
2383              
2384             =back
2385              
2386             =back
2387              
2388             Additional valid keys may be created by modules that inherit from WebFetch
2389             by supplying a method/function named with "fmt_handler_" preceding the
2390             string used for the key.
2391             For example, for an "xyz" format, the handler function would be
2392             I<fmt_handler_xyz>.
2393             The value (the "action spec") of the hash entry
2394             must be an array reference.
2395             Within that array are "action spec entries",
2396             each of which is a reference to an array containing the list of
2397             parameters that will be passed verbatim to the I<fmt_handler_xyz> function.
2398              
2399             When the format handler function returns, it is expected to have
2400             created entries in the $obj->{savables} array
2401             (even if they only contain error messages explaining a failure),
2402             which will be used by $obj->save() to save the files and print the
2403             error messages.
2404              
2405             For coding examples, use the I<fmt_handler_*> functions in WebFetch.pm itself.
2406              
2407             =back
2408              
2409             =item $obj->fetch
2410              
2411             B<This function must be provided by each derived module to perform the
2412             fetch operaton specific to that module.>
2413             It will be called from C<new()> so you should not call it directly.
2414             Your fetch function should extract some data from somewhere
2415             and place of it in HTML or other meaningful form in the "savable" array.
2416              
2417             TODO: cleanup references to WebFetch 0.09 and 0.10 APIs.
2418              
2419             Upon entry to this function, $obj must contain the following attributes:
2420              
2421             =over 4
2422              
2423             =item dir
2424              
2425             The name of the directory to save in.
2426             (If called from the command-line, this will already have been provided
2427             by the required C<--dir> parameter.)
2428              
2429             =item savable
2430              
2431             a reference to an array where the "savable" items will be placed by
2432             the $obj->fetch function.
2433             (You only need to provide an array reference -
2434             other WebFetch functions can write to it.)
2435              
2436             In WebFetch 0.10 and later,
2437             this parameter should no longer be supplied by the I<fetch> function
2438             (unless you wish to use 0.09 backward compatibility)
2439             because it is filled in by the I<do_actions>
2440             after the I<fetch> function is completed
2441             based on the I<data> and I<actions> variables
2442             that are set in the I<fetch> function.
2443             (See below.)
2444              
2445             Each entry of the savable array is a hash reference with the following
2446             attributes:
2447              
2448             =over 4
2449              
2450             =item file
2451              
2452             file name to save in
2453              
2454             =item content
2455              
2456             scalar w/ entire text or raw content to write to the file
2457              
2458             =item group
2459              
2460             (optional) group setting to apply to file
2461              
2462             =item mode
2463              
2464             (optional) file permissions to apply to file
2465              
2466             =back
2467              
2468             Contents of savable items may be generated directly by derived modules
2469             or with WebFetch's C<html_gen>, C<html_savable> or C<raw_savable>
2470             functions.
2471             These functions will set the group and mode parameters from the
2472             object's own settings, which in turn could have originated from
2473             the WebFetch command-line if this was called that way.
2474              
2475             =back
2476              
2477             Note that the fetch functions requirements changed in WebFetch 0.10.
2478             The old requirement (0.09 and earlier) is supported for backward compatibility.
2479              
2480             I<In WebFetch 0.09 and earlier>,
2481             upon exit from this function, the $obj->savable array must contain
2482             one entry for each file to be saved.
2483             More than one array entry means more than one file to save.
2484             The WebFetch infrastructure will save them, retaining backup copies
2485             and setting file modes as needed.
2486              
2487             I<Beginning in WebFetch 0.10>, the "WebFetch embedding" capability was introduced.
2488             In order to do this, the captured data of the I<fetch> function
2489             had to be externalized where other Perl routines could access it.
2490             So the fetch function now only populates data structures
2491             (including code references necessary to process the data.)
2492              
2493             Upon exit from the function,
2494             the following variables must be set in C<$obj>:
2495              
2496             =over 4
2497              
2498             =item data
2499              
2500             is a reference to a hash which will be used by the I<do_actions> function.
2501             (See above.)
2502              
2503             =item actions
2504              
2505             is a reference to a hash which will be used by the I<do_actions> function.
2506             (See above.)
2507              
2508             =back
2509              
2510             =item $obj->get
2511              
2512             This WebFetch utility function will get a URL and return a reference
2513             to a scalar with the retrieved contents.
2514             Upon entry to this function, C<$obj> must contain the following attributes:
2515              
2516             =over 4
2517              
2518             =item source
2519              
2520             the URL to get
2521              
2522             =item quiet
2523              
2524             a flag which, when set to a non-zero (true) value,
2525             suppresses printing of HTTP request errors on STDERR
2526              
2527             =back
2528              
2529             =item $obj->wf_export ( $filename, $fields, $links, [ $comment, [ $param ]] )
2530              
2531             I<In WebFetch 0.10 and later, this should be used only in
2532             format handler functions. See do_handlers() for details.>
2533              
2534             This WebFetch utility function generates contents for a WebFetch export
2535             file, which can be placed on a web server to be read by other WebFetch sites.
2536             The WebFetch::General module reads this format.
2537             $obj->wf_export has the following parameters:
2538              
2539             =over 4
2540              
2541             =item $filename
2542              
2543             the file to save the WebFetch export contents to;
2544             this will be placed in the savable record with the contents
2545             so the save function knows were to write them
2546              
2547             =item $fields
2548              
2549             a reference to an array containing a list of the names of the data fields
2550             (in each entry of the @$lines array)
2551              
2552             =item $lines
2553              
2554             a reference to an array of arrays;
2555             the outer array contains each line of the exported data;
2556             the inner array is a list of the fields within that line
2557             corresponding in index number to the field names in the $fields array
2558              
2559             =item $comment
2560              
2561             (optional) a Human-readable string comment (probably describing the purpose
2562             of the format and the definitions of the fields used) to be placed at the
2563             top of the exported file
2564              
2565             =item $param
2566              
2567             (optional) a reference to a hash of global parameters for the exported data.
2568             This is currently unused but reserved for future versions of WebFetch.
2569              
2570             =back
2571              
2572             =item $obj->html_gen( $filename, $format_func, $links )
2573              
2574             I<In WebFetch 0.10 and later, this should be used only in
2575             format handler functions. See do_handlers() for details.>
2576              
2577             This WebFetch utility function generates some common formats of
2578             HTML output used by WebFetch-derived modules.
2579             The HTML output is stored in the $obj->{savable} array,
2580             for which all the files in that array can later be saved by the
2581             $obj->save function.
2582             It has the following parameters:
2583              
2584             =over 4
2585              
2586             =item $filename
2587              
2588             the file name to save the generated contents to;
2589             this will be placed in the savable record with the contents
2590             so the save function knows were to write them
2591              
2592             =item $format_func
2593              
2594             a refernce to code that formats each entry in @$links into a
2595             line of HTML
2596              
2597             =item $links
2598              
2599             a reference to an array of arrays of parameters for C<&$format_func>;
2600             each entry in the outer array is contents for a separate HTML line
2601             and a separate call to C<&$format_func>
2602              
2603             =back
2604              
2605             Upon entry to this function, C<$obj> must contain the following attributes:
2606              
2607             =over 4
2608              
2609             =item num_links
2610              
2611             number of lines/links to display
2612              
2613             =item savable
2614              
2615             reference to an array of hashes which this function will use as
2616             storage for filenames and contents to save
2617             (you only need to provide an array reference - the function will write to it)
2618              
2619             See $obj->fetch for details on the contents of the C<savable> parameter
2620              
2621             =item table_sections
2622              
2623             (optional) if present, this specifies the number of table columns to use;
2624             the number of links from C<num_links> will be divided evenly between the
2625             columns
2626              
2627             =item style
2628              
2629             (optional) a hash reference with style parameter names/values
2630             that can modify the behavior of the funciton to use different HTML styles.
2631             The recognized values are enumerated with WebFetch's I<--style> command line
2632             option.
2633             (When they reach this point, they are no longer a comma-delimited string -
2634             WebFetch or another module has parsed them into a hash with the style
2635             name as the key and the integer 1 for the value.)
2636              
2637             =item url
2638              
2639             (optional) an alternative URL to fetch from.
2640             In WebFetch modules that fetch from a URL, this will override the default URL
2641             in the module.
2642             In other modules, it has no effect but its presence won't cause an error.
2643              
2644             =back
2645              
2646             =item $obj->html_savable( $filename, $content )
2647              
2648             I<In WebFetch 0.10 and later, this should be used only in
2649             format handler functions. See do_actions() for details.>
2650              
2651             This WebFetch utility function stores pre-generated HTML in a new entry in
2652             the $obj->{savable} array, for later writing to a file.
2653             It's basically a simple wrapper that puts HTML comments
2654             warning that it's machine-generated around the provided HTML text.
2655             This is generally a good idea so that neophyte webmasters
2656             (and you know there are a lot of them in the world :-)
2657             will see the warning before trying to manually modify
2658             your automatically-generated text.
2659              
2660             See $obj->fetch for details on the contents of the C<savable> parameter
2661              
2662             =item $obj->raw_savable( $filename, $content )
2663              
2664             I<In WebFetch 0.10 and later, this should be used only in
2665             format handler functions. See do_actions() for details.>
2666              
2667             This WebFetch utility function stores any raw content and a filename
2668             in the $obj->{savable} array,
2669             in preparation for writing to that file.
2670             (The actual save operation may also automatically include keeping
2671             backup files and setting the group and mode of the file.)
2672              
2673             See $obj->fetch for details on the contents of the C<savable> parameter
2674              
2675             =item $obj->direct_fetch_savable( $filename, $source )
2676              
2677             I<This should be used only in format handler functions.
2678             See do_actions() for details.>
2679              
2680             This adds a task for the save function to fetch a URL and save it
2681             verbatim in a file. This can be used to download links contained
2682             in a news feed.
2683              
2684             =item $obj->no_savables_ok
2685              
2686             This can be used by an output function which handles its own intricate output
2687             operation (such as WebFetch::Output::TWiki). If the savables array is empty,
2688             it would cause an error. Using this function drops a note in it which
2689             basically says that's OK.
2690              
2691             =item $obj->save
2692              
2693             This WebFetch utility function goes through all the entries in the
2694             $obj->{savable} array and saves their contents,
2695             providing several services such as keeping backup copies,
2696             and setting the group and mode of the file, if requested to do so.
2697              
2698             If you call a WebFetch-derived module from the command-line run()
2699             or fetch_main() functions, this will already be done for you.
2700             Otherwise you will need to call it after populating the
2701             C<savable> array with one entry per file to save.
2702              
2703             Upon entry to this function, C<$obj> must contain the following attributes:
2704              
2705             =over 4
2706              
2707             =item dir
2708              
2709             directory to save files in
2710              
2711             =item savable
2712              
2713             names and contents for files to save
2714              
2715             =back
2716              
2717             See $obj->fetch for details on the contents of the C<savable> parameter
2718              
2719             =item WebFetch::parse_date([{locale => "locale", time_zone => "time zone"}], $raw_time_str)
2720              
2721             This parses a time string into a time or date structure which can be used by gen_timestamp() or anchor_timestr().
2722              
2723             If the string can be parsed as a simple date in the format of YYYY-MM-DD or YYYYMMDD, it returns an array of
2724             parameters which can be passed to DateTime->new(). Given in this context, gen_timestamp() or anchor_timestr()
2725             recognize that means this is only a date with no time. (DateTime would fill in a time for midnight, which could be
2726             shifted by hours if a timezone is added, making a date-only condition nearly impossible to detect.)
2727              
2728             If the time can be parsed by L<DateTime::Format::ISO8601>, that result is returned.
2729              
2730             If the time can be parsed by L<Date::Calc>'s Parse_Date(), a date-only array result is returned as above.
2731              
2732             If the string can't be parsed, it returns undef;
2733              
2734             =item WebFetch::gen_timestamp([{locale => "locale", time_zone => "time zone"}], $time_ref)
2735              
2736             This takes a reference received from I<parse_date()> above and returns a string with the date in current locale format.
2737              
2738             =item anchor_timestr([{time_zone => "time zone"}], $time_ref)
2739              
2740             This takes a reference received from I<parse_date()> above and returns a timestamp string which can be used
2741             as a hypertext link anchor, such as in HTML.
2742             The string will be the numbers from the date, and possible time of day, delimited by dashes '-'.
2743             If a time zone is provided, it will be used.
2744              
2745             For example, August 5, 2022 at 19:30 becomes "2022-08-05-19-30-00".
2746              
2747             =item AUTOLOAD functionality
2748              
2749             When a WebFetch input object is passed to an output class, operations
2750             on $self would not usually work. WebFetch subclasses are considered to be
2751             cooperating with each other. So WebFetch provides AUTOLOAD functionality
2752             to catch undefined function calls for its subclasses. If the calling
2753             class provides a function by the name that was attempted, then it will
2754             be redirected there.
2755              
2756             =back
2757              
2758             =head2 WRITING WebFetch-DERIVED MODULES
2759              
2760             The easiest way to make a new WebFetch-derived module is to start
2761             from the module closest to your fetch operation and modify it.
2762             Make sure to change all of the following:
2763              
2764             =over 4
2765              
2766             =item fetch function
2767              
2768             The fetch function is the meat of the operation.
2769             Get the desired info from a local file or remote site and place the
2770             contents that need to be saved in the C<savable> parameter.
2771              
2772             =item module name
2773              
2774             Be sure to catch and change them all.
2775              
2776             =item file names
2777              
2778             The code and documentation may refer to output files by name.
2779              
2780             =item module parameters
2781              
2782             Change the URL, number of links, etc as necessary.
2783              
2784             =item command-line parameters
2785              
2786             If you need to add command-line parameters, set both the
2787             B<Options> and B<Usage> configuration parameters when your module calls I<module_register()>.
2788             Don't forget to add documentation for your command-line options
2789             and remove old documentation for any you removed.
2790              
2791             When adding documentation, if the existing formatting isn't enough
2792             for your changes, there's more information about
2793             Perl's
2794             POD ("plain old documentation")
2795             embedded documentation format at
2796             http://www.cpan.org/doc/manual/html/pod/perlpod.html
2797              
2798             =item authors
2799              
2800             Do not modify the names unless instructed to do so.
2801             The maintainers have discretion whether one's contributions are significant enough to qualify as a co-author.
2802              
2803             =back
2804              
2805             Please consider contributing any useful changes back to the WebFetch
2806             project at C<maint@webfetch.org>.
2807              
2808             =head1 ACKNOWLEDGEMENTS
2809              
2810             WebFetch was written by Ian Kluft
2811             Send patches, bug reports, suggestions and questions to
2812             C<maint@webfetch.org>.
2813              
2814             Some changes in versions 0.12-0.13 (Aug-Sep 2009) were made for and
2815             sponsored by Twiki Inc (formerly TWiki.Net).
2816              
2817             =head1 LICENSE
2818              
2819             WebFetch is Open Source software licensed under the GNU General Public License Version 3.
2820             See L<https://www.gnu.org/licenses/gpl-3.0-standalone.html>.
2821              
2822             =head1 SEE ALSO
2823              
2824             Included in WebFetch module:
2825             L<WebFetch::Input::PerlStruct>,
2826             L<WebFetch::Input::SiteNews>,
2827             L<WebFetch::Output::Dump>,
2828             L<WebFetch::Data::Config>,
2829             L<WebFetch::Data::Record>,
2830             L<WebFetch::Data::Store>
2831              
2832             Modules separated to contain external module dependencies:
2833             L<WebFetch::Input::Atom>,
2834             L<WebFetch::RSS>,
2835             L<WebFetch::Output::TT>,
2836             L<WebFetch::Output::TWiki>,
2837              
2838             Source code repository:
2839             L<https://github.com/ikluft/WebFetch>
2840              
2841             =head1 BUGS AND LIMITATIONS
2842              
2843             Please report bugs via GitHub at L<https://github.com/ikluft/WebFetch/issues>
2844              
2845             Patches and enhancements may be submitted via a pull request at L<https://github.com/ikluft/WebFetch/pulls>
2846              
2847             =head1 AUTHOR
2848              
2849             Ian Kluft <https://github.com/ikluft>
2850              
2851             =head1 COPYRIGHT AND LICENSE
2852              
2853             This software is Copyright (c) 1998-2023 by Ian Kluft.
2854              
2855             This is free software, licensed under:
2856              
2857             The GNU General Public License, Version 3, June 2007
2858              
2859             =cut
2860              
2861             __END__
2862             # remainder of POD docs follow
2863