File Coverage

blib/lib/WebFetch/Input/Atom.pm
Criterion Covered Total %
statement 33 65 50.7
branch 0 12 0.0
condition n/a
subroutine 11 14 78.5
pod 1 3 33.3
total 45 94 47.8


line stmt bran cond sub pod time code
1             # WebFetch::Input::Atom
2             # ABSTRACT: get headlines for WebFetch from Atom feeds
3             #
4             # Copyright (c) 1998-2022 Ian Kluft. This program is free software; you can
5             # redistribute it and/or modify it under the terms of the GNU General Public
6             # License Version 3. See https://www.gnu.org/licenses/gpl-3.0-standalone.html
7              
8             # pragmas to silence some warnings from Perl::Critic
9             ## no critic (Modules::RequireExplicitPackage)
10             # This solves a catch-22 where parts of Perl::Critic want both package and use-strict to be first
11 1     1   1480 use strict;
  1         2  
  1         24  
12 1     1   31 use warnings;
  1         3  
  1         26  
13 1     1   530 use utf8;
  1         12  
  1         4  
14             ## use critic (Modules::RequireExplicitPackage)
15              
16             $WebFetch::Input::Atom::VERSION = '0.1.0';
17             use strict;
18 1     1   41 use base "WebFetch";
  1         2  
  1         18  
19 1     1   4  
  1         8  
  1         551  
20             use Carp;
21 1     1   72061 use Scalar::Util qw( blessed );
  1         2  
  1         50  
22 1     1   6 use Date::Calc qw(Today Delta_Days Month_to_Text);
  1         2  
  1         36  
23 1     1   5 use XML::Atom::Client;
  1         3  
  1         65  
24 1     1   454 use LWP::UserAgent;
  1         502037  
  1         34  
25 1     1   7  
  1         3  
  1         69  
26             use Exception::Class (
27             );
28 1     1   8  
  1         3  
  1         418  
29              
30              
31             our @Options = ();
32             our $Usage = "";
33              
34             # no user-serviceable parts beyond this point
35              
36             # register capabilities with WebFetch
37             __PACKAGE__->module_register( "cmdline", "input:atom" );
38              
39             # called from WebFetch main routine
40             {
41             my ( $self ) = @_;
42              
43 0     0 1   # set up Webfetch Embedding API data
44             $self->data->add_fields( "id", "updated", "title", "author", "link",
45             "summary", "content", "xml" );
46 0           # defined which fields match to which "well-known field names"
47             $self->data->add_wk_names(
48             "id" => "id",
49 0           "title" => "title",
50             "url" => "link",
51             "date" => "updated",
52             "summary" => "summary",
53             );
54              
55             # parse data file
56             $self->parse_input();
57              
58 0           # return and let WebFetch handle the data
59             return;
60             }
61 0            
62             # extract a string value from a scalar/ref if possible
63             {
64             my $thing = shift;
65              
66             ( defined $thing ) or return;
67 0     0 0   if ( ref $thing ) {
68             if ( !blessed $thing ) {
69 0 0         # it's a HASH/ARRAY/etc, not an object
70 0 0         return;
71 0 0         }
72             if ( $thing->can( "as_string" )) {
73 0           return $thing->as_string;
74             }
75 0 0         return;
76 0           } else {
77             $thing =~ s/\s+$//xs;
78 0           length $thing > 0 or return;
79             return $thing;
80 0           }
81 0 0         }
82 0            
83             # parse Atom input
84             {
85             my $self = shift;
86             my $atom_api = XML::Atom::Client->new;
87             my $atom_feed = $atom_api->getFeed( $self->{source} );
88              
89 0     0 0   # parse values from top of structure
90 0           my @entries;
91 0           @entries = $atom_feed->entries;
92             foreach my $entry ( @entries ) {
93             # save the data record
94 0           my $id = extract_value( $entry->id() );
95 0           my $title = extract_value( $entry->title() );
96 0           my $author = ( defined $entry->author )
97             ? extract_value( $entry->author->name ) : "";
98 0           my $link = extract_value( $entry->link->href );
99 0           my $updated = extract_value( $entry->updated() );
100 0 0         my $summary = extract_value( $entry->summary() );
101             my $content = extract_value( $entry->content() );
102 0           my $xml = $entry->as_xml();
103 0           $self->data->add_record( $id, $updated, $title,
104 0           $author, $link, $summary, $content, $xml );
105 0           }
106 0           return;
107 0           }
108              
109             1;
110 0            
111             =pod
112              
113             =encoding UTF-8
114              
115             =head1 NAME
116              
117             WebFetch::Input::Atom - get headlines for WebFetch from Atom feeds
118              
119             =head1 VERSION
120              
121             version 0.1.0
122              
123             =head1 SYNOPSIS
124              
125             This is an input module for WebFetch which accesses an Atom feed.
126             The --source parameter contains the URL of the feed.
127              
128             From the command line:
129              
130             C<perl -w -MWebFetch::Input::Atom -e "&fetch_main" -- --dir directory
131             --source atom-feed-url [...WebFetch output options...]>
132              
133             In perl scripts:
134              
135             use WebFetch::Input::Atom;
136              
137             my $obj = WebFetch->new(
138             "dir" => "/path/to/fetch/workspace",
139             "source" => "http://search.twitter.com/search.atom?q=%23twiki",
140             "source_format" => "atom",
141             "dest" => "dump",
142             "dest_format" => "/path/to/dump/file",
143             );
144             $obj->do_actions; # process output
145             $obj->save; # save results
146              
147             =head1 DESCRIPTION
148              
149             This module gets the current headlines from an Atom feed.
150              
151             The I<--input> parameter specifies a file name which contains news to be
152             posted. See L<"FILE FORMAT"> below for details on contents to put in the
153             file. I<--input> may be specified more than once, allowing a single news
154             output to come from more than one input. For example, one file could be
155             manually maintained in CVS or RCS and another could be entered from a
156             web form.
157              
158             After this runs, the file C<site_news.html> will be created or replaced.
159             If there already was a C<site_news.html> file, it will be moved to
160             C<Osite_news.html>.
161              
162             =head1 Atom FORMAT
163              
164             Atom is an XML format defined at http://atompub.org/rfc4287.html
165              
166             WebFetch::Input::Atom uses Perl's XML::Atom::Client to parse Atom feeds.
167              
168             =head1 SEE ALSO
169              
170             L<WebFetch>
171             L<https://github.com/ikluft/WebFetch>
172              
173             =head1 BUGS AND LIMITATIONS
174              
175             Please report bugs via GitHub at L<https://github.com/ikluft/WebFetch/issues>
176              
177             Patches and enhancements may be submitted via a pull request at L<https://github.com/ikluft/WebFetch/pulls>
178              
179             =head1 AUTHOR
180              
181             Ian Kluft <https://github.com/ikluft>
182              
183             =head1 COPYRIGHT AND LICENSE
184              
185             This software is Copyright (c) 1998-2022 by Ian Kluft.
186              
187             This is free software, licensed under:
188              
189             The GNU General Public License, Version 3, June 2007
190              
191             =cut
192              
193             # POD docs follow
194