File Coverage

blib/lib/WebFetch/Input/Atom.pm

Criterion	Covered	Total	%
statement	33	65	50.7
branch	0	12	0.0
condition			n/a
subroutine	11	14	78.5
pod	1	3	33.3
total	45	94	47.8

line	stmt	bran	sub	pod	time	code
1						# WebFetch::Input::Atom
2						# ABSTRACT: get headlines for WebFetch from Atom feeds
3						#
4						# Copyright (c) 1998-2022 Ian Kluft. This program is free software; you can
5						# redistribute it and/or modify it under the terms of the GNU General Public
6						# License Version 3. See https://www.gnu.org/licenses/gpl-3.0-standalone.html
7
8						# pragmas to silence some warnings from Perl::Critic
9						## no critic (Modules::RequireExplicitPackage)
10						# This solves a catch-22 where parts of Perl::Critic want both package and use-strict to be first
11	1		1		1480	use strict;
	1				2
	1				24
12	1		1		31	use warnings;
	1				3
	1				26
13	1		1		530	use utf8;
	1				12
	1				4
14						## use critic (Modules::RequireExplicitPackage)
15
16						$WebFetch::Input::Atom::VERSION = '0.1.0';
17						use strict;
18	1		1		41	use base "WebFetch";
	1				2
	1				18
19	1		1		4
	1				8
	1				551
20						use Carp;
21	1		1		72061	use Scalar::Util qw( blessed );
	1				2
	1				50
22	1		1		6	use Date::Calc qw(Today Delta_Days Month_to_Text);
	1				2
	1				36
23	1		1		5	use XML::Atom::Client;
	1				3
	1				65
24	1		1		454	use LWP::UserAgent;
	1				502037
	1				34
25	1		1		7
	1				3
	1				69
26						use Exception::Class (
27						);
28	1		1		8
	1				3
	1				418
29
30
31						our @Options = ();
32						our $Usage = "";
33
34						# no user-serviceable parts beyond this point
35
36						# register capabilities with WebFetch
37						__PACKAGE__->module_register( "cmdline", "input:atom" );
38
39						# called from WebFetch main routine
40						{
41						my ( $self ) = @_;
42
43	0		0	1		# set up WebFetch Embedding API data
44						$self->data->add_fields( "id", "updated", "title", "author", "link",
45						"summary", "content", "xml" );
46	0					# define which fields map to which "well-known field names"
47						$self->data->add_wk_names(
48						"id" => "id",
49	0					"title" => "title",
50						"url" => "link",
51						"date" => "updated",
52						"summary" => "summary",
53						);
54
55						# parse data file
56						$self->parse_input();
57
58	0					# return and let WebFetch handle the data
59						return;
60						}
61	0
62						# extract a string value from a scalar/ref if possible
63						{
64						my $thing = shift;
65
66						( defined $thing ) or return;
67	0		0	0		if ( ref $thing ) {
68						if ( !blessed $thing ) {
69	0	0				# it's a HASH/ARRAY/etc, not an object
70	0	0				return;
71	0	0				}
72						if ( $thing->can( "as_string" )) {
73	0					return $thing->as_string;
74						}
75	0	0				return;
76	0					} else {
77						$thing =~ s/\s+$//xs;
78	0					length $thing > 0 or return;
79						return $thing;
80	0					}
81	0	0				}
82	0
83						# parse Atom input
84						{
85						my $self = shift;
86						my $atom_api = XML::Atom::Client->new;
87						my $atom_feed = $atom_api->getFeed( $self->{source} );
88
89	0		0	0		# parse values from top of structure
90	0					my @entries;
91	0					@entries = $atom_feed->entries;
92						foreach my $entry ( @entries ) {
93						# save the data record
94	0					my $id = extract_value( $entry->id() );
95	0					my $title = extract_value( $entry->title() );
96	0					my $author = ( defined $entry->author )
97						? extract_value( $entry->author->name ) : "";
98	0					my $link = extract_value( $entry->link->href );
99	0					my $updated = extract_value( $entry->updated() );
100	0	0				my $summary = extract_value( $entry->summary() );
101						my $content = extract_value( $entry->content() );
102	0					my $xml = $entry->as_xml();
103	0					$self->data->add_record( $id, $updated, $title,
104	0					$author, $link, $summary, $content, $xml );
105	0					}
106	0					return;
107	0					}
108
109						1;
110	0
111						=pod
112
113						=encoding UTF-8
114
115						=head1 NAME
116
117						WebFetch::Input::Atom - get headlines for WebFetch from Atom feeds
118
119						=head1 VERSION
120
121						version 0.1.0
122
123						=head1 SYNOPSIS
124
125						This is an input module for WebFetch which accesses an Atom feed.
126						The --source parameter contains the URL of the feed.
127
128						From the command line:
129
130						C<perl -w -MWebFetch::Input::Atom -e "&fetch_main" -- --dir directory
131						--source atom-feed-url [...WebFetch output options...]>
132
133						In perl scripts:
134
135						use WebFetch::Input::Atom;
136
137						my $obj = WebFetch->new(
138						"dir" => "/path/to/fetch/workspace",
139						"source" => "http://search.twitter.com/search.atom?q=%23twiki",
140						"source_format" => "atom",
141						"dest" => "dump",
142						"dest_format" => "/path/to/dump/file",
143						);
144						$obj->do_actions; # process output
145						$obj->save; # save results
146
147						=head1 DESCRIPTION
148
149						This module gets the current headlines from an Atom feed.
150
151						The I<--input> parameter specifies a file name which contains news to be
152						posted. See L<"Atom FORMAT"> below for details on contents to put in the
153						file. I<--input> may be specified more than once, allowing a single news
154						output to come from more than one input. For example, one file could be
155						manually maintained in CVS or RCS and another could be entered from a
156						web form.
157
158						After this runs, the file C<site_news.html> will be created or replaced.
159						If there already was a C<site_news.html> file, it will be moved to
160						C<Osite_news.html>.
161
162						=head1 Atom FORMAT
163
164						Atom is an XML format defined at http://atompub.org/rfc4287.html
165
166						WebFetch::Input::Atom uses Perl's XML::Atom::Client to parse Atom feeds.
167
168						=head1 SEE ALSO
169
170						L<WebFetch>
171						L<https://github.com/ikluft/WebFetch>
172
173						=head1 BUGS AND LIMITATIONS
174
175						Please report bugs via GitHub at L<https://github.com/ikluft/WebFetch/issues>
176
177						Patches and enhancements may be submitted via a pull request at L<https://github.com/ikluft/WebFetch/pulls>
178
179						=head1 AUTHOR
180
181						Ian Kluft <https://github.com/ikluft>
182
183						=head1 COPYRIGHT AND LICENSE
184
185						This software is Copyright (c) 1998-2022 by Ian Kluft.
186
187						This is free software, licensed under:
188
189						The GNU General Public License, Version 3, June 2007
190
191						=cut
192
193						# POD docs follow
194