File Coverage

blib/lib/WebFetch/Input/Atom.pm

Criterion	Covered	Total	%
statement	6	6	100.0
branch			n/a
condition			n/a
subroutine	2	2	100.0
pod			n/a
total	8	8	100.0

line	stmt	sub	time	code
1				#
2				# WebFetch::Input::Atom - get headlines from remote Atom feed
3				#
4				# Copyright (c) 1998-2009 Ian Kluft. This program is free software; you can
5				# redistribute it and/or modify it under the terms of the GNU General Public
6				# License Version 3. See http://www.webfetch.org/GPLv3.txt
7
8				package WebFetch::Input::Atom;
9
10	1	1	1322	use strict;
	1		2
	1		38
11	1	1	5	use base "WebFetch";
	1		2
	1		74
12
13				use Carp;
14				use Scalar::Util qw( blessed );
15				use Date::Calc qw(Today Delta_Days Month_to_Text);
16				use XML::Atom::Client;
17				use LWP::UserAgent;
18
19				use Exception::Class (
20				);
21
22				=head1 NAME
23
24				WebFetch::Input::Atom - WebFetch input from Atom feeds
25
26				=head1 SYNOPSIS
27
28				This is an input module for WebFetch which accesses an Atom feed.
29				The --source parameter contains the URL of the feed.
30
31				From the command line:
32
33				C
34				--source atom-feed-url [...WebFetch output options...]>
35
36				In perl scripts:
37
38				use WebFetch::Input::Atom;
39
40				my $obj = WebFetch->new(
41				"dir" => "/path/to/fetch/workspace",
42				"source" => "http://search.twitter.com/search.atom?q=%23twiki",
43				"source_format" => "atom",
44				"dest" => "dump",
45				"dest_format" = "/path/to/dump/file",
46				);
47				$obj->do_actions; # process output
48				$obj->save; # save results
49
50
51				=head1 DESCRIPTION
52
53				This module gets the current headlines from a site-local file.
54
55				The I<--input> parameter specifies a file name which contains news to be
56				posted. See L<"FILE FORMAT"> below for details on contents to put in the
57				file. I<--input> may be specified more than once, allowing a single news
58				output to come from more than one input. For example, one file could be
59				manually maintained in CVS or RCS and another could be entered from a
60				web form.
61
62				After this runs, the file C will be created or replaced.
63				If there already was a C file, it will be moved to
64				C.
65
66				=cut
67
68
69				our @Options = ();
70				our $Usage = "";
71
72				# no user-servicable parts beyond this point
73
74				# register capabilities with WebFetch
75				__PACKAGE__->module_register( "cmdline", "input:atom" );
76
77				# called from WebFetch main routine
78				sub fetch
79				{
80				my ( $self ) = @_;
81
82				# set up Webfetch Embedding API data
83				$self->data->add_fields( "id", "updated", "title", "author", "link",
84				"summary", "content", "xml" );
85				# defined which fields match to which "well-known field names"
86				$self->data->add_wk_names(
87				"id" => "id",
88				"title" => "title",
89				"url" => "link",
90				"date" => "updated",
91				"summary" => "summary",
92				);
93
94				# parse data file
95				$self->parse_input();
96
97				# return and let WebFetch handle the data
98				}
99
100				# extract a string value from a scalar/ref if possible
101				sub extract_value
102				{
103				my $thing = shift;
104
105				( defined $thing ) or return undef;
106				if ( ref $thing ) {
107				if ( !blessed $thing ) {
108				# it's a HASH/ARRAY/etc, not an object
109				return undef;
110				}
111				if ( $thing->can( "as_string" )) {
112				return $thing->as_string;
113				}
114				return undef;
115				} else {
116				$thing =~ s/\s+$//s;
117				length $thing > 0 or return undef;
118				return $thing;
119				}
120				}
121
122				# parse Atom input
123				sub parse_input
124				{
125				my $self = shift;
126				my $atom_api = XML::Atom::Client->new;
127				my $atom_feed = $atom_api->getFeed( $self->{source} );
128
129				# parse values from top of structure
130				my ( %feed, @entries, $entry );
131				@entries = $atom_feed->entries;
132				foreach $entry ( @entries ) {
133				# save the data record
134				my $id = extract_value( $entry->id() );
135				my $title = extract_value( $entry->title() );
136				my $author = ( defined $entry->author )
137				? extract_value( $entry->author->name ) : "";
138				my $link = extract_value( $entry->link->href );
139				my $updated = extract_value( $entry->updated() );
140				my $summary = extract_value( $entry->summary() );
141				my $content = extract_value( $entry->content() );
142				my $xml = $entry->as_xml();
143				$self->data->add_record( $id, $updated, $title,
144				$author, $link, $summary, $content, $xml );
145				}
146				}
147
148				1;
149				__END__