File Coverage

blib/lib/WebFetch/Input/Atom.pm
Criterion Covered Total %
statement 6 6 100.0
branch n/a
condition n/a
subroutine 2 2 100.0
pod n/a
total 8 8 100.0


line stmt bran cond sub pod time code
1             #
2             # WebFetch::Input::Atom - get headlines from remote Atom feed
3             #
4             # Copyright (c) 1998-2009 Ian Kluft. This program is free software; you can
5             # redistribute it and/or modify it under the terms of the GNU General Public
6             # License Version 3. See http://www.webfetch.org/GPLv3.txt
7              
8             package WebFetch::Input::Atom;
9              
10 1     1   1322 use strict;
  1         2  
  1         38  
11 1     1   5 use base "WebFetch";
  1         2  
  1         74  
12              
13             use Carp;
14             use Scalar::Util qw( blessed );
15             use Date::Calc qw(Today Delta_Days Month_to_Text);
16             use XML::Atom::Client;
17             use LWP::UserAgent;
18              
19             use Exception::Class (
20             );
21              
22             =head1 NAME
23              
24             WebFetch::Input::Atom - WebFetch input from Atom feeds
25              
26             =head1 SYNOPSIS
27              
28             This is an input module for WebFetch which accesses an Atom feed.
29             The --source parameter contains the URL of the feed.
30              
31             From the command line:
32              
33             C
34             --source atom-feed-url [...WebFetch output options...]>
35              
36             In perl scripts:
37              
38             use WebFetch::Input::Atom;
39              
40             my $obj = WebFetch->new(
41             "dir" => "/path/to/fetch/workspace",
42             "source" => "http://search.twitter.com/search.atom?q=%23twiki",
43             "source_format" => "atom",
44             "dest" => "dump",
45             "dest_format" = "/path/to/dump/file",
46             );
47             $obj->do_actions; # process output
48             $obj->save; # save results
49              
50              
51             =head1 DESCRIPTION
52              
53             This module gets the current headlines from a site-local file.
54              
55             The I<--input> parameter specifies a file name which contains news to be
56             posted. See L<"FILE FORMAT"> below for details on contents to put in the
57             file. I<--input> may be specified more than once, allowing a single news
58             output to come from more than one input. For example, one file could be
59             manually maintained in CVS or RCS and another could be entered from a
60             web form.
61              
62             After this runs, the file C will be created or replaced.
63             If there already was a C file, it will be moved to
64             C.
65              
66             =cut
67              
68              
69             our @Options = ();
70             our $Usage = "";
71              
72             # no user-servicable parts beyond this point
73              
74             # register capabilities with WebFetch
75             __PACKAGE__->module_register( "cmdline", "input:atom" );
76              
77             # called from WebFetch main routine
78             sub fetch
79             {
80             my ( $self ) = @_;
81              
82             # set up Webfetch Embedding API data
83             $self->data->add_fields( "id", "updated", "title", "author", "link",
84             "summary", "content", "xml" );
85             # defined which fields match to which "well-known field names"
86             $self->data->add_wk_names(
87             "id" => "id",
88             "title" => "title",
89             "url" => "link",
90             "date" => "updated",
91             "summary" => "summary",
92             );
93              
94             # parse data file
95             $self->parse_input();
96              
97             # return and let WebFetch handle the data
98             }
99              
100             # extract a string value from a scalar/ref if possible
101             sub extract_value
102             {
103             my $thing = shift;
104              
105             ( defined $thing ) or return undef;
106             if ( ref $thing ) {
107             if ( !blessed $thing ) {
108             # it's a HASH/ARRAY/etc, not an object
109             return undef;
110             }
111             if ( $thing->can( "as_string" )) {
112             return $thing->as_string;
113             }
114             return undef;
115             } else {
116             $thing =~ s/\s+$//s;
117             length $thing > 0 or return undef;
118             return $thing;
119             }
120             }
121              
122             # parse Atom input
123             sub parse_input
124             {
125             my $self = shift;
126             my $atom_api = XML::Atom::Client->new;
127             my $atom_feed = $atom_api->getFeed( $self->{source} );
128              
129             # parse values from top of structure
130             my ( %feed, @entries, $entry );
131             @entries = $atom_feed->entries;
132             foreach $entry ( @entries ) {
133             # save the data record
134             my $id = extract_value( $entry->id() );
135             my $title = extract_value( $entry->title() );
136             my $author = ( defined $entry->author )
137             ? extract_value( $entry->author->name ) : "";
138             my $link = extract_value( $entry->link->href );
139             my $updated = extract_value( $entry->updated() );
140             my $summary = extract_value( $entry->summary() );
141             my $content = extract_value( $entry->content() );
142             my $xml = $entry->as_xml();
143             $self->data->add_record( $id, $updated, $title,
144             $author, $link, $summary, $content, $xml );
145             }
146             }
147              
148             1;
149             __END__