| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | # | 
| 2 |  |  |  |  |  |  | # WebFetch::Input::RSS - get headlines from remote RSS feed | 
| 3 |  |  |  |  |  |  | # | 
| 4 |  |  |  |  |  |  | # Copyright (c) 1998-2009 Ian Kluft. This program is free software; you can | 
| 5 |  |  |  |  |  |  | # redistribute it and/or modify it under the terms of the GNU General Public | 
| 6 |  |  |  |  |  |  | # License Version 3. See  http://www.webfetch.org/GPLv3.txt | 
| 7 |  |  |  |  |  |  |  | 
| 8 |  |  |  |  |  |  | package WebFetch::Input::RSS; | 
| 9 |  |  |  |  |  |  |  | 
| 10 | 1 |  |  | 1 |  | 1712 | use strict; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 41 |  | 
| 11 | 1 |  |  | 1 |  | 5 | use base "WebFetch"; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 82 |  | 
| 12 |  |  |  |  |  |  |  | 
| 13 |  |  |  |  |  |  | use Carp; | 
| 14 |  |  |  |  |  |  | use Scalar::Util qw( blessed ); | 
| 15 |  |  |  |  |  |  | use Date::Calc qw(Today Delta_Days Month_to_Text); | 
| 16 |  |  |  |  |  |  | use XML::RSS; | 
| 17 |  |  |  |  |  |  | use LWP::UserAgent; | 
| 18 |  |  |  |  |  |  |  | 
| 19 |  |  |  |  |  |  | use Exception::Class ( | 
| 20 |  |  |  |  |  |  | ); | 
| 21 |  |  |  |  |  |  |  | 
| 22 |  |  |  |  |  |  | =head1 NAME | 
| 23 |  |  |  |  |  |  |  | 
| 24 |  |  |  |  |  |  | WebFetch::Input::RSS - download and save an RSS feed | 
| 25 |  |  |  |  |  |  |  | 
| 26 |  |  |  |  |  |  | =cut | 
| 27 |  |  |  |  |  |  |  | 
| 28 |  |  |  |  |  |  | our @Options = (); | 
| 29 |  |  |  |  |  |  | our $Usage = ""; | 
| 30 |  |  |  |  |  |  |  | 
| 31 |  |  |  |  |  |  | # configuration parameters | 
| 32 |  |  |  |  |  |  |  | 
| 33 |  |  |  |  |  |  | # no user-serviceable parts beyond this point | 
| 34 |  |  |  |  |  |  |  | 
| 35 |  |  |  |  |  |  | # register capabilities with WebFetch | 
| 36 |  |  |  |  |  |  | __PACKAGE__->module_register( "input:rss" ); | 
| 37 |  |  |  |  |  |  |  | 
| 38 |  |  |  |  |  |  | =head1 SYNOPSIS | 
| 39 |  |  |  |  |  |  |  | 
| 40 |  |  |  |  |  |  | In perl scripts: | 
| 41 |  |  |  |  |  |  |  | 
| 42 |  |  |  |  |  |  | C<use WebFetch::Input::RSS;> | 
| 43 |  |  |  |  |  |  |  | 
| 44 |  |  |  |  |  |  | From the command line: | 
| 45 |  |  |  |  |  |  |  | 
| 46 |  |  |  |  |  |  | C<perl -w -MWebFetch::Input::RSS -e "&fetch_main" -- --dir directory | 
| 47 |  |  |  |  |  |  | --source rss-feed-url [...WebFetch output options...]> | 
| 48 |  |  |  |  |  |  |  | 
| 49 |  |  |  |  |  |  | =cut | 
| 50 |  |  |  |  |  |  |  | 
| 51 |  |  |  |  |  |  | # called from WebFetch main routine | 
| 52 |  |  |  |  |  |  | sub fetch | 
| 53 |  |  |  |  |  |  | { | 
| 54 |  |  |  |  |  |  | my ( $self ) = @_; | 
| 55 |  |  |  |  |  |  |  | 
| 56 |  |  |  |  |  |  | # set up WebFetch Embedding API data | 
| 57 |  |  |  |  |  |  | $self->data->add_fields( "pubDate", "title", "link", "category", | 
| 58 |  |  |  |  |  |  | "description" ); | 
| 59 |  |  |  |  |  |  | # define which fields map to which "well-known field names" | 
| 60 |  |  |  |  |  |  | $self->data->add_wk_names( | 
| 61 |  |  |  |  |  |  | "title" => "title", | 
| 62 |  |  |  |  |  |  | "url" => "link", | 
| 63 |  |  |  |  |  |  | "date" => "pubDate", | 
| 64 |  |  |  |  |  |  | "summary" => "description", | 
| 65 |  |  |  |  |  |  | "category" => "category", | 
| 66 |  |  |  |  |  |  | ); | 
| 67 |  |  |  |  |  |  |  | 
| 68 |  |  |  |  |  |  | # parse data file | 
| 69 |  |  |  |  |  |  | $self->parse_input(); | 
| 70 |  |  |  |  |  |  |  | 
| 71 |  |  |  |  |  |  | # return and let WebFetch handle the data | 
| 72 |  |  |  |  |  |  | } | 
| 73 |  |  |  |  |  |  |  | 
| 74 |  |  |  |  |  |  | # extract a string value from a scalar/ref if possible | 
| 75 |  |  |  |  |  |  | sub extract_value | 
| 76 |  |  |  |  |  |  | { | 
| 77 |  |  |  |  |  |  | my $thing = shift; | 
| 78 |  |  |  |  |  |  |  | 
| 79 |  |  |  |  |  |  | ( defined $thing ) or return undef; | 
| 80 |  |  |  |  |  |  | if ( ref $thing ) { | 
| 81 |  |  |  |  |  |  | if ( !blessed $thing ) { | 
| 82 |  |  |  |  |  |  | # it's a HASH/ARRAY/etc, not an object | 
| 83 |  |  |  |  |  |  | return undef; | 
| 84 |  |  |  |  |  |  | } | 
| 85 |  |  |  |  |  |  | if ( $thing->can( "as_string" )) { | 
| 86 |  |  |  |  |  |  | return $thing->as_string; | 
| 87 |  |  |  |  |  |  | } | 
| 88 |  |  |  |  |  |  | return undef; | 
| 89 |  |  |  |  |  |  | } else { | 
| 90 |  |  |  |  |  |  | $thing =~ s/\s+$//s; | 
| 91 |  |  |  |  |  |  | length $thing > 0 or return undef; | 
| 92 |  |  |  |  |  |  | return $thing; | 
| 93 |  |  |  |  |  |  | } | 
| 94 |  |  |  |  |  |  | } | 
| 95 |  |  |  |  |  |  |  | 
| 96 |  |  |  |  |  |  | # parse RSS feed into hash structure | 
| 97 |  |  |  |  |  |  | sub parse_rss | 
| 98 |  |  |  |  |  |  | { | 
| 99 |  |  |  |  |  |  | my $text = shift; | 
| 100 |  |  |  |  |  |  | my $rss = new XML::RSS; | 
| 101 |  |  |  |  |  |  | $rss->parse($text); | 
| 102 |  |  |  |  |  |  |  | 
| 103 |  |  |  |  |  |  | # parse values from top of structure | 
| 104 |  |  |  |  |  |  | my ( %feed, $field, $item, @buckets ); | 
| 105 |  |  |  |  |  |  | foreach $field ( keys %$rss ) { | 
| 106 |  |  |  |  |  |  | if ( ref $rss->{$field} eq "HASH" ) { | 
| 107 |  |  |  |  |  |  | push @buckets, $field; | 
| 108 |  |  |  |  |  |  | } | 
| 109 |  |  |  |  |  |  | my $value = extract_value( $rss->{$field}); | 
| 110 |  |  |  |  |  |  | ( defined $value ) or next; | 
| 111 |  |  |  |  |  |  | $feed{$field} = $value; | 
| 112 |  |  |  |  |  |  | } | 
| 113 |  |  |  |  |  |  |  | 
| 114 |  |  |  |  |  |  | # parse hashes, i.e. channel parameters, XML/RSS modules, etc | 
| 115 |  |  |  |  |  |  | my $bucket; | 
| 116 |  |  |  |  |  |  | foreach $bucket ( @buckets ) { | 
| 117 |  |  |  |  |  |  | ( defined $rss->{$bucket}) or next; | 
| 118 |  |  |  |  |  |  | $feed{$bucket} = {}; | 
| 119 |  |  |  |  |  |  | foreach $field ( keys %{$rss->{$bucket}} ) { | 
| 120 |  |  |  |  |  |  | my $value = extract_value( $rss->{$bucket}{$field}); | 
| 121 |  |  |  |  |  |  | ( defined $value ) or next; | 
| 122 |  |  |  |  |  |  | $feed{$bucket}{$field} = $value; | 
| 123 |  |  |  |  |  |  | } | 
| 124 |  |  |  |  |  |  | } | 
| 125 |  |  |  |  |  |  |  | 
| 126 |  |  |  |  |  |  | # parse each item from the news feed | 
| 127 |  |  |  |  |  |  | $feed{items} = []; | 
| 128 |  |  |  |  |  |  | foreach $item ( @{$rss->{items}}) { | 
| 129 |  |  |  |  |  |  | my $f_item = {}; | 
| 130 |  |  |  |  |  |  | foreach $field ( keys %$item ) { | 
| 131 |  |  |  |  |  |  | my $value = extract_value( $item->{$field}); | 
| 132 |  |  |  |  |  |  | ( defined $value ) or next; | 
| 133 |  |  |  |  |  |  | $f_item->{$field} = $value; | 
| 134 |  |  |  |  |  |  | } | 
| 135 |  |  |  |  |  |  | push @{$feed{items}}, $f_item; | 
| 136 |  |  |  |  |  |  | } | 
| 137 |  |  |  |  |  |  |  | 
| 138 |  |  |  |  |  |  | return \%feed; | 
| 139 |  |  |  |  |  |  | } | 
| 140 |  |  |  |  |  |  |  | 
| 141 |  |  |  |  |  |  | # parse RSS input | 
| 142 |  |  |  |  |  |  | sub parse_input | 
| 143 |  |  |  |  |  |  | { | 
| 144 |  |  |  |  |  |  | my $self = shift; | 
| 145 |  |  |  |  |  |  |  | 
| 146 |  |  |  |  |  |  | # parse data file | 
| 147 |  |  |  |  |  |  | my $raw_rss = $self->get(); | 
| 148 |  |  |  |  |  |  | my $feed = parse_rss( $$raw_rss ); | 
| 149 |  |  |  |  |  |  |  | 
| 150 |  |  |  |  |  |  | # translate parsed RSS feed into the WebFetch Embedding API data table | 
| 151 |  |  |  |  |  |  | my ( $item, %label_hash, $pos ); | 
| 152 |  |  |  |  |  |  | $pos = 0; | 
| 153 |  |  |  |  |  |  | foreach $item ( @{$feed->{items}} ) { | 
| 154 |  |  |  |  |  |  |  | 
| 155 |  |  |  |  |  |  | # save the data record | 
| 156 |  |  |  |  |  |  | my $title = ( defined $item->{title}) ? $item->{title} : ""; | 
| 157 |  |  |  |  |  |  | my $link = ( defined $item->{link}) ? $item->{link} : ""; | 
| 158 |  |  |  |  |  |  | my $pub_date = ( defined $item->{pubDate}) | 
| 159 |  |  |  |  |  |  | ? $item->{pubDate} : ""; | 
| 160 |  |  |  |  |  |  | my $category = ( defined $item->{category}) | 
| 161 |  |  |  |  |  |  | ? $item->{category} : ""; | 
| 162 |  |  |  |  |  |  | my $description = ( defined $item->{description}) | 
| 163 |  |  |  |  |  |  | ? $item->{description} : ""; | 
| 164 |  |  |  |  |  |  | $self->data->add_record( $pub_date, $title, $link, | 
| 165 |  |  |  |  |  |  | $category, $description ); | 
| 166 |  |  |  |  |  |  | $pos++; | 
| 167 |  |  |  |  |  |  | } | 
| 168 |  |  |  |  |  |  | } | 
| 169 |  |  |  |  |  |  |  | 
| 170 |  |  |  |  |  |  | 1; | 
| 171 |  |  |  |  |  |  | __END__ |