File Coverage

blib/lib/Plucene/SearchEngine/Index/RSS.pm
Criterion Covered Total %
statement 13 15 86.6
branch n/a
condition n/a
subroutine 5 5 100.0
pod n/a
total 18 20 90.0


line stmt bran cond sub pod time code
1             package Plucene::SearchEngine::Index::RSS;
2 1     1   162163 use base 'Plucene::SearchEngine::Index::Base';
  1         2  
  1         892  
3             __PACKAGE__->register_handler(qw( rss rdf application/rss+xml application/rdf+xml ));
4 1     1   16589 use 5.006;
  1         6  
  1         35  
5 1     1   6 use strict;
  1         6  
  1         25  
6 1     1   5 use warnings;
  1         1  
  1         34  
7 1     1   397 use XML::RSS;
  0            
  0            
8             use Date::Parse;
9             our $VERSION = '0.02';
10              
# Parse the RSS/RDF file at $filename and build one index object per feed item.
#
# Arguments:
#   $self     - handler object; each article object is created from its class,
#               and its first "id" datum is appended to every article id.
#   $filename - path of the feed file to parse.
#
# Returns the list of article objects (one per item in the feed), or nothing
# at all if the file cannot be parsed.
sub gather_data_from_file {
    my ($self, $filename) = @_;

    # Time::Piece is used below but is not loaded by this file; require it
    # here so the handler does not depend on another module loading it first.
    require Time::Piece;

    my $xml = XML::RSS->new;

    # An unparseable feed yields no articles. The trailing "1" makes success
    # explicit rather than relying on $@ still being intact afterwards.
    eval { $xml->parsefile($filename); 1 } or return;

    my @articles;
    for my $item (@{ $xml->{'items'} }) {
        my $art = (ref $self)->new;

        # Prefer the item's own date (dc:date, then the RSS 2.0 per-item
        # pubDate) before falling back to the feed-level dates.
        # NOTE(review): XML::RSS may keep the channel's dc:date under
        # $xml->{channel}{dc}{date} rather than $xml->{dc}{date} - confirm.
        my $date_str = $item->{dc}{date}
                    || $item->{pubDate}
                    || $xml->{dc}{date}
                    || $xml->channel("pubDate");

        # Only hand str2time a real string. With an undefined epoch,
        # Time::Piece->new is expected to fall back to the current time,
        # which is what the original code ended up doing for undated feeds.
        my $epoch = (defined $date_str && length $date_str)
                  ? str2time($date_str)
                  : undef;
        $art->add_data("modified", "Date", Time::Piece->new($epoch));

        if ($item->{dc}{creator}) {
            $art->add_data("creator", "Text", $item->{dc}{creator});
        }

        $art->add_data("feed", "Text", $xml->channel("title"));

        # The id combines the article link with the id of the feed itself.
        $art->add_data("id", "Keyword",
            $item->{link} . " in " . $self->{id}{data}[0]);

        # Use the description, or the content:encoded body when it is absent.
        $art->add_data("text", "UnStored",
            $item->{description}
            || $item->{"http://purl.org/rss/1.0/modules/content/"}{encoded});

        $art->add_data("title", "Text", $item->{title});

        push @articles, $art;
    }

    return @articles;
}
38              
39             =head1 NAME
40              
41             Plucene::SearchEngine::Index::RSS - Index RSS files
42              
43             =head1 SYNOPSIS
44              
45             my @articles = Plucene::SearchEngine::Index::URL->examine(
46             "http://planet.perl.org/rss10.xml"
47             );
48             $indexer->index($_->document) for @articles;
49              
50             =head1 DESCRIPTION
51              
52             This examines RSS files and creates document hashes for individual items
53             in the feed. The objects have the following Plucene fields:
54              
55             =over 3
56              
57             =item modified
58              
59             The date that this article was published.
60              
61             =item creator
62              
63             The creator, if one was specified.
64              
65             =item feed
66              
67             The name of the feed from which this was taken.
68              
69             =item id
70              
71             The URL that the article links to, and the URL of the feed.
72              
73             =item text
74              
75             The text of the article.
76              
77             =item title
78              
79             The title of the article.
80              
81             =back
82              
83             =head1 WARNING
84              
85             Since C<Plucene::SearchEngine::Index> uses MIME types to determine the
86             type of a file, this module doesn't work particularly well using the
87             C<File> frontend. It works OK with the C<URL> frontend if the webserver
88             sends the right content type header. If not, you may have to fudge it by
89             registering your own handlers:
90              
91             Plucene::SearchEngine::Index::RSS->register_handler("text/xml");
92             # For instance
93              
94             =head1 SEE ALSO
95              
96             L<Plucene::SearchEngine::Index>.
97              
98             =head1 AUTHOR
99              
100             Simon Cozens, E<lt>simon@cpan.orgE<gt>
101              
102             =head1 COPYRIGHT AND LICENSE
103              
104             Copyright (C) 2004 by Simon Cozens
105              
106             This library is free software; you can redistribute it and/or modify
107             it under the same terms as Perl itself.
108              
109             =cut