File Coverage

blib/lib/XML/RSS/Aggregate.pm
Criterion Covered Total %
statement 4 6 66.6
branch n/a
condition n/a
subroutine 2 2 100.0
pod n/a
total 6 8 75.0


line stmt bran cond sub pod time code
1             # $File: //member/autrijus/XML-RSS-Aggregate/lib/XML/RSS/Aggregate.pm $ $Author: autrijus $
2             # $Revision: #4 $ $Change: 2924 $ $DateTime: 2002/12/25 15:04:33 $
3              
4             package XML::RSS::Aggregate;
5             $XML::RSS::Aggregate::VERSION = '0.02';
6              
7 1     1   700 use strict;
  1         3  
  1         37  
8 1     1   439 use XML::RSS;
  0            
  0            
9             use base 'XML::RSS';
10              
11             use Date::Parse;
12             use LWP::Simple 'get';
13             use HTML::Entities 'encode_entities';
14              
15             =head1 NAME
16              
17             XML::RSS::Aggregate - RSS Aggregator
18              
19             =head1 SYNOPSIS
20              
21             my $rss = XML::RSS::Aggregate->new(
22             # parameters for XML::RSS->channel()
23             title => 'Aggregated Examples',
24             link => 'http://blog.elixus.org/',
25              
26             # parameters for XML::RSS::Aggregate->aggregate()
27             sources => [ qw(
28             http://one.example.com/index.rdf
29             http://another.example.com/index.rdf
30             http://etc.example.com/index.rdf
31             ) ],
32             sort_by => sub {
33             $_[0]->{dc}{subject} # default to sort by dc:date
34             },
35             uniq_by => sub {
36             $_[0]->{title} # default to uniq by link
37             }
38             );
39              
40             $rss->aggregate( sources => [ ... ] ); # more items
41             $rss->save("all.rdf");
42              
43             =head1 DESCRIPTION
44              
45             This module implements a subclass of B, adding a single
46             C method that fetches other RSS feeds and add to the object
47             itself. It handles the proper ordering and duplication removal for
48             aggregated links.
49              
50             Also, the constructor C is modified to take arguments to pass
51             implicitly to C and C methods.
52              
53             All the base methods are still applicable to this module; please see
54             L for details.
55              
56             =head1 METHODS
57              
58             =over 4
59              
60             =item aggregate (sources=>\@url, sort_by=>\&func, uniq_by=>\&func)
61              
62             This method fetches all RSS feeds listed in C<@url> and pass their
63             items to the object's C.
64              
65             The optional C argument specifies the function to use for
66             ordering RSS items; it defaults to sort them by their C<{dc}{date}>
67             attribute (converted to absolute timestamps), with ties broken by
68             their C<{link}> attribute.
69              
70             The optional C argument specifies the function to use for
71             removing duplicate RSS items; it defaults to remove items that has
72             the same C<{link}> value.
73              
74             =back
75              
76             =cut
77              
78             sub new {
79             my ($class, %args) = @_;
80              
81             my $version = delete($args{version}) || '1.0';
82             my $self = $class->SUPER::new( version => $version );
83              
84             my $sources = delete($args{sources});
85             my $sort_by = delete($args{sort_by});
86              
87             $self->channel(%args) if %args;
88             $self->aggregate(
89             sources => $sources,
90             sort_by => $sort_by,
91             ) if $sources;
92              
93             return $self;
94             }
95              
96             sub aggregate {
97             my ($self, %args) = @_;
98              
99             my $sources = $args{sources} or return;
100             my $sort_by = $args{sort_by} || sub {
101             my $date = $_[0]->{dc}{date};
102             $date =~ s/:(\d\d)$/$1/ if $date;
103             sprintf("%20s", str2time($date)).$_[0]->{link}
104             };
105             my $uniq_by = $args{uniq_by} || sub {
106             $_[0]->{link}
107             };
108              
109             my $old_items = $self->{items} || [];
110             $self->{items} = [];
111              
112             my %saw;
113             $self->add_item(%{$_->[0]}) for
114             sort { $b->[1] cmp $a->[1] }
115             grep { $_->[1] }
116             map { [ $_ => scalar($sort_by->($_)) ] }
117             grep { !$saw{$uniq_by->($_)}++ } @{$old_items},
118             map { encode_entities($_, '&<>') for grep {!ref($_)} values %{$_}; $_ }
119             map { encode_entities($_, '&<>') for grep {!ref($_)} values %{$_->{dc}}; $_ }
120             map { encode_entities($_, '&<>') for grep {!ref($_)} values %{$_->{syn}}; $_ }
121             map { encode_entities($_, '&<>') for grep {!ref($_)} @{$_->{taxo}}; $_ }
122             map { eval { (my $rss = XML::RSS->new)->parse(get($_)); @{$rss->{items}} } }
123             grep { /^\w+:/ } @{$sources};
124              
125             return $self;
126             }
127              
128             1;
129              
130             =head1 SEE ALSO
131              
132             L
133              
134             =head1 AUTHORS
135              
136             Autrijus Tang Eautrijus@autrijus.orgE
137              
138             =head1 COPYRIGHT
139              
140             Copyright 2002 by Autrijus Tang Eautrijus@autrijus.orgE.
141              
142             This program is free software; you can redistribute it and/or modify it
143             under the same terms as Perl itself.
144              
145             See L
146              
147             =cut
148              
149             __END__