File Coverage

blib/lib/SoggyOnion/Plugin/ImageScraper.pm
Criterion Covered Total %
statement 27 62 43.5
branch 0 24 0.0
condition 0 10 0.0
subroutine 9 12 75.0
pod 3 3 100.0
total 39 111 35.1


line stmt bran cond sub pod time code
1             package SoggyOnion::Plugin::ImageScraper;
2 1     1   1870 use warnings;
  1         3  
  1         53  
3 1     1   9 use strict;
  1         3  
  1         68  
4 1     1   9 use base qw( SoggyOnion::Plugin );
  1         3  
  1         178  
5              
6             our $VERSION = '0.04';
7              
8 1     1   7 use Template;
  1         2  
  1         39  
9 1     1   6 use constant TEMPLATE_FILE => 'imagescraper.tt2';
  1         2  
  1         91  
10              
11 1     1   8 use LWP::Simple qw(get head $ua);
  1         3  
  1         11  
12 1     1   135 use constant MOD_TIME => 2;
  1         1  
  1         53  
13              
14 1     1   4947 use HTML::TokeParser;
  1         25223  
  1         59  
15 1     1   11 use constant { TYPE => 0, TAG => 1, ATTR => 2 };
  1         2  
  1         914  
16              
17             # set our useragent to be nice
18             sub init {
19 0     0 1   $ua->agent( SoggyOnion->useragent );
20             }
21              
22             # try getting the modification time of the RSS feed from the web server.
23             # if we can't, just return the current time to make sure the feed is
24             # processed.
25             sub mod_time {
26 0     0 1   my $self = shift;
27 0           my $mtime = [ head( $self->{rss} ) ]->[MOD_TIME];
28 0   0       return $mtime || time; # in case no modification time is available
29             }
30              
31             sub content {
32 0     0 1   my $self = shift;
33              
34             # error checking for required options
35 0 0         die "'images' attribute is required\n"
36             unless ( exists $self->{images} );
37              
38             # setup defaults for other options
39 0   0       $self->{offset} ||= 0;
40 0   0       $self->{limit} ||= 1;
41              
42             # get the URL
43 0           my $document = get( $self->{images} );
44 0 0         die "couldn't get document" unless defined $document;
45              
46             # cheap way of getting title! FIXME
47             # URI::Title doesn't do much more anyway
48 0           $document =~ m#(.+?)#si;
49 0           my $title = $1;
50              
51             # process links
52 0 0         my $parser = HTML::TokeParser->new( \$document ) or die $!;
53 0           my $i = 0;
54 0           my @links = ();
55 0           while ( my $token = $parser->get_token ) {
56 0 0         next unless ref $token eq 'ARRAY';
57 0 0 0       next unless $token->[TYPE] eq 'S' && $token->[TAG] eq 'img';
58 0 0         next unless $i++ >= $self->{offset};
59 0           push @links, $token->[ATTR]->{src};
60 0 0         last if @links >= $self->{limit};
61             }
62              
63             # did we specify a prefix in the config? if so, prefix all links
64 0 0         if ( exists $self->{prefix} ) {
65 0           @links = map { $self->{prefix} . $_ } @links;
  0            
66             }
67              
68             # no prefix in the conf? go through and make sure that all our links are
69             # absolute. if they're relative, prepend the source URL
70             else {
71              
72             # determine protocol -- use if double-slash shorthand is used
73 0           $self->{images} =~ m/^(\w+):/;
74 0           my $protocol = $1;
75              
76             # strip connecting slash
77 0           $self->{images} =~ s#/+$##;
78              
79 0           for (@links) {
80              
81             # valid but uncommon URI shorthand
82 0 0         $_ = "$protocol\:$_" if m#^//[^/]#;
83              
84             # strip connecting slashes
85 0           s#^/+##;
86              
87             # prepend relative URIs with our source URI
88 0 0         $_ = $self->{images} . '/' . $_
89             unless m/^\w+:\/\//;
90             }
91             }
92              
93             # run it through our template
94 0 0         my $tt
95             = Template->new( INCLUDE_PATH => SoggyOnion->options->{templatedir} )
96             or die "couldn't create Template object\n";
97 0           my $output;
98 0 0         $tt->process( TEMPLATE_FILE,
99             { links => \@links, src => $self->{images}, title => $title },
100             \$output )
101             or die $tt->error;
102 0           return $output;
103             }
104              
105             1;
106              
107             __END__