| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package SoggyOnion::Plugin::ImageScraper; |
|
2
|
1
|
|
|
1
|
|
1870
|
use warnings; |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
53
|
|
|
3
|
1
|
|
|
1
|
|
9
|
use strict; |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
68
|
|
|
4
|
1
|
|
|
1
|
|
9
|
use base qw( SoggyOnion::Plugin ); |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
178
|
|
|
5
|
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
our $VERSION = '0.04'; |
|
7
|
|
|
|
|
|
|
|
|
8
|
1
|
|
|
1
|
|
7
|
use Template; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
39
|
|
|
9
|
1
|
|
|
1
|
|
6
|
use constant TEMPLATE_FILE => 'imagescraper.tt2'; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
91
|
|
|
10
|
|
|
|
|
|
|
|
|
11
|
1
|
|
|
1
|
|
8
|
use LWP::Simple qw(get head $ua); |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
11
|
|
|
12
|
1
|
|
|
1
|
|
135
|
use constant MOD_TIME => 2; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
53
|
|
|
13
|
|
|
|
|
|
|
|
|
14
|
1
|
|
|
1
|
|
4947
|
use HTML::TokeParser; |
|
|
1
|
|
|
|
|
25223
|
|
|
|
1
|
|
|
|
|
59
|
|
|
15
|
1
|
|
|
1
|
|
11
|
use constant { TYPE => 0, TAG => 1, ATTR => 2 }; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
914
|
|
|
16
|
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
# set our useragent to be nice |
|
18
|
|
|
|
|
|
|
sub init { |
|
19
|
0
|
|
|
0
|
1
|
|
$ua->agent( SoggyOnion->useragent ); |
|
20
|
|
|
|
|
|
|
} |
|
21
|
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
# try getting the modification time of the RSS feed from the web server. |
|
23
|
|
|
|
|
|
|
# if we can't, just return the current time to make sure the feed is |
|
24
|
|
|
|
|
|
|
# processed. |
|
25
|
|
|
|
|
|
|
sub mod_time { |
|
26
|
0
|
|
|
0
|
1
|
|
my $self = shift; |
|
27
|
0
|
|
|
|
|
|
my $mtime = [ head( $self->{rss} ) ]->[MOD_TIME]; |
|
28
|
0
|
|
0
|
|
|
|
return $mtime || time; # in case no modification time is available |
|
29
|
|
|
|
|
|
|
} |
|
30
|
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
sub content { |
|
32
|
0
|
|
|
0
|
1
|
|
my $self = shift; |
|
33
|
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
# error checking for required options |
|
35
|
0
|
0
|
|
|
|
|
die "'images' attribute is required\n" |
|
36
|
|
|
|
|
|
|
unless ( exists $self->{images} ); |
|
37
|
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
# setup defaults for other options |
|
39
|
0
|
|
0
|
|
|
|
$self->{offset} ||= 0; |
|
40
|
0
|
|
0
|
|
|
|
$self->{limit} ||= 1; |
|
41
|
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
# get the URL |
|
43
|
0
|
|
|
|
|
|
my $document = get( $self->{images} ); |
|
44
|
0
|
0
|
|
|
|
|
die "couldn't get document" unless defined $document; |
|
45
|
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
# cheap way of getting title! FIXME |
|
47
|
|
|
|
|
|
|
# URI::Title doesn't do much more anyway |
|
48
|
0
|
|
|
|
|
|
$document =~ m#(.+?)#si; |
|
49
|
0
|
|
|
|
|
|
my $title = $1; |
|
50
|
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
# process links |
|
52
|
0
|
0
|
|
|
|
|
my $parser = HTML::TokeParser->new( \$document ) or die $!; |
|
53
|
0
|
|
|
|
|
|
my $i = 0; |
|
54
|
0
|
|
|
|
|
|
my @links = (); |
|
55
|
0
|
|
|
|
|
|
while ( my $token = $parser->get_token ) { |
|
56
|
0
|
0
|
|
|
|
|
next unless ref $token eq 'ARRAY'; |
|
57
|
0
|
0
|
0
|
|
|
|
next unless $token->[TYPE] eq 'S' && $token->[TAG] eq 'img'; |
|
58
|
0
|
0
|
|
|
|
|
next unless $i++ >= $self->{offset}; |
|
59
|
0
|
|
|
|
|
|
push @links, $token->[ATTR]->{src}; |
|
60
|
0
|
0
|
|
|
|
|
last if @links >= $self->{limit}; |
|
61
|
|
|
|
|
|
|
} |
|
62
|
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
# did we specify a prefix in the config? if so, prefix all links |
|
64
|
0
|
0
|
|
|
|
|
if ( exists $self->{prefix} ) { |
|
65
|
0
|
|
|
|
|
|
@links = map { $self->{prefix} . $_ } @links; |
|
|
0
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
} |
|
67
|
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
# no prefix in the conf? go through and make sure that all our links are |
|
69
|
|
|
|
|
|
|
# absolute. if they're relative, prepend the source URL |
|
70
|
|
|
|
|
|
|
else { |
|
71
|
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
# determine protocol -- use if double-slash shorthand is used |
|
73
|
0
|
|
|
|
|
|
$self->{images} =~ m/^(\w+):/; |
|
74
|
0
|
|
|
|
|
|
my $protocol = $1; |
|
75
|
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
# strip connecting slash |
|
77
|
0
|
|
|
|
|
|
$self->{images} =~ s#/+$##; |
|
78
|
|
|
|
|
|
|
|
|
79
|
0
|
|
|
|
|
|
for (@links) { |
|
80
|
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
# valid but uncommon URI shorthand |
|
82
|
0
|
0
|
|
|
|
|
$_ = "$protocol\:$_" if m#^//[^/]#; |
|
83
|
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
# strip connecting slashes |
|
85
|
0
|
|
|
|
|
|
s#^/+##; |
|
86
|
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
# prepend relative URIs with our source URI |
|
88
|
0
|
0
|
|
|
|
|
$_ = $self->{images} . '/' . $_ |
|
89
|
|
|
|
|
|
|
unless m/^\w+:\/\//; |
|
90
|
|
|
|
|
|
|
} |
|
91
|
|
|
|
|
|
|
} |
|
92
|
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
# run it through our template |
|
94
|
0
|
0
|
|
|
|
|
my $tt |
|
95
|
|
|
|
|
|
|
= Template->new( INCLUDE_PATH => SoggyOnion->options->{templatedir} ) |
|
96
|
|
|
|
|
|
|
or die "couldn't create Template object\n"; |
|
97
|
0
|
|
|
|
|
|
my $output; |
|
98
|
0
|
0
|
|
|
|
|
$tt->process( TEMPLATE_FILE, |
|
99
|
|
|
|
|
|
|
{ links => \@links, src => $self->{images}, title => $title }, |
|
100
|
|
|
|
|
|
|
\$output ) |
|
101
|
|
|
|
|
|
|
or die $tt->error; |
|
102
|
0
|
|
|
|
|
|
return $output; |
|
103
|
|
|
|
|
|
|
} |
|
104
|
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
1; |
|
106
|
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
__END__ |