| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package Feed::Find; | 
| 2 | 3 |  |  | 3 |  | 203878 | use strict; | 
|  | 3 |  |  |  |  | 49 |  | 
|  | 3 |  |  |  |  | 95 |  | 
| 3 | 3 |  |  | 3 |  | 16 | use warnings; | 
|  | 3 |  |  |  |  | 5 |  | 
|  | 3 |  |  |  |  | 91 |  | 
| 4 | 3 |  |  | 3 |  | 58 | use 5.008_001; | 
|  | 3 |  |  |  |  | 9 |  | 
| 5 |  |  |  |  |  |  |  | 
| 6 | 3 |  |  | 3 |  | 15 | use base qw( Class::ErrorHandler ); | 
|  | 3 |  |  |  |  | 12 |  | 
|  | 3 |  |  |  |  | 1659 |  | 
| 7 | 3 |  |  | 3 |  | 2211 | use LWP::UserAgent; | 
|  | 3 |  |  |  |  | 94376 |  | 
|  | 3 |  |  |  |  | 120 |  | 
| 8 | 3 |  |  | 3 |  | 2074 | use HTML::Parser; | 
|  | 3 |  |  |  |  | 16978 |  | 
|  | 3 |  |  |  |  | 195 |  | 
| 9 | 3 |  |  | 3 |  | 34 | use URI; | 
|  | 3 |  |  |  |  | 6 |  | 
|  | 3 |  |  |  |  | 96 |  | 
| 10 |  |  |  |  |  |  |  | 
| 11 | 3 |  |  | 3 |  | 17 | use vars qw( $VERSION $ua ); | 
|  | 3 |  |  |  |  | 5 |  | 
|  | 3 |  |  |  |  | 224 |  | 
| 12 |  |  |  |  |  |  | $VERSION = '0.11'; | 
| 13 |  |  |  |  |  |  |  | 
| 14 | 3 |  |  |  |  | 2663 | use constant FEED_MIME_TYPES => [ | 
| 15 |  |  |  |  |  |  | 'application/x.atom+xml', | 
| 16 |  |  |  |  |  |  | 'application/atom+xml', | 
| 17 |  |  |  |  |  |  | 'application/xml', | 
| 18 |  |  |  |  |  |  | 'text/xml', | 
| 19 |  |  |  |  |  |  | 'application/rss+xml', | 
| 20 |  |  |  |  |  |  | 'application/rdf+xml', | 
| 21 | 3 |  |  | 3 |  | 17 | ]; | 
|  | 3 |  |  |  |  | 5 |  | 
| 22 |  |  |  |  |  |  |  | 
| 23 |  |  |  |  |  |  | our $FEED_EXT = qr/\.(?:rss|xml|rdf|atom)$/; | 
| 24 |  |  |  |  |  |  | our %IsFeed = map { $_ => 1 } @{ FEED_MIME_TYPES() }; | 
| 25 |  |  |  |  |  |  |  | 
| 26 |  |  |  |  |  |  | sub find { | 
| 27 | 4 |  |  | 4 | 1 | 6179 | my $class = shift; | 
| 28 | 4 |  |  |  |  | 21 | my($uri) = @_; | 
| 29 | 4 | 100 |  |  |  | 22 | $ua = LWP::UserAgent->new unless defined $ua; | 
| 30 | 4 |  |  |  |  | 2777 | $ua->env_proxy; | 
| 31 | 4 |  |  |  |  | 34198 | $ua->agent(join '/', $class, $class->VERSION); | 
| 32 | 4 |  |  |  |  | 356 | $ua->parse_head(0);   ## We're already basically doing this ourselves. | 
| 33 | 4 |  |  |  |  | 313 | my $req = HTTP::Request->new(GET => $uri); | 
| 34 | 4 |  |  |  |  | 12927 | my $p = HTML::Parser->new(api_version => 3, | 
| 35 |  |  |  |  |  |  | start_h => [ \&_find_links, 'self,tagname,attr' ]); | 
| 36 | 4 |  |  |  |  | 218 | $p->{base_uri} = $uri; | 
| 37 | 4 |  |  |  |  | 11 | $p->{feeds} = []; | 
| 38 |  |  |  |  |  |  | my $res = $ua->request($req, sub { | 
| 39 | 7 |  |  | 7 |  | 23983 | my($chunk, $res, $proto) = @_; | 
| 40 | 7 | 50 |  |  |  | 44 | if ($IsFeed{$res->content_type}) { | 
| 41 | 0 |  |  |  |  | 0 | push @{ $p->{feeds} }, $uri; | 
|  | 0 |  |  |  |  | 0 |  | 
| 42 | 0 |  |  |  |  | 0 | die "Done parsing"; | 
| 43 |  |  |  |  |  |  | } | 
| 44 | 7 | 100 |  |  |  | 363 | $p->parse($chunk) or die "Done parsing"; | 
| 45 | 4 |  |  |  |  | 49 | }); | 
| 46 | 4 | 50 |  |  |  | 1241 | return $class->error($res->status_line) unless $res->is_success; | 
| 47 | 4 |  |  |  |  | 46 | @{ $p->{feeds} }; | 
|  | 4 |  |  |  |  | 86 |  | 
| 48 |  |  |  |  |  |  | } | 
| 49 |  |  |  |  |  |  |  | 
| 50 |  |  |  |  |  |  | sub find_in_html { | 
| 51 | 2 |  |  | 2 | 1 | 2471 | my $class = shift; | 
| 52 | 2 |  |  |  |  | 5 | my($html, $base_uri) = @_; | 
| 53 | 2 |  |  |  |  | 20 | my $p = HTML::Parser->new(api_version => 3, | 
| 54 |  |  |  |  |  |  | start_h => [ \&_find_links, 'self,tagname,attr' ]); | 
| 55 | 2 |  |  |  |  | 98 | $p->{base_uri} = $base_uri; | 
| 56 | 2 |  |  |  |  | 5 | $p->{feeds} = []; | 
| 57 | 2 |  |  |  |  | 22 | $p->parse($$html); | 
| 58 | 2 |  |  |  |  | 4 | @{ $p->{feeds} }; | 
|  | 2 |  |  |  |  | 12 |  | 
| 59 |  |  |  |  |  |  | } | 
| 60 |  |  |  |  |  |  |  | 
| 61 |  |  |  |  |  |  | sub _find_links { | 
| 62 | 294 |  |  | 294 |  | 4615 | my($p, $tag, $attr) = @_; | 
| 63 | 294 |  |  |  |  | 335 | my $base_uri = $p->{base_uri}; | 
| 64 | 294 | 100 |  |  |  | 1189 | if ($tag eq 'link') { | 
|  |  | 50 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
| 65 | 114 | 50 |  |  |  | 171 | return unless $attr->{rel}; | 
| 66 | 114 |  |  |  |  | 232 | my %rel = map { $_ => 1 } split /\s+/, lc($attr->{rel}); | 
|  | 114 |  |  |  |  | 270 |  | 
| 67 | 114 |  |  |  |  | 216 | my $type = ''; | 
| 68 | 114 | 100 |  |  |  | 173 | if ($attr->{type}) { | 
| 69 | 51 |  |  |  |  | 151 | ($type = lc $attr->{type}) =~ s/^\s*//; | 
| 70 | 51 |  |  |  |  | 168 | $type =~ s/\s*$//; | 
| 71 |  |  |  |  |  |  | } | 
| 72 | 3 |  |  |  |  | 29 | push @{ $p->{feeds} }, URI->new_abs($attr->{href}, $base_uri)->as_string | 
| 73 |  |  |  |  |  |  | if $IsFeed{$type} && | 
| 74 | 114 | 50 | 33 |  |  | 702 | ($rel{alternate} || $rel{'service.feed'}); | 
|  |  |  | 66 |  |  |  |  | 
| 75 |  |  |  |  |  |  | } elsif ($tag eq 'base') { | 
| 76 | 0 | 0 |  |  |  | 0 | $p->{base_uri} = $attr->{href} if $attr->{href}; | 
| 77 |  |  |  |  |  |  | } elsif ($tag =~ /^(?:meta|isindex|title|script|style|head|html)$/) { | 
| 78 |  |  |  |  |  |  | ## Ignore other valid tags inside of . | 
| 79 |  |  |  |  |  |  | } elsif ($tag eq 'a') { | 
| 80 | 12 | 50 |  |  |  | 25 | my $href = $attr->{href} or return; | 
| 81 | 12 |  |  |  |  | 34 | my $uri = URI->new($href); | 
| 82 | 12 | 100 |  |  |  | 2571 | push @{ $p->{feeds} }, URI->new_abs($href, $base_uri)->as_string | 
|  | 3 |  |  |  |  | 43 |  | 
| 83 |  |  |  |  |  |  | if $uri->path =~ /$FEED_EXT/io; | 
| 84 |  |  |  |  |  |  | } else { | 
| 85 |  |  |  |  |  |  | ## Anything else indicates the start of the , | 
| 86 |  |  |  |  |  |  | ## so we stop parsing. | 
| 87 | 45 | 100 |  |  |  | 48 | $p->eof if @{ $p->{feeds} }; | 
|  | 45 |  |  |  |  | 274 |  | 
| 88 |  |  |  |  |  |  | } | 
| 89 |  |  |  |  |  |  | } | 
| 90 |  |  |  |  |  |  |  | 
| 91 |  |  |  |  |  |  | 1; | 
| 92 |  |  |  |  |  |  | __END__ |