File Coverage

lib/URI/Title.pm
Criterion Covered Total %
statement 81 125 64.8
branch 15 54 27.7
condition 9 25 36.0
subroutine 15 16 93.7
pod 1 1 100.0
total 121 221 54.7


line stmt bran cond sub pod time code
1             package URI::Title;
2             $URI::Title::VERSION = '1.902';
3 4     4   218824 use 5.006;
  4         34  
4 4     4   18 use warnings;
  4         6  
  4         108  
5 4     4   19 use strict;
  4         12  
  4         104  
6              
7 4     4   22 use base qw(Exporter);
  4         7  
  4         579  
8             our @EXPORT_OK = qw( title );
9              
10 4     4   1572 use Module::Pluggable (search_path => ['URI::Title'], require => 1 );
  4         40246  
  4         24  
11 4     4   2237 use File::Type;
  4         60036  
  4         162  
12              
13 4     4   2575 use LWP::UserAgent;
  4         157430  
  4         132  
14 4     4   30 use HTTP::Request;
  4         8  
  4         90  
15 4     4   18 use HTTP::Response;
  4         7  
  4         2551  
16              
17              
18             sub _ua {
19 4     4   33 my $ua = LWP::UserAgent->new;
20 4         7701 $ua->agent("URI::Title/$URI::Title::VERSION");
21 4         221 $ua->timeout(20);
22 4         63 $ua->default_header('Accept-Encoding' => 'gzip');
23 4         215 return $ua;
24             }
25              
26             sub _get_limited {
27 3     3   6 my $url = shift;
28 3   50     15 my $size = shift || 32*1024;
29 3         9 my $ua = _ua();
30 3         13 $ua->max_size($size);
31 3         43 my $req = HTTP::Request->new(GET => $url);
32 3         16318 $req->header( Range => "bytes=0-$size" );
33 3         243 $req->header( "Accept-Encoding" => "" ); # vox sends invalid gzipped data?
34 3         118 my $res = eval { $ua->request($req) };
  3         19  
35 3 50       643631 return unless $res; # useragent explodes for non-valid uris
36              
37             # some servers don't like the Range header. If we
38             # get an odd 4xx response that isn't 404, just try getting
39             # the full thing. This may be a little impolite.
40 3 50 66     8 return _get_all($url) if $res->code >= 400 and $res->code < 500 and $res->code != 404;
      66        
41 2 50       28 return unless $res->is_success;
42 2 50       28 if (!wantarray) {
43 0   0     0 return $res->decoded_content || $res->content;
44             }
45 2         4 my $cset = "iso-8859-1"; # default;
46 2         6 my $ct = $res->header("Content-type");
47 2 50       79 if ($ct =~ /charset\s*=\>?\s*\"?([\w-]+)/i) {
48 0         0 $cset = lc($1);
49             #warn "Got charset $cset from URI headers\n";
50             }
51 2   33     14 return ($res->decoded_content || $res->content, $cset);
52             }
53              
54             sub _get_end {
55 0     0   0 my $url = shift;
56 0   0     0 my $size = shift || 16*1024;
57              
58 0         0 my $ua = _ua();
59              
60 0         0 my $request = HTTP::Request->new(HEAD => $url);
61 0         0 my $response = $ua->request($request);
62 0 0       0 return unless $response; # useragent explodes for non-valid uris
63 0         0 my $length = $response->header('Content-Length');
64              
65 0 0       0 return unless $length; # We can't get the length, and we're _not_
66             # going to get the whole thing.
67              
68 0         0 my $start = $length - $size;
69              
70 0         0 $ua->max_size($size);
71              
72 0         0 my $req = HTTP::Request->new(GET => $url);
73 0         0 $req->header( Range => "bytes=$start-$length" );
74 0         0 my $res = $ua->request($req);
75 0 0       0 return unless $res; # useragent explodes for non-valid uris
76              
77 0 0       0 return unless $res->is_success;
78 0 0       0 return $res->decoded_content unless wantarray;
79 0         0 my $cset = "iso-8859-1"; # default;
80 0         0 my $ct = $res->header("Content-type");
81 0 0       0 if ($ct =~ /charset=\"?(.*)\"?$/) {
82 0         0 $cset = $1;
83             }
84 0         0 return ($res->decoded_content, $cset);
85             }
86              
87             sub _get_all {
88 1     1   31 my $url = shift;
89 1         3 my $ua = _ua();
90 1         3 my $req = HTTP::Request->new(GET => $url);
91 1         73 my $res = $ua->request($req);
92 1 50       350 return unless $res->is_success;
93 0 0       0 return $res->decoded_content unless wantarray;
94 0         0 my $cset = "iso-8859-1"; # default;
95 0         0 my $ct = $res->header("Content-type");
96 0 0       0 if ($ct =~ /charset=\"?(.*)\"?$/) {
97 0         0 $cset = $1;
98             }
99 0         0 return ($res->decoded_content, $cset);
100             }
101              
102             # cache
103             our $HANDLERS;
104             sub _handlers {
105 2     2   11 my @plugins = plugins();
106 2 50       10115 return $HANDLERS if $HANDLERS;
107 2         5 for my $plugin (@plugins) {
108 8         45 for my $type ($plugin->types) {
109 18         35 $HANDLERS->{$type} = $plugin;
110             }
111             }
112 2         6 return $HANDLERS;
113             }
114              
115             sub title {
116 3     3 1 242204 my $param = shift;
117 3         46 my $data;
118             my $url;
119 3         0 my $type;
120 3         6 my $cset = "iso-8859-1"; # default
121              
122             # we can be passed a hashref. Keys are url, or data.
123 3 50       25 if (ref($param)) {
124 0 0       0 if ($param->{data}) {
    0          
125 0         0 $data = $param->{data};
126 0 0       0 $data = $$data if ref($data); # we can be passed a ref to the data
127             } elsif ($param->{url}) {
128 0         0 $url = $param->{url};
129             } else {
130 4     4   26 use Carp qw(croak);
  4         7  
  4         1147  
131 0         0 croak("Expected a single parameter, or an 'url' or 'data' key");
132             }
133              
134             # otherwise, assume we're passed an url
135             } else {
136 3         5 $url = $param;
137             }
138              
139 3 0 33     9 if (!$url and !$data) {
140 0         0 warn "Need at least an url or data";
141 0         0 return;
142             }
143              
144             # If we don't have data, we will have an url, so try to get data.
145 3 50       8 if (!$data) {
146             # url might be a filename
147 3 50       73 if (-e $url) {
148 0         0 local $/ = undef;
149 0 0       0 unless (open DATA, $url) {
150 0         0 warn "$url looks like a file and isn't";
151 0         0 return;
152             }
153 0         0 $data = ;
154 0         0 close DATA;
155            
156             # If not, assume it's an url
157             } else {
158             # special case for itms
159 3 50       11 if ($url =~ s/^itms:/http:/) {
160 0         0 $type = "itms";
161 0         0 $data = 1; # we don't need it, fake it.
162              
163             } else {
164 3         8 $url =~ s{#!}{?_escaped_fragment_=};
165              
166 3         11 ($data, $cset) = _get_limited($url);
167             }
168             }
169             }
170 3 100       18423 if (!$data) {
171             #warn "Can't get content for $url";
172 1         4 return;
173             }
174              
175 2 50       7 return undef unless $data;
176              
177 2   33     25 $type ||= File::Type->new->checktype_contents($data);
178              
179 2         2023 my $handlers = _handlers();
180             my $handler = $handlers->{$type} || $handlers->{default}
181 2 50 33     13 or return;
182              
183 2         9 return $handler->title($url, $data, $type, $cset);
184             }
185              
186             1;
187              
188             __END__