File Coverage

lib/URI/Title.pm
Criterion Covered Total %
statement 85 125 68.0
branch 17 54 31.4
condition 9 25 36.0
subroutine 15 16 93.7
pod 1 1 100.0
total 127 221 57.4


line stmt bran cond sub pod time code
1             package URI::Title;
2             $URI::Title::VERSION = '1.904';
3 4     4   206944 use 5.006;
  4         39  
4 4     4   21 use warnings;
  4         7  
  4         108  
5 4     4   21 use strict;
  4         4  
  4         102  
6              
7 4     4   20 use base qw(Exporter);
  4         5  
  4         651  
8             our @EXPORT_OK = qw( title );
9              
10 4     4   1890 use Module::Pluggable (search_path => ['URI::Title'], require => 1 );
  4         43137  
  4         29  
11 4     4   2628 use File::Type;
  4         71009  
  4         195  
12              
13 4     4   2978 use LWP::UserAgent;
  4         209086  
  4         155  
14 4     4   39 use HTTP::Request;
  4         9  
  4         95  
15 4     4   23 use HTTP::Response;
  4         6  
  4         3211  
16              
17              
18             sub _ua {
19 3     3   28 my $ua = LWP::UserAgent->new;
20 3         6238 $ua->agent("URI::Title/$URI::Title::VERSION");
21 3         205 $ua->timeout(20);
22 3         56 $ua->default_header('Accept-Encoding' => 'gzip');
23 3         144 return $ua;
24             }
25              
26             sub _get_limited {
27 2     2   4 my $url = shift;
28 2   50     13 my $size = shift || 32*1024;
29 2         6 my $ua = _ua();
30 2         10 $ua->max_size($size);
31 2         33 my $req = HTTP::Request->new(GET => $url);
32 2         12110 $req->header( Range => "bytes=0-$size" );
33 2         268 $req->header( "Accept-Encoding" => "" ); # vox sends invalid gzipped data?
34 2         98 my $res = eval { $ua->request($req) };
  2         18  
35 2 50       388950 return unless $res; # useragent explodes for non-valid uris
36              
37             # some servers don't like the Range header. If we
38             # get an odd 4xx response that isn't 404, just try getting
39             # the full thing. This may be a little impolite.
40 2 50 66     9 return _get_all($url) if $res->code >= 400 and $res->code < 500 and $res->code != 404;
      66        
41 1 50       20 return unless $res->is_success;
42 1 50       18 if (!wantarray) {
43 0   0     0 return $res->decoded_content || $res->content;
44             }
45 1         4 my $cset = "iso-8859-1"; # default;
46 1         3 my $ct = $res->header("Content-type");
47 1 50       61 if ($ct =~ /charset\s*=\>?\s*\"?([\w-]+)/i) {
48 0         0 $cset = lc($1);
49             #warn "Got charset $cset from URI headers\n";
50             }
51 1   33     7 return ($res->decoded_content || $res->content, $cset);
52             }
53              
54             sub _get_end {
55 0     0   0 my $url = shift;
56 0   0     0 my $size = shift || 16*1024;
57              
58 0         0 my $ua = _ua();
59              
60 0         0 my $request = HTTP::Request->new(HEAD => $url);
61 0         0 my $response = $ua->request($request);
62 0 0       0 return unless $response; # useragent explodes for non-valid uris
63 0         0 my $length = $response->header('Content-Length');
64              
65 0 0       0 return unless $length; # We can't get the length, and we're _not_
66             # going to get the whole thing.
67              
68 0         0 my $start = $length - $size;
69              
70 0         0 $ua->max_size($size);
71              
72 0         0 my $req = HTTP::Request->new(GET => $url);
73 0         0 $req->header( Range => "bytes=$start-$length" );
74 0         0 my $res = $ua->request($req);
75 0 0       0 return unless $res; # useragent explodes for non-valid uris
76              
77 0 0       0 return unless $res->is_success;
78 0 0       0 return $res->decoded_content unless wantarray;
79 0         0 my $cset = "iso-8859-1"; # default;
80 0         0 my $ct = $res->header("Content-type");
81 0 0       0 if ($ct =~ /charset=\"?(.*)\"?$/) {
82 0         0 $cset = $1;
83             }
84 0         0 return ($res->decoded_content, $cset);
85             }
86              
87             sub _get_all {
88 1     1   46 my $url = shift;
89 1         3 my $ua = _ua();
90 1         4 my $req = HTTP::Request->new(GET => $url);
91 1         98 my $res = $ua->request($req);
92 1 50       460 return unless $res->is_success;
93 0 0       0 return $res->decoded_content unless wantarray;
94 0         0 my $cset = "iso-8859-1"; # default;
95 0         0 my $ct = $res->header("Content-type");
96 0 0       0 if ($ct =~ /charset=\"?(.*)\"?$/) {
97 0         0 $cset = $1;
98             }
99 0         0 return ($res->decoded_content, $cset);
100             }
101              
102             # cache
103             our $HANDLERS;
104             sub _handlers {
105 2     2   10 my @plugins = plugins();
106 2 50       8578 return $HANDLERS if $HANDLERS;
107 2         7 for my $plugin (@plugins) {
108 8         48 for my $type ($plugin->types) {
109 18         42 $HANDLERS->{$type} = $plugin;
110             }
111             }
112 2         8 return $HANDLERS;
113             }
114              
115             sub title {
116 3     3 1 53678 my $param = shift;
117 3         14 my $data;
118             my $url;
119 3         0 my $type;
120 3         6 my $cset = "iso-8859-1"; # default
121              
122             # we can be passed a hashref. Keys are url, or data.
123 3 50       31 if (ref($param)) {
124 0 0       0 if ($param->{data}) {
    0          
125 0         0 $data = $param->{data};
126 0 0       0 $data = $$data if ref($data); # we can be passed a ref to the data
127             } elsif ($param->{url}) {
128 0         0 $url = $param->{url};
129             } else {
130 4     4   31 use Carp qw(croak);
  4         9  
  4         1375  
131 0         0 croak("Expected a single parameter, or an 'url' or 'data' key");
132             }
133              
134             # otherwise, assume we're passed an url
135             } else {
136 3         6 $url = $param;
137             }
138              
139 3 0 33     12 if (!$url and !$data) {
140 0         0 warn "Need at least an url or data";
141 0         0 return;
142             }
143              
144             # If we don't have data, we will have an url, so try to get data.
145 3 50       8 if (!$data) {
146             # url might be a filename
147 3 100       98 if (-e $url) {
148 1         6 local $/ = undef;
149 1 50       35 unless (open DATA, $url) {
150 0         0 warn "$url looks like a file and isn't";
151 0         0 return;
152             }
153 1         63 $data = ;
154 1         16 close DATA;
155              
156             # If not, assume it's an url
157             } else {
158             # special case for itms
159 2 50       14 if ($url =~ s/^itms:/http:/) {
160 0         0 $type = "itms";
161 0         0 $data = 1; # we don't need it, fake it.
162              
163             } else {
164 2         6 $url =~ s{#!}{?_escaped_fragment_=};
165              
166 2         8 ($data, $cset) = _get_limited($url);
167             }
168             }
169             }
170 3 100       19159 if (!$data) {
171             #warn "Can't get content for $url";
172 1         6 return;
173             }
174              
175 2 50       7 return undef unless $data;
176              
177 2   33     27 $type ||= File::Type->new->checktype_contents($data);
178              
179 2         1872 my $handlers = _handlers();
180             my $handler = $handlers->{$type} || $handlers->{default}
181 2 50 33     16 or return;
182              
183 2         15 return $handler->title($url, $data, $type, $cset);
184             }
185              
186             1;
187              
188             __END__