File Coverage

blib/lib/HTTP/CookieJar.pm
Criterion Covered Total %
statement 155 157 98.7
branch 91 110 82.7
condition 28 41 68.2
subroutine 19 19 100.0
pod 7 7 100.0
total 300 334 89.8


line stmt bran cond sub pod time code
1 7     7   420968 use 5.008001;
  7         73  
2 7     7   60 use strict;
  7         13  
  7         135  
3 7     7   29 use warnings;
  7         10  
  7         346  
4              
5             package HTTP::CookieJar;
6             # ABSTRACT: A minimalist HTTP user agent cookie jar
7             our $VERSION = '0.012';
8              
9 7     7   39 use Carp ();
  7         10  
  7         92  
10 7     7   2923 use HTTP::Date ();
  7         25123  
  7         14136  
11              
12             my $HAS_MPS = eval { require Mozilla::PublicSuffix; 1 };
13              
14             #pod =construct new
15             #pod
16             #pod my $jar = HTTP::CookieJar->new;
17             #pod
18             #pod Return a new, empty cookie jar
19             #pod
20             #pod =cut
21              
22             sub new {
23 30     30 1 99990 my ($class) = @_;
24 30         115 bless { store => {} }, $class;
25             }
26              
27             #pod =method add
28             #pod
29             #pod $jar->add(
30             #pod "http://www.example.com/", "lang=en-US; Path=/; Domain=example.com"
31             #pod );
32             #pod
33             #pod Given a request URL and a C header string, attempts to adds the
34             #pod cookie to the jar. If the cookie is expired, instead it deletes any matching
35             #pod cookie from the jar. A C attribute will be converted to an absolute
36             #pod C attribute.
37             #pod
38             #pod It will throw an exception if the request URL is missing or invalid. Returns true if
39             #pod successful cookie processing or undef/empty-list on failure.
40             #pod
41             #pod =cut
42              
43             sub add {
44 41     41 1 259 my ( $self, $request, $cookie ) = @_;
45 41 50 33     187 return unless defined $cookie and length $cookie;
46 41         57 my ( $scheme, $host, $port, $request_path ) = eval { _split_url($request) };
  41         98  
47 41 50       81 Carp::croak($@) if $@;
48              
49 41 100       89 return unless my $parse = _parse_cookie($cookie);
50 40         69 my $name = $parse->{name};
51              
52             # check and normalize domain
53 40 100       72 if ( exists $parse->{domain} ) {
54 16 100       40 _normalize_domain( $host, $parse ) or return;
55             }
56             else {
57 24         40 $parse->{domain} = $host;
58 24         44 $parse->{hostonly} = 1;
59             }
60 36         55 my $domain = $parse->{domain};
61              
62             # normalize path
63 36 100 66     116 if ( !exists $parse->{path} || substr( $parse->{path}, 0, 1 ) ne "/" ) {
64 22         45 $parse->{path} = _default_path($request_path);
65             }
66 36         54 my $path = $parse->{path};
67             # set timestamps and normalize expires
68 36         100 my $now = $parse->{creation_time} = $parse->{last_access_time} = time;
69 36 100       86 if ( exists $parse->{'max-age'} ) {
70             # "If delta-seconds is less than or equal to zero (0), let expiry-time
71             # be the earliest representable date and time."
72             $parse->{expires} = $parse->{'max-age'} <= 0
73             ? 0
74 6 100       22 : $now + $parse->{'max-age'};
75 6         9 delete $parse->{'max-age'};
76             }
77             # update creation time from old cookie, if exists
78 36 100       121 if ( my $old = $self->{store}{$domain}{$path}{$name} ) {
79 4         7 $parse->{creation_time} = $old->{creation_time};
80             }
81             # if cookie has expired, purge any old matching cookie, too
82 36 100 100     103 if ( defined $parse->{expires} && $parse->{expires} < $now ) {
83 4         8 delete $self->{store}{$domain}{$path}{$name};
84             }
85             else {
86 32         70 $self->{store}{$domain}{$path}{$name} = $parse;
87             }
88 36         83 return 1;
89             }
90              
91             #pod =method clear
92             #pod
93             #pod $jar->clear
94             #pod
95             #pod Empties the cookie jar.
96             #pod
97             #pod =cut
98              
99             sub clear {
100 5     5 1 9533 my ($self) = @_;
101 5         32 $self->{store} = {};
102 5         10 return 1;
103             }
104              
105             #pod =method cookies_for
106             #pod
107             #pod my @cookies = $jar->cookies_for("http://www.example.com/foo/bar");
108             #pod
109             #pod Given a request URL, returns a list of hash references representing cookies
110             #pod that should be sent. The hash references are copies -- changing values
111             #pod will not change the cookies in the jar.
112             #pod
113             #pod Cookies set C will only be returned if the request scheme is C.
114             #pod Expired cookies will not be returned.
115             #pod
116             #pod Keys of a cookie hash reference might include:
117             #pod
118             #pod =for :list
119             #pod * name -- the name of the cookie
120             #pod * value -- the value of the cookie
121             #pod * domain -- the domain name to which the cookie applies
122             #pod * path -- the path to which the cookie applies
123             #pod * expires -- if present, when the cookie expires in epoch seconds
124             #pod * secure -- if present, the cookie was set C
125             #pod * httponly -- if present, the cookie was set C
126             #pod * hostonly -- if present, the cookie may only be used with the domain as a host
127             #pod * creation_time -- epoch seconds since the cookie was first stored
128             #pod * last_access_time -- epoch seconds since the cookie was last stored
129             #pod
130             #pod Keep in mind that C means it should only be used in requests and not
131             #pod made available via Javascript, etc. This is pretty meaningless for Perl user
132             #pod agents.
133             #pod
134             #pod Generally, user agents should use the C method instead.
135             #pod
136             #pod It will throw an exception if the request URL is missing or invalid.
137             #pod
138             #pod =cut
139              
140             sub cookies_for {
141 9     9 1 20 my ( $self, $request ) = @_;
142 9         15 my ( $scheme, $host, $port, $request_path ) = eval { _split_url($request) };
  9         14  
143 9 50       18 Carp::croak($@) if $@;
144              
145 9         12 my @found;
146 9         12 my $now = time;
147 9         30 for my $cookie ( $self->_all_cookies ) {
148 14 100 100     49 next if $cookie->{hostonly} && $host ne $cookie->{domain};
149 13 100 100     29 next if $cookie->{secure} && $scheme ne 'https';
150 12 50 66     27 next if defined( $cookie->{expires} ) && $cookie->{expires} < $now;
151 12 50       25 next unless _domain_match( $host, $cookie->{domain} );
152 12 100       23 next unless _path_match( $request_path, $cookie->{path} );
153 11         21 push @found, $cookie;
154             }
155             @found = sort {
156 9         26 length( $b->{path} ) <=> length( $a->{path} )
157             || $a->{creation_time} <=> $b->{creation_time}
158 5 50       19 } @found;
159 9         31 return @found;
160             }
161              
162             #pod =method cookie_header
163             #pod
164             #pod my $header = $jar->cookie_header("http://www.example.com/foo/bar");
165             #pod
166             #pod Given a request URL, returns a correctly-formatted string with all relevant
167             #pod cookies for the request. This string is ready to be used in a C header
168             #pod in an HTTP request. E.g.:
169             #pod
170             #pod SID=31d4d96e407aad42; lang=en-US
171             #pod
172             #pod It follows the same exclusion rules as C.
173             #pod
174             #pod If the request is invalid or no cookies apply, it will return an empty string.
175             #pod
176             #pod =cut
177              
178             sub cookie_header {
179 7     7 1 23 my ( $self, $req ) = @_;
180 7         13 return join( "; ", map { "$_->{name}=$_->{value}" } $self->cookies_for($req) );
  5         27  
181             }
182              
183             #pod =method dump_cookies
184             #pod
185             #pod my @list = $jar->dump_cookies;
186             #pod my @list = $jar->dump_cookies( { persistent => 1 } );
187             #pod
188             #pod Returns a list of raw cookies in string form. The strings resemble what
189             #pod would be received from C headers, but with additional internal
190             #pod fields. The list is only intended for use with C to allow
191             #pod cookie jar persistence.
192             #pod
193             #pod If a hash reference with a true C key is given as an argument,
194             #pod cookies without an C time (i.e. "session cookies") will be omitted.
195             #pod
196             #pod Here is a trivial example of saving a cookie jar file with L:
197             #pod
198             #pod path("jar.txt")->spew( join "\n", $jar->dump_cookies );
199             #pod
200             #pod =cut
201              
202             sub dump_cookies {
203 7     7 1 26 my ( $self, $args ) = @_;
204 7         10 my @list;
205 7         16 for my $c ( $self->_all_cookies ) {
206 8         22 my @parts = "$c->{name}=$c->{value}";
207 8 100       25 if ( defined $c->{expires} ) {
208 4         15 push @parts, 'Expires=' . HTTP::Date::time2str( $c->{expires} );
209             }
210             else {
211 4 100       9 next if $args->{persistent};
212             }
213 7         101 for my $attr (qw/Domain Path Creation_Time Last_Access_Time/) {
214 28 50       80 push @parts, "$attr=$c->{lc $attr}" if defined $c->{ lc $attr };
215             }
216 7         23 for my $attr (qw/Secure HttpOnly HostOnly/) {
217 21 100       42 push @parts, $attr if $c->{ lc $attr };
218             }
219 7         25 push @list, join( "; ", @parts );
220             }
221 7         33 return @list;
222             }
223              
224             #pod =method load_cookies
225             #pod
226             #pod $jar->load_cookies( @cookies );
227             #pod
228             #pod Given a list of cookie strings from C, it adds them to
229             #pod the cookie jar. Cookies added in this way will supersede any existing
230             #pod cookies with similar domain, path and name.
231             #pod
232             #pod It returns the jar object for convenience when loading a new object:
233             #pod
234             #pod my $jar = HTTP::CookieJar->new->load_cookies( @cookies );
235             #pod
236             #pod Here is a trivial example of loading a cookie jar file with L:
237             #pod
238             #pod my $jar = HTTP::CookieJar->new->load_cookies(
239             #pod path("jar.txt")->lines
240             #pod );
241             #pod
242             #pod =cut
243              
244             sub load_cookies {
245 4     4 1 12 my ( $self, @cookies ) = @_;
246 4         8 for my $cookie (@cookies) {
247 5         11 my $p = _parse_cookie( $cookie, 1 );
248 5 100 66     19 next unless exists $p->{domain} && exists $p->{path};
249 4         7 $p->{$_} = time for grep { !defined $p->{$_} } qw/creation_time last_access_time/;
  8         20  
250 4         17 $self->{store}{ $p->{domain} }{ $p->{path} }{ $p->{name} } = $p;
251             }
252 4         17 return $self;
253             }
254              
255             #--------------------------------------------------------------------------#
256             # private methods
257             #--------------------------------------------------------------------------#
258              
259             # return a copy of all cookies
260             sub _all_cookies {
261             return map {
262 22         156 { %$_ }
263 16     16   22 } map { values %$_ } map { values %$_ } values %{ $_[0]->{store} };
  23         65  
  21         57  
  16         50  
264             }
265              
266             #--------------------------------------------------------------------------#
267             # Helper subroutines
268             #--------------------------------------------------------------------------#
269              
270             my $pub_re = qr/(?:domain|path|expires|max-age|httponly|secure)/;
271             my $pvt_re = qr/(?:$pub_re|creation_time|last_access_time|hostonly)/;
272              
273             sub _parse_cookie {
274 59     59   19297 my ( $cookie, $private ) = @_;
275 59 50       110 $cookie = '' unless defined $cookie;
276 59         212 my ( $kvp, @attrs ) = split /;/, $cookie;
277 59 100       120 $kvp = '' unless defined $kvp;
278             my ( $name, $value ) =
279 59         130 map { s/^\s*//; s/\s*$//; $_ } split( /=/, $kvp, 2 ); ## no critic
  112         300  
  112         355  
  112         238  
280              
281 59 100 100     216 return unless defined $name and length $name;
282 55 50       94 $value = '' unless defined $value;
283 55         136 my $parse = { name => $name, value => $value };
284 55         97 for my $s (@attrs) {
285 94 100 66     342 next unless defined $s && $s =~ /\S/;
286 91         192 my ( $k, $v ) = map { s/^\s*//; s/\s*$//; $_ } split( /=/, $s, 2 ); ## no critic
  160         318  
  160         427  
  160         309  
287 91         153 $k = lc $k;
288 91 100       726 next unless $private ? ( $k =~ m/^$pvt_re$/ ) : ( $k =~ m/^$pub_re$/ );
    50          
289 91 100       225 $v = 1 if $k =~ m/^(?:httponly|secure|hostonly)$/; # boolean flag if present
290 91 100 100     179 $v = HTTP::Date::str2time($v) || 0 if $k eq 'expires'; # convert to epoch
291 91 100       783 next unless length $v;
292 90 100       155 $v =~ s{^\.}{} if $k eq 'domain'; # strip leading dot
293 90 100       138 $v =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg if $k eq 'path'; # unescape
  0         0  
294 90         207 $parse->{$k} = $v;
295             }
296 55         142 return $parse;
297             }
298              
299             sub _domain_match {
300 37     37   65 my ( $string, $dom_string ) = @_;
301 37 100       77 return 1 if $dom_string eq $string;
302 27 100       85 return unless $string =~ /[a-z]/i; # non-numeric
303 24 100       206 if ( $string =~ s{\Q$dom_string\E$}{} ) {
304 13         51 return substr( $string, -1, 1 ) eq '.'; # "foo."
305             }
306 11         36 return;
307             }
308              
309             sub _normalize_domain {
310 16     16   32 my ( $host, $parse ) = @_;
311              
312 16 50       34 if ($HAS_MPS) {
313 16         25 my $host_pub_suff = eval { Mozilla::PublicSuffix::public_suffix($host) };
  16         54  
314 16 100       652 $host_pub_suff = '' unless defined $host_pub_suff;
315 16 100       42 if ( _domain_match( $host_pub_suff, $parse->{domain} ) ) {
316 4 100       9 if ( $parse->{domain} eq $host ) {
317 2         7 return $parse->{hostonly} = 1;
318             }
319             else {
320 2         9 return;
321             }
322             }
323             }
324              
325 12 100 66     53 if ( $parse->{domain} !~ m{\.} && $parse->{domain} eq $host ) {
326 3         11 return $parse->{hostonly} = 1;
327             }
328              
329 9         19 return _domain_match( $host, $parse->{domain} );
330             }
331              
332             sub _default_path {
333 22     22   35 my ($path) = @_;
334 22 50 33     87 return "/" if !length $path || substr( $path, 0, 1 ) ne "/";
335 22         78 my ($default) = $path =~ m{^(.*)/}; # greedy to last /
336 22 100       60 return length($default) ? $default : "/";
337             }
338              
339             sub _path_match {
340 12     12   20 my ( $req_path, $cookie_path ) = @_;
341 12 100       22 return 1 if $req_path eq $cookie_path;
342 9 100       117 if ( $req_path =~ m{^\Q$cookie_path\E(.*)} ) {
343 8         19 my $rest = $1;
344 8 100       34 return 1 if substr( $cookie_path, -1, 1 ) eq '/';
345 5 50       30 return 1 if substr( $rest, 0, 1 ) eq '/';
346             }
347 1         4 return;
348             }
349              
350             sub _split_url {
351 50     50   86 my $url = shift;
352 50 50 33     163 die(qq/No URL provided\n/) unless defined $url and length $url;
353              
354             # URI regex adapted from the URI module
355             # XXX path_query here really chops at ? or # to get just the path and not the query
356 50 50       352 my ( $scheme, $authority, $path_query ) = $url =~ m<\A([^:/?#]+)://([^/?#]*)([^#?]*)>
357             or die(qq/Cannot parse URL: '$url'\n/);
358              
359 50         115 $scheme = lc $scheme;
360 50 50       129 $path_query = "/$path_query" unless $path_query =~ m<\A/>;
361 50         77 $path_query =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg;
  0         0  
362              
363 50 50       123 my $host = ( length($authority) ) ? lc $authority : 'localhost';
364 50         80 $host =~ s/\A[^@]*@//; # userinfo
365 50         80 my $port = do {
366 50 50 33     201 $host =~ s/:([0-9]*)\z// && length $1
    100          
    50          
367             ? $1
368             : ( $scheme eq 'http' ? 80 : $scheme eq 'https' ? 443 : undef );
369             };
370              
371 50         163 return ( $scheme, $host, $port, $path_query );
372             }
373              
374             1;
375              
376              
377             # vim: ts=4 sts=4 sw=4 et:
378              
379             __END__