File Coverage

lib/WWW/Crawler/Mojo/Job.pm
Criterion Covered Total %
statement 39 39 100.0
branch 7 8 87.5
condition 5 5 100.0
subroutine 12 12 100.0
pod 7 7 100.0
total 70 71 98.5


line stmt bran cond sub pod time code
1             package WWW::Crawler::Mojo::Job;
2 11     11   68776 use strict;
  11         28  
  11         272  
3 11     11   48 use warnings;
  11         18  
  11         234  
4 11     11   46 use utf8;
  11         15  
  11         51  
5 11     11   648 use Mojo::Base -base;
  11         158696  
  11         64  
6 11     11   1398 use Mojo::Util qw(md5_sum);
  11         21  
  11         5074  
7              
8             has 'closed';
9             has 'context';
10             has depth => 0;
11             has 'literal_uri';
12             has 'method';
13             has 'referrer';
14             has redirect_history => sub { [] };
15             has 'tx_params';
16             has 'url';
17              
18             sub upgrade {
19 60     60 1 379 my ($class, $job) = @_;
20              
21 60 100 100     218 if (!ref $job || ref $job ne __PACKAGE__) {
22 8 100       31 my $url = !ref $job ? Mojo::URL->new($job) : $job;
23 8         462 $job = $class->new(url => $url);
24             }
25              
26 60         193 return $job;
27             }
28              
29             sub clone {
30 1     1 1 732 my $self = shift;
31 1         6 return __PACKAGE__->new(%$self);
32             }
33              
34             sub close {
35 14     14 1 1185 my $self = shift;
36 14         45 $self->{closed} = 1;
37 14         410 $self->{referrer} = undef;
38             }
39              
40             sub child {
41 77     77 1 2267 my $self = shift;
42 77         184 return __PACKAGE__->new(@_, referrer => $self, depth => $self->depth + 1);
43             }
44              
45             sub digest {
46 64     64 1 118 my $self = shift;
47 64   100     128 my $md5_seed = $self->url->to_string . ($self->method || '');
48 64 100       13427 $md5_seed .= $self->tx_params->to_string if ($self->tx_params);
49 64         881 return md5_sum($md5_seed);
50             }
51              
52             sub redirect {
53 14     14 1 761 my ($self, $last, @history) = @_;
54 14         51 $self->url($last);
55 14         109 $self->redirect_history(\@history);
56             }
57              
58             sub original_url {
59 1     1 1 881 my $self = shift;
60 1         10 my @histry = @{$self->redirect_history};
  1         6  
61 1 50       9 return $self->url unless (@histry);
62 1         4 return $histry[$#histry];
63             }
64              
65             1;
66              
67             =head1 NAME
68              
69             WWW::Crawler::Mojo::Job - Single crawler job
70              
71             =head1 SYNOPSIS
72              
73             my $job1 = WWW::Crawler::Mojo::Job->new;
74             $job1->url('http://example.com/');
75             my $job2 = $job1->child;
76              
77             =head1 DESCRIPTION
78              
79             This class represents a single crawler job.
80              
81             =head1 ATTRIBUTES
82              
83             =head2 context
84              
85             Either L<Mojo::DOM> or L<Mojo::URL> instance that the job is referred by.
86              
87             $job->context($dom);
88             say $job->context;
89              
90             =head2 closed
91              
92             A flag that indicates whether the job is closed or not.
93              
94             $job->closed(1);
95             say $job->closed;
96              
97             =head2 depth
98              
99             The depth of the job in referrer series.
100              
101             my $job1 = WWW::Crawler::Mojo::Job->new;
102             my $job2 = $job1->child;
103             my $job3 = $job2->child;
104             say $job1->depth; # 0
105             say $job2->depth; # 1
106             say $job3->depth; # 2
107              
108             =head2 literal_uri
109              
110             A L<Mojo::URL> instance of the literal URL that has appeared in the referrer
111             document.
112              
113             $job1->literal_uri('./index.html');
114             say $job1->literal_uri; # './index.html'
115              
116             =head2 referrer
117              
118             A job instance that has referred the URL.
119              
120             $job1->referrer($job);
121             my $job2 = $job1->referrer;
122              
123             =head2 redirect_history
124              
125             An array reference that contains URLs of redirect history.
126              
127             $job1->redirect_history([$url1, $url2, $url3]);
128             my $history = $job1->redirect_history;
129              
130             =head2 url
131              
132             A L<Mojo::URL> instance of the resolved URL.
133              
134             $job1->url('http://example.com/');
135             say $job1->url; # 'http://example.com/'
136              
137             =head2 method
138              
139             HTTP request method such as GET or POST.
140              
141             $job1->method('GET');
142             say $job1->method; # GET
143              
144             =head2 tx_params
145              
146             A hash reference that contains params for L.
147              
148             $job1->tx_params({foo => 'bar'});
149             $params = $job1->tx_params;
150              
151             =head1 METHODS
152              
153             =head2 clone
154              
155             Clones the job.
156              
157             my $job2 = $job1->clone;
158              
159             =head2 close
160              
161             Closes the job and cuts the referrer series.
162              
163             $job->close;
164              
165             =head2 child
166              
167             Instantiates a child job from the parent job. The parent job is set as the child's referrer.
168              
169             my $job1 = WWW::Crawler::Mojo::Job->new(url => 'http://example.com/1');
170             my $job2 = $job1->child(url => 'http://example.com/2');
171             say $job2->referrer->url # 'http://example.com/1'
172              
173             =head2 digest
174              
175             Generates digest string with C<url>, C<method>, C<tx_params> attributes.
176              
177             say $job->digest;
178              
179             =head2 redirect
180              
181             Replaces the resolved URL and history at once.
182              
183             my $job = WWW::Crawler::Mojo::Job->new;
184             $job->url($url1);
185             $job->redirect($url2, $url3);
186             say $job->url # $url2
187             say $job->redirect_history # [$url3]
188              
189             =head2 original_url
190              
191             Returns the original URL of a redirected job. If redirected, returns the last element
192             of the C<redirect_history> attribute, otherwise returns the C<url> attribute.
193              
194             $job1->redirect_history([$url1, $url2, $url3]);
195             my $url4 = $job1->original_url; # $url4 is $url3
196              
197             =head2 upgrade
198              
199             Instantiates a job from a string or a L<Mojo::URL> instance.
200              
201             =head1 AUTHOR
202              
203             Keita Sugama, E<lt>sugama@jamadam.comE<gt>
204              
205             =head1 COPYRIGHT AND LICENSE
206              
207             Copyright (C) Keita Sugama.
208              
209             This program is free software; you can redistribute it and/or
210             modify it under the same terms as Perl itself.
211              
212             =cut