File Coverage

blib/lib/WWW/Link.pm
Criterion Covered Total %
statement 178 278 64.0
branch 68 166 40.9
condition 10 24 41.6
subroutine 36 50 72.0
pod 27 45 60.0
total 319 563 56.6


line stmt bran cond sub pod time code
1             =head1 NAME
2              
3             WWW::Link - maintain information about the state of links
4              
5             =head1 SYNOPSIS
6              
7             use WWW::Link;
8             $::link=new WWW::Link "http://www.bounce.com/";
9             $::link->failed_test;
10             $::link->is_okay or warn "link not validated";
11              
12             =head1 DESCRIPTION
13              
14             WWW::Link is a perl class which accepts and maintains information about
15             links. For example, this would include urls which are referenced from
16             a WWW page.
17              
18             The link class will be acted on by such programs as link checkers to
19             give it information and by other programs to convert that information
20             into something which can be used by humans.
21              
22             =cut
23              
24             package WWW::Link;
25             $REVISION=q$Revision: 1.25 $ ;
26             $VERSION = '0.036'; #BETA / under development
27 7     7   88864 use Carp;
  7         17  
  7         599  
28 7     7   40 use strict;
  7         21  
  7         237  
29 7     7   34 use vars qw($verbose $VERSION);
  7         12  
  7         480  
30             $verbose=0;
31 7     7   2125 use HTTP::Response;
  7         67885  
  7         32149  
32              
33             =head1 METHODS
34              
35             =head2 new
36              
37             The constructor for links expects a url as a string.
38              
39             =cut
40              
41             sub new {
42 32     32 1 2064 my $class=shift;
43 32         47 my $url=shift;
44 32 50       82 die "usage \$link->()" unless $url;
45 32 50       72 $url=$url->as_string() if ref $url;
46 32         47 my $self={};
47 32         63 bless $self, $class;
48 32         72 $self->initialise();
49 32         164 $self->url($url);
50 32         77 return $self;
51             }
52              
53             =head2 status
54              
55             The status effectively a bit field. Some of the options are mutually exculsive
56              
57             =cut
58              
59             #Constants
60             #
61             #The following are arbitrary constants for use as flags on the status.
62              
63             #FIXME: in the next version of WWW::Link, these should all be multiplied by two
64             #and Link_not_checked should become 1.
65             #Status 0 should mean undefined... but it doesn't yet.
66             #plus these should all be subroutines.
67              
68             sub LINK_NOT_CHECKED () {0;}
69             sub LINK_VALIDATED () {1<<0;}
70             sub LINK_DAMAGED () {1<<1;}
71             sub LINK_BROKEN () {1<<2;}
72             sub LINK_ABANDONED () {1<<3;}
73             sub LINK_REDIRECTED () {1<<4;}
74             sub LINK_CORRECTABLE () {1<<5;}
75             sub LINK_UNSUPPORTED () {1<<6;}
76             sub LINK_DISALLOWED () {1<<7;}
77              
78             #FIXME make more space at the time we upgrade the format.
79             sub LINK_BANNED_SIMPLE () {1<<8;}
80             sub LINK_BANNED_COMPLEX () {1<<9;}
81             sub LINK_CHECK_UNDETERMINED () {1<<10;}
82             sub LINK_CHECK_OKAY_SIMPLE () {1<<11;}
83             sub LINK_CHECK_BROKEN_SIMPLE () {1<<11;}
84             sub LINK_CHECK_INCONSISTENT () {1<<12;}
85             sub LINK_UNTESTABLE_SIMPLE () {1<<13;}
86             sub LINK_UNTESTABLE_COMPLEX () {1<<14;}
87              
88             sub LAST_SIMPLE_OKAY () {1<<8;}
89              
90             =pod
91              
92             guessed similar ; followed link on page ;
93              
94             couldnt_test - for some reason the link tester has been unable to
95             check the status of the link
96              
97             robot exlusion ; server overload ; known network break
98              
99             =head2 initialise
100              
101             Setup each of the variables into a best guess starting state
102              
103             =cut
104              
105             sub initialise{
106 32     32 1 38 my $self=shift;
107 32         76 $self->status(LINK_NOT_CHECKED); #never been link checked.
108 32         88 $self->check_method(LINK_CHECK_OKAY_SIMPLE | LINK_CHECK_BROKEN_SIMPLE);
109 32         72 $self->{breakcount}=0;
110 32         42 $self->{testcount}=0;
111 32         57 $self->{short_reliability}=0;
112 32         75 $self->{long_reliability}=0;
113             }
114              
115             =head2 status
116              
117             There are a number of options for checking the status of a link.
118             These will maintain meaning although the details of how they do their
119             tests are likely to vary.
120              
121             =over
122              
123             =item is_okay
124              
125             The link is not considered to have been damaged. N.B. this could just
126             mean that we haven't checked it yet. Use validated okay to verify that.
127              
128             =item is_not_tested
129              
130             The link has not been examined and the system doesn't know if it is
131             good or bad.
132              
133             =item is_abandoned
134              
135             We've been testing the link and finding it broken for so long we
136             aren't interested in it any more.
137              
138             =item is_broken
139              
140             After repeated attempts (as defined by user) to validate it, no answer
141             was recieved and the link is considered broken.
142              
143             =item is_damaged
144              
145             The link was broken recently, but we still think that it needs more
146             time before we can consider it broken.
147              
148             =item is_redirected
149              
150             The link was examined and an explicit redirect was found.
151              
152             =item validated_okay
153              
154             The link has been examined and was definitely okay.
155              
156             =back
157              
158             =cut
159              
160             sub is_okay{
161 40     40 1 336 my $stat=shift()->status();
162 40 100       103 return 1 if $stat & LINK_VALIDATED;
163             #states should now be pure..
164             # return 1 if $stat == LINK_NOT_CHECKED;
165 30         92 return 0;
166             }
167              
168             sub is_not_checked{
169 0 0   0 0 0 return 1 if shift()->{"status"} == LINK_NOT_CHECKED;
170 0         0 return 0;
171             }
172              
173 2     2 1 5 sub is_abandoned{return shift()->{"status"} & LINK_ABANDONED}
174 24     24 1 166 sub is_broken{return shift()->{"status"} & LINK_BROKEN}
175 3     3 1 9 sub is_damaged{return shift()->{"status"} & LINK_DAMAGED}
176 3     3 1 28 sub is_redirected{return shift()->{"status"} & LINK_REDIRECTED}
177 7     7 0 48 sub is_disallowed{return shift()->{"status"} & LINK_DISALLOWED}
178 18     18 0 150 sub is_unsupported{return shift()->{"status"} & LINK_UNSUPPORTED}
179 0     0 1 0 sub validated_okay{return shift()->{"status"} & LINK_VALIDATED}
180              
181             sub status {
182 205     205 1 238 my $self=shift;
183 205 100       575 return $self->{"status"} unless @_;
184 116         168 my $status=shift;
185 116 100 100     568 $self->{"status-change-time"}=time
186             unless defined $self->{"status"} and $self->{"status"}==$status;
187 116         200 $self->{"status"}=$status;
188             }
189              
190             sub check_method {
191 32     32 0 43 my $self=shift;
192 32 50       140 return $self->{"check-method"} unless @_;
193 32         37 my $status=shift;
194 32 50 33     112 $self->{"check-method-change-time"}=time
195             unless defined $self->{"check-method"}
196             and $self->{"check-method"}==$status;
197 32         63 $self->{"check-method"}=$status;
198             }
199              
200             #has the link been hardwired by the user to only test one way
201             sub banned_complex{
202 0     0 0 0 my $self=shift;
203 0 0       0 return $self->{"check-method"} & LINK_BANNED_COMPLEX unless @_;
204 0         0 my $set=shift;
205 0 0       0 $self->{"check-method"} ^= LINK_BANNED_COMPLEX unless $set;
206 0 0       0 $self->{"check-method"} |= LINK_BANNED_COMPLEX if $set;
207 0         0 return $self->{"check-method"};
208             }
209             sub banned_simple{
210 0     0 0 0 my $self=shift;
211 0 0       0 return $self->{"check-method"} & LINK_BANNED_SIMPLE unless @_;
212 0         0 my $set=shift;
213 0 0       0 $self->{"check-method"} ^= LINK_BANNED_SIMPLE unless $set;
214 0 0       0 $self->{"check-method"} |= LINK_BANNED_SIMPLE if $set;
215 0         0 return $self->{"check-method"};
216             }
217              
218             sub check_okay_simple {
219 0     0 0 0 my $self=shift;
220 0 0       0 return $self->{"check-method"} & LINK_CHECK_OKAY_SIMPLE unless @_;
221 0         0 my $set=shift;
222 0 0       0 $self->{"check-method"} ^= LINK_CHECK_OKAY_SIMPLE unless $set;
223 0 0       0 $self->{"check-method"} |= LINK_CHECK_OKAY_SIMPLE if $set;
224 0         0 return $self->{"check-method"};
225             }
226             sub check_broken_simple{
227 0     0 0 0 my $self=shift;
228 0 0       0 return $self->{"check-method"} & LINK_CHECK_BROKEN_SIMPLE unless @_;
229 0         0 my $set=shift;
230 0 0       0 $self->{"check-method"} ^= LINK_CHECK_BROKEN_SIMPLE unless $set;
231 0 0       0 $self->{"check-method"} |= LINK_CHECK_BROKEN_SIMPLE if $set;
232 0         0 return $self->{"check-method"};
233             }
234             sub check_undetermined{
235 0     0 0 0 my $self=shift;
236 0 0       0 return $self->{"check-method"} & LINK_CHECK_UNDETERMINED unless @_;
237 0         0 my $set=shift;
238 0 0       0 $self->{"check-method"} ^= LINK_CHECK_UNDETERMINED unless $set;
239 0 0       0 $self->{"check-method"} |= LINK_CHECK_UNDETERMINED if $set;
240 0         0 return $self->{"check-method"};
241             }
242              
243             =head2 add_status
244              
245             or the given value into the status flags.
246              
247             =cut
248              
249             sub add_status {
250 30     30 1 40 my $self=shift;
251 30         64 my $status=shift;
252 30 50       69 die "usage \$link->()" unless $status;
253 30 100       96 $self->{"status-change-time"}=time unless $self->{"status"} & $status;
254 30         81 $self->{"status"}=$status | $self->{"status"};
255             }
256              
257             =head2 remove_status
258              
259             or the given value into the status flags.
260              
261             =cut
262              
263             sub remove_status {
264 40     40 1 216 my $self=shift;
265 40         49 my $status=shift;
266 40 50       73 die "usage \$link->()" unless $status;
267 40 50       93 $self->{"status-change-time"}=time if $self->{"status"} & $status;
268 40         55 $status = ~$status;
269 40         106 $self->{"status"}=$status & $self->{"status"};
270             }
271              
272             =head2 status_change_time
273              
274             Return the last time that the status field of the link was changed.
275              
276             =cut
277              
278             sub status_change_time {
279 0     0 1 0 my $self=shift;
280 0         0 return $self->{"status-change-time"}
281             }
282              
283             =head2 breakcount
284              
285             Returns two times the number of times the link has been tested and
286             found broken. This could in future turn into a fraction or something
287             the basic idea is that at around 10 you should start to think that the
288             link is broken beyond recognition..
289              
290             With an argument sets the links broken number, but you shouldn't
291             normally do this so by default it also complains unless you've set the
292             package I_know_what_im_up_to variable.
293              
294             =cut
295              
296             sub breakcount {
297 0     0 1 0 my $self=shift;
298 0 0       0 return $self->{"breakcount"} unless @_;
299 0         0 $self->{"breakcount"}=shift;
300             }
301              
302             sub testcount {
303 28     28 0 65 return shift->{"testcount"};
304             }
305              
306              
307             ########CONFIG##########
308             # number of times we let it be tested before we consider it broken
309             sub BROKEN_COUNT () {4;}
310              
311             # number of times we let it be tested before we decide nobody cares...
312             sub ABANDONED_COUNT () {10;}
313              
314             sub HOUR () {60*60;}
315             sub DAY () {24*60*60;}
316             sub TIME_BASE () {1.7*DAY;}
317              
318             sub OKAY_FACTOR () {5;}
319             sub UNTESTABLE_FACTOR () {7;} #cost of re-examining should be low..
320             sub ABANDONED_FACTOR () {11;}
321             sub DAMAGED_FACTOR () {1;}
322             sub BROKEN_FACTOR () {2;}
323              
324             =head1 time_want_test
325              
326             This tells you the time till the link thinks it should next be tested.
327             There are three regimes:-
328              
329             The time which controls the next time we want to be tested is the last
330             time we were tested. This function doesn't worry about what the real
331             time is now and will happily return times in the past.
332              
333             =over
334              
335             =item normal testing
336              
337             In the normal situation we have a time constant for each link and we
338             do testing on the link at that time +- one day.
339              
340             =item damaged link
341              
342             The link has just been detected as damaged. We retest it repeatedly
343             spread across a small number of days and then declare it broken.
344              
345             =item broken link
346              
347             The link has been declared broken. Now we test it occasionally just
348             to verify if it has been repaired in the meantime.
349              
350             =item abandoned link
351              
352             We've detected and declared it broken, but noone has come along to
353             look at it. It's still possible that outside influences repair the
354             link in the meantime, so we keep checking it occasionally
355              
356             =back
357              
358             Please note, a link doesn't know anything about the present time, or
359             when it is scheduled to be checked. The time it want's to be checked
360             could be some time in the past.
361              
362             =cut
363              
364             sub time_want_test {
365 3     3 0 20 my $self=shift;
366              
367             #we think we should always have been checked.
368 3         5 my $test_time=$self->{"last_test"};
369 3 50       21 $test_time=1 unless defined ($test_time);
370 3         9 my $base=$self->{"base_time"};
371 3 50       17 $base=TIME_BASE unless defined $base;
372 3 50       16 unless ($base =~ m/^[0-9]*[1-9][0-9]*/ ) {
373 0         0 warn "time base $base invalid using " . TIME_BASE . " instead";
374 0         0 $base=TIME_BASE;
375             }
376              
377 3         4 my $factor;
378             CASE: {
379 3 100       3 $self->is_damaged() && do { $factor=DAMAGED_FACTOR; last };
  3         10  
  1         2  
  1         2  
380 2 50       5 $self->is_abandoned() && do {$factor=ABANDONED_FACTOR; last };
  0         0  
  0         0  
381 2 50       4 $self->is_broken() && do {$factor=BROKEN_FACTOR; last };
  0         0  
  0         0  
382 2         2 ($self->is_disallowed() || $self->is_unsupported())
383 2 50 33     5 && do {$factor=UNTESTABLE_FACTOR; last};
  2         4  
384 0         0 $factor=OKAY_FACTOR; last;
  0         0  
385             }
386 3 50       8 die "didn't set factor" unless $factor;
387 3         6 $test_time+=$base*$factor;
388 3         7 my $vary=0.3*$base*$factor;
389 3 50       11 return wantarray ? ($test_time, $vary) : $test_time ;
390             }
391              
392             # =head2 time_scheduled
393              
394             # The scheduled time is the time we have been told (by the link test
395             # system) that we will next be checked.
396              
397             # =cut
398              
399             # sub time_scheduled {
400             # my $self=shift;
401             # unless ( @_ ) {
402             # #die "link is not scheduled" unless ( defined $self->{"time_scheduled"} );
403             # # we return undef if we don´t know when we have been scheduled which is
404             # # sort of bad
405             # return $self->{"time_scheduled"};
406             # }
407             # my $time=shift;
408             # die "$_ not a valid time" unless /^[+-]?\d+$/;
409             # die "$_ not in the future" unless $time > time();
410             # $self->{"time_scheduled"} = $time;
411             # return $time;
412             # }
413              
414              
415             sub last_test {
416 2     2 0 14 my $self=shift;
417 2 50       9 return $self->{"last_test"} unless @_;
418 0         0 my $time=shift;
419 0 0       0 die "Invalid time value" unless $time =~ /^[+-]?\d+$/;
420 0         0 return $self->{"last_test"} = $time;
421             }
422              
423             =head2 $l->last_refresh([integer-time])
424              
425             The last refresh is the last time the link was reported as in use by
426             some users resource. It B be updated ever time the index to an
427             infostructure is rebuilt or else the
428              
429             =cut
430              
431             sub last_refresh {
432 0     0 1 0 my $self=shift;
433 0 0       0 return $self->{"last_refresh"} unless @_;
434 0         0 my $time=shift;
435 0 0       0 die "Invalid time value" unless $time =~ /^[+-]?\d+$/;
436 0 0 0     0 unless ( defined $self->{"last_refresh"}
437             and $time < $self->{"last_refresh"} ) {
438 0         0 return $self->{"last_refresh"} = $time;
439             } else {
440 0 0       0 warn "ignoring refresh time earlier than current"
441             if $verbose & 16;
442             }
443             }
444              
445             =head2 add_redirect
446              
447             This method adds information about a redirect from a given link.
448              
449             Redirects can be a chain.
450              
451             =cut
452              
453             sub add_redirect {
454 0     0 1 0 my $self=shift;
455 0         0 my $redirect_url=shift;
456 0 0       0 die "usage \$link->add_redirect()" unless $redirect_url;
457 0 0       0 $self->{"redirects"}=[] unless $self->{"redirects"};
458 0         0 my $redir_list=$self->{"redirects"};
459 0         0 my $found=0;
460 0         0 my $i;
461             my @deletions;
462             #we delete backwards so that the array length doesn't change on us
463 0         0 for ($i = $#$redir_list ; $i>=0; $i--) {
464 0 0       0 splice @$redir_list, $i, 1 if $redir_list->[$i] = $redirect_url;
465             }
466 0         0 unshift @$redir_list, $redirect_url;
467             }
468              
469              
470             =head2 add_suggestion
471              
472             Add a suggestion to the beginning of the list of suggested replacement
473             links. If the same suggestion is later in the list delete it. We
474             return 1 if the link is new.
475              
476             =cut
477              
478             sub add_suggestion {
479 0     0 1 0 my $self=shift;
480 0         0 my $suggestion=shift;
481 0 0       0 $self->{"fix_suggestions"}=[] unless $self->{"fix_suggestions"};
482 0         0 my $sugg_list=$self->{"fix_suggestions"};
483 0         0 my $count=@$sugg_list;
484 0         0 my $found=0;
485 0         0 my $i;
486             my @deletions;
487             #we delete backwards so that the array length doesn't change on us
488 0         0 for ($i = $#$sugg_list ; $i>=0; $i--) {
489 0 0       0 splice @$sugg_list, $i, 1 if $sugg_list->[$i] = $suggestion;
490             }
491 0         0 unshift @$sugg_list, $suggestion;
492 0 0       0 return 1 if $count < @$sugg_list;
493 0         0 return 0;
494             }
495              
496             =head2 redirects
497              
498             Redirects stores or returns a reference to an array of redirects.
499              
500             =cut
501              
502             sub redirects {
503 8     8 1 179 my $self=shift;
504 8         13 my $redirects=shift;
505 8 50 33     97 croak "usage $self->redirects([array-ref])"
      33        
506             if @_ or (defined $redirects and not ref($redirects) =~ m/ARRAY/ );
507 8 50       31 $self->{"redirects"} = $redirects if defined $redirects;
508 8         52 $self->add_status(LINK_CORRECTABLE);
509 8         31 return $self->{"redirects"};
510             }
511              
512             =head2 redirect_urls
513              
514             C returns redirections on a link in the form of urls
515             (text strings, not objects). In a list context it returns the full
516             chain of urls. In a scalar context it returns only the last url of
517             the chain.
518              
519             =cut
520              
521             sub redirect_urls {
522 2     2 1 3 my $self=shift;
523 2 50       6 croak "usage $self->redirect_urls()"
524             if @_;
525 2 50       8 wantarray && do {
526 2         5 my @urls=();
527 2         3 REDIR: foreach my $redir (@{$self->{"redirects"}} ) {
  2         7  
528 2         8 $redir = $self->_redirect_url($redir);
529 2 50       10 push @urls, $redir if defined $redir;
530             }
531 2         7 return @urls;
532             };
533 0         0 return $self->_redirect_url($self->{"redirects"}[$#{$self->{"redirects"}}]);
  0         0  
534             }
535              
536              
537             # _redirect_url hopefully gets the url whether the redirect is stored
538             # as a string, a HTTP::Response object or a URI object.
539              
540             sub _redirect_url {
541 2     2   3 my $self=shift;
542 2         2 my $redir=shift;
543 2 100       11 CASE: {
544 2         3 ref $redir or last;
545              
546 1         2 my $url;
547 1         1 eval { $url = $redir->header('location'); };
  1         18  
548 1 50 33     96 $@ && ( not $@ =~ "Can't locate object method.*header") and do {
549 0         0 die "Failed to get redirect location: $@";
550             };
551              
552 1 50       3 defined $url && do {
553 1         3 $redir=$url;
554 1         2 last;
555             };
556              
557 0 0       0 $redir = $redir->as_string() if $redir->can('as_string');
558              
559 0 0       0 do { warn "don't know how to get url from redirect: " . $redir;
  0         0  
560 0         0 return undef; } if ref $redir;
561             }
562              
563 2         5 return $redir;
564             }
565              
566             =head2 fix_suggestion
567              
568             Fix suggestion is an array of suggestions for documents which might
569             replace a broken link. These can be derived from all sorts of places
570             and some are probably not correct. The aim is that they are in order
571             from best guess to worst. You pass a reference to the new array.
572              
573             =cut
574              
575             sub fix_suggestions {
576 0     0 0 0 my $self=shift;
577 0 0       0 return $self->{"fix_suggestions"} unless @_;
578 0         0 my $suggestions=shift;
579 0         0 $self->{"fix_suggestions"} = $suggestions;
580 0         0 $self->add_status(LINK_CORRECTABLE);
581 0         0 return $suggestions;
582             }
583              
584              
585             =head2 all_suggestion
586              
587             Returns a list consisting of all of the redirect and fix suggestions
588             that have been made for that link.
589              
590             =cut
591              
592             sub all_suggestions {
593 0     0 0 0 my $self=shift;
594 0         0 my $return=[];
595 0 0       0 push @$return, @{$self->{"fix_suggestions"}} if $self->{"fix_suggestions"};
  0         0  
596 0 0       0 push @$return, @{$self->{"redirects"}} if $self->{"redirects"};
  0         0  
597 0         0 return $return;
598             }
599              
600             =head2 url
601              
602             just say what url is associated with this link
603              
604             =cut
605              
606              
607             sub url {
608 132     132 1 158 my $self=shift;
609 132 100       425 return $self->{"url"} unless @_;
610 32         39 my $url=shift;
611 32         51 $self->{"url"} = $url;
612 32         46 return $url;
613             }
614              
615              
616             =head2 failed_test
617              
618             Failed test should says that you have tested a link and think it's
619             broken. Sometimes the link won't care (it's been tested recently and
620             is waiting to give the resource time to come back if it's just
621             temporarily mislayed); mostly it'll increase it's broken value by two.
622              
623             This also creates two reliability values. The long and short. These
624             indicate how reliable the link has been over recent tests. The long
625             value takes into account approximately the last 30 tests and the short
626             takes into account approximately the last 7 tests with more weighting
627             for more recent tests. A value of 1 means totally reliably working
628             for all time and a value of -1 means totally broken for all time and
629             anything in between is a lower value of certainty.
630              
631             Probably a value less than about 0.5 is one to consider a problem,
632             depending on how important the Link is to you.
633              
634              
635             =head2 redirections and failed tests
636              
637             There are the following possibilities: A redirected link which ends in
638             success; considered as redirected. A redirected link which ends in
639             failure. This should be considered broken and finally: A failed link
640             which was previously redirected. This should be considered broken,
641             but the redirection should be remembered as a possible solution for
642             the problem.
643              
644             =cut
645              
646             $WWW::Link::inter_test_time=0.5 * TIME_BASE; #one day approx..
647              
648             sub LONG_FACTOR () {30;}
649             sub SHORT_FACTOR () {7;}
650              
651             sub failed_test {
652 49     49 1 132 my $self=shift;
653             #filter status values compatible with failed testing..
654 49         105 my $stat = $self->status() & (LINK_DAMAGED | LINK_BROKEN | LINK_ABANDONED
655             | LINK_CORRECTABLE);
656 49         96 $self->status($stat);
657 49 100 66     236 unless ( $stat & LINK_DAMAGED) {
    100          
658 12         36 $self->first_broken;
659             } elsif ( ( ! defined ($self->{"last_fail"}) )
660             || (( time() - $self->{"last_fail"} ) >$WWW::Link::inter_test_time ) ) {
661 19         45 $self->more_broken;
662             }
663             #this implies we have to pass through the abandoned_count and
664             #broken_count values
665 49         101 $self->tested();
666              
667 49 100       107 $self->{long_reliability} = -0.2 unless $self->{long_reliability} ;
668 49 100       100 $self->{short_reliability} = -0.2 unless $self->{short_reliability} ;
669 49         61 $self->{long_reliability} *= ( 1 - 1/LONG_FACTOR);
670 49         60 $self->{long_reliability} -= (1/LONG_FACTOR);
671 49         67 $self->{short_reliability} *= ( 1 - 1/SHORT_FACTOR);
672 49         55 $self->{short_reliability} -= (1/SHORT_FACTOR);
673              
674 49         100 return $self->{"breakcount"}; #they can check if we changed it.
675             }
676              
677              
678             sub first_broken {
679 12     12 0 16 my $self=shift;
680 12 50       116 print STDERR "Link found broken for first time\n" if $verbose & 4;
681 12         45 $self->{"last_fail"}=time();
682 12         39 $self->{"breakcount"}= 2;
683 12         142 $self->add_status( LINK_DAMAGED );
684             }
685              
686             sub more_broken {
687 19     19 0 21 my $self=shift;
688 19 50       38 print STDERR "Link found broken again adding to failure count\n"
689             if $verbose & 4;
690 19         25 $self->{"breakcount"} += 2;
691 19         27 $self->{"last_fail"}=time();
692 19 100       37 $self->{"breakcount"} == ABANDONED_COUNT
693             && $self->add_status(LINK_ABANDONED);
694 19 100       46 $self->{"breakcount"} == BROKEN_COUNT
695             && $self->add_status(LINK_BROKEN);
696             }
697              
698             =head2 passed_test
699              
700             This tells a link that it has been tested and found to be okay. It's
701             an internal method generally and may change name.
702              
703             N.B. this resets all other status flags. If you want to have a link
704             which is okay but is redirected you must call C
705             afterwards.
706              
707             =cut
708              
709             sub passed_test {
710 16     16 1 51 my $self=shift;
711 16         48 $self->status(LINK_VALIDATED);
712 16         40 $self->tested();
713             #reliability information?
714              
715 16 100       48 $self->{long_reliability} = 0.5 unless $self->{long_reliability} ;
716 16 100       43 $self->{short_reliability} = 0.5 unless $self->{short_reliability} ;
717              
718 16         24 $self->{long_reliability} *= ( 1 - 1/LONG_FACTOR);
719 16         27 $self->{long_reliability} += (1/LONG_FACTOR);
720 16         24 $self->{short_reliability} *= ( 1 - 1/SHORT_FACTOR);
721 16         35 $self->{short_reliability} += (1/SHORT_FACTOR);
722              
723             }
724              
725              
726             =head2 found_redirected
727              
728             tells the link that there is at least one layer of permanent
729             redirections from its URL to the final object referred to. The urls
730             in the source documents should be updated.
731              
732             =cut
733              
734             sub found_redirected {
735 6     6 1 17 shift->add_status(LINK_REDIRECTED);
736             }
737              
738             =head2 not_redirected
739              
740             tells the link that there are no redirections from its URL.
741              
742             =cut
743              
744             sub not_redirected {
745 40     40 1 92 shift->remove_status(LINK_REDIRECTED);
746             }
747              
748             # =head2 tested
749              
750             # Little helper function that should be called each time the link is
751             # tested which updates timestamps etc.
752              
753             # =cut
754              
755             sub tested{
756 84     84 0 99 my $self=shift;
757              
758             #this was in an early version of Link.pm. This line can be deleted at
759             #release time for 1.0 at which time everybody should rebuild their
760             #databases.
761 84 50       184 defined $self->{"checkcount"} and delete $self->{"checkcount"};
762              
763 84         104 $self->{"testcount"}++;
764 84         160 $self->{"last_test"}=time;
765             }
766              
767             =head2 disallowed
768              
769             Testing the link was attempted but it was disallowed, e.g. due to the
770             robots exclusion protocol. The user should examine what's going on
771             and either ignore it or get in touch with the site for permission to
772             do link checking.
773              
774             N.B. disallowed should only be called when we know that testing has
775             been disallowed. Failure to access the resource at the end of a link
776             should normally be seen as an error.
777              
778             =cut
779              
780             sub disallowed {
781 4     4 1 13 my $self=shift;
782 4         13 $self->status(LINK_DISALLOWED);
783              
784 4         10 $self->tested(); #Hmmm.. or is it??
785             }
786              
787              
788             =head2 unsupported
789              
790             Testing the link was attempted but it turns out that we don't know
791             how... We just mark this as unsupported and the user can then think
792             about sending in a patch to add the needed features to LinkController.
793              
794             =cut
795              
796             sub unsupported {
797 15     15 1 29 my $self=shift;
798 15         39 $self->status(LINK_UNSUPPORTED);
799              
800 15         39 $self->tested();
801             }
802              
803              
804              
805             sub KEEP_RESP () {10;}
806              
807             =head2 store_response ( , , , ..)
808              
809             This function is for storing the history of testing of the link so
810             that we can look through it and find out what has been going on.
811              
812             The argument should be an HTTP response object representing
813             the status of the tester and possibly synthesised by the tester. The
814             time_now is the time the response is considered to have been
815             processed.
816              
817             Tester should be an identifier of the tester used to test the link.
818             Normally this should be the class of the tester.
819              
820             The tester data can be anything that the tester wants to store with
821             the response.
822              
823             N.B. mere storage of a response does not have any affect on a link.
824              
825             =cut
826              
827             sub store_response {
828 45     45 1 55 my $self=shift;
829 45         108 my @resp=@_;
830 45         45 unshift @{$self->{"test_hist"}}, \@resp;
  45         116  
831 45         62 pop @{$self->{"test_hist"}} while $#{$self->{"test_hist"}} > KEEP_RESP;
  54         209  
  9         30  
832             }
833              
834             =head2 recover_response ()
835              
836             This function returns a previous response which has been applied to
837             the link. In a scalar context it returns only the response. In an
838             array context it will return the arguments which were given to
839             store_response. The integer argument is the age of the link (it's
840             position in the history).
841              
842             N.B. an age of 0 returns the most recently stored response.
843              
844             =cut
845              
846             sub recover_response {
847 111     111 1 126 my ($self,$age)=@_;
848 111 50       202 croak 'usage: $self->recover_response($age)'
849             unless defined $age;
850 111 50       372 croak 'age must be a natural number' unless $age =~ /^\d+$/;
851              
852 111 50       272 return wantarray ? () : undef unless defined $self->{"test_hist"};
    100          
853              
854 93 50       151 $age > KEEP_RESP && do {
855 0         0 warn "response older than could ever be stored requested";
856 0         0 return undef;
857             };
858 93 50       77 $age > $#{$self->{"test_hist"}} && return wantarray ? () : undef ;
  93 100       200  
859              
860 90 50       141 return wantarray ? @{$self->{"test_hist"}->[$age]}
  90         352  
861             : $self->{"test_hist"}->[$age]->[0] ;
862             }
863              
864             =head1 STORING TEST COOKIE
865              
866             The test cookie is any data which the tester wants to store to have
867             available next time it tests this link. Testers should normally be
868             very careful how they handle this value and expect that another tester
869             could use the value differently. The normal way to cope with this is
870             to be able to work without the cookie and, when storing the cookie,
871             use an object which can then be idenitfied easily.
872              
873             If the cookie I support a time_want_test method, then this can be
874             used to override the time the link should be tested. It will be
875             called with a reference to the link.
876              
877             =cut
878              
879             sub test_cookie {
880 86     86 0 105 my ($self,$cookie)=@_;
881 86 100       171 $self->{"test-cookie"}=$cookie if $cookie;
882 86         306 return $self->{"test-cookie"};
883             }
884              
885             =head1 DECLARING LINKS BROKEN
886              
887             A link isn't signalled as broken until after it has been checked
888             several times and found not working. The reason for this is quite
889             simple. There are many WWW servers in the world which aren't reliably
890             accessable. If a set of pages are checked at any given time a fair
891             number of links could seem to be broken, even when they will soon be
892             repaired. In fact, in a well maintained set of pages (as I hope this
893             package will let you have), these pages will outnumber by a large
894             amount the number of actual broken links.
895              
896             =head1 LINK AGING
897              
898             Links can age in two ways. Firstly, we can recognise them as broken
899             and get bored of them being checked. However, in this case, they stay
900             around in the database, and are just checked very rarely (we never
901             give up hope.. there may be some reason why WE can't see a link and
902             the user can't be bothered to solve it yet but does later.)
903              
904             The second method we use is keeping a refresh time in each link. This
905             represents the last time some user told us that this link was in their
906             infostructure. If this gets larger than a certain value (e.g. a
907             month, but this must be site determined depending on the maintainance
908             patterns of users), the link should no longer be checked.
909              
910             If this gets larger than another value (which should be considerably
911             larger than the first - say 6 months or a year) then the link can be
912             retired from the database. Even if someone did turn out to be
913             interested, the information would be so out of date as to be useless.
914              
915             =cut
916              
917              
918             =head1 SEE ALSO
919              
920             WWW::Link::Reporter WWW::Link::Selector
921              
922             =cut
923