| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
|
|
2
|
|
|
|
|
|
|
############################################################################# |
|
3
|
|
|
|
|
|
|
## $Id: WebArchive.pm 6702 2006-07-25 01:43:27Z spadkins $ |
|
4
|
|
|
|
|
|
|
############################################################################# |
|
5
|
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
package WWW::WebArchive; |
|
7
|
|
|
|
|
|
|
|
|
8
|
1
|
|
|
1
|
|
723
|
use vars qw($VERSION); |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
46
|
|
|
9
|
1
|
|
|
1
|
|
4
|
use strict; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
34
|
|
|
10
|
|
|
|
|
|
|
|
|
11
|
1
|
|
|
1
|
|
13
|
use Cwd 'abs_path'; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
50
|
|
|
12
|
1
|
|
|
1
|
|
4
|
use File::Spec; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
392
|
|
|
13
|
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
$VERSION = "0.50"; |
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
sub new { |
|
17
|
0
|
0
|
|
0
|
0
|
|
&App::sub_entry if ($App::trace); |
|
18
|
0
|
|
|
|
|
|
my ($this, @args) = @_; |
|
19
|
0
|
|
0
|
|
|
|
my $class = ref($this) || $this; |
|
20
|
0
|
|
|
|
|
|
my $self = {}; |
|
21
|
0
|
0
|
0
|
|
|
|
if ($#args == 0 && ref($args[0]) eq "HASH") { |
|
|
|
0
|
0
|
|
|
|
|
|
22
|
0
|
|
|
|
|
|
$self = { %{$args[0]} }; |
|
|
0
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
} |
|
24
|
|
|
|
|
|
|
elsif ($#args >= 1 && $#args % 2 == 1) { # even number of args |
|
25
|
0
|
|
|
|
|
|
$self = { @args }; |
|
26
|
|
|
|
|
|
|
} |
|
27
|
0
|
|
|
|
|
|
bless $self, $class; |
|
28
|
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
# Initialize agents to the individual archives |
|
30
|
0
|
|
|
|
|
|
my @archives = qw(WaybackMachine); |
|
31
|
0
|
|
|
|
|
|
my (@agents, %agent, $agent); |
|
32
|
0
|
|
|
|
|
|
foreach my $archive (@archives) { |
|
33
|
0
|
|
|
|
|
|
$class = "WWW::WebArchive::$archive"; |
|
34
|
0
|
|
|
|
|
|
eval "use $class;"; |
|
35
|
0
|
0
|
|
|
|
|
if ($@) { |
|
36
|
0
|
|
|
|
|
|
die $@; |
|
37
|
|
|
|
|
|
|
} |
|
38
|
0
|
|
|
|
|
|
$agent = $class->new(name => $archive, @args); |
|
39
|
0
|
|
|
|
|
|
$agent{$archive} = $agent; |
|
40
|
0
|
|
|
|
|
|
push(@agents, $agent); |
|
41
|
|
|
|
|
|
|
} |
|
42
|
0
|
|
|
|
|
|
$self->{archives} = \@archives; |
|
43
|
0
|
|
|
|
|
|
$self->{agents} = \@agents; |
|
44
|
0
|
|
|
|
|
|
$self->{agent} = \%agent; |
|
45
|
|
|
|
|
|
|
|
|
46
|
0
|
0
|
|
|
|
|
&App::sub_exit($self) if ($App::trace); |
|
47
|
0
|
|
|
|
|
|
return($self); |
|
48
|
|
|
|
|
|
|
} |
|
49
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
sub restore { |
|
51
|
0
|
0
|
|
0
|
0
|
|
&App::sub_entry if ($App::trace); |
|
52
|
0
|
|
|
|
|
|
my ($self, $options) = @_; |
|
53
|
0
|
|
0
|
|
|
|
my $url = $options->{url} || die "restore(): URL not provided"; |
|
54
|
0
|
|
|
|
|
|
foreach my $agent (@{$self->{agents}}) { |
|
|
0
|
|
|
|
|
|
|
|
55
|
0
|
|
|
|
|
|
$agent->restore($options); |
|
56
|
|
|
|
|
|
|
} |
|
57
|
|
|
|
|
|
|
# merge the results |
|
58
|
0
|
0
|
|
|
|
|
&App::sub_exit() if ($App::trace); |
|
59
|
|
|
|
|
|
|
} |
|
60
|
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
=head1 NAME |
|
62
|
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
WWW::WebArchive - Retrieve old versions of public web pages from various web archives (i.e. www.archive.org, Internet Archive's Wayback Machine, or Google's page cache) |
|
64
|
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
66
|
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
NOTE: You probably want to use the "webarchive" command line utility rather than |
|
68
|
|
|
|
|
|
|
this API. If you really want to use the API, you should look at how "webarchive" |
|
69
|
|
|
|
|
|
|
uses it as an example. |
|
70
|
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
#!/usr/bin/perl |
|
72
|
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
use WWW::WebArchive; |
|
74
|
|
|
|
|
|
|
my $webarchive = WWW::WebArchive->new(); |
|
75
|
|
|
|
|
|
|
$webarchive->restore( { url => "http://www.website.com" } ); |
|
76
|
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
78
|
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
WWW-WebArchive is a set of modules to retrieve old versions of public web pages |
|
80
|
|
|
|
|
|
|
from various web archives. |
|
81
|
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
* http://www.archive.org - Internet Archive's Wayback Machine |
|
83
|
|
|
|
|
|
|
* http://www.google.com - Google's page cache |
|
84
|
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
This is useful if |
|
86
|
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
1. Your web server crashed and you didn't have complete backups |
|
88
|
|
|
|
|
|
|
2. A site (such as a valuable reference source) changed or went away |
|
89
|
|
|
|
|
|
|
and you want to restore an old version of the site to your local |
|
90
|
|
|
|
|
|
|
disk |
|
91
|
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
=head1 ACKNOWLEDGEMENTS |
|
93
|
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
* Author: Stephen Adkins |
|
95
|
|
|
|
|
|
|
* License: This is free software. It is licensed under the same terms as Perl itself. |
|
96
|
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
=head1 SEE ALSO |
|
98
|
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
=cut |
|
100
|
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
1; |
|
102
|
|
|
|
|
|
|
|