line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
#WWW/monitor.pm. Written in 2007 by Yaron Kahanoitch. This |
2
|
|
|
|
|
|
|
# source code has been placed in the public domain by the author. |
3
|
|
|
|
|
|
|
# Please be kind and preserve the documentation. |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
package WWW::Monitor; |
6
|
|
|
|
|
|
|
|
7
|
1
|
|
|
1
|
|
1191
|
use 5.005; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
36
|
|
8
|
1
|
|
|
1
|
|
5
|
use warnings; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
29
|
|
9
|
1
|
|
|
1
|
|
5
|
use strict; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
36
|
|
10
|
1
|
|
|
1
|
|
5
|
use Carp; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
90
|
|
11
|
1
|
|
|
1
|
|
6
|
use WWW::Monitor::Task; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
29
|
|
12
|
1
|
|
|
1
|
|
1327
|
use WWW::Mechanize; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
33
|
|
13
|
1
|
|
|
1
|
|
928
|
use HTML::FormatText; |
|
1
|
|
|
|
|
4
|
|
|
1
|
|
|
|
|
28
|
|
14
|
1
|
|
|
1
|
|
2195
|
use File::HomeDir; |
|
1
|
|
|
|
|
6419
|
|
|
1
|
|
|
|
|
118
|
|
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
our(@ISA, @EXPORT, @EXPORT_OK, $VERSION); |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
$VERSION = 0.24; |
19
|
|
|
|
|
|
|
|
20
|
1
|
|
|
1
|
|
10
|
use base qw(Exporter WWW::Mechanize); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
1084
|
|
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
@EXPORT = qw (); |
24
|
|
|
|
|
|
|
@EXPORT_OK = qw (); |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
our $DEFAULT_CACHE_SUBDIR=".www-monitor"; |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
=head1 NAME |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
WWW::Monitor - Monitor websites for updates and changes |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
=head1 VERSION |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
Version 0.01 |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
=cut |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
=head1 SYNOPSIS |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
use MIME::Lite; |
41
|
|
|
|
|
|
|
use WWW::Monitor; |
42
|
|
|
|
|
|
|
sub notify { |
43
|
|
|
|
|
|
|
my ($url,$text) =@_; |
44
|
|
|
|
|
|
|
foreach my $recipient ('user1@host','user2@host2') { |
45
|
|
|
|
|
|
|
my $mail_obj = |
46
|
|
|
|
|
|
|
MIME::Lite->new(To=>$recipient, |
47
|
|
|
|
|
|
|
From=>'from@myHost', |
48
|
|
|
|
|
|
|
Subject=>"Web alert web page changed", |
49
|
|
|
|
|
|
|
Type=>'Text', |
50
|
|
|
|
|
|
|
Data=>'For details visit '.$url."\n".$text |
51
|
|
|
|
|
|
|
); |
52
|
|
|
|
|
|
|
$mail_obj->send; |
53
|
|
|
|
|
|
|
} |
54
|
|
|
|
|
|
|
return 1; |
55
|
|
|
|
|
|
|
} |
56
|
|
|
|
|
|
|
my $mon = WWW::Monitor->new('MAIL_CALLBACK'=>\¬ify); |
57
|
|
|
|
|
|
|
$mon->watch('http:://www.kahanovitch.com/'); |
58
|
|
|
|
|
|
|
$mon->run; |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
Or: |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
use WWW::Monitor; |
64
|
|
|
|
|
|
|
my $mon=WWW::Monitor->new('MAIL_CALLBACK'=>\¬ify,'CACHE'=>$cache); |
65
|
|
|
|
|
|
|
my $task = $mon->watch("$url"); |
66
|
|
|
|
|
|
|
$mon->run or die "Query ended with error"; |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
sun notify { |
69
|
|
|
|
|
|
|
my ($url,$task) =@_; |
70
|
|
|
|
|
|
|
print "$url has changed\n"; |
71
|
|
|
|
|
|
|
while (my ($sub_url,$ref_http_response) = each %{$task->added_parts()}) { |
72
|
|
|
|
|
|
|
print "New part added: $sub_url \n"; |
73
|
|
|
|
|
|
|
} |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
while (my ($sub_url,$ref_http_response) = each %{$task->missing_parts()}) { |
76
|
|
|
|
|
|
|
print "Part deleted: $sub_url \n"; |
77
|
|
|
|
|
|
|
} |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
foreach my $sub_url ( $task->changed_parts()) { |
80
|
|
|
|
|
|
|
print "$sub_url has changed:\n"; |
81
|
|
|
|
|
|
|
my ($old,$new) = $task->get_old_new_pair($sub_url); |
82
|
|
|
|
|
|
|
my $old_content = $old->content; |
83
|
|
|
|
|
|
|
my $new_content = $new->content; |
84
|
|
|
|
|
|
|
} |
85
|
|
|
|
|
|
|
} |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
=head1 Description |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
L ia a Web monitoring mechanism built to detect and |
90
|
|
|
|
|
|
|
notify changes in web pages. The module is designed to compare |
91
|
|
|
|
|
|
|
existing, online versions and pre-cached matched version. A web page |
92
|
|
|
|
|
|
|
may include more than one file. A page may include some frames and |
93
|
|
|
|
|
|
|
visible referenced data, which all together form a sigle visible page. |
94
|
|
|
|
|
|
|
For now, WWW::Monitor compares only textual information. Images, and |
95
|
|
|
|
|
|
|
non-HTML data are not being compared. To store information, |
96
|
|
|
|
|
|
|
WWW::Monitor caches data with the "Cache" mechanism. By default, |
97
|
|
|
|
|
|
|
Cache::File is being used, but the user may choose to use any Cache |
98
|
|
|
|
|
|
|
object that implements the Cache module interface. L is |
99
|
|
|
|
|
|
|
a subclass of L, so any of L or its |
100
|
|
|
|
|
|
|
super classes can be used. |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
=head1 EXPORT |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
=head1 FUNCTIONS |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
=head2 new ( [ OPTIONS ] ) |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
A constructor. OPTIONS are passed in a hash like fashion, using key |
109
|
|
|
|
|
|
|
and value pairs. Possible options are: URL - A target URL to monitor. |
110
|
|
|
|
|
|
|
CACHE_ROOT - A root directory under which all caching is being |
111
|
|
|
|
|
|
|
managed. Default = /.www-monitor CACHE - cache |
112
|
|
|
|
|
|
|
object. The object must have get() and set() methods like the Cache |
113
|
|
|
|
|
|
|
interface, as well as set_validity and validity. |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
=cut |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
sub new { |
118
|
0
|
|
|
0
|
1
|
|
my $this = shift; |
119
|
0
|
|
0
|
|
|
|
my $class = ref($this) || $this; |
120
|
0
|
|
|
|
|
|
my %args; |
121
|
0
|
0
|
|
|
|
|
unless (@_ % 2) { |
122
|
0
|
|
|
|
|
|
%args = @_; |
123
|
|
|
|
|
|
|
} else { |
124
|
0
|
|
|
|
|
|
carp( "Parameters for WWW::Monitor should be given as pair of 'OPTION'=>'VAL'"); |
125
|
|
|
|
|
|
|
} |
126
|
0
|
|
|
|
|
|
my $cache_root = delete $args{CACHE_ROOT}; |
127
|
0
|
0
|
|
|
|
|
unless ($cache_root) { |
128
|
0
|
|
|
|
|
|
my $def_dir = File::HomeDir->my_home."/".$DEFAULT_CACHE_SUBDIR; |
129
|
0
|
0
|
0
|
|
|
|
if (!-d $def_dir && !mkdir($def_dir)) { |
130
|
0
|
|
|
|
|
|
carp("directory $def_dir does not exists and cannot be created.$!"); |
131
|
0
|
|
|
|
|
|
return 0; |
132
|
|
|
|
|
|
|
} |
133
|
0
|
|
|
|
|
|
$cache_root = $def_dir; |
134
|
|
|
|
|
|
|
} |
135
|
0
|
|
|
|
|
|
my $cache = delete $args{CACHE}; |
136
|
0
|
0
|
|
|
|
|
if ($cache) { |
137
|
0
|
0
|
0
|
|
|
|
unless ((ref($cache) ne "HASH") && |
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
138
|
|
|
|
|
|
|
$cache->can("get") && |
139
|
|
|
|
|
|
|
$cache->can("set") && |
140
|
|
|
|
|
|
|
$cache->can("set_validity") && |
141
|
|
|
|
|
|
|
$cache->can("get_validity") && |
142
|
|
|
|
|
|
|
$cache->can("exists")) { |
143
|
0
|
|
|
|
|
|
carp "The given CACHE object must implements Cache interface and must be initialized"; |
144
|
0
|
|
|
|
|
|
$cache = ""; |
145
|
|
|
|
|
|
|
} |
146
|
|
|
|
|
|
|
} else { |
147
|
0
|
|
|
|
|
|
require Cache::File; |
148
|
0
|
|
|
|
|
|
$cache = Cache::File->new( cache_root => $cache_root); |
149
|
|
|
|
|
|
|
} |
150
|
0
|
|
|
|
|
|
my $mailcallback = delete $args{MAIL_CALLBACK}; |
151
|
0
|
|
|
|
|
|
my $self=$class->SUPER::new(%args); |
152
|
0
|
|
|
|
|
|
$self->{tasks} = []; |
153
|
0
|
|
|
|
|
|
$self->{cache_root} = $cache_root; |
154
|
0
|
0
|
|
|
|
|
$cache = ($cache)?$cache:Cache::File->new( cache_root => $self->{cache_root}); |
155
|
0
|
|
|
|
|
|
$self->{cache} = $cache; |
156
|
0
|
|
|
|
|
|
$self->{errors_hash} = {}; |
157
|
0
|
0
|
|
|
|
|
$self->{mailcallback} = $mailcallback if ($mailcallback); |
158
|
0
|
|
|
|
|
|
return $self; |
159
|
|
|
|
|
|
|
} |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
=head2 watch ( URL(S) ) |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
Add URL to be watched. |
164
|
|
|
|
|
|
|
watch returns a reference to a L object. |
165
|
|
|
|
|
|
|
for example $obj->watch('http://www.cnn.com' ) |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
=cut |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
sub watch { |
170
|
0
|
|
|
0
|
1
|
|
my $self = shift; |
171
|
0
|
|
|
|
|
|
my $target = shift; |
172
|
0
|
|
|
|
|
|
my $task = WWW::Monitor::Task->new('URL',$target); |
173
|
0
|
|
|
|
|
|
push @{$self->{tasks}},$task; |
|
0
|
|
|
|
|
|
|
174
|
0
|
|
|
|
|
|
return $task; |
175
|
|
|
|
|
|
|
} |
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
=head2 notify_callback ( sub ) |
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
A code reference to be executed whenever a change is detected |
181
|
|
|
|
|
|
|
(commonly used for sending mail). The following parameters will be |
182
|
|
|
|
|
|
|
passed to the code reference: |
183
|
|
|
|
|
|
|
$url -> a string that holds the url for which a change was detected. |
184
|
|
|
|
|
|
|
$text -> A Message to be sent. |
185
|
|
|
|
|
|
|
$task -> WWW::Monitor::Task object reference. |
186
|
|
|
|
|
|
|
The given code reference should return true for success. |
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
=cut |
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
sub notify_callback { |
191
|
0
|
|
|
0
|
1
|
|
my $self = shift; |
192
|
0
|
|
|
|
|
|
$self->{mailcallback} = shift; |
193
|
0
|
|
|
|
|
|
return 1; |
194
|
|
|
|
|
|
|
} |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
=head2 run |
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
Watch all given web pages and report changes if detected. If a url is |
199
|
|
|
|
|
|
|
first visited (i.e. the url is not in the cache db) than the url will |
200
|
|
|
|
|
|
|
be cached and no report will be created. |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
=cut |
204
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
sub run { |
206
|
0
|
|
|
0
|
1
|
|
my $self = shift; |
207
|
0
|
|
|
|
|
|
my $carrier = $self; |
208
|
0
|
|
|
|
|
|
my $cache = $self->{cache}; |
209
|
0
|
|
|
|
|
|
my $ret = 1; |
210
|
0
|
|
|
|
|
|
$self->{errors_hash} = {}; |
211
|
0
|
|
|
|
|
|
foreach my $task (@{$self->{tasks}}) { |
|
0
|
|
|
|
|
|
|
212
|
0
|
0
|
|
|
|
|
$task->run($self,$carrier,$cache) or $ret = 0; |
213
|
|
|
|
|
|
|
} |
214
|
0
|
|
|
|
|
|
return $ret; |
215
|
|
|
|
|
|
|
} |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
=head2 errors_table |
218
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
Return a hash reference of errors updated to last execution (i.e. when |
220
|
|
|
|
|
|
|
the run method was last executed). The returned keys are the urls where |
221
|
|
|
|
|
|
|
the values are error descriptions. |
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
=cut |
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
sub errors_table { |
226
|
0
|
|
|
0
|
1
|
|
my $self = shift; |
227
|
0
|
|
|
|
|
|
my $ret_hash = {}; |
228
|
0
|
|
|
|
|
|
foreach my $task (@{$self->{tasks}}) { |
|
0
|
|
|
|
|
|
|
229
|
0
|
0
|
|
|
|
|
$ret_hash->{$task->{url}} = $task->{error} unless $task->success(); |
230
|
|
|
|
|
|
|
} |
231
|
0
|
|
|
|
|
|
while (my($url,$error) = each %{$self->{errors_hash}}) { |
|
0
|
|
|
|
|
|
|
232
|
0
|
|
|
|
|
|
$ret_hash->{$url} = $error; |
233
|
|
|
|
|
|
|
} |
234
|
0
|
|
|
|
|
|
return $ret_hash; |
235
|
|
|
|
|
|
|
} |
236
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
=head2 errors |
238
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
return a string that contains all errors. In array context return a list of errors. |
240
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
=cut |
242
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
sub errors { |
244
|
0
|
|
|
0
|
1
|
|
my $self=shift; |
245
|
0
|
|
|
|
|
|
my $all_errors_hash = $self->errors_table; |
246
|
0
|
|
|
|
|
|
my @list_of_errors; |
247
|
0
|
|
|
|
|
|
while (my($url,$error) = each %$all_errors_hash) { |
248
|
0
|
|
|
|
|
|
push @list_of_errors,$url.":".$error; |
249
|
|
|
|
|
|
|
} |
250
|
0
|
0
|
|
|
|
|
return @list_of_errors if (wantarray); |
251
|
0
|
|
|
|
|
|
return join("\n",@list_of_errors); |
252
|
|
|
|
|
|
|
} |
253
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
=head2 notify |
255
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
(Private Method) |
257
|
|
|
|
|
|
|
Activate notification callback |
258
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
=cut |
260
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
sub notify { |
262
|
0
|
|
|
0
|
1
|
|
my $self = shift; |
263
|
0
|
|
|
|
|
|
my ($url,$task) = @_; |
264
|
0
|
0
|
0
|
|
|
|
if (exists $self->{mailcallback} and $self->{mailcallback}) { |
265
|
0
|
|
|
|
|
|
return &{$self->{mailcallback}}($url,$task); |
|
0
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
} |
267
|
0
|
|
|
|
|
|
return 1; |
268
|
|
|
|
|
|
|
} |
269
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
=head2 targets |
271
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
Return a list of strings out of watched targets. |
273
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
=cut |
275
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
sub targets { |
277
|
0
|
|
|
0
|
1
|
|
my $self = shift; |
278
|
0
|
|
|
|
|
|
my @res = (); |
279
|
0
|
|
|
|
|
|
foreach my $task (@{$self->{tasks}}) { |
|
0
|
|
|
|
|
|
|
280
|
0
|
|
|
|
|
|
push @res,$task->{url}; |
281
|
|
|
|
|
|
|
} |
282
|
0
|
|
|
|
|
|
return @res; |
283
|
|
|
|
|
|
|
} |
284
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
|
287
|
|
|
|
|
|
|
=head1 AUTHOR |
288
|
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
Yaron Kahanovitch, C<< >> |
290
|
|
|
|
|
|
|
|
291
|
|
|
|
|
|
|
=head1 BUGS |
292
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
Please report any bugs or feature requests to |
294
|
|
|
|
|
|
|
C, or through the web interface at |
295
|
|
|
|
|
|
|
L. |
296
|
|
|
|
|
|
|
I will be notified, and then you'll automatically be notified of progress on |
297
|
|
|
|
|
|
|
your bug as I make changes. |
298
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
=head1 SUPPORT |
300
|
|
|
|
|
|
|
|
301
|
|
|
|
|
|
|
You can find documentation for this module with the perldoc command. perldoc WWW::Monitor |
302
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
=over |
304
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
=item * AnnoCPAN: Annotated CPAN documentation |
306
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
L |
308
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
=item * CPAN Ratings |
310
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
L |
312
|
|
|
|
|
|
|
|
313
|
|
|
|
|
|
|
=item * RT: CPAN's request tracker |
314
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
L |
316
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
=item * Search CPAN |
318
|
|
|
|
|
|
|
|
319
|
|
|
|
|
|
|
L |
320
|
|
|
|
|
|
|
|
321
|
|
|
|
|
|
|
=back |
322
|
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
=head1 ACKNOWLEDGMENTS |
324
|
|
|
|
|
|
|
|
325
|
|
|
|
|
|
|
=head1 COPYRIGHT & LICENSE |
326
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
Copyright 2007 Yaron Kahanovitch, all rights reserved. |
328
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify it |
329
|
|
|
|
|
|
|
under the same terms as Perl itself. |
330
|
|
|
|
|
|
|
|
331
|
|
|
|
|
|
|
=cut |
332
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
1; # End of WWW::Monitor |