File Coverage

blib/lib/WWW/Monitor.pm
Criterion Covered Total %
statement 27 102 26.4
branch 0 22 0.0
condition 0 24 0.0
subroutine 9 17 52.9
pod 8 8 100.0
total 44 173 25.4


line stmt bran cond sub pod time code
1             #WWW/monitor.pm. Written in 2007 by Yaron Kahanoitch. This
2             # source code has been placed in the public domain by the author.
3             # Please be kind and preserve the documentation.
4              
5             package WWW::Monitor;
6              
7 1     1   1191 use 5.005;
  1         3  
  1         36  
8 1     1   5 use warnings;
  1         2  
  1         29  
9 1     1   5 use strict;
  1         1  
  1         36  
10 1     1   5 use Carp;
  1         2  
  1         90  
11 1     1   6 use WWW::Monitor::Task;
  1         2  
  1         29  
12 1     1   1327 use WWW::Mechanize;
  1         3  
  1         33  
13 1     1   928 use HTML::FormatText;
  1         4  
  1         28  
14 1     1   2195 use File::HomeDir;
  1         6419  
  1         118  
15              
16             our(@ISA, @EXPORT, @EXPORT_OK, $VERSION);
17              
18             $VERSION = 0.24;
19              
20 1     1   10 use base qw(Exporter WWW::Mechanize);
  1         2  
  1         1084  
21              
22              
23             @EXPORT = qw ();
24             @EXPORT_OK = qw ();
25              
26             our $DEFAULT_CACHE_SUBDIR=".www-monitor";
27              
28             =head1 NAME
29              
30             WWW::Monitor - Monitor websites for updates and changes
31              
32             =head1 VERSION
33              
34             Version 0.01
35              
36             =cut
37              
38             =head1 SYNOPSIS
39              
40             use MIME::Lite;
41             use WWW::Monitor;
42             sub notify {
43             my ($url,$text) =@_;
44             foreach my $recipient ('user1@host','user2@host2') {
45             my $mail_obj =
46             MIME::Lite->new(To=>$recipient,
47             From=>'from@myHost',
48             Subject=>"Web alert web page changed",
49             Type=>'Text',
50             Data=>'For details visit '.$url."\n".$text
51             );
52             $mail_obj->send;
53             }
54             return 1;
55             }
56             my $mon = WWW::Monitor->new('MAIL_CALLBACK'=>\¬ify);
57             $mon->watch('http:://www.kahanovitch.com/');
58             $mon->run;
59              
60              
61             Or:
62              
63             use WWW::Monitor;
64             my $mon=WWW::Monitor->new('MAIL_CALLBACK'=>\¬ify,'CACHE'=>$cache);
65             my $task = $mon->watch("$url");
66             $mon->run or die "Query ended with error";
67            
68             sun notify {
69             my ($url,$task) =@_;
70             print "$url has changed\n";
71             while (my ($sub_url,$ref_http_response) = each %{$task->added_parts()}) {
72             print "New part added: $sub_url \n";
73             }
74            
75             while (my ($sub_url,$ref_http_response) = each %{$task->missing_parts()}) {
76             print "Part deleted: $sub_url \n";
77             }
78            
79             foreach my $sub_url ( $task->changed_parts()) {
80             print "$sub_url has changed:\n";
81             my ($old,$new) = $task->get_old_new_pair($sub_url);
82             my $old_content = $old->content;
83             my $new_content = $new->content;
84             }
85             }
86              
87             =head1 Description
88              
89             L ia a Web monitoring mechanism built to detect and
90             notify changes in web pages. The module is designed to compare
91             existing, online versions and pre-cached matched version. A web page
92             may include more than one file. A page may include some frames and
93             visible referenced data, which all together form a sigle visible page.
94             For now, WWW::Monitor compares only textual information. Images, and
95             non-HTML data are not being compared. To store information,
96             WWW::Monitor caches data with the "Cache" mechanism. By default,
97             Cache::File is being used, but the user may choose to use any Cache
98             object that implements the Cache module interface. L is
99             a subclass of L, so any of L or its
100             super classes can be used.
101              
102             =head1 EXPORT
103              
104             =head1 FUNCTIONS
105              
106             =head2 new ( [ OPTIONS ] )
107              
108             A constructor. OPTIONS are passed in a hash like fashion, using key
109             and value pairs. Possible options are: URL - A target URL to monitor.
110             CACHE_ROOT - A root directory under which all caching is being
111             managed. Default = /.www-monitor CACHE - cache
112             object. The object must have get() and set() methods like the Cache
113             interface, as well as set_validity and validity.
114              
115             =cut
116              
117             sub new {
118 0     0 1   my $this = shift;
119 0   0       my $class = ref($this) || $this;
120 0           my %args;
121 0 0         unless (@_ % 2) {
122 0           %args = @_;
123             } else {
124 0           carp( "Parameters for WWW::Monitor should be given as pair of 'OPTION'=>'VAL'");
125             }
126 0           my $cache_root = delete $args{CACHE_ROOT};
127 0 0         unless ($cache_root) {
128 0           my $def_dir = File::HomeDir->my_home."/".$DEFAULT_CACHE_SUBDIR;
129 0 0 0       if (!-d $def_dir && !mkdir($def_dir)) {
130 0           carp("directory $def_dir does not exists and cannot be created.$!");
131 0           return 0;
132             }
133 0           $cache_root = $def_dir;
134             }
135 0           my $cache = delete $args{CACHE};
136 0 0         if ($cache) {
137 0 0 0       unless ((ref($cache) ne "HASH") &&
      0        
      0        
      0        
      0        
138             $cache->can("get") &&
139             $cache->can("set") &&
140             $cache->can("set_validity") &&
141             $cache->can("get_validity") &&
142             $cache->can("exists")) {
143 0           carp "The given CACHE object must implements Cache interface and must be initialized";
144 0           $cache = "";
145             }
146             } else {
147 0           require Cache::File;
148 0           $cache = Cache::File->new( cache_root => $cache_root);
149             }
150 0           my $mailcallback = delete $args{MAIL_CALLBACK};
151 0           my $self=$class->SUPER::new(%args);
152 0           $self->{tasks} = [];
153 0           $self->{cache_root} = $cache_root;
154 0 0         $cache = ($cache)?$cache:Cache::File->new( cache_root => $self->{cache_root});
155 0           $self->{cache} = $cache;
156 0           $self->{errors_hash} = {};
157 0 0         $self->{mailcallback} = $mailcallback if ($mailcallback);
158 0           return $self;
159             }
160              
161             =head2 watch ( URL(S) )
162              
163             Add URL to be watched.
164             watch returns a reference to a L object.
165             for example $obj->watch('http://www.cnn.com' )
166              
167             =cut
168              
169             sub watch {
170 0     0 1   my $self = shift;
171 0           my $target = shift;
172 0           my $task = WWW::Monitor::Task->new('URL',$target);
173 0           push @{$self->{tasks}},$task;
  0            
174 0           return $task;
175             }
176              
177              
178             =head2 notify_callback ( sub )
179              
180             A code reference to be executed whenever a change is detected
181             (commonly used for sending mail). The following parameters will be
182             passed to the code reference:
183             $url -> a string that holds the url for which a change was detected.
184             $text -> A Message to be sent.
185             $task -> WWW::Monitor::Task object reference.
186             The given code reference should return true for success.
187              
188             =cut
189              
190             sub notify_callback {
191 0     0 1   my $self = shift;
192 0           $self->{mailcallback} = shift;
193 0           return 1;
194             }
195              
196             =head2 run
197              
198             Watch all given web pages and report changes if detected. If a url is
199             first visited (i.e. the url is not in the cache db) than the url will
200             be cached and no report will be created.
201              
202              
203             =cut
204              
205             sub run {
206 0     0 1   my $self = shift;
207 0           my $carrier = $self;
208 0           my $cache = $self->{cache};
209 0           my $ret = 1;
210 0           $self->{errors_hash} = {};
211 0           foreach my $task (@{$self->{tasks}}) {
  0            
212 0 0         $task->run($self,$carrier,$cache) or $ret = 0;
213             }
214 0           return $ret;
215             }
216              
217             =head2 errors_table
218              
219             Return a hash reference of errors updated to last execution (i.e. when
220             the run method was last executed). The returned keys are the urls where
221             the values are error descriptions.
222              
223             =cut
224              
225             sub errors_table {
226 0     0 1   my $self = shift;
227 0           my $ret_hash = {};
228 0           foreach my $task (@{$self->{tasks}}) {
  0            
229 0 0         $ret_hash->{$task->{url}} = $task->{error} unless $task->success();
230             }
231 0           while (my($url,$error) = each %{$self->{errors_hash}}) {
  0            
232 0           $ret_hash->{$url} = $error;
233             }
234 0           return $ret_hash;
235             }
236              
237             =head2 errors
238              
239             return a string that contains all errors. In array context return a list of errors.
240              
241             =cut
242              
243             sub errors {
244 0     0 1   my $self=shift;
245 0           my $all_errors_hash = $self->errors_table;
246 0           my @list_of_errors;
247 0           while (my($url,$error) = each %$all_errors_hash) {
248 0           push @list_of_errors,$url.":".$error;
249             }
250 0 0         return @list_of_errors if (wantarray);
251 0           return join("\n",@list_of_errors);
252             }
253              
254             =head2 notify
255              
256             (Private Method)
257             Activate notification callback
258              
259             =cut
260              
261             sub notify {
262 0     0 1   my $self = shift;
263 0           my ($url,$task) = @_;
264 0 0 0       if (exists $self->{mailcallback} and $self->{mailcallback}) {
265 0           return &{$self->{mailcallback}}($url,$task);
  0            
266             }
267 0           return 1;
268             }
269              
270             =head2 targets
271              
272             Return a list of strings out of watched targets.
273              
274             =cut
275              
276             sub targets {
277 0     0 1   my $self = shift;
278 0           my @res = ();
279 0           foreach my $task (@{$self->{tasks}}) {
  0            
280 0           push @res,$task->{url};
281             }
282 0           return @res;
283             }
284              
285              
286              
287             =head1 AUTHOR
288              
289             Yaron Kahanovitch, C<< >>
290              
291             =head1 BUGS
292              
293             Please report any bugs or feature requests to
294             C, or through the web interface at
295             L.
296             I will be notified, and then you'll automatically be notified of progress on
297             your bug as I make changes.
298              
299             =head1 SUPPORT
300              
301             You can find documentation for this module with the perldoc command. perldoc WWW::Monitor
302              
303             =over
304              
305             =item * AnnoCPAN: Annotated CPAN documentation
306              
307             L
308              
309             =item * CPAN Ratings
310              
311             L
312              
313             =item * RT: CPAN's request tracker
314              
315             L
316              
317             =item * Search CPAN
318              
319             L
320              
321             =back
322              
323             =head1 ACKNOWLEDGMENTS
324              
325             =head1 COPYRIGHT & LICENSE
326              
327             Copyright 2007 Yaron Kahanovitch, all rights reserved.
328             This program is free software; you can redistribute it and/or modify it
329             under the same terms as Perl itself.
330              
331             =cut
332              
333             1; # End of WWW::Monitor