File Coverage

blib/lib/WWW/BBSWatch.pm
Criterion Covered Total %
statement 30 112 26.7
branch 0 56 0.0
condition 0 22 0.0
subroutine 10 16 62.5
pod 5 5 100.0
total 45 211 21.3


line stmt bran cond sub pod time code
1             package WWW::BBSWatch;
2              
3             =pod
4              
5             =head1 NAME
6              
7             WWW::BBSWatch - Send, via email, messages posted to a WWW bulletin board
8              
9             =head1 SYNOPSIS
10              
11             use WWW::BBSWatch; # should really be a subclass
12              
13             sub WWW::BBSWatch::article_list { # generates warning (rightly so)
14             my $self = shift;
15             my $content = shift;
16             return ($$content =~ m% 17             }
18              
19             BBSWatch->new(-MAIL => 'me',
20             -BBS_URL => 'http://www.foo.org/cgi-bin/bbs.pl')->retrieve;
21              
22             See better, working examples below.
23              
24             =head1 DESCRIPTION
25              
26             There are many interesting discussions that take place on World Wide Web
27             Bulletin Boards, but I do not have the patience to browse to each article. I
28             can process email and newsgroups many times faster than a WWW bulletin board
29             because of the lag inherent in the web. Instead of ignoring this wealth of
30             information, B was created. It will monitor a World Wide Web
31             Bulletin Board and email new postings to you. The email headers are as correct
32             as possible, including reasonable I, I, I, I
33             and I entries.
34              
35             This module requires B and B.
36              
37             =head1 INTERFACE
38              
39             =over 4
40              
41             =cut
42              
43 1     1   1188 use strict;
  1         2  
  1         42  
44              
45 1     1   5 use vars qw/$VERSION/;
  1         2  
  1         49  
46             $VERSION = "1.02";
47              
48 1     1   4977 use LWP::UserAgent ();
  1         113706  
  1         29  
49 1     1   1155 use SDBM_File;
  1         2907  
  1         46  
50 1     1   7 use Fcntl;
  1         2  
  1         325  
51 1     1   3575 use MIME::Lite ();
  1         56200  
  1         191  
52              
53             local $ = 1;
54              
55 1     1   10 use constant LOCK_SH => 1;
  1         3  
  1         330  
56 1     1   7 use constant LOCK_EX => 2;
  1         1  
  1         47  
57 1     1   5 use constant LOCK_NB => 4;
  1         3  
  1         43  
58 1     1   5 use constant LOCK_UN => 8;
  1         3  
  1         2055  
59              
60             =pod
61              
62             =item $b = WWW::BBSWatch->new
63              
64             Arguments are:
65              
66             C<-BBS_URL>: The URL of the bulletin board's index page. This field is
67             required.
68              
69             C<-MAIL>: The email address to send mail to
70              
71             C<-MDA>: Sets the mail delivery agent by calling MIME::Lite::send(HOW, HOWARGS).
72             If a scalar value is passed in, it is passed as send("sendmail", $mda_value). If
73             an array ref is provided, send(@$mda_value) is called.
74              
75             C<-DB>: Basename of the database that keeps track of visited articles
76              
77             C<-WARN_TIMEOUT>: Number of seconds before warning message is sent
78             proclaiming inability to contact BBS_URL page. Default is 10,800 (3 hours).
79              
80             C<-MAX_ARTICLES>: Maximum number of articles to send in one
81             batch. Default is essentially all articles.
82              
83             C<-VERBOSE>: Controls the amount of informative output. Useful values are 0, 1,
84             2. Default is 0 (completely silent).
85              
86             =cut
87              
88             sub new {
89 0     0 1   my $class = shift;
90 0           my %args = @_;
91              
92             # Normalize args
93 0           foreach (keys %args) {
94 0           my $new = uc($_);
95 0 0         $new = "-$new" unless $new =~ /^-/;
96 0 0         unless ($new eq $_) {
97 0           $args{$new} = $args{$_};
98 0           delete $args{$_};
99             }
100             }
101              
102 0 0         if ($args{-MDA}) {
103 0 0         if (ref $args{-MDA}) {
104 0           MIME::Lite::send(@{$args{-MDA}});
  0            
105             } else {
106 0           MIME::Lite::send("sendmail", $args{-MDA});
107             }
108             }
109              
110 0   0       my $self = {
      0        
      0        
      0        
111             addr => $args{-MAIL},
112             warn_timeout => $args{-WARN_TIMEOUT} || (3600 * 3),
113             db => $args{-DB} || 'BBSWatch',
114             bbs_url => $args{-BBS_URL},
115             max_articles => $args{-MAX_ARTICLES} || 999999999,
116             verbose => $args{-VERBOSE} || 0,
117             };
118              
119 0 0         die "Must supply -BBS_URL" unless $self->{bbs_url};
120              
121 0           return bless $self, $class;
122             }
123              
124             =pod
125              
126             =item $b->retrieve([$catchup])
127              
128             This method emails new bulletin board messages. If the optional parameter
129             I is true, messages will be marked as read without being
130             emailed. Nothing useful will happen unless the C method is
131             defined to return the list of articles from the BBS's index page.
132              
133             B uses the B module to retrieve the index and
134             articles. It honors firewall proxies by calling the
135             C method. So if you are behind a firewall, define
136             the environment variable I and your firewall will be handled
137             correctly.
138              
139             =back
140              
141             =cut
142              
143             # In hindsight this is embarrassingly monolithic.
144             sub retrieve {
145 0     0 1   my $self = shift;
146 0   0       my $catchup = shift || 0;
147 0           my %msgs = ();
148 0           my $lock_file = $self->{db}."_lock";
149 0 0         open(LOCK, ">".$lock_file) or die "Can't open lock file, '$lock_file': $!";
150 0 0         flock(LOCK, LOCK_EX|LOCK_NB) or exit;
151              
152 0           tie %msgs, 'SDBM_File', $self->{db}, O_CREAT|O_RDWR, 0644;
153              
154 0           my $ua = LWP::UserAgent->new;
155 0           $ua->env_proxy();
156              
157 0           my $res = $ua->request(HTTP::Request->new('GET', $self->{bbs_url}));
158 0 0         if ($res->is_error) {
159 0           my $now = time;
160 0 0         if (defined($msgs{ERROR_TIME})) {
161 0 0         if ($now - $msgs{ERROR_TIME} > $self->{warn_timeout}) {
162 0           $self->_mail_error("Unable to retrieve the page\n",
163             $self->{bbs_url},
164 0           "\nfor over ${\($self->{warn_timeout}/3600.0)} hours. Will keep trying.\n",
165             " ---- Server Error Response ----\n",
166             $res->error_as_HTML,);
167 0           $msgs{ERROR_TIME} = $now;
168             }
169             } else {
170 0           $msgs{ERROR_TIME} = $now;
171             }
172             } else {
173 0           my $err = '';
174 0           my $content = $res->content;
175 0 0         print STDERR "Retrieved index successfully.\n" if $self->{verbose} > 1;
176 0           my @articles = $self->article_list(\$content);
177 0 0         print STDERR "Found ", scalar(@articles), " articles.\n"
178             if $self->{verbose} > 1;
179 0           my $ct = 0;
180 0           foreach my $art_url (sort @articles) {
181 0 0 0       next if defined $msgs{$art_url} and $msgs{$art_url} > 0;
182 0 0         if ($catchup) {
183 0 0         print STDERR "Marking $art_url as read\n" if $self->{verbose};
184 0           $msgs{$art_url} = time;
185 0 0 0       exit if defined $self->{max_articles} and
186             ++$ct >= $self->{max_articles};
187 0           next;
188             }
189 0           $res = $ua->request(HTTP::Request->new('GET', $art_url));
190 0 0         if ($res->is_success) {
191 0 0         print STDERR "Sending $art_url\n" if $self->{verbose};
192 0           my $content = $res->content;
193 0           my ($type, $data) = $self->process_article(\$content);
194 0           my %opts = (To => $self->{addr},
195             Subject => $art_url,
196             Type => $type,
197             Data => $$data,
198             'Message-Id' => "<$art_url>");
199             {
200             # There is a very real and legitimate possibility of unitialized
201             # values in this block. Turn off warnings.
202 0           local $ = 0;
  0            
203 0           my ($from, $name, $subj, $timestamp, $reference) =
204             $self->get_header_info(\$content);
205 0           my $new_from = 0;
206 0 0 0       if ($from and $name) {
    0          
    0          
207 0           $new_from = "\"$name\" <$from>";
208             } elsif ($from) {
209 0           $new_from = $from;
210             } elsif ($name) {
211 0           $new_from = "\"$name\"";
212             }
213 0 0         $opts{From} = $new_from if $new_from;
214 0 0         $opts{Subject} = $subj if $subj;
215 0 0         $opts{Date} = $timestamp if $timestamp;
216 0 0         $opts{References} = "<$reference>" if $reference;
217             }
218 0           my $m = MIME::Lite->new(%opts);
219 0           $m->send;
220 0           $msgs{$art_url} = time;
221 0 0 0       exit if defined $self->{max_articles} and
222             ++$ct >= $self->{max_articles};
223             } else {
224 0 0         if (--$msgs{$art_url} <= -3) {
225 0           $self->_mail_error("Trouble retrieving $art_url. Failed 3 times. Marking as read.\n", $res->error_as_HTML);
226 0           $msgs{$art_url} = time;
227             }
228             }
229             }
230             }
231 0           untie %msgs;
232 0           flock(LOCK, LOCK_UN);
233             }
234              
235             =pod
236              
237             =head1 USER-REFINABLE METHODS
238              
239             =over 4
240              
241             =pod
242              
243             =item $b->article_list($content_ref)
244              
245             Method that returns a list of complete URLs for the articles on the bulletin
246             board. It is passed a reference to the contents of the bbs_url page. The base
247             version does not do anything.
248              
249             =cut
250              
251             sub article_list {
252 0     0 1   return;
253             }
254              
255             =pod
256              
257             =item $b->get_header_info($content_ref)
258              
259             Method that returns the header info for the message. It is passed a scalar
260             reference to the entire HTML for the message. The method should return a
261             list of
262              
263             * the poster's email address
264             * the poster's name
265             * the article's subject
266             * the article's timestamp
267             * any response-to message URL
268              
269             Any values in the return list can be undef, but the more info returned, the
270             more useful the email headers will be. The base version of the method doesn't
271             do anything.
272              
273             =cut
274              
275             sub get_header_info {
276 0     0 1   return;
277             }
278              
279             =pod
280              
281             =item $b->process_article($content_ref)
282              
283             Method that is used to process the article before it is mailed. It is passed a
284             reference to the contents of the article. It should return a list of the MIME
285             type of the article and a reference to the contents of the article. For
286             example, you could refine this method to run the article through
287             B so that text messages are sent instead of HTML ones. The
288             default method returns the list of C and its argument untouched.
289              
290             =back
291              
292             =cut
293              
294             sub process_article {
295 0     0 1   shift; # get rid of "$self"
296 0           return ('text/html', @_);
297             }
298              
299             ################################# Internal methods #########################
300              
301             sub _mail_error {
302 0     0     my $self = shift;
303 0           my @data = @_;
304              
305 0 0         if ($self->{addr}) {
306 0           MIME::Lite->new(To=>$self->{addr}, Subject=>'BBSWatch Error!',
307             Type=>'TEXT', Data=>join('', @data))->send;
308             }
309             }
310              
311             ############################## End Internal methods ########################
312              
313             1;
314              
315             __END__