File Coverage

blib/lib/SpamcupNG.pm
Criterion Covered Total %
statement 30 212 14.1
branch 0 102 0.0
condition 0 20 0.0
subroutine 10 13 76.9
pod 3 3 100.0
total 43 350 12.2


line stmt bran cond sub pod time code
1             package SpamcupNG;
2 2     2   15026 use warnings;
  2         5  
  2         68  
3 2     2   10 use strict;
  2         4  
  2         56  
4 2     2   1088 use LWP::UserAgent 6.05;
  2         70961  
  2         69  
5 2     2   1131 use HTML::Form 6.03;
  2         25441  
  2         71  
6 2     2   923 use HTTP::Cookies 6.01;
  2         10151  
  2         55  
7 2     2   2820 use Getopt::Std;
  2         71  
  2         116  
8 2     2   657 use YAML::XS 0.62 qw(LoadFile);
  2         4181  
  2         100  
9 2     2   13 use File::Spec;
  2         4  
  2         48  
10 2     2   946 use Hash::Util qw(lock_hash);
  2         4029  
  2         19  
11 2     2   154 use Exporter 'import';
  2         4  
  2         3177  
12              
13             our @EXPORT_OK = qw(read_config main_loop get_browser %MAP);
14              
15             our %MAP = (
16             'nothing' => 'n',
17             'all' => 'a',
18             'stupid' => 's',
19             'quiet' => 'q',
20             'alt_code' => 'c',
21             'alt_user' => 'l',
22             'info_level' => 'd',
23             'debug_level' => 'D'
24             );
25              
26             lock_hash(%MAP);
27              
28             our $VERSION = '0.4'; # VERSION
29              
30             =head1 NAME
31              
32             SpamcupNG - module to export functions for spamcup program
33              
34             =head1 SYNOPSIS
35              
36             use SpamcupNG qw(read_config get_browser);
37              
38             =head1 DESCRIPTION
39              
40             Spamcup NG is a Perl web crawler for finishing Spamcop.net reports automatically. This module implements the functions used by the spamcup program.
41              
42             See the README.md file on this project for more details.
43              
44             See the INSTALL for setup instructions.
45              
46             =head1 EXPORTS
47              
48             =head2 read_config
49              
50             Reads a YAML file, sets the command line options and return the associated accounts.
51              
52             Expects as parameter a string with the full path to the YAML file and a hash reference of the
53             command line options read (as returned by L C function).
54              
55             The hash reference options will set as defined in the YAML file.
56             Options defined in the YAML have preference of those read on the command line then.
57              
58             It will also return all data configured in the C section of the YAML file as a hash refence. Check the README.md file for more details about
59             the configuration file.
60              
61             =cut
62              
63             sub read_config {
64 0     0 1   my ( $cfg, $cmd_opts ) = @_;
65 0           my $data = LoadFile($cfg);
66              
67 0           for my $opt ( keys(%MAP) ) {
68              
69 0 0 0       if ( exists( $data->{ExecutionOptions}->{$opt} )
70             and ( $data->{ExecutionOptions}->{$opt} eq 'y' ) )
71             {
72 0           $cmd_opts->{$opt} = 1;
73             }
74             else {
75 0           $cmd_opts->{$opt} = 0;
76             }
77              
78             }
79              
80 0           return $data->{Accounts};
81             }
82              
83             =pod
84              
85             =head2 get_browser
86              
87             Creates a instance of L and returns it.
88              
89             Expects two string as parameters: one with the name to associated with the user
90             agent and the another as version of it.
91              
92             =cut
93              
94             # :TODO:23/04/2017 17:21:28:ARFREITAS: Add options to configure nice things
95             # like HTTP proxy
96              
97             sub get_browser {
98 0     0 1   my ( $name, $version ) = @_;
99 0           my $ua = LWP::UserAgent->new();
100 0           $ua->agent("$name/$version");
101 0           $ua->cookie_jar( HTTP::Cookies->new() );
102 0           return $ua;
103             }
104              
105             =pod
106              
107             =head2 main_loop
108              
109             Processes all the pending spam reports in a loop until finished.
110              
111             Expects as parameter (in this sequence):
112              
113             =over
114              
115             =item *
116              
117             a L instance
118              
119             =item *
120              
121             A hash reference with the following key/values:
122              
123             =over
124              
125             =item *
126              
127             ident => The identity to Spamcop
128              
129             =item *
130              
131             pass => The password to Spamcop
132              
133             =item *
134              
135             debug => true (1) or false (0) to enable/disable debug information
136              
137             =item *
138              
139             delay => time in seconds to wait for next iteration with Spamcop website
140              
141             =item *
142              
143             quiet => true (1) or false (0) to enable/disable messages
144              
145             As confusing as it seems, current implementation may accept debug messages
146             B disable other messages.
147              
148             =item *
149              
150             check_only => true (1) or false (0) to only check for unreported SPAM, but not reporting them
151              
152             =back
153              
154             =back
155              
156             Returns true if everything went right, or C if a fatal error happened.
157              
158             =cut
159              
160             # :TODO:23/04/2017 16:04:17:ARFREITAS: probably this sub is too large
161             # It should be refactored to at least separate the parsing from HTML content recover
162             sub main_loop {
163 0     0 1   my ( $ua, $opts_ref ) = @_;
164              
165             # last seen SPAM id
166 0           my $last_seen;
167              
168             # Get first page that contains link to next one...
169              
170             # :TODO:23/04/2017 17:06:59:ARFREITAS: replace all this debugging checks with Log::Log4perl
171 0 0         if ( $opts_ref->{debug} ) {
172 0 0         if ( $opts_ref->{pass} ) {
173             print 'D: GET http://', $opts_ref->{ident},
174 0           ':******@members.spamcop.net/', "\n";
175             }
176             else {
177             print 'D: GET http://www.spamcop.net/?code=', $opts_ref->{ident},
178 0           "\n";
179             }
180             }
181              
182 0 0         if ( $opts_ref->{debug} ) {
183 0           print 'D: sleeping for ', $opts_ref->{delay}, " seconds.\n";
184             }
185              
186 0           sleep $opts_ref->{delay};
187              
188 0           my $req;
189              
190 0 0         if ( $opts_ref->{pass} ) {
191 0           $req = HTTP::Request->new( GET => 'http://members.spamcop.net/' );
192 0           $req->authorization_basic( $opts_ref->{ident}, $opts_ref->{pass} );
193             }
194             else {
195             $req =
196             HTTP::Request->new(
197 0           GET => 'http://www.spamcop.net/?code=' . $opts_ref->{ident} );
198             }
199              
200 0           my $res = $ua->request($req);
201              
202             # verify response
203 0 0         if ( $res->is_success ) {
204 0 0         if ( $opts_ref->{debug} ) {
205 0           print "D: Got HTTP response\n";
206             }
207             }
208             else {
209 0           my $response = $res->status_line();
210 0 0         if ( $response =~ /500/ ) {
211 0           die "E: Can\'t connect to server: " . $response;
212             }
213             else {
214 0           warn $response;
215 0           die
216             "E: Can\'t connect to server or invalid credentials. Please verify your username and password and try again.\n";
217             }
218             }
219              
220 0 0         if ( $opts_ref->{debug} ) {
221 0           print
222             "\n--------------------------------------------------------------------------\n";
223 0           print $res->content;
224 0           print
225             "--------------------------------------------------------------------------\n\n";
226             }
227              
228             # Parse id for link
229 0 0         if ( $res->content =~ /\>No userid found\
230 0           die
231             "E: No userid found. Please check that you have entered correct code. Also consider obtaining a password to Spamcop.net instead of using the old-style authorization token.\n";
232             }
233              
234 0           my $fullname;
235              
236 0 0         if ( $res->content =~ /(Welcome, .*?)\./ ) {
237              
238             # found full name, print out the greeting string
239 0           print "* $1\n";
240             }
241              
242 0           my $nextid;
243              
244 0 0         if ( $res->content =~ /sc\?id\=(.*?)\"\>/gi ) { # this is easy to parse
245             # userid ok, new spam available
246 0           $nextid = $1;
247             }
248             else {
249             # userid ok, no new spam
250 0 0         unless ( $opts_ref->{quiet} ) {
251 0           print "* No unreported spam found. Quitting.\n";
252             }
253 0           return -1; # quit
254             }
255              
256 0 0         if ( $opts_ref->{quiet} ) {
257 0           print "* ID of the next spam is '$nextid'.\n";
258             }
259              
260             # avoid loops
261 0 0 0       if ( ($last_seen) and ( $nextid eq $last_seen ) ) {
262 0           die
263             "E: I have seen this ID earlier. We don't want to report it again. This usually happens because of a bug in Spamcup. Make sure you use latest version! You may also want to go check from Spamcop what's happening: http://www.spamcop.net/sc?id=$nextid\n";
264             }
265              
266 0           $last_seen = $nextid; # store for comparison
267              
268 0           $req = undef;
269 0           $res = undef;
270              
271             # Fetch the spam report form
272              
273 0 0         if ( $opts_ref->{debug} ) {
274 0           print "D: GET http://www.spamcop.net/sc?id=$nextid\n";
275 0           print 'D: Sleeping for ', $opts_ref->{delay}, " seconds.\n";
276             }
277              
278 0           sleep $opts_ref->{delay};
279              
280 0           $req =
281             HTTP::Request->new( GET => 'http://www.spamcop.net/sc?id=' . $nextid );
282 0           $res = $ua->request($req);
283              
284 0 0         if ( $res->is_success ) {
285 0 0         if ( $opts_ref->{debug} ) {
286 0           print "D: Got HTTP response\n";
287              
288             # print "D: Headers follow:\n". $res->headers->as_string ."\n\n";
289             }
290              
291             }
292             else {
293 0           die "E: Can't connect to server. Try again later.\n\n";
294             }
295              
296 0 0         if ( $opts_ref->{debug} ) {
297 0           print
298             "\n--------------------------------------------------------------------------\n";
299 0           print $res->content;
300 0           print
301             "--------------------------------------------------------------------------\n\n";
302             }
303              
304             # parse the spam
305              
306 0           my $_cancel = 0;
307              
308 0           my $base_uri = $res->base();
309 0 0         if ( !$base_uri ) {
310 0           print "E: No base uri found. Internal error? Please report this.\n";
311 0           exit;
312             }
313              
314             $res->content =~
315 0           /(\
]+name=\"sendreport\"\>.*?\<\/form\>)/sgi;
316 0           my $formdata = "$1";
317 0           my $form = HTML::Form->parse( $formdata, $base_uri );
318              
319             # print the header of the spam
320              
321 0           my $spamhead;
322 0 0         if ( $res->content =~
    0          
    0          
    0          
    0          
323             /Please make sure this email IS spam.*?size=2\>\n(.*?)\
324             )
325             { # this is also quite easy...
326             # this is the normal case
327              
328 0           $spamhead = $1;
329 0 0         unless ( $opts_ref->{quiet} ) {
330 0           print "* Head of the spam follows >>>\n";
331 0           $spamhead =~ s/\n/\t/igs; # prepend a tab to each line
332 0           $spamhead =~ s/
/\n/gsi; # simplify a bit
333 0           print "\t$spamhead\n";
334 0           print "<<<\n";
335             }
336              
337             # parse form fields
338             # verify form
339 0 0         unless ($form) {
340 0 0         if ( $opts_ref->{debug} ) {
341 0           print
342             "D: Spamcop returned invalid HTML form. Usually temporary error.\n";
343             }
344 0           die "E: Temporary Spamcop.net error. Try again later! Quitting.\n";
345             }
346             else {
347 0 0         if ( $opts_ref->{debug} ) {
348 0           print "D: Form data follows:\n" . $form->dump . "\n\n";
349             }
350              
351             # how many recepients for reports
352 0           my $max = $form->value("max");
353              
354 0           my $willsend;
355             my $wontsend;
356              
357             # iterate targets
358 0           for ( my $i = 1 ; $i <= $max ; $i++ ) {
359 0           my $send = $form->value("send$i");
360 0           my $type = $form->value("type$i");
361 0           my $master = $form->value("master$i");
362 0           my $info = $form->value("info$i");
363              
364             # convert %2E -style stuff back to text, if any
365 0 0         if ( $info =~ /%([A-Fa-f\d]{2})/g ) {
366 0           $info =~ s/%([A-Fa-f\d]{2})/chr hex $1/eg;
  0            
367             }
368              
369 0 0 0       if (
      0        
370             $send
371             and ( ( $send eq 'on' )
372             or ( $type =~ /^mole/ and $send == 1 ) )
373             )
374             {
375 0           $willsend .= "\t$master \t($info)\n";
376             }
377             else {
378 0           $wontsend .= "\t$master \t($info)\n";
379             }
380             }
381              
382 0           print
383             "Would send the report to the following addresses: (Reason in parenthesis)\n";
384 0 0         if ($willsend) {
385 0           print $willsend;
386             }
387             else {
388 0           print "\t--none--\n";
389             }
390              
391 0           print "Following addresses would not be used:\n";
392 0 0         if ($wontsend) {
393 0           print $wontsend;
394             }
395             else {
396 0           print "\t--none--\n";
397             }
398              
399             }
400              
401             # Run without confirming each spam? Stupid. :)
402 0 0         unless ( $opts_ref->{stupid} ) {
403 0           print "* Are you sure this is spam? [y/N] ";
404              
405 0           my $reply = <>; # this should be done differently!
406 0 0 0       if ( $reply && $reply !~ /^y/i ) {
    0          
407 0           print "* Cancelled.\n";
408 0           $_cancel = 1; # mark to be cancelled
409             }
410             elsif ( !$reply ) {
411 0           print "* Accepted.\n";
412             }
413             else {
414 0           print "* Accepted.\n";
415             }
416             }
417             else {
418             # little delay for automatic processing
419 0           sleep $opts_ref->{delay};
420             }
421 0           print "...\n";
422              
423             }
424             elsif ( $res->content =~ /Send Spam Report\(S\) Now/gi ) {
425              
426             # this happens rarely, but I've seen this; spamcop does not show preview headers for some reason
427 0 0         unless ( $opts_ref->{stupid} ) {
428 0           print
429             "* Preview headers not available, but you can still report this. Are you sure this is spam? [y/N] ";
430              
431 0           my $reply = <>;
432 0 0 0       if ( $reply && $reply !~ /^y/i ) {
433              
434             # not Y
435 0           print "* Cancelled.\n";
436 0           $_cancel = 1; # mark to be cancelled
437             }
438             else {
439             # Y
440 0           print "* Accepted.\n";
441             }
442             }
443              
444             }
445             elsif ( $res->content =~
446             /Sorry, this email is too old.*This mail was received on (.*?)\<\/.*\>/gsi
447             )
448             {
449             # perhaps it's too old then
450 0           my $ondate = $1;
451 0 0         unless ( $opts_ref->{quiet} ) {
452 0           print
453             "W: This spam is too old. You must report spam within 3 days of receipt. This mail was received on $ondate. Deleted.\n";
454             }
455 0           return 0;
456              
457             }
458             elsif ( $res->content =~
459             /click reload if this page does not refresh automatically in \n(\d+) seconds/gs
460             )
461             {
462 0           my $delay = $1;
463 0           print
464             "W: Spamcop seems to be currently overloaded. Trying again in $delay seconds. Wait...\n";
465 0           sleep $opts_ref->{delay};
466              
467             # fool it to avoid duplicate detector
468 0           $last_seen = 0;
469              
470             # fake that everything is ok
471 0           return 1;
472             }
473             elsif ( $res->content =~
474             /No source IP address found, cannot proceed. Not full header/gs )
475             {
476 0           print
477             "W: No source IP address found. Your report might be missing headers. Skipping.\n";
478 0           return 0;
479             }
480              
481             else {
482             # Shit happens. If you know it should be parseable, please report a bug!
483 0           print
484             "W: Can't parse Spamcop.net's HTML. If this does not happen very often you can ignore this warning. Otherwise check if there's new version available. Skipping.\n";
485 0           return 0;
486             }
487              
488 0 0         if ( $opts_ref->{check_only} ) {
489 0           print
490             "* You gave option -n, so we'll stop here. The spam was NOT reported.\n";
491 0           exit;
492             }
493              
494 0 0         if ( $opts_ref->{debug} ) {
495 0           print "\n\nD: Starting the parse phase...\n";
496             }
497              
498 0           undef $req;
499 0           undef $res;
500              
501             # Submit the form to Spamcop OR cancel report
502              
503 0 0         if ( !$_cancel ) { # SUBMIT spam
504              
505 0 0         if ( $opts_ref->{debug} ) {
506 0           print "D: Submitting form. We will use the default recipients.\n";
507 0           print "D: GET http://www.spamcop.net/sc?id=$nextid\n";
508 0           print 'D: Sleeping for ', $opts_ref->{delay}, " seconds.\n";
509             }
510 0           sleep $opts_ref->{delay};
511 0           $res = LWP::UserAgent->new->request( $form->click() )
512             ; # click default button, submit
513             }
514             else { # CANCEL SPAM
515 0 0         if ( $opts_ref->{debug} ) {
516 0           print "D: About to cancel report.\n";
517             }
518 0           $res = LWP::UserAgent->new->request( $form->click('cancel') )
519             ; # click cancel button
520             }
521              
522             # Check the outcome of the response
523 0 0         if ( $res->is_success ) {
524 0 0         if ( $opts_ref->{debug} ) {
525 0           print "D: Got HTTP response\n";
526 0           print "D: -- content follows -------------------------\n";
527 0           print $res->content;
528 0           print "D: -- content ended -------------------------\n\n";
529             }
530              
531             }
532             else {
533 0           die "E: Can't connect to server. Try again later. Quitting.\n";
534             }
535              
536 0 0         if ($_cancel) {
537 0           return 1; # user decided this mail is not spam
538             }
539              
540             # parse respond
541 0           my $report;
542 0 0         if ( $res->content =~ /(Spam report id .*?)\/gsi ) {
    0          
543 0   0       $report = $1 || "-none-\n";
544 0           $report =~ s/\//gi;
545             }
546             elsif ( $res->content =~ /report for mole\@devnull.spamcop.net/ ) {
547 0           $report = 'Mole report(s)';
548             }
549             else {
550 0           print
551             "W: Spamcop.net returned unexpected content. If this does not happen very often you can ignore this. Otherwise check if there new version available. Continuing.\n";
552             }
553              
554             # print the report
555              
556 0 0         unless ( $opts_ref->{quiet} ) {
557 0           print "Spamcop.net sent following spam reports:\n";
558 0 0         print "$report\n" if $report;
559 0           print "* Finished processing.\n";
560             }
561              
562 0           return 1;
563              
564             # END OF THE LOOP
565             }
566              
567             =head1 AUTHOR
568              
569             Alceu Rodrigues de Freitas Junior, Earfreitas@cpan.orgE
570              
571             =head1 COPYRIGHT AND LICENSE
572              
573             This software is copyright (c) 2015 of Alceu Rodrigues de Freitas Junior, Earfreitas@cpan.orgE
574              
575             This file is part of spamcupNG distribution.
576              
577             spamcupNG is free software: you can redistribute it and/or modify
578             it under the terms of the GNU General Public License as published by
579             the Free Software Foundation, either version 3 of the License, or
580             (at your option) any later version.
581              
582             spamcupNG is distributed in the hope that it will be useful,
583             but WITHOUT ANY WARRANTY; without even the implied warranty of
584             MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
585             GNU General Public License for more details.
586              
587             You should have received a copy of the GNU General Public License
588             along with spamcupNG. If not, see .
589              
590             =cut
591              
592             1;