File Coverage

blib/lib/SpamcupNG.pm
Criterion Covered Total %
statement 33 218 15.1
branch 0 102 0.0
condition 0 20 0.0
subroutine 11 14 78.5
pod 3 3 100.0
total 47 357 13.1


line stmt bran cond sub pod time code
1             package SpamcupNG;
2 2     2   22513 use warnings;
  2         6  
  2         85  
3 2     2   14 use strict;
  2         5  
  2         64  
4 2     2   1438 use LWP::UserAgent 6.05;
  2         107932  
  2         78  
5 2     2   1313 use HTML::Form 6.03;
  2         36885  
  2         92  
6 2     2   1364 use HTTP::Cookies 6.01;
  2         18399  
  2         97  
7 2     2   4960 use Getopt::Std;
  2         117  
  2         188  
8 2     2   1363 use HTML::Entities 3.69;
  2         15897  
  2         273  
9 2     2   1295 use YAML::XS 0.62 qw(LoadFile);
  2         6405  
  2         156  
10 2     2   23 use File::Spec;
  2         6  
  2         92  
11 2     2   1546 use Hash::Util qw(lock_hash);
  2         6647  
  2         18  
12 2     2   288 use Exporter 'import';
  2         7  
  2         5389  
13              
14             our @EXPORT_OK = qw(read_config main_loop get_browser %MAP);
15              
16             our %MAP = (
17             'nothing' => 'n',
18             'all' => 'a',
19             'stupid' => 's',
20             'quiet' => 'q',
21             'alt_code' => 'c',
22             'alt_user' => 'l',
23             'info_level' => 'd',
24             'debug_level' => 'D'
25             );
26              
27             lock_hash(%MAP);
28              
29             our $VERSION = '0.6'; # VERSION
30              
31             =head1 NAME
32              
33             SpamcupNG - module to export functions for spamcup program
34              
35             =head1 SYNOPSIS
36              
37             use SpamcupNG qw(read_config get_browser);
38              
39             =head1 DESCRIPTION
40              
41             Spamcup NG is a Perl web crawler for finishing Spamcop.net reports automatically. This module implements the functions used by the spamcup program.
42              
43             See the README.md file on this project for more details.
44              
45             See the INSTALL for setup instructions.
46              
47             =head1 EXPORTS
48              
49             =head2 read_config
50              
51             Reads a YAML file, sets the command line options and return the associated accounts.
52              
53             Expects as parameter a string with the full path to the YAML file and a hash reference of the
54             command line options read (as returned by L C function).
55              
56             The hash reference options will set as defined in the YAML file.
57             Options defined in the YAML have preference of those read on the command line then.
58              
59             It will also return all data configured in the C section of the YAML file as a hash refence. Check the README.md file for more details about
60             the configuration file.
61              
62             =cut
63              
64             sub read_config {
65 0     0 1   my ( $cfg, $cmd_opts ) = @_;
66 0           my $data = LoadFile($cfg);
67              
68 0           for my $opt ( keys(%MAP) ) {
69              
70 0 0 0       if ( exists( $data->{ExecutionOptions}->{$opt} )
71             and ( $data->{ExecutionOptions}->{$opt} eq 'y' ) )
72             {
73 0           $cmd_opts->{$opt} = 1;
74             }
75             else {
76 0           $cmd_opts->{$opt} = 0;
77             }
78              
79             }
80              
81 0           return $data->{Accounts};
82             }
83              
84             =pod
85              
86             =head2 get_browser
87              
88             Creates a instance of L and returns it.
89              
90             Expects two string as parameters: one with the name to associated with the user
91             agent and the another as version of it.
92              
93             =cut
94              
95             # :TODO:23/04/2017 17:21:28:ARFREITAS: Add options to configure nice things
96             # like HTTP proxy
97              
98             sub get_browser {
99 0     0 1   my ( $name, $version ) = @_;
100 0           my $ua = LWP::UserAgent->new();
101 0           $ua->agent("$name/$version");
102 0           $ua->cookie_jar( HTTP::Cookies->new() );
103 0           return $ua;
104             }
105              
106             =pod
107              
108             =head2 main_loop
109              
110             Processes all the pending spam reports in a loop until finished.
111              
112             Expects as parameter (in this sequence):
113              
114             =over
115              
116             =item *
117              
118             a L instance
119              
120             =item *
121              
122             A hash reference with the following key/values:
123              
124             =over
125              
126             =item *
127              
128             ident => The identity to Spamcop
129              
130             =item *
131              
132             pass => The password to Spamcop
133              
134             =item *
135              
136             debug => true (1) or false (0) to enable/disable debug information
137              
138             =item *
139              
140             delay => time in seconds to wait for next iteration with Spamcop website
141              
142             =item *
143              
144             quiet => true (1) or false (0) to enable/disable messages
145              
146             As confusing as it seems, current implementation may accept debug messages
147             B disable other messages.
148              
149             =item *
150              
151             check_only => true (1) or false (0) to only check for unreported SPAM, but not reporting them
152              
153             =back
154              
155             =back
156              
157             Returns true if everything went right, or C if a fatal error happened.
158              
159             =cut
160              
161             # :TODO:23/04/2017 16:04:17:ARFREITAS: probably this sub is too large
162             # It should be refactored to at least separate the parsing from HTML content recover
163             sub main_loop {
164 0     0 1   my ( $ua, $opts_ref ) = @_;
165              
166             # last seen SPAM id
167 0           my $last_seen;
168              
169             # Get first page that contains link to next one...
170              
171             # :TODO:23/04/2017 17:06:59:ARFREITAS: replace all this debugging checks with Log::Log4perl
172 0 0         if ( $opts_ref->{debug} ) {
173 0 0         if ( $opts_ref->{pass} ) {
174             print 'D: GET http://', $opts_ref->{ident},
175 0           ':******@members.spamcop.net/', "\n";
176             }
177             else {
178             print 'D: GET http://www.spamcop.net/?code=', $opts_ref->{ident},
179 0           "\n";
180             }
181             }
182              
183 0 0         if ( $opts_ref->{debug} ) {
184 0           print 'D: sleeping for ', $opts_ref->{delay}, " seconds.\n";
185             }
186              
187 0           sleep $opts_ref->{delay};
188              
189 0           my $req;
190              
191 0 0         if ( $opts_ref->{pass} ) {
192 0           $req = HTTP::Request->new( GET => 'http://members.spamcop.net/' );
193 0           $req->authorization_basic( $opts_ref->{ident}, $opts_ref->{pass} );
194             }
195             else {
196             $req =
197             HTTP::Request->new(
198 0           GET => 'http://www.spamcop.net/?code=' . $opts_ref->{ident} );
199             }
200              
201 0           my $res = $ua->request($req);
202              
203             # verify response
204 0 0         if ( $res->is_success ) {
205 0 0         if ( $opts_ref->{debug} ) {
206 0           print "D: Got HTTP response\n";
207             }
208             }
209             else {
210 0           my $response = $res->status_line();
211 0 0         if ( $response =~ /500/ ) {
212 0           die "E: Can\'t connect to server: " . $response;
213             }
214             else {
215 0           warn $response;
216 0           die
217             "E: Can\'t connect to server or invalid credentials. Please verify your username and password and try again.\n";
218             }
219             }
220              
221 0 0         if ( $opts_ref->{debug} ) {
222 0           print
223             "\n--------------------------------------------------------------------------\n";
224 0           print $res->content;
225 0           print
226             "--------------------------------------------------------------------------\n\n";
227             }
228              
229             # Parse id for link
230 0 0         if ( $res->content =~ /\>No userid found\
231 0           die
232             "E: No userid found. Please check that you have entered correct code. Also consider obtaining a password to Spamcop.net instead of using the old-style authorization token.\n";
233             }
234              
235 0           my $fullname;
236              
237 0 0         if ( $res->content =~ /(Welcome, .*?)\./ ) {
238              
239             # found full name, print out the greeting string
240 0           print "* $1\n";
241             }
242              
243 0           my $nextid;
244              
245 0 0         if ( $res->content =~ /sc\?id\=(.*?)\"\>/gi ) { # this is easy to parse
246             # userid ok, new spam available
247 0           $nextid = $1;
248             }
249             else {
250             # userid ok, no new spam
251 0 0         unless ( $opts_ref->{quiet} ) {
252 0           print "* No unreported spam found. Quitting.\n";
253             }
254 0           return -1; # quit
255             }
256              
257 0 0         if ( $opts_ref->{quiet} ) {
258 0           print "* ID of the next spam is '$nextid'.\n";
259             }
260              
261             # avoid loops
262 0 0 0       if ( ($last_seen) and ( $nextid eq $last_seen ) ) {
263 0           die
264             "E: I have seen this ID earlier. We don't want to report it again. This usually happens because of a bug in Spamcup. Make sure you use latest version! You may also want to go check from Spamcop what's happening: http://www.spamcop.net/sc?id=$nextid\n";
265             }
266              
267 0           $last_seen = $nextid; # store for comparison
268              
269 0           $req = undef;
270 0           $res = undef;
271              
272             # Fetch the spam report form
273              
274 0 0         if ( $opts_ref->{debug} ) {
275 0           print "D: GET http://www.spamcop.net/sc?id=$nextid\n";
276 0           print 'D: Sleeping for ', $opts_ref->{delay}, " seconds.\n";
277             }
278              
279 0           sleep $opts_ref->{delay};
280              
281 0           $req =
282             HTTP::Request->new( GET => 'http://www.spamcop.net/sc?id=' . $nextid );
283 0           $res = $ua->request($req);
284              
285 0 0         if ( $res->is_success ) {
286 0 0         if ( $opts_ref->{debug} ) {
287 0           print "D: Got HTTP response\n";
288 0           print "D: Headers follow:\n" . $res->headers->as_string . "\n\n";
289             }
290              
291             }
292             else {
293 0           die "E: Can't connect to server. Try again later.\n\n";
294             }
295              
296 0 0         if ( $opts_ref->{debug} ) {
297 0           print
298             "\n--------------------------------------------------------------------------\n";
299 0           print $res->content;
300 0           print
301             "--------------------------------------------------------------------------\n\n";
302             }
303              
304             # parse the spam
305              
306 0           my $_cancel = 0;
307              
308 0           my $base_uri = $res->base();
309 0 0         if ( !$base_uri ) {
310 0           print "E: No base uri found. Internal error? Please report this.\n";
311 0           exit;
312             }
313              
314             $res->content =~
315 0           /(\
]+name=\"sendreport\"\>.*?\<\/form\>)/sgi;
316 0           my $formdata = "$1";
317 0           my $form = HTML::Form->parse( $formdata, $base_uri );
318              
319             # print the header of the spam
320              
321 0 0         if ( $res->content =~
    0          
    0          
    0          
    0          
322             /Please make sure this email IS spam.*?size=2\>\n(.*?)\
323             )
324             {
325              
326 0 0         unless ( $opts_ref->{quiet} ) {
327 0           my $spamhead = decode_entities($1);
328 0           print "* Head of the spam follows >>>\n";
329 0           $spamhead =~ s/\n/\t/igs; # prepend a tab to each line
330 0           $spamhead =~ s/<\/?strong>//gi;
331 0           $spamhead =~ s/
/\n/gsi;
332 0           $spamhead =~ s/<\/?font>//gi;
333 0           binmode( STDOUT, ":utf8" );
334 0           print "\t$spamhead\n";
335 0           print "<<<\n";
336             }
337              
338             # parse form fields
339             # verify form
340 0 0         unless ($form) {
341 0 0         if ( $opts_ref->{debug} ) {
342 0           print
343             "D: Spamcop returned invalid HTML form. Usually temporary error.\n";
344             }
345 0           die "E: Temporary Spamcop.net error. Try again later! Quitting.\n";
346             }
347             else {
348 0 0         if ( $opts_ref->{debug} ) {
349 0           print "D: Form data follows:\n" . $form->dump . "\n\n";
350             }
351              
352             # how many recepients for reports
353 0           my $max = $form->value("max");
354              
355 0           my $willsend;
356             my $wontsend;
357              
358             # iterate targets
359 0           for ( my $i = 1 ; $i <= $max ; $i++ ) {
360 0           my $send = $form->value("send$i");
361 0           my $type = $form->value("type$i");
362 0           my $master = $form->value("master$i");
363 0           my $info = $form->value("info$i");
364              
365             # convert %2E -style stuff back to text, if any
366 0 0         if ( $info =~ /%([A-Fa-f\d]{2})/g ) {
367 0           $info =~ s/%([A-Fa-f\d]{2})/chr hex $1/eg;
  0            
368             }
369              
370 0 0 0       if (
      0        
371             $send
372             and ( ( $send eq 'on' )
373             or ( $type =~ /^mole/ and $send == 1 ) )
374             )
375             {
376 0           $willsend .= "\t$master \t($info)\n";
377             }
378             else {
379 0           $wontsend .= "\t$master \t($info)\n";
380             }
381             }
382              
383 0           print
384             "Would send the report to the following addresses: (Reason in parenthesis)\n";
385 0 0         if ($willsend) {
386 0           print $willsend;
387             }
388             else {
389 0           print "\t--none--\n";
390             }
391              
392 0           print "Following addresses would not be used:\n";
393 0 0         if ($wontsend) {
394 0           print $wontsend;
395             }
396             else {
397 0           print "\t--none--\n";
398             }
399              
400             }
401              
402             # Run without confirming each spam? Stupid. :)
403 0 0         unless ( $opts_ref->{stupid} ) {
404 0           print "* Are you sure this is spam? [y/N] ";
405              
406 0           my $reply = <>; # this should be done differently!
407 0 0 0       if ( $reply && $reply !~ /^y/i ) {
    0          
408 0           print "* Cancelled.\n";
409 0           $_cancel = 1; # mark to be cancelled
410             }
411             elsif ( !$reply ) {
412 0           print "* Accepted.\n";
413             }
414             else {
415 0           print "* Accepted.\n";
416             }
417             }
418             else {
419             # little delay for automatic processing
420 0           sleep $opts_ref->{delay};
421             }
422 0           print "...\n";
423              
424             }
425             elsif ( $res->content =~ /Send Spam Report\(S\) Now/gi ) {
426              
427             # this happens rarely, but I've seen this; spamcop does not show preview headers for some reason
428 0 0         unless ( $opts_ref->{stupid} ) {
429 0           print
430             "* Preview headers not available, but you can still report this. Are you sure this is spam? [y/N] ";
431              
432 0           my $reply = <>;
433 0 0 0       if ( $reply && $reply !~ /^y/i ) {
434              
435             # not Y
436 0           print "* Cancelled.\n";
437 0           $_cancel = 1; # mark to be cancelled
438             }
439             else {
440             # Y
441 0           print "* Accepted.\n";
442             }
443             }
444              
445             }
446             elsif ( $res->content =~
447             /Sorry, this email is too old.*This mail was received on (.*?)\<\/.*\>/gsi
448             )
449             {
450             # perhaps it's too old then
451 0           my $ondate = $1;
452 0 0         unless ( $opts_ref->{quiet} ) {
453 0           print
454             "W: This spam is too old. You must report spam within 3 days of receipt. This mail was received on $ondate. Deleted.\n";
455             }
456 0           return 0;
457              
458             }
459             elsif ( $res->content =~
460             /click reload if this page does not refresh automatically in \n(\d+) seconds/gs
461             )
462             {
463 0           my $delay = $1;
464 0           print
465             "W: Spamcop seems to be currently overloaded. Trying again in $delay seconds. Wait...\n";
466 0           sleep $opts_ref->{delay};
467              
468             # fool it to avoid duplicate detector
469 0           $last_seen = 0;
470              
471             # fake that everything is ok
472 0           return 1;
473             }
474             elsif ( $res->content =~
475             /No source IP address found, cannot proceed. Not full header/gs )
476             {
477 0           print
478             "W: No source IP address found. Your report might be missing headers. Skipping.\n";
479 0           return 0;
480             }
481              
482             else {
483             # Shit happens. If you know it should be parseable, please report a bug!
484 0           print
485             "W: Can't parse Spamcop.net's HTML. If this does not happen very often you can ignore this warning. Otherwise check if there's new version available. Skipping.\n";
486 0           return 0;
487             }
488              
489 0 0         if ( $opts_ref->{check_only} ) {
490 0           print
491             "* You gave option -n, so we'll stop here. The spam was NOT reported.\n";
492 0           exit;
493             }
494              
495 0 0         if ( $opts_ref->{debug} ) {
496 0           print "\n\nD: Starting the parse phase...\n";
497             }
498              
499 0           undef $req;
500 0           undef $res;
501              
502             # Submit the form to Spamcop OR cancel report
503              
504 0 0         if ( !$_cancel ) { # SUBMIT spam
505              
506 0 0         if ( $opts_ref->{debug} ) {
507 0           print "D: Submitting form. We will use the default recipients.\n";
508 0           print "D: GET http://www.spamcop.net/sc?id=$nextid\n";
509 0           print 'D: Sleeping for ', $opts_ref->{delay}, " seconds.\n";
510             }
511 0           sleep $opts_ref->{delay};
512 0           $res = LWP::UserAgent->new->request( $form->click() )
513             ; # click default button, submit
514             }
515             else { # CANCEL SPAM
516 0 0         if ( $opts_ref->{debug} ) {
517 0           print "D: About to cancel report.\n";
518             }
519 0           $res = LWP::UserAgent->new->request( $form->click('cancel') )
520             ; # click cancel button
521             }
522              
523             # Check the outcome of the response
524 0 0         if ( $res->is_success ) {
525 0 0         if ( $opts_ref->{debug} ) {
526 0           print "D: Got HTTP response\n";
527 0           print "D: -- content follows -------------------------\n";
528 0           print $res->content;
529 0           print "D: -- content ended -------------------------\n\n";
530             }
531              
532             }
533             else {
534 0           die "E: Can't connect to server. Try again later. Quitting.\n";
535             }
536              
537 0 0         if ($_cancel) {
538 0           return 1; # user decided this mail is not spam
539             }
540              
541             # parse respond
542 0           my $report;
543 0 0         if ( $res->content =~ /(Spam report id .*?)\/gsi ) {
    0          
544 0   0       $report = $1 || "-none-\n";
545 0           $report =~ s/\//gi;
546             }
547             elsif ( $res->content =~ /report for mole\@devnull.spamcop.net/ ) {
548 0           $report = 'Mole report(s)';
549             }
550             else {
551 0           print
552             "W: Spamcop.net returned unexpected content. If this does not happen very often you can ignore this. Otherwise check if there new version available. Continuing.\n";
553             }
554              
555             # print the report
556              
557 0 0         unless ( $opts_ref->{quiet} ) {
558 0           print "Spamcop.net sent following spam reports:\n";
559 0 0         print "$report\n" if $report;
560 0           print "* Finished processing.\n";
561             }
562              
563 0           return 1;
564              
565             # END OF THE LOOP
566             }
567              
568             =head1 AUTHOR
569              
570             Alceu Rodrigues de Freitas Junior, Earfreitas@cpan.orgE
571              
572             =head1 COPYRIGHT AND LICENSE
573              
574             This software is copyright (c) 2015 of Alceu Rodrigues de Freitas Junior, Earfreitas@cpan.orgE
575              
576             This file is part of spamcupNG distribution.
577              
578             spamcupNG is free software: you can redistribute it and/or modify
579             it under the terms of the GNU General Public License as published by
580             the Free Software Foundation, either version 3 of the License, or
581             (at your option) any later version.
582              
583             spamcupNG is distributed in the hope that it will be useful,
584             but WITHOUT ANY WARRANTY; without even the implied warranty of
585             MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
586             GNU General Public License for more details.
587              
588             You should have received a copy of the GNU General Public License
589             along with spamcupNG. If not, see .
590              
591             =cut
592              
593             1;