File Coverage

blib/lib/CPAN/Testers/Data/Addresses.pm
Criterion Covered Total %
statement 393 447 87.9
branch 152 206 73.7
condition 69 115 60.0
subroutine 33 33 100.0
pod 13 13 100.0
total 660 814 81.0


line stmt bran cond sub pod time code
1             package CPAN::Testers::Data::Addresses;
2              
3 14     14   272547 use 5.006;
  14         53  
  14         557  
4 14     14   72 use strict;
  14         28  
  14         429  
5 14     14   78 use warnings;
  14         32  
  14         460  
6              
7 14     14   72 use vars qw($VERSION);
  14         32  
  14         964  
8             $VERSION = '0.16';
9             $|++;
10              
11             #----------------------------------------------------------------------------
12             # Library Modules
13              
14 14     14   105 use base qw(Class::Accessor::Fast);
  14         25  
  14         13183  
15              
16 14     14   82616 use CPAN::Testers::Common::DBUtils;
  14         408860  
  14         148  
17 14     14   19755 use Config::IniFiles;
  14         608299  
  14         585  
18 14     14   165 use DBI;
  14         33  
  14         623  
19 14     14   82 use File::Basename;
  14         31  
  14         1222  
20 14     14   192 use File::Path;
  14         27  
  14         719  
21 14     14   15772 use File::Slurp;
  14         218994  
  14         1320  
22 14     14   20350 use Getopt::Long;
  14         198177  
  14         105  
23 14     14   16487 use IO::File;
  14         14617  
  14         108038  
24              
25             #----------------------------------------------------------------------------
26             # Variables
27              
28             my (%backups);
29              
30             my %phrasebook = (
31             'AllAddresses' => q{SELECT * FROM tester_address},
32             'AllAddressesFull' => q{SELECT a.*,p.name,p.pause FROM tester_address AS a INNER JOIN tester_profile AS p ON p.testerid=a.testerid},
33             'UpdateAddressIndex' => q{REPLACE INTO ixaddress (id,guid,addressid,fulldate) VALUES (?,?,?,?)},
34              
35             'InsertAddress' => q{INSERT INTO tester_address (testerid,address,email) VALUES (?,?,?)},
36             'GetAddressByText' => q{SELECT * FROM tester_address WHERE address = ?},
37             'LinkAddress' => q{UPDATE tester_address SET testerid=? WHERE addressid=?},
38              
39             'GetTesterByPause' => q{SELECT testerid FROM tester_profile WHERE pause = ?},
40             'GetTesterByName' => q{SELECT testerid FROM tester_profile WHERE name = ?},
41             'InsertTester' => q{INSERT INTO tester_profile (name,pause) VALUES (?,?)},
42              
43             'AllReports' => q{SELECT id,guid,tester,fulldate FROM cpanstats WHERE type=2 AND id > ? ORDER BY id LIMIT 1000000},
44             'GetTestersByMonth' => q{SELECT DISTINCT c.id,c.guid,c.tester,c.fulldate FROM cpanstats c LEFT JOIN ixaddress x ON x.id=c.id LEFT JOIN tester_address a ON a.addressid=x.addressid WHERE a.address IS NULL AND c.postdate >= '%s' AND c.state IN ('pass','fail','na','unknown')},
45             'GetTesters' => q{SELECT DISTINCT c.id,c.guid,c.tester,c.fulldate FROM cpanstats c LEFT JOIN ixaddress x ON x.id=c.id LEFT JOIN tester_address a ON a.addressid=x.addressid WHERE a.address IS NULL AND c.state IN ('pass','fail','na','unknown')},
46              
47             # Database backup requests
48             'DeleteBackup' => 'DELETE FROM addresses',
49             'CreateBackup' => 'CREATE TABLE addresses (testerid int, name text, pause text, PRIMARY KEY (testerid))',
50             'SelectBackup' => 'SELECT * FROM tester_profile',
51             'InsertBackup' => 'INSERT INTO addresses (testerid,name,pause) VALUES (?,?,?)',
52              
53             # Consolidations
54             'DuplicateAddresses' => q{SELECT address,count(*) AS count FROM tester_address GROUP BY address ORDER BY count DESC},
55             'UpdateAddress' => q{UPDATE ixaddress SET addressid=? WHERE addressid=?},
56             'DeleteAddress' => q{DELETE FROM tester_address WHERE addressid=?},
57             );
58              
59             my %defaults = (
60             'address' => 'data/addresses.txt',
61             'mailrc' => 'data/01mailrc.txt',
62             'lastfile' => 'data/_lastid',
63             'month' => 199000,
64             'match' => 0,
65             'logclean' => 0
66             );
67              
68             #----------------------------------------------------------------------------
69             # The Application Programming Interface
70              
71             sub new {
72 18     18 1 30003 my $class = shift;
73              
74 18         44 my $self = {};
75 18         57 bless $self, $class;
76              
77 18         88 $self->_init_options(@_);
78 13         96 return $self;
79             }
80              
81             sub DESTROY {
82 18     18   31868 my $self = shift;
83 18 100       338 $self->{fh}->close if($self->{fh});
84             }
85              
86             __PACKAGE__->mk_accessors(qw( lastfile logfile logclean dbh ));
87              
88             sub process {
89 4     4 1 12366 my $self = shift;
90              
91 4 100       35 if($self->{options}{update}) {
    100          
    50          
    50          
92 2         15 $self->update();
93              
94             } elsif($self->{options}{reindex}) {
95 1         5 $self->reindex();
96              
97             } elsif($self->{options}{backup}) {
98 0         0 $self->backup();
99              
100             } elsif($self->{options}{clean}) {
101 0         0 $self->clean();
102              
103             } else {
104 1         5 $self->search();
105             }
106             }
107              
108             sub search {
109 1     1 1 3 my $self = shift;
110 1         6 $self->_log("starting search");
111              
112 1         38 $self->load_addresses();
113 1         7 $self->match_addresses();
114 1         5 $self->print_addresses();
115              
116 1         5 $self->_log("stopping search");
117             }
118              
119             sub update {
120 2     2 1 6 my $self = shift;
121 2         6 my ($new,$all,$add,$err) = (0,0,0,0);
122 2         11 $self->_log("starting update");
123              
124 2 50       129 my $fh = IO::File->new($self->{options}{update}) or die "Cannot open mailrc file [$self->{options}{update}]: $!";
125 2         162 while(<$fh>) {
126 10         108544 s/\s+$//;
127 10 50       37 next unless($_);
128              
129 10         82 my ($reportid,$guid,$fulldate,$addressid,$testerid,$address,$name,$pause,$display,$match) = split(',');
130 10 100 66     70 unless($address && ($name || $display)) {
      66        
131 3         19 $self->_log("... bogus line: $_");
132 3         128 $err++;
133 3         271 next;
134             }
135              
136 7   100     37 $addressid ||= 0;
137 7   100     28 $testerid ||= 0;
138 7   33     23 $name ||= $display;
139 7   50     21 $pause ||= '';
140              
141 7         14 $all++;
142 7 100       35 if($testerid == 0) {
143 3         6 my @rows;
144 3 50       19 @rows = $self->dbh->get_query('hash',$phrasebook{'GetTesterByPause'},$pause) if($pause);
145 3 100       1479 @rows = $self->dbh->get_query('hash',$phrasebook{'GetTesterByName'},$name) unless(@rows);
146              
147 3 100       429 if(@rows) {
148 1         4 $testerid = $rows[0]->{testerid};
149             } else {
150 2         12 $testerid = $self->dbh->id_query($phrasebook{'InsertTester'},$name,$pause);
151 2         242712 $new++;
152             }
153             }
154              
155 7 100       38 if($addressid == 0) {
156 3         22 my @rows = $self->dbh->get_query('hash',$phrasebook{'GetAddressByText'},$address);
157 3 100       1116 if(@rows) {
158 1         4 $addressid = $rows[0]->{addressid};
159             } else {
160 2         12 $addressid = $self->dbh->id_query($phrasebook{'InsertAddress'},$testerid,$address,_extract_email($address));
161 2         33994 $add++;
162             }
163             }
164              
165 7         55 $self->dbh->do_query($phrasebook{'LinkAddress'},$testerid,$addressid);
166 7         297057 $self->_log("... profile => address: ($testerid,$name,$pause) => ($addressid,$address)");
167              
168 7 100 100     574 next unless($reportid && $guid && $fulldate);
      100        
169 4         27 $self->dbh->do_query($phrasebook{'UpdateAddressIndex'},$reportid,$guid,$addressid,$fulldate);
170             }
171              
172 2         9978 $self->_printout("$all addresses mapped");
173 2         20 $self->_printout("$new new testers");
174 2         24 $self->_printout("$add addresses added");
175 2         19 $self->_printout("$err bogus lines");
176              
177 2         21 $self->_log("$all addresses mapped, $new new addresses");
178 2         190 $self->_log("stopping update");
179             }
180              
181             sub reindex {
182 1     1 1 3 my $self = shift;
183              
184 1         4 $self->_log("starting reindex");
185              
186             # load known addresses
187 1         28 my %address;
188 1         4 my $next = $self->dbh->iterator('hash',$phrasebook{'AllAddresses'});
189 1         990 while( my $row = $next->() ) {
190 5         127 $address{$row->{address}} = $row->{addressid};
191             }
192              
193             # search through reports updating the index
194 1 50       26 my $lastid = defined $self->{options}{lastid} ? $self->{options}{lastid} : $self->_lastid();
195 1   50     4 $next = $self->dbh->iterator('hash',$phrasebook{'AllReports'},($lastid || 0));
196 1         233 while( my $row = $next->() ) {
197             #print STDERR "row: $row->{id} $row->{tester}\n";
198 33 100       3820 if($address{$row->{tester}}) {
199 24         245 $self->_log("FOUND - row: $row->{id} $row->{tester}");
200 24         1110 $self->dbh->do_query($phrasebook{'UpdateAddressIndex'},$row->{id},$row->{guid},$address{$row->{tester}},$row->{fulldate});
201             } else {
202 9         96 $self->_log("NEW - row: $row->{id} $row->{tester}");
203 9         407 $address{$row->{tester}} = $self->dbh->id_query($phrasebook{'InsertAddress'},0,$row->{tester},_extract_email($row->{tester}));
204 9         130952 $self->dbh->do_query($phrasebook{'UpdateAddressIndex'},$row->{id},$row->{guid},$address{$row->{tester}},$row->{fulldate});
205             }
206              
207 33         606584 $lastid = $row->{id};
208             }
209 1         58 $self->_lastid($lastid);
210 1         5 $self->_log("stopping reindex");
211             }
212              
213             sub clean {
214 1     1 1 178174 my $self = shift;
215 1         6 $self->_log("starting clean");
216              
217 1         152 my $next = $self->dbh->iterator('hash',$phrasebook{'DuplicateAddresses'});
218 1         886 while( my $row = $next->() ) {
219 2         13700 $self->_log("count=$row->{count}, address=$row->{address}");
220              
221 2 100       330 last if($row->{count} <= 1);
222              
223 1         3 my %addr;
224 1         6 my @rows = $self->dbh->get_query('hash',$phrasebook{'GetAddressByText'},$row->{address});
225 1         437 $self->_log("- count=".scalar(@rows).", address=$row->{address}");
226 1         120 for my $addr (@rows) {
227 5         47053 $self->_log("id=$addr->{addressid}, address=$addr->{address}");
228 5 100       740 if($addr{$addr->{address}}) {
229 4         45 $self->_log("replace $addr->{addressid},'$addr->{address}' => $addr{$addr->{address}}->{addressid},'$addr{$addr->{address}}->{address}'");
230 4         860 $self->dbh->do_query($phrasebook{'UpdateAddress'},$addr{$addr->{address}}->{addressid},$addr->{addressid});
231 4         2605 $self->dbh->do_query($phrasebook{'DeleteAddress'},$addr->{addressid});
232              
233             } else {
234 1         5 $addr{$addr->{address}} = $addr;
235             }
236             }
237             }
238              
239 1         8 $self->_log("stopping clean");
240             }
241              
242             sub backup {
243 1     1 1 3 my $self = shift;
244              
245 1         2 for my $driver (keys %{$self->{backups}}) {
  1         5  
246 1 50       13 if($self->{backups}{$driver}{'exists'}) {
    50          
247 0         0 $self->{backups}{$driver}{db}->do_query($phrasebook{'DeleteBackup'});
248             } elsif($driver =~ /(CSV|SQLite)/i) {
249 1         8 $self->{backups}{$driver}{db}->do_query($phrasebook{'CreateBackup'});
250             }
251             }
252              
253 1         116691 $self->_log("Backup via DBD drivers");
254              
255 1         62 my $rows = $self->dbh->iterator('array',$phrasebook{'SelectBackup'});
256 1         1794 while(my $row = $rows->()) {
257 5         66340 for my $driver (keys %{$self->{backups}}) {
  5         33  
258 5         41 $self->{backups}{$driver}{db}->do_query($phrasebook{'InsertBackup'},@$row);
259             }
260             }
261              
262             # handle the CSV exception
263 1 50       10816 if($self->{backups}{CSV}) {
264 0         0 $self->_log("Backup to CSV file");
265 0         0 $self->{backups}{CSV}{db} = undef; # close db handle
266 0 0       0 my $fh1 = IO::File->new('addresses','r') or die "Cannot read temporary database file 'addresses'\n";
267 0 0       0 my $fh2 = IO::File->new($self->{backups}{CSV}{dbfile},'w+') or die "Cannot write to CSV database file $self->{backups}{CSV}{dbfile}\n";
268 0         0 while(<$fh1>) { print $fh2 $_ }
  0         0  
269 0         0 $fh1->close;
270 0         0 $fh2->close;
271 0         0 unlink('addresses');
272             }
273             }
274              
275             sub load_addresses {
276 3     3 1 137460 my $self = shift;
277            
278 3         21 my $next = $self->dbh->iterator('hash',$phrasebook{'AllAddressesFull'});
279 3         11890 while( my $row = $next->() ) {
280 16 50       638 $self->{named_map}{$row->{name}} = { name => $row->{name}, pause => $row->{pause}, addressid => $row->{addressid}, testerid => $row->{testerid}, match => '# MAPPED NAME' } if($row->{name});
281 16 50       137 $self->{paused_map}{$row->{pause}} = { name => $row->{name}, pause => $row->{pause}, addressid => $row->{addressid}, testerid => $row->{testerid}, match => '# MAPPED PAUSE' } if($row->{pause});
282 16         107 $self->{parsed_map}{$row->{address}} = { name => $row->{name}, pause => $row->{pause}, addressid => $row->{addressid}, testerid => $row->{testerid}, match => '# MAPPED ADDRESS' };
283 16 50       43 next unless($row->{email});
284 16         91 $self->{address_map}{$row->{email}} = { name => $row->{name}, pause => $row->{pause}, addressid => $row->{addressid}, testerid => $row->{testerid}, match => '# MAPPED EMAIL' };
285              
286 16         60 my ($local,$domain) = split(/\@/,$row->{email});
287 16 50       40 next unless($domain);
288 16         189 $self->{domain_map}{$domain} = { name => $row->{name}, pause => $row->{pause}, addressid => $row->{addressid}, testerid => $row->{testerid}, match => '# MAPPED DOMAIN' };
289             }
290              
291 3 50       81 if($self->{options}{verbose}) {
292 0         0 $self->_log( "named entries = " . scalar(keys %{ $self->{named_map} }) . "\n" );
  0         0  
293 0         0 $self->_log( "paused entries = " . scalar(keys %{ $self->{paused_map} }) . "\n" );
  0         0  
294 0         0 $self->_log( "parsed entries = " . scalar(keys %{ $self->{parsed_map} }) . "\n" );
  0         0  
295 0         0 $self->_log( "address entries = " . scalar(keys %{ $self->{address_map} }) . "\n" );
  0         0  
296 0         0 $self->_log( "domain entries = " . scalar(keys %{ $self->{domain_map} }) . "\n" );
  0         0  
297             }
298              
299 3         46 $next = $self->dbh->iterator('hash',$phrasebook{'AllAddresses'});
300 3         609 while( my $row = $next->() ) {
301 26 100       728 next if($self->{parsed_map}{$row->{address}});
302 10         142 $self->{stored_map}{$row->{address}} = { name => '', pause => '', addressid => $row->{addressid}, testerid => 0, match => '# STORED ADDRESS' };
303             }
304              
305 3 50       79 my $fh = IO::File->new($self->{options}{mailrc}) or die "Cannot open mailrc file [$self->{options}{mailrc}]: $!";
306 3         389 while(<$fh>) {
307 15         116 s/\s+$//;
308 15 50       65 next if(/^$/);
309              
310 15         106 my ($alias,$name,$email) = (/alias\s+([A-Z]+)\s+"([^<]+) <([^>]+)>"/);
311 15 50       37 next unless($alias);
312              
313 15 100       55 my $testerid = $self->{address_map}{$email} ? $self->{address_map}{$email}->{testerid} : 0;
314 15 100       48 my $addressid = $self->{address_map}{$email} ? $self->{address_map}{$email}->{addressid} : 0;
315              
316 15         84 $self->{pause_map}{lc($alias)} = { name => $name, pause => $alias, testerid => $testerid, addressid => $addressid, match => '# PAUSE ID' };
317 15         134 $self->{cpan_map}{lc($email)} = { name => $name, pause => $alias, testerid => $testerid, addressid => $addressid, match => '# CPAN EMAIL' };
318             }
319 3         31 $fh->close;
320              
321 3 50       64 if($self->{options}{verbose}) {
322 0         0 $self->_log( "pause entries = " . scalar(keys %{ $self->{pause_map} }) . "\n" );
  0         0  
323 0         0 $self->_log( "cpan entries = " . scalar(keys %{ $self->{cpan_map} }) . "\n" );
  0         0  
324             }
325              
326             # grab all records for the month
327 3 50       39 my $sql = $self->{options}{month}
328             ? sprintf $phrasebook{'GetTestersByMonth'}, $self->{options}{month}
329             : $phrasebook{'GetTesters'};
330 3 50       14 if($self->{options}{verbose}) {
331 0         0 $self->_log( "sql = $sql\n" );
332             }
333 3         15 $next = $self->dbh->iterator('array',$sql);
334 3         1362 $self->{parsed} = 0;
335 3         14 while(my $row = $next->()) {
336 70         1249 $self->{parsed}++;
337 70         131 my $email = _extract_email($row->[2]);
338              
339 70 100       176 my $testerid = $self->{parsed_map}{$row->[2]} ? $self->{parsed_map}{$row->[2]}->{testerid} : 0;
340 70 100       140 my $addressid = $self->{parsed_map}{$row->[2]} ? $self->{parsed_map}{$row->[2]}->{addressid} : 0;
341 70 50 66     270 $addressid ||= $self->{stored_map}{$row->[2]} ? $self->{stored_map}{$row->[2]}->{addressid} : 0;
342 70 100 100     235 $testerid ||= $self->{address_map}{$email} ? $self->{address_map}{$email}->{testerid} : 0;
343 70 100 100     219 $addressid ||= $self->{address_map}{$email} ? $self->{address_map}{$email}->{addressid} : 0;
344              
345 70 100 66     195 next if($testerid && $addressid);
346            
347 52         429 $self->{unparsed_map}{$row->[2]} = {
348             reportid => $row->[0],
349             guid => $row->[1],
350             fulldate => $row->[3],
351             testerid => $testerid,
352             addressid => $addressid,
353             'sort' => '',
354             email => $email
355             };
356             }
357              
358 3 50       97 if($self->{options}{verbose}) {
359 0         0 $self->_log( "rows = $self->{parsed}\n" );
360 0         0 $self->_log( "unparsed entries = " . scalar(keys %{ $self->{unparsed_map} }) . "\n" );
  0         0  
361             }
362             }
363              
364             sub match_addresses {
365 3     3 1 22075 my $self = shift;
366              
367             # if($self->{options}{verbose}) {
368             # use Data::Dumper;
369             # $self->_log( "named_map=". Dumper($self->{named_map}) );
370             # $self->_log( "unparsed_map=". Dumper($self->{unparsed_map}) );
371             # $self->_log( "parsed_map=" . Dumper($self->{parsed_map}) );
372             # $self->_log( "paused_map=" . Dumper($self->{paused_map}) );
373             # $self->_log( "pause_map=" . Dumper($self->{pause_map}) );
374             # $self->_log( "cpan_map=" . Dumper($self->{cpan_map}) );
375             # $self->_log( "domain_map=" . Dumper($self->{domain_map}) );
376             # $self->_log( "address_map=" . Dumper($self->{address_map}) );
377             # $self->_log( "stored_map=" . Dumper($self->{stored_map}) );
378             # }
379              
380 3         27 for my $key (keys %{ $self->{unparsed_map} }) {
  3         15  
381 18         41 my $email = _extract_email($key);
382 18 50       40 unless($email) {
383 0         0 push @{$self->{result}{NOEMAIL}}, $key;
  0         0  
384 0         0 next;
385             }
386 18         33 $email = lc($email);
387 18         46 my ($local,$domain) = split(/\@/,$email);
388             #print STDERR "email=[$email], local=[$local], domain=[$domain]\n" if($email =~ /indiana/);
389 18 100       51 next if($self->map_address($key,$local,$domain,$email));
390              
391 16         21 my $last = 0;
392 16         49 my @parts = split(/\./,$domain);
393 16         38 while(@parts > 1) {
394 24         48 my $domain2 = join(".",@parts);
395             #print STDERR "domain2=[$domain2]\n" if($email =~ /indiana/);
396 24 100       57 if($self->map_domain($key,$local,$domain2,$email)) {
397 5         7 $last = 1;
398 5         10 last;
399             }
400 19         51 shift @parts;
401             }
402              
403 16 100       38 next if($last);
404 11 50       29 next if($self->map_name($key));
405             }
406             }
407              
408             sub print_addresses {
409 3     3 1 2539 my $self = shift;
410 3         8 my $text = '';
411              
412 3 50       57 if($self->{result}{NOMAIL}) {
413 0         0 $self->_printout( "ERRORS:" );
414 0         0 for my $email (sort @{$self->{result}{NOMAIL}}) {
  0         0  
415 0         0 $self->_printout( "NOMAIL: $email" );
416             }
417             }
418              
419 3         14 $self->_printout( "\nMATCH:" );
420 3         6 for my $key (sort {$self->{unparsed_map}{$a}->{match} cmp $self->{unparsed_map}{$b}->{match}}
  8         23  
  18         47  
421 3         14 grep {$self->{unparsed_map}{$_}->{match}}
422             keys %{ $self->{unparsed_map} }) {
423 9 100 66     105 if($self->{unparsed_map}{$key}->{match} && $self->{unparsed_map}{$key}->{match} !~ /SUGGESTION/) {
424 7   50     153 $self->_printout(
      50        
      50        
      50        
      100        
      50        
      50        
      50        
      50        
425             sprintf "%d,%s,%s,%d,%d,%s,%s,%s,%s,%s",
426             ($self->{unparsed_map}{$key}->{reportid} || 0),
427             ($self->{unparsed_map}{$key}->{guid} || ''),
428             ($self->{unparsed_map}{$key}->{fulldate} || ''),
429              
430             ($self->{unparsed_map}{$key}->{addressid} || 0),
431             ($self->{unparsed_map}{$key}->{testerid} || 0),
432             $key,
433             ($self->{unparsed_map}{$key}->{name} || ''),
434             ($self->{unparsed_map}{$key}->{pause} || ''),
435             ($self->{unparsed_map}{$key}->{display} || ''),
436             ($self->{unparsed_map}{$key}->{match} || '')
437             );
438 7         30 delete $self->{unparsed_map}{$key};
439             } else {
440 2         13 my ($local,$domain) = $self->{unparsed_map}{$key}->{email} =~ /([-+=\w.]+)\@([^\s]+)/;
441 2 50 33     12 ($local,$domain) = $key =~ /([-+=\w.]+)\@([^\s]+)/ unless($local && $domain);
442 2 50       4 if($domain) {
443 2         14 my @parts = split(/\./,$domain);
444 2         13 $self->{unparsed_map}{$key}{'sort'} = join(".",reverse @parts) . '@' . $local;
445             } else {
446 0         0 print STDERR "FAIL: $key\n";
447 0         0 $self->{unparsed_map}{$key}{'sort'} = '';
448             }
449             }
450             }
451              
452 3         10 $self->_printout( "\nSUGGESTIONS:" );
453 3 100       7 for my $key (sort {$self->{unparsed_map}{$a} cmp $self->{unparsed_map}{$b}}
  0         0  
  11         47  
454 3         8 grep {$self->{unparsed_map}{$_}->{match} && $self->{unparsed_map}{$_}->{match} =~ /SUGGESTION/}
455             keys %{ $self->{unparsed_map} }) {
456 2   50     50 $self->_printout(
      50        
      50        
      50        
      50        
      50        
      50        
      50        
      50        
457             sprintf "%d,%s,%s,%d,%d,%s,%s,%s,%s,%s",
458             ($self->{unparsed_map}{$key}->{reportid} || 0),
459             ($self->{unparsed_map}{$key}->{guid} || ''),
460             ($self->{unparsed_map}{$key}->{fulldate} || ''),
461              
462             ($self->{unparsed_map}{$key}->{addressid} || 0),
463             ($self->{unparsed_map}{$key}->{testerid} || 0),
464             $key,
465             ($self->{unparsed_map}{$key}->{name} || ''),
466             ($self->{unparsed_map}{$key}->{pause} || ''),
467             ($self->{unparsed_map}{$key}->{display} || ''),
468             ($self->{unparsed_map}{$key}->{match} || '')
469             );
470 2         8 delete $self->{unparsed_map}{$key};
471             }
472              
473 3         10 $self->_printout( '' );
474 3 50       11 return if($self->{options}{match});
475              
476             #use Data::Dumper;
477             #print STDERR Dumper(\%{ $self->{unparsed_map} });
478              
479 3         5 my @mails;
480 3         7 $self->_printout( "PATTERNS:" );
481 3         6 for my $key (sort { $self->{unparsed_map}{$a}{'sort'} cmp $self->{unparsed_map}{$b}{'sort'} } keys %{ $self->{unparsed_map} }) {
  9         20  
  3         12  
482 9 50       23 next unless($key);
483 9   50     291 $self->_printout(
      50        
      50        
      50        
      50        
      50        
      50        
      50        
      50        
      50        
484             sprintf "%d,%s,%s,%d,%d,%s,%s,%s,%s,%s,%s",
485             ($self->{unparsed_map}{$key}->{reportid} || 0),
486             ($self->{unparsed_map}{$key}->{guid} || ''),
487             ($self->{unparsed_map}{$key}->{fulldate} || ''),
488              
489             ($self->{unparsed_map}{$key}->{addressid} || 0),
490             ($self->{unparsed_map}{$key}->{testerid} || 0),
491             $key,
492             ($self->{unparsed_map}{$key}->{name} || ''),
493             ($self->{unparsed_map}{$key}->{pause} || ''),
494             ($self->{unparsed_map}{$key}->{match} || ''),
495             ($self->{unparsed_map}{$key}->{display} || ''),
496             ($self->{unparsed_map}{$key}->{'sort'} || '')
497             );
498             }
499             }
500              
501             sub map_address {
502 22     22 1 75 my ($self,$key,$local,$domain,$email) = @_;
503              
504 22 100       64 if($self->{address_map}{$key}) {
505 1         12 $self->{unparsed_map}{$key}->{$_} = $self->{address_map}{$key}->{$_} for(qw(testerid addressid name pause match));
506 1         9 return 1;
507             }
508              
509 21 50       59 if($self->{address_map}{$email}) {
510 0         0 $self->{unparsed_map}{$key}->{$_} = $self->{address_map}{$email}->{$_} for(qw(testerid addressid name pause match));
511 0         0 return 1;
512             }
513              
514 21 100       51 if($domain eq 'cpan.org') {
515 7 100       28 if($self->{pause_map}{$local}) {
516 3         45 $self->{unparsed_map}{$key}->{$_} = $self->{pause_map}{$local}->{$_} for(qw(testerid addressid name pause match));
517 3         18 return 1;
518             }
519             }
520              
521 18 100       60 if($self->{cpan_map}{$email}) {
522 1         14 $self->{unparsed_map}{$key}->{$_} = $self->{cpan_map}{$email}->{$_} for(qw(testerid addressid name pause match));
523 1         7 return 1;
524             }
525              
526 17         50 return 0;
527             }
528              
529             sub map_domain {
530 43     43 1 10407 my ($self,$key,$local,$domain,$email) = @_;
531              
532 43         62 for my $filter (@{$self->{filters}}) {
  43         110  
533 122 100       2912 return 0 if($domain =~ /^$filter$/);
534             }
535              
536 27 100       94 if($self->{domain_map}{$domain}) {
537 7         83 $self->{unparsed_map}{$key}->{$_} = $self->{domain_map}{$domain}->{$_} for(qw(testerid name pause match));
538 7         25 $self->{unparsed_map}{$key}->{match} .= " - $domain";
539 7         32 return 1;
540             }
541              
542 20         27 for my $map (keys %{ $self->{domain_map} }) {
  20         53  
543 73 50       414 if($map =~ /\b$domain$/) {
544 0         0 $self->{unparsed_map}{$key}->{$_} = $self->{domain_map}{$map}->{$_} for(qw(testerid name pause match));
545 0         0 $self->{unparsed_map}{$key}->{match} .= " - $domain - $map";
546 0         0 return 1;
547             }
548             }
549              
550 20         37 for my $map (keys %{ $self->{domain_map} }) {
  20         54  
551 73 100       632 if($domain =~ /\b$map$/) {
552 1         13 $self->{unparsed_map}{$key}->{$_} = $self->{domain_map}{$map}->{$_} for(qw(testerid name pause match));
553 1         6 $self->{unparsed_map}{$key}->{match} .= " - $domain - $map";
554 1         9 return 1;
555             }
556             }
557              
558 19         64 return 0;
559             }
560              
561             sub map_name {
562 11     11 1 19 my ($self,$key) = @_;
563 11         17 my ($name) = $key =~ /\(+"?([^"\)]+)"?\)+/;
564 11 50       36 ($name) = $key =~ /^\s*"?([^"<]+)"?\s+
565 11 100       51 return 0 unless($name);
566              
567 2 50       10 if($self->{named_map}{$name}) {
568 0         0 $self->{unparsed_map}{$key}->{$_} = $self->{named_map}{$name}->{$_} for(qw(testerid name pause match));
569 0         0 return 1;
570             }
571              
572 2 50       7 if($self->{paused_map}{$name}) {
573 0         0 $self->{unparsed_map}{$key}->{$_} = $self->{paused_map}{$name}->{$_} for(qw(testerid name pause match));
574 0         0 return 1;
575             }
576              
577 2         9 $self->{unparsed_map}{$key}{display} = $name;
578 2         5 $self->{unparsed_map}{$key}{match} = '# SUGGESTION';
579              
580 2         10 return 0;
581             }
582              
583             #----------------------------------------------------------------------------
584             # Private Methods
585              
586             sub _lastid {
587 8     8   4155 my ($self,$id) = @_;
588 8         42 my $f = $self->lastfile();
589              
590 8 100       212 unless( -f $f) {
591 2         221 mkpath(dirname($f));
592 2         16 overwrite_file( $f, 0 );
593 2   50     733 $id ||= 0;
594             }
595              
596 8 100       24 if($id) { overwrite_file( $f, $id ); }
  2         17  
597 6         26 else { $id = read_file($f); }
598              
599 8         1000 return $id;
600             }
601              
602             sub _extract_email {
603 111     111   15404 my $str = shift;
604             #my ($email) = $str =~ /([-+=\w.]+\@(?:[-\w]+\.)+(?:com|net|org|info|biz|edu|museum|mil|gov|[a-z]{2,2}))/i;
605 111         542 my ($email) = $str =~ /([-+=\w.]+\@[-\w\.]+)/i;
606 111   50     443 return $email || '';
607             }
608              
609             sub _init_options {
610 18     18   34 my $self = shift;
611 18         76 my %hash = @_;
612 18         146 $self->{options} = {};
613 18         94 my @options = qw(mailrc update clean reindex lastid backup month match verbose lastfile logfile logclean output);
614 18         30 my %options;
615              
616 18 50       142 GetOptions( \%options,
617              
618             # mandatory options
619             'config|c=s',
620              
621             # update mode options
622             'update|u=s',
623              
624             # clean mode options
625             'clean',
626              
627             # reindex mode options
628             'reindex|r',
629             'lastid|l=i',
630              
631             # backup mode options
632             'backup|b',
633              
634             # search mode options
635             'mailrc|m=s',
636             'month=s',
637             'match',
638              
639             # other options
640             'output=s',
641             'lastfile=s',
642             'logfile=s',
643             'logclean=i',
644             'verbose|v',
645             'version',
646             'help|h'
647             ) or $self->_help();
648              
649 18         15053 for my $opt (qw(config help version),@options) {
650 288 100 33     643 $self->{options}{$opt} ||= $hash{$opt} if($hash{$opt});
651 288 100 33     516 $self->{options}{$opt} ||= $options{$opt} if($options{$opt});
652             }
653              
654 18 100       240 $self->_help(1) if($self->{options}{help});
655 16 100       65 $self->_help(0) if($self->{options}{version});
656              
657 15 100       54 $self->_help(1,"Must specify the configuration file") unless( $self->{options}{config});
658 14 100       287 $self->_help(1,"Configuration file [$self->{options}{config}] not found") unless(-f $self->{options}{config});
659              
660             # load configuration
661 13         269 my $cfg = Config::IniFiles->new( -file => $self->{options}{config} );
662              
663             # configure databases
664 13         52931 my %opts;
665 13         35 my $db = 'CPANSTATS';
666 13 50       55 die "No configuration for $db database\n" unless($cfg->SectionExists($db));
667 13         337 $opts{$_} = $cfg->val($db,$_) for(qw(driver database dbfile dbhost dbport dbuser dbpass));
668 13         7764 $self->dbh( CPAN::Testers::Common::DBUtils->new(%opts) );
669 13 50       484 die "Cannot configure $db database\n" unless($self->dbh);
670              
671             # use configuration settings or defaults if none provided
672 13         346 for my $opt (@options) {
673 169   100     3929 $self->{options}{$opt} ||= $cfg->val('MASTER',$opt) || $defaults{$opt};
      100        
674             }
675              
676             # extract filters
677 13         198 my $filters = $cfg->val('DOMAINS','filters');
678 13 50       278 my @filters = split("\n", $filters) if($filters);
679 13 50       44 $self->{filters} = \@filters if(@filters);
680              
681             # mandatory options
682             #for my $opt (qw()) {
683             # $self->_help(1,"No $opt configuration setting given, see help below.") unless( $self->{options}{$opt});
684             # $self->_help(1,"Given $opt file [$self->{options}{$opt}] not a valid file, see help below.") unless(-f $self->{options}{$opt});
685             #}
686              
687             # options to check if provided
688 13         34 for my $opt (qw(update mailrc)) {
689 26 100       88 next unless( $self->{options}{$opt});
690 15 50       423 $self->_help(1,"Given $opt file [$self->{options}{$opt}] not a valid file, see help below.") unless(-f $self->{options}{$opt});
691             }
692              
693             # clean up potential rogue characters
694 13 50       63 $self->{options}{lastid} =~ s/\D+//g if($self->{options}{lastid});
695              
696             # prime accessors
697 13         83 $self->lastfile($self->{options}{lastfile});
698 13         125 $self->logfile($self->{options}{logfile});
699 13         118 $self->logclean($self->{options}{logclean});
700              
701             # configure backup DBs
702 13 100       108 if($self->{options}{backup}) {
703 1 50       4 $self->help(1,"No configuration for BACKUPS with backup option") unless($cfg->SectionExists('BACKUPS'));
704              
705             # available DBI drivers
706 1         36 my %DRIVERS_DBI = map { $_ => 1 } DBI->available_drivers();
  7         517  
707              
708 1         6 my @drivers = $cfg->val('BACKUPS','drivers');
709 1         25 for my $driver (@drivers) {
710 3 50       9 $self->help(1,"No configuration for backup option '$driver'") unless($cfg->SectionExists($driver));
711              
712             # ignore drivers that are unavailable
713 3 100       75 unless($DRIVERS_DBI{$driver}) {
714 2         10 $self->_log("Backup DBD driver '$driver' is not available");
715 2         76 next;
716             }
717              
718 1         4 %opts = ();
719 1         6 $opts{$_} = $cfg->val($driver,$_) for(qw(driver database dbfile dbhost dbport dbuser dbpass));
720 1 50       160 $self->{backups}{$driver}{'exists'} = $driver =~ /SQLite/i ? -f $opts{database} : 1;
721              
722             # CSV is a bit of an oddity!
723 1 50       5 if($driver =~ /CSV/i) {
724 0         0 $self->{backups}{$driver}{'exists'} = 0;
725 0         0 $self->{backups}{$driver}{'dbfile'} = $opts{dbfile};
726 0         0 $opts{dbfile} = 'uploads';
727 0         0 unlink($opts{dbfile});
728             }
729              
730 1         11 $self->{backups}{$driver}{db} = CPAN::Testers::Common::DBUtils->new(%opts);
731 1 50       27 $self->help(1,"Cannot configure BACKUPS database for '$driver'") unless($self->{backups}{$driver}{db});
732             }
733             }
734              
735             # set output
736 13 100       99 if($self->{options}{output}) {
737 6 50       63 if(my $fh = IO::File->new($self->{options}{output}, 'w+')) {
738 6         1198 $self->{fh} = $fh;
739             }
740             }
741              
742 13 50       370 return unless($self->{options}{verbose});
743 0   0     0 print STDERR "config: $_ = ".($self->{options}{$_}||'')."\n" for(@options);
744             }
745              
746             sub _help {
747 5     5   10 my ($self,$full,$mess) = @_;
748              
749 5 100       103 print "\n$mess\n\n" if($mess);
750              
751 5 100       11 if($full) {
752 4         115 print "\n";
753 4         50 print "Usage:$0 ( [--help|h] | --version | --config|c= \\\n";
754 4         40 print " ( [--update=] \\\n";
755 4         39 print " | [--reindex] [--lastid=] \\\n";
756 4         38 print " | [--clean] \\\n";
757 4         44 print " | [--backup] \\\n";
758 4         41 print " | [--mailrc|m=] [--month=] [--match] ) \\\n";
759 4         40 print " [--output=] \\\n";
760 4         40 print " [--logfile=] [--logclean=(0|1)] \\\n";
761 4         40 print " [--verbose|v] ) \n\n";
762              
763             # 12345678901234567890123456789012345678901234567890123456789012345678901234567890
764 4         45 print "This program manages the cpan-tester addresses.\n";
765              
766 4         41 print "\nFunctional Options:\n";
767 4         39 print " --config= # path/file to configuration file\n";
768 4         40 print " [--mailrc=] # path/file to mailrc file\n";
769 4         43 print " [--output=] # path/file to output file (defaults to STDOUT)\n";
770              
771 4         46 print "\nUpdate Options:\n";
772 4         39 print " [--update=] # run in update mode\n";
773              
774 4         38 print "\nReindex Options:\n";
775 4         39 print " [--reindex] # run in reindex mode\n";
776 4         42 print " [--lastid=] # id to start reindex from\n";
777              
778 4         38 print "\nClean Options:\n";
779 4         38 print " [--clean] # run in clean mode (de-duplication)\n";
780              
781 4         39 print "\nBackup Options:\n";
782 4         40 print " [--backup] # run in backup mode\n";
783              
784 4         37 print "\nSearch Options:\n";
785 4         38 print " [--month=] # YYYYMM string to match from\n";
786 4         39 print " [--match] # display matches only\n";
787              
788 4         37 print "\nOther Options:\n";
789 4         41 print " [--verbose] # turn on verbose messages\n";
790 4         37 print " [--version] # display current version\n";
791 4         38 print " [--help] # this screen\n";
792              
793 4         44 print "\nFor further information type 'perldoc $0'\n";
794             }
795              
796 5         161 print "$0 v$VERSION\n";
797 5         18 exit(0);
798             }
799              
800             sub _printout {
801 38     38   62 my $self = shift;
802 38 100       101 if(my $fh = $self->{fh}) {
803 34         196 print $fh "@_\n";
804             } else {
805 4         8555 print STDOUT "@_\n";
806             }
807             }
808              
809             sub _log {
810 74     74   4397 my $self = shift;
811 74 50       3416 my $log = $self->logfile or return;
812 74 100       4039 mkpath(dirname($log)) unless(-f $log);
813              
814 74 100       548 my $mode = $self->logclean ? 'w+' : 'a+';
815 74         1794 $self->logclean(0);
816              
817 74         5551 my @dt = localtime(time);
818 74         1460 my $dt = sprintf "%04d/%02d/%02d %02d:%02d:%02d", $dt[5]+1900,$dt[4]+1,$dt[3],$dt[2],$dt[1],$dt[0];
819              
820 74 50       1067 my $fh = IO::File->new($log,$mode) or die "Cannot write to log file [$log]: $!\n";
821 74         14819 print $fh "$dt ", @_, "\n";
822 74         411 $fh->close;
823             }
824              
825             q!Will code for a damn fine Balti!;
826              
827             __END__