File Coverage

blib/lib/CPAN/Testers/Data/Addresses.pm
Criterion Covered Total %
statement 300 435 68.9
branch 117 200 58.5
condition 61 112 54.4
subroutine 29 32 90.6
pod 13 13 100.0
total 520 792 65.6


line stmt bran cond sub pod time code
1             package CPAN::Testers::Data::Addresses;
2              
3 9     9   164602 use 5.006;
  9         167  
  9         481  
4 9     9   93 use strict;
  9         15  
  9         276  
5 9     9   79 use warnings;
  9         33  
  9         390  
6              
7 9     9   45 use vars qw($VERSION);
  9         18  
  9         654  
8             $VERSION = '0.12';
9             $|++;
10              
11             #----------------------------------------------------------------------------
12             # Library Modules
13              
14 9     9   56 use base qw(Class::Accessor::Fast);
  9         15  
  9         9378  
15              
16 9     9   65419 use CPAN::Testers::Common::DBUtils;
  9         284422  
  9         94  
17 9     9   13396 use Config::IniFiles;
  9         453392  
  9         368  
18 9     9   115 use File::Basename;
  9         17  
  9         988  
19 9     9   55 use File::Path;
  9         19  
  9         497  
20 9     9   11138 use File::Slurp;
  9         141472  
  9         869  
21 9     9   13104 use Getopt::Long;
  9         129511  
  9         69  
22 9     9   10484 use IO::File;
  9         9224  
  9         70155  
23              
24             #----------------------------------------------------------------------------
25             # Variables
26              
27             my (%backups);
28              
29             my %phrasebook = (
30             'AllAddresses' => q{SELECT * FROM tester_address},
31             'AllAddressesFull' => q{SELECT a.*,p.name,p.pause FROM tester_address AS a INNER JOIN tester_profile AS p ON p.testerid=a.testerid},
32             'UpdateAddressIndex' => q{REPLACE INTO ixaddress (id,guid,addressid,fulldate) VALUES (?,?,?,?)},
33              
34             'InsertAddress' => q{INSERT INTO tester_address (testerid,address,email) VALUES (?,?,?)},
35             'GetAddressByText' => q{SELECT * FROM tester_address WHERE address = ?},
36             'LinkAddress' => q{UPDATE tester_address SET testerid=? WHERE addressid=?},
37              
38             'GetTesterByPause' => q{SELECT testerid FROM tester_profile WHERE pause = ?},
39             'GetTesterByName' => q{SELECT testerid FROM tester_profile WHERE name = ?},
40             'InsertTester' => q{INSERT INTO tester_profile (name,pause) VALUES (?,?)},
41              
42             'AllReports' => q{SELECT id,guid,tester,fulldate FROM cpanstats WHERE type=2 AND id > ? ORDER BY id LIMIT 1000000},
43             'GetTestersByMonth' => q{SELECT DISTINCT c.id,c.guid,c.tester,c.fulldate FROM cpanstats c LEFT JOIN ixaddress x ON x.id=c.id LEFT JOIN tester_address a ON a.addressid=x.addressid WHERE a.address IS NULL AND c.postdate >= '%s' AND c.state IN ('pass','fail','na','unknown')},
44             'GetTesters' => q{SELECT DISTINCT c.id,c.guid,c.tester,c.fulldate FROM cpanstats c LEFT JOIN ixaddress x ON x.id=c.id LEFT JOIN tester_address a ON a.addressid=x.addressid WHERE a.address IS NULL AND c.state IN ('pass','fail','na','unknown')},
45              
46             # Database backup requests
47             'DeleteBackup' => 'DELETE FROM addresses',
48             'CreateBackup' => 'CREATE TABLE addresses (testerid int, name text, pause text, PRIMARY KEY (testerid))',
49             'SelectBackup' => 'SELECT * FROM tester_profile',
50             'InsertBackup' => 'INSERT INTO addresses (testerid,name,pause) VALUES (?,?,?)',
51              
52             # Consolidations
53             'DuplicateAddresses' => q{SELECT address,count(*) AS count FROM tester_address GROUP BY address ORDER BY count DESC},
54             'UpdateAddress' => q{UPDATE ixaddress SET addressid=? WHERE addressid=?},
55             'DeleteAddress' => q{DELETE FROM tester_address WHERE addressid=?},
56             );
57              
58             my %defaults = (
59             'address' => 'data/addresses.txt',
60             'mailrc' => 'data/01mailrc.txt',
61             'lastfile' => 'data/_lastid',
62             'month' => 199000,
63             'match' => 0,
64             'logclean' => 0
65             );
66              
67             #----------------------------------------------------------------------------
68             # The Application Programming Interface
69              
70             sub new {
71 9     9 1 8761 my $class = shift;
72              
73 9         24 my $self = {};
74 9         26 bless $self, $class;
75              
76 9         43 $self->_init_options(@_);
77 9         75 return $self;
78             }
79              
80             sub DESTROY {
81 9     9   8941 my $self = shift;
82 9 100       258 $self->{fh}->close if($self->{fh});
83             }
84              
85             __PACKAGE__->mk_accessors(qw( lastfile logfile logclean dbh ));
86              
87             sub process {
88 3     3 1 2843 my $self = shift;
89              
90 3 100       29 if($self->{options}{update}) {
    100          
    50          
    50          
91 1         6 $self->update();
92              
93             } elsif($self->{options}{reindex}) {
94 1         5 $self->reindex();
95              
96             } elsif($self->{options}{backup}) {
97 0         0 $self->backup();
98              
99             } elsif($self->{options}{clean}) {
100 0         0 $self->clean();
101              
102             } else {
103 1         4 $self->search();
104             }
105             }
106              
107             sub search {
108 1     1 1 2 my $self = shift;
109 1         5 $self->_log("starting search");
110              
111 1         36 $self->load_addresses();
112 1         5 $self->match_addresses();
113 1         8 $self->print_addresses();
114              
115 1         6 $self->_log("stopping search");
116             }
117              
118             sub update {
119 1     1 1 3 my $self = shift;
120 1         3 my ($new,$all,$add,$err) = (0,0,0,0);
121 1         14 $self->_log("starting update");
122              
123 1 50       70 my $fh = IO::File->new($self->{options}{update}) or die "Cannot open mailrc file [$self->{options}{update}]: $!";
124 1         83 while(<$fh>) {
125 1         5 s/\s+$//;
126 1 50       5 next unless($_);
127              
128 1         8 my ($reportid,$guid,$fulldate,$addressid,$testerid,$address,$name,$pause,$display,$match) = split(',');
129 1 50 33     10 unless($address && ($name || $display)) {
      33        
130 0         0 $self->_log("... bogus line: $_");
131 0         0 $err++;
132 0         0 next;
133             }
134              
135 1   50     8 $addressid ||= 0;
136 1   50     6 $testerid ||= 0;
137 1   33     4 $name ||= $display;
138 1   50     5 $pause ||= '';
139              
140 1         2 $all++;
141 1 50       5 if($testerid == 0) {
142 1         3 my @rows;
143 1 50       9 @rows = $self->dbh->get_query('hash',$phrasebook{'GetTesterByPause'},$pause) if($pause);
144 1 50       480 @rows = $self->dbh->get_query('hash',$phrasebook{'GetTesterByName'},$name) unless(@rows);
145              
146 1 50       213 if(@rows) {
147 0         0 $testerid = $rows[0]->{testerid};
148             } else {
149 1         6 $testerid = $self->dbh->id_query($phrasebook{'InsertTester'},$name,$pause);
150 1         77470 $new++;
151             }
152             }
153              
154 1 50       9 if($addressid == 0) {
155 1         10 my @rows = $self->dbh->get_query('hash',$phrasebook{'GetAddressByText'},$address);
156 1 50       277 if(@rows) {
157 0         0 $addressid = $rows[0]->{addressid};
158             } else {
159 1         5 $addressid = $self->dbh->id_query($phrasebook{'InsertAddress'},$testerid,$address,_extract_email($address));
160 1         44341 $add++;
161             }
162             }
163              
164 1         13 $self->dbh->do_query($phrasebook{'LinkAddress'},$testerid,$addressid);
165 1         104069 $self->_log("... profile => address: ($testerid,$name,$pause) => ($addressid,$address)");
166              
167 1 50 33     67 next unless($reportid && $guid && $fulldate);
      33        
168 1         7 $self->dbh->do_query($phrasebook{'UpdateAddressIndex'},$reportid,$guid,$addressid,$fulldate);
169             }
170              
171 1         58743 $self->_printout("$all addresses mapped");
172 1         8 $self->_printout("$new new testers");
173 1         6 $self->_printout("$add addresses added");
174 1         7 $self->_printout("$err bogus lines");
175              
176 1         9 $self->_log("$all addresses mapped, $new new addresses");
177 1         50 $self->_log("stopping update");
178             }
179              
180             sub reindex {
181 1     1 1 3 my $self = shift;
182              
183 1         5 $self->_log("starting reindex");
184              
185             # load known addresses
186 1         35 my %address;
187 1         4 my $next = $self->dbh->iterator('hash',$phrasebook{'AllAddresses'});
188 1         1362 while( my $row = $next->() ) {
189 5         139 $address{$row->{address}} = $row->{addressid};
190             }
191              
192             # search through reports updating the index
193 1 50       26 my $lastid = defined $self->{options}{lastid} ? $self->{options}{lastid} : $self->_lastid();
194 1   50     6 $next = $self->dbh->iterator('hash',$phrasebook{'AllReports'},($lastid || 0));
195 1         248 while( my $row = $next->() ) {
196             #print STDERR "row: $row->{id} $row->{tester}\n";
197 33 100       2952 if($address{$row->{tester}}) {
198 24         253 $self->_log("FOUND - row: $row->{id} $row->{tester}");
199 24         1092 $self->dbh->do_query($phrasebook{'UpdateAddressIndex'},$row->{id},$row->{guid},$address{$row->{tester}},$row->{fulldate});
200             } else {
201 9         92 $self->_log("NEW - row: $row->{id} $row->{tester}");
202 9         3497 $address{$row->{tester}} = $self->dbh->id_query($phrasebook{'InsertAddress'},0,$row->{tester},_extract_email($row->{tester}));
203 9         306483 $self->dbh->do_query($phrasebook{'UpdateAddressIndex'},$row->{id},$row->{guid},$address{$row->{tester}},$row->{fulldate});
204             }
205              
206 33         893286 $lastid = $row->{id};
207             }
208 1         72 $self->_lastid($lastid);
209 1         7 $self->_log("stopping reindex");
210             }
211              
212             sub clean {
213 0     0 1 0 my $self = shift;
214 0         0 $self->_log("starting clean");
215              
216 0         0 my $next = $self->dbh->iterator('hash',$phrasebook{'DuplicateAddresses'});
217 0         0 while( my $row = $next->() ) {
218 0         0 $self->_log("count=$row->{count}, address=$row->{address}");
219              
220 0 0       0 last if($row->{count} <= 1);
221              
222 0         0 my %addr;
223 0         0 my @rows = $self->dbh->get_query('hash',$phrasebook{'GetAddressByText'},$row->{address});
224 0         0 $self->_log("- count=".scalar(@rows).", address=$row->{address}");
225 0         0 for my $addr (@rows) {
226 0         0 $self->_log("id=$addr->{addressid}, address=$addr->{address}");
227 0 0       0 if($addr{$addr->{address}}) {
228 0         0 $self->_log("replace $addr->{addressid},'$addr->{address}' => $addr{$addr->{address}}->{addressid},'$addr{$addr->{address}}->{address}'");
229 0         0 $self->dbh->do_query($phrasebook{'UpdateAddress'},$addr{$addr->{address}}->{addressid},$addr->{addressid});
230 0         0 $self->dbh->do_query($phrasebook{'DeleteAddress'},$addr->{addressid});
231              
232             } else {
233 0         0 $addr{$addr->{address}} = $addr;
234             }
235             }
236             }
237              
238 0         0 $self->_log("stopping clean");
239             }
240              
241             sub backup {
242 0     0 1 0 my $self = shift;
243              
244 0         0 for my $driver (keys %{$self->{backups}}) {
  0         0  
245 0 0       0 if($self->{backups}{$driver}{'exists'}) {
    0          
246 0         0 $self->{backups}{$driver}{db}->do_query($phrasebook{'DeleteBackup'});
247             } elsif($driver =~ /(CSV|SQLite)/i) {
248 0         0 $self->{backups}{$driver}{db}->do_query($phrasebook{'CreateBackup'});
249             }
250             }
251              
252 0         0 $self->_log("Backup via DBD drivers");
253              
254 0         0 my $rows = $self->dbh->iterator('array',$phrasebook{'SelectBackup'});
255 0         0 while(my $row = $rows->()) {
256 0         0 for my $driver (keys %{$self->{backups}}) {
  0         0  
257 0         0 $self->{backups}{$driver}{db}->do_query($phrasebook{'InsertBackup'},@$row);
258             }
259             }
260              
261             # handle the CSV exception
262 0 0       0 if($self->{backups}{CSV}) {
263 0         0 $self->_log("Backup to CSV file");
264 0         0 $self->{backups}{CSV}{db} = undef; # close db handle
265 0 0       0 my $fh1 = IO::File->new('addresses','r') or die "Cannot read temporary database file 'addresses'\n";
266 0 0       0 my $fh2 = IO::File->new($self->{backups}{CSV}{dbfile},'w+') or die "Cannot write to CSV database file $self->{backups}{CSV}{dbfile}\n";
267 0         0 while(<$fh1>) { print $fh2 $_ }
  0         0  
268 0         0 $fh1->close;
269 0         0 $fh2->close;
270 0         0 unlink('addresses');
271             }
272             }
273              
274             sub load_addresses {
275 2     2 1 5 my $self = shift;
276            
277 2         8 my $next = $self->dbh->iterator('hash',$phrasebook{'AllAddressesFull'});
278 2         14927 while( my $row = $next->() ) {
279 9 50       305 $self->{named_map}{$row->{name}} = { name => $row->{name}, pause => $row->{pause}, addressid => $row->{addressid}, testerid => $row->{testerid}, match => '# MAPPED NAME' } if($row->{name});
280 9 50       82 $self->{paused_map}{$row->{pause}} = { name => $row->{name}, pause => $row->{pause}, addressid => $row->{addressid}, testerid => $row->{testerid}, match => '# MAPPED PAUSE' } if($row->{pause});
281 9         60 $self->{parsed_map}{$row->{address}} = { name => $row->{name}, pause => $row->{pause}, addressid => $row->{addressid}, testerid => $row->{testerid}, match => '# MAPPED ADDRESS' };
282 9 50       24 next unless($row->{email});
283 9         48 $self->{address_map}{$row->{email}} = { name => $row->{name}, pause => $row->{pause}, addressid => $row->{addressid}, testerid => $row->{testerid}, match => '# MAPPED EMAIL' };
284              
285 9         29 my ($local,$domain) = split(/\@/,$row->{email});
286 9 50       21 next unless($domain);
287 9         93 $self->{domain_map}{$domain} = { name => $row->{name}, pause => $row->{pause}, addressid => $row->{addressid}, testerid => $row->{testerid}, match => '# MAPPED DOMAIN' };
288             }
289              
290 2 50       45 if($self->{options}{verbose}) {
291 0         0 $self->_log( "named entries = " . scalar(keys %{ $self->{named_map} }) . "\n" );
  0         0  
292 0         0 $self->_log( "paused entries = " . scalar(keys %{ $self->{paused_map} }) . "\n" );
  0         0  
293 0         0 $self->_log( "parsed entries = " . scalar(keys %{ $self->{parsed_map} }) . "\n" );
  0         0  
294 0         0 $self->_log( "address entries = " . scalar(keys %{ $self->{address_map} }) . "\n" );
  0         0  
295 0         0 $self->_log( "domain entries = " . scalar(keys %{ $self->{domain_map} }) . "\n" );
  0         0  
296             }
297              
298 2         11 $next = $self->dbh->iterator('hash',$phrasebook{'AllAddresses'});
299 2         367 while( my $row = $next->() ) {
300 9 50       231 next if($self->{parsed_map}{$row->{address}});
301 0         0 $self->{stored_map}{$row->{address}} = { name => '', pause => '', addressid => $row->{addressid}, testerid => 0, match => '# STORED ADDRESS' };
302             }
303              
304 2 50       49 my $fh = IO::File->new($self->{options}{mailrc}) or die "Cannot open mailrc file [$self->{options}{mailrc}]: $!";
305 2         223 while(<$fh>) {
306 10         59 s/\s+$//;
307 10 50       30 next if(/^$/);
308              
309 10         64 my ($alias,$name,$email) = (/alias\s+([A-Z]+)\s+"([^<]+) <([^>]+)>"/);
310 10 50       25 next unless($alias);
311              
312 10 100       31 my $testerid = $self->{address_map}{$email} ? $self->{address_map}{$email}->{testerid} : 0;
313 10 100       29 my $addressid = $self->{address_map}{$email} ? $self->{address_map}{$email}->{addressid} : 0;
314              
315 10         67 $self->{pause_map}{lc($alias)} = { name => $name, pause => $alias, testerid => $testerid, addressid => $addressid, match => '# PAUSE ID' };
316 10         95 $self->{cpan_map}{lc($email)} = { name => $name, pause => $alias, testerid => $testerid, addressid => $addressid, match => '# CPAN EMAIL' };
317             }
318 2         15 $fh->close;
319              
320 2 50       42 if($self->{options}{verbose}) {
321 0         0 $self->_log( "pause entries = " . scalar(keys %{ $self->{pause_map} }) . "\n" );
  0         0  
322 0         0 $self->_log( "cpan entries = " . scalar(keys %{ $self->{cpan_map} }) . "\n" );
  0         0  
323             }
324              
325             # grab all records for the month
326 2 50       27 my $sql = $self->{options}{month}
327             ? sprintf $phrasebook{'GetTestersByMonth'}, $self->{options}{month}
328             : $phrasebook{'GetTesters'};
329 2 50       9 if($self->{options}{verbose}) {
330 0         0 $self->_log( "sql = $sql\n" );
331             }
332 2         8 $next = $self->dbh->iterator('array',$sql);
333 2         728 $self->{parsed} = 0;
334 2         9 while(my $row = $next->()) {
335 66         1403 $self->{parsed}++;
336 66         136 my $email = _extract_email($row->[2]);
337              
338 66 100       12429 my $testerid = $self->{parsed_map}{$row->[2]} ? $self->{parsed_map}{$row->[2]}->{testerid} : 0;
339 66 100       160 my $addressid = $self->{parsed_map}{$row->[2]} ? $self->{parsed_map}{$row->[2]}->{addressid} : 0;
340 66 50 66     426 $addressid ||= $self->{stored_map}{$row->[2]} ? $self->{stored_map}{$row->[2]}->{addressid} : 0;
341 66 100 100     812 $testerid ||= $self->{address_map}{$email} ? $self->{address_map}{$email}->{testerid} : 0;
342 66 100 100     388 $addressid ||= $self->{address_map}{$email} ? $self->{address_map}{$email}->{addressid} : 0;
343              
344 66 100 66     242 next if($testerid && $addressid);
345            
346 48         469 $self->{unparsed_map}{$row->[2]} = {
347             reportid => $row->[0],
348             guid => $row->[1],
349             fulldate => $row->[3],
350             testerid => $testerid,
351             addressid => $addressid,
352             'sort' => '',
353             email => $email
354             };
355             }
356              
357 2 50       83 if($self->{options}{verbose}) {
358 0         0 $self->_log( "rows = $self->{parsed}\n" );
359 0         0 $self->_log( "unparsed entries = " . scalar(keys %{ $self->{unparsed_map} }) . "\n" );
  0         0  
360             }
361             }
362              
363             sub match_addresses {
364 2     2 1 12093 my $self = shift;
365              
366             # if($self->{options}{verbose}) {
367             # use Data::Dumper;
368             # $self->_log( "named_map=". Dumper($self->{named_map}) );
369             # $self->_log( "unparsed_map=". Dumper($self->{unparsed_map}) );
370             # $self->_log( "parsed_map=" . Dumper($self->{parsed_map}) );
371             # $self->_log( "paused_map=" . Dumper($self->{paused_map}) );
372             # $self->_log( "pause_map=" . Dumper($self->{pause_map}) );
373             # $self->_log( "cpan_map=" . Dumper($self->{cpan_map}) );
374             # $self->_log( "domain_map=" . Dumper($self->{domain_map}) );
375             # $self->_log( "address_map=" . Dumper($self->{address_map}) );
376             # $self->_log( "stored_map=" . Dumper($self->{stored_map}) );
377             # }
378              
379 2         6 for my $key (keys %{ $self->{unparsed_map} }) {
  2         11  
380 14         267 my $email = _extract_email($key);
381 14 50       34 unless($email) {
382 0         0 push @{$self->{result}{NOEMAIL}}, $key;
  0         0  
383 0         0 next;
384             }
385 14         25 $email = lc($email);
386 14         39 my ($local,$domain) = split(/\@/,$email);
387             #print STDERR "email=[$email], local=[$local], domain=[$domain]\n" if($email =~ /indiana/);
388 14 100       42 next if($self->map_address($key,$local,$domain,$email));
389              
390 12         17 my $last = 0;
391 12         33 my @parts = split(/\./,$domain);
392 12         41 while(@parts > 1) {
393 20         109 my $domain2 = join(".",@parts);
394             #print STDERR "domain2=[$domain2]\n" if($email =~ /indiana/);
395 20 100       46 if($self->map_domain($key,$local,$domain2,$email)) {
396 4         7 $last = 1;
397 4         6 last;
398             }
399 16         45 shift @parts;
400             }
401              
402 12 100       32 next if($last);
403 8 50       22 next if($self->map_name($key));
404             }
405             }
406              
407             sub print_addresses {
408 2     2 1 679 my $self = shift;
409 2         6 my $text = '';
410              
411 2 50       11 if($self->{result}{NOMAIL}) {
412 0         0 $self->_printout( "ERRORS:" );
413 0         0 for my $email (sort @{$self->{result}{NOMAIL}}) {
  0         0  
414 0         0 $self->_printout( "NOMAIL: $email" );
415             }
416             }
417              
418 2         10 $self->_printout( "\nMATCH:" );
419 2         5 for my $key (sort {$self->{unparsed_map}{$a}->{match} cmp $self->{unparsed_map}{$b}->{match}}
  10         30  
  14         44  
420 2         69 grep {$self->{unparsed_map}{$_}->{match}}
421             keys %{ $self->{unparsed_map} }) {
422 8 100 66     184 if($self->{unparsed_map}{$key}->{match} && $self->{unparsed_map}{$key}->{match} !~ /SUGGESTION/) {
423 6   50     156 $self->_printout(
      50        
      50        
      50        
      100        
      50        
      50        
      50        
      50        
424             sprintf "%d,%s,%s,%d,%d,%s,%s,%s,%s,%s",
425             ($self->{unparsed_map}{$key}->{reportid} || 0),
426             ($self->{unparsed_map}{$key}->{guid} || ''),
427             ($self->{unparsed_map}{$key}->{fulldate} || ''),
428              
429             ($self->{unparsed_map}{$key}->{addressid} || 0),
430             ($self->{unparsed_map}{$key}->{testerid} || 0),
431             $key,
432             ($self->{unparsed_map}{$key}->{name} || ''),
433             ($self->{unparsed_map}{$key}->{pause} || ''),
434             ($self->{unparsed_map}{$key}->{display} || ''),
435             ($self->{unparsed_map}{$key}->{match} || '')
436             );
437 6         29 delete $self->{unparsed_map}{$key};
438             } else {
439 2         15 my ($local,$domain) = $self->{unparsed_map}{$key}->{email} =~ /([-+=\w.]+)\@([^\s]+)/;
440 2 50 33     14 ($local,$domain) = $key =~ /([-+=\w.]+)\@([^\s]+)/ unless($local && $domain);
441 2 50       5 if($domain) {
442 2         26 my @parts = split(/\./,$domain);
443 2         1117 $self->{unparsed_map}{$key}{'sort'} = join(".",reverse @parts) . '@' . $local;
444             } else {
445 0         0 print STDERR "FAIL: $key\n";
446 0         0 $self->{unparsed_map}{$key}{'sort'} = '';
447             }
448             }
449             }
450              
451 2         10 $self->_printout( "\nSUGGESTIONS:" );
452 2 100       6 for my $key (sort {$self->{unparsed_map}{$a} cmp $self->{unparsed_map}{$b}}
  0         0  
  8         39  
453 2         8 grep {$self->{unparsed_map}{$_}->{match} && $self->{unparsed_map}{$_}->{match} =~ /SUGGESTION/}
454             keys %{ $self->{unparsed_map} }) {
455 2   50     190 $self->_printout(
      50        
      50        
      50        
      50        
      50        
      50        
      50        
      50        
456             sprintf "%d,%s,%s,%d,%d,%s,%s,%s,%s,%s",
457             ($self->{unparsed_map}{$key}->{reportid} || 0),
458             ($self->{unparsed_map}{$key}->{guid} || ''),
459             ($self->{unparsed_map}{$key}->{fulldate} || ''),
460              
461             ($self->{unparsed_map}{$key}->{addressid} || 0),
462             ($self->{unparsed_map}{$key}->{testerid} || 0),
463             $key,
464             ($self->{unparsed_map}{$key}->{name} || ''),
465             ($self->{unparsed_map}{$key}->{pause} || ''),
466             ($self->{unparsed_map}{$key}->{display} || ''),
467             ($self->{unparsed_map}{$key}->{match} || '')
468             );
469 2         11 delete $self->{unparsed_map}{$key};
470             }
471              
472 2         8 $self->_printout( '' );
473 2 50       10 return if($self->{options}{match});
474              
475             #use Data::Dumper;
476             #print STDERR Dumper(\%{ $self->{unparsed_map} });
477              
478 2         4 my @mails;
479 2         7 $self->_printout( "PATTERNS:" );
480 2         3 for my $key (sort { $self->{unparsed_map}{$a}{'sort'} cmp $self->{unparsed_map}{$b}{'sort'} } keys %{ $self->{unparsed_map} }) {
  6         18  
  2         11  
481 6 50       15 next unless($key);
482 6   50     187 $self->_printout(
      50        
      50        
      50        
      50        
      50        
      50        
      50        
      50        
      50        
483             sprintf "%d,%s,%s,%d,%d,%s,%s,%s,%s,%s,%s",
484             ($self->{unparsed_map}{$key}->{reportid} || 0),
485             ($self->{unparsed_map}{$key}->{guid} || ''),
486             ($self->{unparsed_map}{$key}->{fulldate} || ''),
487              
488             ($self->{unparsed_map}{$key}->{addressid} || 0),
489             ($self->{unparsed_map}{$key}->{testerid} || 0),
490             $key,
491             ($self->{unparsed_map}{$key}->{name} || ''),
492             ($self->{unparsed_map}{$key}->{pause} || ''),
493             ($self->{unparsed_map}{$key}->{match} || ''),
494             ($self->{unparsed_map}{$key}->{display} || ''),
495             ($self->{unparsed_map}{$key}->{'sort'} || '')
496             );
497             }
498             }
499              
500             sub map_address {
501 18     18 1 57 my ($self,$key,$local,$domain,$email) = @_;
502              
503 18 100       55 if($self->{address_map}{$key}) {
504 1         13 $self->{unparsed_map}{$key}->{$_} = $self->{address_map}{$key}->{$_} for(qw(testerid addressid name pause match));
505 1         7 return 1;
506             }
507              
508 17 50       45 if($self->{address_map}{$email}) {
509 0         0 $self->{unparsed_map}{$key}->{$_} = $self->{address_map}{$email}->{$_} for(qw(testerid addressid name pause match));
510 0         0 return 1;
511             }
512              
513 17 100       41 if($domain eq 'cpan.org') {
514 7 100       27 if($self->{pause_map}{$local}) {
515 3         44 $self->{unparsed_map}{$key}->{$_} = $self->{pause_map}{$local}->{$_} for(qw(testerid addressid name pause match));
516 3         17 return 1;
517             }
518             }
519              
520 14 100       119 if($self->{cpan_map}{$email}) {
521 1         16 $self->{unparsed_map}{$key}->{$_} = $self->{cpan_map}{$email}->{$_} for(qw(testerid addressid name pause match));
522 1         10833 return 1;
523             }
524              
525 13         35 return 0;
526             }
527              
528             sub map_domain {
529 39     39 1 11691 my ($self,$key,$local,$domain,$email) = @_;
530              
531 39         47 for my $filter (@{$self->{filters}}) {
  39         102  
532 122 100       2715 return 0 if($domain =~ /^$filter$/);
533             }
534              
535 23 100       71 if($self->{domain_map}{$domain}) {
536 6         201 $self->{unparsed_map}{$key}->{$_} = $self->{domain_map}{$domain}->{$_} for(qw(testerid name pause match));
537 6         174 $self->{unparsed_map}{$key}->{match} .= " - $domain";
538 6         34 return 1;
539             }
540              
541 17         20 for my $map (keys %{ $self->{domain_map} }) {
  17         48  
542 58 50       613 if($map =~ /\b$domain$/) {
543 0         0 $self->{unparsed_map}{$key}->{$_} = $self->{domain_map}{$map}->{$_} for(qw(testerid name pause match));
544 0         0 $self->{unparsed_map}{$key}->{match} .= " - $domain - $map";
545 0         0 return 1;
546             }
547             }
548              
549 17         30 for my $map (keys %{ $self->{domain_map} }) {
  17         45  
550 57 100       4746 if($domain =~ /\b$map$/) {
551 1         44 $self->{unparsed_map}{$key}->{$_} = $self->{domain_map}{$map}->{$_} for(qw(testerid name pause match));
552 1         7 $self->{unparsed_map}{$key}->{match} .= " - $domain - $map";
553 1         9 return 1;
554             }
555             }
556              
557 16         58 return 0;
558             }
559              
560             sub map_name {
561 8     8 1 12 my ($self,$key) = @_;
562 8         16 my ($name) = $key =~ /\(+"?([^"\)]+)"?\)+/;
563 8 50       122 ($name) = $key =~ /^\s*"?([^"<]+)"?\s+
564 8 100       34 return 0 unless($name);
565              
566 2 50       11 if($self->{named_map}{$name}) {
567 0         0 $self->{unparsed_map}{$key}->{$_} = $self->{named_map}{$name}->{$_} for(qw(testerid name pause match));
568 0         0 return 1;
569             }
570              
571 2 50       10 if($self->{paused_map}{$name}) {
572 0         0 $self->{unparsed_map}{$key}->{$_} = $self->{paused_map}{$name}->{$_} for(qw(testerid name pause match));
573 0         0 return 1;
574             }
575              
576 2         10 $self->{unparsed_map}{$key}{display} = $name;
577 2         4 $self->{unparsed_map}{$key}{match} = '# SUGGESTION';
578              
579 2         11 return 0;
580             }
581              
582             #----------------------------------------------------------------------------
583             # Private Methods
584              
585             sub _lastid {
586 8     8   3735 my ($self,$id) = @_;
587 8         32 my $f = $self->lastfile();
588              
589 8 100       204 unless( -f $f) {
590 2         247 mkpath(dirname($f));
591 2         13 overwrite_file( $f, 0 );
592 2   50     607 $id ||= 0;
593             }
594              
595 8 100       23 if($id) { overwrite_file( $f, $id ); }
  2         14  
596 6         25 else { $id = read_file($f); }
597              
598 8         994 return $id;
599             }
600              
601             sub _extract_email {
602 102     102   16147 my $str = shift;
603             #my ($email) = $str =~ /([-+=\w.]+\@(?:[-\w]+\.)+(?:com|net|org|info|biz|edu|museum|mil|gov|[a-z]{2,2}))/i;
604 102         555 my ($email) = $str =~ /([-+=\w.]+\@[-\w\.]+)/i;
605 102   50     468 return $email || '';
606             }
607              
608             sub _init_options {
609 9     9   18 my $self = shift;
610 9         39 my %hash = @_;
611 9         66 $self->{options} = {};
612 9         54 my @options = qw(mailrc update clean reindex lastid backup month match verbose lastfile logfile logclean output);
613              
614 9 50       66 GetOptions( $self->{options},
615              
616             # mandatory options
617             'config|c=s',
618              
619             # update mode options
620             'update|u=s',
621              
622             # clean mode options
623             'clean',
624              
625             # reindex mode options
626             'reindex|r',
627             'lastid|l=i',
628              
629             # backup mode options
630             'backup|b',
631              
632             # search mode options
633             'mailrc|m=s',
634             'month=s',
635             'match',
636              
637             # other options
638             'output=s',
639             'lastfile=s',
640             'logfile=s',
641             'logclean=i',
642             'verbose|v',
643             'help|h'
644             ) or $self->_help();
645              
646 9   66     9187 $self->{options}{$_} ||= $hash{$_} for(qw(config help),@options);
647              
648 9 50       50 $self->_help(1) if($self->{options}{help});
649 9 50       36 $self->_help(0) if($self->{options}{version});
650              
651 9 50       37 $self->_help(1,"Must specify the configuration file") unless( $self->{options}{config});
652 9 50       317 $self->_help(1,"Configuration file [$self->{options}{config}] not found") unless(-f $self->{options}{config});
653              
654             # load configuration
655 9         129 my $cfg = Config::IniFiles->new( -file => $self->{options}{config} );
656              
657             # configure databases
658 9         42241 my %opts;
659 9         85 my $db = 'CPANSTATS';
660 9 50       40 die "No configuration for $db database\n" unless($cfg->SectionExists($db));
661 9         254 $opts{$_} = $cfg->val($db,$_) for(qw(driver database dbfile dbhost dbport dbuser dbpass));
662 9         1575 $self->dbh( CPAN::Testers::Common::DBUtils->new(%opts) );
663 9 50       670 die "Cannot configure $db database\n" unless($self->dbh);
664              
665             # use configuration settings or defaults if none provided
666 9         114 for my $opt (@options) {
667 117   100     2631 $self->{options}{$opt} ||= $cfg->val('MASTER',$opt) || $defaults{$opt};
      100        
668             }
669              
670             # extract filters
671 9         225 my $filters = $cfg->val('DOMAINS','filters');
672 9 50       198 my @filters = split("\n", $filters) if($filters);
673 9 50       30 $self->{filters} = \@filters if(@filters);
674              
675             # mandatory options
676             #for my $opt (qw()) {
677             # $self->_help(1,"No $opt configuration setting given, see help below.") unless( $self->{options}{$opt});
678             # $self->_help(1,"Given $opt file [$self->{options}{$opt}] not a valid file, see help below.") unless(-f $self->{options}{$opt});
679             #}
680              
681             # options to check if provided
682 9         25 for my $opt (qw(update mailrc)) {
683 18 100       62 next unless( $self->{options}{$opt});
684 10 50       288 $self->_help(1,"Given $opt file [$self->{options}{$opt}] not a valid file, see help below.") unless(-f $self->{options}{$opt});
685             }
686              
687             # configure backup DBs
688 9 50       44 if($self->{options}{backup}) {
689 0 0       0 $self->help(1,"No configuration for BACKUPS with backup option") unless($cfg->SectionExists('BACKUPS'));
690              
691 0         0 my @drivers = $cfg->val('BACKUPS','drivers');
692 0         0 for my $driver (@drivers) {
693 0 0       0 $self->help(1,"No configuration for backup option '$driver'") unless($cfg->SectionExists($driver));
694              
695 0         0 %opts = ();
696 0         0 $opts{$_} = $cfg->val($driver,$_) for(qw(driver database dbfile dbhost dbport dbuser dbpass));
697 0 0       0 $self->{backups}{$driver}{'exists'} = $driver =~ /SQLite/i ? -f $opts{database} : 1;
698              
699             # CSV is a bit of an oddity!
700 0 0       0 if($driver =~ /CSV/i) {
701 0         0 $self->{backups}{$driver}{'exists'} = 0;
702 0         0 $self->{backups}{$driver}{'dbfile'} = $opts{dbfile};
703 0         0 $opts{dbfile} = 'uploads';
704 0         0 unlink($opts{dbfile});
705             }
706              
707 0         0 $self->{backups}{$driver}{db} = CPAN::Testers::Common::DBUtils->new(%opts);
708 0 0       0 $self->help(1,"Cannot configure BACKUPS database for '$driver'") unless($self->{backups}{$driver}{db});
709             }
710             }
711              
712             # clean up potential rogue characters
713 9 50       34 $self->{options}{lastid} =~ s/\D+//g if($self->{options}{lastid});
714              
715             # prime accessors
716 9         56 $self->lastfile($self->{options}{lastfile});
717 9         92 $self->logfile($self->{options}{logfile});
718 9         77 $self->logclean($self->{options}{logclean});
719              
720             # set output
721 9 100       68 if($self->{options}{output}) {
722 2 50       20 if(my $fh = IO::File->new($self->{options}{output}, 'w+')) {
723 2         349 $self->{fh} = $fh;
724             }
725             }
726              
727 9 50       262 return unless($self->{options}{verbose});
728 0   0     0 print STDERR "config: $_ = ".($self->{options}{$_}||'')."\n" for(@options);
729             }
730              
731             sub _help {
732 0     0   0 my ($self,$full,$mess) = @_;
733              
734 0 0       0 print "\n$mess\n\n" if($mess);
735              
736 0 0       0 if($full) {
737 0         0 print "\n";
738 0         0 print "Usage:$0 [--verbose|v] --config|c= \\\n";
739 0         0 print " ( [--help|h] \\\n";
740 0         0 print " | [--update=] \\\n";
741 0         0 print " | [--reindex] [--lastid=] \\\n";
742 0         0 print " | [--clean] \\\n";
743 0         0 print " | [--backup] \\\n";
744 0         0 print " | [--mailrc|m=] [--month=] [--match] ) \\\n";
745 0         0 print " [--output=] \n\n";
746 0         0 print " [--logfile=] [--logclean=(0|1)] \n\n";
747              
748             # 12345678901234567890123456789012345678901234567890123456789012345678901234567890
749 0         0 print "This program manages the cpan-tester addresses.\n";
750              
751 0         0 print "\nFunctional Options:\n";
752 0         0 print " --config= # path/file to configuration file\n";
753 0         0 print " [--mailrc=] # path/file to mailrc file\n";
754 0         0 print " [--output=] # path/file to output file (defaults to STDOUT)\n";
755              
756 0         0 print "\nUpdate Options:\n";
757 0         0 print " [--update=] # run in update mode\n";
758              
759 0         0 print "\nReindex Options:\n";
760 0         0 print " [--reindex] # run in reindex mode\n";
761 0         0 print " [--lastid=] # id to start reindex from\n";
762              
763 0         0 print "\nClean Options:\n";
764 0         0 print " [--clean] # run in clean mode (de-duplication)\n";
765              
766 0         0 print "\nBackup Options:\n";
767 0         0 print " [--backup] # run in backup mode\n";
768              
769 0         0 print "\nSearch Options:\n";
770 0         0 print " [--month=] # YYYYMM string to match from\n";
771 0         0 print " [--match] # display matches only\n";
772              
773 0         0 print "\nOther Options:\n";
774 0         0 print " [--verbose] # turn on verbose messages\n";
775 0         0 print " [--help] # this screen\n";
776              
777 0         0 print "\nFor further information type 'perldoc $0'\n";
778             }
779              
780 0         0 print "$0 v$VERSION\n";
781 0         0 exit(0);
782             }
783              
784             sub _printout {
785 26     26   41 my $self = shift;
786 26 100       71 if(my $fh = $self->{fh}) {
787 22         109 print $fh "@_\n";
788             } else {
789 4         718 print STDOUT "@_\n";
790             }
791             }
792              
793             sub _log {
794 45     45   4000 my $self = shift;
795 45 50       446 my $log = $self->logfile or return;
796 45 100       2216 mkpath(dirname($log)) unless(-f $log);
797              
798 45 100       242 my $mode = $self->logclean ? 'w+' : 'a+';
799 45         435 $self->logclean(0);
800              
801 45         2859 my @dt = localtime(time);
802 45         490 my $dt = sprintf "%04d/%02d/%02d %02d:%02d:%02d", $dt[5]+1900,$dt[4]+1,$dt[3],$dt[2],$dt[1],$dt[0];
803              
804 45 50       613 my $fh = IO::File->new($log,$mode) or die "Cannot write to log file [$log]: $!\n";
805 45         9529 print $fh "$dt ", @_, "\n";
806 45         528 $fh->close;
807             }
808              
809             q!Will code for a damn fine Balti!;
810              
811             __END__