File Coverage

blib/lib/CPAN/Testers/Data/Addresses.pm
Criterion Covered Total %
statement 355 446 79.6
branch 142 206 68.9
condition 68 115 59.1
subroutine 32 33 96.9
pod 13 13 100.0
total 610 813 75.0


line stmt bran cond sub pod time code
1             package CPAN::Testers::Data::Addresses;
2              
3 13     13   129048 use 5.006;
  13         45  
  13         634  
4 13     13   98 use strict;
  13         23  
  13         376  
5 13     13   76 use warnings;
  13         23  
  13         457  
6              
7 13     13   68 use vars qw($VERSION);
  13         22  
  13         926  
8             $VERSION = '0.13';
9             $|++;
10              
11             #----------------------------------------------------------------------------
12             # Library Modules
13              
14 13     13   68 use base qw(Class::Accessor::Fast);
  13         22  
  13         12445  
15              
16 13     13   72752 use CPAN::Testers::Common::DBUtils;
  13         370323  
  13         135  
17 13     13   19372 use Config::IniFiles;
  13         598914  
  13         468  
18 13     13   181 use DBI;
  13         28  
  13         465  
19 13     13   74 use File::Basename;
  13         23  
  13         1032  
20 13     13   253 use File::Path;
  13         24  
  13         641  
21 13     13   14355 use File::Slurp;
  13         188784  
  13         1060  
22 13     13   17724 use Getopt::Long;
  13         200327  
  13         113  
23 13     13   19982 use IO::File;
  13         16206  
  13         107790  
24              
25             #----------------------------------------------------------------------------
26             # Variables
27              
28             my (%backups);
29              
30             my %phrasebook = (
31             'AllAddresses' => q{SELECT * FROM tester_address},
32             'AllAddressesFull' => q{SELECT a.*,p.name,p.pause FROM tester_address AS a INNER JOIN tester_profile AS p ON p.testerid=a.testerid},
33             'UpdateAddressIndex' => q{REPLACE INTO ixaddress (id,guid,addressid,fulldate) VALUES (?,?,?,?)},
34              
35             'InsertAddress' => q{INSERT INTO tester_address (testerid,address,email) VALUES (?,?,?)},
36             'GetAddressByText' => q{SELECT * FROM tester_address WHERE address = ?},
37             'LinkAddress' => q{UPDATE tester_address SET testerid=? WHERE addressid=?},
38              
39             'GetTesterByPause' => q{SELECT testerid FROM tester_profile WHERE pause = ?},
40             'GetTesterByName' => q{SELECT testerid FROM tester_profile WHERE name = ?},
41             'InsertTester' => q{INSERT INTO tester_profile (name,pause) VALUES (?,?)},
42              
43             'AllReports' => q{SELECT id,guid,tester,fulldate FROM cpanstats WHERE type=2 AND id > ? ORDER BY id LIMIT 1000000},
44             'GetTestersByMonth' => q{SELECT DISTINCT c.id,c.guid,c.tester,c.fulldate FROM cpanstats c LEFT JOIN ixaddress x ON x.id=c.id LEFT JOIN tester_address a ON a.addressid=x.addressid WHERE a.address IS NULL AND c.postdate >= '%s' AND c.state IN ('pass','fail','na','unknown')},
45             'GetTesters' => q{SELECT DISTINCT c.id,c.guid,c.tester,c.fulldate FROM cpanstats c LEFT JOIN ixaddress x ON x.id=c.id LEFT JOIN tester_address a ON a.addressid=x.addressid WHERE a.address IS NULL AND c.state IN ('pass','fail','na','unknown')},
46              
47             # Database backup requests
48             'DeleteBackup' => 'DELETE FROM addresses',
49             'CreateBackup' => 'CREATE TABLE addresses (testerid int, name text, pause text, PRIMARY KEY (testerid))',
50             'SelectBackup' => 'SELECT * FROM tester_profile',
51             'InsertBackup' => 'INSERT INTO addresses (testerid,name,pause) VALUES (?,?,?)',
52              
53             # Consolidations
54             'DuplicateAddresses' => q{SELECT address,count(*) AS count FROM tester_address GROUP BY address ORDER BY count DESC},
55             'UpdateAddress' => q{UPDATE ixaddress SET addressid=? WHERE addressid=?},
56             'DeleteAddress' => q{DELETE FROM tester_address WHERE addressid=?},
57             );
58              
59             my %defaults = (
60             'address' => 'data/addresses.txt',
61             'mailrc' => 'data/01mailrc.txt',
62             'lastfile' => 'data/_lastid',
63             'month' => 199000,
64             'match' => 0,
65             'logclean' => 0
66             );
67              
68             #----------------------------------------------------------------------------
69             # The Application Programming Interface
70              
71             sub new {
72 13     13 1 9559 my $class = shift;
73              
74 13         31 my $self = {};
75 13         43 bless $self, $class;
76              
77 13         75 $self->_init_options(@_);
78 13         103 return $self;
79             }
80              
81             sub DESTROY {
82 13     13   24851 my $self = shift;
83 13 100       288 $self->{fh}->close if($self->{fh});
84             }
85              
86             __PACKAGE__->mk_accessors(qw( lastfile logfile logclean dbh ));
87              
88             sub process {
89 4     4 1 12634 my $self = shift;
90              
91 4 100       35 if($self->{options}{update}) {
    100          
    50          
    50          
92 2         15 $self->update();
93              
94             } elsif($self->{options}{reindex}) {
95 1         6 $self->reindex();
96              
97             } elsif($self->{options}{backup}) {
98 0         0 $self->backup();
99              
100             } elsif($self->{options}{clean}) {
101 0         0 $self->clean();
102              
103             } else {
104 1         4 $self->search();
105             }
106             }
107              
108             sub search {
109 1     1 1 2 my $self = shift;
110 1         6 $self->_log("starting search");
111              
112 1         32 $self->load_addresses();
113 1         4 $self->match_addresses();
114 1         4 $self->print_addresses();
115              
116 1         4 $self->_log("stopping search");
117             }
118              
119             sub update {
120 2     2 1 18 my $self = shift;
121 2         7 my ($new,$all,$add,$err) = (0,0,0,0);
122 2         11 $self->_log("starting update");
123              
124 2 50       130 my $fh = IO::File->new($self->{options}{update}) or die "Cannot open mailrc file [$self->{options}{update}]: $!";
125 2         151 while(<$fh>) {
126 10         53448 s/\s+$//;
127 10 50       28 next unless($_);
128              
129 10         270 my ($reportid,$guid,$fulldate,$addressid,$testerid,$address,$name,$pause,$display,$match) = split(',');
130 10 100 66     74 unless($address && ($name || $display)) {
      66        
131 3         18 $self->_log("... bogus line: $_");
132 3         127 $err++;
133 3         17 next;
134             }
135              
136 7   100     38 $addressid ||= 0;
137 7   100     22 $testerid ||= 0;
138 7   33     18 $name ||= $display;
139 7   50     20 $pause ||= '';
140              
141 7         14 $all++;
142 7 100       50 if($testerid == 0) {
143 3         5 my @rows;
144 3 50       20 @rows = $self->dbh->get_query('hash',$phrasebook{'GetTesterByPause'},$pause) if($pause);
145 3 100       1108 @rows = $self->dbh->get_query('hash',$phrasebook{'GetTesterByName'},$name) unless(@rows);
146              
147 3 100       440 if(@rows) {
148 1         5 $testerid = $rows[0]->{testerid};
149             } else {
150 2         10 $testerid = $self->dbh->id_query($phrasebook{'InsertTester'},$name,$pause);
151 2         1324545 $new++;
152             }
153             }
154              
155 7 100       44 if($addressid == 0) {
156 3         35 my @rows = $self->dbh->get_query('hash',$phrasebook{'GetAddressByText'},$address);
157 3 100       1230 if(@rows) {
158 1         4 $addressid = $rows[0]->{addressid};
159             } else {
160 2         18 $addressid = $self->dbh->id_query($phrasebook{'InsertAddress'},$testerid,$address,_extract_email($address));
161 2         137584 $add++;
162             }
163             }
164              
165 7         52 $self->dbh->do_query($phrasebook{'LinkAddress'},$testerid,$addressid);
166 7         233821 $self->_log("... profile => address: ($testerid,$name,$pause) => ($addressid,$address)");
167              
168 7 100 100     751 next unless($reportid && $guid && $fulldate);
      100        
169 4         22 $self->dbh->do_query($phrasebook{'UpdateAddressIndex'},$reportid,$guid,$addressid,$fulldate);
170             }
171              
172 2         13602 $self->_printout("$all addresses mapped");
173 2         11 $self->_printout("$new new testers");
174 2         14 $self->_printout("$add addresses added");
175 2         10 $self->_printout("$err bogus lines");
176              
177 2         17 $self->_log("$all addresses mapped, $new new addresses");
178 2         72 $self->_log("stopping update");
179             }
180              
181             sub reindex {
182 1     1 1 3 my $self = shift;
183              
184 1         7 $self->_log("starting reindex");
185              
186             # load known addresses
187 1         33 my %address;
188 1         6 my $next = $self->dbh->iterator('hash',$phrasebook{'AllAddresses'});
189 1         1115 while( my $row = $next->() ) {
190 5         131 $address{$row->{address}} = $row->{addressid};
191             }
192              
193             # search through reports updating the index
194 1 50       23 my $lastid = defined $self->{options}{lastid} ? $self->{options}{lastid} : $self->_lastid();
195 1   50     5 $next = $self->dbh->iterator('hash',$phrasebook{'AllReports'},($lastid || 0));
196 1         236 while( my $row = $next->() ) {
197             #print STDERR "row: $row->{id} $row->{tester}\n";
198 33 100       2828 if($address{$row->{tester}}) {
199 24         285 $self->_log("FOUND - row: $row->{id} $row->{tester}");
200 24         2288 $self->dbh->do_query($phrasebook{'UpdateAddressIndex'},$row->{id},$row->{guid},$address{$row->{tester}},$row->{fulldate});
201             } else {
202 9         115 $self->_log("NEW - row: $row->{id} $row->{tester}");
203 9         456 $address{$row->{tester}} = $self->dbh->id_query($phrasebook{'InsertAddress'},0,$row->{tester},_extract_email($row->{tester}));
204 9         188321 $self->dbh->do_query($phrasebook{'UpdateAddressIndex'},$row->{id},$row->{guid},$address{$row->{tester}},$row->{fulldate});
205             }
206              
207 33         3109624 $lastid = $row->{id};
208             }
209 1         68 $self->_lastid($lastid);
210 1         8 $self->_log("stopping reindex");
211             }
212              
213             sub clean {
214 1     1 1 167726 my $self = shift;
215 1         7 $self->_log("starting clean");
216              
217 1         69 my $next = $self->dbh->iterator('hash',$phrasebook{'DuplicateAddresses'});
218 1         376 while( my $row = $next->() ) {
219 2         19660 $self->_log("count=$row->{count}, address=$row->{address}");
220              
221 2 100       111 last if($row->{count} <= 1);
222              
223 1         3 my %addr;
224 1         7 my @rows = $self->dbh->get_query('hash',$phrasebook{'GetAddressByText'},$row->{address});
225 1         323 $self->_log("- count=".scalar(@rows).", address=$row->{address}");
226 1         66 for my $addr (@rows) {
227 5         55128 $self->_log("id=$addr->{addressid}, address=$addr->{address}");
228 5 100       256 if($addr{$addr->{address}}) {
229 4         34 $self->_log("replace $addr->{addressid},'$addr->{address}' => $addr{$addr->{address}}->{addressid},'$addr{$addr->{address}}->{address}'");
230 4         140 $self->dbh->do_query($phrasebook{'UpdateAddress'},$addr{$addr->{address}}->{addressid},$addr->{addressid});
231 4         1174 $self->dbh->do_query($phrasebook{'DeleteAddress'},$addr->{addressid});
232              
233             } else {
234 1         7 $addr{$addr->{address}} = $addr;
235             }
236             }
237             }
238              
239 1         9 $self->_log("stopping clean");
240             }
241              
242             sub backup {
243 1     1 1 3 my $self = shift;
244              
245 1         3 for my $driver (keys %{$self->{backups}}) {
  1         7  
246 1 50       18 if($self->{backups}{$driver}{'exists'}) {
    50          
247 0         0 $self->{backups}{$driver}{db}->do_query($phrasebook{'DeleteBackup'});
248             } elsif($driver =~ /(CSV|SQLite)/i) {
249 1         10 $self->{backups}{$driver}{db}->do_query($phrasebook{'CreateBackup'});
250             }
251             }
252              
253 1         47629 $self->_log("Backup via DBD drivers");
254              
255 1         43 my $rows = $self->dbh->iterator('array',$phrasebook{'SelectBackup'});
256 1         1310 while(my $row = $rows->()) {
257 5         70594 for my $driver (keys %{$self->{backups}}) {
  5         37  
258 5         44 $self->{backups}{$driver}{db}->do_query($phrasebook{'InsertBackup'},@$row);
259             }
260             }
261              
262             # handle the CSV exception
263 1 50       23051 if($self->{backups}{CSV}) {
264 0         0 $self->_log("Backup to CSV file");
265 0         0 $self->{backups}{CSV}{db} = undef; # close db handle
266 0 0       0 my $fh1 = IO::File->new('addresses','r') or die "Cannot read temporary database file 'addresses'\n";
267 0 0       0 my $fh2 = IO::File->new($self->{backups}{CSV}{dbfile},'w+') or die "Cannot write to CSV database file $self->{backups}{CSV}{dbfile}\n";
268 0         0 while(<$fh1>) { print $fh2 $_ }
  0         0  
269 0         0 $fh1->close;
270 0         0 $fh2->close;
271 0         0 unlink('addresses');
272             }
273             }
274              
275             sub load_addresses {
276 2     2 1 4 my $self = shift;
277            
278 2         8 my $next = $self->dbh->iterator('hash',$phrasebook{'AllAddressesFull'});
279 2         13050 while( my $row = $next->() ) {
280 9 50       340 $self->{named_map}{$row->{name}} = { name => $row->{name}, pause => $row->{pause}, addressid => $row->{addressid}, testerid => $row->{testerid}, match => '# MAPPED NAME' } if($row->{name});
281 9 50       83 $self->{paused_map}{$row->{pause}} = { name => $row->{name}, pause => $row->{pause}, addressid => $row->{addressid}, testerid => $row->{testerid}, match => '# MAPPED PAUSE' } if($row->{pause});
282 9         49 $self->{parsed_map}{$row->{address}} = { name => $row->{name}, pause => $row->{pause}, addressid => $row->{addressid}, testerid => $row->{testerid}, match => '# MAPPED ADDRESS' };
283 9 50       25 next unless($row->{email});
284 9         51 $self->{address_map}{$row->{email}} = { name => $row->{name}, pause => $row->{pause}, addressid => $row->{addressid}, testerid => $row->{testerid}, match => '# MAPPED EMAIL' };
285              
286 9         30 my ($local,$domain) = split(/\@/,$row->{email});
287 9 50       22 next unless($domain);
288 9         88 $self->{domain_map}{$domain} = { name => $row->{name}, pause => $row->{pause}, addressid => $row->{addressid}, testerid => $row->{testerid}, match => '# MAPPED DOMAIN' };
289             }
290              
291 2 50       44 if($self->{options}{verbose}) {
292 0         0 $self->_log( "named entries = " . scalar(keys %{ $self->{named_map} }) . "\n" );
  0         0  
293 0         0 $self->_log( "paused entries = " . scalar(keys %{ $self->{paused_map} }) . "\n" );
  0         0  
294 0         0 $self->_log( "parsed entries = " . scalar(keys %{ $self->{parsed_map} }) . "\n" );
  0         0  
295 0         0 $self->_log( "address entries = " . scalar(keys %{ $self->{address_map} }) . "\n" );
  0         0  
296 0         0 $self->_log( "domain entries = " . scalar(keys %{ $self->{domain_map} }) . "\n" );
  0         0  
297             }
298              
299 2         12 $next = $self->dbh->iterator('hash',$phrasebook{'AllAddresses'});
300 2         506 while( my $row = $next->() ) {
301 9 50       221 next if($self->{parsed_map}{$row->{address}});
302 0         0 $self->{stored_map}{$row->{address}} = { name => '', pause => '', addressid => $row->{addressid}, testerid => 0, match => '# STORED ADDRESS' };
303             }
304              
305 2 50       52 my $fh = IO::File->new($self->{options}{mailrc}) or die "Cannot open mailrc file [$self->{options}{mailrc}]: $!";
306 2         245 while(<$fh>) {
307 10         58 s/\s+$//;
308 10 50       40 next if(/^$/);
309              
310 10         58 my ($alias,$name,$email) = (/alias\s+([A-Z]+)\s+"([^<]+) <([^>]+)>"/);
311 10 50       26 next unless($alias);
312              
313 10 100       33 my $testerid = $self->{address_map}{$email} ? $self->{address_map}{$email}->{testerid} : 0;
314 10 100       25 my $addressid = $self->{address_map}{$email} ? $self->{address_map}{$email}->{addressid} : 0;
315              
316 10         55 $self->{pause_map}{lc($alias)} = { name => $name, pause => $alias, testerid => $testerid, addressid => $addressid, match => '# PAUSE ID' };
317 10         86 $self->{cpan_map}{lc($email)} = { name => $name, pause => $alias, testerid => $testerid, addressid => $addressid, match => '# CPAN EMAIL' };
318             }
319 2         17 $fh->close;
320              
321 2 50       41 if($self->{options}{verbose}) {
322 0         0 $self->_log( "pause entries = " . scalar(keys %{ $self->{pause_map} }) . "\n" );
  0         0  
323 0         0 $self->_log( "cpan entries = " . scalar(keys %{ $self->{cpan_map} }) . "\n" );
  0         0  
324             }
325              
326             # grab all records for the month
327 2 50       25 my $sql = $self->{options}{month}
328             ? sprintf $phrasebook{'GetTestersByMonth'}, $self->{options}{month}
329             : $phrasebook{'GetTesters'};
330 2 50       9 if($self->{options}{verbose}) {
331 0         0 $self->_log( "sql = $sql\n" );
332             }
333 2         8 $next = $self->dbh->iterator('array',$sql);
334 2         705 $self->{parsed} = 0;
335 2         10 while(my $row = $next->()) {
336 66         1444 $self->{parsed}++;
337 66         120 my $email = _extract_email($row->[2]);
338              
339 66 100       181 my $testerid = $self->{parsed_map}{$row->[2]} ? $self->{parsed_map}{$row->[2]}->{testerid} : 0;
340 66 100       140 my $addressid = $self->{parsed_map}{$row->[2]} ? $self->{parsed_map}{$row->[2]}->{addressid} : 0;
341 66 50 66     253 $addressid ||= $self->{stored_map}{$row->[2]} ? $self->{stored_map}{$row->[2]}->{addressid} : 0;
342 66 100 100     239 $testerid ||= $self->{address_map}{$email} ? $self->{address_map}{$email}->{testerid} : 0;
343 66 100 100     217 $addressid ||= $self->{address_map}{$email} ? $self->{address_map}{$email}->{addressid} : 0;
344              
345 66 100 66     198 next if($testerid && $addressid);
346            
347 48         417 $self->{unparsed_map}{$row->[2]} = {
348             reportid => $row->[0],
349             guid => $row->[1],
350             fulldate => $row->[3],
351             testerid => $testerid,
352             addressid => $addressid,
353             'sort' => '',
354             email => $email
355             };
356             }
357              
358 2 50       79 if($self->{options}{verbose}) {
359 0         0 $self->_log( "rows = $self->{parsed}\n" );
360 0         0 $self->_log( "unparsed entries = " . scalar(keys %{ $self->{unparsed_map} }) . "\n" );
  0         0  
361             }
362             }
363              
364             sub match_addresses {
365 2     2 1 10370 my $self = shift;
366              
367             # if($self->{options}{verbose}) {
368             # use Data::Dumper;
369             # $self->_log( "named_map=". Dumper($self->{named_map}) );
370             # $self->_log( "unparsed_map=". Dumper($self->{unparsed_map}) );
371             # $self->_log( "parsed_map=" . Dumper($self->{parsed_map}) );
372             # $self->_log( "paused_map=" . Dumper($self->{paused_map}) );
373             # $self->_log( "pause_map=" . Dumper($self->{pause_map}) );
374             # $self->_log( "cpan_map=" . Dumper($self->{cpan_map}) );
375             # $self->_log( "domain_map=" . Dumper($self->{domain_map}) );
376             # $self->_log( "address_map=" . Dumper($self->{address_map}) );
377             # $self->_log( "stored_map=" . Dumper($self->{stored_map}) );
378             # }
379              
380 2         4 for my $key (keys %{ $self->{unparsed_map} }) {
  2         11  
381 14         27 my $email = _extract_email($key);
382 14 50       29 unless($email) {
383 0         0 push @{$self->{result}{NOEMAIL}}, $key;
  0         0  
384 0         0 next;
385             }
386 14         21 $email = lc($email);
387 14         33 my ($local,$domain) = split(/\@/,$email);
388             #print STDERR "email=[$email], local=[$local], domain=[$domain]\n" if($email =~ /indiana/);
389 14 100       30 next if($self->map_address($key,$local,$domain,$email));
390              
391 12         16 my $last = 0;
392 12         30 my @parts = split(/\./,$domain);
393 12         32 while(@parts > 1) {
394 20         39 my $domain2 = join(".",@parts);
395             #print STDERR "domain2=[$domain2]\n" if($email =~ /indiana/);
396 20 100       41 if($self->map_domain($key,$local,$domain2,$email)) {
397 4         4 $last = 1;
398 4         5 last;
399             }
400 16         39 shift @parts;
401             }
402              
403 12 100       24 next if($last);
404 8 50       20 next if($self->map_name($key));
405             }
406             }
407              
408             sub print_addresses {
409 2     2 1 562 my $self = shift;
410 2         4 my $text = '';
411              
412 2 50       42 if($self->{result}{NOMAIL}) {
413 0         0 $self->_printout( "ERRORS:" );
414 0         0 for my $email (sort @{$self->{result}{NOMAIL}}) {
  0         0  
415 0         0 $self->_printout( "NOMAIL: $email" );
416             }
417             }
418              
419 2         9 $self->_printout( "\nMATCH:" );
420 2         3 for my $key (sort {$self->{unparsed_map}{$a}->{match} cmp $self->{unparsed_map}{$b}->{match}}
  10         22  
  14         32  
421 2         10 grep {$self->{unparsed_map}{$_}->{match}}
422             keys %{ $self->{unparsed_map} }) {
423 8 100 66     45 if($self->{unparsed_map}{$key}->{match} && $self->{unparsed_map}{$key}->{match} !~ /SUGGESTION/) {
424 6   50     109 $self->_printout(
      50        
      50        
      50        
      100        
      50        
      50        
      50        
      50        
425             sprintf "%d,%s,%s,%d,%d,%s,%s,%s,%s,%s",
426             ($self->{unparsed_map}{$key}->{reportid} || 0),
427             ($self->{unparsed_map}{$key}->{guid} || ''),
428             ($self->{unparsed_map}{$key}->{fulldate} || ''),
429              
430             ($self->{unparsed_map}{$key}->{addressid} || 0),
431             ($self->{unparsed_map}{$key}->{testerid} || 0),
432             $key,
433             ($self->{unparsed_map}{$key}->{name} || ''),
434             ($self->{unparsed_map}{$key}->{pause} || ''),
435             ($self->{unparsed_map}{$key}->{display} || ''),
436             ($self->{unparsed_map}{$key}->{match} || '')
437             );
438 6         22 delete $self->{unparsed_map}{$key};
439             } else {
440 2         15 my ($local,$domain) = $self->{unparsed_map}{$key}->{email} =~ /([-+=\w.]+)\@([^\s]+)/;
441 2 50 33     13 ($local,$domain) = $key =~ /([-+=\w.]+)\@([^\s]+)/ unless($local && $domain);
442 2 50       5 if($domain) {
443 2         8 my @parts = split(/\./,$domain);
444 2         14 $self->{unparsed_map}{$key}{'sort'} = join(".",reverse @parts) . '@' . $local;
445             } else {
446 0         0 print STDERR "FAIL: $key\n";
447 0         0 $self->{unparsed_map}{$key}{'sort'} = '';
448             }
449             }
450             }
451              
452 2         7 $self->_printout( "\nSUGGESTIONS:" );
453 2 100       4 for my $key (sort {$self->{unparsed_map}{$a} cmp $self->{unparsed_map}{$b}}
  0         0  
  8         44  
454 2         6 grep {$self->{unparsed_map}{$_}->{match} && $self->{unparsed_map}{$_}->{match} =~ /SUGGESTION/}
455             keys %{ $self->{unparsed_map} }) {
456 2   50     63 $self->_printout(
      50        
      50        
      50        
      50        
      50        
      50        
      50        
      50        
457             sprintf "%d,%s,%s,%d,%d,%s,%s,%s,%s,%s",
458             ($self->{unparsed_map}{$key}->{reportid} || 0),
459             ($self->{unparsed_map}{$key}->{guid} || ''),
460             ($self->{unparsed_map}{$key}->{fulldate} || ''),
461              
462             ($self->{unparsed_map}{$key}->{addressid} || 0),
463             ($self->{unparsed_map}{$key}->{testerid} || 0),
464             $key,
465             ($self->{unparsed_map}{$key}->{name} || ''),
466             ($self->{unparsed_map}{$key}->{pause} || ''),
467             ($self->{unparsed_map}{$key}->{display} || ''),
468             ($self->{unparsed_map}{$key}->{match} || '')
469             );
470 2         10 delete $self->{unparsed_map}{$key};
471             }
472              
473 2         7 $self->_printout( '' );
474 2 50       7 return if($self->{options}{match});
475              
476             #use Data::Dumper;
477             #print STDERR Dumper(\%{ $self->{unparsed_map} });
478              
479 2         3 my @mails;
480 2         4 $self->_printout( "PATTERNS:" );
481 2         3 for my $key (sort { $self->{unparsed_map}{$a}{'sort'} cmp $self->{unparsed_map}{$b}{'sort'} } keys %{ $self->{unparsed_map} }) {
  6         14  
  2         8  
482 6 50       12 next unless($key);
483 6   50     184 $self->_printout(
      50        
      50        
      50        
      50        
      50        
      50        
      50        
      50        
      50        
484             sprintf "%d,%s,%s,%d,%d,%s,%s,%s,%s,%s,%s",
485             ($self->{unparsed_map}{$key}->{reportid} || 0),
486             ($self->{unparsed_map}{$key}->{guid} || ''),
487             ($self->{unparsed_map}{$key}->{fulldate} || ''),
488              
489             ($self->{unparsed_map}{$key}->{addressid} || 0),
490             ($self->{unparsed_map}{$key}->{testerid} || 0),
491             $key,
492             ($self->{unparsed_map}{$key}->{name} || ''),
493             ($self->{unparsed_map}{$key}->{pause} || ''),
494             ($self->{unparsed_map}{$key}->{match} || ''),
495             ($self->{unparsed_map}{$key}->{display} || ''),
496             ($self->{unparsed_map}{$key}->{'sort'} || '')
497             );
498             }
499             }
500              
501             sub map_address {
502 18     18 1 63 my ($self,$key,$local,$domain,$email) = @_;
503              
504 18 100       52 if($self->{address_map}{$key}) {
505 1         13 $self->{unparsed_map}{$key}->{$_} = $self->{address_map}{$key}->{$_} for(qw(testerid addressid name pause match));
506 1         6 return 1;
507             }
508              
509 17 50       42 if($self->{address_map}{$email}) {
510 0         0 $self->{unparsed_map}{$key}->{$_} = $self->{address_map}{$email}->{$_} for(qw(testerid addressid name pause match));
511 0         0 return 1;
512             }
513              
514 17 100       37 if($domain eq 'cpan.org') {
515 7 100       21 if($self->{pause_map}{$local}) {
516 3         40 $self->{unparsed_map}{$key}->{$_} = $self->{pause_map}{$local}->{$_} for(qw(testerid addressid name pause match));
517 3         23 return 1;
518             }
519             }
520              
521 14 100       39 if($self->{cpan_map}{$email}) {
522 1         13 $self->{unparsed_map}{$key}->{$_} = $self->{cpan_map}{$email}->{$_} for(qw(testerid addressid name pause match));
523 1         7 return 1;
524             }
525              
526 13         36 return 0;
527             }
528              
529             sub map_domain {
530 39     39 1 8058 my ($self,$key,$local,$domain,$email) = @_;
531              
532 39         46 for my $filter (@{$self->{filters}}) {
  39         87  
533 122 100       2378 return 0 if($domain =~ /^$filter$/);
534             }
535              
536 23 100       62 if($self->{domain_map}{$domain}) {
537 6         66 $self->{unparsed_map}{$key}->{$_} = $self->{domain_map}{$domain}->{$_} for(qw(testerid name pause match));
538 6         20 $self->{unparsed_map}{$key}->{match} .= " - $domain";
539 6         24 return 1;
540             }
541              
542 17         17 for my $map (keys %{ $self->{domain_map} }) {
  17         39  
543 58 50       311 if($map =~ /\b$domain$/) {
544 0         0 $self->{unparsed_map}{$key}->{$_} = $self->{domain_map}{$map}->{$_} for(qw(testerid name pause match));
545 0         0 $self->{unparsed_map}{$key}->{match} .= " - $domain - $map";
546 0         0 return 1;
547             }
548             }
549              
550 17         25 for my $map (keys %{ $self->{domain_map} }) {
  17         40  
551 58 100       477 if($domain =~ /\b$map$/) {
552 1         15 $self->{unparsed_map}{$key}->{$_} = $self->{domain_map}{$map}->{$_} for(qw(testerid name pause match));
553 1         6 $self->{unparsed_map}{$key}->{match} .= " - $domain - $map";
554 1         8 return 1;
555             }
556             }
557              
558 16         46 return 0;
559             }
560              
561             sub map_name {
562 8     8 1 12 my ($self,$key) = @_;
563 8         15 my ($name) = $key =~ /\(+"?([^"\)]+)"?\)+/;
564 8 50       27 ($name) = $key =~ /^\s*"?([^"<]+)"?\s+
565 8 100       45 return 0 unless($name);
566              
567 2 50       10 if($self->{named_map}{$name}) {
568 0         0 $self->{unparsed_map}{$key}->{$_} = $self->{named_map}{$name}->{$_} for(qw(testerid name pause match));
569 0         0 return 1;
570             }
571              
572 2 50       8 if($self->{paused_map}{$name}) {
573 0         0 $self->{unparsed_map}{$key}->{$_} = $self->{paused_map}{$name}->{$_} for(qw(testerid name pause match));
574 0         0 return 1;
575             }
576              
577 2         8 $self->{unparsed_map}{$key}{display} = $name;
578 2         5 $self->{unparsed_map}{$key}{match} = '# SUGGESTION';
579              
580 2         9 return 0;
581             }
582              
583             #----------------------------------------------------------------------------
584             # Private Methods
585              
586             sub _lastid {
587 8     8   4551 my ($self,$id) = @_;
588 8         44 my $f = $self->lastfile();
589              
590 8 100       278 unless( -f $f) {
591 2         243 mkpath(dirname($f));
592 2         18 overwrite_file( $f, 0 );
593 2   50     704 $id ||= 0;
594             }
595              
596 8 100       26 if($id) { overwrite_file( $f, $id ); }
  2         23  
597 6         33 else { $id = read_file($f); }
598              
599 8         2552 return $id;
600             }
601              
602             sub _extract_email {
603 103     103   14152 my $str = shift;
604             #my ($email) = $str =~ /([-+=\w.]+\@(?:[-\w]+\.)+(?:com|net|org|info|biz|edu|museum|mil|gov|[a-z]{2,2}))/i;
605 103         539 my ($email) = $str =~ /([-+=\w.]+\@[-\w\.]+)/i;
606 103   50     420 return $email || '';
607             }
608              
609             sub _init_options {
610 13     13   30 my $self = shift;
611 13         65 my %hash = @_;
612 13         120 $self->{options} = {};
613 13         68 my @options = qw(mailrc update clean reindex lastid backup month match verbose lastfile logfile logclean output);
614 13         26 my %options;
615              
616 13 50       99 GetOptions( \%options,
617              
618             # mandatory options
619             'config|c=s',
620              
621             # update mode options
622             'update|u=s',
623              
624             # clean mode options
625             'clean',
626              
627             # reindex mode options
628             'reindex|r',
629             'lastid|l=i',
630              
631             # backup mode options
632             'backup|b',
633              
634             # search mode options
635             'mailrc|m=s',
636             'month=s',
637             'match',
638              
639             # other options
640             'output=s',
641             'lastfile=s',
642             'logfile=s',
643             'logclean=i',
644             'verbose|v',
645             'help|h'
646             ) or $self->_help();
647              
648 13         11104 for my $opt (qw(config help),@options) {
649 195 100 33     509 $self->{options}{$opt} ||= $hash{$opt} if($hash{$opt});
650 195 50 0     502 $self->{options}{$opt} ||= $options{$opt} if($options{$opt});
651             }
652              
653 13 50       204 $self->_help(1) if($self->{options}{help});
654 13 50       59 $self->_help(0) if($self->{options}{version});
655              
656 13 50       59 $self->_help(1,"Must specify the configuration file") unless( $self->{options}{config});
657 13 50       318 $self->_help(1,"Configuration file [$self->{options}{config}] not found") unless(-f $self->{options}{config});
658              
659             # load configuration
660 13         204 my $cfg = Config::IniFiles->new( -file => $self->{options}{config} );
661              
662             # configure databases
663 13         56673 my %opts;
664 13         39 my $db = 'CPANSTATS';
665 13 50       58 die "No configuration for $db database\n" unless($cfg->SectionExists($db));
666 13         370 $opts{$_} = $cfg->val($db,$_) for(qw(driver database dbfile dbhost dbport dbuser dbpass));
667 13         2241 $self->dbh( CPAN::Testers::Common::DBUtils->new(%opts) );
668 13 50       482 die "Cannot configure $db database\n" unless($self->dbh);
669              
670             # use configuration settings or defaults if none provided
671 13         175 for my $opt (@options) {
672 169   100     3825 $self->{options}{$opt} ||= $cfg->val('MASTER',$opt) || $defaults{$opt};
      100        
673             }
674              
675             # extract filters
676 13         197 my $filters = $cfg->val('DOMAINS','filters');
677 13 50       292 my @filters = split("\n", $filters) if($filters);
678 13 50       44 $self->{filters} = \@filters if(@filters);
679              
680             # mandatory options
681             #for my $opt (qw()) {
682             # $self->_help(1,"No $opt configuration setting given, see help below.") unless( $self->{options}{$opt});
683             # $self->_help(1,"Given $opt file [$self->{options}{$opt}] not a valid file, see help below.") unless(-f $self->{options}{$opt});
684             #}
685              
686             # options to check if provided
687 13         35 for my $opt (qw(update mailrc)) {
688 26 100       88 next unless( $self->{options}{$opt});
689 15 50       441 $self->_help(1,"Given $opt file [$self->{options}{$opt}] not a valid file, see help below.") unless(-f $self->{options}{$opt});
690             }
691              
692             # clean up potential rogue characters
693 13 50       63 $self->{options}{lastid} =~ s/\D+//g if($self->{options}{lastid});
694              
695             # prime accessors
696 13         86 $self->lastfile($self->{options}{lastfile});
697 13         135 $self->logfile($self->{options}{logfile});
698 13         108 $self->logclean($self->{options}{logclean});
699              
700             # configure backup DBs
701 13 100       100 if($self->{options}{backup}) {
702 1 50       5 $self->help(1,"No configuration for BACKUPS with backup option") unless($cfg->SectionExists('BACKUPS'));
703              
704             # available DBI drivers
705 1         38 my %DRIVERS_DBI = map { $_ => 1 } DBI->available_drivers();
  7         568  
706              
707 1         8 my @drivers = $cfg->val('BACKUPS','drivers');
708 1         25 for my $driver (@drivers) {
709 3 50       13 $self->help(1,"No configuration for backup option '$driver'") unless($cfg->SectionExists($driver));
710              
711             # ignore drivers that are unavailable
712 3 100       91 unless($DRIVERS_DBI{$driver}) {
713 2         12 $self->_log("Backup DBD driver '$driver' is not available");
714 2         111 next;
715             }
716              
717 1         5 %opts = ();
718 1         7 $opts{$_} = $cfg->val($driver,$_) for(qw(driver database dbfile dbhost dbport dbuser dbpass));
719 1 50       194 $self->{backups}{$driver}{'exists'} = $driver =~ /SQLite/i ? -f $opts{database} : 1;
720              
721             # CSV is a bit of an oddity!
722 1 50       6 if($driver =~ /CSV/i) {
723 0         0 $self->{backups}{$driver}{'exists'} = 0;
724 0         0 $self->{backups}{$driver}{'dbfile'} = $opts{dbfile};
725 0         0 $opts{dbfile} = 'uploads';
726 0         0 unlink($opts{dbfile});
727             }
728              
729 1         12 $self->{backups}{$driver}{db} = CPAN::Testers::Common::DBUtils->new(%opts);
730 1 50       35 $self->help(1,"Cannot configure BACKUPS database for '$driver'") unless($self->{backups}{$driver}{db});
731             }
732             }
733              
734             # set output
735 13 100       78 if($self->{options}{output}) {
736 6 50       58 if(my $fh = IO::File->new($self->{options}{output}, 'w+')) {
737 6         1339 $self->{fh} = $fh;
738             }
739             }
740              
741 13 50       399 return unless($self->{options}{verbose});
742 0   0     0 print STDERR "config: $_ = ".($self->{options}{$_}||'')."\n" for(@options);
743             }
744              
745             sub _help {
746 0     0   0 my ($self,$full,$mess) = @_;
747              
748 0 0       0 print "\n$mess\n\n" if($mess);
749              
750 0 0       0 if($full) {
751 0         0 print "\n";
752 0         0 print "Usage:$0 [--verbose|v] --config|c= \\\n";
753 0         0 print " ( [--help|h] \\\n";
754 0         0 print " | [--update=] \\\n";
755 0         0 print " | [--reindex] [--lastid=] \\\n";
756 0         0 print " | [--clean] \\\n";
757 0         0 print " | [--backup] \\\n";
758 0         0 print " | [--mailrc|m=] [--month=] [--match] ) \\\n";
759 0         0 print " [--output=] \n\n";
760 0         0 print " [--logfile=] [--logclean=(0|1)] \n\n";
761              
762             # 12345678901234567890123456789012345678901234567890123456789012345678901234567890
763 0         0 print "This program manages the cpan-tester addresses.\n";
764              
765 0         0 print "\nFunctional Options:\n";
766 0         0 print " --config= # path/file to configuration file\n";
767 0         0 print " [--mailrc=] # path/file to mailrc file\n";
768 0         0 print " [--output=] # path/file to output file (defaults to STDOUT)\n";
769              
770 0         0 print "\nUpdate Options:\n";
771 0         0 print " [--update=] # run in update mode\n";
772              
773 0         0 print "\nReindex Options:\n";
774 0         0 print " [--reindex] # run in reindex mode\n";
775 0         0 print " [--lastid=] # id to start reindex from\n";
776              
777 0         0 print "\nClean Options:\n";
778 0         0 print " [--clean] # run in clean mode (de-duplication)\n";
779              
780 0         0 print "\nBackup Options:\n";
781 0         0 print " [--backup] # run in backup mode\n";
782              
783 0         0 print "\nSearch Options:\n";
784 0         0 print " [--month=] # YYYYMM string to match from\n";
785 0         0 print " [--match] # display matches only\n";
786              
787 0         0 print "\nOther Options:\n";
788 0         0 print " [--verbose] # turn on verbose messages\n";
789 0         0 print " [--help] # this screen\n";
790              
791 0         0 print "\nFor further information type 'perldoc $0'\n";
792             }
793              
794 0         0 print "$0 v$VERSION\n";
795 0         0 exit(0);
796             }
797              
798             sub _printout {
799 30     30   41 my $self = shift;
800 30 100       67 if(my $fh = $self->{fh}) {
801 26         107 print $fh "@_\n";
802             } else {
803 4         65 print STDOUT "@_\n";
804             }
805             }
806              
807             sub _log {
808 74     74   3814 my $self = shift;
809 74 50       566 my $log = $self->logfile or return;
810 74 100       4016 mkpath(dirname($log)) unless(-f $log);
811              
812 74 100       399 my $mode = $self->logclean ? 'w+' : 'a+';
813 74         1345 $self->logclean(0);
814              
815 74         4275 my @dt = localtime(time);
816 74         786 my $dt = sprintf "%04d/%02d/%02d %02d:%02d:%02d", $dt[5]+1900,$dt[4]+1,$dt[3],$dt[2],$dt[1],$dt[0];
817              
818 74 50       877 my $fh = IO::File->new($log,$mode) or die "Cannot write to log file [$log]: $!\n";
819 74         19807 print $fh "$dt ", @_, "\n";
820 74         426 $fh->close;
821             }
822              
823             q!Will code for a damn fine Balti!;
824              
825             __END__