File Coverage

blib/lib/TheBat/Read_TBB.pm
Criterion Covered Total %
statement 124 171 72.5
branch 28 50 56.0
condition 4 6 66.6
subroutine 7 9 77.7
pod 2 4 50.0
total 165 240 68.7


line stmt bran cond sub pod time code
1             package TheBat::Read_TBB;
2              
3 1     1   23190 use 5.00001;
  1         4  
4 1     1   5 use strict;
  1         2  
  1         26  
5 1     1   4 use warnings;
  1         5  
  1         34  
6 1     1   875 use bytes;
  1         10  
  1         5  
7 1     1   25 use Digest::MD5;
  1         2  
  1         2155  
8              
9             require Exporter;
10              
11             =head1 NAME
12              
13             TheBat::Read_TBB - Read individual email messages out of TheBat! .tbb messages files
14              
15             =head1 SYNOPSIS
16              
17             use TheBat::Read_TBB;
18             while(&Read_TBB("messages.tbb",\%ref) {
19             print "$ref{'sender'}\n";
20             }
21              
22             =head1 DESCRIPTION
23              
24             Reads the TheBat! binary messages flags (status codes like "deleted" or "replied" or "flagged" etc)
25             as well as the email headers and body, and returns md5 checksums of some parts as well as flags
26             and headers etc.
27              
28             Call repeatedly in a loop; returns 0 when there's no more to read.
29              
30             The Write_TBB function will set or reset only the 4 flags: flag_P (parked) flag_F (flagged) flag_o (read) flag_D (deleted) (see note below re writing binary files)
31              
32              
33             =head2 EXPORT
34              
35             Read_TBB
36             Write_TBB
37              
38             =head2 EXAMPLE
39              
40             use TheBat::Read_TBB;
41             my %ref;
42             while(&Read_TBB("t/messages.tbb",\%ref)) { # List all emails
43             foreach my $k (keys %ref) {
44             print "$k:\t" . $ref{$k} . "\n";
45             }
46             if($ref{$msgno}==3){ $ref{'flag_P'}=1; &Write_TBB(\%ref); } # Set the "Parked" flag on the 3rd email
47             }
48              
49            
50             =head1 SEE ALSO
51              
52             http://www.felix-schwarz.name/files/thebat/tbb.txt
53              
54              
55             =head1 NOTE re TheBat TBB file format differences
56              
57             Some more modern TheBat! clients may store messages in .TBB files with a different format in them which this code can't read.
58             You can fix this problem by finding an older .tbb file (there's one in this package in the t/ folder)
59             and creating a new bat folder, then exiting TheBat!, copying the old .tbb over the new folder tbb
60             and erasing the tbn file, restarting TheBat!, and moving all your messages into this new folder
61              
62              
63             =head1 NOTE re Writing to the binary files
64              
65             I only coded changed to the Deleted, Parked, and Flagged flags; and you may need to erase
66             the MESSAGES.TBN file if you change those; warning; the TBN file holdes your memos and message colours and stuff.
67              
68              
69             =head1 AUTHOR
70              
71             Chris Drake, Ecdrake@cpan.orgE
72              
73             =head1 COPYRIGHT AND LICENSE
74              
75             Copyright (C) 2013 by Chris Drake
76              
77             This library is free software; you can redistribute it and/or modify
78             it under the same terms as Perl itself, either Perl version 5.10.1 or,
79             at your option, any later version of Perl 5 you may have available.
80              
81              
82             =cut
83              
84              
85              
86             our @ISA = qw(Exporter);
87              
88             # Items to export into callers namespace by default. Note: do not export
89             # names by default without a very good reason. Use EXPORT_OK instead.
90             # Do not simply export all your public functions/methods/constants.
91              
92             # This allows declaration use TheBat::Read_TBB ':all';
93             # If you do not need this, moving things directly into @EXPORT or @EXPORT_OK
94             # will save memory.
95             our %EXPORT_TAGS = ( 'all' => [ qw(
96            
97             ) ] );
98              
99             our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
100              
101             our @EXPORT = qw(
102             Read_TBB
103             Write_TBB
104             );
105              
106             our $VERSION = '0.02';
107              
108              
109             # Change some flags (deleted, parked, or flagged only; flag_P flag_F flag_D) inside a TBB file
110             sub Write_TBB {
111 0     0 0 0 my($ref)=@_; # Must use an already opened Read_TBB file
112 0         0 $ref->{'curpos'}=sysseek(TBB,0,1); # Remember where we are now
113 0         0 sysseek(TBB,$ref->{'tell'},0); # Go to where they want to write
114 0         0 my($buf,$bytes_read);
115 0         0 $bytes_read=sysread(TBB,$buf,$ref->{'msghead_size'}); # Get the tbb message header
116 0         0 my $tbbhdr=unpack('b*',$buf);
117 0         0 my %flagmod=qw(D 112 o 113 P 115 F 118);
118 0         0 foreach my $k (keys %flagmod) { # Set or unset 4 flags:
119 0 0       0 if($ref->{"flag_$k"}) {
120 0         0 substr($tbbhdr,$flagmod{$k},1)="1";
121             } else {
122 0         0 substr($tbbhdr,$flagmod{$k},1)="0";
123             }
124             }
125 0         0 my $newbuf=pack('b*',$tbbhdr);
126 0         0 my $ret=-2;
127 0 0       0 if($newbuf ne $buf) { # changed something
128 0         0 sysseek(TBB,$ref->{'tell'},0); # Go to where they want to write
129 0         0 $bytes_read=syswrite(TBB,$newbuf,$ref->{'msghead_size'}); # Write the tbb message header
130 0         0 $ret=1;
131 0 0       0 if($bytes_read ne $ref->{'msghead_size'}) {
132 0         0 warn ".TBB ".$ref->{'tbb'}." file write problem: $!";
133 0         0 $ret=-1;
134             }
135             } else {
136 0         0 $ret=0;
137             }
138 0         0 sysseek(TBB,$ref->{'curpos'},0); # go back to where we were
139 0         0 return $ret;
140             } # Write_TBB
141              
142              
143             # Call repeatedly; returns 1 email at a time
144             sub Read_TBB {
145 2     2 0 381 my($tbb,$ref)=@_; # Supply input .TBB filename, and a ref to keep status info
146              
147 2         4 my($readbuf)=536870911; # How big shall our message-sysread chunks be?
148 2         3 my $flagcodes='DoaPAxFft.c................m....';
149 2 100       3 my($buf,$bytes_read)=(undef,0); my %persist=qw(tbb 1 more 1 total 1 msgs 1 deld 1 msgno 1 seek 1); foreach my $k(keys %{$ref}){$ref->{$k}=undef unless($persist{$k})}
  2         13  
  2         3  
  2         11  
  40         101  
150 2 100       9 if(!$ref->{'tbb'}) { # Not opened yet
151 1 50       33 if(open(TBB,'+<',$tbb)) {
152 1         2 $ref->{'tbb'}=$tbb;
153 1         3 binmode(TBB);
154 1         9 $bytes_read=sysread(TBB,$buf,6); # Get magic number and file header size
155 1         3 $ref->{'seek'}=$bytes_read;
156 1         5 ($ref->{'file_magic'},$ref->{'head_size'})=unpack("VS",$buf);
157 1         2 $ref->{'head_size'}-=6; # remove the size of the header(magic+size) itself from the header
158              
159 1 50       4 if($ref->{'file_magic'} != 427361824) { # if(unpack('H*',substr($buf,0,6)) ne '20067919080c');
160 0         0 &write_log("Possible input file corruption: magic number mismatch: " . $ref->{'file_magic'} . " != 427361824 " . unpack('H*',$buf));
161 0         0 close(TBB);
162 0         0 return 0;
163             }
164              
165 1         8 $bytes_read=sysread(TBB,$buf,$ref->{'head_size'}); # Get the TBB file header
166 1         2 $ref->{'seek'}+=$bytes_read;
167 1 50       10 &write_log("Possible input file corruption: $bytes_read != ". $ref->{'head_size'} ." - too short") if($bytes_read != $ref->{'head_size'});
168              
169 1         2 $ref->{'more'}=1;
170 1         2 $ref->{'total'}=0;
171 1         2 $ref->{'msgs'}=0;
172 1         3 $ref->{'deld'}=0;
173 1         2 $ref->{'msgno'}=0;
174              
175             } else {
176 0         0 &write_log("Skipping: " . $ref->{'tbb'} . " - $!");
177 0         0 return 0;
178             }
179             } else {
180 1 50       3 goto NOMORE unless($ref->{'more'});
181             }
182              
183 2         10 $bytes_read=sysread(TBB,$buf,6); # Get the message header's magic and size
184 2         4 $ref->{'seek'}+=$bytes_read;
185 2 100       5 if($bytes_read<6) {
186 1         8 NOMORE:
187             close(TBB);
188             # &write_log("End of " . $ref->{'tbb'} . " - Total messages:".$ref->{'total'} ." (." . $ref->{'msgs'}." messages, ".$ref->{'deld'}." deleted): run=".$ref->{'grandtot'});# if($switch{'debug'});
189 1         3 undef $ref; # works
190 1         4 return 0;
191             }
192              
193              
194 1         5 ($ref->{'msg_magic'},$ref->{'msghead_size'})=unpack("VS",$buf); # V is a little-endian 32bit int, S is an unsigned little-ending 16 bit short)
195 1         2 $ref->{'msghead_size'}-=6; # remove the size of the header(magic+size) itself from the header
196 1 50       4 if($ref->{'msg_magic'} != 426772769) {
197 0         0 die "Possible input file corruption: message magic number mismatch: " . $ref->{'msg_magic'}. " != 426772769 / 21 09 70 19 != " . unpack('H*',$buf);
198             }
199              
200 1 50       3 $ref->{'msghead_size'}=0 if($ref->{'msghead_size'}<0);
201             # &write_log($ref->{'tbb'} . " message header size is $ref->{'msghead_size'}"); # debug
202 1 50       4 &write_log("Possible input file corruption: message header size unusual") if($ref->{'msghead_size'} != 42);
203            
204 1         3 $ref->{'tell'}=$ref->{'seek'}; # remember where this message started
205 1         5 $bytes_read=sysread(TBB,$buf,$ref->{'msghead_size'}); # Get the tbb message header
206 1         2 $ref->{'tbbheader'}=$buf;
207 1         11 $ref->{'seek'}+=$bytes_read;
208 1 50       4 if($bytes_read != $ref->{'msghead_size'}) {
209 0         0 &write_log("Possible input file corruption: message header too short");
210 0         0 goto NOMORE;
211             }
212              
213 1         2 $ref->{'total'}++; $ref->{'grandtot'}++;
  1         3  
214 1         2 $ref->{'msgno'}++;
215            
216             #( 0 ,3441210366,1038086103,1830,0 ,146 ,0 ,0 ,1 ,78594 ,0 ,0)
217 1         6 my( $z1,$unknown ,$time ,$id ,$z2,$status,$z4,$col,$pri,$msg_size,$msg_sizeB,$z5)=unpack(
218             'v V V v v b32 V V V V V V',$buf);
219 1 100       3 my $flags='';for(my $i=0; $i{"flag_$f"}++;} # DoaPAxFft.c................m....
  1 100       5  
  32         40  
  32         42  
  32         65  
  32         64  
  32         39  
  32         97  
220 1         5 $ref->{'flags'}=$flags;
221              
222              
223 1         4 my($read)=0;
224 1         3 my($head)=1;
225 1         3 my($towrite)=1;
226 1         4 my($full)='';
227 1         4 $ref->{'sender'}='<>';
228            
229 1 50       4 if(substr($status,0,1)) {
230 0         0 $ref->{'deld'}++;
231             } else {
232 1         3 $ref->{'msgs'}++;
233             }
234              
235 1         13 $ref->{'md5'} = Digest::MD5->new;
236 1         6 $ref->{'md5h'} = Digest::MD5->new;
237 1         6 $ref->{'md5b'} = Digest::MD5->new;
238 1         2 my %hdr; my($more)=1;
  1         3  
239 1         3 $ref->{'size'} = 0;
240              
241 1   66     13 while(($more)&&($read<$msg_size)&&($msg_size>0)) { # Loop over sensible sized reads of the message
      66        
242 1 50       4 if(($msg_size-$read)<$readbuf) {
243 1         7 $bytes_read=sysread(TBB,$buf,($msg_size-$read)); # Get the rest of the message
244 1         3 $ref->{'seek'}+=$bytes_read;
245 1         1 $ref->{'size'}+=$bytes_read;
246 1 50       4 if($bytes_read != ($msg_size-$read)) {
247 0         0 $more=0; $ref->{'more'}=0;
  0         0  
248 0         0 &write_log("Possible input file corruption: file ends before message $ref->{'total'} does");
249             }
250 1         2 $read=$msg_size; # Got the lot now
251             } else {
252 0         0 $bytes_read=sysread(TBB,$buf,$readbuf); # Get 32k of the message
253 0         0 $ref->{'seek'}+=$bytes_read;
254 0         0 $ref->{'size'}+=$bytes_read;
255 0 0       0 if($bytes_read != $readbuf) {
256 0         0 $more=0; $ref->{'more'}=0;
  0         0  
257 0         0 &write_log("Possible input file corruption: file ends before message $ref->{'total'} does");
258             }
259 0         0 $read+=$readbuf;
260             }
261              
262              
263 1 50       3 if($head) { # We're in the header
264 1         1 $head=0;
265 1         36 ($ref->{'header'},$buf)=split(/(?:\r\r|\n\n|\r\n\r\n|\n\r\n\r)/,$buf,2);
266 1         14 $ref->{'header'}=~s/\r\n/\n/gsm;
267 1         9 chop($ref->{'header'}) while(substr($ref->{'header'},-1) eq "\r");
268 1         4 &ParseHead(\$ref->{'header'},\%hdr);
269              
270 1         13 ($ref->{'sender'})=($ref->{'header'}=~/[\s<]([^\s<>]+\@[a-z0-9\.\-]+)/i);
271              
272 1 50       5 if(substr($ref->{'header'},0,5) ne 'From ') {
273 1         5 $ref->{'header'}=~s/^From /X-From:/gsm; # Don't allow another "^From " in the headers
274 1         5 $ref->{'header'}="From $ref->{'sender'}\n" . $ref->{'header'};
275             }
276              
277 1         14 $ref->{'md5h'}->add($ref->{'header'});
278 1         2 foreach my $hk (qw(From To Date Message-ID Subject)) {
279 5 50       16 $hdr{lc($hk)}='' unless(defined $hdr{lc($hk)});
280 5         20 $ref->{'md5b'}->add("$hk:" . $hdr{lc($hk)});
281             }
282              
283             # $ref->{'md5b'}->add("From:" . $hdr{'from'} . "\tTo:" . $hdr{'to'} . "\tDate:" . $hdr{'date'} . "\tId:" . $hdr{'message-id'} . "\tSubject:" . $hdr{'subject'} );
284              
285 1         4 foreach my $k (keys %hdr){$ref->{"h_$k"}=$hdr{$k};}
  10         25  
286             }
287              
288 1         14 $ref->{'md5'}->add($buf);
289              
290             #$full.=$buf if($switch{'uniq'});
291              
292             } # loop over 1 email
293              
294 1         2 foreach my $h (qw(md5 md5h md5b)) {
295 3         16 $ref->{"b$h"}=$ref->{$h}->digest;
296 3         16 $ref->{$h.'hex'}=unpack("H*",$ref->{"b$h"}); # $ref->{'bmd5'} / $ref->{'md5hex'} / etc
297             }
298              
299 1         7 return 1;
300             } # Read_TBB
301              
302              
303              
304             #######################################################################
305              
306             =head2 ParseHead
307              
308             Extract fields from header
309              
310             =cut
311             #######################################################################
312              
313              
314             sub ParseHead {
315 1     1 1 2 my($phdrc,$phdr)=@_;
316 1         26 my @hdr=split(/(?:\r\n|\n\r|\r|\n)/,$$phdrc);
317 1         2 push @hdr,''; # So the loop below also does the last element properly (the $i++ bit)
318 1         2 my $i=0; while($i<($#hdr)) {
  1         13  
319             #print "h$i =$hdr[$i]\n";
320 20 100       61 if($hdr[1+$i]=~/^\s(.*)/) {
321 4         14 $hdr[$i].=" $1"; splice(@hdr,$i+1,1);
  4         13  
322             } else {
323 16 100       40 my($f,$b)=split(':',$hdr[$i],2); if(!defined $phdr->{lc($f)}){$phdr->{lc($f)}=$b}else{$phdr->{lc($f)}.=" ".$b}
  16         38  
  10         27  
  6         15  
324 16         39 $i++;
325             }
326             #print "h$i'=$hdr[$i]\n";
327             }
328 1         4 pop @hdr; # @hdr now has headers, all one-lined.
329              
330             } # ParseHead
331              
332              
333              
334              
335              
336              
337             #######################################################################
338              
339             =head2 write_log
340              
341             Outputs debugging and informational letters to /var/log file.
342              
343             =cut
344             #######################################################################
345             sub write_log {
346 0     0 1   my($message)=@_;
347 0           my($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time);
348 0           my($date)=sprintf("%02d/%02d %02d:%02d.%02d",$mon+1,$mday,$hour,$min,$sec);
349 0           print STDERR "$date $$ Read_TBB $message\n";
350             }
351              
352              
353              
354              
355             __END__