File Coverage

blib/lib/Email/Outlook/Message/Base.pm
Criterion Covered Total %
statement 127 159 79.8
branch 31 52 59.6
condition 3 5 60.0
subroutine 25 27 92.5
pod 5 5 100.0
total 191 248 77.0


line stmt bran cond sub pod time code
1             package Email::Outlook::Message::Base;
2             =head1 NAME
3              
4             Email::Outlook::Message::Base - Base parser for .msg files.
5              
6             =head1 DESCRIPTION
7              
8             This is an internal module of Email::Outlook::Message.
9              
10             =head1 METHODS
11              
12             =over 8
13              
14             =item B
15              
16             =item B
17              
18             =item B
19              
20             =item B
21              
22             =item B
23              
24             =back
25              
26             =head1 AUTHOR
27              
28             Matijs van Zuijlen, C
29              
30             =head1 COPYRIGHT AND LICENSE
31              
32             Copyright 2002--2020 by Matijs van Zuijlen
33              
34             This module is free software; you can redistribute it and/or modify
35             it under the same terms as Perl itself.
36              
37             =cut
38 8     8   69 use strict;
  8         27  
  8         230  
39 8     8   44 use warnings;
  8         17  
  8         184  
40 8     8   37 use Encode;
  8         17  
  8         547  
41 8     8   4130 use IO::String;
  8         33800  
  8         298  
42 8     8   5101 use POSIX;
  8         52113  
  8         69  
43 8     8   22525 use Carp;
  8         20  
  8         413  
44 8     8   5456 use OLE::Storage_Lite;
  8         165117  
  8         436  
45 8     8   72 use vars qw($VERSION);
  8         20  
  8         17786  
46             $VERSION = "0.920";
47              
48             my $DIR_TYPE = 1;
49             my $FILE_TYPE = 2;
50              
51             # Variable encodings
52             my $ENCODING_UNICODE = '001F';
53             my $ENCODING_ASCII = '001E';
54             my $ENCODING_BINARY = '0102';
55             my $ENCODING_DIRECTORY = '000D';
56              
57             our $VARIABLE_ENCODINGS = {
58             '000D' => 'Directory',
59             '001F' => 'Unicode',
60             '001E' => 'Ascii?',
61             '0102' => 'Binary',
62             };
63              
64             # Fixed encodings
65             my $ENCODING_INTEGER16 = '0002';
66             my $ENCODING_INTEGER32 = '0003';
67             my $ENCODING_BOOLEAN = '000B';
68             my $ENCODING_DATE = '0040';
69              
70             #
71             # Descriptions partially based on mapitags.h
72             #
73             our $skipproperties = {
74             # Envelope properties
75             '0002' => "Alternate Recipient Allowed",
76             '000B' => "Conversation Key",
77             '0017' => "Importance", #TODO: Use this.
78             '001A' => "Message Class",
79             '0023' => "Originator Delivery Report Requested",
80             '0026' => "Priority", #TODO: Use this.
81             '0029' => "Read Receipt Requested", #TODO: Use this.
82             '0036' => "Sensitivity", # As assessed by the Sender
83             '003B' => "Sent Representing Search Key",
84             '003D' => "Subject Prefix",
85             '003F' => "Received By EntryId",
86             '0040' => "Received By Name",
87             # TODO: These two fields are part of the Sender field.
88             '0041' => "Sent Representing EntryId",
89             '0042' => "Sent Representing Name",
90             '0043' => "Received Representing EntryId",
91             '0044' => "Received Representing Name",
92             '0046' => "Read Receipt EntryId",
93             '0051' => "Received By Search Key",
94             '0052' => "Received Representing Search Key",
95             '0053' => "Read Receipt Search Key",
96             # TODO: These two fields are part of the Sender field.
97             '0064' => "Sent Representing Address Type",
98             '0065' => "Sent Representing Email Address",
99             '0070' => "Conversation Topic",
100             '0071' => "Conversation Index",
101             '0075' => "Received By Address Type",
102             '0076' => "Received By Email Address",
103             '0077' => "Received Representing Address Type",
104             '0078' => "Received Representing Email Address",
105             '007F' => "TNEF Correlation Key",
106             # Recipient properties
107             '0C15' => "Recipient Type",
108             # Sender properties
109             '0C19' => "Sender Entry Id",
110             '0C1D' => "Sender Search Key",
111             '0C1E' => "Sender Address Type",
112             # Non-transmittable properties
113             '0E02' => "Display Bcc",
114             '0E06' => "Message Delivery Time",
115             '0E07' => "Message Flags",
116             '0E0A' => "Sent Mail EntryId",
117             '0E0F' => "Responsibility",
118             '0E1B' => "Has Attachments",
119             '0E1D' => "Normalized Subject",
120             '0E1F' => "RTF In Sync",
121             '0E20' => "Attachment Size",
122             '0E21' => "Attachment Number",
123             '0E23' => "Internet Article Number",
124             '0E27' => "Security Descriptor",
125             '0E79' => "Trust Sender",
126             '0FF4' => "Access",
127             '0FF6' => "Instance Key",
128             '0FF7' => "Access Level",
129             '0FF9' => "Record Key",
130             '0FFE' => "Object Type",
131             '0FFF' => "EntryId",
132             # Content properties
133             '1006' => "RTF Sync Body CRC",
134             '1007' => "RTF Sync Body Count",
135             '1008' => "RTF Sync Body Tag",
136             '1010' => "RTF Sync Prefix Count",
137             '1011' => "RTF Sync Trailing Count",
138             '1046' => "Original Message ID",
139             '1080' => "Icon Index",
140             '1081' => "Last Verb Executed",
141             '1082' => "Last Verb Execution Time",
142             '10F3' => "URL Component Name",
143             '10F4' => "Attribute Hidden",
144             '10F5' => "Attribute System",
145             '10F6' => "Attribute Read Only",
146             # 'Common property'
147             '3000' => "Row Id",
148             '3001' => "Display Name",
149             '3002' => "Address Type",
150             '3007' => "Creation Time",
151             '3008' => "Last Modification Time",
152             '300B' => "Search Key",
153             # Message store info
154             '340D' => "Store Support Mask",
155             '3414' => "Message Store Provider",
156             # Attachment properties
157             '3702' => "Attachment Encoding",
158             '3703' => "Attachment Extension",
159             # TODO: Use the following to distinguish between nested msg and other OLE
160             # stores.
161             '3705' => "Attachment Method",
162             '3709' => "Attachment Rendering", # Icon as WMF
163             '370A' => "Tag identifying application that supplied the attachment",
164             '370B' => "Attachment Rendering Position",
165             '3713' => "Attachment Content Location", #TODO: Use this?
166             # 3900 -- 39FF: 'Address book'
167             '3900' => "Address Book Display Type",
168             '39FF' => "Address Book 7 Bit Display Name",
169             # Mail User Object
170             '3A00' => "Account",
171             '3A20' => "Transmittable Display Name",
172             '3A40' => "Send Rich Info",
173             # 'Display table properties'
174             '3FF8' => "Creator Name",
175             '3FF9' => "Creator EntryId",
176             '3FFA' => "Last Modifier Name",
177             '3FFB' => "Last Modifier EntryId",
178             '3FFD' => "Message Code Page",
179             # 'Transport-defined envelope property'
180             '4019' => "Sender Flags",
181             '401A' => "Sent Representing Flags",
182             '401B' => "Received By Flags",
183             '401C' => "Received Representing Flags",
184             '4029' => "Read Receipt Address Type",
185             '402A' => "Read Receipt Email Address",
186             '402B' => "Read Receipt Name",
187             '5FF6' => "Recipient Display Name",
188             '5FF7' => "Recipient EntryId",
189             '5FFD' => "Recipient Flags",
190             '5FFF' => "Recipient Track Status",
191             # 'Provider-defined internal non-transmittable property'
192             '664A' => "Has Named Properties",
193             '6740' => "Sent Mail Server EntryId",
194             };
195              
196             sub new {
197 8     8 1 29 my ($class, $pps, $verbose) = @_;
198 8         40 my $self = bless {
199             _pps_file_entries => {},
200             _pps => $pps
201             }, $class;
202 8         56 $self->_set_verbosity($verbose);
203 8         67 $self->_process_pps($pps);
204 8         29 return $self;
205             }
206              
207             sub mapi_property_names {
208 14     14 1 51 my $self = shift;
209 14         22 return keys %{$self->{_pps_file_entries}};
  14         130  
210             }
211              
212             sub get_mapi_property {
213 108     108 1 775 my ($self, $code) = @_;
214 108         299 return $self->{_pps_file_entries}->{$code};
215             }
216              
217             sub set_mapi_property {
218 340     340 1 616 my ($self, $code, $data) = @_;
219 340         799 $self->{_pps_file_entries}->{$code} = $data;
220 340         583 return;
221             }
222              
223             sub property {
224 26     26 1 68 my ($self, $name) = @_;
225 26         65 my $map = $self->_property_map;
226             # TODO: Prepare reverse map instead of doing dumb lookup.
227 26         59 foreach my $code (keys %{$map}) {
  26         91  
228 81         133 my $key = $map->{$code};
229 81 100       167 next unless $key eq $name;
230 26         61 my $prop = $self->get_mapi_property($code);
231 26 100       85 if ($prop) {
232 21         34 my ($encoding, $data) = @{$prop};
  21         55  
233 21         52 return $self->_decode_mapi_property($encoding, $data);
234             } else {
235 5         46 return;
236             }
237             }
238 0         0 return;
239             }
240              
241             sub _decode_mapi_property {
242 102     102   205 my ($self, $encoding, $data) = @_;
243              
244 102 100 100     317 if ($encoding eq $ENCODING_ASCII or $encoding eq $ENCODING_UNICODE) {
245 82 100       151 if ($encoding eq $ENCODING_UNICODE) {
246 22         62 $data = decode("UTF-16LE", $data);
247             }
248 82         7143 $data =~ s/ \000 $ //sgx;
249 82         351 return $data;
250             }
251              
252 20 100       70 if ($encoding eq $ENCODING_BINARY) {
253 7         33 return $data;
254             }
255              
256 13 100       60 if ($encoding eq $ENCODING_DATE) {
257 8         34 my @a = OLE::Storage_Lite::OLEDate2Local $data;
258 8         175 return $self->_format_date(\@a);
259             }
260              
261 5 50       21 if ($encoding eq $ENCODING_INTEGER16) {
262 0         0 return unpack("v", substr($data, 0, 2));
263             }
264              
265 5 50       32 if ($encoding eq $ENCODING_INTEGER32) {
266 5         41 return unpack("V", substr($data, 0, 4));
267             }
268              
269 0 0       0 if ($encoding eq $ENCODING_BOOLEAN) {
270 0         0 return unpack("C", substr($data, 0, 1));
271             }
272              
273 0         0 warn "Unhandled encoding $encoding\n";
274 0         0 return $data;
275             }
276              
277             sub _process_pps {
278 14     14   39 my ($self, $pps) = @_;
279 14         24 foreach my $child (@{$pps->{Child}}) {
  14         50  
280 235 100       556 if ($child->{Type} == $DIR_TYPE) {
    50          
281 14         56 $self->_process_subdirectory($child);
282             } elsif ($child->{Type} == $FILE_TYPE) {
283 221         539 $self->_process_pps_file_entry($child);
284             } else {
285 0         0 carp "Unknown entry type: $child->{Type}";
286             }
287             }
288 14         86 $self->_check_pps_file_entries($self->_property_map);
289 14         30 return;
290             }
291              
292             sub _get_pps_name {
293 241     241   462 my ($self, $pps) = @_;
294 241         516 my $name = OLE::Storage_Lite::Ucs2Asc($pps->{Name});
295 241         4431 $name =~ s/ \W / /gx;
296 241         514 return $name;
297             }
298              
299             sub _parse_item_name {
300 221     221   445 my ($self, $name) = @_;
301              
302 221 100       669 if ($name =~ / ^ __substg1 [ ] 0_ (....) (....) $ /x) {
303 207         507 my ($property, $encoding) = ($1, $2);
304 207         642 return ($property, $encoding);
305             } else {
306 14         37 return (undef, undef);
307             }
308             }
309              
310             sub _warn_about_unknown_directory {
311 6     6   21 my ($self, $pps) = @_;
312              
313 6         21 my $name = $self->_get_pps_name($pps);
314 6 50       28 if ($name eq '__nameid_version1 0') {
315             # TODO: Use this data to access so-called named properties.
316             $self->{VERBOSE}
317 6 50       138 and warn "Skipping DIR entry $name (Introductory stuff)\n";
318             } else {
319 0         0 warn "Unknown DIR entry $name\n";
320             }
321 6         18 return;
322             }
323              
324             sub _warn_about_unknown_file {
325 0     0   0 my ($self, $pps) = @_;
326              
327 0         0 my $name = $self->_get_pps_name($pps);
328              
329 0 0       0 if ($name eq 'Olk10SideProps_0001') {
330             $self->{VERBOSE}
331 0 0       0 and warn "Skipping FILE entry $name (Properties)\n";
332             } else {
333 0         0 warn "Unknown FILE entry $name\n";
334             }
335 0         0 return;
336             }
337              
338             #
339             # Generic processor for a file entry: Inserts the entry's data into the
340             # $self's mapi property list.
341             #
342             sub _process_pps_file_entry {
343 221     221   385 my ($self, $pps) = @_;
344 221         377 my $name = $self->_get_pps_name($pps);
345 221         504 my ($property, $encoding) = $self->_parse_item_name($name);
346              
347 221 100       451 if (defined $property) {
    50          
348 207         591 $self->set_mapi_property($property, [$encoding, $pps->{Data}]);
349             } elsif ($name eq '__properties_version1 0') {
350 14         105 $self->_process_property_stream ($pps->{Data});
351             } else {
352 0         0 $self->_warn_about_unknown_file($pps);
353             }
354 221         452 return;
355             }
356              
357             sub _process_property_stream {
358 14     14   40 my ($self, $data) = @_;
359 14         67 my ($n, $len) = ($self->_property_stream_header_length, length $data) ;
360              
361 14         54 while ($n + 16 <= $len) {
362 340         735 my @f = unpack "v4", substr $data, $n, 8;
363              
364 340         705 my $encoding = sprintf("%04X", $f[0]);
365              
366 340 100       759 unless ($VARIABLE_ENCODINGS->{$encoding}) {
367 133         246 my $property = sprintf("%04X", $f[1]);
368 133         256 my $propdata = substr $data, $n+8, 8;
369 133         352 $self->set_mapi_property($property, [$encoding, $propdata]);
370             }
371             } continue {
372 340         642 $n += 16 ;
373             }
374 14         29 return;
375             }
376              
377             sub _check_pps_file_entries {
378 14     14   47 my ($self, $map) = @_;
379              
380 14         57 foreach my $property ($self->mapi_property_names) {
381 340 100       691 if (my $key = $map->{$property}) {
382 81         182 $self->_use_property($key, $property);
383             } else {
384 259         486 $self->_warn_about_skipped_property($property);
385             }
386             }
387 14         44 return;
388             }
389              
390             sub _use_property {
391 81     81   173 my ($self, $key, $property) = @_;
392 81         109 my ($encoding, $data) = @{$self->get_mapi_property($property)};
  81         182  
393 81         210 $self->{$key} = $self->_decode_mapi_property($encoding, $data);
394              
395             $self->{VERBOSE}
396 81 50       205 and $self->_log_property("Using ", $property, $key);
397 81         168 return;
398             }
399              
400             sub _warn_about_skipped_property {
401 259     259   427 my ($self, $property) = @_;
402              
403 259 50       552 return unless $self->{VERBOSE};
404              
405 0   0     0 my $meaning = $skipproperties->{$property} || "UNKNOWN";
406              
407 0         0 $self->_log_property("Skipping", $property, $meaning);
408 0         0 return;
409             }
410              
411             sub _log_property {
412 0     0   0 my ($self, $message, $property, $meaning) = @_;
413              
414 0         0 my ($encoding, $data) = @{$self->get_mapi_property($property)};
  0         0  
415 0         0 my $value = $self->_decode_mapi_property($encoding, $data);
416 0         0 $value = substr($value, 0, 50);
417              
418 0 0       0 if ($encoding eq $ENCODING_BINARY) {
419 0 0       0 if ($value =~ / [[:print:]] /x) {
420 0         0 $value =~ s/ [^[:print:]] /./gx;
421             } else {
422 0         0 $value =~ s/ . / sprintf("%02x ", ord($&)) /sgex;
  0         0  
423             }
424             }
425              
426 0 0       0 if (length($value) > 45) {
427 0         0 $value = substr($value, 0, 41) . " ...";
428             }
429              
430 0         0 warn "$message property $encoding:$property ($meaning): $value\n";
431 0         0 return;
432             }
433              
434             sub _set_verbosity {
435 14     14   40 my ($self, $verbosity) = @_;
436 14 50       94 $self->{VERBOSE} = $verbosity ? 1 : 0;
437 14         34 return;
438             }
439              
440             #
441             # Format a gmt date according to RFC822
442             #
443             sub _format_date {
444 18     18   52 my ($self, $datearr) = @_;
445 18         37 my $day = qw(Sun Mon Tue Wed Thu Fri Sat)[strftime("%w", @{$datearr})];
  18         860  
446 18         80 my $month = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec)[strftime("%m", @{$datearr}) - 1];
  18         477  
447 18         88 return strftime("$day, %d $month %Y %H:%M:%S +0000", @{$datearr});
  18         520  
448             }
449              
450             1;