File Coverage

blib/lib/Email/Outlook/Message.pm
Criterion Covered Total %
statement 190 195 97.4
branch 47 60 78.3
condition 6 10 60.0
subroutine 31 31 100.0
pod 2 2 100.0
total 276 298 92.6


line stmt bran cond sub pod time code
1             package Email::Outlook::Message;
2             =head1 NAME
3              
4             Email::Outlook::Message.pm - Read Outlook .msg files
5              
6             =head1 SYNOPSIS
7              
8             use Email::Outlook::Message;
9              
10             my $msg = new Email::Outlook::Message $filename, $verbose;
11             my $mime = $msg->to_email_mime;
12             $mime->as_string;
13              
14             =head1 DESCRIPTION
15              
16             Parses .msg message files as produced by Microsoft Outlook.
17              
18             =head1 METHODS
19              
20             =over 8
21              
22             =item B
23              
24             Parse the file pointed at by $msg. Set $verbose to a true value to
25             print information about skipped parts of the .msg file on STDERR.
26              
27             =item B
28              
29             Output result as an Email::MIME object.
30              
31             =back
32              
33             =head1 BUGS
34              
35             Not all data that is in the .msg file is converted. There are some
36             parts whose meaning escapes me, although more documentation on MIME
37             properties is available these days. Other parts do not make sense outside
38             of Outlook and Exchange.
39              
40             GPG signed mail is not processed correctly. Neither are attachments of
41             type 'appledoublefile'.
42              
43             It would be nice if we could write .MSG files too, but that will require
44             quite a big rewrite.
45              
46             =head1 AUTHOR
47              
48             Matijs van Zuijlen, C
49              
50             =head1 COPYRIGHT AND LICENSE
51              
52             Copyright 2002--2014 by Matijs van Zuijlen
53              
54             This module is free software; you can redistribute it and/or modify
55             it under the same terms as Perl itself.
56              
57             =cut
58 7     7   323880 use strict;
  7         14  
  7         171  
59 7     7   29 use warnings;
  7         13  
  7         138  
60 7     7   133 use 5.006;
  7         25  
61 7     7   32 use vars qw($VERSION);
  7         10  
  7         343  
62             $VERSION = "0.919";
63              
64 7     7   1793 use Email::Simple;
  7         20721  
  7         157  
65 7     7   1488 use Email::MIME::Creator;
  7         267225  
  7         218  
66 7     7   2028 use Email::Outlook::Message::AddressInfo;
  7         19  
  7         230  
67 7     7   2185 use Email::Outlook::Message::Attachment;
  7         17  
  7         169  
68 7     7   43 use Carp;
  7         74  
  7         405  
69 7     7   36 use base 'Email::Outlook::Message::Base';
  7         16  
  7         11021  
70              
71             our $skipheaders = {
72             map { uc($_) => 1 }
73             "MIME-Version",
74             "Content-Type",
75             "Content-Transfer-Encoding",
76             "X-Mailer",
77             "X-Msgconvert",
78             "X-MS-Tnef-Correlator",
79             "X-MS-Has-Attach"
80             };
81              
82             our $MAP_SUBITEM_FILE = {
83             '1000' => "BODY_PLAIN", # Body
84             '1009' => "BODY_RTF", # Compressed-RTF version of body
85             '1013' => "BODY_HTML", # HTML Version of body
86             '0037' => "SUBJECT", # Subject
87             '0047' => "SUBMISSION_ID", # Seems to contain the date
88             '007D' => "HEAD", # Full headers
89             '0C1A' => "FROM", # From: Name
90             '0C1E' => "FROM_ADDR_TYPE", # From: Address type
91             '0C1F' => "FROM_ADDR", # From: Address
92             '0E04' => "TO", # To: Names
93             '0E03' => "CC", # Cc: Names
94             '1035' => "MESSAGEID", # Message-Id
95             '1039' => "REFERENCES", # References: Header
96             '1042' => "INREPLYTO", # In reply to Message-Id
97             '3007' => 'DATE2ND', # Creation Time
98             '0039' => 'DATE1ST', # Outlook sent date
99             };
100              
101             #
102             # Main body of module
103             #
104              
105             sub new {
106 6     6 1 440 my $class = shift;
107 6 100       164 my $file = shift or croak "File name is required parameter";
108 5         14 my $verbose = shift;
109              
110 5         19 my $self = $class->_empty_new;
111              
112 5         38 $self->{EMBEDDED} = 0;
113              
114 5         39 my $msg = OLE::Storage_Lite->new($file);
115 5         64 my $pps = $msg->getPpsTree(1);
116 5 50       102465 $pps or croak "Parsing $file as OLE file failed";
117 5         78 $self->_set_verbosity($verbose);
118             # TODO: Use separate object as parser?
119 5         35 $self->_process_pps($pps);
120              
121 5         237 return $self;
122             }
123              
124             sub _empty_new {
125 6     6   82 my $class = shift;
126              
127 6         41 return bless {
128             ADDRESSES => [], ATTACHMENTS => [], FROM_ADDR_TYPE => "",
129             VERBOSE => 0, EMBEDDED => 1
130             }, $class;
131             }
132              
133             sub to_email_mime {
134 9     9 1 5619 my $self = shift;
135              
136 9         27 my $bodymime;
137             my $mime;
138              
139 9         0 my @parts;
140              
141 9 100       30 if ($self->{BODY_PLAIN}) { push(@parts, $self->_create_mime_plain_body()); }
  7         25  
142 9 100       14204 if ($self->{BODY_HTML}) { push(@parts, $self->_create_mime_html_body()); }
  2         7  
143 9 100       1917 if ($self->{BODY_RTF}) { push(@parts, $self->_create_mime_rtf_body()); }
  5         21  
144              
145 9 100       5166 if ((scalar @parts) > 1) {
    100          
146 6         16 for (@parts) { $self->_clean_part_header($_) };
  12         33  
147              
148 6         32 $bodymime = Email::MIME->create(
149             attributes => {
150             content_type => "multipart/alternative",
151             encoding => "8bit",
152             },
153             parts => \@parts
154             );
155             } elsif ((@parts) == 1) {
156 2         4 $bodymime = $parts[0];
157             } else {
158 1         3 $bodymime = $self->_create_mime_plain_body();
159             }
160              
161 9 100       23025 if (@{$self->{ATTACHMENTS}}>0) {
  9         34  
162 2         8 $self->_clean_part_header($bodymime);
163 2         12 my $mult = Email::MIME->create(
164             attributes => {
165             content_type => "multipart/mixed",
166             encoding => "8bit",
167             },
168             parts => [$bodymime],
169             );
170 2         5318 foreach my $att (@{$self->{ATTACHMENTS}}) {
  2         9  
171 2         9 $self->_SaveAttachment($mult, $att);
172             }
173 2         5 $mime = $mult;
174             } else {
175 7         12 $mime = $bodymime;
176             }
177              
178             #$mime->header_set('Date', undef);
179 9         35 $self->_SetHeaderFields($mime);
180 9         28 $self->_copy_header_data($mime);
181              
182 9         98 return $mime;
183             }
184              
185             #
186             # Below are functions that walk the PPS tree. This is simply a tree walk.
187             # It's not really recursive (except when an attachment contains a .msg
188             # file), since the tree is shallow (max. 1 subdirectory deep).
189             #
190             # The structure is as follows:
191             #
192             # Root
193             # Items with properties of the e-mail
194             # Dirs containting adresses
195             # Items with properties of the address
196             # Dirs containing Attachments
197             # Items with properties of the attachment (including its data)
198             # Dir that is itself a .msg file (if the attachment is an email).
199             #
200              
201             sub _property_map {
202 5     5   26 return $MAP_SUBITEM_FILE;
203             }
204              
205             #
206             # Process a subdirectory. This is either an address or an attachment.
207             #
208             sub _process_subdirectory {
209 12     12   21 my ($self, $pps) = @_;
210              
211 12         68 $self->_extract_ole_date($pps);
212              
213 12         43 my $name = $self->_get_pps_name($pps);
214              
215 12 100       46 if ($name =~ '__recip_version1 0_ ') { # Address of one recipient
    100          
216 5         17 $self->_process_address($pps);
217             } elsif ($name =~ '__attach_version1 0_ ') { # Attachment
218 2         9 $self->_process_attachment($pps);
219             } else {
220 5         30 $self->_warn_about_unknown_directory($pps);
221             }
222 12         29 return;
223             }
224              
225             #
226             # Process a subdirectory that contains an email address.
227             #
228             sub _process_address {
229 5     5   15 my ($self, $pps) = @_;
230              
231             my $addr_info = Email::Outlook::Message::AddressInfo->new($pps,
232 5         55 $self->{VERBOSE});
233              
234 5         12 push @{$self->{ADDRESSES}}, $addr_info;
  5         13  
235 5         9 return;
236             }
237              
238             #
239             # Process a subdirectory that contains an attachment.
240             #
241             sub _process_attachment {
242 2     2   6 my ($self, $pps) = @_;
243              
244             my $attachment = Email::Outlook::Message::Attachment->new($pps,
245 2         19 $self->{VERBOSE});
246 2         8 push @{$self->{ATTACHMENTS}}, $attachment;
  2         6  
247 2         4 return;
248             }
249              
250             #
251             # Header length of the property stream depends on whether the Message
252             # object is embedded or not.
253             #
254             sub _property_stream_header_length {
255 5     5   10 my $self = shift;
256 5 50       21 return ($self->{EMBEDDED} ? 24 : 32)
257             }
258              
259             #
260             # Helper functions
261             #
262              
263             #
264             # Extract time stamp of this OLE item (this is in GMT)
265             #
266             sub _extract_ole_date {
267 12     12   25 my ($self, $pps) = @_;
268 12 100       30 unless (defined ($self->{OLEDATE})) {
269             # Make Date
270 5         9 my $datearr;
271 5         10 $datearr = $pps->{Time2nd};
272 5 50 33     44 $datearr = $pps->{Time1st} unless $datearr and $datearr->[0];
273 5 50 33     55 $self->{OLEDATE} = $self->_format_date($datearr) if $datearr and $datearr->[0];
274             }
275 12         21 return;
276             }
277              
278             # If we didn't get the date from the original header data, we may be able
279             # to get it from the SUBMISSION_ID:
280             # It seems to have the format of a semicolon-separated list of key=value
281             # pairs. The key l has a value with the format:
282             # -Z-, where DATETIME is the date and time (gmt)
283             # in the format YYMMDDHHMMSS.
284             sub _submission_id_date {
285 10     10   1146 my $self = shift;
286              
287 10 100       37 my $submission_id = $self->{SUBMISSION_ID} or return;
288 5 50       28 $submission_id =~ m/ l=.*- (\d\d) (\d\d) (\d\d) (\d\d) (\d\d) (\d\d) Z-.* /x
289             or return;
290 5         10 my $year = $1;
291 5 50       15 $year += 100 if $year < 20;
292 5         32 return $self->_format_date([$6,$5,$4,$3,$2-1,$year]);
293             }
294              
295             sub _SaveAttachment {
296 2     2   7 my ($self, $mime, $att) = @_;
297              
298 2         10 my $m = $att->to_email_mime;
299 2         8 $self->_clean_part_header($m);
300 2         10 $mime->parts_add([$m]);
301 2         4683 return;
302             }
303              
304             # Set header fields
305             sub _AddHeaderField {
306 99     99   197 my ($self, $mime, $fieldname, $value) = @_;
307              
308             #my $oldvalue = $mime->header($fieldname);
309             #return if $oldvalue;
310 99 100       199 $mime->header_set($fieldname, $value) if $value;
311 99         1235 return;
312             }
313              
314             sub _Address {
315 9     9   21 my ($self, $tag) = @_;
316              
317 9   100     41 my $result = $self->{$tag} || "";
318              
319 9   100     40 my $address = $self->{$tag . "_ADDR"} || "";
320 9 100       26 if ($address) {
321 2 50       7 $result .= " " if $result;
322 2         6 $result .= "<$address>";
323             }
324              
325 9         28 return $result;
326             }
327              
328             # Find SMTP addresses for the given list of names
329             sub _expand_address_list {
330 18     18   36 my ($self, $names) = @_;
331              
332 18 100       51 return "" unless defined $names;
333              
334 10         30 my @namelist = split / ; [ ]* /x, $names;
335 10         16 my @result;
336 10         23 name: foreach my $name (@namelist) {
337 5         16 my $addresstext = $self->_find_name_in_addresspool($name);
338 5 50       20 if ($addresstext) {
339 5         15 push @result, $addresstext;
340             } else {
341 0         0 push @result, $name;
342             }
343             }
344 10         43 return join ", ", @result;
345             }
346              
347             sub _find_name_in_addresspool {
348 5     5   12 my ($self, $name) = @_;
349              
350 5         12 my $addresspool = $self->{ADDRESSES};
351              
352 5         8 foreach my $address (@{$addresspool}) {
  5         12  
353 5 50       23 if ($name eq $address->name) {
354 5         19 return $address->display_address;
355             }
356             }
357 0         0 return;
358             }
359              
360             # TODO: Don't really want to need this!
361             sub _clean_part_header {
362 16     16   33 my ($self, $part) = @_;
363 16         56 $part->header_set('Date');
364 16 100       660 unless ($part->content_type =~ m{ ^ multipart / }x) {
365 13         494 $part->header_set('MIME-Version')
366             };
367 16         553 return;
368             }
369              
370             sub _create_mime_plain_body {
371 8     8   14 my $self = shift;
372             return Email::MIME->create(
373             attributes => {
374             content_type => "text/plain",
375             charset => "UTF-8",
376             disposition => "inline",
377             encoding => "8bit",
378             },
379             body => $self->{BODY_PLAIN}
380 8         72 );
381             }
382              
383             sub _create_mime_html_body {
384 2     2   3 my $self = shift;
385             return Email::MIME->create(
386             attributes => {
387             content_type => "text/html",
388             disposition => "inline",
389             encoding => "8bit",
390             },
391             body => $self->{BODY_HTML}
392 2         10 );
393             }
394              
395             # Implementation based on the information in
396             # http://www.freeutils.net/source/jtnef/rtfcompressed.jsp,
397             # and the implementation in tnef version 1.4.5.
398             my $MAGIC_COMPRESSED_RTF = 0x75465a4c;
399             my $MAGIC_UNCOMPRESSED_RTF = 0x414c454d;
400             my $BASE_BUFFER =
401             "{\\rtf1\\ansi\\mac\\deff0\\deftab720{\\fonttbl;}{\\f0\\fnil \\froman "
402             . "\\fswiss \\fmodern \\fscript \\fdecor MS Sans SerifSymbolArial"
403             . "Times New RomanCourier{\\colortbl\\red0\\green0\\blue0\n\r\\par "
404             . "\\pard\\plain\\f0\\fs20\\b\\i\\u\\tab\\tx";
405              
406              
407             sub _create_mime_rtf_body {
408 5     5   8 my $self = shift;
409 5         13 my $data = $self->{BODY_RTF};
410              
411 5         25 my ($size, $rawsize, $magic, $crc) = unpack "V4", substr $data, 0, 16;
412              
413 5         12 my $buffer;
414              
415 5 50       17 if ($magic == $MAGIC_COMPRESSED_RTF) {
    0          
416 5         9 $buffer = $BASE_BUFFER;
417 5         10 my $output_length = length($buffer) + $rawsize;
418 5         9 my @flags;
419 5         10 my $in = 16;
420 5         21 while (length($buffer) < $output_length) {
421 456 100       682 if (@flags == 0) {
422 60         173 @flags = split "", unpack "b8", substr $data, $in++, 1;
423             }
424 456         582 my $flag = shift @flags;
425 456 100       678 if ($flag eq "0") {
426 195         330 $buffer .= substr $data, $in++, 1;
427             } else {
428 261         450 my ($a, $b) = unpack "C2", substr $data, $in, 2;
429 261         352 my $offset = ($a << 4) | ($b >> 4);
430 261         321 my $length = ($b & 0xf) + 2;
431 261         293 my $buflen = length $buffer;
432 261         329 my $longoffset = $buflen - ($buflen % 4096) + $offset;
433 261 50       374 if ($longoffset >= $buflen) { $longoffset -= 4096; }
  0         0  
434 261         368 while ($length > 0) {
435 1364         1596 $buffer .= substr $buffer, $longoffset, 1;
436 1364         1454 $length--;
437 1364         1860 $longoffset++;
438             }
439 261         425 $in += 2;
440             }
441             }
442 5         22 $buffer = substr $buffer, length $BASE_BUFFER;
443             } elsif ($magic == $MAGIC_UNCOMPRESSED_RTF) {
444 0         0 $buffer = substr $data, 16;
445             } else {
446 0         0 carp "Incorrect magic number in RTF body.\n";
447             }
448 5         35 return Email::MIME->create(
449             attributes => {
450             content_type => "application/rtf",
451             disposition => "inline",
452             encoding => "base64",
453             },
454             body => $buffer
455             );
456             }
457             # Copy original header data.
458             # Note: This should contain the Date: header.
459             sub _copy_header_data {
460 10     10   1976 my ($self, $mime) = @_;
461              
462 10 100       35 defined $self->{HEAD} or return;
463              
464             # The extra \n is neede for Email::Simple to pick up all headers.
465             # This is a change in Email::Simple.
466 7         33 my $parsed = Email::Simple->new($self->{HEAD} . "\n");
467              
468 7         1542 foreach my $tag (grep { !$skipheaders->{uc $_}} $parsed->header_names) {
  73         494  
469 34         2523 $mime->header_set($tag, $parsed->header($tag));
470             }
471 7         594 return;
472             }
473              
474             # Set header fields
475             sub _SetHeaderFields {
476 9     9   20 my ($self, $mime) = @_;
477              
478 9         39 $self->_AddHeaderField($mime, 'Subject', $self->{SUBJECT});
479 9         34 $self->_AddHeaderField($mime, 'From', $self->_Address("FROM"));
480             #$self->_AddHeaderField($mime, 'Reply-To', $self->_Address("REPLYTO"));
481 9         32 $self->_AddHeaderField($mime, 'To', $self->_expand_address_list($self->{TO}));
482 9         29 $self->_AddHeaderField($mime, 'Cc', $self->_expand_address_list($self->{CC}));
483 9         33 $self->_AddHeaderField($mime, 'Message-Id', $self->{MESSAGEID});
484 9         30 $self->_AddHeaderField($mime, 'In-Reply-To', $self->{INREPLYTO});
485 9         31 $self->_AddHeaderField($mime, 'References', $self->{REFERENCES});
486              
487             # Least preferred option to set the Date: header; this uses the date the
488             # msg file was saved.
489 9         26 $self->_AddHeaderField($mime, 'Date', $self->{OLEDATE});
490              
491             # Second preferred option: get it from the SUBMISSION_ID:
492 9         25 $self->_AddHeaderField($mime, 'Date', $self->_submission_id_date());
493              
494             # Most prefered option from the property list
495 9         32 $self->_AddHeaderField($mime, 'Date', $self->{DATE2ND});
496 9         29 $self->_AddHeaderField($mime, 'Date', $self->{DATE1ST});
497              
498             # After this, we'll try getting the date from the original headers.
499 9         16 return;
500             }
501              
502             1;