File Coverage

blib/lib/Mail/SpamAssassin/Message.pm

Criterion	Covered	Total	%
statement	389	454	85.6
branch	163	232	70.2
condition	50	78	64.1
subroutine	33	36	91.6
pod	16	24	66.6
total	651	824	79.0

line	stmt	bran	cond	sub	pod	time	code
1							# <@LICENSE>
2							# Licensed to the Apache Software Foundation (ASF) under one or more
3							# contributor license agreements. See the NOTICE file distributed with
4							# this work for additional information regarding copyright ownership.
5							# The ASF licenses this file to you under the Apache License, Version 2.0
6							# (the "License"); you may not use this file except in compliance with
7							# the License. You may obtain a copy of the License at:
8							#
9							# http://www.apache.org/licenses/LICENSE-2.0
10							#
11							# Unless required by applicable law or agreed to in writing, software
12							# distributed under the License is distributed on an "AS IS" BASIS,
13							# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14							# See the License for the specific language governing permissions and
15							# limitations under the License.
16							# </@LICENSE>
17
18							=head1 NAME
19
20							Mail::SpamAssassin::Message - decode, render, and hold an RFC-2822 message
21
22							=head1 DESCRIPTION
23
24							This module encapsulates an email message and allows access to the various MIME
25							message parts and message metadata.
26
27							The message structure, after initiating a parse() cycle, looks like this:
28
29							Message object, also top-level node in Message::Node tree
30							\|
31							+---> Message::Node for other parts in MIME structure
32							\| \|---> [ more Message::Node parts ... ]
33							\| [ others ... ]
34							\|
35							+---> Message::Metadata object to hold metadata
36
37							=head1 PUBLIC METHODS
38
39							=over 4
40
41							=cut
42
43							package Mail::SpamAssassin::Message;
44
45	40			40		300	use strict;
	40					92
	40					1298
46	40			40		248	use warnings;
	40					83
	40					1427
47	40			40		247	use re 'taint';
	40					79
	40					3522
48
49							BEGIN {
50	40					21803	eval { require Digest::SHA; import Digest::SHA qw(sha1 sha1_hex); 1 }
	40					118978
	40					1314
51	40	50		40		152	or do { require Digest::SHA1; import Digest::SHA1 qw(sha1 sha1_hex) }
	0					0
	0					0
52							}
53
54	40			40		297	use Mail::SpamAssassin;
	40					85
	40					957
55	40			40		13476	use Mail::SpamAssassin::Message::Node;
	40					138
	40					1699
56	40			40		14337	use Mail::SpamAssassin::Message::Metadata;
	40					779
	40					4456
57	40			40		351	use Mail::SpamAssassin::Constants qw(:sa);
	40					130
	40					8033
58	40			40		363	use Mail::SpamAssassin::Logger;
	40					150
	40					225146
59
60							our @ISA = qw(Mail::SpamAssassin::Message::Node);
61
62							# ---------------------------------------------------------------------------
63
64							=item new()
65
66							Creates a Mail::SpamAssassin::Message object. Takes a hash reference
67							as a parameter. The used hash key/value pairs are as follows:
68
69							C<message> is either undef (which will use STDIN), a scalar - a string
70							containing an entire message, a reference to such string, an array reference
71							of the message with one line per array element, or either a file glob
72							or an IO::File object which holds the entire contents of the message.
73
74							Note: The message is expected to generally be in RFC 2822 format, optionally
75							including an mbox message separator line (the "From " line) as the first line.
76
77							C<parse_now> specifies whether or not to create the MIME tree
78							at object-creation time or later as necessary.
79
80							The I<parse_now> option, by default, is set to false (0).
81							This allows SpamAssassin to not have to generate the tree of
82							Mail::SpamAssassin::Message::Node objects and their related data if the
83							tree is not going to be used. This is handy, for instance, when running
84							C<spamassassin -d>, which only needs the pristine header and body which
85							is always handled when the object is created.
86
87							C<subparse> specifies how many MIME recursion levels should be parsed.
88							Defaults to 20.
89
90							=cut
91
92							# month mappings (ripped from Util.pm)
93							my %MONTH = (jan => 1, feb => 2, mar => 3, apr => 4, may => 5, jun => 6,
94							jul => 7, aug => 8, sep => 9, oct => 10, nov => 11, dec => 12);
95
96							# day of week mapping (starting from zero)
97							my @DAY_OF_WEEK = qw/Sun Mon Tue Wed Thu Fri Sat/ ;
98
99							sub new {
100	137			137	1	420	my $class = shift;
101	137		33			971	$class = ref($class) \|\| $class;
102
103	137					413	my($opts) = @_;
104	137	50				596	my $message = defined $opts->{'message'} ? $opts->{'message'} : \*STDIN;
105	137		100			742	my $parsenow = $opts->{'parsenow'} \|\| 0;
106	137		50			1039	my $normalize = $opts->{'normalize'} \|\| 0;
107
108							# Specifies whether or not to parse message/rfc822 parts into its own tree.
109							# If the # > 0, it'll subparse, otherwise it won't. By default, do twenty
110							# levels deep.
111	137	100				540	my $subparse = defined $opts->{'subparse'} ? $opts->{'subparse'} : 20;
112
113	137					1770	my $self = $class->SUPER::new({normalize=>$normalize});
114
115	137					658	$self->{tmpfiles} = [];
116	137					626	$self->{pristine_headers} = '';
117	137					551	$self->{pristine_body} = '';
118	137					654	$self->{mime_boundary_state} = {};
119	137					658	$self->{line_ending} = "\012";
120	137					404	$self->{master_deadline} = $opts->{'master_deadline'};
121	137					339	$self->{suppl_attrib} = $opts->{'suppl_attrib'};
122
123	137	100				520	if ($self->{suppl_attrib}) { # caller-provided additional information
124							# pristine_body_length is currently used by an eval test check_body_length
125							# Possible To-Do: Base the length on the @message array later down?
126	49	50				252	if (defined $self->{suppl_attrib}{body_size}) {
127							# Optional info provided by a caller; should reflect the original
128							# message body size if provided, and as such it may differ from the
129							# $self->{pristine_body} size, e.g. when the caller passed a truncated
130							# message to SpamAssassin, or when counting line-endings differently.
131	0					0	$self->{pristine_body_length} = $self->{suppl_attrib}{body_size};
132							}
133	49	50				235	if (ref $self->{suppl_attrib}{mimepart_digests}) {
134							# Optional info provided by a caller: an array of digest codes (e.g. SHA1)
135							# of each MIME part. Should reflect the original message if provided.
136							# As such it may differ from digests calculated by get_mimepart_digests(),
137							# e.g. when the caller passed a truncated message to SpamAssassin.
138	0					0	$self->{mimepart_digests} = $self->{suppl_attrib}{mimepart_digests};
139							}
140							}
141
142	137					327	bless($self,$class);
143
144							# create the metadata holder class
145	137					1793	$self->{metadata} = Mail::SpamAssassin::Message::Metadata->new($self);
146
147							# Ok, go ahead and do the message "parsing"
148
149							# protect it from abuse ...
150	137					281	local $_;
151
152							# Figure out how the message was passed to us, and deal with it.
153	137					298	my @message;
154	137	100	66			1147	if (ref $message eq 'ARRAY') {
		100
		50
		50
		50
155	63					139	@message = @{$message};
	63					349
156							}
157							elsif (ref($message) eq 'GLOB' \|\| ref($message) =~ /^IO::/) {
158	18	50				70	if (defined fileno $message) {
159
160							# sysread+split avoids a Perl I/O bug (Bug 5985)
161							# and is faster than (<$message>) by 10..25 %
162							# (a drawback is a short-term double storage of a text in $raw_str)
163							#
164	18					29	my($nread,$raw_str); $raw_str = '';
	18					32
165	18					621	while ( $nread=sysread($message, $raw_str, 16384, length $raw_str) ) { }
166	18	50				70	defined $nread or die "error reading: $!";
167	18					595	@message = split(/^/m, $raw_str, -1);
168
169	18	50				89	if ($raw_str eq '') {
		50
170	0					0	dbg("message: empty message read");
171							} elsif (length($raw_str) > 128*1024) {
172							# ditch rarely used large chunks of allocated memory, Bug 6514
173							# http://www.perlmonks.org/?node_id=803515
174							# about 97% of mail messages are below 128 kB,
175							# about 98% of mail messages are below 256 kB (2010 statistics)
176							# dbg("message: deallocating %.2f MB", length($raw_str)/1024/1024);
177	0					0	undef $raw_str;
178							}
179							}
180							}
181							elsif (ref $message eq 'SCALAR') {
182	0					0	@message = split(/^/m, $$message, -1);
183							}
184							elsif (ref $message) {
185	0					0	dbg("message: Input is a reference of unknown type!");
186							}
187							elsif (defined $message) {
188	56					1306	@message = split(/^/m, $message, -1);
189							}
190
191							# Pull off mbox and mbx separators
192							# also deal with null messages
193	137	50				1618	if (!@message) {
		100
		50
194							# bug 4884:
195							# if we get here, it means that the input was null, so fake the message
196							# content as a single newline...
197	0					0	@message = ("\n");
198							} elsif ($message[0] =~ /^From\s+(?!:)/) {
199							# careful not to confuse with obsolete syntax which allowed WSP before ':'
200							# mbox formated mailbox
201	17					67	$self->{'mbox_sep'} = shift @message;
202							} elsif ($message[0] =~ MBX_SEPARATOR) {
203	0					0	$_ = shift @message;
204
205							# Munge the mbx message separator into mbox format as a sort of
206							# de facto portability standard in SA's internals. We need to
207							# to this so that Mail::SpamAssassin::Util::parse_rfc822_date
208							# can parse the date string...
209	0	0				0	if (/([\s\d]\d)-([a-zA-Z]{3})-(\d{4})\s(\d{2}):(\d{2}):(\d{2})/) {
210							# $1 = day of month
211							# $2 = month (text)
212							# $3 = year
213							# $4 = hour
214							# $5 = min
215							# $6 = sec
216	0					0	my @arr = localtime(timelocal($6,$5,$4,$1,$MONTH{lc($2)}-1,$3));
217	0					0	my $address;
218	0					0	foreach (@message) {
219	0	0				0	if (/^From:[^<]*<([^>]+)>/) {
		0
220	0					0	$address = $1;
221	0					0	last;
222							} elsif (/^From:\s*([^<> ]+)/) {
223	0					0	$address = $1;
224	0					0	last;
225							}
226							}
227	0					0	$self->{'mbox_sep'} = "From $address $DAY_OF_WEEK[$arr[6]] $2 $1 $4:$5:$6 $3\n";
228							}
229							}
230
231							# bug 4363
232							# Check to see if we should do CRLF instead of just LF
233							# For now, just check the first and last line and do whatever it does
234	137	50	33			1678	if (@message && ($message[0] =~ /\015\012/ \|\| $message[-1] =~ /\015\012/)) {
			33
235	0					0	$self->{line_ending} = "\015\012";
236	0					0	dbg("message: line ending changed to CRLF");
237							}
238
239							# Is a CRLF -> LF line endings conversion necessary?
240	137					523	my $squash_crlf = $self->{line_ending} eq "\015\012";
241
242							# Go through all the header fields of the message
243	137					250	my $hdr_errors = 0;
244	137					237	my $header;
245	137					329	for (;;) {
246							# make sure not to lose the last header field when there is no body
247	1056					1540	my $eof = !@message;
248	1056	100				1934	my $current = $eof ? "\n" : shift @message;
249
250	1056	100				2648	if ( $current =~ /^[ \t]/ ) {
251							# This wasn't useful in terms of a rule, but we may want to treat it
252							# specially at some point. Perhaps ignore it?
253							#unless ($current =~ /\S/) {
254							# $self->{'obsolete_folding_whitespace'} = 1;
255							#}
256
257	200	50				400	$header = '' if !defined $header; # header starts with a continuation!?
258	200					334	$header .= $current; # append continuations, no matter what
259	200					353	$self->{'pristine_headers'} .= $current;
260							}
261							else { # not a continuation
262							# Ok, there's a header here, let's go ahead and add it in.
263	856	100				1673	if (defined $header) { # deal with a previous header field
264	719					2197	my ($key, $value) = split (/:/s, $header, 2);
265
266							# If it's not a valid header (aka: not in the form "foo:bar"), skip it.
267	719	100				1350	if (defined $value) {
268							# CRLF -> LF line-endings conversion if necessary
269	713	50				1168	$value =~ s/\015\012/\012/sg if $squash_crlf;
270	713					1332	$key =~ s/[ \t]+\z//; # strip WSP before colon, obsolete rfc822 syn
271							# limit the length of the pairs we store
272	713	50				1459	if (length($key) > MAX_HEADER_KEY_LENGTH) {
273	0					0	$key = substr($key, 0, MAX_HEADER_KEY_LENGTH);
274	0					0	$self->{'truncated_header'} = 1;
275							}
276	713	50				1260	if (length($value) > MAX_HEADER_VALUE_LENGTH) {
277	0					0	$value = substr($value, 0, MAX_HEADER_VALUE_LENGTH);
278	0					0	$self->{'truncated_header'} = 1;
279							}
280	713					2215	$self->header($key, $value);
281							}
282							}
283
284	856	100				3004	if ($current =~ /^\r?$/) { # a regular end of a header section
		100
285	132	100				435	if ($eof) {
286	1					10	$self->{'missing_head_body_separator'} = 1;
287							} else {
288	131					343	$self->{'pristine_headers'} .= $current;
289							}
290	132					285	last;
291							}
292							elsif ($current =~ /^--/) { # mime boundary encountered, bail out
293	4					11	$self->{'missing_head_body_separator'} = 1;
294	4					10	unshift(@message, $current);
295	4					7	last;
296							}
297							# should we assume entering a body on encountering invalid header field?
298							else {
299							# no re "strict"; # since perl 5.21.8: Ranges of ASCII printables...
300	720	100				2401	if ($current !~ /^[\041-\071\073-\176]+[ \t]*:/) {
301							# A field name MUST be composed of printable US-ASCII characters
302							# (i.e., characters that have values between 33 (041) and 126 (176),
303							# inclusive), except colon (072). Obsolete header field syntax
304							# allowed WSP before a colon.
305	7	100				13	if (++$hdr_errors <= 3) {
306							# just consume but ignore a few invalid header fields
307							} else { # enough is enough...
308	1					3	$self->{'missing_head_body_separator'} = 1;
309	1					2	unshift(@message, $current);
310	1					3	last;
311							}
312							}
313							}
314
315							# start collecting a new header field
316	719					1087	$header = $current;
317	719					1785	$self->{'pristine_headers'} .= $current;
318							}
319							}
320	137					289	undef $header;
321
322							# Store the pristine body for later -- store as a copy since @message
323							# will get modified below
324	137					1360	$self->{'pristine_body'} = join('', @message);
325
326	137	50				590	if (!defined $self->{pristine_body_length}) {
327	137					645	$self->{'pristine_body_length'} = length $self->{'pristine_body'};
328							}
329
330							# iterate over lines in reverse order
331							# merge multiple blank lines into a single one
332	137					282	my $start;
333	137					634	for (my $cnt=$#message; $cnt>=0; $cnt--) {
334							# CRLF -> LF line-endings conversion if necessary
335	4180	50				4907	$message[$cnt] =~ s/\015\012\z/\012/ if $squash_crlf;
336
337							# line is blank
338	4180	100				6408	if ($message[$cnt] =~ /^\s*$/) {
339							# /^\s*$/ is about 5% faster then !/\S/, but still expensive here
340	414	100				698	if (!defined $start) {
341	346					409	$start=$cnt;
342							}
343	414	100				847	next unless $cnt == 0;
344							}
345
346							# line is not blank, or we've reached the beginning
347
348							# if we've got a series of blank lines, get rid of them
349	3801	100				6082	if (defined $start) {
350	346					394	my $max_blank_lines = 20;
351	346					418	my $num = $start-$cnt;
352	346	50				548	if ($num > $max_blank_lines) {
353	0					0	splice @message, $cnt+2, $num-$max_blank_lines;
354							}
355	346					554	undef $start;
356							}
357							}
358
359							# Figure out the boundary
360	137					345	my ($boundary);
361	137					568	($self->{'type'}, $boundary) = Mail::SpamAssassin::Util::parse_content_type($self->header('content-type'));
362	137					1012	dbg("message: main message type: ".$self->{'type'});
363
364							# dbg("message: \$message[0]: \"" . $message[0] . "\"");
365
366							# bug 6845: if main message type is multipart and the message body does not begin with
367							# either a blank line or the boundary (if defined), insert a blank line
368							# to ensure proper parsing - do not consider MIME headers at the beginning of the body
369							# to be part of the message headers.
370	137	100	66			942	if ($self->{'type'} =~ /^multipart\//i && $#message > 0 && $message[0] =~ /\S/)
			100
371							{
372	14	100	66			290	if (!defined $boundary \|\| $message[0] !~ /^--\Q$boundary\E/)
373							{
374	4					12	dbg("message: Inserting blank line at top of body to ensure correct multipart MIME parsing");
375	4					8	unshift(@message, "\012");
376							}
377							}
378
379							# dbg("message: \$message[0]: \"" . $message[0] . "\"");
380							# dbg("message: \$message[1]: \"" . $message[1] . "\"");
381
382							# parse queue, simple array of parts to parse:
383							# 0: part object, already in the tree
384							# 1: boundary used to focus body parsing
385							# 2: message content
386							# 3: how many MIME subparts to parse down
387							#
388	137					779	$self->{'parse_queue'} = [ [ $self, $boundary, \@message, $subparse ] ];
389
390							# If the message does need to get parsed, save off a copy of the body
391							# in a format we can easily parse later so we don't have to rip from
392							# pristine_body ... If we do want to parse now, go ahead and do so ...
393							#
394	137	100				494	if ($parsenow) {
395	101					558	$self->parse_body();
396							}
397
398	137					695	$self;
399							}
400
401							# ---------------------------------------------------------------------------
402
403							=item find_parts()
404
405							Used to search the tree for specific MIME parts. See
406							I<Mail::SpamAssassin::Message::Node> for more details.
407
408							=cut
409
410							# Used to find any MIME parts whose simple content-type matches a given regexp
411							# Searches it's own and any children parts. Returns an array of MIME
412							# objects which match.
413							#
414							sub find_parts {
415	231			231	1	3125	my $self = shift;
416
417							# ok, we need to do the parsing now...
418	231	100				872	$self->parse_body() if (exists $self->{'parse_queue'});
419
420							# and pass through to the Message::Node version of the method
421	231					1432	return $self->SUPER::find_parts(@_);
422							}
423
424							# ---------------------------------------------------------------------------
425
426							=item get_pristine_header()
427
428							Returns pristine headers of the message. If no specific header name
429							is given as a parameter (case-insensitive), then all headers will be
430							returned as a scalar, including the blank line at the end of the headers.
431
432							If called in an array context, an array will be returned with each
433							specific header in a different element. In a scalar context, the last
434							specific header is returned.
435
436							ie: If 'Subject' is specified as the header, and there are 2 Subject
437							headers in a message, the last/bottom one in the message is returned in
438							scalar context or both are returned in array context.
439
440							Btw, returning the last header field (not the first) happens to be consistent
441							with DKIM signatures, which search for and cover multiple header fields
442							bottom-up according to the 'h' tag. Let's keep it this way.
443
444							Note: the returned header will include the ending newline and any embedded
445							whitespace folding.
446
447							=cut
448
449							sub get_pristine_header {
450	55			55	1	163	my ($self, $hdr) = @_;
451
452	55	100	66			512	return $self->{pristine_headers} if !defined $hdr \|\| $hdr eq '';
453							my(@ret) =
454	12					361	$self->{pristine_headers} =~ /^\Q$hdr\E[ \t]:[ \t](.*?\n(?![ \t]))/smgi;
455							# taintedness is retained by "use re 'taint'" (fix in bug 5283 now redundant)
456	12	100				51	if (!@ret) {
		50
457	2					20	return $self->get_header($hdr);
458							} elsif (wantarray) {
459	0					0	return @ret;
460							} else {
461	10					47	return $ret[-1];
462							}
463							}
464
465							=item get_mbox_separator()
466
467							Returns the mbox separator found in the message, or undef if there
468							wasn't one.
469
470							=cut
471
472							sub get_mbox_separator {
473	34			34	1	211	return $_[0]->{mbox_sep};
474							}
475
476							=item get_body()
477
478							Returns an array of the pristine message body, one line per array element.
479
480							=cut
481
482							sub get_body {
483	0			0	1	0	my ($self) = @_;
484	0					0	my @ret = split(/^/m, $self->{pristine_body});
485	0					0	return \@ret;
486							}
487
488							# ---------------------------------------------------------------------------
489
490							=item get_pristine()
491
492							Returns a scalar of the entire pristine message.
493
494							=cut
495
496							sub get_pristine {
497	98			98	1	231	my ($self) = @_;
498	98					595	return $self->{pristine_headers} . $self->{pristine_body};
499							}
500
501							=item get_pristine_body()
502
503							Returns a scalar of the pristine message body.
504
505							=cut
506
507							sub get_pristine_body {
508	46			46	1	167	my ($self) = @_;
509	46					305	return $self->{pristine_body};
510							}
511
512							# ---------------------------------------------------------------------------
513
514							=item extract_message_metadata($permsgstatus)
515
516							=cut
517
518							sub extract_message_metadata {
519	116			116	1	409	my ($self, $permsgstatus) = @_;
520
521							# do this only once per message, it can be expensive
522	116	100				460	return if $self->{already_extracted_metadata};
523	102					400	$self->{already_extracted_metadata} = 1;
524
525	102					771	$self->{metadata}->extract ($self, $permsgstatus);
526							}
527
528							# ---------------------------------------------------------------------------
529
530							=item $str = get_metadata($hdr)
531
532							=cut
533
534							sub get_metadata {
535	2859			2859	1	4491	my ($self, $hdr) = @_;
536	2859	50				5083	if (!$self->{metadata}) {
537	0					0	warn "metadata: oops! get_metadata() called after finish_metadata()"; return;
	0					0
538							}
539							# dbg("message: get_metadata - %s: %s", $hdr, defined $_ ? $_ : '<undef>')
540							# for $self->{metadata}->{strings}->{lc $hdr};
541
542	2859					6669	$self->{metadata}->{strings}->{lc $hdr};
543							}
544
545							=item put_metadata($hdr, $text)
546
547							=cut
548
549							sub put_metadata {
550	408			408	1	804	my ($self, $hdr, $text) = @_;
551	408	50				924	if (!$self->{metadata}) {
552	0					0	warn "metadata: oops! put_metadata() called after finish_metadata()"; return;
	0					0
553							}
554							# dbg("message: put_metadata - %s: %s", $hdr, $text);
555	408					1434	$self->{metadata}->{strings}->{lc $hdr} = $text;
556							}
557
558							=item delete_metadata($hdr)
559
560							=cut
561
562							sub delete_metadata {
563	0			0	1	0	my ($self, $hdr) = @_;
564	0	0				0	if (!$self->{metadata}) {
565	0					0	warn "metadata: oops! delete_metadata() called after finish_metadata()"; return;
	0					0
566							}
567	0					0	delete $self->{metadata}->{strings}->{lc $hdr};
568							}
569
570							=item $str = get_all_metadata()
571
572							=cut
573
574							sub get_all_metadata {
575	16			16	1	47	my ($self) = @_;
576
577	16	50				67	if (!$self->{metadata}) {
578	0					0	warn "metadata: oops! get_all_metadata() called after finish_metadata()"; return;
	0					0
579							}
580	16					30	my @ret;
581	16					38	my $keys_ref = $self->{metadata}->{strings};
582	16					123	foreach my $key (sort keys %$keys_ref) {
583	64					132	my $val = $keys_ref->{$key};
584	64	50				121	$val = '' if !defined $val;
585	64					169	push (@ret, "$key: $val\n");
586							}
587	16	50				93	return (wantarray ? @ret : join('', @ret));
588							}
589
590							# ---------------------------------------------------------------------------
591
592							=item finish_metadata()
593
594							Destroys the metadata for this message. Once a message has been
595							scanned fully, the metadata is no longer required. Destroying
596							this will free up some memory.
597
598							=cut
599
600							sub finish_metadata {
601	112			112	1	259	my ($self) = @_;
602	112	50				430	if (defined ($self->{metadata})) {
603	112					832	$self->{metadata}->finish();
604	112					439	delete $self->{metadata};
605							}
606							}
607
608							=item finish()
609
610							Clean up an object so that it can be destroyed.
611
612							=cut
613
614							sub finish {
615	80			80	1	6610	my ($self) = @_;
616
617							# Clean ourself up
618	80					342	$self->finish_metadata();
619
620							# These will only be in the root Message node
621	80					224	delete $self->{'mime_boundary_state'};
622	80					438	delete $self->{'mbox_sep'};
623	80					195	delete $self->{'normalize'};
624	80					217	delete $self->{'pristine_body'};
625	80					191	delete $self->{'pristine_headers'};
626	80					192	delete $self->{'line_ending'};
627	80					151	delete $self->{'missing_head_body_separator'};
628
629							# Remove the queue variable, in case the body has not been parsed
630	80					155	delete $self->{'parse_queue'};
631
632	80					211	my @toclean = ( $self );
633
634							# Go ahead and clean up all of the Message::Node parts
635	80					293	while (my $part = shift @toclean) {
636							# bug 5557: windows requires tmp file be closed before it can be rm'd
637	126	100				468	if (ref $part->{'raw'} eq 'GLOB') {
638	20	50				186	close($part->{'raw'}) or die "error closing input file: $!";
639							}
640
641							# bug 5858: avoid memory leak with deep MIME structure
642	126	100				322	if (defined ($part->{metadata})) {
643	4					14	$part->{metadata}->finish();
644	4					8	delete $part->{metadata};
645							}
646
647	126					515	delete $part->{'headers'};
648	126					428	delete $part->{'raw_headers'};
649	126					289	delete $part->{'header_order'};
650	126					309	delete $part->{'raw'};
651	126					264	delete $part->{'decoded'};
652	126					211	delete $part->{'rendered'};
653	126					202	delete $part->{'visible_rendered'};
654	126					200	delete $part->{'invisible_rendered'};
655	126					202	delete $part->{'type'};
656	126					215	delete $part->{'rendered_type'};
657
658							# if there are children nodes, add them to the queue of nodes to clean up
659	126	100				493	if (exists $part->{'body_parts'}) {
660	23					30	push(@toclean, @{$part->{'body_parts'}});
	23					36
661	23					50	delete $part->{'body_parts'};
662							}
663							}
664
665							# delete temporary files
666	80	50				292	if ($self->{'tmpfiles'}) {
667	80					133	for my $fn (@{$self->{'tmpfiles'}}) {
	80					311
668	20	50				731	unlink($fn) or warn "cannot unlink $fn: $!";
669							}
670	80					321	delete $self->{'tmpfiles'};
671							}
672							}
673
674							# also use a DESTROY method, just to ensure (as much as possible) that
675							# temporary files are deleted even if the finish() method is omitted
676							sub DESTROY {
677	116			116		525	my $self = shift;
678							# best practices: prevent potential calls to eval and to system routines
679							# in code of a DESTROY method from clobbering global variables $@ and $!
680	116					993	local($@,$!); # keep outer error handling unaffected by DESTROY
681	116	100				2174	if ($self->{'tmpfiles'}) {
682	36					92	for my $fn (@{$self->{'tmpfiles'}}) {
	36					1708
683	0	0				0	unlink($fn) or dbg("message: cannot unlink $fn: $!");
684							}
685							}
686							}
687
688							# ---------------------------------------------------------------------------
689
690							=item receive_date()
691
692							Return a time_t value with the received date of the current message,
693							or current time if received time couldn't be determined.
694
695							=cut
696
697							sub receive_date {
698	10			10	1	39	my($self) = @_;
699
700	10					58	return Mail::SpamAssassin::Util::receive_date(scalar $self->get_all_headers(0,1));
701							}
702
703							# ---------------------------------------------------------------------------
704
705							=back
706
707							=head1 PARSING METHODS, NON-PUBLIC
708
709							These methods take a RFC2822-esque formatted message and create a tree
710							with all of the MIME body parts included. Those parts will be decoded
711							as necessary, and text/html parts will be rendered into a standard text
712							format, suitable for use in SpamAssassin.
713
714							=over 4
715
716							=item parse_body()
717
718							parse_body() passes the body part that was passed in onto the
719							correct part parser, either _parse_multipart() for multipart/* parts,
720							or _parse_normal() for everything else. Multipart sections become the
721							root of sub-trees, while everything else becomes a leaf in the tree.
722
723							For multipart messages, the first call to parse_body() doesn't create a
724							new sub-tree and just uses the parent node to contain children. All other
725							calls to parse_body() will cause a new sub-tree root to be created and
726							children will exist underneath that root. (this is just so the tree
727							doesn't have a root node which points at the actual root node ...)
728
729							=cut
730
731							sub parse_body {
732	125			125	1	358	my($self) = @_;
733
734							# This shouldn't happen, but just in case, abort.
735	125	50				505	return unless (exists $self->{'parse_queue'});
736
737	125					479	dbg("message: ---- MIME PARSER START ----");
738
739	125					270	while (my $toparse = shift @{$self->{'parse_queue'}}) {
	298					1084
740							# multipart sections are required to have a boundary set ... If this
741							# one doesn't, assume it's malformed and send it to be parsed as a
742							# non-multipart section
743							#
744	173					444	my ($msg, $boundary, $body, $subparse) = @$toparse;
745
746	173	100	66			1072	if ($msg->{'type'} =~ m{^multipart/}i && defined $boundary && $subparse > 0) {
			66
747	20					56	$self->_parse_multipart($toparse);
748							}
749							else {
750							# If it's not multipart, go ahead and just deal with it.
751	153					917	$self->_parse_normal($toparse);
752
753							# bug 5041: process message/*, but exclude message/partial content types
754	153	100	66			1029	if ($msg->{'type'} =~ m{^message/(?!partial\z)}i && $subparse > 0)
755							{
756							# Just decode the part, but we don't need the resulting string here.
757	4					27	$msg->decode(0);
758
759							# bug 7125: decode and parse only message/rfc822 or message/global,
760							# but do not treat other message/* content types (like the ones listed
761							# here) as a message consisting of a header and a body, as they are not:
762							# message/delivery-status, message/global-delivery-status,
763							# message/feedback-report, message/global-headers,
764							# message/global-disposition-notification,
765							# message/disposition-notification, (and message/partial)
766
767							# bug 5051, bug 3748: check $msg->{decoded}: sometimes message/* parts
768							# have no content, and we get stuck waiting for STDIN, which is bad. :(
769
770	4	50	33			32	if ($msg->{'type'} =~ m{^message/(?:rfc822\|global)\z}i &&
			33
771							defined $msg->{'decoded'} && $msg->{'decoded'} ne '')
772							{
773							# Ok, so this part is still semi-recursive, since M::SA::Message
774							# calls M::SA::Message, but we don't subparse the new message,
775							# and pull a sneaky "steal our child's queue" maneuver to deal
776							# with it on our own time. Reference the decoded array directly
777							# since it's faster.
778							#
779							my $msg_obj = Mail::SpamAssassin::Message->new({
780							message => $msg->{'decoded'},
781							parsenow => 0,
782							normalize => $self->{normalize},
783	4					37	subparse => $subparse - 1,
784							});
785
786							# Add the new message to the current node
787	4					17	$msg->add_body_part($msg_obj);
788
789							# now this is the sneaky bit ... steal the sub-message's parse_queue
790							# and add it to ours. then we'll handle the sub-message in our
791							# normal loop and get all the glory. muhaha. :)
792	4					4	push(@{$self->{'parse_queue'}}, @{$msg_obj->{'parse_queue'}});
	4					6
	4					8
793	4					8	delete $msg_obj->{'parse_queue'};
794
795							# Ok, we've subparsed, so go ahead and remove the raw and decoded
796							# data because we won't need them anymore (the tree under this part
797							# will have that data)
798	4	50				11	if (ref $msg->{'raw'} eq 'GLOB') {
799							# Make sure we close it if it's a temp file -- Bug 5166
800	0	0				0	close($msg->{'raw'})
801							or die "error closing input file: $!";
802							}
803
804	4					6	delete $msg->{'raw'};
805
806	4					36	delete $msg->{'decoded'};
807							}
808							}
809							}
810							}
811
812	125					440	dbg("message: ---- MIME PARSER END ----");
813
814							# we're done parsing, so remove the queue variable
815	125					378	delete $self->{'parse_queue'};
816							}
817
818							=item _parse_multipart()
819
820							Generate a root node, and for each child part call parse_body()
821							to generate the tree.
822
823							=cut
824
825							sub _parse_multipart {
826	20			20		37	my($self, $toparse) = @_;
827
828	20					22	my ($msg, $boundary, $body, $subparse) = @{$toparse};
	20					38
829
830							# we're not supposed to be a leaf, so prep ourselves
831	20					46	$msg->{'body_parts'} = [];
832
833							# the next set of objects will be one level deeper
834	20					26	$subparse--;
835
836	20	50				76	dbg("message: parsing multipart, got boundary: ".(defined $boundary ? $boundary : ''));
837
838							# NOTE: The MIME boundary REs here are very specific to be mostly RFC 1521
839							# compliant, but also allow possible malformations to still work. Please
840							# see Bugzilla bug 3749 for more information before making any changes!
841
842							# ignore preamble per RFC 1521, unless there's no boundary ...
843	20	50				38	if ( defined $boundary ) {
844	20					22	my $line;
845	20					26	my $tmp_line = @{$body};
	20					34
846	20					47	for ($line=0; $line < $tmp_line; $line++) {
847							# dbg("message: multipart line $line: \"" . $body->[$line] . "\"");
848							# specifically look for an opening boundary
849	120	100	100			516	if (substr($body->[$line],0,2) eq '--' # triage
850							&& $body->[$line] =~ /^--\Q$boundary\E\s*$/) {
851							# Make note that we found the opening boundary
852	19					63	$self->{mime_boundary_state}->{$boundary} = 1;
853
854							# if the line after the opening boundary isn't a header, flag it.
855							# we need to make sure that there's actually another line though.
856							# no re "strict"; # since perl 5.21.8: Ranges of ASCII printables...
857	19	100	66			103	if ($line+1 < $tmp_line && $body->[$line+1] !~ /^[\041-\071\073-\176]+:/) {
858	5					12	$self->{'missing_mime_headers'} = 1;
859							}
860
861	19					31	last;
862							}
863							}
864
865							# Found a boundary, ignore the preamble
866	20	100				38	if ( $line < $tmp_line ) {
867	19					23	splice @{$body}, 0, $line+1;
	19					41
868							}
869
870							# Else, there's no boundary, so leave the whole part...
871							}
872
873							# prepare a new tree node
874	20					158	my $part_msg = Mail::SpamAssassin::Message::Node->new({ normalize=>$self->{normalize} });
875	20					42	my $in_body = 0;
876	20					38	my $header;
877							my $part_array;
878	20					0	my $found_end_boundary;
879
880	20					21	my $line_count = @{$body};
	20					28
881	20					23	foreach ( @{$body} ) {
	20					36
882							# if we're on the last body line, or we find any boundary marker,
883							# deal with the mime part;
884							# a triage before an unlikely-to-match regexp avoids a CPU hotspot
885	2260		100			5390	$found_end_boundary = defined $boundary && substr($_,0,2) eq '--'
886							&& /^--\Q$boundary\E(?:--)?\s*$/;
887	2260	100	100			4432	if ( --$line_count == 0 \|\| $found_end_boundary ) {
888	44					57	my $line = $_; # remember the last line
889
890							# If at last line and no end boundary found, the line belongs to body
891							# TODO:
892							# Is $self->{mime_boundary_state}->{$boundary}-- needed here?
893							# Could "missing end boundary" be a useful rule? Mark it somewhere?
894							# If SA processed truncated message from amavis etc, this could also
895							# be hit legimately..
896	44	50				93	if (!$found_end_boundary) {
		50
897							# TODO: This is duplicate code from few pages down below..
898	0					0	while (length ($_) > MAX_BODY_LINE_LENGTH) {
899	0					0	push (@{$part_array}, substr($_, 0, MAX_BODY_LINE_LENGTH)."\n");
	0					0
900	0					0	substr($_, 0, MAX_BODY_LINE_LENGTH) = '';
901							}
902	0					0	push ( @{$part_array}, $_ );
	0					0
903							}
904							# per rfc 1521, the CRLF before the boundary is part of the boundary:
905							# NOTE: The CRLF preceding the encapsulation line is conceptually
906							# attached to the boundary so that it is possible to have a part
907							# that does not end with a CRLF (line break). Body parts that must
908							# be considered to end with line breaks, therefore, must have two
909							# CRLFs preceding the encapsulation line, the first of which is part
910							# of the preceding body part, and the second of which is part of the
911							# encapsulation boundary.
912							elsif ($part_array) {
913	44					78	chomp( $part_array->[-1] ); # trim the CRLF that's part of the boundary
914	44	100				73	splice @{$part_array}, -1 if ( $part_array->[-1] eq '' ); # blank line for the boundary only ...
	24					46
915							}
916							else {
917							# Invalid parts can have no body, so fake in a blank body
918							# in that case.
919	0					0	$part_array = [];
920							}
921
922	44					46	my($p_boundary);
923	44					102	($part_msg->{'type'}, $p_boundary) = Mail::SpamAssassin::Util::parse_content_type($part_msg->header('content-type'));
924	44		66			157	$p_boundary \|\|= $boundary;
925	44	50				168	dbg("message: found part of type ".$part_msg->{'type'}.", boundary: ".(defined $p_boundary ? $p_boundary : ''));
926
927							# we've created a new node object, so add it to the queue along with the
928							# text that belongs to that part, then add the new part to the current
929							# node to create the tree.
930	44					47	push(@{$self->{'parse_queue'}}, [ $part_msg, $p_boundary, $part_array, $subparse ]);
	44					151
931	44					158	$msg->add_body_part($part_msg);
932
933							# rfc 1521 says /^--boundary--$/, some MUAs may just require /^--boundary--/
934							# but this causes problems with horizontal lines when the boundary is
935							# made up of dashes as well, etc.
936	44	50				71	if (defined $boundary) {
937							# no re "strict"; # since perl 5.21.8: Ranges of ASCII printables...
938	44	100	33			456	if ($line =~ /^--\Q${boundary}\E--\s*$/) {
		50
939							# Make a note that we've seen the end boundary
940	20					40	$self->{mime_boundary_state}->{$boundary}--;
941	20					38	last;
942							}
943							elsif ($line_count && $body->[-$line_count] !~ /^[\041-\071\073-\176]+:/) {
944							# if we aren't on an end boundary and there are still lines left, it
945							# means we hit a new start boundary. therefore, the next line ought
946							# to be a mime header. if it's not, mark it.
947	0					0	$self->{'missing_mime_headers'} = 1;
948							}
949							}
950
951							# make sure we start with a new clean node
952	24					42	$in_body = 0;
953	24					82	$part_msg = Mail::SpamAssassin::Message::Node->new({ normalize=>$self->{normalize} });
954	24					41	undef $part_array;
955	24					28	undef $header;
956
957	24					51	next;
958							}
959
960	2216	100				2501	if (!$in_body) {
961							# s/\s+$//; # bug 5127: don't clean this up (yet)
962							# no re "strict"; # since perl 5.21.8: Ranges of ASCII printables...
963	132	100	66			362	if (/^[\041-\071\073-\176]+[ \t]*:/) {
		100
964	87	100				135	if ($header) {
965	49					136	my ( $key, $value ) = split ( /:\s*/, $header, 2 );
966	49					112	$part_msg->header( $key, $value );
967							}
968	87					93	$header = $_;
969	87					103	next;
970							}
971							elsif (/^[ \t]/ && $header) {
972							# $_ =~ s/^\s*//; # bug 5127, again
973	1					2	$header .= $_;
974	1					2	next;
975							}
976							else {
977	44	100				87	if ($header) {
978	38					125	my ( $key, $value ) = split ( /:\s*/, $header, 2 );
979	38					81	$part_msg->header( $key, $value );
980							}
981	44					54	$in_body = 1;
982
983							# if there's a blank line separator, that's good. if there isn't,
984							# it's a body line, so drop through.
985	44	100				113	if (/^\r?$/) {
986	43					55	next;
987							}
988							else {
989	1					9	$self->{'missing_mime_head_body_separator'} = 1;
990							}
991							}
992							}
993
994							# we run into a perl bug if the lines are astronomically long (probably
995							# due to lots of regexp backtracking); so split any individual line
996							# over MAX_BODY_LINE_LENGTH bytes in length. This can wreck HTML
997							# totally -- but IMHO the only reason a luser would use
998							# MAX_BODY_LINE_LENGTH-byte lines is to crash filters, anyway.
999	2085					2448	while (length ($_) > MAX_BODY_LINE_LENGTH) {
1000	0					0	push (@{$part_array}, substr($_, 0, MAX_BODY_LINE_LENGTH)."\n");
	0					0
1001	0					0	substr($_, 0, MAX_BODY_LINE_LENGTH) = '';
1002							}
1003	2085					1731	push ( @{$part_array}, $_ );
	2085					2764
1004							}
1005
1006							# Look for a message epilogue
1007							# originally ignored whitespace: 0.185 0.2037 0.0654 0.757 0.00 0.00 TVD_TAB
1008							# ham FPs were all "." on a line by itself.
1009							# spams seem to only have NULL chars afterwards ?
1010	20	100				95	if ($line_count) {
1011	8					24	for(; $line_count > 0; $line_count--) {
1012	15	100				58	if ($body->[-$line_count] =~ /[^\s.]/) {
1013	2					6	$self->{mime_epilogue_exists} = 1;
1014	2					9	last;
1015							}
1016							}
1017							}
1018
1019							}
1020
1021							=item _parse_normal()
1022
1023							Generate a leaf node and add it to the parent.
1024
1025							=cut
1026
1027							sub _parse_normal {
1028	153			153		511	my($self, $toparse) = @_;
1029
1030	153					308	my ($msg, $boundary, $body) = @{$toparse};
	153					444
1031
1032	153					501	dbg("message: parsing normal part");
1033
1034							# 0: content-type, 1: boundary, 2: charset, 3: filename
1035	153					606	my @ct = Mail::SpamAssassin::Util::parse_content_type($msg->header('content-type'));
1036
1037							# multipart sections are required to have a boundary set ... If this
1038							# one doesn't, assume it's malformed and revert to text/plain
1039	153	50	33			1090	$msg->{'type'} = ($ct[0] !~ m@^multipart/@i \|\| defined $boundary ) ? $ct[0] : 'text/plain';
1040	153					720	$msg->{'charset'} = $ct[2];
1041
1042							# attempt to figure out a name for this attachment if there is one ...
1043	153		100			667	my $disp = $msg->header('content-disposition') \|\| '';
1044	153	100				718	if ($disp =~ /name=\s*"?([^";]+)"?/i) {
		100
1045	1					4	$msg->{'name'} = $1;
1046							}
1047							elsif ($ct[3]) {
1048	2					6	$msg->{'name'} = $ct[3];
1049							}
1050	153	100				510	if ($msg->{'name'}) {
1051	3					38	$msg->{'name'} = Encode::decode("MIME-Header", $msg->{'name'});
1052							}
1053
1054	153					6102	$msg->{'boundary'} = $boundary;
1055
1056							# If the part type is not one that we're likely to want to use, go
1057							# ahead and write the part data out to a temp file -- why keep sucking
1058							# up RAM with something we're not going to use?
1059							#
1060	153	100				1145	if ($msg->{'type'} !~ m@^(?:text/(?:plain\|html)$\|message\b)@) {
1061	20					28	my($filepath, $fh);
1062							eval {
1063	20					42	($filepath, $fh) = Mail::SpamAssassin::Util::secure_tmpfile(); 1;
	20					46
1064	20	50				23	} or do {
1065	0	0				0	my $eval_stat = $@ ne '' ? $@ : "errno=$!"; chomp $eval_stat;
	0					0
1066	0					0	info("message: failed to create a temp file: %s", $eval_stat);
1067							};
1068	20	50				33	if ($fh) {
1069							# The temp file was created, add it to the list of pending deletions
1070							# we cannot just delete immediately in the POSIX idiom, as this is
1071							# unportable (to win32 at least)
1072	20					18	push @{$self->{tmpfiles}}, $filepath;
	20					49
1073	20					44	dbg("message: storing a message part to file %s", $filepath);
1074	20	50				20	$fh->print(@{$body}) or die "error writing to $filepath: $!";
	20					79
1075	20	50				884	$fh->flush or die "error writing (flush) to $filepath: $!";
1076	20					91	$msg->{'raw'} = $fh;
1077							}
1078							}
1079
1080							# if the part didn't get a temp file, go ahead and store the data in memory
1081	153	100				598	if (!defined $msg->{'raw'}) {
1082	133					498	dbg("message: storing a body to memory");
1083	133					722	$msg->{'raw'} = $body;
1084							}
1085							}
1086
1087							# ---------------------------------------------------------------------------
1088
1089							sub get_mimepart_digests {
1090	0			0	0	0	my ($self) = @_;
1091
1092	0	0				0	if (!exists $self->{mimepart_digests}) {
1093							# traverse all parts which are leaves, recursively
1094							$self->{mimepart_digests} =
1095	0		0			0	[ map(sha1_hex($_->decode) . ':' . lc($_->{type}\|\|''),
1096							$self->find_parts(qr/^/,1,1)) ];
1097							}
1098	0					0	return $self->{mimepart_digests};
1099							}
1100
1101							# ---------------------------------------------------------------------------
1102
1103							# common code for get_rendered_body_text_array,
1104							# get_visible_rendered_body_text_array, get_invisible_rendered_body_text_array
1105							#
1106							sub get_body_text_array_common {
1107	375			375	0	781	my ($self, $method_name) = @_;
1108
1109	375					863	my $key = 'text_' . $method_name;
1110	375	100				1019	if (exists $self->{$key}) { return $self->{$key} }
	247					781
1111
1112	128					538	$self->{$key} = [];
1113
1114							# Find all parts which are leaves
1115	128					1053	my @parts = $self->find_parts(qr/./,1);
1116	128	50				593	return $self->{$key} unless @parts;
1117
1118							# the html metadata may have already been set, so let's not bother if it's
1119							# already been done.
1120	128					425	my $html_needs_setting = !exists $self->{metadata}->{html};
1121
1122	128	100	100			794	my $text = $method_name eq 'invisible_rendered' ? ''
1123							: ($self->get_header('subject') \|\| "\n");
1124
1125							# Go through each part
1126	128					645	for (my $pt = 0 ; $pt <= $#parts ; $pt++ ) {
1127	129					293	my $p = $parts[$pt];
1128
1129							# put a blank line between parts ...
1130	129	100				480	$text .= "\n" if $text ne '';
1131
1132	129					1099	my($type, $rnd) = $p->$method_name(); # decode this part
1133	129	50				495	if ( defined $rnd ) {
1134							# Only text/* types are rendered ...
1135	129					770	$text .= $rnd;
1136
1137							# TVD - if there are multiple parts, what should we do?
1138							# right now, just use the last one. we may need to give some priority
1139							# at some point, ie: use text/html rendered if it exists, or
1140							# text/plain rendered as html otherwise.
1141	129	100	100			1067	if ($html_needs_setting && $type eq 'text/html') {
1142	7					53	$self->{metadata}->{html} = $p->{html_results};
1143							}
1144							}
1145							}
1146
1147							# whitespace handling (warning: small changes have large effects!)
1148	128					1892	$text =~ s/\n+\s*\n+/\f/gs; # double newlines => form feed
1149							# $text =~ tr/ \t\n\r\x0b\xa0/ /s; # whitespace (incl. VT, NBSP) => space
1150	128					751	$text =~ tr/ \t\n\r\x0b/ /s; # whitespace (incl. VT) => space
1151	128					418	$text =~ tr/\f/\n/; # form feeds => newline
1152
1153	128					617	my @textary = split_into_array_of_short_lines($text);
1154	128					544	$self->{$key} = \@textary;
1155
1156	128					555	return $self->{$key};
1157							}
1158
1159							# ---------------------------------------------------------------------------
1160
1161							sub get_rendered_body_text_array {
1162	327			327	0	680	my ($self) = @_;
1163	327					911	return $self->get_body_text_array_common('rendered');
1164							}
1165
1166							sub get_visible_rendered_body_text_array {
1167	24			24	0	58	my ($self) = @_;
1168	24					122	return $self->get_body_text_array_common('visible_rendered');
1169							}
1170
1171							sub get_invisible_rendered_body_text_array {
1172	24			24	0	56	my ($self) = @_;
1173	24					60	return $self->get_body_text_array_common('invisible_rendered');
1174							}
1175
1176							# ---------------------------------------------------------------------------
1177
1178							sub get_decoded_body_text_array {
1179	98			98	0	257	my ($self) = @_;
1180
1181	98	100				461	if (defined $self->{text_decoded}) { return $self->{text_decoded}; }
	2					17
1182	96					468	$self->{text_decoded} = [ ];
1183
1184							# Find all parts which are leaves
1185	96					614	my @parts = $self->find_parts(qr/^(?:text\|message)\b/i,1);
1186	96	50				398	return $self->{text_decoded} unless @parts;
1187
1188							# Go through each part
1189	96					458	for(my $pt = 0 ; $pt <= $#parts ; $pt++ ) {
1190							# bug 4843: skip text/calendar parts since they're usually an attachment
1191							# and not displayed
1192	96	50				369	next if ($parts[$pt]->{'type'} eq 'text/calendar');
1193
1194	96					193	push(@{$self->{text_decoded}},
	96					405
1195							split_into_array_of_short_paragraphs($parts[$pt]->decode()));
1196							}
1197
1198	96					349	return $self->{text_decoded};
1199							}
1200
1201							# ---------------------------------------------------------------------------
1202
1203							sub split_into_array_of_short_lines {
1204	128			128	0	264	my @result;
1205	128					877	foreach my $line (split (/^/m, $_[0])) {
1206	488					1063	while (length ($line) > MAX_BODY_LINE_LENGTH) {
1207							# try splitting "nicely" so that we don't chop a url in half or
1208							# something. if there's no space, then just split at max length.
1209	4					10	my $length = rindex($line, ' ', MAX_BODY_LINE_LENGTH) + 1;
1210	4		50			8	$length \|\|= MAX_BODY_LINE_LENGTH;
1211	4					21	push (@result, substr($line, 0, $length, ''));
1212							}
1213	488					957	push (@result, $line);
1214							}
1215	128					555	@result;
1216							}
1217
1218							# ---------------------------------------------------------------------------
1219
1220							# split a text into array of paragraphs of sizes between
1221							# $chunk_size and 2 * $chunk_size, returning the resulting array
1222
1223							sub split_into_array_of_short_paragraphs {
1224	96			96	0	279	my @result;
1225	96					218	my $chunk_size = 1024;
1226	96					239	my $text_l = length($_[0]);
1227	96					183	my($j,$ofs);
1228	96					488	for ($ofs = 0; $text_l - $ofs > 2 * $chunk_size; $ofs = $j+1) {
1229	8					32	$j = index($_[0], "\n", $ofs+$chunk_size);
1230	8	50				30	if ($j < 0) {
1231	0					0	$j = index($_[0], " ", $ofs+$chunk_size);
1232	0	0				0	if ($j < 0) { $j = $ofs+$chunk_size }
	0					0
1233							}
1234	8					45	push(@result, substr($_[0], $ofs, $j-$ofs+1));
1235							}
1236	96	100				598	push(@result, substr($_[0], $ofs)) if $ofs < $text_l;
1237	96					507	@result;
1238							}
1239
1240							# ---------------------------------------------------------------------------
1241
1242							1;
1243
1244							=back
1245
1246							=cut