File Coverage

blib/lib/MIME/Parser.pm
Criterion Covered Total %
statement 391 446 87.6
branch 129 198 65.1
condition 25 40 62.5
subroutine 51 62 82.2
pod 32 46 69.5
total 628 792 79.2


line stmt bran cond sub pod time code
1             package MIME::Parser;
2              
3              
4             =head1 NAME
5              
6             MIME::Parser - experimental class for parsing MIME streams
7              
8              
9             =head1 SYNOPSIS
10              
11             Before reading further, you should see L to make sure that
12             you understand where this module fits into the grand scheme of things.
13             Go on, do it now. I'll wait.
14              
15             Ready? Ok...
16              
17             =head2 Basic usage examples
18              
19             ### Create a new parser object:
20             my $parser = new MIME::Parser;
21              
22             ### Tell it where to put things:
23             $parser->output_under("/tmp");
24              
25             ### Parse an input filehandle:
26             $entity = $parser->parse(\*STDIN);
27              
28             ### Congratulations: you now have a (possibly multipart) MIME entity!
29             $entity->dump_skeleton; # for debugging
30              
31              
32             =head2 Examples of input
33              
34             ### Parse from filehandles:
35             $entity = $parser->parse(\*STDIN);
36             $entity = $parser->parse(IO::File->new("some command|");
37              
38             ### Parse from any object that supports getline() and read():
39             $entity = $parser->parse($myHandle);
40              
41             ### Parse an in-core MIME message:
42             $entity = $parser->parse_data($message);
43              
44             ### Parse an MIME message in a file:
45             $entity = $parser->parse_open("/some/file.msg");
46              
47             ### Parse an MIME message out of a pipeline:
48             $entity = $parser->parse_open("gunzip - < file.msg.gz |");
49              
50             ### Parse already-split input (as "deliver" would give it to you):
51             $entity = $parser->parse_two("msg.head", "msg.body");
52              
53              
54             =head2 Examples of output control
55              
56             ### Keep parsed message bodies in core (default outputs to disk):
57             $parser->output_to_core(1);
58              
59             ### Output each message body to a one-per-message directory:
60             $parser->output_under("/tmp");
61              
62             ### Output each message body to the same directory:
63             $parser->output_dir("/tmp");
64              
65             ### Change how nameless message-component files are named:
66             $parser->output_prefix("msg");
67              
68             ### Put temporary files somewhere else
69             $parser->tmp_dir("/var/tmp/mytmpdir");
70              
71             =head2 Examples of error recovery
72              
73             ### Normal mechanism:
74             eval { $entity = $parser->parse(\*STDIN) };
75             if ($@) {
76             $results = $parser->results;
77             $decapitated = $parser->last_head; ### get last top-level head
78             }
79              
80             ### Ultra-tolerant mechanism:
81             $parser->ignore_errors(1);
82             $entity = eval { $parser->parse(\*STDIN) };
83             $error = ($@ || $parser->last_error);
84              
85             ### Cleanup all files created by the parse:
86             eval { $entity = $parser->parse(\*STDIN) };
87             ...
88             $parser->filer->purge;
89              
90              
91             =head2 Examples of parser options
92              
93             ### Automatically attempt to RFC 2047-decode the MIME headers?
94             $parser->decode_headers(1); ### default is false
95              
96             ### Parse contained "message/rfc822" objects as nested MIME streams?
97             $parser->extract_nested_messages(0); ### default is true
98              
99             ### Look for uuencode in "text" messages, and extract it?
100             $parser->extract_uuencode(1); ### default is false
101              
102             ### Should we forgive normally-fatal errors?
103             $parser->ignore_errors(0); ### default is true
104              
105              
106             =head2 Miscellaneous examples
107              
108             ### Convert a Mail::Internet object to a MIME::Entity:
109             my $data = join('', (@{$mail->header}, "\n", @{$mail->body}));
110             $entity = $parser->parse_data(\$data);
111              
112              
113              
114             =head1 DESCRIPTION
115              
116             You can inherit from this class to create your own subclasses
117             that parse MIME streams into MIME::Entity objects.
118              
119              
120             =head1 PUBLIC INTERFACE
121              
122             =cut
123              
124             #------------------------------
125              
126             require 5.004;
127              
128             ### Pragmas:
129 17     17   416004 use strict;
  17         53  
  17         531  
130 17     17   64 use vars (qw($VERSION $CAT $CRLF));
  17         22  
  17         846  
131              
132             ### core Perl modules
133 17     17   4912 use IO::File;
  17         67579  
  17         1887  
134 17     17   97 use File::Spec;
  17         19  
  17         311  
135 17     17   57 use File::Path;
  17         21  
  17         779  
136 17     17   67 use Config qw(%Config);
  17         21  
  17         548  
137 17     17   63 use Carp;
  17         20  
  17         787  
138              
139             ### Kit modules:
140 17     17   4604 use MIME::Tools qw(:config :utils :msgtypes usage tmpopen );
  17         31  
  17         2966  
141 17     17   6229 use MIME::Head;
  17         40  
  17         483  
142 17     17   5841 use MIME::Body;
  17         32  
  17         345  
143 17     17   7982 use MIME::Entity;
  17         44  
  17         494  
144 17     17   90 use MIME::Decoder;
  17         22  
  17         285  
145 17     17   7198 use MIME::Parser::Reader;
  17         32  
  17         474  
146 17     17   7121 use MIME::Parser::Filer;
  17         39  
  17         430  
147 17     17   6327 use MIME::Parser::Results;
  17         31  
  17         67135  
148              
149             #------------------------------
150             #
151             # Globals
152             #
153             #------------------------------
154              
155             ### The package version, both in 1.23 style *and* usable by MakeMaker:
156             $VERSION = "5.509";
157              
158             ### How to catenate:
159             $CAT = '/bin/cat';
160              
161             ### The CRLF sequence:
162             $CRLF = "\015\012";
163              
164             ### Who am I?
165             my $ME = 'MIME::Parser';
166              
167              
168              
169             #------------------------------------------------------------
170              
171             =head2 Construction
172              
173             =over 4
174              
175             =cut
176              
177             #------------------------------
178              
179             =item new ARGS...
180              
181             I
182             Create a new parser object.
183             Once you do this, you can then set up various parameters
184             before doing the actual parsing. For example:
185              
186             my $parser = new MIME::Parser;
187             $parser->output_dir("/tmp");
188             $parser->output_prefix("msg1");
189             my $entity = $parser->parse(\*STDIN);
190              
191             Any arguments are passed into C.
192             Don't override this in your subclasses; override init() instead.
193              
194             =cut
195              
196             sub new {
197 43     43 1 13212 my $self = bless {}, shift;
198 43         174 $self->init(@_);
199             }
200              
201             #------------------------------
202              
203             =item init ARGS...
204              
205             I
206             Initiallize a new MIME::Parser object.
207             This is automatically sent to a new object; you may want to override it.
208             If you override this, be sure to invoke the inherited method.
209              
210             =cut
211              
212             sub init {
213 43     43 1 70 my $self = shift;
214              
215 43         195 $self->{MP5_DecodeHeaders} = 0;
216 43         70 $self->{MP5_DecodeBodies} = 1;
217 43         88 $self->{MP5_Interface} = {};
218 43         81 $self->{MP5_ParseNested} = 'NEST';
219 43         54 $self->{MP5_TmpToCore} = 0;
220 43         64 $self->{MP5_IgnoreErrors} = 1;
221 43         58 $self->{MP5_UUDecode} = 0;
222 43         99 $self->{MP5_MaxParts} = -1;
223 43         74 $self->{MP5_TmpDir} = undef;
224              
225 43         143 $self->interface(ENTITY_CLASS => 'MIME::Entity');
226 43         94 $self->interface(HEAD_CLASS => 'MIME::Head');
227              
228 43         130 $self->output_dir(".");
229              
230 43         93 $self;
231             }
232              
233             #------------------------------
234              
235             =item init_parse
236              
237             I
238             Invoked automatically whenever one of the top-level parse() methods
239             is called, to reset the parser to a "ready" state.
240              
241             =cut
242              
243             sub init_parse {
244 54     54 1 82 my $self = shift;
245              
246 54         350 $self->{MP5_Results} = new MIME::Parser::Results;
247              
248 54         181 $self->{MP5_Filer}->results($self->{MP5_Results});
249 54         214 $self->{MP5_Filer}->purgeable([]);
250 54         190 $self->{MP5_Filer}->init_parse();
251 54         81 $self->{MP5_NumParts} = 0;
252 54         71 1;
253             }
254              
255             =back
256              
257             =cut
258              
259              
260              
261              
262              
263             #------------------------------------------------------------
264              
265             =head2 Altering how messages are parsed
266              
267             =over 4
268              
269             =cut
270              
271             #------------------------------
272              
273             =item decode_headers [YESNO]
274              
275             I
276             Controls whether the parser will attempt to decode all the MIME headers
277             (as per RFC 2047) the moment it sees them. B
278             for two very important reasons:>
279              
280             =over
281              
282             =item *
283              
284             B
285             If you fully decode the headers into bytes, you can inadvertently
286             transform a parseable MIME header like this:
287              
288             Content-type: text/plain; filename="=?ISO-8859-1?Q?Hi=22Ho?="
289              
290             into unparseable gobbledygook; in this case:
291              
292             Content-type: text/plain; filename="Hi"Ho"
293              
294             =item *
295              
296             B An encoded string which contains
297             both Latin-1 and Cyrillic characters will be turned into a binary
298             mishmosh which simply can't be rendered.
299              
300             =back
301              
302             B
303             This method was once the only out-of-the-box way to deal with attachments
304             whose filenames had non-ASCII characters. However, since MIME-tools 5.4xx
305             this is no longer necessary.
306              
307             B
308             If YESNO is true, decoding is done. However, you will get a warning
309             unless you use one of the special "true" values:
310              
311             "I_NEED_TO_FIX_THIS"
312             Just shut up and do it. Not recommended.
313             Provided only for those who need to keep old scripts functioning.
314              
315             "I_KNOW_WHAT_I_AM_DOING"
316             Just shut up and do it. Not recommended.
317             Provided for those who REALLY know what they are doing.
318              
319             If YESNO is false (the default), no attempt at decoding will be done.
320             With no argument, just returns the current setting.
321             B you can always decode the headers I the parsing
322             has completed (see L), or
323             decode the words on demand (see L).
324              
325             =cut
326              
327             sub decode_headers {
328 0     0 1 0 my ($self, $yesno) = @_;
329 0 0       0 if (@_ > 1) {
330 0         0 $self->{MP5_DecodeHeaders} = $yesno;
331 0 0       0 if ($yesno) {
332 0 0 0     0 if (($yesno eq "I_KNOW_WHAT_I_AM_DOING") ||
333             ($yesno eq "I_NEED_TO_FIX_THIS")) {
334             ### ok
335             }
336             else {
337 0         0 $self->whine("as of 5.4xx, decode_headers() should NOT be ".
338             "set true... if you are doing this to make sure ".
339             "that non-ASCII filenames are translated, ".
340             "that's now done automatically; for all else, ".
341             "use MIME::Words.");
342             }
343             }
344             }
345 0         0 $self->{MP5_DecodeHeaders};
346             }
347              
348             #------------------------------
349              
350             =item extract_nested_messages OPTION
351              
352             I
353             Some MIME messages will contain a part of type C
354             ,C or C:
355             literally, the text of an embedded mail/news/whatever message.
356             This option controls whether (and how) we parse that embedded message.
357              
358             If the OPTION is false, we treat such a message just as if it were a
359             C document, without attempting to decode its contents.
360              
361             If the OPTION is true (the default), the body of the C
362             or C part is parsed by this parser, creating an
363             entity object. What happens then is determined by the actual OPTION:
364              
365             =over 4
366              
367             =item NEST or 1
368              
369             The default setting.
370             The contained message becomes the sole "part" of the C
371             entity (as if the containing message were a special kind of
372             "multipart" message).
373             You can recover the sub-entity by invoking the L
374             method on the C entity.
375              
376             =item REPLACE
377              
378             The contained message replaces the C entity, as though
379             the C "container" never existed.
380              
381             B notice that, with this option, all the header information
382             in the C header is lost. This might seriously bother
383             you if you're dealing with a top-level message, and you've just lost
384             the sender's address and the subject line. C<:-/>.
385              
386             =back
387              
388             I
389              
390             =cut
391              
392             sub extract_nested_messages {
393 51     51 1 140 my ($self, $option) = @_;
394 51 100       122 $self->{MP5_ParseNested} = $option if (@_ > 1);
395 51         132 $self->{MP5_ParseNested};
396             }
397              
398             sub parse_nested_messages {
399 0     0 0 0 usage "parse_nested_messages() is now extract_nested_messages()";
400 0         0 shift->extract_nested_messages(@_);
401             }
402              
403             #------------------------------
404              
405             =item extract_uuencode [YESNO]
406              
407             I
408             If set true, then whenever we are confronted with a message
409             whose effective content-type is "text/plain" and whose encoding
410             is 7bit/8bit/binary, we scan the encoded body to see if it contains
411             uuencoded data (generally given away by a "begin XXX" line).
412              
413             If it does, we explode the uuencoded message into a multipart,
414             where the text before the first "begin XXX" becomes the first part,
415             and all "begin...end" sections following become the subsequent parts.
416             The filename (if given) is accessible through the normal means.
417              
418             =cut
419              
420             sub extract_uuencode {
421 259     259 1 363 my ($self, $yesno) = @_;
422 259 100       431 $self->{MP5_UUDecode} = $yesno if @_ > 1;
423 259         598 $self->{MP5_UUDecode};
424             }
425              
426             #------------------------------
427              
428             =item ignore_errors [YESNO]
429              
430             I
431             Controls whether the parser will attempt to ignore normally-fatal
432             errors, treating them as warnings and continuing with the parse.
433              
434             If YESNO is true (the default), many syntax errors are tolerated.
435             If YESNO is false, fatal errors throw exceptions.
436             With no argument, just returns the current setting.
437              
438             =cut
439              
440             sub ignore_errors {
441 25     25 1 84 my ($self, $yesno) = @_;
442 25 50       62 $self->{MP5_IgnoreErrors} = $yesno if (@_ > 1);
443 25         32 $self->{MP5_IgnoreErrors};
444             }
445              
446              
447             #------------------------------
448              
449             =item decode_bodies [YESNO]
450              
451             I
452             Controls whether the parser should decode entity bodies or not.
453             If this is set to a false value (default is true), all entity bodies
454             will be kept as-is in the original content-transfer encoding.
455              
456             To prevent double encoding on the output side MIME::Body->is_encoded
457             is set, which tells MIME::Body not to encode the data again, if encoded
458             data was requested. This is in particular useful, when it's important that
459             the content B be modified, e.g. if you want to calculate
460             OpenPGP signatures from it.
461              
462             B: the semantics change significantly if you parse MIME
463             messages with this option set, because MIME::Entity resp. MIME::Body
464             *always* see encoded data now, while the default behaviour is
465             working with *decoded* data (and encoding it only if you request it).
466             You need to decode the data yourself, if you want to have it decoded.
467              
468             So use this option only if you exactly know, what you're doing, and
469             that you're sure, that you really need it.
470              
471             =cut
472              
473             sub decode_bodies {
474 262     262 1 477 my ($self, $yesno) = @_;
475 262 100       744 $self->{MP5_DecodeBodies} = $yesno if (@_ > 1);
476 262         681 $self->{MP5_DecodeBodies};
477             }
478              
479             #------------------------------
480             #
481             # MESSAGES...
482             #
483              
484             #------------------------------
485             #
486             # debug MESSAGE...
487             #
488             sub debug {
489 1265     1265 0 1013 my $self = shift;
490 1265 50       3216 if (MIME::Tools->debugging()) {
491 0 0       0 if (my $r = $self->{MP5_Results}) {
492 0         0 unshift @_, $r->indent;
493 0         0 $r->msg($M_DEBUG, @_);
494             }
495 0         0 MIME::Tools::debug(@_);
496             }
497             }
498              
499             #------------------------------
500             #
501             # whine PROBLEM...
502             #
503             sub whine {
504 6     6 0 7 my $self = shift;
505 6 50       17 if (my $r = $self->{MP5_Results}) {
506 6         37 unshift @_, $r->indent;
507 6         33 $r->msg($M_WARNING, @_);
508             }
509 6         26 &MIME::Tools::whine(@_);
510             }
511              
512             #------------------------------
513             #
514             # error PROBLEM...
515             #
516             # Possibly-forgivable parse error occurred.
517             # Raises a fatal exception unless we are ignoring errors.
518             #
519             sub error {
520 5     5 1 7 my $self = shift;
521 5 50       14 if (my $r = $self->{MP5_Results}) {
522 5         17 unshift @_, $r->indent;
523 5         15 $r->msg($M_ERROR, @_);
524             }
525 5         15 &MIME::Tools::error(@_);
526 5 50       16 $self->{MP5_IgnoreErrors} ? return undef : die @_;
527             }
528              
529              
530              
531              
532             #------------------------------
533             #
534             # PARSING...
535             #
536              
537             #------------------------------
538             #
539             # process_preamble IN, READER, ENTITY
540             #
541             # I
542             # Dispose of a multipart message's preamble.
543             #
544             sub process_preamble {
545 40     40 0 60 my ($self, $in, $rdr, $ent) = @_;
546              
547             ### Sanity:
548 40 50       113 ($rdr->depth > 0) or die "$ME: internal logic error";
549              
550             ### Parse preamble:
551 40         49 my @saved;
552 40         57 my $data = '';
553 40 50       411 open(my $fh, '>', \$data) or die $!;
554 40         131 $rdr->read_chunk($in, $fh, 1);
555 40         64 close $fh;
556              
557             # Ugh. Horrible. If the preamble consists only of CRLF, squash it down
558             # to the empty string. Else, remove the trailing CRLF.
559 40 100       108 if( $data =~ m/^[\r\n]\z/ ) {
560 2         5 @saved = ('');
561             } else {
562 38         91 $data =~ s/[\r\n]\z//;
563 38         117 @saved = split(/^/, $data);
564             }
565 40         161 $ent->preamble(\@saved);
566 40         103 1;
567             }
568              
569             #------------------------------
570             #
571             # process_epilogue IN, READER, ENTITY
572             #
573             # I
574             # Dispose of a multipart message's epilogue.
575             #
576             sub process_epilogue {
577 38     38 0 63 my ($self, $in, $rdr, $ent) = @_;
578 38         70 $self->debug("process_epilogue");
579              
580             ### Parse epilogue:
581 38         39 my @saved;
582 38         976 $rdr->read_lines($in, \@saved);
583 38         127 $ent->epilogue(\@saved);
584 38         47 1;
585             }
586              
587             #------------------------------
588             #
589             # process_to_bound IN, READER, OUT
590             #
591             # I
592             # Dispose of the next chunk into the given output stream OUT.
593             #
594             sub process_to_bound {
595 97     97 0 120 my ($self, $in, $rdr, $out) = @_;
596              
597             ### Parse:
598 97         264 $rdr->read_chunk($in, $out);
599 97         99 1;
600             }
601              
602             #------------------------------
603             #
604             # process_header IN, READER
605             #
606             # I
607             # Process and return the next header.
608             # Return undef if, instead of a header, the encapsulation boundary is found.
609             # Fatal exception on failure.
610             #
611             sub process_header {
612 172     172 0 223 my ($self, $in, $rdr) = @_;
613 172         284 $self->debug("process_header");
614              
615             ### Parse and save the (possibly empty) header, up to and including the
616             ### blank line that terminates it:
617 172         263 my $head = $self->interface('HEAD_CLASS')->new;
618              
619             ### Read the lines of the header.
620             ### We localize IO inside here, so that we can support the IO:: interface
621 172         3160 my @headlines;
622 172         462 my $hdr_rdr = $rdr->spawn;
623 172         446 $hdr_rdr->add_terminator("");
624 172         297 $hdr_rdr->add_terminator("\r"); ### sigh
625              
626 172         166 my $headstr = '';
627 10 50   10   80 open(my $outfh, '>:scalar', \$headstr) or die $!;
  10         11  
  10         63  
  172         2069  
628 172         9448 $hdr_rdr->read_chunk($in, $outfh, 0, 1);
629 172         249 close $outfh;
630              
631             ### How did we do?
632 172 100       368 if ($hdr_rdr->eos_type eq 'DELIM') {
633 2         6 $self->whine("bogus part, without CRLF before body");
634 2         13 return undef;
635             }
636 170 100       327 ($hdr_rdr->eos_type eq 'DONE') or
637             $self->error("unexpected end of header\n");
638              
639             ### Extract the header (note that zero-size headers are admissible!):
640 170 50       1434 open(my $readfh, '<:scalar', \$headstr) or die $!;
641 170         520 $head->read( $readfh );
642              
643 170 50       50506 unless( $readfh->eof() ) {
644             # Not entirely correct, since ->read consumes the line it gives up on.
645             # it's actually the line /before/ the one we get with ->getline
646 0         0 $self->error("couldn't parse head; error near:\n", $readfh->getline());
647             }
648              
649              
650             ### If desired, auto-decode the header as per RFC 2047
651             ### This shouldn't affect non-encoded headers; however, it will decode
652             ### headers with international characters. WARNING: currently, the
653             ### character-set information is LOST after decoding.
654 170 50       1247 $head->decode($self->{MP5_DecodeHeaders}) if $self->{MP5_DecodeHeaders};
655              
656             ### If this is the top-level head, save it:
657 170 100       346 $self->results->top_head($head) if !$self->results->top_head;
658              
659 170         1364 return $head;
660             }
661              
662             #------------------------------
663             #
664             # process_multipart IN, READER, ENTITY
665             #
666             # I
667             # Process the multipart body, and return the state.
668             # Fatal exception on failure.
669             # Invoked by process_part().
670             #
671             sub process_multipart {
672 40     40 0 61 my ($self, $in, $rdr, $ent) = @_;
673 40         104 my $head = $ent->head;
674              
675 40         86 $self->debug("process_multipart...");
676              
677             ### Get actual type and subtype from the header:
678 40         157 my ($type, $subtype) = (split('/', $head->mime_type, -1), '');
679              
680             ### If this was a type "multipart/digest", then the RFCs say we
681             ### should default the parts to have type "message/rfc822".
682             ### Thanks to Carsten Heyl for suggesting this...
683 40 100       113 my $retype = (($subtype eq 'digest') ? 'message/rfc822' : '');
684              
685             ### Get the boundaries for the parts:
686 40         110 my $bound = $head->multipart_boundary;
687 40 50 33     251 if (!defined($bound) || ($bound =~ /[\r\n]/)) {
688 0         0 $self->error("multipart boundary is missing, or contains CR or LF\n");
689 0         0 $ent->effective_type("application/x-unparseable-multipart");
690 0         0 return $self->process_singlepart($in, $rdr, $ent);
691             }
692 40         132 my $part_rdr = $rdr->spawn->add_boundary($bound);
693              
694             ### Prepare to parse:
695 40         53 my $eos_type;
696             my $more_parts;
697              
698             ### Parse preamble...
699 40         125 $self->process_preamble($in, $part_rdr, $ent);
700              
701             ### ...and look at how we finished up:
702 40         91 $eos_type = $part_rdr->eos_type;
703 40 100       105 if ($eos_type eq 'DELIM'){ $more_parts = 1 }
  39 50       43  
704 1         3 elsif ($eos_type eq 'CLOSE'){ $self->whine("empty multipart message\n");
705 1         1 $more_parts = 0; }
706 0         0 else { $self->error("unexpected end of preamble\n");
707 0         0 return 1; }
708              
709             ### Parse parts:
710 40         51 my $partno = 0;
711 40         47 my $part;
712 40         94 while ($more_parts) {
713 105         117 ++$partno;
714 105         313 $self->debug("parsing part $partno...");
715              
716             ### Parse the next part, and add it to the entity...
717 105         291 my $part = $self->process_part($in, $part_rdr, Retype=>$retype);
718 105 50       217 return undef unless defined($part);
719              
720 105         294 $ent->add_part($part);
721              
722             ### ...and look at how we finished up:
723 105         263 $eos_type = $part_rdr->eos_type;
724 105 100       256 if ($eos_type eq 'DELIM') { $more_parts = 1 }
  66 100       135  
725 37         102 elsif ($eos_type eq 'CLOSE') { $more_parts = 0; }
726 2         7 else { $self->error("unexpected end of parts ".
727             "before epilogue\n");
728 2         14 return 1; }
729             }
730              
731             ### Parse epilogue...
732             ### (note that we use the *parent's* reader here, which does not
733             ### know about the boundaries in this multipart!)
734 38         111 $self->process_epilogue($in, $rdr, $ent);
735              
736             ### ...and there's no need to look at how we finished up!
737 38         227 1;
738             }
739              
740             #------------------------------
741             #
742             # process_singlepart IN, READER, ENTITY
743             #
744             # I
745             # Process the singlepart body. Returns true.
746             # Fatal exception on failure.
747             # Invoked by process_part().
748             #
749             sub process_singlepart {
750 117     117 0 135 my ($self, $in, $rdr, $ent) = @_;
751 117         253 my $head = $ent->head;
752              
753 117         207 $self->debug("process_singlepart...");
754              
755             ### Obtain a filehandle for reading the encoded information:
756             ### We have two different approaches, based on whether or not we
757             ### have to contend with boundaries.
758 117         97 my $ENCODED; ### handle
759 117   100     284 my $can_shortcut = (!$rdr->has_bounds and !$self->{MP5_UUDecode});
760 117 100       182 if ($can_shortcut) {
761 20         45 $self->debug("taking shortcut");
762              
763 20         19 $ENCODED = $in;
764 20         52 $rdr->eos('EOF'); ### be sure to bogus-up the reader state to EOF:
765             }
766             else {
767              
768 97         136 $self->debug("using temp file");
769 97         204 $ENCODED = $self->new_tmpfile();
770              
771             ### Read encoded body until boundary (or EOF)...
772 97         273 $self->process_to_bound($in, $rdr, $ENCODED);
773              
774             ### ...and look at how we finished up.
775             ### If we have bounds, we want DELIM or CLOSE.
776             ### Otherwise, we want EOF (and that's all we'd get, anyway!).
777 97 100       207 if ($rdr->has_bounds) {
778 96 100       187 ($rdr->eos_type =~ /^(DELIM|CLOSE)$/) or
779             $self->error("part did not end with expected boundary\n");
780             }
781              
782             ### Flush and rewind encoded buffer, so we can read it:
783 97 50       3087 $ENCODED->flush or die "$ME: can't flush: $!";
784 97 50       437 $ENCODED->seek(0, 0) or die "$ME: can't seek: $!";
785             }
786              
787             ### Get a content-decoder to decode this part's encoding:
788 117         845 my $encoding = $head->mime_encoding;
789 117         608 my $decoder = new MIME::Decoder $encoding;
790 117 50       310 if (!$decoder) {
791 0         0 $self->whine("Unsupported encoding '$encoding': using 'binary'... \n".
792             "The entity will have an effective MIME type of \n".
793             "application/octet-stream."); ### as per RFC-2045
794 0         0 $ent->effective_type('application/octet-stream');
795 0         0 $decoder = new MIME::Decoder 'binary';
796 0         0 $encoding = 'binary';
797             }
798              
799             ### Data should be stored encoded / as-is?
800 117 100       256 if ( !$self->decode_bodies ) {
801 7         14 $decoder = new MIME::Decoder 'binary';
802 7         27 $encoding = 'binary';
803             }
804              
805             ### If desired, sidetrack to troll for UUENCODE:
806 117         272 $self->debug("extract uuencode? ", $self->extract_uuencode);
807 117         235 $self->debug("encoding? ", $encoding);
808 117         348 $self->debug("effective type? ", $ent->effective_type);
809              
810 117 50 66     235 if ($self->extract_uuencode and
      66        
811             ($encoding =~ /^(7bit|8bit|binary)\Z/) and
812             ($ent->effective_type =~
813             m{^(?:text/plain|application/mac-binhex40|application/mac-binhex)\Z})) {
814             ### Hunt for it:
815 3         3 my $uu_ent = eval { $self->hunt_for_uuencode($ENCODED, $ent) };
  3         8  
816 3 100       9 if ($uu_ent) { ### snark
817 2         8 %$ent = %$uu_ent;
818 2         16 return 1;
819             }
820             else { ### boojum
821 1         6 $self->whine("while hunting for uuencode: $@");
822 1 50       3 $ENCODED->seek(0,0) or die "$ME: can't seek: $!";
823             }
824             }
825              
826             ### Open a new bodyhandle for outputting the data:
827 115 50       277 my $body = $self->new_body_for($head) or die "$ME: no body"; # gotta die
828 115 100 50     302 $body->binmode(1) or die "$ME: can't set to binmode: $!"
      100        
829             unless textual_type($ent->effective_type) or !$self->decode_bodies;
830 115 100       246 $body->is_encoded(1) if !$self->decode_bodies;
831              
832             ### Decode and save the body (using the decoder):
833 115 50       361 my $DECODED = $body->open("w") or die "$ME: body not opened: $!";
834 115         1350 eval { $decoder->decode($ENCODED, $DECODED); };
  115         503  
835 115 50       233 $@ and $self->error($@);
836 115 50       331 $DECODED->close or die "$ME: can't close: $!";
837              
838             ### Success! Remember where we put stuff:
839 115         3612 $ent->bodyhandle($body);
840              
841             ### Done!
842 115         724 1;
843             }
844              
845             #------------------------------
846             #
847             # hunt_for_uuencode ENCODED, ENTITY
848             #
849             # I
850             # Try to detect and dispatch embedded uuencode as a fake multipart message.
851             # Returns new entity or undef.
852             #
853             sub hunt_for_uuencode {
854 3     3 0 5 my ($self, $ENCODED, $ent) = @_;
855 3         2 my ($good, $how_encoded);
856 3         3 local $_;
857 3         5 $self->debug("sniffing around for UUENCODE");
858              
859             ### Heuristic:
860 3 50       9 $ENCODED->seek(0,0) or die "$ME: can't seek: $!";
861 3         77 while (defined($_ = $ENCODED->getline)) {
862 18 100       318 if ($good = /^begin [0-7]{3}/) {
863 2         3 $how_encoded = 'uu';
864 2         3 last;
865             }
866 16 50       207 if ($good = /^\(This file must be converted with/i) {
867 0         0 $how_encoded = 'binhex';
868 0         0 last;
869             }
870             }
871 3 100       23 $good or do { $self->debug("no one made the cut"); return 0 };
  1         3  
  1         2  
872              
873             # If a decoder doesn't exist for this type, forget it!
874 2 50       9 my $decoder = MIME::Decoder->new(($how_encoded eq 'uu')?'x-uuencode'
875             :'binhex');
876 2 50       6 unless (defined($decoder)) {
877 0         0 $self->debug("No decoder for $how_encoded attachments");
878 0         0 return 0;
879             }
880              
881             ### New entity:
882 2         8 my $top_ent = $ent->dup; ### no data yet
883 2         9 $top_ent->make_multipart;
884 2         4 my @parts;
885              
886             ### Made the first cut; on to the real stuff:
887 2 50       6 $ENCODED->seek(0,0) or die "$ME: can't seek: $!";
888 2         21 $self->whine("Found a $how_encoded attachment");
889 2         2 my $pre;
890 2         2 while (1) {
891 6         10 my $bin_data = '';
892              
893             ### Try next part:
894 6         23 my $out = IO::File->new(\$bin_data, '>:');
895 6 100       207 eval { $decoder->decode($ENCODED, $out) }; last if $@;
  6         24  
  6         16  
896 4         18 my $preamble = $decoder->last_preamble;
897 4         11 my $filename = $decoder->last_filename;
898 4         9 my $mode = $decoder->last_mode;
899              
900             ### Get probable type:
901 4         5 my $type = 'application/octet-stream';
902 4   50     17 my ($ext) = $filename =~ /\.(\w+)\Z/; $ext = lc($ext || '');
  4         21  
903 4 50       19 if ($ext =~ /^(gif|jpe?g|xbm|xpm|png)\Z/) { $type = "image/$1" }
  4         10  
904              
905             ### If we got our first preamble, create the text portion:
906 4 100 66     50 if (@$preamble and
      66        
907             (grep /\S/, @$preamble) and
908             !@parts) {
909 2         6 my $txt_ent = $self->interface('ENTITY_CLASS')->new;
910              
911 2         8 MIME::Entity->build(Type => "text/plain",
912             Data => "");
913 2         4 $txt_ent->bodyhandle($self->new_body_for($txt_ent->head));
914 2 50       4 my $io = $txt_ent->bodyhandle->open("w") or die "$ME: can't create: $!";
915 2 50       8 $io->print(@$preamble) or die "$ME: can't print: $!";
916 2 50       25 $io->close or die "$ME: can't close: $!";
917 2         70 push @parts, $txt_ent;
918             }
919              
920             ### Create the attachment:
921             ### We use the x-unix-mode convention from "dtmail 1.2.1 SunOS 5.6".
922 4         4 if (1) {
923 4         29 my $bin_ent = MIME::Entity->build(Type=>$type,
924             Filename=>$filename,
925             Data=>"");
926 4         10 $bin_ent->head->mime_attr('Content-type.x-unix-mode' => "0$mode");
927 4         12 $bin_ent->bodyhandle($self->new_body_for($bin_ent->head));
928 4 50       9 $bin_ent->bodyhandle->binmode(1) or die "$ME: can't set to binmode: $!";
929 4 50       9 my $io = $bin_ent->bodyhandle->open("w") or die "$ME: can't create: $!";
930 4 50       10 $io->print($bin_data) or die "$ME: can't print: $!";
931 4 50       56 $io->close or die "$ME: can't close: $!";
932 4         192 push @parts, $bin_ent;
933             }
934             }
935              
936             ### Did we get anything?
937 2 50       7 @parts or return undef;
938             ### Set the parts and a nice preamble:
939 2         9 $top_ent->parts(\@parts);
940 2         9 $top_ent->preamble
941             (["The following is a multipart MIME message which was extracted\n",
942             "from a $how_encoded-encoded message.\n"]);
943 2         17 $top_ent;
944             }
945              
946             #------------------------------
947             #
948             # process_message IN, READER, ENTITY
949             #
950             # I
951             # Process the singlepart body, and return true.
952             # Fatal exception on failure.
953             # Invoked by process_part().
954             #
955             sub process_message {
956 13     13 0 17 my ($self, $in, $rdr, $ent) = @_;
957 13         30 my $head = $ent->head;
958              
959 13         26 $self->debug("process_message");
960              
961             ### Verify the encoding restrictions:
962 13         28 my $encoding = $head->mime_encoding;
963 13 50       85 if ($encoding !~ /^(7bit|8bit|binary)$/) {
964 0         0 $self->error("illegal encoding [$encoding] for MIME type ".
965             $head->mime_type."\n");
966 0         0 $encoding = 'binary';
967             }
968              
969             ### Parse the message:
970 13         797 my $msg = $self->process_part($in, $rdr);
971 13 50       41 return undef unless defined($msg);
972              
973             ### How to handle nested messages?
974 13 100       27 if ($self->extract_nested_messages eq 'REPLACE') {
975 1         6 %$ent = %$msg; ### shallow replace
976 1         3 %$msg = ();
977             }
978             else { ### "NEST" or generic 1:
979 12         37 $ent->bodyhandle(undef);
980 12         34 $ent->add_part($msg);
981             }
982 13         38 1;
983             }
984              
985             #------------------------------
986             #
987             # process_part IN, READER, [OPTSHASH...]
988             #
989             # I
990             # The real back-end engine.
991             # See the documentation up top for the overview of the algorithm.
992             # The OPTSHASH can contain:
993             #
994             # Retype => retype this part to the given content-type
995             #
996             # Return the entity.
997             # Fatal exception on failure. Returns undef if message to complex
998             #
999             sub process_part {
1000 172     172 0 349 my ($self, $in, $rdr, %p) = @_;
1001              
1002 172 50       1268 if ($self->{MP5_MaxParts} > 0) {
1003 0         0 $self->{MP5_NumParts}++;
1004 0 0       0 if ($self->{MP5_NumParts} > $self->{MP5_MaxParts}) {
1005             # Return UNDEF if msg too complex
1006 0         0 return undef;
1007             }
1008             }
1009              
1010 172   66     605 $rdr ||= MIME::Parser::Reader->new;
1011             #debug "process_part";
1012 172         304 $self->results->level(+1);
1013              
1014             ### Create a new entity:
1015 172         308 my $ent = $self->interface('ENTITY_CLASS')->new;
1016              
1017             ### Parse and add the header:
1018 172         369 my $head = $self->process_header($in, $rdr);
1019 172 100       353 if (not defined $head) {
1020 2         4 $self->debug("bogus empty part");
1021 2         4 $head = $self->interface('HEAD_CLASS')->new;
1022 2         40 $head->mime_type('text/plain');
1023 2         5 $ent->head($head);
1024 2         4 $ent->bodyhandle($self->new_body_for($head));
1025 2 50       4 $ent->bodyhandle->open("w")->close or die "$ME: can't close: $!";
1026 2         34 $self->results->level(-1);
1027 2         4 return $ent;
1028             }
1029 170         457 $ent->head($head);
1030              
1031             ### Tweak the content-type based on context from our parent...
1032             ### For example, multipart/digest messages default to type message/rfc822:
1033 170 100       356 $head->mime_type($p{Retype}) if $p{Retype};
1034              
1035             ### Get the MIME type and subtype:
1036 170         392 my ($type, $subtype) = (split('/', $head->mime_type, -1), '');
1037 170         541 $self->debug("type = $type, subtype = $subtype");
1038              
1039             ### Handle, according to the MIME type:
1040 170 100 66     1241 if ($type eq 'multipart') {
    100 33        
1041 40 50       122 return undef unless defined($self->process_multipart($in, $rdr, $ent));
1042             }
1043             elsif (("$type/$subtype" eq "message/rfc822" ||
1044             "$type/$subtype" eq "message/external-body" ||
1045             ("$type/$subtype" eq "message/partial" && defined($head->mime_attr("content-type.number")) && $head->mime_attr("content-type.number") == 1)) &&
1046             $self->extract_nested_messages) {
1047 13         24 $self->debug("attempting to process a nested message");
1048 13 50       36 return undef unless defined($self->process_message($in, $rdr, $ent));
1049             }
1050             else {
1051 117         288 $self->process_singlepart($in, $rdr, $ent);
1052             }
1053              
1054             ### Done (we hope!):
1055 170         19671 $self->results->level(-1);
1056 170         528 return $ent;
1057             }
1058              
1059              
1060              
1061             =back
1062              
1063             =head2 Parsing an input source
1064              
1065             =over 4
1066              
1067             =cut
1068              
1069             #------------------------------
1070              
1071             =item parse_data DATA
1072              
1073             I
1074             Parse a MIME message that's already in core. This internally creates an "in
1075             memory" filehandle on a Perl scalar value using PerlIO
1076              
1077             You may supply the DATA in any of a number of ways...
1078              
1079             =over 4
1080              
1081             =item *
1082              
1083             B which holds the message. A reference to this scalar will be used
1084             internally.
1085              
1086             =item *
1087              
1088             B which holds the message. This reference will be used
1089             internally.
1090              
1091             =item *
1092              
1093             B
1094              
1095             B The array is internally concatenated into a
1096             temporary string, and a reference to the new string is used internally.
1097              
1098             It is much more efficient to pass in a scalar reference, so please consider
1099             refactoring your code to use that interface instead. If you absolutely MUST
1100             pass an array, you may be better off using IO::ScalarArray in the calling code
1101             to generate a filehandle, and passing that filehandle to I
1102              
1103             =back
1104              
1105             Returns the parsed MIME::Entity on success.
1106              
1107             =cut
1108              
1109             sub parse_data {
1110 11     11 1 751 my ($self, $data) = @_;
1111              
1112 11 50       30 if (!defined($data)) {
1113 0         0 croak "parse_data: No data passed";
1114             }
1115              
1116             ### Get data as a scalar:
1117 11         19 my $io;
1118              
1119 11 100       31 if (! ref $data ) {
    100          
    50          
1120 9         68 $io = IO::File->new(\$data, '<:');
1121             } elsif( ref $data eq 'SCALAR' ) {
1122 1         6 $io = IO::File->new($data, '<:');
1123             } elsif( ref $data eq 'ARRAY' ) {
1124             # Passing arrays is deprecated now that we've nuked IO::ScalarArray
1125             # but for backwards compatibility we still support it by joining the
1126             # array lines to a scalar and doing scalar IO on it.
1127 1         3 my $tmp_data = join('', @$data);
1128 1         5 $io = IO::File->new(\$tmp_data, '<:');
1129             } else {
1130 0         0 croak "parse_data: wrong argument ref type: ", ref($data);
1131             }
1132              
1133 11 50       4109 if (!$io) {
1134 0         0 croak "parse_data: unable to open in-memory file handle";
1135             }
1136              
1137             ### Parse!
1138 11         33 return $self->parse($io);
1139             }
1140              
1141             #------------------------------
1142              
1143             =item parse INSTREAM
1144              
1145             I
1146             Takes a MIME-stream and splits it into its component entities.
1147              
1148             The INSTREAM can be given as an IO::File, a globref filehandle (like
1149             C<\*STDIN>), or as I blessed object conforming to the IO::
1150             interface (which minimally implements getline() and read()).
1151              
1152             Returns the parsed MIME::Entity on success.
1153             Throws exception on failure. If the message contained too many
1154             parts (as set by I), returns undef.
1155              
1156             =cut
1157              
1158             sub parse {
1159 54     54 1 2427 my $self = shift;
1160 54         80 my $in = shift;
1161 54         60 my $entity;
1162 54         217 local $/ = "\n"; ### just to be safe
1163              
1164 54         126 local $\ = undef; # CPAN ticket #71041
1165 54         156 $self->init_parse;
1166 54         179 $entity = $self->process_part($in, undef); ### parse!
1167              
1168 54         298 $entity;
1169             }
1170              
1171             ### Backcompat:
1172             sub read {
1173 0     0 1 0 shift->parse(@_);
1174             }
1175             sub parse_FH {
1176 0     0 0 0 shift->parse(@_);
1177             }
1178              
1179             #------------------------------
1180              
1181             =item parse_open EXPR
1182              
1183             I
1184             Convenience front-end onto C.
1185             Simply give this method any expression that may be sent as the second
1186             argument to open() to open a filehandle for reading.
1187              
1188             Returns the parsed MIME::Entity on success.
1189             Throws exception on failure.
1190              
1191             =cut
1192              
1193             sub parse_open {
1194 31     31 1 5021 my ($self, $expr) = @_;
1195 31         46 my $ent;
1196              
1197 31 50       193 my $io = IO::File->new($expr) or die "$ME: couldn't open $expr: $!";
1198 31         1997 $ent = $self->parse($io);
1199 31 50       90 $io->close or die "$ME: can't close: $!";
1200 31         446 $ent;
1201             }
1202              
1203             ### Backcompat:
1204             sub parse_in {
1205 0     0 0 0 usage "parse_in() is now parse_open()";
1206 0         0 shift->parse_open(@_);
1207             }
1208              
1209             #------------------------------
1210              
1211             =item parse_two HEADFILE, BODYFILE
1212              
1213             I
1214             Convenience front-end onto C, intended for programs
1215             running under mail-handlers like B, which splits the incoming
1216             mail message into a header file and a body file.
1217             Simply give this method the paths to the respective files.
1218              
1219             B it is assumed that, once the files are cat'ed together,
1220             there will be a blank line separating the head part and the body part.
1221              
1222             B new implementation slurps files into line array
1223             for portability, instead of using 'cat'. May be an issue if
1224             your messages are large.
1225              
1226             Returns the parsed MIME::Entity on success.
1227             Throws exception on failure.
1228              
1229             =cut
1230              
1231             sub parse_two {
1232 1     1 1 5 my ($self, $headfile, $bodyfile) = @_;
1233 1         2 my $data;
1234 1         2 foreach ($headfile, $bodyfile) {
1235 2 50       54 open IN, "<$_" or die "$ME: open $_: $!";
1236 2         3 $data .= do { local $/; };
  2         5  
  2         23  
1237 2 50       14 close IN or die "$ME: can't close: $!";
1238             }
1239 1         3 return $self->parse_data($data);
1240             }
1241              
1242             =back
1243              
1244             =cut
1245              
1246              
1247              
1248              
1249             #------------------------------------------------------------
1250              
1251             =head2 Specifying output destination
1252              
1253             B in 5.212 and before, this was done by methods
1254             of MIME::Parser. However, since many users have requested
1255             fine-tuned control over how this is done, the logic has been split
1256             off from the parser into its own class, MIME::Parser::Filer
1257             Every MIME::Parser maintains an instance of a MIME::Parser::Filer
1258             subclass to manage disk output (see L for details.)
1259              
1260             The benefit to this is that the MIME::Parser code won't be
1261             confounded with a lot of garbage related to disk output.
1262             The drawback is that the way you override the default behavior
1263             will change.
1264              
1265             For now, all the normal public-interface methods are still provided,
1266             but many are only stubs which create or delegate to the underlying
1267             MIME::Parser::Filer object.
1268              
1269             =over 4
1270              
1271             =cut
1272              
1273             #------------------------------
1274              
1275             =item filer [FILER]
1276              
1277             I
1278             Get/set the FILER object used to manage the output of files to disk.
1279             This will be some subclass of L.
1280              
1281             =cut
1282              
1283             sub filer {
1284 321     321 1 11787 my ($self, $filer) = @_;
1285 321 100       580 if (@_ > 1) {
1286 76         117 $self->{MP5_Filer} = $filer;
1287 76         355 $filer->results($self->results); ### but we still need in init_parse
1288             }
1289 321         914 $self->{MP5_Filer};
1290             }
1291              
1292             #------------------------------
1293              
1294             =item output_dir DIRECTORY
1295              
1296             I
1297             Causes messages to be filed directly into the given DIRECTORY.
1298             It does this by setting the underlying L to
1299             a new instance of MIME::Parser::FileInto, and passing the arguments
1300             into that class' new() method.
1301              
1302             B Since this method replaces the underlying
1303             filer, you must invoke it I doing changing any attributes
1304             of the filer, like the output prefix; otherwise those changes
1305             will be lost.
1306              
1307             =cut
1308              
1309             sub output_dir {
1310 79     79 1 224 my ($self, @init) = @_;
1311 79 100       181 if (@_ > 1) {
1312 73         437 $self->filer(MIME::Parser::FileInto->new(@init));
1313             }
1314             else {
1315 6         13 &MIME::Tools::whine("0-arg form of output_dir is deprecated.");
1316 6         10 return $self->filer->output_dir;
1317             }
1318             }
1319              
1320             #------------------------------
1321              
1322             =item output_under BASEDIR, OPTS...
1323              
1324             I
1325             Causes messages to be filed directly into subdirectories of the given
1326             BASEDIR, one subdirectory per message. It does this by setting the
1327             underlying L to a new instance of MIME::Parser::FileUnder,
1328             and passing the arguments into that class' new() method.
1329              
1330             B Since this method replaces the underlying
1331             filer, you must invoke it I doing changing any attributes
1332             of the filer, like the output prefix; otherwise those changes
1333             will be lost.
1334              
1335             =cut
1336              
1337             sub output_under {
1338 3     3 1 15 my ($self, @init) = @_;
1339 3 50       8 if (@_ > 1) {
1340 3         24 $self->filer(MIME::Parser::FileUnder->new(@init));
1341             }
1342             else {
1343 0         0 &MIME::Tools::whine("0-arg form of output_under is deprecated.");
1344 0         0 return $self->filer->output_dir;
1345             }
1346             }
1347              
1348             #------------------------------
1349              
1350             =item output_path HEAD
1351              
1352             I
1353             Given a MIME head for a file to be extracted, come up with a good
1354             output pathname for the extracted file.
1355             Identical to the preferred form:
1356              
1357             $parser->filer->output_path(...args...);
1358              
1359             We just delegate this to the underlying L object.
1360              
1361             =cut
1362              
1363             sub output_path {
1364 90     90 1 95 my $self = shift;
1365             ### We use it, so don't warn!
1366             ### &MIME::Tools::whine("output_path deprecated in MIME::Parser");
1367 90         162 $self->filer->output_path(@_);
1368             }
1369              
1370             #------------------------------
1371              
1372             =item output_prefix [PREFIX]
1373              
1374             I
1375             Get/set the short string that all filenames for extracted body-parts
1376             will begin with (assuming that there is no better "recommended filename").
1377             Identical to the preferred form:
1378              
1379             $parser->filer->output_prefix(...args...);
1380              
1381             We just delegate this to the underlying L object.
1382              
1383             =cut
1384              
1385             sub output_prefix {
1386 0     0 1 0 my $self = shift;
1387 0         0 &MIME::Tools::whine("output_prefix deprecated in MIME::Parser");
1388 0         0 $self->filer->output_prefix(@_);
1389             }
1390              
1391             #------------------------------
1392              
1393             =item evil_filename NAME
1394              
1395             I
1396             Identical to the preferred form:
1397              
1398             $parser->filer->evil_filename(...args...);
1399              
1400             We just delegate this to the underlying L object.
1401              
1402             =cut
1403              
1404             sub evil_filename {
1405 2     2 1 13 my $self = shift;
1406 2         5 &MIME::Tools::whine("evil_filename deprecated in MIME::Parser");
1407 2         5 $self->filer->evil_filename(@_);
1408             }
1409              
1410             #------------------------------
1411              
1412             =item max_parts NUM
1413              
1414             I
1415             Limits the number of MIME parts we will parse.
1416              
1417             Normally, instances of this class parse a message to the bitter end.
1418             Messages with many MIME parts can cause excessive memory consumption.
1419             If you invoke this method, parsing will abort with a die() if a message
1420             contains more than NUM parts.
1421              
1422             If NUM is set to -1 (the default), then no maximum limit is enforced.
1423              
1424             With no argument, returns the current setting as an integer
1425              
1426             =cut
1427              
1428             sub max_parts {
1429 0     0 1 0 my($self, $num) = @_;
1430 0 0       0 if (@_ > 1) {
1431 0         0 $self->{MP5_MaxParts} = $num;
1432             }
1433 0         0 return $self->{MP5_MaxParts};
1434             }
1435              
1436             #------------------------------
1437              
1438             =item output_to_core YESNO
1439              
1440             I
1441             Normally, instances of this class output all their decoded body
1442             data to disk files (via MIME::Body::File). However, you can change
1443             this behaviour by invoking this method before parsing:
1444              
1445             If YESNO is false (the default), then all body data goes
1446             to disk files.
1447              
1448             If YESNO is true, then all body data goes to in-core data structures
1449             This is a little risky (what if someone emails you an MPEG or a tar
1450             file, hmmm?) but people seem to want this bit of noose-shaped rope,
1451             so I'm providing it.
1452             Note that setting this attribute true I mean that parser-internal
1453             temporary files are avoided! Use L for that.
1454              
1455             With no argument, returns the current setting as a boolean.
1456              
1457             =cut
1458              
1459             sub output_to_core {
1460 165     165 1 2592 my ($self, $yesno) = @_;
1461 165 100       303 if (@_ > 1) {
1462 42 100 100     163 $yesno = 0 if ($yesno and $yesno eq 'NONE');
1463 42         91 $self->{MP5_FilerToCore} = $yesno;
1464             }
1465 165         328 $self->{MP5_FilerToCore};
1466             }
1467              
1468              
1469             =item tmp_recycling
1470              
1471             I
1472              
1473             This method is a no-op to preserve the pre-5.421 API.
1474              
1475             The tmp_recycling() feature was removed in 5.421 because it had never actually
1476             worked. Please update your code to stop using it.
1477              
1478             =cut
1479              
1480             sub tmp_recycling
1481             {
1482 1     1 1 190 return;
1483             }
1484              
1485              
1486              
1487             #------------------------------
1488              
1489             =item tmp_to_core [YESNO]
1490              
1491             I
1492             Should L create real temp files, or
1493             use fake in-core ones? Normally we allow the creation of temporary
1494             disk files, since this allows us to handle huge attachments even when
1495             core is limited.
1496              
1497             If YESNO is true, we implement new_tmpfile() via in-core handles.
1498             If YESNO is false (the default), we use real tmpfiles.
1499             With no argument, just returns the current setting.
1500              
1501             =cut
1502              
1503             sub tmp_to_core {
1504 0     0 1 0 my ($self, $yesno) = @_;
1505 0 0       0 $self->{MP5_TmpToCore} = $yesno if (@_ > 1);
1506 0         0 $self->{MP5_TmpToCore};
1507             }
1508              
1509             #------------------------------
1510              
1511             =item use_inner_files [YESNO]
1512              
1513             I.
1514              
1515             I
1516              
1517             MIME::Parser no longer supports IO::InnerFile, but this method is retained for
1518             backwards compatibility. It does nothing.
1519              
1520             The original reasoning for IO::InnerFile was that inner files were faster than
1521             "in-core" temp files. At the time, the "in-core" tempfile support was
1522             implemented with IO::Scalar from the IO-Stringy distribution, which used the
1523             tie() interface to wrap a scalar with the appropriate IO::Handle operations.
1524             The penalty for this was fairly hefty, and IO::InnerFile actually was faster.
1525              
1526             Nowadays, MIME::Parser uses Perl's built in ability to open a filehandle on an
1527             in-memory scalar variable via PerlIO. Benchmarking shows that IO::InnerFile is
1528             slightly slower than using in-memory temporary files, and is slightly faster
1529             than on-disk temporary files. Both measurements are within a few percent of
1530             each other. Since there's no real benefit, and since the IO::InnerFile abuse
1531             was fairly hairy and evil ("writes" to it were faked by extending the size of
1532             the inner file with the assumption that the only data you'd ever ->print() to
1533             it would be the line from the "outer" file, for example) it's been removed.
1534              
1535             =cut
1536              
1537             sub use_inner_files {
1538 0     0 1 0 return 0;
1539             }
1540              
1541             =back
1542              
1543             =cut
1544              
1545              
1546             #------------------------------------------------------------
1547              
1548             =head2 Specifying classes to be instantiated
1549              
1550             =over 4
1551              
1552             =cut
1553              
1554             #------------------------------
1555              
1556             =item interface ROLE,[VALUE]
1557              
1558             I
1559             During parsing, the parser normally creates instances of certain classes,
1560             like MIME::Entity. However, you may want to create a parser subclass
1561             that uses your own experimental head, entity, etc. classes (for example,
1562             your "head" class may provide some additional MIME-field-oriented methods).
1563              
1564             If so, then this is the method that your subclass should invoke during
1565             init. Use it like this:
1566              
1567             package MyParser;
1568             @ISA = qw(MIME::Parser);
1569             ...
1570             sub init {
1571             my $self = shift;
1572             $self->SUPER::init(@_); ### do my parent's init
1573             $self->interface(ENTITY_CLASS => 'MIME::MyEntity');
1574             $self->interface(HEAD_CLASS => 'MIME::MyHead');
1575             $self; ### return
1576             }
1577              
1578             With no VALUE, returns the VALUE currently associated with that ROLE.
1579              
1580             =cut
1581              
1582             sub interface {
1583 434     434 1 483 my ($self, $role, $value) = @_;
1584 434 100       812 $self->{MP5_Interface}{$role} = $value if (defined($value));
1585 434         1392 $self->{MP5_Interface}{$role};
1586             }
1587              
1588             #------------------------------
1589              
1590             =item new_body_for HEAD
1591              
1592             I
1593             Based on the HEAD of a part we are parsing, return a new
1594             body object (any desirable subclass of MIME::Body) for
1595             receiving that part's data.
1596              
1597             If you set the C option to false before parsing
1598             (the default), then we call C and create a
1599             new MIME::Body::File on that filename.
1600              
1601             If you set the C option to true before parsing,
1602             then you get a MIME::Body::InCore instead.
1603              
1604             If you want the parser to do something else entirely, you can
1605             override this method in a subclass.
1606              
1607             =cut
1608              
1609             sub new_body_for {
1610 123     123 1 114 my ($self, $head) = @_;
1611              
1612 123 100       226 if ($self->output_to_core) {
1613 27         47 $self->debug("outputting body to core");
1614 27         211 return (new MIME::Body::InCore);
1615             }
1616             else {
1617 96         191 my $outpath = $self->output_path($head);
1618 96         301 $self->debug("outputting body to disk file: $outpath");
1619 96         227 $self->filer->purgeable($outpath); ### we plan to use it
1620 96         588 return (new MIME::Body::File $outpath);
1621             }
1622             }
1623              
1624             #------------------------------
1625              
1626             =pod
1627              
1628             =back
1629              
1630             =head2 Temporary File Creation
1631              
1632             =over
1633              
1634             =item tmp_dir DIRECTORY
1635              
1636             I
1637             Causes any temporary files created by this parser to be created in the
1638             given DIRECTORY.
1639              
1640             If called without arguments, returns current value.
1641              
1642             The default value is undef, which will cause new_tmpfile() to use the
1643             system default temporary directory.
1644              
1645             =cut
1646              
1647             sub tmp_dir
1648             {
1649 100     100 1 114 my ($self, $dirname) = @_;
1650 100 50       170 if ( $dirname ) {
1651 0         0 $self->{MP5_TmpDir} = $dirname;
1652             }
1653              
1654 100         185 return $self->{MP5_TmpDir};
1655             }
1656              
1657             =item new_tmpfile
1658              
1659             I
1660             Return an IO handle to be used to hold temporary data during a parse.
1661              
1662             The default uses MIME::Tools::tmpopen() to create a new temporary file,
1663             unless L dictates otherwise, but you can
1664             override this. You shouldn't need to.
1665              
1666             The location for temporary files can be changed on a per-parser basis
1667             with L.
1668              
1669             If you do override this, make certain that the object you return is
1670             set for binmode(), and is able to handle the following methods:
1671              
1672             read(BUF, NBYTES)
1673             getline()
1674             getlines()
1675             print(@ARGS)
1676             flush()
1677             seek(0, 0)
1678              
1679             Fatal exception if the stream could not be established.
1680              
1681             =cut
1682              
1683             sub new_tmpfile {
1684 100     100 1 2509 my ($self) = @_;
1685              
1686 100         108 my $io;
1687 100 100       174 if ($self->{MP5_TmpToCore}) {
1688 1         1 my $var;
1689 1 50       6 $io = IO::File->new(\$var, '+>:') or die "$ME: Can't open in-core tmpfile: $!";
1690             } else {
1691 99         135 my $args = {};
1692 99 100       187 if( $self->tmp_dir ) {
1693 1         3 $args->{DIR} = $self->tmp_dir;
1694             }
1695 99 50       236 $io = tmpopen( $args ) or die "$ME: can't open tmpfile: $!\n";
1696 99 50       33531 binmode($io) or die "$ME: can't set to binmode: $!";
1697             }
1698 100         207 return $io;
1699             }
1700              
1701             =back
1702              
1703             =cut
1704              
1705              
1706              
1707              
1708              
1709              
1710             #------------------------------------------------------------
1711              
1712             =head2 Parse results and error recovery
1713              
1714             =over 4
1715              
1716             =cut
1717              
1718             #------------------------------
1719              
1720             =item last_error
1721              
1722             I
1723             Return the error (if any) that we ignored in the last parse.
1724              
1725             =cut
1726              
1727             sub last_error {
1728 0     0 1 0 join '', shift->results->errors;
1729             }
1730              
1731              
1732             #------------------------------
1733              
1734             =item last_head
1735              
1736             I
1737             Return the top-level MIME header of the last stream we attempted to parse.
1738             This is useful for replying to people who sent us bad MIME messages.
1739              
1740             ### Parse an input stream:
1741             eval { $entity = $parser->parse(\*STDIN) };
1742             if (!$entity) { ### parse failed!
1743             my $decapitated = $parser->last_head;
1744             ...
1745             }
1746              
1747             =cut
1748              
1749             sub last_head {
1750 0     0 1 0 shift->results->top_head;
1751             }
1752              
1753             #------------------------------
1754              
1755             =item results
1756              
1757             I
1758             Return an object containing lots of info from the last entity parsed.
1759             This will be an instance of class
1760             L.
1761              
1762             =cut
1763              
1764             sub results {
1765 669     669 1 2147 shift->{MP5_Results};
1766             }
1767              
1768              
1769             =back
1770              
1771             =cut
1772              
1773              
1774             #------------------------------
1775             1;
1776             __END__