File Coverage

blib/lib/MIME/Parser.pm
Criterion Covered Total %
statement 390 442 88.2
branch 129 194 66.4
condition 25 40 62.5
subroutine 51 62 82.2
pod 32 46 69.5
total 627 784 79.9


line stmt bran cond sub pod time code
1             package MIME::Parser;
2              
3              
4             =head1 NAME
5              
6             MIME::Parser - experimental class for parsing MIME streams
7              
8              
9             =head1 SYNOPSIS
10              
11             Before reading further, you should see L to make sure that
12             you understand where this module fits into the grand scheme of things.
13             Go on, do it now. I'll wait.
14              
15             Ready? Ok...
16              
17             =head2 Basic usage examples
18              
19             ### Create a new parser object:
20             my $parser = new MIME::Parser;
21              
22             ### Tell it where to put things:
23             $parser->output_under("/tmp");
24              
25             ### Parse an input filehandle:
26             $entity = $parser->parse(\*STDIN);
27              
28             ### Congratulations: you now have a (possibly multipart) MIME entity!
29             $entity->dump_skeleton; # for debugging
30              
31              
32             =head2 Examples of input
33              
34             ### Parse from filehandles:
35             $entity = $parser->parse(\*STDIN);
36             $entity = $parser->parse(IO::File->new("some command|");
37              
38             ### Parse from any object that supports getline() and read():
39             $entity = $parser->parse($myHandle);
40              
41             ### Parse an in-core MIME message:
42             $entity = $parser->parse_data($message);
43              
44             ### Parse an MIME message in a file:
45             $entity = $parser->parse_open("/some/file.msg");
46              
47             ### Parse an MIME message out of a pipeline:
48             $entity = $parser->parse_open("gunzip - < file.msg.gz |");
49              
50             ### Parse already-split input (as "deliver" would give it to you):
51             $entity = $parser->parse_two("msg.head", "msg.body");
52              
53              
54             =head2 Examples of output control
55              
56             ### Keep parsed message bodies in core (default outputs to disk):
57             $parser->output_to_core(1);
58              
59             ### Output each message body to a one-per-message directory:
60             $parser->output_under("/tmp");
61              
62             ### Output each message body to the same directory:
63             $parser->output_dir("/tmp");
64              
65             ### Change how nameless message-component files are named:
66             $parser->output_prefix("msg");
67              
68             ### Put temporary files somewhere else
69             $parser->tmp_dir("/var/tmp/mytmpdir");
70              
71             =head2 Examples of error recovery
72              
73             ### Normal mechanism:
74             eval { $entity = $parser->parse(\*STDIN) };
75             if ($@) {
76             $results = $parser->results;
77             $decapitated = $parser->last_head; ### get last top-level head
78             }
79              
80             ### Ultra-tolerant mechanism:
81             $parser->ignore_errors(1);
82             $entity = eval { $parser->parse(\*STDIN) };
83             $error = ($@ || $parser->last_error);
84              
85             ### Cleanup all files created by the parse:
86             eval { $entity = $parser->parse(\*STDIN) };
87             ...
88             $parser->filer->purge;
89              
90              
91             =head2 Examples of parser options
92              
93             ### Automatically attempt to RFC 2047-decode the MIME headers?
94             $parser->decode_headers(1); ### default is false
95              
96             ### Parse contained "message/rfc822" objects as nested MIME streams?
97             $parser->extract_nested_messages(0); ### default is true
98              
99             ### Look for uuencode in "text" messages, and extract it?
100             $parser->extract_uuencode(1); ### default is false
101              
102             ### Should we forgive normally-fatal errors?
103             $parser->ignore_errors(0); ### default is true
104              
105              
106             =head2 Miscellaneous examples
107              
108             ### Convert a Mail::Internet object to a MIME::Entity:
109             my $data = join('', (@{$mail->header}, "\n", @{$mail->body}));
110             $entity = $parser->parse_data(\$data);
111              
112              
113              
114             =head1 DESCRIPTION
115              
116             You can inherit from this class to create your own subclasses
117             that parse MIME streams into MIME::Entity objects.
118              
119              
120             =head1 PUBLIC INTERFACE
121              
122             =cut
123              
124             #------------------------------
125              
126             require 5.004;
127              
128             ### Pragmas:
129 15     15   175261 use strict;
  15         50  
  15         450  
130 15     15   71 use vars (qw($VERSION $CAT $CRLF));
  15         27  
  15         919  
131              
132             ### core Perl modules
133 15     15   8957 use IO::File;
  15         95457  
  15         2261  
134 15     15   100 use File::Spec;
  15         23  
  15         343  
135 15     15   75 use File::Path;
  15         26  
  15         836  
136 15     15   74 use Config qw(%Config);
  15         36  
  15         495  
137 15     15   65 use Carp;
  15         35  
  15         909  
138              
139             ### Kit modules:
140 15     15   6642 use MIME::Tools qw(:config :utils :msgtypes usage tmpopen );
  15         36  
  15         3445  
141 15     15   8097 use MIME::Head;
  15         49  
  15         518  
142 15     15   7988 use MIME::Body;
  15         40  
  15         349  
143 15     15   11071 use MIME::Entity;
  15         52  
  15         461  
144 15     15   80 use MIME::Decoder;
  15         29  
  15         278  
145 15     15   8611 use MIME::Parser::Reader;
  15         41  
  15         511  
146 15     15   9219 use MIME::Parser::Filer;
  15         40  
  15         367  
147 15     15   7557 use MIME::Parser::Results;
  15         38  
  15         74864  
148              
149             #------------------------------
150             #
151             # Globals
152             #
153             #------------------------------
154              
155             ### The package version, both in 1.23 style *and* usable by MakeMaker:
156             $VERSION = "5.507";
157              
158             ### How to catenate:
159             $CAT = '/bin/cat';
160              
161             ### The CRLF sequence:
162             $CRLF = "\015\012";
163              
164             ### Who am I?
165             my $ME = 'MIME::Parser';
166              
167              
168              
169             #------------------------------------------------------------
170              
171             =head2 Construction
172              
173             =over 4
174              
175             =cut
176              
177             #------------------------------
178              
179             =item new ARGS...
180              
181             I
182             Create a new parser object.
183             Once you do this, you can then set up various parameters
184             before doing the actual parsing. For example:
185              
186             my $parser = new MIME::Parser;
187             $parser->output_dir("/tmp");
188             $parser->output_prefix("msg1");
189             my $entity = $parser->parse(\*STDIN);
190              
191             Any arguments are passed into C.
192             Don't override this in your subclasses; override init() instead.
193              
194             =cut
195              
196             sub new {
197 42     42 1 13321 my $self = bless {}, shift;
198 42         190 $self->init(@_);
199             }
200              
201             #------------------------------
202              
203             =item init ARGS...
204              
205             I
206             Initiallize a new MIME::Parser object.
207             This is automatically sent to a new object; you may want to override it.
208             If you override this, be sure to invoke the inherited method.
209              
210             =cut
211              
212             sub init {
213 42     42 1 118 my $self = shift;
214              
215 42         176 $self->{MP5_DecodeHeaders} = 0;
216 42         114 $self->{MP5_DecodeBodies} = 1;
217 42         104 $self->{MP5_Interface} = {};
218 42         120 $self->{MP5_ParseNested} = 'NEST';
219 42         95 $self->{MP5_TmpToCore} = 0;
220 42         97 $self->{MP5_IgnoreErrors} = 1;
221 42         91 $self->{MP5_UUDecode} = 0;
222 42         124 $self->{MP5_MaxParts} = -1;
223 42         105 $self->{MP5_TmpDir} = undef;
224              
225 42         190 $self->interface(ENTITY_CLASS => 'MIME::Entity');
226 42         130 $self->interface(HEAD_CLASS => 'MIME::Head');
227              
228 42         194 $self->output_dir(".");
229              
230 42         293 $self;
231             }
232              
233             #------------------------------
234              
235             =item init_parse
236              
237             I
238             Invoked automatically whenever one of the top-level parse() methods
239             is called, to reset the parser to a "ready" state.
240              
241             =cut
242              
243             sub init_parse {
244 53     53 1 106 my $self = shift;
245              
246 53         464 $self->{MP5_Results} = new MIME::Parser::Results;
247              
248 53         243 $self->{MP5_Filer}->results($self->{MP5_Results});
249 53         298 $self->{MP5_Filer}->purgeable([]);
250 53         266 $self->{MP5_Filer}->init_parse();
251 53         104 $self->{MP5_NumParts} = 0;
252 53         87 1;
253             }
254              
255             =back
256              
257             =cut
258              
259              
260              
261              
262              
263             #------------------------------------------------------------
264              
265             =head2 Altering how messages are parsed
266              
267             =over 4
268              
269             =cut
270              
271             #------------------------------
272              
273             =item decode_headers [YESNO]
274              
275             I
276             Controls whether the parser will attempt to decode all the MIME headers
277             (as per RFC 2047) the moment it sees them. B
278             for two very important reasons:>
279              
280             =over
281              
282             =item *
283              
284             B
285             If you fully decode the headers into bytes, you can inadvertently
286             transform a parseable MIME header like this:
287              
288             Content-type: text/plain; filename="=?ISO-8859-1?Q?Hi=22Ho?="
289              
290             into unparseable gobbledygook; in this case:
291              
292             Content-type: text/plain; filename="Hi"Ho"
293              
294             =item *
295              
296             B An encoded string which contains
297             both Latin-1 and Cyrillic characters will be turned into a binary
298             mishmosh which simply can't be rendered.
299              
300             =back
301              
302             B
303             This method was once the only out-of-the-box way to deal with attachments
304             whose filenames had non-ASCII characters. However, since MIME-tools 5.4xx
305             this is no longer necessary.
306              
307             B
308             If YESNO is true, decoding is done. However, you will get a warning
309             unless you use one of the special "true" values:
310              
311             "I_NEED_TO_FIX_THIS"
312             Just shut up and do it. Not recommended.
313             Provided only for those who need to keep old scripts functioning.
314              
315             "I_KNOW_WHAT_I_AM_DOING"
316             Just shut up and do it. Not recommended.
317             Provided for those who REALLY know what they are doing.
318              
319             If YESNO is false (the default), no attempt at decoding will be done.
320             With no argument, just returns the current setting.
321             B you can always decode the headers I the parsing
322             has completed (see L), or
323             decode the words on demand (see L).
324              
325             =cut
326              
327             sub decode_headers {
328 0     0 1 0 my ($self, $yesno) = @_;
329 0 0       0 if (@_ > 1) {
330 0         0 $self->{MP5_DecodeHeaders} = $yesno;
331 0 0       0 if ($yesno) {
332 0 0 0     0 if (($yesno eq "I_KNOW_WHAT_I_AM_DOING") ||
333             ($yesno eq "I_NEED_TO_FIX_THIS")) {
334             ### ok
335             }
336             else {
337 0         0 $self->whine("as of 5.4xx, decode_headers() should NOT be ".
338             "set true... if you are doing this to make sure ".
339             "that non-ASCII filenames are translated, ".
340             "that's now done automatically; for all else, ".
341             "use MIME::Words.");
342             }
343             }
344             }
345 0         0 $self->{MP5_DecodeHeaders};
346             }
347              
348             #------------------------------
349              
350             =item extract_nested_messages OPTION
351              
352             I
353             Some MIME messages will contain a part of type C
354             ,C or C:
355             literally, the text of an embedded mail/news/whatever message.
356             This option controls whether (and how) we parse that embedded message.
357              
358             If the OPTION is false, we treat such a message just as if it were a
359             C document, without attempting to decode its contents.
360              
361             If the OPTION is true (the default), the body of the C
362             or C part is parsed by this parser, creating an
363             entity object. What happens then is determined by the actual OPTION:
364              
365             =over 4
366              
367             =item NEST or 1
368              
369             The default setting.
370             The contained message becomes the sole "part" of the C
371             entity (as if the containing message were a special kind of
372             "multipart" message).
373             You can recover the sub-entity by invoking the L
374             method on the C entity.
375              
376             =item REPLACE
377              
378             The contained message replaces the C entity, as though
379             the C "container" never existed.
380              
381             B notice that, with this option, all the header information
382             in the C header is lost. This might seriously bother
383             you if you're dealing with a top-level message, and you've just lost
384             the sender's address and the subject line. C<:-/>.
385              
386             =back
387              
388             I
389              
390             =cut
391              
392             sub extract_nested_messages {
393 51     51 1 181 my ($self, $option) = @_;
394 51 100       182 $self->{MP5_ParseNested} = $option if (@_ > 1);
395 51         169 $self->{MP5_ParseNested};
396             }
397              
398             sub parse_nested_messages {
399 0     0 0 0 usage "parse_nested_messages() is now extract_nested_messages()";
400 0         0 shift->extract_nested_messages(@_);
401             }
402              
403             #------------------------------
404              
405             =item extract_uuencode [YESNO]
406              
407             I
408             If set true, then whenever we are confronted with a message
409             whose effective content-type is "text/plain" and whose encoding
410             is 7bit/8bit/binary, we scan the encoded body to see if it contains
411             uuencoded data (generally given away by a "begin XXX" line).
412              
413             If it does, we explode the uuencoded message into a multipart,
414             where the text before the first "begin XXX" becomes the first part,
415             and all "begin...end" sections following become the subsequent parts.
416             The filename (if given) is accessible through the normal means.
417              
418             =cut
419              
420             sub extract_uuencode {
421 253     253 1 514 my ($self, $yesno) = @_;
422 253 100       595 $self->{MP5_UUDecode} = $yesno if @_ > 1;
423 253         912 $self->{MP5_UUDecode};
424             }
425              
426             #------------------------------
427              
428             =item ignore_errors [YESNO]
429              
430             I
431             Controls whether the parser will attempt to ignore normally-fatal
432             errors, treating them as warnings and continuing with the parse.
433              
434             If YESNO is true (the default), many syntax errors are tolerated.
435             If YESNO is false, fatal errors throw exceptions.
436             With no argument, just returns the current setting.
437              
438             =cut
439              
440             sub ignore_errors {
441 25     25 1 221 my ($self, $yesno) = @_;
442 25 50       80 $self->{MP5_IgnoreErrors} = $yesno if (@_ > 1);
443 25         117 $self->{MP5_IgnoreErrors};
444             }
445              
446              
447             #------------------------------
448              
449             =item decode_bodies [YESNO]
450              
451             I
452             Controls whether the parser should decode entity bodies or not.
453             If this is set to a false value (default is true), all entity bodies
454             will be kept as-is in the original content-transfer encoding.
455              
456             To prevent double encoding on the output side MIME::Body->is_encoded
457             is set, which tells MIME::Body not to encode the data again, if encoded
458             data was requested. This is in particular useful, when it's important that
459             the content B be modified, e.g. if you want to calculate
460             OpenPGP signatures from it.
461              
462             B: the semantics change significantly if you parse MIME
463             messages with this option set, because MIME::Entity resp. MIME::Body
464             *always* see encoded data now, while the default behaviour is
465             working with *decoded* data (and encoding it only if you request it).
466             You need to decode the data yourself, if you want to have it decoded.
467              
468             So use this option only if you exactly know, what you're doing, and
469             that you're sure, that you really need it.
470              
471             =cut
472              
473             sub decode_bodies {
474 256     256 1 703 my ($self, $yesno) = @_;
475 256 100       595 $self->{MP5_DecodeBodies} = $yesno if (@_ > 1);
476 256         951 $self->{MP5_DecodeBodies};
477             }
478              
479             #------------------------------
480             #
481             # MESSAGES...
482             #
483              
484             #------------------------------
485             #
486             # debug MESSAGE...
487             #
488             sub debug {
489 1236     1236 0 1663 my $self = shift;
490 1236 50       4205 if (MIME::Tools->debugging()) {
491 0 0       0 if (my $r = $self->{MP5_Results}) {
492 0         0 unshift @_, $r->indent;
493 0         0 $r->msg($M_DEBUG, @_);
494             }
495 0         0 MIME::Tools::debug(@_);
496             }
497             }
498              
499             #------------------------------
500             #
501             # whine PROBLEM...
502             #
503             sub whine {
504 6     6 0 12 my $self = shift;
505 6 50       45 if (my $r = $self->{MP5_Results}) {
506 6         32 unshift @_, $r->indent;
507 6         29 $r->msg($M_WARNING, @_);
508             }
509 6         31 &MIME::Tools::whine(@_);
510             }
511              
512             #------------------------------
513             #
514             # error PROBLEM...
515             #
516             # Possibly-forgivable parse error occurred.
517             # Raises a fatal exception unless we are ignoring errors.
518             #
519             sub error {
520 6     6 1 36 my $self = shift;
521 6 50       25 if (my $r = $self->{MP5_Results}) {
522 6         23 unshift @_, $r->indent;
523 6         26 $r->msg($M_ERROR, @_);
524             }
525 6         27 &MIME::Tools::error(@_);
526 6 100       59 $self->{MP5_IgnoreErrors} ? return undef : die @_;
527             }
528              
529              
530              
531              
532             #------------------------------
533             #
534             # PARSING...
535             #
536              
537             #------------------------------
538             #
539             # process_preamble IN, READER, ENTITY
540             #
541             # I
542             # Dispose of a multipart message's preamble.
543             #
544             sub process_preamble {
545 39     39 0 91 my ($self, $in, $rdr, $ent) = @_;
546              
547             ### Sanity:
548 39 50       152 ($rdr->depth > 0) or die "$ME: internal logic error";
549              
550             ### Parse preamble:
551 39         63 my @saved;
552 39         59 my $data = '';
553 39 50       396 open(my $fh, '>', \$data) or die $!;
554 39         160 $rdr->read_chunk($in, $fh, 1);
555 39         76 close $fh;
556              
557             # Ugh. Horrible. If the preamble consists only of CRLF, squash it down
558             # to the empty string. Else, remove the trailing CRLF.
559 39 100       131 if( $data =~ m/^[\r\n]\z/ ) {
560 2         6 @saved = ('');
561             } else {
562 37         106 $data =~ s/[\r\n]\z//;
563 37         122 @saved = split(/^/, $data);
564             }
565 39         174 $ent->preamble(\@saved);
566 39         176 1;
567             }
568              
569             #------------------------------
570             #
571             # process_epilogue IN, READER, ENTITY
572             #
573             # I
574             # Dispose of a multipart message's epilogue.
575             #
576             sub process_epilogue {
577 37     37 0 88 my ($self, $in, $rdr, $ent) = @_;
578 37         107 $self->debug("process_epilogue");
579              
580             ### Parse epilogue:
581 37         70 my @saved;
582 37         188 $rdr->read_lines($in, \@saved);
583 37         174 $ent->epilogue(\@saved);
584 37         67 1;
585             }
586              
587             #------------------------------
588             #
589             # process_to_bound IN, READER, OUT
590             #
591             # I
592             # Dispose of the next chunk into the given output stream OUT.
593             #
594             sub process_to_bound {
595 95     95 0 185 my ($self, $in, $rdr, $out) = @_;
596              
597             ### Parse:
598 95         350 $rdr->read_chunk($in, $out);
599 95         152 1;
600             }
601              
602             #------------------------------
603             #
604             # process_header IN, READER
605             #
606             # I
607             # Process and return the next header.
608             # Return undef if, instead of a header, the encapsulation boundary is found.
609             # Fatal exception on failure.
610             #
611             sub process_header {
612 169     169 0 374 my ($self, $in, $rdr) = @_;
613 169         390 $self->debug("process_header");
614              
615             ### Parse and save the (possibly empty) header, up to and including the
616             ### blank line that terminates it:
617 169         409 my $head = $self->interface('HEAD_CLASS')->new;
618              
619             ### Read the lines of the header.
620             ### We localize IO inside here, so that we can support the IO:: interface
621 169         3951 my @headlines;
622 169         559 my $hdr_rdr = $rdr->spawn;
623 169         648 $hdr_rdr->add_terminator("");
624 169         463 $hdr_rdr->add_terminator("\r"); ### sigh
625              
626 169         263 my $headstr = '';
627 9 50   9   73 open(my $outfh, '>:scalar', \$headstr) or die $!;
  9         17  
  9         77  
  169         2187  
628 169         11974 $hdr_rdr->read_chunk($in, $outfh, 0, 1);
629 169         340 close $outfh;
630              
631             ### How did we do?
632 169 100       534 if ($hdr_rdr->eos_type eq 'DELIM') {
633 2         6 $self->whine("bogus part, without CRLF before body");
634 2         15 return undef;
635             }
636 167 100       470 ($hdr_rdr->eos_type eq 'DONE') or
637             $self->error("unexpected end of header\n");
638              
639             ### Extract the header (note that zero-size headers are admissible!):
640 167 50       1552 open(my $readfh, '<:scalar', \$headstr) or die $!;
641 167         625 $head->read( $readfh );
642              
643 167 100       68071 unless( $readfh->eof() ) {
644             # Not entirely correct, since ->read consumes the line it gives up on.
645             # it's actually the line /before/ the one we get with ->getline
646 1         44 $self->error("couldn't parse head; error near:\n", $readfh->getline());
647             }
648              
649              
650             ### If desired, auto-decode the header as per RFC 2047
651             ### This shouldn't affect non-encoded headers; however, it will decode
652             ### headers with international characters. WARNING: currently, the
653             ### character-set information is LOST after decoding.
654 166 50       1500 $head->decode($self->{MP5_DecodeHeaders}) if $self->{MP5_DecodeHeaders};
655              
656             ### If this is the top-level head, save it:
657 166 100       427 $self->results->top_head($head) if !$self->results->top_head;
658              
659 166         2024 return $head;
660             }
661              
662             #------------------------------
663             #
664             # process_multipart IN, READER, ENTITY
665             #
666             # I
667             # Process the multipart body, and return the state.
668             # Fatal exception on failure.
669             # Invoked by process_part().
670             #
671             sub process_multipart {
672 39     39 0 68 my ($self, $in, $rdr, $ent) = @_;
673 39         105 my $head = $ent->head;
674              
675 39         204 $self->debug("process_multipart...");
676              
677             ### Get actual type and subtype from the header:
678 39         123 my ($type, $subtype) = (split('/', $head->mime_type, -1), '');
679              
680             ### If this was a type "multipart/digest", then the RFCs say we
681             ### should default the parts to have type "message/rfc822".
682             ### Thanks to Carsten Heyl for suggesting this...
683 39 100       147 my $retype = (($subtype eq 'digest') ? 'message/rfc822' : '');
684              
685             ### Get the boundaries for the parts:
686 39         149 my $bound = $head->multipart_boundary;
687 39 50 33     277 if (!defined($bound) || ($bound =~ /[\r\n]/)) {
688 0         0 $self->error("multipart boundary is missing, or contains CR or LF\n");
689 0         0 $ent->effective_type("application/x-unparseable-multipart");
690 0         0 return $self->process_singlepart($in, $rdr, $ent);
691             }
692 39         157 my $part_rdr = $rdr->spawn->add_boundary($bound);
693              
694             ### Prepare to parse:
695 39         66 my $eos_type;
696             my $more_parts;
697              
698             ### Parse preamble...
699 39         140 $self->process_preamble($in, $part_rdr, $ent);
700              
701             ### ...and look at how we finished up:
702 39         131 $eos_type = $part_rdr->eos_type;
703 39 100       119 if ($eos_type eq 'DELIM'){ $more_parts = 1 }
  38 50       57  
704 1         5 elsif ($eos_type eq 'CLOSE'){ $self->whine("empty multipart message\n");
705 1         2 $more_parts = 0; }
706 0         0 else { $self->error("unexpected end of preamble\n");
707 0         0 return 1; }
708              
709             ### Parse parts:
710 39         65 my $partno = 0;
711 39         59 my $part;
712 39         110 while ($more_parts) {
713 103         152 ++$partno;
714 103         382 $self->debug("parsing part $partno...");
715              
716             ### Parse the next part, and add it to the entity...
717 103         384 my $part = $self->process_part($in, $part_rdr, Retype=>$retype);
718 103 50       285 return undef unless defined($part);
719              
720 103         379 $ent->add_part($part);
721              
722             ### ...and look at how we finished up:
723 103         396 $eos_type = $part_rdr->eos_type;
724 103 100       352 if ($eos_type eq 'DELIM') { $more_parts = 1 }
  65 100       185  
725 36         155 elsif ($eos_type eq 'CLOSE') { $more_parts = 0; }
726 2         9 else { $self->error("unexpected end of parts ".
727             "before epilogue\n");
728 2         18 return 1; }
729             }
730              
731             ### Parse epilogue...
732             ### (note that we use the *parent's* reader here, which does not
733             ### know about the boundaries in this multipart!)
734 37         162 $self->process_epilogue($in, $rdr, $ent);
735              
736             ### ...and there's no need to look at how we finished up!
737 37         323 1;
738             }
739              
740             #------------------------------
741             #
742             # process_singlepart IN, READER, ENTITY
743             #
744             # I
745             # Process the singlepart body. Returns true.
746             # Fatal exception on failure.
747             # Invoked by process_part().
748             #
749             sub process_singlepart {
750 114     114 0 214 my ($self, $in, $rdr, $ent) = @_;
751 114         344 my $head = $ent->head;
752              
753 114         348 $self->debug("process_singlepart...");
754              
755             ### Obtain a filehandle for reading the encoded information:
756             ### We have two different approaches, based on whether or not we
757             ### have to contend with boundaries.
758 114         156 my $ENCODED; ### handle
759 114   100     362 my $can_shortcut = (!$rdr->has_bounds and !$self->{MP5_UUDecode});
760 114 100       240 if ($can_shortcut) {
761 19         52 $self->debug("taking shortcut");
762              
763 19         33 $ENCODED = $in;
764 19         63 $rdr->eos('EOF'); ### be sure to bogus-up the reader state to EOF:
765             }
766             else {
767              
768 95         229 $self->debug("using temp file");
769 95         335 $ENCODED = $self->new_tmpfile();
770              
771             ### Read encoded body until boundary (or EOF)...
772 95         336 $self->process_to_bound($in, $rdr, $ENCODED);
773              
774             ### ...and look at how we finished up.
775             ### If we have bounds, we want DELIM or CLOSE.
776             ### Otherwise, we want EOF (and that's all we'd get, anyway!).
777 95 100       293 if ($rdr->has_bounds) {
778 94 100       280 ($rdr->eos_type =~ /^(DELIM|CLOSE)$/) or
779             $self->error("part did not end with expected boundary\n");
780             }
781              
782             ### Flush and rewind encoded buffer, so we can read it:
783 95 50       79370 $ENCODED->flush or die "$ME: can't flush: $!";
784 95 50       639 $ENCODED->seek(0, 0) or die "$ME: can't seek: $!";
785             }
786              
787             ### Get a content-decoder to decode this part's encoding:
788 114         1104 my $encoding = $head->mime_encoding;
789 114         861 my $decoder = new MIME::Decoder $encoding;
790 114 50       458 if (!$decoder) {
791 0         0 $self->whine("Unsupported encoding '$encoding': using 'binary'... \n".
792             "The entity will have an effective MIME type of \n".
793             "application/octet-stream."); ### as per RFC-2045
794 0         0 $ent->effective_type('application/octet-stream');
795 0         0 $decoder = new MIME::Decoder 'binary';
796 0         0 $encoding = 'binary';
797             }
798              
799             ### Data should be stored encoded / as-is?
800 114 100       363 if ( !$self->decode_bodies ) {
801 7         21 $decoder = new MIME::Decoder 'binary';
802 7         45 $encoding = 'binary';
803             }
804              
805             ### If desired, sidetrack to troll for UUENCODE:
806 114         383 $self->debug("extract uuencode? ", $self->extract_uuencode);
807 114         371 $self->debug("encoding? ", $encoding);
808 114         451 $self->debug("effective type? ", $ent->effective_type);
809              
810 114 50 66     328 if ($self->extract_uuencode and
      66        
811             ($encoding =~ /^(7bit|8bit|binary)\Z/) and
812             ($ent->effective_type =~
813             m{^(?:text/plain|application/mac-binhex40|application/mac-binhex)\Z})) {
814             ### Hunt for it:
815 3         5 my $uu_ent = eval { $self->hunt_for_uuencode($ENCODED, $ent) };
  3         11  
816 3 100       10 if ($uu_ent) { ### snark
817 2         14 %$ent = %$uu_ent;
818 2         24 return 1;
819             }
820             else { ### boojum
821 1         5 $self->whine("while hunting for uuencode: $@");
822 1 50       5 $ENCODED->seek(0,0) or die "$ME: can't seek: $!";
823             }
824             }
825              
826             ### Open a new bodyhandle for outputting the data:
827 112 50       375 my $body = $self->new_body_for($head) or die "$ME: no body"; # gotta die
828 112 100 50     394 $body->binmode(1) or die "$ME: can't set to binmode: $!"
      100        
829             unless textual_type($ent->effective_type) or !$self->decode_bodies;
830 112 100       389 $body->is_encoded(1) if !$self->decode_bodies;
831              
832             ### Decode and save the body (using the decoder):
833 112 50       509 my $DECODED = $body->open("w") or die "$ME: body not opened: $!";
834 112         1686 eval { $decoder->decode($ENCODED, $DECODED); };
  112         671  
835 112 50       265 $@ and $self->error($@);
836 112 50       436 $DECODED->close or die "$ME: can't close: $!";
837              
838             ### Success! Remember where we put stuff:
839 112         5638 $ent->bodyhandle($body);
840              
841             ### Done!
842 112         833 1;
843             }
844              
845             #------------------------------
846             #
847             # hunt_for_uuencode ENCODED, ENTITY
848             #
849             # I
850             # Try to detect and dispatch embedded uuencode as a fake multipart message.
851             # Returns new entity or undef.
852             #
853             sub hunt_for_uuencode {
854 3     3 0 6 my ($self, $ENCODED, $ent) = @_;
855 3         5 my ($good, $how_encoded);
856 3         5 local $_;
857 3         10 $self->debug("sniffing around for UUENCODE");
858              
859             ### Heuristic:
860 3 50       11 $ENCODED->seek(0,0) or die "$ME: can't seek: $!";
861 3         92 while (defined($_ = $ENCODED->getline)) {
862 18 100       476 if ($good = /^begin [0-7]{3}/) {
863 2         4 $how_encoded = 'uu';
864 2         3 last;
865             }
866 16 50       405 if ($good = /^\(This file must be converted with/i) {
867 0         0 $how_encoded = 'binhex';
868 0         0 last;
869             }
870             }
871 3 100       33 $good or do { $self->debug("no one made the cut"); return 0 };
  1         3  
  1         3  
872              
873             # If a decoder doesn't exist for this type, forget it!
874 2 50       13 my $decoder = MIME::Decoder->new(($how_encoded eq 'uu')?'x-uuencode'
875             :'binhex');
876 2 50       8 unless (defined($decoder)) {
877 0         0 $self->debug("No decoder for $how_encoded attachments");
878 0         0 return 0;
879             }
880              
881             ### New entity:
882 2         11 my $top_ent = $ent->dup; ### no data yet
883 2         8 $top_ent->make_multipart;
884 2         2 my @parts;
885              
886             ### Made the first cut; on to the real stuff:
887 2 50       10 $ENCODED->seek(0,0) or die "$ME: can't seek: $!";
888 2         25 $self->whine("Found a $how_encoded attachment");
889 2         5 my $pre;
890 2         2 while (1) {
891 6         13 my $bin_data = '';
892              
893             ### Try next part:
894 6         32 my $out = IO::File->new(\$bin_data, '>:');
895 6 100       300 eval { $decoder->decode($ENCODED, $out) }; last if $@;
  6         31  
  6         17  
896 4         17 my $preamble = $decoder->last_preamble;
897 4         14 my $filename = $decoder->last_filename;
898 4         13 my $mode = $decoder->last_mode;
899              
900             ### Get probable type:
901 4         10 my $type = 'application/octet-stream';
902 4   50     17 my ($ext) = $filename =~ /\.(\w+)\Z/; $ext = lc($ext || '');
  4         23  
903 4 50       23 if ($ext =~ /^(gif|jpe?g|xbm|xpm|png)\Z/) { $type = "image/$1" }
  4         11  
904              
905             ### If we got our first preamble, create the text portion:
906 4 100 66     45 if (@$preamble and
      66        
907             (grep /\S/, @$preamble) and
908             !@parts) {
909 2         9 my $txt_ent = $self->interface('ENTITY_CLASS')->new;
910              
911 2         10 MIME::Entity->build(Type => "text/plain",
912             Data => "");
913 2         6 $txt_ent->bodyhandle($self->new_body_for($txt_ent->head));
914 2 50       7 my $io = $txt_ent->bodyhandle->open("w") or die "$ME: can't create: $!";
915 2 50       8 $io->print(@$preamble) or die "$ME: can't print: $!";
916 2 50       29 $io->close or die "$ME: can't close: $!";
917 2         180 push @parts, $txt_ent;
918             }
919              
920             ### Create the attachment:
921             ### We use the x-unix-mode convention from "dtmail 1.2.1 SunOS 5.6".
922 4         7 if (1) {
923 4         29 my $bin_ent = MIME::Entity->build(Type=>$type,
924             Filename=>$filename,
925             Data=>"");
926 4         11 $bin_ent->head->mime_attr('Content-type.x-unix-mode' => "0$mode");
927 4         62 $bin_ent->bodyhandle($self->new_body_for($bin_ent->head));
928 4 50       13 $bin_ent->bodyhandle->binmode(1) or die "$ME: can't set to binmode: $!";
929 4 50       16 my $io = $bin_ent->bodyhandle->open("w") or die "$ME: can't create: $!";
930 4 50       14 $io->print($bin_data) or die "$ME: can't print: $!";
931 4 50       59 $io->close or die "$ME: can't close: $!";
932 4         408 push @parts, $bin_ent;
933             }
934             }
935              
936             ### Did we get anything?
937 2 50       9 @parts or return undef;
938             ### Set the parts and a nice preamble:
939 2         11 $top_ent->parts(\@parts);
940 2         24 $top_ent->preamble
941             (["The following is a multipart MIME message which was extracted\n",
942             "from a $how_encoded-encoded message.\n"]);
943 2         28 $top_ent;
944             }
945              
946             #------------------------------
947             #
948             # process_message IN, READER, ENTITY
949             #
950             # I
951             # Process the singlepart body, and return true.
952             # Fatal exception on failure.
953             # Invoked by process_part().
954             #
955             sub process_message {
956 13     13 0 30 my ($self, $in, $rdr, $ent) = @_;
957 13         43 my $head = $ent->head;
958              
959 13         40 $self->debug("process_message");
960              
961             ### Verify the encoding restrictions:
962 13         44 my $encoding = $head->mime_encoding;
963 13 50       81 if ($encoding !~ /^(7bit|8bit|binary)$/) {
964 0         0 $self->error("illegal encoding [$encoding] for MIME type ".
965             $head->mime_type."\n");
966 0         0 $encoding = 'binary';
967             }
968              
969             ### Parse the message:
970 13         52 my $msg = $self->process_part($in, $rdr);
971 13 50       40 return undef unless defined($msg);
972              
973             ### How to handle nested messages?
974 13 100       43 if ($self->extract_nested_messages eq 'REPLACE') {
975 1         6 %$ent = %$msg; ### shallow replace
976 1         4 %$msg = ();
977             }
978             else { ### "NEST" or generic 1:
979 12         45 $ent->bodyhandle(undef);
980 12         44 $ent->add_part($msg);
981             }
982 13         57 1;
983             }
984              
985             #------------------------------
986             #
987             # process_part IN, READER, [OPTSHASH...]
988             #
989             # I
990             # The real back-end engine.
991             # See the documentation up top for the overview of the algorithm.
992             # The OPTSHASH can contain:
993             #
994             # Retype => retype this part to the given content-type
995             #
996             # Return the entity.
997             # Fatal exception on failure. Returns undef if message to complex
998             #
999             sub process_part {
1000 169     169 0 453 my ($self, $in, $rdr, %p) = @_;
1001              
1002 169 50       477 if ($self->{MP5_MaxParts} > 0) {
1003 0         0 $self->{MP5_NumParts}++;
1004 0 0       0 if ($self->{MP5_NumParts} > $self->{MP5_MaxParts}) {
1005             # Return UNDEF if msg too complex
1006 0         0 return undef;
1007             }
1008             }
1009              
1010 169   66     850 $rdr ||= MIME::Parser::Reader->new;
1011             #debug "process_part";
1012 169         421 $self->results->level(+1);
1013              
1014             ### Create a new entity:
1015 169         413 my $ent = $self->interface('ENTITY_CLASS')->new;
1016              
1017             ### Parse and add the header:
1018 169         514 my $head = $self->process_header($in, $rdr);
1019 168 100       458 if (not defined $head) {
1020 2         5 $self->debug("bogus empty part");
1021 2         6 $head = $self->interface('HEAD_CLASS')->new;
1022 2         51 $head->mime_type('text/plain');
1023 2         5 $ent->head($head);
1024 2         7 $ent->bodyhandle($self->new_body_for($head));
1025 2 50       6 $ent->bodyhandle->open("w")->close or die "$ME: can't close: $!";
1026 2         29 $self->results->level(-1);
1027 2         5 return $ent;
1028             }
1029 166         523 $ent->head($head);
1030              
1031             ### Tweak the content-type based on context from our parent...
1032             ### For example, multipart/digest messages default to type message/rfc822:
1033 166 100       417 $head->mime_type($p{Retype}) if $p{Retype};
1034              
1035             ### Get the MIME type and subtype:
1036 166         509 my ($type, $subtype) = (split('/', $head->mime_type, -1), '');
1037 166         702 $self->debug("type = $type, subtype = $subtype");
1038              
1039             ### Handle, according to the MIME type:
1040 166 100 66     1546 if ($type eq 'multipart') {
    100 33        
1041 39 50       161 return undef unless defined($self->process_multipart($in, $rdr, $ent));
1042             }
1043             elsif (("$type/$subtype" eq "message/rfc822" ||
1044             "$type/$subtype" eq "message/external-body" ||
1045             ("$type/$subtype" eq "message/partial" && defined($head->mime_attr("content-type.number")) && $head->mime_attr("content-type.number") == 1)) &&
1046             $self->extract_nested_messages) {
1047 13         34 $self->debug("attempting to process a nested message");
1048 13 50       57 return undef unless defined($self->process_message($in, $rdr, $ent));
1049             }
1050             else {
1051 114         370 $self->process_singlepart($in, $rdr, $ent);
1052             }
1053              
1054             ### Done (we hope!):
1055 166         27681 $self->results->level(-1);
1056 166         1083 return $ent;
1057             }
1058              
1059              
1060              
1061             =back
1062              
1063             =head2 Parsing an input source
1064              
1065             =over 4
1066              
1067             =cut
1068              
1069             #------------------------------
1070              
1071             =item parse_data DATA
1072              
1073             I
1074             Parse a MIME message that's already in core. This internally creates an "in
1075             memory" filehandle on a Perl scalar value using PerlIO
1076              
1077             You may supply the DATA in any of a number of ways...
1078              
1079             =over 4
1080              
1081             =item *
1082              
1083             B which holds the message. A reference to this scalar will be used
1084             internally.
1085              
1086             =item *
1087              
1088             B which holds the message. This reference will be used
1089             internally.
1090              
1091             =item *
1092              
1093             B
1094              
1095             B The array is internally concatenated into a
1096             temporary string, and a reference to the new string is used internally.
1097              
1098             It is much more efficient to pass in a scalar reference, so please consider
1099             refactoring your code to use that interface instead. If you absolutely MUST
1100             pass an array, you may be better off using IO::ScalarArray in the calling code
1101             to generate a filehandle, and passing that filehandle to I
1102              
1103             =back
1104              
1105             Returns the parsed MIME::Entity on success.
1106              
1107             =cut
1108              
1109             sub parse_data {
1110 11     11 1 747 my ($self, $data) = @_;
1111              
1112             ### Get data as a scalar:
1113 11         18 my $io;
1114              
1115 11 100       49 if (! ref $data ) {
    100          
    50          
1116 9         73 $io = IO::File->new(\$data, '<:');
1117             } elsif( ref $data eq 'SCALAR' ) {
1118 1         7 $io = IO::File->new($data, '<:');
1119             } elsif( ref $data eq 'ARRAY' ) {
1120             # Passing arrays is deprecated now that we've nuked IO::ScalarArray
1121             # but for backwards compatibility we still support it by joining the
1122             # array lines to a scalar and doing scalar IO on it.
1123 1         3 my $tmp_data = join('', @$data);
1124 1         7 $io = IO::File->new(\$tmp_data, '<:');
1125             } else {
1126 0         0 croak "parse_data: wrong argument ref type: ", ref($data);
1127             }
1128              
1129             ### Parse!
1130 11         6062 return $self->parse($io);
1131             }
1132              
1133             #------------------------------
1134              
1135             =item parse INSTREAM
1136              
1137             I
1138             Takes a MIME-stream and splits it into its component entities.
1139              
1140             The INSTREAM can be given as an IO::File, a globref filehandle (like
1141             C<\*STDIN>), or as I blessed object conforming to the IO::
1142             interface (which minimally implements getline() and read()).
1143              
1144             Returns the parsed MIME::Entity on success.
1145             Throws exception on failure. If the message contained too many
1146             parts (as set by I), returns undef.
1147              
1148             =cut
1149              
1150             sub parse {
1151 53     53 1 2747 my $self = shift;
1152 53         95 my $in = shift;
1153 53         93 my $entity;
1154 53         246 local $/ = "\n"; ### just to be safe
1155              
1156 53         152 local $\ = undef; # CPAN ticket #71041
1157 53         265 $self->init_parse;
1158 53         239 $entity = $self->process_part($in, undef); ### parse!
1159              
1160 52         375 $entity;
1161             }
1162              
1163             ### Backcompat:
1164             sub read {
1165 0     0 1 0 shift->parse(@_);
1166             }
1167             sub parse_FH {
1168 0     0 0 0 shift->parse(@_);
1169             }
1170              
1171             #------------------------------
1172              
1173             =item parse_open EXPR
1174              
1175             I
1176             Convenience front-end onto C.
1177             Simply give this method any expression that may be sent as the second
1178             argument to open() to open a filehandle for reading.
1179              
1180             Returns the parsed MIME::Entity on success.
1181             Throws exception on failure.
1182              
1183             =cut
1184              
1185             sub parse_open {
1186 30     30 1 6520 my ($self, $expr) = @_;
1187 30         45 my $ent;
1188              
1189 30 50       236 my $io = IO::File->new($expr) or die "$ME: couldn't open $expr: $!";
1190 30         2484 $ent = $self->parse($io);
1191 29 50       113 $io->close or die "$ME: can't close: $!";
1192 29         511 $ent;
1193             }
1194              
1195             ### Backcompat:
1196             sub parse_in {
1197 0     0 0 0 usage "parse_in() is now parse_open()";
1198 0         0 shift->parse_open(@_);
1199             }
1200              
1201             #------------------------------
1202              
1203             =item parse_two HEADFILE, BODYFILE
1204              
1205             I
1206             Convenience front-end onto C, intended for programs
1207             running under mail-handlers like B, which splits the incoming
1208             mail message into a header file and a body file.
1209             Simply give this method the paths to the respective files.
1210              
1211             B it is assumed that, once the files are cat'ed together,
1212             there will be a blank line separating the head part and the body part.
1213              
1214             B new implementation slurps files into line array
1215             for portability, instead of using 'cat'. May be an issue if
1216             your messages are large.
1217              
1218             Returns the parsed MIME::Entity on success.
1219             Throws exception on failure.
1220              
1221             =cut
1222              
1223             sub parse_two {
1224 1     1 1 7 my ($self, $headfile, $bodyfile) = @_;
1225 1         2 my $data;
1226 1         3 foreach ($headfile, $bodyfile) {
1227 2 50       104 open IN, "<$_" or die "$ME: open $_: $!";
1228 2         3 $data .= do { local $/; };
  2         7  
  2         31  
1229 2 50       47 close IN or die "$ME: can't close: $!";
1230             }
1231 1         4 return $self->parse_data($data);
1232             }
1233              
1234             =back
1235              
1236             =cut
1237              
1238              
1239              
1240              
1241             #------------------------------------------------------------
1242              
1243             =head2 Specifying output destination
1244              
1245             B in 5.212 and before, this was done by methods
1246             of MIME::Parser. However, since many users have requested
1247             fine-tuned control over how this is done, the logic has been split
1248             off from the parser into its own class, MIME::Parser::Filer
1249             Every MIME::Parser maintains an instance of a MIME::Parser::Filer
1250             subclass to manage disk output (see L for details.)
1251              
1252             The benefit to this is that the MIME::Parser code won't be
1253             confounded with a lot of garbage related to disk output.
1254             The drawback is that the way you override the default behavior
1255             will change.
1256              
1257             For now, all the normal public-interface methods are still provided,
1258             but many are only stubs which create or delegate to the underlying
1259             MIME::Parser::Filer object.
1260              
1261             =over 4
1262              
1263             =cut
1264              
1265             #------------------------------
1266              
1267             =item filer [FILER]
1268              
1269             I
1270             Get/set the FILER object used to manage the output of files to disk.
1271             This will be some subclass of L.
1272              
1273             =cut
1274              
1275             sub filer {
1276 313     313 1 17410 my ($self, $filer) = @_;
1277 313 100       768 if (@_ > 1) {
1278 74         186 $self->{MP5_Filer} = $filer;
1279 74         477 $filer->results($self->results); ### but we still need in init_parse
1280             }
1281 313         1377 $self->{MP5_Filer};
1282             }
1283              
1284             #------------------------------
1285              
1286             =item output_dir DIRECTORY
1287              
1288             I
1289             Causes messages to be filed directly into the given DIRECTORY.
1290             It does this by setting the underlying L to
1291             a new instance of MIME::Parser::FileInto, and passing the arguments
1292             into that class' new() method.
1293              
1294             B Since this method replaces the underlying
1295             filer, you must invoke it I doing changing any attributes
1296             of the filer, like the output prefix; otherwise those changes
1297             will be lost.
1298              
1299             =cut
1300              
1301             sub output_dir {
1302 78     78 1 274 my ($self, @init) = @_;
1303 78 100       215 if (@_ > 1) {
1304 72         548 $self->filer(MIME::Parser::FileInto->new(@init));
1305             }
1306             else {
1307 6         21 &MIME::Tools::whine("0-arg form of output_dir is deprecated.");
1308 6         16 return $self->filer->output_dir;
1309             }
1310             }
1311              
1312             #------------------------------
1313              
1314             =item output_under BASEDIR, OPTS...
1315              
1316             I
1317             Causes messages to be filed directly into subdirectories of the given
1318             BASEDIR, one subdirectory per message. It does this by setting the
1319             underlying L to a new instance of MIME::Parser::FileUnder,
1320             and passing the arguments into that class' new() method.
1321              
1322             B Since this method replaces the underlying
1323             filer, you must invoke it I doing changing any attributes
1324             of the filer, like the output prefix; otherwise those changes
1325             will be lost.
1326              
1327             =cut
1328              
1329             sub output_under {
1330 2     2 1 12 my ($self, @init) = @_;
1331 2 50       7 if (@_ > 1) {
1332 2         22 $self->filer(MIME::Parser::FileUnder->new(@init));
1333             }
1334             else {
1335 0         0 &MIME::Tools::whine("0-arg form of output_under is deprecated.");
1336 0         0 return $self->filer->output_dir;
1337             }
1338             }
1339              
1340             #------------------------------
1341              
1342             =item output_path HEAD
1343              
1344             I
1345             Given a MIME head for a file to be extracted, come up with a good
1346             output pathname for the extracted file.
1347             Identical to the preferred form:
1348              
1349             $parser->filer->output_path(...args...);
1350              
1351             We just delegate this to the underlying L object.
1352              
1353             =cut
1354              
1355             sub output_path {
1356 87     87 1 128 my $self = shift;
1357             ### We use it, so don't warn!
1358             ### &MIME::Tools::whine("output_path deprecated in MIME::Parser");
1359 87         233 $self->filer->output_path(@_);
1360             }
1361              
1362             #------------------------------
1363              
1364             =item output_prefix [PREFIX]
1365              
1366             I
1367             Get/set the short string that all filenames for extracted body-parts
1368             will begin with (assuming that there is no better "recommended filename").
1369             Identical to the preferred form:
1370              
1371             $parser->filer->output_prefix(...args...);
1372              
1373             We just delegate this to the underlying L object.
1374              
1375             =cut
1376              
1377             sub output_prefix {
1378 0     0 1 0 my $self = shift;
1379 0         0 &MIME::Tools::whine("output_prefix deprecated in MIME::Parser");
1380 0         0 $self->filer->output_prefix(@_);
1381             }
1382              
1383             #------------------------------
1384              
1385             =item evil_filename NAME
1386              
1387             I
1388             Identical to the preferred form:
1389              
1390             $parser->filer->evil_filename(...args...);
1391              
1392             We just delegate this to the underlying L object.
1393              
1394             =cut
1395              
1396             sub evil_filename {
1397 2     2 1 16 my $self = shift;
1398 2         7 &MIME::Tools::whine("evil_filename deprecated in MIME::Parser");
1399 2         7 $self->filer->evil_filename(@_);
1400             }
1401              
1402             #------------------------------
1403              
1404             =item max_parts NUM
1405              
1406             I
1407             Limits the number of MIME parts we will parse.
1408              
1409             Normally, instances of this class parse a message to the bitter end.
1410             Messages with many MIME parts can cause excessive memory consumption.
1411             If you invoke this method, parsing will abort with a die() if a message
1412             contains more than NUM parts.
1413              
1414             If NUM is set to -1 (the default), then no maximum limit is enforced.
1415              
1416             With no argument, returns the current setting as an integer
1417              
1418             =cut
1419              
1420             sub max_parts {
1421 0     0 1 0 my($self, $num) = @_;
1422 0 0       0 if (@_ > 1) {
1423 0         0 $self->{MP5_MaxParts} = $num;
1424             }
1425 0         0 return $self->{MP5_MaxParts};
1426             }
1427              
1428             #------------------------------
1429              
1430             =item output_to_core YESNO
1431              
1432             I
1433             Normally, instances of this class output all their decoded body
1434             data to disk files (via MIME::Body::File). However, you can change
1435             this behaviour by invoking this method before parsing:
1436              
1437             If YESNO is false (the default), then all body data goes
1438             to disk files.
1439              
1440             If YESNO is true, then all body data goes to in-core data structures
1441             This is a little risky (what if someone emails you an MPEG or a tar
1442             file, hmmm?) but people seem to want this bit of noose-shaped rope,
1443             so I'm providing it.
1444             Note that setting this attribute true I mean that parser-internal
1445             temporary files are avoided! Use L for that.
1446              
1447             With no argument, returns the current setting as a boolean.
1448              
1449             =cut
1450              
1451             sub output_to_core {
1452 161     161 1 4901 my ($self, $yesno) = @_;
1453 161 100       562 if (@_ > 1) {
1454 41 100 100     226 $yesno = 0 if ($yesno and $yesno eq 'NONE');
1455 41         108 $self->{MP5_FilerToCore} = $yesno;
1456             }
1457 161         515 $self->{MP5_FilerToCore};
1458             }
1459              
1460              
1461             =item tmp_recycling
1462              
1463             I
1464              
1465             This method is a no-op to preserve the pre-5.421 API.
1466              
1467             The tmp_recycling() feature was removed in 5.421 because it had never actually
1468             worked. Please update your code to stop using it.
1469              
1470             =cut
1471              
1472             sub tmp_recycling
1473             {
1474 1     1 1 264 return;
1475             }
1476              
1477              
1478              
1479             #------------------------------
1480              
1481             =item tmp_to_core [YESNO]
1482              
1483             I
1484             Should L create real temp files, or
1485             use fake in-core ones? Normally we allow the creation of temporary
1486             disk files, since this allows us to handle huge attachments even when
1487             core is limited.
1488              
1489             If YESNO is true, we implement new_tmpfile() via in-core handles.
1490             If YESNO is false (the default), we use real tmpfiles.
1491             With no argument, just returns the current setting.
1492              
1493             =cut
1494              
1495             sub tmp_to_core {
1496 0     0 1 0 my ($self, $yesno) = @_;
1497 0 0       0 $self->{MP5_TmpToCore} = $yesno if (@_ > 1);
1498 0         0 $self->{MP5_TmpToCore};
1499             }
1500              
1501             #------------------------------
1502              
1503             =item use_inner_files [YESNO]
1504              
1505             I.
1506              
1507             I
1508              
1509             MIME::Parser no longer supports IO::InnerFile, but this method is retained for
1510             backwards compatibility. It does nothing.
1511              
1512             The original reasoning for IO::InnerFile was that inner files were faster than
1513             "in-core" temp files. At the time, the "in-core" tempfile support was
1514             implemented with IO::Scalar from the IO-Stringy distribution, which used the
1515             tie() interface to wrap a scalar with the appropriate IO::Handle operations.
1516             The penalty for this was fairly hefty, and IO::InnerFile actually was faster.
1517              
1518             Nowadays, MIME::Parser uses Perl's built in ability to open a filehandle on an
1519             in-memory scalar variable via PerlIO. Benchmarking shows that IO::InnerFile is
1520             slightly slower than using in-memory temporary files, and is slightly faster
1521             than on-disk temporary files. Both measurements are within a few percent of
1522             each other. Since there's no real benefit, and since the IO::InnerFile abuse
1523             was fairly hairy and evil ("writes" to it were faked by extending the size of
1524             the inner file with the assumption that the only data you'd ever ->print() to
1525             it would be the line from the "outer" file, for example) it's been removed.
1526              
1527             =cut
1528              
1529             sub use_inner_files {
1530 0     0 1 0 return 0;
1531             }
1532              
1533             =back
1534              
1535             =cut
1536              
1537              
1538             #------------------------------------------------------------
1539              
1540             =head2 Specifying classes to be instantiated
1541              
1542             =over 4
1543              
1544             =cut
1545              
1546             #------------------------------
1547              
1548             =item interface ROLE,[VALUE]
1549              
1550             I
1551             During parsing, the parser normally creates instances of certain classes,
1552             like MIME::Entity. However, you may want to create a parser subclass
1553             that uses your own experimental head, entity, etc. classes (for example,
1554             your "head" class may provide some additional MIME-field-oriented methods).
1555              
1556             If so, then this is the method that your subclass should invoke during
1557             init. Use it like this:
1558              
1559             package MyParser;
1560             @ISA = qw(MIME::Parser);
1561             ...
1562             sub init {
1563             my $self = shift;
1564             $self->SUPER::init(@_); ### do my parent's init
1565             $self->interface(ENTITY_CLASS => 'MIME::MyEntity');
1566             $self->interface(HEAD_CLASS => 'MIME::MyHead');
1567             $self; ### return
1568             }
1569              
1570             With no VALUE, returns the VALUE currently associated with that ROLE.
1571              
1572             =cut
1573              
1574             sub interface {
1575 426     426 1 709 my ($self, $role, $value) = @_;
1576 426 100       1059 $self->{MP5_Interface}{$role} = $value if (defined($value));
1577 426         1903 $self->{MP5_Interface}{$role};
1578             }
1579              
1580             #------------------------------
1581              
1582             =item new_body_for HEAD
1583              
1584             I
1585             Based on the HEAD of a part we are parsing, return a new
1586             body object (any desirable subclass of MIME::Body) for
1587             receiving that part's data.
1588              
1589             If you set the C option to false before parsing
1590             (the default), then we call C and create a
1591             new MIME::Body::File on that filename.
1592              
1593             If you set the C option to true before parsing,
1594             then you get a MIME::Body::InCore instead.
1595              
1596             If you want the parser to do something else entirely, you can
1597             override this method in a subclass.
1598              
1599             =cut
1600              
1601             sub new_body_for {
1602 120     120 1 195 my ($self, $head) = @_;
1603              
1604 120 100       338 if ($self->output_to_core) {
1605 27         73 $self->debug("outputting body to core");
1606 27         293 return (new MIME::Body::InCore);
1607             }
1608             else {
1609 93         294 my $outpath = $self->output_path($head);
1610 93         433 $self->debug("outputting body to disk file: $outpath");
1611 93         241 $self->filer->purgeable($outpath); ### we plan to use it
1612 93         750 return (new MIME::Body::File $outpath);
1613             }
1614             }
1615              
1616             #------------------------------
1617              
1618             =pod
1619              
1620             =back
1621              
1622             =head2 Temporary File Creation
1623              
1624             =over
1625              
1626             =item tmp_dir DIRECTORY
1627              
1628             I
1629             Causes any temporary files created by this parser to be created in the
1630             given DIRECTORY.
1631              
1632             If called without arguments, returns current value.
1633              
1634             The default value is undef, which will cause new_tmpfile() to use the
1635             system default temporary directory.
1636              
1637             =cut
1638              
1639             sub tmp_dir
1640             {
1641 98     98 1 149 my ($self, $dirname) = @_;
1642 98 50       213 if ( $dirname ) {
1643 0         0 $self->{MP5_TmpDir} = $dirname;
1644             }
1645              
1646 98         270 return $self->{MP5_TmpDir};
1647             }
1648              
1649             =item new_tmpfile
1650              
1651             I
1652             Return an IO handle to be used to hold temporary data during a parse.
1653              
1654             The default uses MIME::Tools::tmpopen() to create a new temporary file,
1655             unless L dictates otherwise, but you can
1656             override this. You shouldn't need to.
1657              
1658             The location for temporary files can be changed on a per-parser basis
1659             with L.
1660              
1661             If you do override this, make certain that the object you return is
1662             set for binmode(), and is able to handle the following methods:
1663              
1664             read(BUF, NBYTES)
1665             getline()
1666             getlines()
1667             print(@ARGS)
1668             flush()
1669             seek(0, 0)
1670              
1671             Fatal exception if the stream could not be established.
1672              
1673             =cut
1674              
1675             sub new_tmpfile {
1676 98     98 1 3183 my ($self) = @_;
1677              
1678 98         136 my $io;
1679 98 100       276 if ($self->{MP5_TmpToCore}) {
1680 1         2 my $var;
1681 1 50       7 $io = IO::File->new(\$var, '+>:') or die "$ME: Can't open in-core tmpfile: $!";
1682             } else {
1683 97         154 my $args = {};
1684 97 100       274 if( $self->tmp_dir ) {
1685 1         4 $args->{DIR} = $self->tmp_dir;
1686             }
1687 97 50       351 $io = tmpopen( $args ) or die "$ME: can't open tmpfile: $!\n";
1688 97 50       277920 binmode($io) or die "$ME: can't set to binmode: $!";
1689             }
1690 98         263 return $io;
1691             }
1692              
1693             =back
1694              
1695             =cut
1696              
1697              
1698              
1699              
1700              
1701              
1702             #------------------------------------------------------------
1703              
1704             =head2 Parse results and error recovery
1705              
1706             =over 4
1707              
1708             =cut
1709              
1710             #------------------------------
1711              
1712             =item last_error
1713              
1714             I
1715             Return the error (if any) that we ignored in the last parse.
1716              
1717             =cut
1718              
1719             sub last_error {
1720 0     0 1 0 join '', shift->results->errors;
1721             }
1722              
1723              
1724             #------------------------------
1725              
1726             =item last_head
1727              
1728             I
1729             Return the top-level MIME header of the last stream we attempted to parse.
1730             This is useful for replying to people who sent us bad MIME messages.
1731              
1732             ### Parse an input stream:
1733             eval { $entity = $parser->parse(\*STDIN) };
1734             if (!$entity) { ### parse failed!
1735             my $decapitated = $parser->last_head;
1736             ...
1737             }
1738              
1739             =cut
1740              
1741             sub last_head {
1742 0     0 1 0 shift->results->top_head;
1743             }
1744              
1745             #------------------------------
1746              
1747             =item results
1748              
1749             I
1750             Return an object containing lots of info from the last entity parsed.
1751             This will be an instance of class
1752             L.
1753              
1754             =cut
1755              
1756             sub results {
1757 654     654 1 2676 shift->{MP5_Results};
1758             }
1759              
1760              
1761             =back
1762              
1763             =cut
1764              
1765              
1766             #------------------------------
1767             1;
1768             __END__