File Coverage

blib/lib/Text/Original.pm
Criterion Covered Total %
statement 22 22 100.0
branch n/a
condition 2 2 100.0
subroutine 8 8 100.0
pod 3 3 100.0
total 35 35 100.0


line stmt bran cond sub pod time code
1             package Text::Original;
2 1     1   789 use 5.006; use strict; use warnings;
  1     1   3  
  1     1   39  
  1         6  
  1         2  
  1         65  
  1         15  
  1         2  
  1         29  
3 1     1   912 use Memoize;
  1         2176  
  1         223  
4              
5             =head1 NAME
6              
7             Text::Original - Find original, non-quoted text in a message
8              
9             =head1 SYNOPSIS
10              
11             use Text::Original;
12             my $sentence = first_sentence($email->body);
13              
14             =head1 FUNCTIONS
15              
16             =cut
17              
18              
19             our @ISA = qw(Exporter);
20             our %EXPORT_TAGS = ( 'all' => [ qw( first_lines first_paragraph first_sentence) ] );
21             our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
22             our @EXPORT = ( @{ $EXPORT_TAGS{'all'} } );
23              
24             our $VERSION = '1.5';
25              
26             =head2 first_lines
27              
28             first_lines($text, 20);
29              
30             Returns the a number of lines after the first non blank, none quoted
31             line of the body of the email.
32              
33             It will guess at attribution lines and skip them as well.
34              
35             It will return super cited lines. This is the super-citers'
36             fault, not ours.
37              
38             It won't catch all types of attribution lines;
39              
40             It can optionally be passed a number of lines to get.
41              
42             =cut
43              
44             sub first_lines {
45 2     2 1 844 my $text = shift;
46 2   100     13 my $num = shift || 1;
47              
48 2         49 return _significant_signal($text, lines => $num);
49             }
50              
51              
52             =head2 first_paragraph
53              
54             Returns the first original paragraph of the message
55              
56             =cut
57              
58             sub first_paragraph {
59 2     2 1 47 return _significant_signal(shift, para => 1);
60             }
61              
62             =head2 first_sentence
63              
64             Returns the first original sentence of the message
65              
66             =cut
67              
68             sub first_sentence {
69 1     1 1 392 my $text = first_paragraph(shift);
70 1     1   798 $text =~ s/(\p{STerm}) .*/$1/s;
  1         7  
  1         12  
  1         16  
71 1         14 return $text;
72             }
73              
74             # Kudos to Damian Conway for this bit.
75             my $quotechar = qq{[!#%=|:]};
76             my $quotechunk = qq{(?:$quotechar(?![a-z])|[a-z]*>+)};
77             my $quoter = qq{(?:(?i)(?:$quotechunk(?:[ \\t]*$quotechunk)*))};
78              
79             sub _significant_signal {
80             my $text = shift;
81             my %opts = @_;
82              
83             my $return = "";
84             my $lines = 0;
85              
86             # get all the lines from the main part of the body
87             my @lines = split /$/m, $text;
88              
89             # right, find the start of the original content or quoted
90             # content (i.e. skip past the attributation)
91             my $not_started = 1;
92             while (@lines && $not_started) {
93             # next line
94             local $_ = shift @lines;
95             #print "}}$_";
96              
97             # blank lines, euurgh
98             next if /^\s*$/;
99             # quotes (we don't count quoted From's)
100             next if /^\s*>(?!From)/;
101             # Other kinds of quoter:
102             next if /^\s*$quoter/;
103             # skip obvious attribution
104             next if /^\s*On (Mon|Tue|Wed|Thu|Fri|Sat|Sun)/i;
105             next if /\d{4}-?\w{2,3}-?\d{2}.*\d+:\d+:\d+/i; # Looks like a date
106             next if /^\w+(\s\w+)?:$/; # lathos' minimalist attributions. :)
107             next if /^\s*.+=? wrote:/i;
108              
109             # skip signed messages
110             next if /^\s*-----/;
111             next if /^Hash:/;
112              
113             # annoying hi messages (this won't work with i18n)
114             next if /^\s*(?:hello|hi|hey|greetings|salut
115             |good (?:morning|afternoon|day|evening))
116             (?:\W.{0,14})?\s*$/ixs;
117              
118             # snips
119             next if m~\s* # whitespace
120             [<.=-_*+({\[]*? # opening bracket
121             (?:snip|cut|delete|deleted) # snip?
122             [^>}\]]*? # some words?
123             [>.=-_*+)}\]]*? # closing bracket
124             \s*$ # end of the line
125             ~xi;
126              
127             # [.. foo ..] or ...foo.. or so on
128             next if m~\s*\[?\.\..*?\.\.]?\s*$~;
129              
130             # ... or [...]
131             next if m~\s*\[?\.\.\.]?\s*$~;
132              
133             # if we got this far then we've probably got past the
134             # attibutation lines
135             unshift @lines, $_; # undo the shift
136             undef $not_started; # and say we've started.
137             }
138              
139             # okay, let's _try_ to build up some content then
140             foreach (@lines) {
141             # are we at the end of a paragraph?
142             last if (defined $opts{'para'} # paragraph mode?
143             && $opts{'para'}==1
144             && $lines>0 # got some lines aready?
145             && /^\s*$/); # and now we've found a gap?
146              
147             # blank lines, euurgh
148             next if /^\s*$/;
149             # quotes (we don't count quoted From's)
150             next if /^\s*>(?!From)/;
151              
152             # if we got this far then the line was a useful one
153             $lines++;
154              
155             # sort of munged Froms
156             s/^>From/From/;
157             s/^\n+//;
158             $return .= "\n" if $lines>1;
159             $return .= $_;
160             last if (defined $opts{'lines'} && $opts{'lines'}==$lines);
161             }
162             return $return;
163             }
164              
165             memoize('_significant_signal');
166              
167             1;
168              
169             =head1 EXPORTS
170              
171             All of the above.
172              
173             =head1 AUTHOR
174              
175             Simon Wistow and the Mariachi project.
176             See http://siesta.unixbeard.net/
177              
178             Packaged by Simon Cozens
179              
180             Currently maintained by Simon Wistow
181              
182             =head1 COPYRIGHT
183              
184             Copyright 2004 The Siesta Project
185              
186             This library is free software; you can redistribute it and/or modify it
187             under the same terms as Perl itself.