line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package WWW::Topica; |
2
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
938
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
40
|
|
4
|
1
|
|
|
1
|
|
5
|
use Cwd; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
79
|
|
5
|
1
|
|
|
1
|
|
14
|
use Carp qw(carp croak); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
56
|
|
6
|
1
|
|
|
1
|
|
4
|
use Date::Parse; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
131
|
|
7
|
1
|
|
|
1
|
|
1130
|
use Email::Date; |
|
1
|
|
|
|
|
13865
|
|
|
1
|
|
|
|
|
73
|
|
8
|
1
|
|
|
1
|
|
896
|
use Email::Simple; |
|
1
|
|
|
|
|
5823
|
|
|
1
|
|
|
|
|
34
|
|
9
|
1
|
|
|
1
|
|
11
|
use Email::Simple::Creator; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
22
|
|
10
|
1
|
|
|
1
|
|
1062
|
use HTML::Entities; |
|
1
|
|
|
|
|
6192
|
|
|
1
|
|
|
|
|
108
|
|
11
|
1
|
|
|
1
|
|
1055
|
use HTML::Scrubber; |
|
1
|
|
|
|
|
2394
|
|
|
1
|
|
|
|
|
34
|
|
12
|
1
|
|
|
1
|
|
1180
|
use LWP::UserAgent; |
|
1
|
|
|
|
|
63360
|
|
|
1
|
|
|
|
|
29
|
|
13
|
1
|
|
|
1
|
|
8
|
use URI; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
22
|
|
14
|
|
|
|
|
|
|
|
15
|
1
|
|
|
1
|
|
5
|
use vars qw($VERSION); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
52
|
|
16
|
|
|
|
|
|
|
|
17
|
1
|
|
|
1
|
|
601
|
use WWW::Topica::Index; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
22
|
|
18
|
1
|
|
|
1
|
|
497
|
use WWW::Topica::Mail; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
22
|
|
19
|
1
|
|
|
1
|
|
500
|
use WWW::Topica::Reply; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
1869
|
|
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
$VERSION = '0.6'; |
22
|
|
|
|
|
|
|
my $USER_AGENT = 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)'; |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
=pod |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
=head1 NAME |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
WWW::Topica - read emails from a Topica mailing list |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
=head1 SYNOPSIS |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
my $topica = WWW::Topica->new( list => 'mylist', login => 'mylogin', password => 'mypass' ); |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
while (my $mail = $topica->mail) { |
37
|
|
|
|
|
|
|
Email::LocalDelivery->deliver($mail, 'mylist.mbox'); |
38
|
|
|
|
|
|
|
} |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
=head1 DESCRIPTION |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
This module screen scrapes the Topica website and fetches back RFC822 text representations |
43
|
|
|
|
|
|
|
of all the mails posted to a given list. Where possible it fills in the from, to and date |
44
|
|
|
|
|
|
|
fields. It should be noted that in some cases it's impossible to get both the sender name |
45
|
|
|
|
|
|
|
and their email address. |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
=head1 METHODS |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
=cut |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
=head2 new |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
Takes three options, the list name, your login account and your password; |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
You can also pass in C and C. C will print out |
56
|
|
|
|
|
|
|
various debugging messages whereas C will use local files for |
57
|
|
|
|
|
|
|
testing. C automatically sets C to C<1> unless debug is |
58
|
|
|
|
|
|
|
explicitly set to C<0>. |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
Furthermore if you pass in a C option the parsing will start from |
62
|
|
|
|
|
|
|
that offset. A C lets you set an upper bound. |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
=cut |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
sub new { |
67
|
1
|
|
|
1
|
1
|
743
|
my $class = shift; |
68
|
1
|
|
|
|
|
3
|
my %opts = @_; |
69
|
|
|
|
|
|
|
|
70
|
1
|
50
|
|
|
|
5
|
die "You must pass a list\n" unless defined $opts{list}; |
71
|
|
|
|
|
|
|
#die "You must pass an email\n" unless defined $opts{email}; |
72
|
|
|
|
|
|
|
#die "You must pass a password\n" unless defined $opts{password}; |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
|
75
|
1
|
|
50
|
|
|
7
|
$opts{_next} = $opts{first} || 0; |
76
|
1
|
50
|
33
|
|
|
7
|
$opts{debug} = $opts{local} if exists $opts{local} and not exists $opts{local}; |
77
|
|
|
|
|
|
|
|
78
|
1
|
|
|
|
|
8
|
$opts{scrubber} = HTML::Scrubber->new( allow => [] ); |
79
|
|
|
|
|
|
|
|
80
|
1
|
|
|
|
|
145
|
return bless \%opts, $class; |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
} |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
=head2 mail |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
Returns a mail at a time |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
Logs in automatically. |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
=cut |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
sub mail { |
94
|
301
|
|
|
301
|
1
|
3765
|
my $self = shift; |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
# first time ever |
97
|
301
|
100
|
|
|
|
1609
|
unless ($self->{_index}) |
98
|
|
|
|
|
|
|
{ |
99
|
1
|
|
|
|
|
20
|
$self->login; |
100
|
1
|
50
|
|
|
|
4
|
print STDERR "Beginning to collect mails\n" if $self->{debug}; |
101
|
|
|
|
|
|
|
} |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
# relog in after an hour |
104
|
|
|
|
|
|
|
# TODO: untested |
105
|
301
|
50
|
|
|
|
2551
|
unless ($self->{local}) { |
106
|
0
|
|
|
|
|
0
|
my $time_diff = time() - $self->{_logged_in}; |
107
|
0
|
0
|
|
|
|
0
|
$self->login() if ($time_diff>60*60); |
108
|
|
|
|
|
|
|
} |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
INDEX: |
112
|
|
|
|
|
|
|
# need to get new message ids |
113
|
301
|
100
|
100
|
|
|
3260
|
unless (defined $self->{_message_ids} && @{$self->{_message_ids}}) |
|
300
|
|
|
|
|
1632
|
|
114
|
|
|
|
|
|
|
{ |
115
|
|
|
|
|
|
|
# all over |
116
|
4
|
100
|
|
|
|
14
|
return undef unless defined $self->{_next}; |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
# the last one we want |
119
|
3
|
50
|
33
|
|
|
15
|
return undef if defined $self->{last} and $self->{_next} >= $self->{last}; |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
# end of first page |
122
|
|
|
|
|
|
|
# return undef if $self->{debug} && $self->{_index}; |
123
|
|
|
|
|
|
|
|
124
|
3
|
|
|
|
|
22
|
$self->{_index} = WWW::Topica::Index->new($self->fetch_index($self->{_next})); |
125
|
|
|
|
|
|
|
|
126
|
3
|
50
|
|
|
|
98
|
return undef unless $self->{_index}; |
127
|
|
|
|
|
|
|
|
128
|
3
|
|
|
|
|
13
|
$self->{_next} = $self->{_index}->next(); |
129
|
3
|
|
|
|
|
12
|
$self->{_prev} = $self->{_index}->prev(); |
130
|
3
|
|
|
|
|
16
|
@{$self->{_message_ids}} = $self->{_index}->message_ids; |
|
3
|
|
|
|
|
32
|
|
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
} |
133
|
|
|
|
|
|
|
|
134
|
300
|
|
|
|
|
567
|
GET: my $mess_id = shift @{$self->{_message_ids}}; |
|
300
|
|
|
|
|
1299
|
|
135
|
300
|
50
|
|
|
|
878
|
goto INDEX unless defined $mess_id; |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
# the mail has some information and also provides a link to the reply if we're logged in... |
138
|
300
|
|
|
|
|
1811
|
my $mail_html = $self->fetch_mail($mess_id); |
139
|
300
|
50
|
|
|
|
24747
|
goto GET unless $mail_html; |
140
|
300
|
|
|
|
|
6391
|
my $mail = WWW::Topica::Mail->new($mail_html, $mess_id); |
141
|
|
|
|
|
|
|
|
142
|
300
|
|
|
|
|
709
|
my $reply; |
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
# which has other information (like the un-htmled mail and the email address) ... |
145
|
300
|
50
|
|
|
|
1450
|
if ($mail->eto) { |
146
|
300
|
50
|
|
|
|
1010
|
my $reply_html = $self->fetch_reply($mess_id,$mail->eto) if defined $mail->eto; |
147
|
300
|
50
|
|
|
|
14574
|
goto GET unless $reply_html; |
148
|
300
|
|
|
|
|
3891
|
$reply = WWW::Topica::Reply->new($reply_html, $mess_id, $mail->eto); |
149
|
|
|
|
|
|
|
} |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
# now build the rfc822 mail string |
152
|
300
|
|
|
|
|
2558
|
return $self->build_rfc822($mail, $reply); |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
} |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
=head2 login |
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
Logs in to Topica and stashes the cookie. |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
Called automatically by the first call to C. |
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
Builds the loader automatically. |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
=cut |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
sub login { |
167
|
1
|
|
|
1
|
1
|
2
|
my $self = shift; |
168
|
|
|
|
|
|
|
|
169
|
1
|
|
|
|
|
4
|
$self->build_loader; |
170
|
|
|
|
|
|
|
|
171
|
1
|
|
33
|
|
|
10
|
my $anon = !defined $self->{email} || !defined $self->{password}; |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
|
174
|
1
|
50
|
|
|
|
4
|
if ($anon) { |
175
|
0
|
|
|
|
|
0
|
$self->{email} = $self->{password} = 'anonymous'; |
176
|
|
|
|
|
|
|
} |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
|
180
|
1
|
50
|
|
|
|
6
|
print STDERR "Logging in using ".$self->{email}."/".$self->{password}."\n" if $self->{debug}; |
181
|
|
|
|
|
|
|
|
182
|
1
|
50
|
|
|
|
5
|
return if $self->{local}; |
183
|
|
|
|
|
|
|
|
184
|
0
|
0
|
|
|
|
0
|
if (!$anon) { |
185
|
0
|
|
|
|
|
0
|
(undef) = $self->fetch_page("http://lists.topica.com/"); |
186
|
0
|
|
|
|
|
0
|
(undef) = $self->fetch_page("http://lists.topica.com/list.html"); |
187
|
0
|
|
|
|
|
0
|
(undef) = $self->fetch_page("http://lists.topica.com/perl/login.pl?email=".$self->{email}."&password=".$self->{password}); |
188
|
|
|
|
|
|
|
} |
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
# store when we logged in so that we can relog in again after an hour |
195
|
0
|
|
|
|
|
0
|
$self->{_logged_in} = time; |
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
} |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
=head2 fetch_index |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
Retrieve the html of the index page with the given offset. |
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
=cut |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
sub fetch_index { |
207
|
3
|
|
|
3
|
1
|
4
|
my $self = shift; |
208
|
3
|
|
|
|
|
6
|
my $offset = shift; |
209
|
3
|
|
|
|
|
5
|
my $list = $self->{list}; |
210
|
|
|
|
|
|
|
|
211
|
3
|
50
|
|
|
|
10
|
print STDERR "Fetching index $offset of list ${list}\n" if $self->{debug}; |
212
|
|
|
|
|
|
|
|
213
|
3
|
|
|
|
|
15
|
my $url = "http://lists.topica.com/lists/${list}/read?sort=d&start=$offset"; |
214
|
|
|
|
|
|
|
|
215
|
3
|
50
|
|
|
|
10
|
if ($self->{local}) { |
216
|
3
|
|
|
|
|
20032
|
$url = "file://".cwd."/t/local_files/"; |
217
|
3
|
100
|
|
|
|
107
|
if (0 == $offset) { |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
218
|
1
|
|
|
|
|
7
|
$url .= "list_first.html"; |
219
|
|
|
|
|
|
|
} elsif (100 == $offset) { |
220
|
1
|
|
|
|
|
17
|
$url .= "list_middle.html"; |
221
|
|
|
|
|
|
|
} elsif (200 == $offset) { |
222
|
1
|
|
|
|
|
9
|
$url .= "list_last.html"; |
223
|
|
|
|
|
|
|
} |
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
} |
226
|
|
|
|
|
|
|
|
227
|
3
|
|
|
|
|
45
|
return $self->fetch_page($url); |
228
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
} |
230
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
=head2 fetch_mail |
233
|
|
|
|
|
|
|
|
234
|
|
|
|
|
|
|
Retrieve the html of a the message page with the given id. |
235
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
=cut |
237
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
sub fetch_mail { |
239
|
300
|
|
|
300
|
1
|
483
|
my $self = shift; |
240
|
300
|
|
|
|
|
570
|
my $id = shift; |
241
|
300
|
|
|
|
|
638
|
my $list = $self->{list}; |
242
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
|
244
|
300
|
50
|
|
|
|
1147
|
print STDERR "\tFetching mail $id\n" if $self->{debug}; |
245
|
|
|
|
|
|
|
|
246
|
300
|
|
|
|
|
961
|
my $url = "http://lists.topica.com/lists/${list}/read/message.html?mid=$id"; |
247
|
|
|
|
|
|
|
|
248
|
300
|
50
|
|
|
|
869
|
if ($self->{local}) { |
249
|
300
|
|
|
|
|
2353631
|
$url = "file://".cwd."/t/local_files/mail.html"; |
250
|
|
|
|
|
|
|
} |
251
|
|
|
|
|
|
|
|
252
|
300
|
|
|
|
|
20480
|
return $self->fetch_page($url); |
253
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
} |
255
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
=head2 fetch_reply |
258
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
Retrieve the html of a the reply page with the given id and eto. |
260
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
=cut |
262
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
sub fetch_reply { |
264
|
300
|
|
|
300
|
1
|
861
|
my $self = shift; |
265
|
300
|
|
|
|
|
596
|
my $id = shift; |
266
|
300
|
|
|
|
|
418
|
my $eto = shift; |
267
|
300
|
|
|
|
|
909
|
my $list = $self->{list}; |
268
|
|
|
|
|
|
|
|
269
|
300
|
50
|
|
|
|
2225
|
print STDERR "\t\tFetching reply $id - $eto\n" if $self->{debug}; |
270
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
|
272
|
300
|
|
|
|
|
1404
|
my $url = "http://lists.topica.com/lists/${list}/read/post.html?mode=replytosender&mid=$id&eto=$eto"; |
273
|
|
|
|
|
|
|
|
274
|
300
|
50
|
|
|
|
1049
|
if ($self->{local}) { |
275
|
300
|
|
|
|
|
2357623
|
$url = "file://".cwd."/t/local_files/reply.html"; |
276
|
|
|
|
|
|
|
} |
277
|
|
|
|
|
|
|
|
278
|
300
|
|
|
|
|
12816
|
return $self->fetch_page($url); |
279
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
} |
282
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
=head2 build_rfc822 |
285
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
Given a C object and a C object |
287
|
|
|
|
|
|
|
build up the text of an RFC822 compliant email. |
288
|
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
=cut |
290
|
|
|
|
|
|
|
|
291
|
|
|
|
|
|
|
sub build_rfc822 { |
292
|
300
|
|
|
300
|
1
|
704
|
my $self = shift; |
293
|
300
|
|
|
|
|
630
|
my $mail = shift; |
294
|
300
|
|
|
|
|
497
|
my $reply = shift; |
295
|
|
|
|
|
|
|
|
296
|
300
|
|
|
|
|
987
|
my $list = $self->{list}; |
297
|
|
|
|
|
|
|
|
298
|
300
|
|
|
|
|
1519
|
my $mid = $mail->id; |
299
|
|
|
|
|
|
|
|
300
|
300
|
|
|
|
|
1919
|
my $name = decode_entities($mail->from); |
301
|
300
|
|
|
|
|
1561
|
my $email = ""; |
302
|
300
|
50
|
|
|
|
1040
|
if (defined $reply) { |
303
|
300
|
|
|
|
|
1275
|
$email = decode_entities($reply->email); |
304
|
|
|
|
|
|
|
} else { |
305
|
0
|
|
|
|
|
0
|
$email = "${list}\@topica.com"; |
306
|
|
|
|
|
|
|
} |
307
|
|
|
|
|
|
|
|
308
|
|
|
|
|
|
|
# we may have been confused and got name and email mixed up |
309
|
300
|
50
|
33
|
|
|
2477
|
if ($name =~ /@/ && $email !~ /@/) { |
310
|
0
|
|
|
|
|
0
|
my $tmp = $name; |
311
|
0
|
|
|
|
|
0
|
$name = $email; |
312
|
0
|
|
|
|
|
0
|
$email = $tmp; |
313
|
|
|
|
|
|
|
} |
314
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
# try and build a sane From: line |
316
|
300
|
|
|
|
|
529
|
my $from; |
317
|
300
|
50
|
33
|
|
|
4561
|
if ($name ne $email && $email =~ /@/) { |
|
|
0
|
|
|
|
|
|
318
|
300
|
|
|
|
|
1061
|
$from = "$name <$email>"; |
319
|
|
|
|
|
|
|
} elsif ($email =~ /@/) { |
320
|
0
|
|
|
|
|
0
|
$from = "<$email>"; |
321
|
|
|
|
|
|
|
} else { |
322
|
0
|
|
|
|
|
0
|
$from = "$name <${list}\@topica.com>"; |
323
|
|
|
|
|
|
|
} |
324
|
|
|
|
|
|
|
|
325
|
|
|
|
|
|
|
# get the subject from somewhere - mail preferably because then it |
326
|
|
|
|
|
|
|
# doesn't have the Re: which we don't know whether to strip out or not |
327
|
300
|
|
|
|
|
2550
|
my $subject = $mail->subject; |
328
|
300
|
50
|
33
|
|
|
7000
|
$subject = $reply->subject if defined $reply && $subject =~ /^\s*$/; |
329
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
# remove newlines |
331
|
300
|
|
|
|
|
827
|
$subject =~ s/[\n\r]//gs; |
332
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
# strip out html |
334
|
300
|
|
|
|
|
727
|
$subject =~ s! \s+!!sg; # hack |
335
|
300
|
|
|
|
|
3795
|
$subject = $self->{scrubber}->scrub($subject); |
336
|
|
|
|
|
|
|
|
337
|
300
|
|
|
|
|
56534
|
$subject = decode_entities($subject); |
338
|
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
# time |
340
|
300
|
|
33
|
|
|
1435
|
my $time = str2time(decode_entities($mail->date)) || gmtime; |
341
|
|
|
|
|
|
|
|
342
|
|
|
|
|
|
|
|
343
|
|
|
|
|
|
|
# message-id |
344
|
300
|
|
|
|
|
214823
|
my $message_id = "${mid}\@lists.topica.com"; |
345
|
|
|
|
|
|
|
|
346
|
|
|
|
|
|
|
|
347
|
|
|
|
|
|
|
# time to build the mail |
348
|
|
|
|
|
|
|
# we should probably use Email::Simple::Creator for this |
349
|
300
|
|
|
|
|
1114
|
my $string = ""; |
350
|
|
|
|
|
|
|
|
351
|
300
|
|
|
|
|
610
|
my $body = ""; |
352
|
300
|
50
|
33
|
|
|
3280
|
if ($reply && defined $reply->body) { |
353
|
300
|
|
|
|
|
1300
|
$body = $reply->body; |
354
|
|
|
|
|
|
|
}else { |
355
|
0
|
|
0
|
|
|
0
|
$body = $self->{scrubber}->scrub($mail->body) || ""; |
356
|
|
|
|
|
|
|
} |
357
|
|
|
|
|
|
|
|
358
|
300
|
|
|
|
|
5624
|
$string .= "Date: ".format_date($time)."\n"; |
359
|
300
|
|
|
|
|
43139
|
$string .= "To: ${list}\@topica.com\n"; |
360
|
300
|
|
|
|
|
1609
|
$string .= "From: $from\n"; |
361
|
300
|
|
|
|
|
872
|
$string .= "Message-ID: $message_id\n"; |
362
|
300
|
|
|
|
|
1304
|
$string .= "X-TopicaMailUrl: http://lists.topica.com/lists/${list}/read/message.html?mid=${mid}\n"; |
363
|
300
|
50
|
|
|
|
979
|
if ($reply) { |
364
|
300
|
|
|
|
|
1240
|
my $rid = $reply->id; |
365
|
300
|
|
|
|
|
1242
|
my $eto = $reply->eto; |
366
|
300
|
|
|
|
|
2035
|
$string .= "X-TopicaReplyUrl: http://lists.topica.com/lists/${list}/read/post.html?mode=replytosender&mid=${rid}&eto=${eto}\n"; |
367
|
|
|
|
|
|
|
} |
368
|
300
|
|
|
|
|
1188
|
$string .= "Subject: $subject\n"; |
369
|
300
|
|
|
|
|
1701
|
$string .= "\n$body\n\n"; |
370
|
|
|
|
|
|
|
|
371
|
300
|
|
|
|
|
8973
|
return $string; |
372
|
|
|
|
|
|
|
} |
373
|
|
|
|
|
|
|
|
374
|
|
|
|
|
|
|
=head2 build_loader |
375
|
|
|
|
|
|
|
|
376
|
|
|
|
|
|
|
Set up the LWP::UserAgent object used to fetch pages. |
377
|
|
|
|
|
|
|
|
378
|
|
|
|
|
|
|
=cut |
379
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
sub build_loader { |
381
|
1
|
|
|
1
|
1
|
1
|
my $self = shift; |
382
|
|
|
|
|
|
|
|
383
|
1
|
|
|
|
|
7
|
my $ua = new LWP::UserAgent( keep_alive => 1, timeout => 30, agent => $USER_AGENT, ); |
384
|
|
|
|
|
|
|
|
385
|
|
|
|
|
|
|
|
386
|
|
|
|
|
|
|
# setting it in the 'new' seems not to work sometimes |
387
|
1
|
|
|
|
|
5472
|
$ua->agent($USER_AGENT); |
388
|
|
|
|
|
|
|
# for some reason this makes stuff work |
389
|
1
|
|
|
|
|
62
|
$ua->max_redirect( 0 ); |
390
|
|
|
|
|
|
|
# cookies! |
391
|
1
|
|
|
|
|
17
|
$ua->cookie_jar( {} ); |
392
|
|
|
|
|
|
|
|
393
|
1
|
|
|
|
|
8287
|
$self->{_ua} = $ua; |
394
|
|
|
|
|
|
|
} |
395
|
|
|
|
|
|
|
|
396
|
|
|
|
|
|
|
=head2 fetch_page |
397
|
|
|
|
|
|
|
|
398
|
|
|
|
|
|
|
Utility function for getting a page with various niceties. |
399
|
|
|
|
|
|
|
|
400
|
|
|
|
|
|
|
=cut |
401
|
|
|
|
|
|
|
|
402
|
|
|
|
|
|
|
sub fetch_page { |
403
|
603
|
|
|
603
|
1
|
5194
|
my $self = shift; |
404
|
603
|
|
|
|
|
1401
|
my $url = shift; |
405
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
# print STDERR "\tfetching $url\n" if $self->{debug}; |
407
|
|
|
|
|
|
|
|
408
|
|
|
|
|
|
|
# make a full set of headers |
409
|
603
|
|
|
|
|
30155
|
my $h = new HTTP::Headers( |
410
|
|
|
|
|
|
|
'Host' => "lists.topica.com", |
411
|
|
|
|
|
|
|
'User-Agent' => $USER_AGENT, |
412
|
|
|
|
|
|
|
'Referer' => $url, |
413
|
|
|
|
|
|
|
'Accept' => 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,video/x-mng,image/png,image/jpeg,image/gif;q=0.2,*/*;q=0.1', |
414
|
|
|
|
|
|
|
'Accept-Language' => 'en-us,en;q=0.5', |
415
|
|
|
|
|
|
|
'Accept-Charset' => 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', |
416
|
|
|
|
|
|
|
#'Accept-Encoding' => 'gzip,deflate', |
417
|
|
|
|
|
|
|
'Keep-Alive' => '300', |
418
|
|
|
|
|
|
|
'Connection' => 'keep-alive', |
419
|
|
|
|
|
|
|
|
420
|
|
|
|
|
|
|
); |
421
|
|
|
|
|
|
|
|
422
|
603
|
|
|
|
|
346058
|
$h->referer("$url"); |
423
|
|
|
|
|
|
|
|
424
|
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
|
426
|
603
|
|
|
|
|
48825
|
my $request = HTTP::Request->new ( 'GET', $url, $h ); |
427
|
603
|
|
|
|
|
319225
|
my $response; |
428
|
|
|
|
|
|
|
|
429
|
603
|
|
|
|
|
1228
|
my $times = 0; |
430
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
# LWP should be able to do this but seemingly fails sometimes |
432
|
603
|
|
|
|
|
3838
|
while ($times++<3) { |
433
|
603
|
|
|
|
|
17426
|
$response = $self->{_ua}->request($request); |
434
|
603
|
50
|
|
|
|
2681250
|
last if $response->is_success; |
435
|
0
|
0
|
|
|
|
0
|
if ($response->is_redirect) { |
436
|
0
|
|
|
|
|
0
|
$url = URI->new($response->header("Location")); |
437
|
0
|
|
|
|
|
0
|
$url = $url->abs("http://lists.topica.com"); |
438
|
0
|
|
|
|
|
0
|
$h->referer("$url"); |
439
|
0
|
|
|
|
|
0
|
$request = HTTP::Request->new ( 'GET', $url, $h ); |
440
|
|
|
|
|
|
|
} |
441
|
|
|
|
|
|
|
} |
442
|
|
|
|
|
|
|
|
443
|
603
|
0
|
33
|
|
|
18623
|
if (!$response->is_success && !$response->is_redirect) { |
444
|
0
|
|
|
|
|
0
|
carp "Failed to retrieve $url"; |
445
|
0
|
|
|
|
|
0
|
return undef; |
446
|
|
|
|
|
|
|
} |
447
|
|
|
|
|
|
|
|
448
|
603
|
|
|
|
|
7062
|
return $response->content; |
449
|
|
|
|
|
|
|
|
450
|
|
|
|
|
|
|
} |
451
|
|
|
|
|
|
|
|
452
|
|
|
|
|
|
|
1; |
453
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
=head1 AUTHOR |
455
|
|
|
|
|
|
|
|
456
|
|
|
|
|
|
|
Simon Wistow |
457
|
|
|
|
|
|
|
|
458
|
|
|
|
|
|
|
=head1 COPYRIGHT |
459
|
|
|
|
|
|
|
|
460
|
|
|
|
|
|
|
Copyright (c) 2004, Simon Wistow |
461
|
|
|
|
|
|
|
|
462
|
|
|
|
|
|
|
=cut |
463
|
|
|
|
|
|
|
|
464
|
|
|
|
|
|
|
|
465
|
|
|
|
|
|
|
|