line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
|
2
|
|
|
|
|
|
|
require 5; |
3
|
|
|
|
|
|
|
package XML::RSS::TimingBot; # Time-stamp: "2004-05-19 12:50:53 ADT" |
4
|
7
|
|
|
7
|
|
63099
|
use LWP::UserAgent::Determined (); |
|
7
|
|
|
|
|
577536
|
|
|
7
|
|
|
|
|
2333
|
|
5
|
|
|
|
|
|
|
@ISA = ('LWP::UserAgent::Determined'); |
6
|
|
|
|
|
|
|
|
7
|
7
|
|
|
7
|
|
86
|
use strict; |
|
7
|
|
|
|
|
13
|
|
|
7
|
|
|
|
|
229
|
|
8
|
7
|
|
|
7
|
|
42
|
use vars qw($VERSION); |
|
7
|
|
|
|
|
21
|
|
|
7
|
|
|
|
|
470
|
|
9
|
|
|
|
|
|
|
$VERSION = '2.03'; |
10
|
|
|
|
|
|
|
|
11
|
7
|
|
|
7
|
|
11294
|
use LWP::Debug (); |
|
7
|
|
|
|
|
5637
|
|
|
7
|
|
|
|
|
150
|
|
12
|
7
|
|
|
7
|
|
9004
|
use XML::RSS::Timing (); |
|
7
|
|
|
|
|
44709
|
|
|
7
|
|
|
|
|
1131
|
|
13
|
|
|
|
|
|
|
|
14
|
7
|
50
|
|
7
|
|
87206
|
BEGIN { *DEBUG = sub () {0} unless defined &DEBUG } |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
die "Where's _elem?!!?" unless __PACKAGE__->can('_elem'); |
17
|
|
|
|
|
|
|
#-------------------------------------------------------------------------- |
18
|
|
|
|
|
|
|
# Some incidental accessors: |
19
|
9
|
|
|
9
|
1
|
138
|
sub minAge { shift->_elem( 'minAge' , @_) } |
20
|
9
|
|
|
9
|
1
|
953
|
sub maxAge { shift->_elem( 'maxAge' , @_) } |
21
|
0
|
|
|
0
|
0
|
0
|
sub min_age { shift->minAge(@_) } #alias |
22
|
0
|
|
|
0
|
0
|
0
|
sub max_age { shift->maxAge(@_) } #alias |
23
|
|
|
|
|
|
|
|
24
|
60
|
|
|
60
|
0
|
256
|
sub rss_semaphore_file { shift->_elem( 'rss_semaphore_file' , @_) } |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
#========================================================================== |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
sub feed_get_last_modified { |
29
|
|
|
|
|
|
|
# This is not an epochtime, it's a string that we probably got |
30
|
|
|
|
|
|
|
# from the remote server. |
31
|
|
|
|
|
|
|
# As says RFC2616 section 14.25 : << |
32
|
|
|
|
|
|
|
# Note: When handling an If-Modified-Since header field, some |
33
|
|
|
|
|
|
|
# servers will use an exact date comparison function, rather than a |
34
|
|
|
|
|
|
|
# less-than function, for deciding whether to send a 304 (Not |
35
|
|
|
|
|
|
|
# Modified) response. To get best results when sending an If- |
36
|
|
|
|
|
|
|
# Modified-Since header field for cache validation, clients are |
37
|
|
|
|
|
|
|
# advised to use the exact date string received in a previous Last- |
38
|
|
|
|
|
|
|
# Modified header field whenever possible. |
39
|
|
|
|
|
|
|
# >> (among many other wise things) |
40
|
|
|
|
|
|
|
# Example value: "Sun, 04 Apr 2004 11:58:04 GMT" |
41
|
|
|
|
|
|
|
|
42
|
46
|
|
|
46
|
1
|
289
|
my($self, $url) = @_; |
43
|
46
|
|
|
|
|
68
|
DEBUG and print "Getting lastmod value for $url ...\n"; |
44
|
46
|
|
|
|
|
120
|
return $self->get_datum($url, 'lastmodified'); |
45
|
|
|
|
|
|
|
} |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
sub feed_get_next_update { # this is an epochtime |
48
|
47
|
|
|
47
|
1
|
86
|
my($self, $url) = @_; |
49
|
47
|
|
|
|
|
65
|
DEBUG and print "Getting next-update value for $url ...\n"; |
50
|
47
|
|
|
|
|
152
|
return $self->get_datum($url, 'nextupdate'); |
51
|
|
|
|
|
|
|
} |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
sub feed_get_etag { # this is a string |
54
|
|
|
|
|
|
|
# See RFC 2616, "3.11 Entity Tags" |
55
|
16
|
|
|
16
|
1
|
483
|
my($self, $url) = @_; |
56
|
16
|
|
|
|
|
22
|
DEBUG and print "Getting etag value for $url ...\n"; |
57
|
16
|
|
|
|
|
50
|
return $self->get_datum($url, 'etag'); |
58
|
|
|
|
|
|
|
} |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
sub feed_set_last_modified { |
61
|
12
|
|
|
12
|
1
|
49
|
my($self, $url, $last_modified_time) = @_; |
62
|
12
|
|
|
|
|
22
|
DEBUG and print "Setting lastmod for $url to $last_modified_time\n"; |
63
|
12
|
|
|
|
|
57
|
$self->set_datum($url, 'lastmodified', $last_modified_time); |
64
|
|
|
|
|
|
|
} |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
sub feed_set_next_update { |
67
|
12
|
|
|
12
|
1
|
26
|
my($self, $url, $next_update_time) = @_; |
68
|
12
|
|
|
|
|
22
|
DEBUG and print "Setting next-update for $url to $next_update_time\n"; |
69
|
12
|
|
|
|
|
42
|
$self->set_datum($url, 'nextupdate', $next_update_time); |
70
|
|
|
|
|
|
|
} |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
sub feed_set_etag { |
73
|
6
|
|
|
6
|
1
|
16
|
my($self, $url, $etag) = @_; |
74
|
6
|
|
|
|
|
12
|
DEBUG and print "Setting etag for $url to $etag\n"; |
75
|
6
|
|
|
|
|
19
|
$self->set_datum($url, 'etag', $etag); |
76
|
|
|
|
|
|
|
} |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
#-------------------------------------------------------------------------- |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
sub new { |
81
|
30
|
|
|
30
|
1
|
13336
|
my $self = shift->SUPER::new(@_); |
82
|
30
|
|
|
|
|
58250
|
$self->_rssagent_init(); |
83
|
30
|
|
|
|
|
101
|
return $self; |
84
|
|
|
|
|
|
|
} |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
sub _rssagent_init { |
87
|
30
|
|
|
30
|
|
69
|
my $self = shift; |
88
|
30
|
|
|
|
|
261
|
$self->agent("XmlRssTimingBot/$VERSION (" . $self->agent . ")" ); |
89
|
|
|
|
|
|
|
|
90
|
30
|
50
|
|
|
|
3622
|
$self->rss_semaphore_file(1) unless $^O =~ m/(?:Mac|MSWin)/; |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
# Whatever needs doing here |
93
|
30
|
|
|
|
|
255
|
return; |
94
|
|
|
|
|
|
|
} |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
#========================================================================== |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
sub commit { # save all our new data for these various feeds we've been seeing |
99
|
10
|
|
|
10
|
1
|
2564
|
my $self = shift; |
100
|
10
|
|
|
|
|
45
|
return $self->_stupid_commit; |
101
|
|
|
|
|
|
|
} |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
sub datum_from_db { |
104
|
48
|
|
|
48
|
0
|
100
|
my($self, $url, $varname) = @_; |
105
|
48
|
|
|
|
|
158
|
return $self->_stupid_datum_from_db($url, $varname); |
106
|
|
|
|
|
|
|
} |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
#========================================================================== |
109
|
|
|
|
|
|
|
|
110
|
12
|
|
|
12
|
0
|
18
|
sub rssagent_just_before_real_request { # Override if you like |
111
|
|
|
|
|
|
|
#my($self, $args_ref) = @_; |
112
|
|
|
|
|
|
|
} |
113
|
12
|
|
|
12
|
0
|
31
|
sub rssagent_just_after_real_request { # Override if you like |
114
|
|
|
|
|
|
|
#my($self, $response, $args_ref) = @_; |
115
|
|
|
|
|
|
|
} |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
#========================================================================== |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
sub simple_request { |
120
|
28
|
|
|
28
|
1
|
60563
|
my($self, @args) = @_; |
121
|
28
|
|
|
|
|
133
|
LWP::Debug::trace('simple_request()'); |
122
|
|
|
|
|
|
|
|
123
|
28
|
|
100
|
|
|
561
|
LWP::Debug::debug("Trying simple_request with args: [" |
124
|
|
|
|
|
|
|
. join(',', map $_||"''", @args) . "]"); |
125
|
|
|
|
|
|
|
|
126
|
28
|
|
|
|
|
759
|
DEBUG and print( "Trying simple_request with args: [" |
127
|
|
|
|
|
|
|
. join(',', map $_||"''", @args) . "]"); |
128
|
|
|
|
|
|
|
|
129
|
28
|
|
|
|
|
50
|
my $resp; |
130
|
28
|
|
|
|
|
108
|
my $maybe_response = $self->_rssagent_maybe_null_response($args[0]); |
131
|
28
|
100
|
|
|
|
90
|
if( $maybe_response ) { |
132
|
16
|
|
|
|
|
55
|
LWP::Debug::debug("Returning cached response"); |
133
|
16
|
|
|
|
|
111
|
return $maybe_response; |
134
|
|
|
|
|
|
|
} |
135
|
|
|
|
|
|
|
|
136
|
12
|
|
|
|
|
64
|
$self-> _rssagent_add_header_conditions(@args); |
137
|
12
|
|
|
|
|
54
|
$self-> rssagent_just_before_real_request(\@args); |
138
|
12
|
|
|
|
|
87
|
$resp = $self->SUPER::simple_request(@args); |
139
|
12
|
|
|
|
|
3105092
|
$self-> rssagent_just_after_real_request($resp, \@args); |
140
|
12
|
|
|
|
|
65
|
$self-> _rssagent_response_consider($resp); |
141
|
|
|
|
|
|
|
|
142
|
12
|
|
|
|
|
47
|
LWP::Debug::debug("Returning uncached response"); |
143
|
12
|
|
|
|
|
97
|
return $resp; |
144
|
|
|
|
|
|
|
} |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
#========================================================================== |
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
sub _rssagent_maybe_null_response { |
149
|
|
|
|
|
|
|
# Return a (virtual) response object if you want to block this request. |
150
|
|
|
|
|
|
|
# Otherwise, to allow this request to actually happen, return a false value. |
151
|
|
|
|
|
|
|
|
152
|
28
|
|
|
28
|
|
56
|
my($self, $req) = @_; |
153
|
28
|
|
|
|
|
82
|
LWP::Debug::trace('_rssagent_maybe_null_response()'); |
154
|
28
|
|
|
|
|
283
|
my $url = $req->uri; |
155
|
|
|
|
|
|
|
|
156
|
28
|
|
|
|
|
334
|
my $not_until = $self->feed_get_next_update($url); |
157
|
28
|
100
|
|
|
|
433
|
unless(defined $not_until) { |
158
|
11
|
|
|
|
|
240
|
LWP::Debug::debug( "No restrictions on when to get $url"); |
159
|
11
|
|
|
|
|
227
|
DEBUG > 1 and print "No restrictions on when to get $url\n"; |
160
|
11
|
|
|
|
|
33
|
return undef; |
161
|
|
|
|
|
|
|
} |
162
|
|
|
|
|
|
|
|
163
|
17
|
|
|
|
|
64
|
my $now = $self->now; |
164
|
17
|
100
|
|
|
|
74
|
if($not_until >= $now) { |
165
|
16
|
|
|
|
|
82
|
LWP::Debug::debug( "It's now $now, but I shouldn't look at $url until $not_until."); |
166
|
16
|
|
|
|
|
143
|
DEBUG > 1 and print "It's now $now, but I shouldn't look at $url until $not_until.\n"; |
167
|
16
|
|
|
|
|
63
|
return $self->_rss_agent_null_response($req, $not_until); |
168
|
|
|
|
|
|
|
} |
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
# Else give the all-clear |
171
|
1
|
|
|
|
|
2
|
return undef; |
172
|
|
|
|
|
|
|
} |
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
#========================================================================== |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
sub _rssagent_response_consider { |
177
|
12
|
|
|
12
|
|
33
|
my($self, $response) = @_; |
178
|
|
|
|
|
|
|
# Possibly extract the RSS-timing content from this response. |
179
|
|
|
|
|
|
|
|
180
|
12
|
|
|
|
|
76
|
LWP::Debug::trace('_maybe_null_response()'); |
181
|
12
|
|
|
|
|
84
|
my $code = $response->code; |
182
|
12
|
100
|
|
|
|
174
|
unless($code eq '200') { # or $code eq '304' ? |
183
|
1
|
|
|
|
|
7
|
LWP::Debug::debug('Not trying to find RSS content in this $code response'); |
184
|
1
|
|
|
|
|
7
|
return; |
185
|
|
|
|
|
|
|
} |
186
|
|
|
|
|
|
|
|
187
|
11
|
|
|
|
|
60
|
my $url = $self->_url_from_response( $response ); |
188
|
|
|
|
|
|
|
|
189
|
11
|
100
|
|
|
|
519
|
return unless $self->_looks_like_rss( $url, $response ); |
190
|
|
|
|
|
|
|
|
191
|
9
|
|
|
|
|
50
|
my $now = $self->now(); |
192
|
|
|
|
|
|
|
|
193
|
9
|
|
|
|
|
158
|
my $time_string_from_resp = $self->_time_string_from_resp($response, $now); |
194
|
9
|
50
|
|
|
|
70
|
$self->feed_set_last_modified($url, $time_string_from_resp) |
195
|
|
|
|
|
|
|
if defined $time_string_from_resp; |
196
|
|
|
|
|
|
|
|
197
|
9
|
|
|
|
|
154
|
my $etag = $self->_etag_from_resp($response); |
198
|
9
|
100
|
|
|
|
54
|
$self->feed_set_etag($url, $etag) if defined $etag; |
199
|
|
|
|
|
|
|
|
200
|
9
|
50
|
|
|
|
113
|
$self->_ponder_next_update($url, $now, $response) unless $code eq '304'; |
201
|
9
|
|
|
|
|
24
|
return; |
202
|
|
|
|
|
|
|
} |
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
#========================================================================== |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
sub _looks_like_rss { |
207
|
11
|
|
|
11
|
|
25
|
my($self, $url, $response) = @_; |
208
|
11
|
|
|
|
|
45
|
LWP::Debug::trace('_ponder_next_update'); |
209
|
11
|
|
|
|
|
47
|
my $content; |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
# Look for rss/rdf in the first 2000 bytes |
212
|
|
|
|
|
|
|
# TODO: support Atom here? Does anyone ever use sy:* stuff in Atom? |
213
|
|
|
|
|
|
|
|
214
|
11
|
|
|
|
|
68
|
my $c = $response->content_ref; |
215
|
11
|
50
|
33
|
|
|
270
|
unless( $c and $$c ) { |
216
|
0
|
|
|
|
|
0
|
LWP::Debug::debug("Content from $url is apparently null."); |
217
|
0
|
|
|
|
|
0
|
print "NULL!\n"; |
218
|
|
|
|
|
|
|
# so it's sure not RSS! |
219
|
0
|
|
|
|
|
0
|
return 0; |
220
|
|
|
|
|
|
|
} |
221
|
|
|
|
|
|
|
|
222
|
11
|
100
|
|
|
|
138
|
if ( $$c =~ m{^[^\x00]{0,2000}?(?:
|
223
|
9
|
|
|
|
|
82
|
LWP::Debug::debug("Content from $url looks like RSS/RDF."); |
224
|
9
|
|
|
|
|
127
|
return 1; |
225
|
|
|
|
|
|
|
} |
226
|
|
|
|
|
|
|
|
227
|
2
|
|
|
|
|
19
|
LWP::Debug::debug("Content from $url doesn't look like RSS/RDF."); |
228
|
2
|
|
|
|
|
30
|
return 0; |
229
|
|
|
|
|
|
|
} |
230
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
#========================================================================== |
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
sub _ponder_next_update { |
234
|
9
|
|
|
9
|
|
24
|
my($self, $url, $now, $response) = @_; |
235
|
9
|
|
|
|
|
77
|
LWP::Debug::trace('_ponder_next_update'); |
236
|
9
|
|
|
|
|
74
|
my $content = $response->content; |
237
|
9
|
|
|
|
|
188
|
$content =~ s///sg; # kill XML comments |
238
|
|
|
|
|
|
|
|
239
|
9
|
50
|
|
|
|
66
|
unless( $content =~ m{^[^\x00]{0,2000}?
|
240
|
|
|
|
|
|
|
# Make super-sure that the our apparent start-tag wasn't just in a comment! |
241
|
0
|
|
|
|
|
0
|
LWP::Debug::debug("Content from $url doesn't look like RSS/RDF."); |
242
|
0
|
|
|
|
|
0
|
return; |
243
|
|
|
|
|
|
|
} |
244
|
|
|
|
|
|
|
|
245
|
9
|
|
33
|
|
|
144
|
my $timing = $self->{"_rss_timing_obj"} || XML::RSS::Timing->new; |
246
|
9
|
|
|
|
|
2280
|
$timing->use_exceptions(0); |
247
|
9
|
50
|
|
|
|
77
|
$timing->minAge( $self->minAge ) if defined $self->minAge; |
248
|
9
|
50
|
|
|
|
120
|
$timing->maxAge( $self->maxAge ) if defined $self->maxAge; |
249
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
# Note that we use our server-time, not the other server's time |
251
|
9
|
|
|
|
|
197
|
$timing->last_polled($now); |
252
|
|
|
|
|
|
|
|
253
|
9
|
|
|
|
|
149
|
$self->_scan_xml_timing(\$content, $timing); |
254
|
|
|
|
|
|
|
|
255
|
9
|
50
|
|
|
|
50
|
if( $timing->complaints ) { |
256
|
0
|
|
|
|
|
0
|
LWP::Debug::debug("Errors in this feed's timing fields:\n" |
257
|
|
|
|
|
|
|
. map("* $_\n", $timing->complaints) |
258
|
|
|
|
|
|
|
. "]... so ignoring it all.\n" |
259
|
|
|
|
|
|
|
); |
260
|
0
|
|
|
|
|
0
|
return; |
261
|
|
|
|
|
|
|
} |
262
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
# Now actually learn... |
264
|
9
|
|
|
|
|
148
|
my $next_update = $timing->nextUpdate(); |
265
|
9
|
|
|
|
|
1015
|
LWP::Debug::debug("Remembering not to poll $url until $next_update"); |
266
|
9
|
|
|
|
|
110
|
$self->feed_set_next_update( $url, $next_update ); |
267
|
|
|
|
|
|
|
# Now, we /could/ also slip this into the response as a faked-out |
268
|
|
|
|
|
|
|
# "Expires" header value, except that 1) who the hell ever looks at |
269
|
|
|
|
|
|
|
# those, and 2) "Expires" is the expiration time expressed against |
270
|
|
|
|
|
|
|
# the REMOTE server's clock, whereas nextUpdate is expressed |
271
|
|
|
|
|
|
|
# against OUR clock. So mixing these up would screw up all kinds of |
272
|
|
|
|
|
|
|
# things in the unhappy event of clock skew combined with someone |
273
|
|
|
|
|
|
|
# actually looking at a fake-o Expires value. |
274
|
|
|
|
|
|
|
|
275
|
9
|
|
|
|
|
169
|
return; |
276
|
|
|
|
|
|
|
} |
277
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
#========================================================================== |
279
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
sub _scan_xml_timing { |
281
|
93
|
|
|
93
|
|
42894
|
my($self, $contentref, $timingobj) = @_; |
282
|
93
|
100
|
66
|
|
|
504
|
return unless $contentref and $$contentref; |
283
|
92
|
|
|
|
|
96
|
DEBUG > 1 and print "# _scan_xml_timing << self <$self>; contentref <$contentref>; timingobj <$timingobj>\n"; |
284
|
92
|
|
|
|
|
322
|
$self->_scan_for_ttl( $contentref, $timingobj ); |
285
|
92
|
|
|
|
|
234
|
$self->_scan_for_skipDays( $contentref, $timingobj ); |
286
|
92
|
|
|
|
|
225
|
$self->_scan_for_skipHours( $contentref, $timingobj ); |
287
|
92
|
|
|
|
|
287
|
$self->_scan_for_updatePeriod( $contentref, $timingobj ); |
288
|
92
|
|
|
|
|
842
|
$self->_scan_for_updateFrequency( $contentref, $timingobj ); |
289
|
92
|
|
|
|
|
264
|
$self->_scan_for_updateBase( $contentref, $timingobj ); |
290
|
92
|
|
|
|
|
190
|
return; |
291
|
|
|
|
|
|
|
} |
292
|
|
|
|
|
|
|
|
293
|
92
|
|
|
92
|
|
144
|
sub _scan_for_updateFrequency {my($s,$c,$t)=@_;$s->_scan_xml('updateFrequency', $c, $t) } |
|
92
|
|
|
|
|
198
|
|
294
|
92
|
|
|
92
|
|
326
|
sub _scan_for_updatePeriod {my($s,$c,$t)=@_;$s->_scan_xml('updatePeriod', $c, $t) } |
|
92
|
|
|
|
|
281
|
|
295
|
92
|
|
|
92
|
|
217
|
sub _scan_for_updateBase {my($s,$c,$t)=@_;$s->_scan_xml('updateBase', $c, $t) } |
|
92
|
|
|
|
|
201
|
|
296
|
92
|
|
|
92
|
|
124
|
sub _scan_for_ttl {my($s,$c,$t)=@_;$s->_scan_xml('ttl', $c, $t) } |
|
92
|
|
|
|
|
233
|
|
297
|
92
|
|
|
92
|
|
147
|
sub _scan_for_skipDays {my($s,$c,$t)=@_;$s->_scan_xml('skipDays' , $c, $t, 'day' ) } |
|
92
|
|
|
|
|
255
|
|
298
|
92
|
|
|
92
|
|
237
|
sub _scan_for_skipHours {my($s,$c,$t)=@_;$s->_scan_xml('skipHours', $c, $t, 'hour') } |
|
92
|
|
|
|
|
270
|
|
299
|
|
|
|
|
|
|
|
300
|
|
|
|
|
|
|
#========================================================================== |
301
|
|
|
|
|
|
|
|
302
|
|
|
|
|
|
|
sub _etag_from_resp { |
303
|
9
|
|
|
9
|
|
26
|
my($self, $response) = @_; |
304
|
9
|
|
|
|
|
135
|
my $etag = $response->header('ETag'); |
305
|
9
|
50
|
66
|
|
|
610
|
return undef unless defined $etag |
|
|
|
66
|
|
|
|
|
|
|
|
33
|
|
|
|
|
306
|
|
|
|
|
|
|
and length($etag) |
307
|
|
|
|
|
|
|
and length($etag) < 251 # A good sanity limit, I think |
308
|
|
|
|
|
|
|
and $etag !~ m/[\n\r]/ # Enforce this minimal sanity on content |
309
|
|
|
|
|
|
|
; |
310
|
6
|
|
|
|
|
14
|
DEBUG and print "Using etag $etag for resp-obj $response\'s etag\n"; |
311
|
6
|
|
|
|
|
18
|
return $etag; |
312
|
|
|
|
|
|
|
} |
313
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
sub _time_string_from_resp { |
315
|
9
|
|
|
9
|
|
21
|
my($self, $response, $now) = @_; |
316
|
9
|
|
|
|
|
127
|
require HTTP::Date; |
317
|
9
|
|
|
|
|
94
|
foreach my $time_string ( |
318
|
|
|
|
|
|
|
$response->header('Last-Modified'), |
319
|
|
|
|
|
|
|
$response->header('Date'), |
320
|
|
|
|
|
|
|
HTTP::Date::time2str( $now ), |
321
|
|
|
|
|
|
|
) { |
322
|
|
|
|
|
|
|
next unless # enforce minimal sanity on the value... |
323
|
9
|
50
|
33
|
|
|
1088
|
defined $time_string |
|
|
|
33
|
|
|
|
|
324
|
|
|
|
|
|
|
and $time_string =~ m/^[- \,\.\:a-zA-Z0-9]{4,40}$/s |
325
|
|
|
|
|
|
|
and $time_string =~ m/[0-9A-Za-z]/; |
326
|
9
|
|
|
|
|
18
|
DEBUG and print "Using time-string \"$time_string\" for resp-obj $response\'s lastmod\n"; |
327
|
9
|
|
|
|
|
28
|
return $time_string; |
328
|
|
|
|
|
|
|
} |
329
|
0
|
|
|
|
|
0
|
return undef; |
330
|
|
|
|
|
|
|
} |
331
|
|
|
|
|
|
|
|
332
|
|
|
|
|
|
|
#========================================================================== |
333
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
sub _url_from_response { |
335
|
11
|
|
|
11
|
|
26
|
my($self, $response) = @_; |
336
|
|
|
|
|
|
|
|
337
|
11
|
|
|
|
|
26
|
my $this_res = $response; |
338
|
11
|
|
|
|
|
37
|
for(1 .. 30) { # get the original request's URL |
339
|
11
|
|
50
|
|
|
52
|
$this_res = ($this_res->previous || last); |
340
|
|
|
|
|
|
|
} |
341
|
11
|
|
|
|
|
162
|
return $this_res->request->uri; |
342
|
|
|
|
|
|
|
} |
343
|
|
|
|
|
|
|
|
344
|
|
|
|
|
|
|
#========================================================================== |
345
|
|
|
|
|
|
|
|
346
|
|
|
|
|
|
|
sub _rssagent_add_header_conditions { |
347
|
12
|
|
|
12
|
|
28
|
my $self = shift; |
348
|
12
|
|
|
|
|
52
|
$self->_rssagent_add_ifmod_header(@_); |
349
|
12
|
|
|
|
|
50
|
$self->_rssagent_add_ifnonematch_header(@_); |
350
|
12
|
|
|
|
|
18
|
return; |
351
|
|
|
|
|
|
|
} |
352
|
|
|
|
|
|
|
|
353
|
|
|
|
|
|
|
sub _rssagent_add_ifmod_header { |
354
|
12
|
|
|
12
|
|
28
|
my($self, $req) = @_; |
355
|
12
|
|
|
|
|
43
|
LWP::Debug::trace('_rssagent_add_ifmod_header()'); |
356
|
12
|
|
|
|
|
73
|
my $url = $req->uri; |
357
|
|
|
|
|
|
|
|
358
|
12
|
|
|
|
|
119
|
my $lastmod = $self->feed_get_last_modified( $url ); |
359
|
|
|
|
|
|
|
|
360
|
12
|
100
|
66
|
|
|
133
|
if(defined $lastmod and length $lastmod) { |
361
|
1
|
|
|
|
|
4
|
LWP::Debug::debug("Setting If-Modified-Since on get-$url to $lastmod"); |
362
|
1
|
|
|
|
|
7
|
DEBUG and print "Setting If-Modified-Since on get-$url to $lastmod\n"; |
363
|
1
|
|
|
|
|
9
|
$req->header('If-Modified-Since' => $lastmod); |
364
|
|
|
|
|
|
|
} else { |
365
|
11
|
|
|
|
|
47
|
LWP::Debug::debug("I see no last-polled time for $url"); |
366
|
11
|
|
|
|
|
86
|
DEBUG and print "I see no last-polled time for $url\n"; |
367
|
|
|
|
|
|
|
} |
368
|
12
|
|
|
|
|
65
|
return; |
369
|
|
|
|
|
|
|
} |
370
|
|
|
|
|
|
|
|
371
|
|
|
|
|
|
|
sub _rssagent_add_ifnonematch_header { |
372
|
12
|
|
|
12
|
|
23
|
my($self, $req) = @_; |
373
|
12
|
|
|
|
|
35
|
LWP::Debug::trace('_rssagent_add_ifnonematch_header()'); |
374
|
12
|
|
|
|
|
68
|
my $url = $req->uri; |
375
|
|
|
|
|
|
|
|
376
|
12
|
|
|
|
|
117
|
my $etag = $self->feed_get_etag( $url ); |
377
|
|
|
|
|
|
|
|
378
|
12
|
100
|
66
|
|
|
137
|
if(defined $etag and length $etag) { |
379
|
1
|
|
|
|
|
4
|
LWP::Debug::debug("Setting If-None-Match on get-$url to $etag"); |
380
|
1
|
|
|
|
|
7
|
DEBUG and print "Setting If-None-Match on get-$url to $etag\n"; |
381
|
1
|
|
|
|
|
3
|
$req->header('If-None-Match' => $etag); |
382
|
|
|
|
|
|
|
} else { |
383
|
11
|
|
|
|
|
50
|
LWP::Debug::debug("I see no etag for $url"); |
384
|
11
|
|
|
|
|
91
|
DEBUG and print "I see no etag for $url\n"; |
385
|
|
|
|
|
|
|
} |
386
|
12
|
|
|
|
|
55
|
return; |
387
|
|
|
|
|
|
|
} |
388
|
|
|
|
|
|
|
|
389
|
|
|
|
|
|
|
#========================================================================== |
390
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
sub _rss_agent_null_response { |
392
|
17
|
|
|
17
|
|
12631
|
my($self, $request, $not_until) = @_; |
393
|
17
|
|
|
|
|
820
|
require HTTP::Response; |
394
|
17
|
|
|
|
|
85
|
require HTTP::Date; |
395
|
17
|
|
|
|
|
76
|
require HTTP::Status; |
396
|
|
|
|
|
|
|
|
397
|
17
|
|
|
|
|
57
|
my $now_str = HTTP::Date::time2str( $self->now ); |
398
|
17
|
|
|
|
|
361
|
my $not_until_str = HTTP::Date::time2str( $not_until - 1); |
399
|
|
|
|
|
|
|
# The -1 is because "Expires" means the last moment when it's still |
400
|
|
|
|
|
|
|
# good, and not_until is the first moment when we can check. Q.E.D. |
401
|
|
|
|
|
|
|
|
402
|
17
|
|
|
|
|
324
|
my $response = HTTP::Response->new( |
403
|
|
|
|
|
|
|
HTTP::Status::RC_NOT_MODIFIED() => "Not Modified (" . __PACKAGE__ |
404
|
|
|
|
|
|
|
. " says it won't change until after $not_until_str)" |
405
|
|
|
|
|
|
|
); |
406
|
17
|
|
|
|
|
1082
|
my $h = $response->headers; |
407
|
17
|
|
|
|
|
159
|
$h->header( "Client-Date" => $now_str); |
408
|
17
|
|
|
|
|
11391
|
$h->header( "Date" => $now_str); |
409
|
17
|
|
|
|
|
964
|
$h->header( "Expires" => $not_until_str); |
410
|
17
|
|
|
|
|
846
|
$response->request($request); |
411
|
|
|
|
|
|
|
|
412
|
17
|
|
|
|
|
231
|
return $response; |
413
|
|
|
|
|
|
|
} |
414
|
|
|
|
|
|
|
|
415
|
|
|
|
|
|
|
#========================================================================== |
416
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
sub now { |
418
|
|
|
|
|
|
|
# This is here just so we can change what we mean by 'now', when we're |
419
|
|
|
|
|
|
|
# running tests. Trust me, it's handy. |
420
|
43
|
100
|
66
|
43
|
0
|
334
|
return $_[0]->{'_now_hack'} if ref $_[0] and defined $_[0]->{'_now_hack'}; |
421
|
42
|
|
|
|
|
171
|
return time(); |
422
|
|
|
|
|
|
|
} |
423
|
|
|
|
|
|
|
|
424
|
|
|
|
|
|
|
########################################################################### |
425
|
|
|
|
|
|
|
# |
426
|
|
|
|
|
|
|
# get_datum and set_datum implement the caching that both speeds things up |
427
|
|
|
|
|
|
|
# and allows a layer of indirection so that changes don't happen until |
428
|
|
|
|
|
|
|
# we commit |
429
|
|
|
|
|
|
|
|
430
|
|
|
|
|
|
|
sub get_datum { |
431
|
109
|
|
|
109
|
0
|
187
|
my($self, $url, $varname) = @_; |
432
|
109
|
|
|
|
|
604
|
$url =~ s{\s+}{}g; |
433
|
109
|
50
|
|
|
|
497
|
return unless length $url; |
434
|
|
|
|
|
|
|
|
435
|
|
|
|
|
|
|
# First look in our dirty cache |
436
|
109
|
|
100
|
|
|
566
|
my $for_db = ($self->{'rsstimingbot_for_db'} ||= {}); |
437
|
109
|
100
|
66
|
|
|
331
|
if( $for_db->{$url} and exists $for_db->{$url}{$varname} ) { |
438
|
15
|
|
|
|
|
133
|
DEBUG > 6 and print " Found $varname for $url in dirty cache\n"; |
439
|
15
|
|
|
|
|
68
|
return $for_db->{$url}{$varname}; |
440
|
|
|
|
|
|
|
} |
441
|
|
|
|
|
|
|
|
442
|
|
|
|
|
|
|
# then look in our has-been-read-from-disk cache |
443
|
94
|
|
100
|
|
|
627
|
my $from_db = ($self->{'rsstimingbot_from_db'} ||= {}); |
444
|
94
|
100
|
100
|
|
|
623
|
if( $from_db->{$url} and exists $from_db->{$url}{$varname} ) { |
445
|
46
|
|
|
|
|
115
|
DEBUG > 6 and print " Found $varname for $url in clean cache\n"; |
446
|
46
|
|
|
|
|
260
|
return $from_db->{$url}{$varname}; |
447
|
|
|
|
|
|
|
} |
448
|
|
|
|
|
|
|
|
449
|
|
|
|
|
|
|
# and finally, as a last resort, actually fetch from the real DB |
450
|
|
|
|
|
|
|
return( |
451
|
48
|
|
|
|
|
1003
|
$from_db->{$url}{$varname} = $self->datum_from_db($url, $varname) |
452
|
|
|
|
|
|
|
); |
453
|
|
|
|
|
|
|
} |
454
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
sub set_datum { |
456
|
30
|
|
|
30
|
0
|
79
|
my($self, $url, $varname, $value) = @_; |
457
|
30
|
|
|
|
|
96
|
$url =~ s{\s+}{}g; |
458
|
30
|
50
|
|
|
|
189
|
return unless length $url; |
459
|
30
|
|
|
|
|
235
|
$self->{'rsstimingbot_from_db' }{$url}{$varname} |
460
|
|
|
|
|
|
|
= $self->{'rsstimingbot_for_db'}{$url}{$varname} |
461
|
|
|
|
|
|
|
= $value; |
462
|
|
|
|
|
|
|
# And upon commit, we'll save all of 'rsstimingbot_for_db' to database |
463
|
|
|
|
|
|
|
} |
464
|
|
|
|
|
|
|
|
465
|
|
|
|
|
|
|
########################################################################### |
466
|
|
|
|
|
|
|
# |
467
|
|
|
|
|
|
|
# Our lame default storage methods, in case you didn't override |
468
|
|
|
|
|
|
|
# commit and datum_from_db |
469
|
|
|
|
|
|
|
|
470
|
|
|
|
|
|
|
sub _stupid_datum_from_db { |
471
|
48
|
|
|
48
|
|
321
|
my($self, $url, $varname) = @_; |
472
|
48
|
|
|
|
|
225
|
my $dbfile = $self->_stupid___url2dbfile($url); |
473
|
48
|
|
|
|
|
221
|
DEBUG > 1 and print " DB: Getting datum $varname for $url ...\n"; |
474
|
|
|
|
|
|
|
|
475
|
48
|
100
|
|
|
|
2382
|
return undef unless -e $dbfile; |
476
|
|
|
|
|
|
|
|
477
|
23
|
|
|
|
|
78
|
my $unlocker = $self->_stupid_lock(); |
478
|
|
|
|
|
|
|
|
479
|
23
|
50
|
|
|
|
762
|
open( STUPID_DB, $dbfile) |
480
|
|
|
|
|
|
|
or die "Can't read-open $dbfile for $url : $!\nAborting"; |
481
|
23
|
|
|
|
|
51
|
my @f; |
482
|
23
|
|
|
|
|
120
|
local $/ = "\n"; |
483
|
23
|
|
50
|
|
|
83
|
my $from_db = ($self->{'rsstimingbot_from_db'} ||= {}); |
484
|
23
|
|
|
|
|
29
|
DEBUG > 8 and print " Reading DB file $dbfile...\n"; |
485
|
23
|
|
|
|
|
413
|
while() { |
486
|
95
|
|
|
|
|
134
|
chomp; |
487
|
95
|
|
|
|
|
471
|
@f = split ' ', $_, 3; |
488
|
|
|
|
|
|
|
# Yup, just three space-separated fields: "url varname value" |
489
|
95
|
50
|
|
|
|
241
|
if( @f >= 2 ) { |
490
|
95
|
|
|
|
|
117
|
DEBUG > 9 and print " Datum read {$f[0]} {$f[1]} {$f[2]}\n"; |
491
|
95
|
50
|
|
|
|
786
|
$from_db->{ $f[0] }->{ $f[1] } = |
492
|
|
|
|
|
|
|
defined($f[2]) ? $f[2] : ""; # because of split's behavior |
493
|
|
|
|
|
|
|
} |
494
|
|
|
|
|
|
|
} |
495
|
23
|
|
|
|
|
313
|
close(STUPID_DB); |
496
|
23
|
50
|
|
|
|
87
|
$unlocker and $unlocker->(); |
497
|
23
|
|
|
|
|
29
|
DEBUG > 8 and print " Done reading DB file $dbfile\n"; |
498
|
|
|
|
|
|
|
|
499
|
23
|
100
|
100
|
|
|
252
|
return $from_db->{$url}{$varname} |
500
|
|
|
|
|
|
|
if $from_db->{$url} and exists $from_db->{$url}{$varname}; |
501
|
9
|
|
|
|
|
178
|
return undef; |
502
|
|
|
|
|
|
|
} |
503
|
|
|
|
|
|
|
|
504
|
|
|
|
|
|
|
# See XML::RSS::TimingBotDBI for a better example of a commit method |
505
|
|
|
|
|
|
|
|
506
|
|
|
|
|
|
|
sub _stupid_commit { # write all our dirty cache to DB files |
507
|
10
|
|
|
10
|
|
18
|
my $self = shift; |
508
|
10
|
|
50
|
|
|
45
|
my $for_db = $self->{'rsstimingbot_for_db'} || return; |
509
|
10
|
|
|
|
|
13
|
DEBUG > 1 and print " I see ", scalar(keys %$for_db), " url-data in $self to save\n"; |
510
|
10
|
100
|
|
|
|
45
|
return unless %$for_db; |
511
|
|
|
|
|
|
|
|
512
|
7
|
|
|
|
|
13
|
my %path2mods; |
513
|
7
|
|
|
|
|
40
|
foreach my $url (sort keys %$for_db) { |
514
|
7
|
|
|
|
|
29
|
my $dbfile = $self->_stupid___url2dbfile($url); |
515
|
7
|
|
|
|
|
17
|
foreach my $varname (sort keys %{ $for_db->{$url} }) { |
|
7
|
|
|
|
|
44
|
|
516
|
13
|
|
|
|
|
34
|
my $value = $for_db->{$url}{$varname}; |
517
|
13
|
50
|
|
|
|
156
|
$path2mods{$dbfile}{ "$url $varname" } = defined($value) ? $value : ''; |
518
|
|
|
|
|
|
|
} |
519
|
|
|
|
|
|
|
} |
520
|
7
|
|
|
|
|
13
|
DEBUG > 7 and print " Committing to ", scalar(keys %path2mods), |
521
|
|
|
|
|
|
|
" database files...\n"; |
522
|
|
|
|
|
|
|
|
523
|
7
|
|
|
|
|
24
|
my $unlocker = $self->_stupid_lock(); |
524
|
|
|
|
|
|
|
|
525
|
7
|
|
|
|
|
66
|
foreach my $dbfile (keys %path2mods) { |
526
|
7
|
|
|
|
|
35
|
$self->_stupid___mod_db( $dbfile, $path2mods{$dbfile} ); |
527
|
7
|
|
|
|
|
39
|
$path2mods{$dbfile} = undef; # potentially free up some memory |
528
|
|
|
|
|
|
|
} |
529
|
7
|
|
|
|
|
26
|
DEBUG > 7 and print " Done committing all database files\n"; |
530
|
|
|
|
|
|
|
|
531
|
7
|
50
|
|
|
|
33
|
$unlocker and $unlocker->(); |
532
|
7
|
|
|
|
|
25
|
%$for_db = (); |
533
|
7
|
|
|
|
|
56
|
return; |
534
|
|
|
|
|
|
|
} |
535
|
|
|
|
|
|
|
|
536
|
|
|
|
|
|
|
# |
537
|
|
|
|
|
|
|
# And now some very internal stuff: |
538
|
|
|
|
|
|
|
# |
539
|
|
|
|
|
|
|
|
540
|
|
|
|
|
|
|
sub _stupid_lock { |
541
|
30
|
|
|
30
|
|
41
|
my $self = shift; |
542
|
30
|
|
|
|
|
82
|
my $file = $self->rss_semaphore_file; |
543
|
30
|
50
|
|
|
|
339
|
return unless $file; |
544
|
|
|
|
|
|
|
|
545
|
30
|
50
|
|
|
|
164
|
$file = $self->_stupid___url2dbfile("http://lock.nowhere.int/lock") |
546
|
|
|
|
|
|
|
if $file eq '1'; |
547
|
|
|
|
|
|
|
|
548
|
30
|
|
|
|
|
45
|
DEBUG > 2 and |
549
|
|
|
|
|
|
|
print "About to request (maybe wait for!) an exclusive lock on $file\n"; |
550
|
|
|
|
|
|
|
|
551
|
30
|
|
|
|
|
105
|
return $self->_getsem($file); |
552
|
|
|
|
|
|
|
} |
553
|
|
|
|
|
|
|
|
554
|
|
|
|
|
|
|
|
555
|
|
|
|
|
|
|
sub _stupid___my_db_path { |
556
|
85
|
|
|
85
|
|
131
|
my $self = shift; |
557
|
|
|
|
|
|
|
return $self->{'_dbpath'} |
558
|
|
|
|
|
|
|
|| $ENV{'TIMINGBOTPATH'} |
559
|
|
|
|
|
|
|
|| $ENV{'APPDATA'} |
560
|
|
|
|
|
|
|
|| $ENV{'HOME'} |
561
|
85
|
|
0
|
|
|
7114
|
|| do { require File::Spec; File::Spec->curdir; } |
562
|
|
|
|
|
|
|
} |
563
|
|
|
|
|
|
|
|
564
|
|
|
|
|
|
|
sub _stupid___url2dbfile { |
565
|
85
|
|
|
85
|
|
136
|
my($self, $url) = @_; |
566
|
85
|
|
|
|
|
540
|
require File::Spec; |
567
|
85
|
|
|
|
|
135
|
DEBUG > 2 and print " Pondering filespec for url $url\n"; |
568
|
85
|
|
|
|
|
289
|
my $url_stem = lc $url; |
569
|
85
|
|
|
|
|
695
|
$url_stem =~ s{^http://(?:www\.)?}{}s; |
570
|
85
|
|
|
|
|
345
|
$url_stem =~ s{\.(?:xml|rss|rdf)$}{}s; |
571
|
85
|
|
|
|
|
541
|
$url_stem =~ s/\W+//sg; # includes killing all whitespace |
572
|
85
|
50
|
|
|
|
229
|
$url_stem = 'misc' unless length $url_stem; |
573
|
85
|
100
|
|
|
|
309
|
$url_stem = substr($url_stem,0,30) if length($url) > 30; # truncate |
574
|
|
|
|
|
|
|
|
575
|
85
|
|
|
|
|
439
|
my $rssdir = File::Spec->catfile( $self->_stupid___my_db_path, 'rssdata'); |
576
|
85
|
50
|
|
|
|
3104
|
if( -e $rssdir ) { |
577
|
85
|
|
|
|
|
291
|
DEBUG > 12 and print " RSSdir $rssdir exists already.\n"; |
578
|
|
|
|
|
|
|
} else { |
579
|
0
|
0
|
|
|
|
0
|
if( mkdir($rssdir, 0777) ) { |
580
|
0
|
|
|
|
|
0
|
DEBUG > 1 and print " Successfully created RSSdir $rssdir\n"; |
581
|
|
|
|
|
|
|
} else { |
582
|
0
|
|
|
|
|
0
|
die "Can't mkdir $rssdir: $!\nAborting"; |
583
|
|
|
|
|
|
|
} |
584
|
|
|
|
|
|
|
} |
585
|
85
|
|
|
|
|
16229
|
my $path = File::Spec->catfile( $rssdir, $url_stem ); |
586
|
85
|
|
|
|
|
205
|
DEBUG > 6 and print "# Path to stupid DB: $path\n"; |
587
|
85
|
|
|
|
|
410
|
return $path; |
588
|
|
|
|
|
|
|
} |
589
|
|
|
|
|
|
|
|
590
|
|
|
|
|
|
|
# . . . . . . . . . . . . . . . . . . . . . . . . . |
591
|
|
|
|
|
|
|
|
592
|
|
|
|
|
|
|
sub _stupid___mod_db { |
593
|
7
|
|
|
7
|
|
17
|
my($self, $dbfile, $to_write) = @_; |
594
|
|
|
|
|
|
|
|
595
|
7
|
100
|
66
|
|
|
142
|
if( -e $dbfile and -s _ ) { |
596
|
4
|
|
|
|
|
7
|
DEBUG > 9 and print "Reading db $dbfile ...\n"; |
597
|
4
|
50
|
|
|
|
135
|
open( STUPID_DB, $dbfile) |
598
|
|
|
|
|
|
|
or die "Can't read-open $dbfile : $!\nAborting"; |
599
|
4
|
|
|
|
|
10
|
my @f; |
600
|
4
|
|
|
|
|
82
|
local $/ = "\n"; |
601
|
4
|
|
|
|
|
115
|
while() { |
602
|
10
|
|
|
|
|
19
|
chomp; |
603
|
10
|
|
|
|
|
54
|
@f = split ' ', $_, 3; # yup, just three space-separated fields |
604
|
10
|
50
|
66
|
|
|
163
|
$to_write->{ "$f[0] $f[1]" } = defined($f[2]) ? $f[2] : "" |
|
|
100
|
|
|
|
|
|
605
|
|
|
|
|
|
|
if @f >= 2 and ! exists $to_write->{ "$f[0] $f[1]" }; |
606
|
|
|
|
|
|
|
} |
607
|
4
|
|
|
|
|
64
|
close(STUPID_DB); |
608
|
|
|
|
|
|
|
} else { |
609
|
3
|
|
|
|
|
5
|
DEBUG > 9 and print "No db $dbfile to read, so just writing new.\n"; |
610
|
|
|
|
|
|
|
} |
611
|
|
|
|
|
|
|
|
612
|
7
|
|
|
|
|
13
|
DEBUG > 8 and print " Saving DB file $dbfile (", scalar(keys %$to_write), " entries)\n"; |
613
|
|
|
|
|
|
|
|
614
|
7
|
50
|
|
|
|
626
|
open( STUPID_DB, ">$dbfile" ) or die "Can't write-open $dbfile: $!\nAborting"; |
615
|
7
|
|
|
|
|
17
|
my $value; |
616
|
7
|
|
|
|
|
48
|
foreach my $key (sort keys %$to_write) { |
617
|
20
|
50
|
|
|
|
66
|
next unless defined( $value = $to_write->{$key} ); |
618
|
20
|
|
|
|
|
43
|
$value =~ tr/\n\r//d; # Enforce sanity |
619
|
20
|
|
|
|
|
138
|
print STUPID_DB "$key $value\n"; |
620
|
|
|
|
|
|
|
} |
621
|
7
|
|
|
|
|
369
|
close(STUPID_DB); |
622
|
7
|
|
|
|
|
13
|
DEBUG > 8 and print " Done saving DB file $dbfile\n"; |
623
|
7
|
|
|
|
|
19
|
return; |
624
|
|
|
|
|
|
|
} |
625
|
|
|
|
|
|
|
|
626
|
|
|
|
|
|
|
############################################################################ |
627
|
|
|
|
|
|
|
|
628
|
|
|
|
|
|
|
sub _getsem { |
629
|
30
|
|
|
30
|
|
55
|
my($self, $file, $be_nonblocking) = @_; |
630
|
|
|
|
|
|
|
# Lock this semaphore file. Returns the unlocker sub!! |
631
|
|
|
|
|
|
|
# |
632
|
|
|
|
|
|
|
# To have the lock be non-blocking, specify a true second parameter. |
633
|
|
|
|
|
|
|
# In that case, returns false if can't get the lock. Unlocker |
634
|
|
|
|
|
|
|
# sub otherwise. |
635
|
|
|
|
|
|
|
|
636
|
30
|
50
|
33
|
|
|
345
|
unless(defined $file and length $file) { |
637
|
0
|
|
|
|
|
0
|
require Carp; |
638
|
0
|
|
|
|
|
0
|
Carp::confess("Filename argument to _getsem must be contentful!") |
639
|
|
|
|
|
|
|
} |
640
|
|
|
|
|
|
|
|
641
|
30
|
50
|
|
|
|
2062894
|
open(my $fh, ">$file") or Carp::croak("Can't write-open $file\: $!"); |
642
|
|
|
|
|
|
|
#chmod 0666, $file; # yes, make it world-writeable. or at least try. |
643
|
|
|
|
|
|
|
|
644
|
30
|
50
|
|
|
|
110
|
if($be_nonblocking) { # non-blocking! |
645
|
0
|
0
|
|
|
|
0
|
eval { flock($fh, 2 | 4) } # Exclusive + NONblocking |
|
0
|
|
|
|
|
0
|
|
646
|
|
|
|
|
|
|
or return; # couldn't get a lock. |
647
|
|
|
|
|
|
|
} else { # normal case: Exclusive, Blocking |
648
|
30
|
|
|
|
|
320
|
eval { flock($fh, 2) } # Exclusive |
649
|
30
|
50
|
|
|
|
66
|
or do { require Carp; Carp::confess("Can't exclusive-block lock $file: $!") }; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
650
|
|
|
|
|
|
|
# should never just fail -- should queue up forever |
651
|
|
|
|
|
|
|
} |
652
|
|
|
|
|
|
|
|
653
|
30
|
50
|
|
|
|
449
|
unless( print $fh "I am a lowly " , __PACKAGE__ , " semaphore file\cm\cj" ) { |
654
|
0
|
|
|
|
|
0
|
require Carp; |
655
|
0
|
|
|
|
|
0
|
Carp::confess("Can't write to $file\: $!"); |
656
|
|
|
|
|
|
|
} |
657
|
|
|
|
|
|
|
|
658
|
|
|
|
|
|
|
return( |
659
|
|
|
|
|
|
|
sub { |
660
|
30
|
50
|
|
30
|
|
73
|
if($fh) { # So we can call multiple times |
661
|
30
|
|
|
|
|
33
|
DEBUG > 1 and print "Releasing lock on $file\n"; |
662
|
30
|
|
|
|
|
1693
|
close($fh); # Presumably will never fail! |
663
|
|
|
|
|
|
|
# Will release the lock. |
664
|
30
|
|
|
|
|
66
|
undef $fh; |
665
|
30
|
|
|
|
|
117
|
return 1; |
666
|
|
|
|
|
|
|
} else { |
667
|
0
|
|
|
|
|
0
|
return ''; |
668
|
|
|
|
|
|
|
} |
669
|
|
|
|
|
|
|
} |
670
|
30
|
|
|
|
|
355
|
); |
671
|
|
|
|
|
|
|
# Now, I /could/ just have this work by returning the globref -- |
672
|
|
|
|
|
|
|
# then when all the references to the glob go to 0, the FH |
673
|
|
|
|
|
|
|
# closes, and the lock is released. However, this /relies/ on |
674
|
|
|
|
|
|
|
# the timing of garbage collection. |
675
|
|
|
|
|
|
|
} |
676
|
|
|
|
|
|
|
|
677
|
|
|
|
|
|
|
|
678
|
|
|
|
|
|
|
########################################################################### |
679
|
|
|
|
|
|
|
# |
680
|
|
|
|
|
|
|
# XML HELL BEGINS HERE |
681
|
|
|
|
|
|
|
# |
682
|
|
|
|
|
|
|
|
683
|
|
|
|
|
|
|
sub _scan_xml { |
684
|
552
|
|
|
552
|
|
941
|
my($self, $tag, $c, $timing, $subtag) = @_; |
685
|
552
|
50
|
|
|
|
2216
|
die "Crazy tag \"$tag\"!!" |
686
|
|
|
|
|
|
|
unless $tag =~ m/^[a-zA-Z_][a-zA-Z_0-9]*$/s; # sanity |
687
|
552
|
50
|
33
|
|
|
3468
|
die "Contentref has to be a scalar ref" unless $c and ref($c) eq 'SCALAR'; |
688
|
552
|
50
|
33
|
|
|
3631
|
die "Timing object has to be an object!" unless $timing and ref($timing) |
|
|
|
33
|
|
|
|
|
689
|
|
|
|
|
|
|
and ref($timing) ne 'SCALAR'; |
690
|
|
|
|
|
|
|
|
691
|
552
|
|
|
|
|
752
|
my $method = $tag; |
692
|
|
|
|
|
|
|
|
693
|
552
|
|
|
|
|
513
|
DEBUG > 5 and print "# _scan_xml << self <$self>; timingobj <$timing>\n# tag <$tag>; subtag ", |
694
|
|
|
|
|
|
|
defined($subtag) ? "<$subtag>" : "(nil)", "\n", |
695
|
|
|
|
|
|
|
"# Content {\n$$c\n# }\n", |
696
|
|
|
|
|
|
|
; |
697
|
|
|
|
|
|
|
|
698
|
552
|
100
|
|
|
|
1042
|
unless(defined $subtag) { # common case: just someval |
699
|
368
|
100
|
|
|
|
29155
|
if( $$c =~ |
700
|
|
|
|
|
|
|
m{ |
701
|
|
|
|
|
|
|
< |
702
|
|
|
|
|
|
|
(?: [a-zA-Z_][-_\.a-zA-Z0-9]* \: )? # optional namespace |
703
|
|
|
|
|
|
|
$tag\b |
704
|
|
|
|
|
|
|
.*? # optional attributes and whitespace and junk |
705
|
|
|
|
|
|
|
> |
706
|
|
|
|
|
|
|
\s* |
707
|
|
|
|
|
|
|
([^<>\s"]+) |
708
|
|
|
|
|
|
|
\s* |
709
|
|
|
|
|
|
|
|
710
|
|
|
|
|
|
|
(?: [a-zA-Z_][-_\.a-zA-Z0-9]* \: )? # optional namespace |
711
|
|
|
|
|
|
|
$tag |
712
|
|
|
|
|
|
|
\s* # just the optional (and rare) whitespace |
713
|
|
|
|
|
|
|
> |
714
|
|
|
|
|
|
|
}sx |
715
|
|
|
|
|
|
|
) { |
716
|
94
|
|
|
|
|
196
|
my $it = $1; |
717
|
94
|
|
|
|
|
361
|
LWP::Debug::debug("Content has $method value: \"$it\""); |
718
|
94
|
|
|
|
|
386
|
DEBUG > 2 and print(" Content has $method value: \"$it\"!!\n"); |
719
|
94
|
|
|
|
|
494
|
$timing->$method( $it ); |
720
|
|
|
|
|
|
|
} else { |
721
|
274
|
|
|
|
|
1290
|
LWP::Debug::debug("Content has no $method value"); |
722
|
274
|
|
|
|
|
1086
|
DEBUG > 2 and print(" Content has no $method value\n"); |
723
|
|
|
|
|
|
|
} |
724
|
368
|
|
|
|
|
3609
|
return; |
725
|
|
|
|
|
|
|
} |
726
|
|
|
|
|
|
|
|
727
|
|
|
|
|
|
|
# Else it's a tag and subtaggy thing |
728
|
184
|
50
|
|
|
|
637
|
die "Crazy subtag \"$tag\"!!" |
729
|
|
|
|
|
|
|
unless $subtag =~ m/^[a-zA-Z_][a-zA-Z_0-9]*$/s; # sanity |
730
|
|
|
|
|
|
|
|
731
|
184
|
100
|
|
|
|
17183
|
if( $$c =~ |
732
|
|
|
|
|
|
|
m{ |
733
|
|
|
|
|
|
|
< |
734
|
|
|
|
|
|
|
(?: [a-zA-Z_][-_\.a-zA-Z0-9]* \: )? # optional namespace |
735
|
|
|
|
|
|
|
$tag |
736
|
|
|
|
|
|
|
\b.*?> |
737
|
|
|
|
|
|
|
\s* |
738
|
|
|
|
|
|
|
( |
739
|
|
|
|
|
|
|
(?: |
740
|
|
|
|
|
|
|
< |
741
|
|
|
|
|
|
|
(?: [a-zA-Z_][-_\.a-zA-Z0-9]* \: )? # optional namespace |
742
|
|
|
|
|
|
|
$subtag |
743
|
|
|
|
|
|
|
\b.*?> |
744
|
|
|
|
|
|
|
\s* |
745
|
|
|
|
|
|
|
[^<>\s"]+ |
746
|
|
|
|
|
|
|
\s* |
747
|
|
|
|
|
|
|
|
748
|
|
|
|
|
|
|
(?: [a-zA-Z_][-_\.a-zA-Z0-9]* \: )? # optional namespace |
749
|
|
|
|
|
|
|
$subtag |
750
|
|
|
|
|
|
|
\s*> |
751
|
|
|
|
|
|
|
\s* |
752
|
|
|
|
|
|
|
)+ |
753
|
|
|
|
|
|
|
) |
754
|
|
|
|
|
|
|
|
755
|
|
|
|
|
|
|
(?: [a-zA-Z_][-_\.a-zA-Z0-9]* \: )? # optional namespace |
756
|
|
|
|
|
|
|
$tag |
757
|
|
|
|
|
|
|
\s*> |
758
|
|
|
|
|
|
|
}sx |
759
|
|
|
|
|
|
|
) { |
760
|
32
|
|
|
|
|
80
|
my $there = $1; |
761
|
32
|
|
|
|
|
38
|
DEBUG > 3 and print " $method+subtag valuecluster \"$there\"\n"; |
762
|
32
|
|
|
|
|
649
|
my(@them) = ( $there =~ |
763
|
|
|
|
|
|
|
# Our previous RE made sure that this is a very simply-structured |
764
|
|
|
|
|
|
|
# area, so we can get away with just this simple regexp: |
765
|
|
|
|
|
|
|
m{ |
766
|
|
|
|
|
|
|
> |
767
|
|
|
|
|
|
|
\s* |
768
|
|
|
|
|
|
|
([^<>\s"]+) |
769
|
|
|
|
|
|
|
\s* |
770
|
|
|
|
|
|
|
|
771
|
|
|
|
|
|
|
}xsg |
772
|
|
|
|
|
|
|
); |
773
|
32
|
|
|
|
|
183
|
LWP::Debug::debug("Content $method+$subtag values: @them"); |
774
|
32
|
|
|
|
|
103
|
DEBUG > 2 and print(" Content $method+$subtag values: (", |
775
|
|
|
|
|
|
|
join(q<,>, map("\"$_\" ", @them)), ") !!\n"); |
776
|
32
|
|
|
|
|
190
|
$timing->$method( @them ); # yes, we call the method, not a submethod |
777
|
|
|
|
|
|
|
} else { |
778
|
152
|
|
|
|
|
619
|
LWP::Debug::debug("Content has no $method+$subtag values"); |
779
|
152
|
|
|
|
|
524
|
DEBUG > 2 and print(" Content has no $method+$subtag values\n"); |
780
|
|
|
|
|
|
|
} |
781
|
|
|
|
|
|
|
|
782
|
|
|
|
|
|
|
# Qvia sicvt exaltantvr Caeli a Terra, |
783
|
|
|
|
|
|
|
# sic exaltatae svnt viae Meae a viis vestris, |
784
|
|
|
|
|
|
|
# et cogitationes Meae a cogitationibvs vestris! |
785
|
|
|
|
|
|
|
# VGABVGA! |
786
|
|
|
|
|
|
|
|
787
|
184
|
|
|
|
|
1423
|
return; |
788
|
|
|
|
|
|
|
} |
789
|
|
|
|
|
|
|
|
790
|
|
|
|
|
|
|
# EndHell. |
791
|
|
|
|
|
|
|
########################################################################### |
792
|
|
|
|
|
|
|
|
793
|
|
|
|
|
|
|
1; |
794
|
|
|
|
|
|
|
__END__ |