| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
# |
|
2
|
|
|
|
|
|
|
# WebFetch::Input::SiteNews.pm - get headlines from a site-local file |
|
3
|
|
|
|
|
|
|
# |
|
4
|
|
|
|
|
|
|
# Copyright (c) 1998-2009 Ian Kluft. This program is free software; you can |
|
5
|
|
|
|
|
|
|
# redistribute it and/or modify it under the terms of the GNU General Public |
|
6
|
|
|
|
|
|
|
# License Version 3. See http://www.webfetch.org/GPLv3.txt |
|
7
|
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
package WebFetch::Input::SiteNews; |
|
9
|
|
|
|
|
|
|
|
|
10
|
1
|
|
|
1
|
|
1305
|
use strict; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
34
|
|
|
11
|
1
|
|
|
1
|
|
5
|
use base "WebFetch"; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
90
|
|
|
12
|
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
use Carp; |
|
14
|
|
|
|
|
|
|
use Date::Calc qw(Today Delta_Days Month_to_Text); |
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
=head1 NAME |
|
17
|
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
WebFetch::Input::SiteNews - download and save SiteNews headlines |
|
19
|
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
=cut |
|
21
|
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
# set defaults |
|
23
|
|
|
|
|
|
|
our ( $cat_priorities, $now, $nowstamp ); |
|
24
|
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
our @Options = ( |
|
26
|
|
|
|
|
|
|
"short=s", |
|
27
|
|
|
|
|
|
|
"long=s", |
|
28
|
|
|
|
|
|
|
); |
|
29
|
|
|
|
|
|
|
our $Usage = "--short short-output-file --long long-output-file"; |
|
30
|
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
# configuration parameters |
|
32
|
|
|
|
|
|
|
our $num_links = 5; |
|
33
|
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
# no user-servicable parts beyond this point |
|
35
|
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
# register capabilities with WebFetch |
|
37
|
|
|
|
|
|
|
__PACKAGE__->module_register( "cmdline", "input:sitenews" ); |
|
38
|
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
40
|
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
In perl scripts: |
|
42
|
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
C |
|
44
|
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
From the command line: |
|
46
|
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
C
|
|
48
|
|
|
|
|
|
|
--source news-file --short short-form-output-file |
|
49
|
|
|
|
|
|
|
--long long-form-output-file> |
|
50
|
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
52
|
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
This module gets the current headlines from a site-local file. |
|
54
|
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
The I<--source> parameter specifies a file name which contains news to be |
|
56
|
|
|
|
|
|
|
posted. See L<"FILE FORMAT"> below for details on contents to put in the |
|
57
|
|
|
|
|
|
|
file. I<--source> may be specified more than once, allowing a single news |
|
58
|
|
|
|
|
|
|
output to come from more than one input. For example, one file could be |
|
59
|
|
|
|
|
|
|
manually maintained in CVS or RCS and another could be entered from a |
|
60
|
|
|
|
|
|
|
web form. |
|
61
|
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
After this runs, the file C will be created or replaced. |
|
63
|
|
|
|
|
|
|
If there already was a C file, it will be moved to |
|
64
|
|
|
|
|
|
|
C. |
|
65
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
=cut |
|
67
|
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
# constants for state names |
|
69
|
|
|
|
|
|
|
sub initial_state { 0; } |
|
70
|
|
|
|
|
|
|
sub attr_state { 1; } |
|
71
|
|
|
|
|
|
|
sub text_state { 2; } |
|
72
|
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
sub fetch |
|
74
|
|
|
|
|
|
|
{ |
|
75
|
|
|
|
|
|
|
my ( $self ) = @_; |
|
76
|
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
# set parameters for WebFetch routines |
|
78
|
|
|
|
|
|
|
if ( !defined $self->{num_links}) { |
|
79
|
|
|
|
|
|
|
$self->{num_links} = $WebFetch::Input::SiteNews::num_links; |
|
80
|
|
|
|
|
|
|
} |
|
81
|
|
|
|
|
|
|
if ( !defined $self->{style}) { |
|
82
|
|
|
|
|
|
|
$self->{style} = {}; |
|
83
|
|
|
|
|
|
|
$self->{style}{para} = 1; |
|
84
|
|
|
|
|
|
|
} |
|
85
|
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
# set up Webfetch Embedding API data |
|
87
|
|
|
|
|
|
|
$self->{actions} = {}; |
|
88
|
|
|
|
|
|
|
$self->data->add_fields( "date", "title", "priority", "expired", |
|
89
|
|
|
|
|
|
|
"position", "label", "url", "category", "text" ); |
|
90
|
|
|
|
|
|
|
# defined which fields match to which "well-known field names" |
|
91
|
|
|
|
|
|
|
$self->data->add_wk_names( |
|
92
|
|
|
|
|
|
|
"title" => "title", |
|
93
|
|
|
|
|
|
|
"url" => "url", |
|
94
|
|
|
|
|
|
|
"date" => "date", |
|
95
|
|
|
|
|
|
|
"summary" => "text", |
|
96
|
|
|
|
|
|
|
"category" => "category" |
|
97
|
|
|
|
|
|
|
); |
|
98
|
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
# process the links |
|
100
|
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
# get local time for various date comparisons |
|
102
|
|
|
|
|
|
|
$now = [ Today ]; |
|
103
|
|
|
|
|
|
|
$nowstamp = sprintf "%04d%02d%02d", @$now; |
|
104
|
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
# parse data file |
|
106
|
|
|
|
|
|
|
my $source; |
|
107
|
|
|
|
|
|
|
if (( exists $self->{sources}) and ( ref $self->{sources} eq "ARRAY" )) { |
|
108
|
|
|
|
|
|
|
foreach $source ( @{$self->{sources}}) { |
|
109
|
|
|
|
|
|
|
$self->parse_input( $source ); |
|
110
|
|
|
|
|
|
|
} |
|
111
|
|
|
|
|
|
|
} |
|
112
|
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
# set parameters for the short news format |
|
114
|
|
|
|
|
|
|
if ( defined $self->{short_path} ) { |
|
115
|
|
|
|
|
|
|
# create the HTML actions list |
|
116
|
|
|
|
|
|
|
$self->{actions}{html} = []; |
|
117
|
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
# create the HTML-generation parameters |
|
119
|
|
|
|
|
|
|
my $params = {}; |
|
120
|
|
|
|
|
|
|
$params = {}; |
|
121
|
|
|
|
|
|
|
$params->{sort_func} = sub { |
|
122
|
|
|
|
|
|
|
my ( $a, $b ) = @_; |
|
123
|
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
# sort/compare news entries for the short display |
|
125
|
|
|
|
|
|
|
# sorting priority: |
|
126
|
|
|
|
|
|
|
# expiration status first (expired items last) |
|
127
|
|
|
|
|
|
|
# priority second (category/age combo) |
|
128
|
|
|
|
|
|
|
# label third (chronological order) |
|
129
|
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
# check expirations first |
|
131
|
|
|
|
|
|
|
my $exp_fnum = $self->fname2fnum("expired"); |
|
132
|
|
|
|
|
|
|
( $a->[$exp_fnum] and !$b->[$exp_fnum]) and return 1; |
|
133
|
|
|
|
|
|
|
( !$a->[$exp_fnum] and $b->[$exp_fnum]) and return -1; |
|
134
|
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
# compare priority - posting category w/ age penalty |
|
136
|
|
|
|
|
|
|
my $pri_fnum = $self->fname2fnum("priority"); |
|
137
|
|
|
|
|
|
|
if ( $a->[$pri_fnum] != $b->[$pri_fnum] ) { |
|
138
|
|
|
|
|
|
|
return $a->[$pri_fnum] <=> $b->[$pri_fnum]; |
|
139
|
|
|
|
|
|
|
} |
|
140
|
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
# otherwise sort by label (chronological order) |
|
142
|
|
|
|
|
|
|
my $lbl_fnum = $self->fname2fnum("label"); |
|
143
|
|
|
|
|
|
|
return $a->[$lbl_fnum] cmp $b->[$lbl_fnum]; |
|
144
|
|
|
|
|
|
|
}; |
|
145
|
|
|
|
|
|
|
$params->{filter_func} = sub { |
|
146
|
|
|
|
|
|
|
# filter: skip expired items |
|
147
|
|
|
|
|
|
|
my $exp_fnum = $self->fname2fnum("expired"); |
|
148
|
|
|
|
|
|
|
return ! $_[$exp_fnum]; |
|
149
|
|
|
|
|
|
|
}; |
|
150
|
|
|
|
|
|
|
$params->{format_func} = sub { |
|
151
|
|
|
|
|
|
|
# generate HTML text |
|
152
|
|
|
|
|
|
|
my $txt_fnum = $self->fname2fnum("text"); |
|
153
|
|
|
|
|
|
|
my $pri_fnum = $self->fname2fnum("priority"); |
|
154
|
|
|
|
|
|
|
return $_[$txt_fnum] |
|
155
|
|
|
|
|
|
|
."\n"; |
|
156
|
|
|
|
|
|
|
}; |
|
157
|
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
# put parameters for fmt_handler_html() on the html list |
|
159
|
|
|
|
|
|
|
push @{$self->{actions}{html}}, [ $self->{short_path}, $params ]; |
|
160
|
|
|
|
|
|
|
} |
|
161
|
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
# set parameters for the long news format |
|
163
|
|
|
|
|
|
|
if ( defined $self->{long_path} ) { |
|
164
|
|
|
|
|
|
|
# create the SiteNews-specific action list |
|
165
|
|
|
|
|
|
|
# It will use WebFetch::Input::SiteNews::fmt_handler_sitenews_long() |
|
166
|
|
|
|
|
|
|
# which is defined in this file |
|
167
|
|
|
|
|
|
|
$self->{actions}{sitenews_long} = []; |
|
168
|
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
# put parameters for fmt_handler_sitenews_long() on the list |
|
170
|
|
|
|
|
|
|
push @{$self->{actions}{sitenews_long}}, [ $self->{long_path} ]; |
|
171
|
|
|
|
|
|
|
} |
|
172
|
|
|
|
|
|
|
} |
|
173
|
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
# parse input file |
|
175
|
|
|
|
|
|
|
sub parse_input |
|
176
|
|
|
|
|
|
|
{ |
|
177
|
|
|
|
|
|
|
my ( $self, $input ) = @_; |
|
178
|
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
# parse data file |
|
180
|
|
|
|
|
|
|
if ( ! open ( news_data, $input )) { |
|
181
|
|
|
|
|
|
|
croak "$0: failed to open $input: $!\n"; |
|
182
|
|
|
|
|
|
|
} |
|
183
|
|
|
|
|
|
|
my @news_items; |
|
184
|
|
|
|
|
|
|
my $position = 0; |
|
185
|
|
|
|
|
|
|
my $state = initial_state; # before first entry |
|
186
|
|
|
|
|
|
|
my ( $current ); |
|
187
|
|
|
|
|
|
|
$cat_priorities = {}; # priorities for sorting |
|
188
|
|
|
|
|
|
|
while ( ) { |
|
189
|
|
|
|
|
|
|
chop; |
|
190
|
|
|
|
|
|
|
/^\s*\#/ and next; # skip comments |
|
191
|
|
|
|
|
|
|
/^\s*$/ and next; # skip blank lines |
|
192
|
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
if ( /^[^\s]/ ) { |
|
194
|
|
|
|
|
|
|
# found attribute line |
|
195
|
|
|
|
|
|
|
if ( $state == initial_state ) { |
|
196
|
|
|
|
|
|
|
if ( /^categories:\s*(.*)/ ) { |
|
197
|
|
|
|
|
|
|
my @cats = split ( /\s+/, $1 ); |
|
198
|
|
|
|
|
|
|
my ( $i ); |
|
199
|
|
|
|
|
|
|
$cat_priorities->{"default"} = 999; |
|
200
|
|
|
|
|
|
|
for ( $i=0; $i<=$#cats; $i++ ) { |
|
201
|
|
|
|
|
|
|
$cat_priorities->{$cats[$i]} |
|
202
|
|
|
|
|
|
|
= $i + 1; |
|
203
|
|
|
|
|
|
|
} |
|
204
|
|
|
|
|
|
|
next; |
|
205
|
|
|
|
|
|
|
} elsif ( /^url-prefix:\s*(.*)/ ) { |
|
206
|
|
|
|
|
|
|
$self->{url_prefix} = $1; |
|
207
|
|
|
|
|
|
|
} |
|
208
|
|
|
|
|
|
|
} |
|
209
|
|
|
|
|
|
|
if ( $state == initial_state or $state == text_state ) |
|
210
|
|
|
|
|
|
|
{ |
|
211
|
|
|
|
|
|
|
# found first attribute of a new entry |
|
212
|
|
|
|
|
|
|
if ( /^([^=]+)=(.*)/ ) { |
|
213
|
|
|
|
|
|
|
$current = {}; |
|
214
|
|
|
|
|
|
|
$current->{position} = $position++; |
|
215
|
|
|
|
|
|
|
$current->{$1} = $2; |
|
216
|
|
|
|
|
|
|
push( @news_items, $current ); |
|
217
|
|
|
|
|
|
|
$state = attr_state; |
|
218
|
|
|
|
|
|
|
} |
|
219
|
|
|
|
|
|
|
} elsif ( $state == attr_state ) { |
|
220
|
|
|
|
|
|
|
# found a followup attribute |
|
221
|
|
|
|
|
|
|
if ( /^([^=]+)=(.*)/ ) { |
|
222
|
|
|
|
|
|
|
$current->{$1} = $2; |
|
223
|
|
|
|
|
|
|
} |
|
224
|
|
|
|
|
|
|
} |
|
225
|
|
|
|
|
|
|
} else { |
|
226
|
|
|
|
|
|
|
# found text line |
|
227
|
|
|
|
|
|
|
if ( $state == initial_state ) { |
|
228
|
|
|
|
|
|
|
# cannot accept text before any attributes |
|
229
|
|
|
|
|
|
|
next; |
|
230
|
|
|
|
|
|
|
} elsif ( $state == attr_state or $state == text_state ) { |
|
231
|
|
|
|
|
|
|
if ( defined $current->{text}) { |
|
232
|
|
|
|
|
|
|
$current->{text} .= "\n$_"; |
|
233
|
|
|
|
|
|
|
} else { |
|
234
|
|
|
|
|
|
|
$current->{text} = $_; |
|
235
|
|
|
|
|
|
|
} |
|
236
|
|
|
|
|
|
|
$state = text_state; |
|
237
|
|
|
|
|
|
|
} |
|
238
|
|
|
|
|
|
|
} |
|
239
|
|
|
|
|
|
|
} |
|
240
|
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
# translate parsed news into the WebFetch Embedding API data table |
|
242
|
|
|
|
|
|
|
my ( $item, %label_hash, $pos ); |
|
243
|
|
|
|
|
|
|
$pos = 0; |
|
244
|
|
|
|
|
|
|
foreach $item ( @news_items ) { |
|
245
|
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
# generate an intra-page link label |
|
247
|
|
|
|
|
|
|
my ( $label, $count ); |
|
248
|
|
|
|
|
|
|
$count=0; |
|
249
|
|
|
|
|
|
|
while (( $label = $item->{posted}."-".sprintf("%03d",$count)), |
|
250
|
|
|
|
|
|
|
defined $label_hash{$label}) |
|
251
|
|
|
|
|
|
|
{ |
|
252
|
|
|
|
|
|
|
$count++; |
|
253
|
|
|
|
|
|
|
} |
|
254
|
|
|
|
|
|
|
$label_hash{$label} = 1; |
|
255
|
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
# save the data record |
|
257
|
|
|
|
|
|
|
my $title = ( defined $item->{title}) ? $item->{title} : ""; |
|
258
|
|
|
|
|
|
|
my $posted = ( defined $item->{posted}) ? $item->{posted} : ""; |
|
259
|
|
|
|
|
|
|
my $category = ( defined $item->{category}) |
|
260
|
|
|
|
|
|
|
? $item->{category} : ""; |
|
261
|
|
|
|
|
|
|
my $text = ( defined $item->{text}) ? $item->{text} : ""; |
|
262
|
|
|
|
|
|
|
my $url_prefix = ( defined $self->{url_prefix}) |
|
263
|
|
|
|
|
|
|
? $self->{url_prefix} : ""; |
|
264
|
|
|
|
|
|
|
$self->data->add_record( |
|
265
|
|
|
|
|
|
|
printstamp($posted), $title, priority( $item ), |
|
266
|
|
|
|
|
|
|
expired( $item ), $pos, $label, |
|
267
|
|
|
|
|
|
|
$url_prefix."#".$label, $category, $text ); |
|
268
|
|
|
|
|
|
|
$pos++; |
|
269
|
|
|
|
|
|
|
} |
|
270
|
|
|
|
|
|
|
} |
|
271
|
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
# |
|
273
|
|
|
|
|
|
|
# utility functions |
|
274
|
|
|
|
|
|
|
# |
|
275
|
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
# generate a printable version of the datestamp |
|
277
|
|
|
|
|
|
|
sub printstamp |
|
278
|
|
|
|
|
|
|
{ |
|
279
|
|
|
|
|
|
|
my ( $stamp ) = @_; |
|
280
|
|
|
|
|
|
|
my ( $year, $mon, $day ) = ( $stamp =~ /^(....)(..)(..)/ ); |
|
281
|
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
return Month_to_Text(int($mon))." ".int($day).", $year"; |
|
283
|
|
|
|
|
|
|
} |
|
284
|
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
# function to detect if a news entry is expired |
|
286
|
|
|
|
|
|
|
sub expired |
|
287
|
|
|
|
|
|
|
{ |
|
288
|
|
|
|
|
|
|
my ( $entry ) = @_; |
|
289
|
|
|
|
|
|
|
return (( defined $entry->{expires}) and |
|
290
|
|
|
|
|
|
|
( $entry->{expires} lt $nowstamp )); |
|
291
|
|
|
|
|
|
|
} |
|
292
|
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
# function to get the priority value from |
|
294
|
|
|
|
|
|
|
sub priority |
|
295
|
|
|
|
|
|
|
{ |
|
296
|
|
|
|
|
|
|
my ( $entry ) = @_; |
|
297
|
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
( defined $entry->{posted}) or return 999; |
|
299
|
|
|
|
|
|
|
my ( $year, $mon, $day ) = ( $entry->{posted} =~ /^(....)(..)(..)/ ); |
|
300
|
|
|
|
|
|
|
my $age = Delta_Days( $year, $mon, $day, @$now ); |
|
301
|
|
|
|
|
|
|
my $bonus = 0; |
|
302
|
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
if ( $age <= 2 ) { |
|
304
|
|
|
|
|
|
|
$bonus -= 2 - $age; |
|
305
|
|
|
|
|
|
|
} |
|
306
|
|
|
|
|
|
|
if (( defined $entry->{category}) and |
|
307
|
|
|
|
|
|
|
( defined $cat_priorities->{$entry->{category}})) |
|
308
|
|
|
|
|
|
|
{ |
|
309
|
|
|
|
|
|
|
my $cat_pri = ( exists $cat_priorities->{$entry->{category}}) |
|
310
|
|
|
|
|
|
|
? $cat_priorities->{$entry->{category}} : 0; |
|
311
|
|
|
|
|
|
|
return $cat_pri + $age * 0.025 + $bonus; |
|
312
|
|
|
|
|
|
|
} else { |
|
313
|
|
|
|
|
|
|
return $cat_priorities->{"default"} + $age * 0.025 |
|
314
|
|
|
|
|
|
|
+ $bonus; |
|
315
|
|
|
|
|
|
|
} |
|
316
|
|
|
|
|
|
|
} |
|
317
|
|
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
1; |
|
319
|
|
|
|
|
|
|
__END__ |