| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
# WebFetch::Input::SiteNews |
|
2
|
|
|
|
|
|
|
# ABSTRACT: download and save SiteNews headlines from a local file |
|
3
|
|
|
|
|
|
|
# |
|
4
|
|
|
|
|
|
|
# Copyright (c) 1998-2022 Ian Kluft. This program is free software; you can |
|
5
|
|
|
|
|
|
|
# redistribute it and/or modify it under the terms of the GNU General Public |
|
6
|
|
|
|
|
|
|
# License Version 3. See https://www.gnu.org/licenses/gpl-3.0-standalone.html |
|
7
|
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
# pragmas to silence some warnings from Perl::Critic |
|
9
|
|
|
|
|
|
|
## no critic (Modules::RequireExplicitPackage) |
|
10
|
|
|
|
|
|
|
# This solves a catch-22 where parts of Perl::Critic want both package and use-strict to be first |
|
11
|
3
|
|
|
3
|
|
2077
|
use strict; |
|
|
3
|
|
|
|
|
13
|
|
|
|
3
|
|
|
|
|
98
|
|
|
12
|
3
|
|
|
3
|
|
15
|
use warnings; |
|
|
3
|
|
|
|
|
17
|
|
|
|
3
|
|
|
|
|
76
|
|
|
13
|
3
|
|
|
3
|
|
14
|
use utf8; |
|
|
3
|
|
|
|
|
7
|
|
|
|
3
|
|
|
|
|
22
|
|
|
14
|
|
|
|
|
|
|
## use critic (Modules::RequireExplicitPackage) |
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
package WebFetch::Input::SiteNews; |
|
17
|
|
|
|
|
|
|
$WebFetch::Input::SiteNews::VERSION = '0.15.8'; |
|
18
|
3
|
|
|
3
|
|
185
|
use base "WebFetch"; |
|
|
3
|
|
|
|
|
6
|
|
|
|
3
|
|
|
|
|
285
|
|
|
19
|
|
|
|
|
|
|
|
|
20
|
3
|
|
|
3
|
|
19
|
use Carp; |
|
|
3
|
|
|
|
|
6
|
|
|
|
3
|
|
|
|
|
204
|
|
|
21
|
3
|
|
|
3
|
|
23
|
use Readonly; |
|
|
3
|
|
|
|
|
16
|
|
|
|
3
|
|
|
|
|
186
|
|
|
22
|
3
|
|
|
3
|
|
28
|
use Scalar::Util qw(reftype); |
|
|
3
|
|
|
|
|
8
|
|
|
|
3
|
|
|
|
|
146
|
|
|
23
|
3
|
|
|
3
|
|
20
|
use DateTime; |
|
|
3
|
|
|
|
|
13
|
|
|
|
3
|
|
|
|
|
97
|
|
|
24
|
3
|
|
|
3
|
|
18
|
use DateTime::Format::ISO8601; |
|
|
3
|
|
|
|
|
15
|
|
|
|
3
|
|
|
|
|
8802
|
|
|
25
|
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
# set defaults |
|
27
|
|
|
|
|
|
|
my ( $cat_priorities, $now ); |
|
28
|
|
|
|
|
|
|
Readonly::Array my @Options => ( "short_path|short=s", "long_path|long=s", ); |
|
29
|
|
|
|
|
|
|
Readonly::Scalar my $Usage => "--short short-output-file --long long-output-file"; |
|
30
|
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
# configuration parameters |
|
32
|
|
|
|
|
|
|
Readonly::Scalar my $num_links => 5; |
|
33
|
|
|
|
|
|
|
Readonly::Array my @dt_keys => qw(locale time_zone); |
|
34
|
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
# functions for access to internal data for testing |
|
36
|
|
|
|
|
|
|
## no critic (Subroutines::ProhibitUnusedPrivateSubroutines) |
|
37
|
|
|
|
|
|
|
sub _config_params |
|
38
|
|
|
|
|
|
|
{ |
|
39
|
4
|
|
|
4
|
|
47
|
return { Options => \@Options, Usage => $Usage, num_links => $num_links }; |
|
40
|
|
|
|
|
|
|
} |
|
41
|
0
|
|
|
0
|
|
0
|
sub _cat_priorities { return $cat_priorities; } |
|
42
|
|
|
|
|
|
|
## critic (Subroutines::ProhibitUnusedPrivateSubroutines) |
|
43
|
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
# no user-servicable parts beyond this point |
|
45
|
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
# register capabilities with WebFetch |
|
47
|
|
|
|
|
|
|
__PACKAGE__->module_register( _config_params(), "cmdline", "input:sitenews", "output:sitenews_long" ); |
|
48
|
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
# constants for state names |
|
50
|
299
|
|
|
299
|
0
|
602
|
sub initial_state { return 0; } |
|
51
|
184
|
|
|
184
|
0
|
410
|
sub attr_state { return 1; } |
|
52
|
234
|
|
|
234
|
0
|
559
|
sub text_state { return 2; } |
|
53
|
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
# fetch() is the WebFetch API call point to run this module |
|
55
|
|
|
|
|
|
|
sub fetch |
|
56
|
|
|
|
|
|
|
{ |
|
57
|
2
|
|
|
2
|
1
|
15
|
my ($self) = @_; |
|
58
|
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
# set parameters for WebFetch routines |
|
60
|
2
|
50
|
|
|
|
8
|
if ( not defined $self->{num_links} ) { |
|
61
|
2
|
|
|
|
|
8
|
$self->set_param( "num_links", WebFetch->config("num_links") ); |
|
62
|
|
|
|
|
|
|
} |
|
63
|
2
|
|
|
|
|
7
|
$self->set_param( "para", 1 ); |
|
64
|
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
# set up Webfetch Embedding API data |
|
66
|
2
|
|
|
|
|
6
|
$self->{actions} = {}; |
|
67
|
2
|
|
|
|
|
11
|
$self->data->add_fields( "date", "title", "priority", "expired", "position", "label", "url", "category", "text" ); |
|
68
|
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
# defined which fields match to which "well-known field names" |
|
70
|
2
|
|
|
|
|
8
|
$self->data->add_wk_names( |
|
71
|
|
|
|
|
|
|
"title" => "title", |
|
72
|
|
|
|
|
|
|
"url" => "url", |
|
73
|
|
|
|
|
|
|
"date" => "date", |
|
74
|
|
|
|
|
|
|
"summary" => "text", |
|
75
|
|
|
|
|
|
|
"category" => "category" |
|
76
|
|
|
|
|
|
|
); |
|
77
|
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
# process the links |
|
79
|
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
# get local time for various date comparisons |
|
81
|
2
|
|
|
|
|
4
|
my %dt_opts; |
|
82
|
2
|
50
|
33
|
|
|
7
|
if ( exists $self->{datetime_settings} and reftype( $self->{datetime_settings} ) eq "HASH" ) { |
|
83
|
0
|
|
|
|
|
0
|
%dt_opts = %{ $self->{datetime_settings} }; # get locale and time_zone settings if available |
|
|
0
|
|
|
|
|
0
|
|
|
84
|
|
|
|
|
|
|
} |
|
85
|
2
|
50
|
|
|
|
7
|
if ( exists $self->{testing_faketime} ) { |
|
86
|
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
# use a pre-specified timestamp for testing purposes so news elements are same age as expected result |
|
88
|
2
|
|
|
|
|
23
|
$now = DateTime::Format::ISO8601->parse_datetime( $self->{testing_faketime}, %dt_opts ); |
|
89
|
2
|
50
|
|
|
|
1780
|
if ( exists $dt_opts{locale} ) { |
|
90
|
0
|
|
|
|
|
0
|
$now->set_locale( $dt_opts{locale} ); |
|
91
|
|
|
|
|
|
|
} |
|
92
|
2
|
50
|
|
|
|
8
|
if ( exists $dt_opts{time_zone} ) { |
|
93
|
0
|
|
|
|
|
0
|
$now->set_time_zone( $dt_opts{time_zone} ); |
|
94
|
|
|
|
|
|
|
} |
|
95
|
|
|
|
|
|
|
} else { |
|
96
|
0
|
|
|
|
|
0
|
$now = DateTime->now(%dt_opts); |
|
97
|
|
|
|
|
|
|
} |
|
98
|
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
# parse data file(s) |
|
100
|
2
|
|
|
|
|
5
|
my @sources; |
|
101
|
2
|
50
|
33
|
|
|
9
|
if ( ( exists $self->{sources} ) and ( ref $self->{sources} eq "ARRAY" ) ) { |
|
102
|
0
|
|
|
|
|
0
|
@sources = @{ $self->{sources} }; |
|
|
0
|
|
|
|
|
0
|
|
|
103
|
|
|
|
|
|
|
} |
|
104
|
2
|
50
|
|
|
|
6
|
if ( exists $self->{source} ) { |
|
105
|
2
|
|
|
|
|
7
|
push @sources, $self->{source}; |
|
106
|
|
|
|
|
|
|
} |
|
107
|
2
|
|
|
|
|
5
|
foreach my $source (@sources) { |
|
108
|
2
|
|
|
|
|
7
|
$self->parse_input($source); |
|
109
|
|
|
|
|
|
|
} |
|
110
|
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
# set parameters for the short news format |
|
112
|
2
|
50
|
|
|
|
9
|
if ( defined $self->{short_path} ) { |
|
113
|
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
# create the HTML actions list |
|
115
|
2
|
|
|
|
|
9
|
$self->{actions}{html} = []; |
|
116
|
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
# create the HTML-generation parameters |
|
118
|
2
|
|
|
|
|
5
|
my $params = {}; |
|
119
|
2
|
|
|
|
|
5
|
$params = {}; |
|
120
|
|
|
|
|
|
|
$params->{sort_func} = sub { |
|
121
|
48
|
|
|
48
|
|
77
|
my ( $a, $b ) = @_; |
|
122
|
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
# sort/compare news entries for the short display |
|
124
|
|
|
|
|
|
|
# sorting priority: |
|
125
|
|
|
|
|
|
|
# expiration status first (expired items last) |
|
126
|
|
|
|
|
|
|
# priority second (category/age combo) |
|
127
|
|
|
|
|
|
|
# label third (chronological order) |
|
128
|
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
# check expirations first |
|
130
|
48
|
|
|
|
|
88
|
my $exp_fnum = $self->fname2fnum("expired"); |
|
131
|
48
|
50
|
33
|
|
|
108
|
( $a->[$exp_fnum] and not $b->[$exp_fnum] ) and return 1; |
|
132
|
48
|
50
|
33
|
|
|
188
|
( not $a->[$exp_fnum] and $b->[$exp_fnum] ) and return -1; |
|
133
|
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
# compare priority - posting category w/ age penalty |
|
135
|
48
|
|
|
|
|
104
|
my $pri_fnum = $self->fname2fnum("priority"); |
|
136
|
48
|
100
|
|
|
|
120
|
if ( $a->[$pri_fnum] != $b->[$pri_fnum] ) { |
|
137
|
18
|
|
|
|
|
58
|
return $a->[$pri_fnum] <=> $b->[$pri_fnum]; |
|
138
|
|
|
|
|
|
|
} |
|
139
|
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
# otherwise sort by label (chronological order) |
|
141
|
30
|
|
|
|
|
62
|
my $lbl_fnum = $self->fname2fnum("label"); |
|
142
|
30
|
|
|
|
|
76
|
return $a->[$lbl_fnum] cmp $b->[$lbl_fnum]; |
|
143
|
2
|
|
|
|
|
10
|
}; |
|
144
|
|
|
|
|
|
|
$params->{filter_func} = sub { |
|
145
|
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
# filter: skip expired items |
|
147
|
33
|
|
|
33
|
|
70
|
my $exp_fnum = $self->fname2fnum("expired"); |
|
148
|
33
|
|
|
|
|
95
|
return not $_[$exp_fnum]; |
|
149
|
2
|
|
|
|
|
9
|
}; |
|
150
|
|
|
|
|
|
|
$params->{format_func} = sub { |
|
151
|
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
# generate HTML text |
|
153
|
10
|
|
|
10
|
|
25
|
my $txt_fnum = $self->fname2fnum("text"); |
|
154
|
10
|
|
|
|
|
28
|
my $pri_fnum = $self->fname2fnum("priority"); |
|
155
|
10
|
|
|
|
|
52
|
return $_[$txt_fnum] . "\n<!--- priority " . $_[$pri_fnum] . " --->"; |
|
156
|
2
|
|
|
|
|
11
|
}; |
|
157
|
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
# put parameters for fmt_handler_html() on the html list |
|
159
|
2
|
|
|
|
|
4
|
push @{ $self->{actions}{html} }, [ $self->{short_path}, $params ]; |
|
|
2
|
|
|
|
|
8
|
|
|
160
|
|
|
|
|
|
|
} |
|
161
|
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
# set parameters for the long news format |
|
163
|
2
|
50
|
|
|
|
6
|
if ( defined $self->{long_path} ) { |
|
164
|
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
# create the SiteNews-specific action list |
|
166
|
|
|
|
|
|
|
# It will use WebFetch::Input::SiteNews::fmt_handler_sitenews_long() |
|
167
|
|
|
|
|
|
|
# which is defined in this file |
|
168
|
2
|
|
|
|
|
6
|
$self->{actions}{sitenews_long} = []; |
|
169
|
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
# put parameters for fmt_handler_sitenews_long() on the list |
|
171
|
2
|
|
|
|
|
3
|
push @{ $self->{actions}{sitenews_long} }, [ $self->{long_path} ]; |
|
|
2
|
|
|
|
|
5
|
|
|
172
|
|
|
|
|
|
|
} |
|
173
|
2
|
|
|
|
|
7
|
return; |
|
174
|
|
|
|
|
|
|
} |
|
175
|
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
# inner portion of input parsing - called by parse_input() |
|
177
|
|
|
|
|
|
|
# read SiteNews text file and parse it into news items to return to caller |
|
178
|
|
|
|
|
|
|
sub parse_input_inner |
|
179
|
|
|
|
|
|
|
{ |
|
180
|
2
|
|
|
2
|
0
|
5
|
my ( $self, $news_data_fd ) = @_; |
|
181
|
2
|
|
|
|
|
5
|
my @news_items; |
|
182
|
2
|
|
|
|
|
4
|
my $position = 0; |
|
183
|
2
|
|
|
|
|
6
|
my $state = initial_state; # before first entry |
|
184
|
2
|
|
|
|
|
6
|
my ($current); |
|
185
|
2
|
|
|
|
|
9
|
$cat_priorities = {}; # priorities for sorting |
|
186
|
2
|
|
|
|
|
84
|
while (<$news_data_fd>) { |
|
187
|
233
|
|
|
|
|
358
|
chomp; |
|
188
|
233
|
100
|
|
|
|
518
|
/^\s*\#/x and next; # skip comments |
|
189
|
228
|
100
|
|
|
|
623
|
/^\s*$/x and next; # skip blank lines |
|
190
|
|
|
|
|
|
|
|
|
191
|
192
|
100
|
|
|
|
419
|
if (/^[^\s]/x) { |
|
192
|
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
# found attribute line |
|
194
|
107
|
100
|
|
|
|
206
|
if ( $state == initial_state ) { |
|
195
|
10
|
100
|
|
|
|
46
|
if (/^categories:\s*(.*)/x) { |
|
|
|
100
|
|
|
|
|
|
|
196
|
2
|
|
|
|
|
15
|
my @cats = split( ' ', $1 ); |
|
197
|
2
|
|
|
|
|
5
|
my ($i); |
|
198
|
2
|
|
|
|
|
6
|
$cat_priorities->{"default"} = 999; |
|
199
|
2
|
|
|
|
|
9
|
for ( $i = 0 ; $i <= $#cats ; $i++ ) { |
|
200
|
8
|
|
|
|
|
45
|
$cat_priorities->{ $cats[$i] } = $i + 1; |
|
201
|
|
|
|
|
|
|
} |
|
202
|
2
|
|
|
|
|
17
|
next; |
|
203
|
|
|
|
|
|
|
} elsif (/^(\w+):\s*(.*)/x) { |
|
204
|
6
|
|
|
|
|
22
|
$self->set_param( $1, $2 ); |
|
205
|
|
|
|
|
|
|
} |
|
206
|
|
|
|
|
|
|
} |
|
207
|
105
|
100
|
100
|
|
|
168
|
if ( $state == initial_state or $state == text_state ) { |
|
|
|
50
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
# found first attribute of a new entry |
|
210
|
39
|
100
|
|
|
|
154
|
if (/^([^=]+)=(.*)/x) { |
|
211
|
33
|
|
|
|
|
53
|
$current = {}; |
|
212
|
33
|
|
|
|
|
81
|
$current->{position} = $position++; |
|
213
|
33
|
|
|
|
|
87
|
$current->{$1} = $2; |
|
214
|
33
|
|
|
|
|
59
|
push( @news_items, $current ); |
|
215
|
33
|
|
|
|
|
48
|
$state = attr_state; |
|
216
|
|
|
|
|
|
|
} |
|
217
|
|
|
|
|
|
|
} elsif ( $state == attr_state ) { |
|
218
|
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
# found a followup attribute |
|
220
|
66
|
50
|
|
|
|
200
|
if (/^([^=]+)=(.*)/x) { |
|
221
|
66
|
|
|
|
|
349
|
$current->{$1} = $2; |
|
222
|
|
|
|
|
|
|
} |
|
223
|
|
|
|
|
|
|
} |
|
224
|
|
|
|
|
|
|
} else { |
|
225
|
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
# found text line |
|
227
|
85
|
50
|
66
|
|
|
127
|
if ( $state == initial_state ) { |
|
|
|
50
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
# cannot accept text before any attributes |
|
230
|
0
|
|
|
|
|
0
|
next; |
|
231
|
|
|
|
|
|
|
} elsif ( $state == attr_state or $state == text_state ) { |
|
232
|
85
|
100
|
|
|
|
146
|
if ( defined $current->{text} ) { |
|
233
|
52
|
|
|
|
|
126
|
$current->{text} .= "\n$_"; |
|
234
|
|
|
|
|
|
|
} else { |
|
235
|
33
|
|
|
|
|
65
|
$current->{text} = $_; |
|
236
|
|
|
|
|
|
|
} |
|
237
|
85
|
|
|
|
|
131
|
$state = text_state; |
|
238
|
|
|
|
|
|
|
} |
|
239
|
|
|
|
|
|
|
} |
|
240
|
|
|
|
|
|
|
} |
|
241
|
2
|
|
|
|
|
16
|
return @news_items; |
|
242
|
|
|
|
|
|
|
} |
|
243
|
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
# parse input file |
|
245
|
|
|
|
|
|
|
sub parse_input |
|
246
|
|
|
|
|
|
|
{ |
|
247
|
2
|
|
|
2
|
0
|
9
|
my ( $self, $input ) = @_; |
|
248
|
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
# parse data file |
|
250
|
2
|
|
|
|
|
5
|
my $news_data; |
|
251
|
2
|
50
|
|
|
|
123
|
if ( not open( $news_data, "<", $input ) ) { |
|
252
|
0
|
|
|
|
|
0
|
croak "$0: failed to open $input: $!\n"; |
|
253
|
|
|
|
|
|
|
} |
|
254
|
2
|
|
|
|
|
14
|
my @news_items = $self->parse_input_inner($news_data); |
|
255
|
2
|
|
|
|
|
38
|
close $news_data; |
|
256
|
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
# translate parsed news into the WebFetch Embedding API data table |
|
258
|
2
|
|
|
|
|
7
|
my ( %label_hash, $pos ); |
|
259
|
2
|
|
|
|
|
5
|
$pos = 0; |
|
260
|
2
|
|
|
|
|
7
|
foreach my $item (@news_items) { |
|
261
|
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
# collect fields for the data record |
|
263
|
33
|
50
|
|
|
|
93
|
my $title = ( defined $item->{title} ) ? $item->{title} : ""; |
|
264
|
33
|
50
|
|
|
|
102
|
my $posted = ( defined $item->{posted} ) ? $item->{posted} : ""; |
|
265
|
33
|
50
|
|
|
|
69
|
my $category = ( defined $item->{category} ) ? $item->{category} : ""; |
|
266
|
33
|
50
|
|
|
|
127
|
my $text = ( defined $item->{text} ) ? $item->{text} : ""; |
|
267
|
|
|
|
|
|
|
my $url_prefix = |
|
268
|
33
|
50
|
|
|
|
104
|
( defined $self->{url_prefix} ) ? $self->{url_prefix} : ""; |
|
269
|
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
# timestamp processing using optional locale and time_zone from WebFetch object's datetime_settings hash |
|
271
|
|
|
|
|
|
|
# This is usually set by SiteNews file's global settings at the top |
|
272
|
33
|
|
|
|
|
60
|
my ( %dt_opts, $time_str, $anchor_time ); |
|
273
|
33
|
50
|
33
|
|
|
192
|
if ( exists $self->{datetime_settings} and reftype $self->{datetime_settings} eq "HASH" ) { |
|
274
|
33
|
|
|
|
|
71
|
%dt_opts = %{ $self->{datetime_settings} }; # get locale and time_zone settings if available |
|
|
33
|
|
|
|
|
126
|
|
|
275
|
|
|
|
|
|
|
} |
|
276
|
33
|
50
|
|
|
|
79
|
if ($posted) { |
|
277
|
33
|
|
|
|
|
83
|
my $date_ref = WebFetch::parse_date( \%dt_opts, $posted ); |
|
278
|
33
|
100
|
|
|
|
94
|
if ( defined $date_ref ) { |
|
279
|
8
|
100
|
|
|
|
77
|
my $dt = ( ref $date_ref eq "DateTime" ) ? $date_ref : DateTime->new( @$date_ref, %dt_opts ); |
|
280
|
8
|
|
|
|
|
16687
|
$time_str = WebFetch::gen_timestamp( \%dt_opts, $dt ); |
|
281
|
8
|
|
|
|
|
9272
|
$anchor_time = WebFetch::anchor_timestr( \%dt_opts, $dt ); |
|
282
|
|
|
|
|
|
|
} |
|
283
|
|
|
|
|
|
|
} |
|
284
|
33
|
100
|
66
|
|
|
312
|
if ( not defined $time_str or not defined $anchor_time ) { |
|
285
|
25
|
|
|
|
|
47
|
$time_str = "undated"; |
|
286
|
25
|
|
|
|
|
31
|
$anchor_time = "0000-undated"; |
|
287
|
|
|
|
|
|
|
} |
|
288
|
|
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
# generate an intra-page link label |
|
290
|
33
|
|
|
|
|
55
|
my ( $label, $count ); |
|
291
|
33
|
|
|
|
|
46
|
$count = 0; |
|
292
|
33
|
|
66
|
|
|
210
|
while ( ( $label = $anchor_time . "-" . sprintf( "%03d", $count ) ) |
|
293
|
|
|
|
|
|
|
and defined $label_hash{$label} ) |
|
294
|
|
|
|
|
|
|
{ |
|
295
|
300
|
|
|
|
|
1043
|
$count++; |
|
296
|
|
|
|
|
|
|
} |
|
297
|
33
|
|
|
|
|
159
|
$label_hash{$label} = 1; |
|
298
|
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
# generate data record for output |
|
300
|
33
|
|
|
|
|
100
|
$self->data->add_record( |
|
301
|
|
|
|
|
|
|
$time_str, $title, $self->priority($item), |
|
302
|
|
|
|
|
|
|
expired($item), $pos, $label, $url_prefix . "#" . $label, |
|
303
|
|
|
|
|
|
|
$category, $text |
|
304
|
|
|
|
|
|
|
); |
|
305
|
33
|
|
|
|
|
99
|
$pos++; |
|
306
|
|
|
|
|
|
|
} |
|
307
|
2
|
|
|
|
|
38
|
return; |
|
308
|
|
|
|
|
|
|
} |
|
309
|
|
|
|
|
|
|
|
|
310
|
|
|
|
|
|
|
# format handler function specific to this module's long-news output format |
|
311
|
|
|
|
|
|
|
sub fmt_handler_sitenews_long |
|
312
|
|
|
|
|
|
|
{ |
|
313
|
|
|
|
|
|
|
# TODO move sort block to separate function and remove the no-critic exemption |
|
314
|
|
|
|
|
|
|
## no critic ( BuiltinFunctions::RequireSimpleSortBlock ) |
|
315
|
2
|
|
|
2
|
0
|
11
|
my ($self) = @_; |
|
316
|
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
# sort events for long display |
|
318
|
|
|
|
|
|
|
my @long_news = sort { |
|
319
|
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
# sort news entries for long display |
|
321
|
|
|
|
|
|
|
# sorting priority: |
|
322
|
|
|
|
|
|
|
# date first |
|
323
|
|
|
|
|
|
|
# category/priority second |
|
324
|
|
|
|
|
|
|
# reverse file order last |
|
325
|
|
|
|
|
|
|
|
|
326
|
|
|
|
|
|
|
# sort by date |
|
327
|
37
|
|
|
|
|
81
|
my $lbl_fnum = $self->fname2fnum("label"); |
|
328
|
37
|
|
|
|
|
84
|
my ( $a_date, $b_date ) = ( $a->[$lbl_fnum], $b->[$lbl_fnum] ); |
|
329
|
37
|
|
|
|
|
165
|
$a_date =~ s/-.*//x; |
|
330
|
37
|
|
|
|
|
95
|
$b_date =~ s/-.*//x; |
|
331
|
37
|
100
|
|
|
|
81
|
if ( $a_date ne $b_date ) { |
|
332
|
7
|
|
|
|
|
27
|
return $b_date cmp $a_date; |
|
333
|
|
|
|
|
|
|
} |
|
334
|
|
|
|
|
|
|
|
|
335
|
|
|
|
|
|
|
# sort by priority (within same date) |
|
336
|
30
|
|
|
|
|
69
|
my $pri_fnum = $self->fname2fnum("priority"); |
|
337
|
30
|
100
|
|
|
|
79
|
if ( $a->[$pri_fnum] != $b->[$pri_fnum] ) { |
|
338
|
1
|
|
|
|
|
20
|
return $a->[$pri_fnum] <=> $b->[$pri_fnum]; |
|
339
|
|
|
|
|
|
|
} |
|
340
|
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
# sort by chronological order (within same date and priority) |
|
342
|
29
|
|
|
|
|
76
|
return $a->[$lbl_fnum] cmp $b->[$lbl_fnum]; |
|
343
|
2
|
|
|
|
|
7
|
} @{ $self->{data}{records} }; |
|
|
2
|
|
|
|
|
23
|
|
|
344
|
|
|
|
|
|
|
|
|
345
|
|
|
|
|
|
|
# process the links for the long list |
|
346
|
2
|
|
|
|
|
9
|
my ( @long_text, $prev, $url_prefix, $i ); |
|
347
|
|
|
|
|
|
|
$url_prefix = |
|
348
|
|
|
|
|
|
|
( defined $self->{url_prefix} ) |
|
349
|
|
|
|
|
|
|
? $self->{url_prefix} |
|
350
|
2
|
50
|
|
|
|
14
|
: ""; |
|
351
|
2
|
|
|
|
|
14
|
$prev = undef; |
|
352
|
2
|
|
|
|
|
7
|
push @long_text, "<dl>"; |
|
353
|
2
|
|
|
|
|
8
|
my $lbl_fnum = $self->fname2fnum("label"); |
|
354
|
2
|
|
|
|
|
14
|
my $date_fnum = $self->fname2fnum("date"); |
|
355
|
2
|
|
|
|
|
26
|
my $title_fnum = $self->fname2fnum("title"); |
|
356
|
2
|
|
|
|
|
16
|
my $txt_fnum = $self->fname2fnum("text"); |
|
357
|
2
|
|
|
|
|
10
|
my $exp_fnum = $self->fname2fnum("expired"); |
|
358
|
2
|
|
|
|
|
9
|
my $pri_fnum = $self->fname2fnum("priority"); |
|
359
|
|
|
|
|
|
|
|
|
360
|
2
|
|
|
|
|
19
|
for ( $i = 0 ; $i <= $#long_news ; $i++ ) { |
|
361
|
33
|
|
|
|
|
60
|
my $news = $long_news[$i]; |
|
362
|
33
|
100
|
100
|
|
|
107
|
if ( ( !defined $prev->[$date_fnum] ) |
|
363
|
|
|
|
|
|
|
or $prev->[$date_fnum] ne $news->[$date_fnum] ) |
|
364
|
|
|
|
|
|
|
{ |
|
365
|
9
|
|
|
|
|
32
|
push @long_text, "<dt>" . $news->[$date_fnum]; |
|
366
|
9
|
|
|
|
|
16
|
push @long_text, "<dd>"; |
|
367
|
|
|
|
|
|
|
} |
|
368
|
33
|
50
|
|
|
|
128
|
push @long_text, |
|
369
|
|
|
|
|
|
|
"<a name=\"" |
|
370
|
|
|
|
|
|
|
. $news->[$lbl_fnum] . "\">" |
|
371
|
|
|
|
|
|
|
. $news->[$title_fnum] |
|
372
|
|
|
|
|
|
|
. "</a>\n" |
|
373
|
|
|
|
|
|
|
. $news->[$txt_fnum] |
|
374
|
|
|
|
|
|
|
. "<!--- priority: " |
|
375
|
|
|
|
|
|
|
. $news->[$pri_fnum] |
|
376
|
|
|
|
|
|
|
. ( $news->[$exp_fnum] ? " expired" : "" ) . " --->"; |
|
377
|
33
|
|
|
|
|
50
|
push @long_text, "<p>"; |
|
378
|
33
|
|
|
|
|
64
|
$prev = $news; |
|
379
|
|
|
|
|
|
|
} |
|
380
|
2
|
|
|
|
|
4
|
push @long_text, "</dl>"; |
|
381
|
|
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
# store it for later save to disk |
|
383
|
2
|
|
|
|
|
67
|
$self->html_savable( $self->{long_path}, join( "\n", @long_text ) . "\n" ); |
|
384
|
2
|
|
|
|
|
14
|
return; |
|
385
|
|
|
|
|
|
|
} |
|
386
|
|
|
|
|
|
|
|
|
387
|
|
|
|
|
|
|
# |
|
388
|
|
|
|
|
|
|
# utility functions |
|
389
|
|
|
|
|
|
|
# |
|
390
|
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
# function to detect if a news entry is expired |
|
392
|
|
|
|
|
|
|
sub expired |
|
393
|
|
|
|
|
|
|
{ |
|
394
|
33
|
|
|
33
|
0
|
62
|
my ($entry) = @_; |
|
395
|
33
|
50
|
|
|
|
229
|
return 0 if not exists $entry->{expires}; |
|
396
|
0
|
0
|
|
|
|
0
|
return 0 if not defined $entry->{expires}; |
|
397
|
0
|
|
|
|
|
0
|
return $entry->{expires} < $now; |
|
398
|
|
|
|
|
|
|
} |
|
399
|
|
|
|
|
|
|
|
|
400
|
|
|
|
|
|
|
# function to compute a priority value which decays with age |
|
401
|
|
|
|
|
|
|
sub priority |
|
402
|
|
|
|
|
|
|
{ |
|
403
|
33
|
|
|
33
|
0
|
67
|
my ( $self, $entry ) = @_; |
|
404
|
|
|
|
|
|
|
|
|
405
|
33
|
50
|
|
|
|
83
|
return 999 if not exists $entry->{posted}; |
|
406
|
33
|
50
|
|
|
|
70
|
return 999 if not defined $entry->{posted}; |
|
407
|
|
|
|
|
|
|
|
|
408
|
33
|
|
|
|
|
87
|
my $date_ref = WebFetch::parse_date( $entry->{posted} ); |
|
409
|
33
|
100
|
|
|
|
119
|
return 999 if not defined $date_ref; |
|
410
|
|
|
|
|
|
|
|
|
411
|
8
|
|
|
|
|
16
|
my %dt_opts; |
|
412
|
8
|
50
|
33
|
|
|
51
|
if ( exists $self->{datetime_settings} and reftype( $self->{datetime_settings} ) eq "HASH" ) { |
|
413
|
8
|
|
|
|
|
23
|
%dt_opts = %{ $self->{datetime_settings} }; # get locale and time_zone settings if available |
|
|
8
|
|
|
|
|
35
|
|
|
414
|
|
|
|
|
|
|
} |
|
415
|
8
|
100
|
|
|
|
29
|
my $dt = ( ref $date_ref eq "DateTime" ) ? $date_ref : DateTime->new( @$date_ref, %dt_opts ); |
|
416
|
8
|
|
|
|
|
709
|
my $age = ( $now->subtract_datetime($dt) )->delta_days(); |
|
417
|
8
|
|
|
|
|
2555
|
my $bonus = 0; |
|
418
|
|
|
|
|
|
|
|
|
419
|
8
|
50
|
|
|
|
22
|
if ( $age <= 2 ) { |
|
420
|
0
|
|
|
|
|
0
|
$bonus -= 2 - $age; |
|
421
|
|
|
|
|
|
|
} |
|
422
|
8
|
100
|
66
|
|
|
47
|
if ( ( defined $entry->{category} ) |
|
423
|
|
|
|
|
|
|
and ( defined $cat_priorities->{ $entry->{category} } ) ) |
|
424
|
|
|
|
|
|
|
{ |
|
425
|
|
|
|
|
|
|
my $cat_pri = |
|
426
|
|
|
|
|
|
|
( exists $cat_priorities->{ $entry->{category} } ) |
|
427
|
|
|
|
|
|
|
? $cat_priorities->{ $entry->{category} } |
|
428
|
7
|
50
|
|
|
|
23
|
: 0; |
|
429
|
7
|
|
|
|
|
52
|
return $cat_pri + $age * 0.025 + $bonus; |
|
430
|
|
|
|
|
|
|
} else { |
|
431
|
1
|
|
|
|
|
12
|
return $cat_priorities->{"default"} + $age * 0.025 + $bonus; |
|
432
|
|
|
|
|
|
|
} |
|
433
|
|
|
|
|
|
|
} |
|
434
|
|
|
|
|
|
|
|
|
435
|
|
|
|
|
|
|
1; |
|
436
|
|
|
|
|
|
|
|
|
437
|
|
|
|
|
|
|
=pod |
|
438
|
|
|
|
|
|
|
|
|
439
|
|
|
|
|
|
|
=encoding UTF-8 |
|
440
|
|
|
|
|
|
|
|
|
441
|
|
|
|
|
|
|
=head1 NAME |
|
442
|
|
|
|
|
|
|
|
|
443
|
|
|
|
|
|
|
WebFetch::Input::SiteNews - download and save SiteNews headlines from a local file |
|
444
|
|
|
|
|
|
|
|
|
445
|
|
|
|
|
|
|
=head1 VERSION |
|
446
|
|
|
|
|
|
|
|
|
447
|
|
|
|
|
|
|
version 0.15.8 |
|
448
|
|
|
|
|
|
|
|
|
449
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
450
|
|
|
|
|
|
|
|
|
451
|
|
|
|
|
|
|
In perl scripts: |
|
452
|
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
C<use WebFetch::Input::SiteNews;> |
|
454
|
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
From the command line: |
|
456
|
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
C<perl -w -MWebFetch::Input::SiteNews -e "&fetch_main" -- --dir directory |
|
458
|
|
|
|
|
|
|
--source news-file --short short-form-output-file |
|
459
|
|
|
|
|
|
|
--long long-form-output-file> |
|
460
|
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
462
|
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
This module gets the current headlines from a site-local file. |
|
464
|
|
|
|
|
|
|
|
|
465
|
|
|
|
|
|
|
The I<--source> parameter specifies a file name which contains news to be |
|
466
|
|
|
|
|
|
|
posted. See L<"FILE FORMAT"> below for details on contents to put in the |
|
467
|
|
|
|
|
|
|
file. I<--source> may be specified more than once, allowing a single news |
|
468
|
|
|
|
|
|
|
output to come from more than one input. For example, one file could be |
|
469
|
|
|
|
|
|
|
manually maintained in CVS or RCS and another could be entered from a |
|
470
|
|
|
|
|
|
|
web form. |
|
471
|
|
|
|
|
|
|
|
|
472
|
|
|
|
|
|
|
After this runs, the file C<site_news.html> will be created or replaced. |
|
473
|
|
|
|
|
|
|
If there already was a C<site_news.html> file, it will be moved to |
|
474
|
|
|
|
|
|
|
C<Osite_news.html>. |
|
475
|
|
|
|
|
|
|
|
|
476
|
|
|
|
|
|
|
=head1 FILE FORMAT |
|
477
|
|
|
|
|
|
|
|
|
478
|
|
|
|
|
|
|
The WebFetch::Input::SiteNews data format is used to set up news for the |
|
479
|
|
|
|
|
|
|
local web site and allow other sites to import it via WebFetch. |
|
480
|
|
|
|
|
|
|
The file is plain text containing comments and the news items. |
|
481
|
|
|
|
|
|
|
|
|
482
|
|
|
|
|
|
|
There are three forms of outputs generated from these news files. |
|
483
|
|
|
|
|
|
|
|
|
484
|
|
|
|
|
|
|
The I<"short news" output> is a small number (5 by default) |
|
485
|
|
|
|
|
|
|
of HTML text and links used for display in a small news window. |
|
486
|
|
|
|
|
|
|
This list takes into account expiration dates and |
|
487
|
|
|
|
|
|
|
priorities to pick which news entries are displayed and in what order. |
|
488
|
|
|
|
|
|
|
|
|
489
|
|
|
|
|
|
|
The I<"long news" output> lists all the news entries chronologically. |
|
490
|
|
|
|
|
|
|
It does not take expiration or priority into account. |
|
491
|
|
|
|
|
|
|
It is intended for a comprehensive site news list. |
|
492
|
|
|
|
|
|
|
|
|
493
|
|
|
|
|
|
|
The I<export modes> make news items available in formats other web sites |
|
494
|
|
|
|
|
|
|
can retrieve to post news about your site. They are chronological |
|
495
|
|
|
|
|
|
|
listings that omit expired items. |
|
496
|
|
|
|
|
|
|
They do not take priorities into account. |
|
497
|
|
|
|
|
|
|
|
|
498
|
|
|
|
|
|
|
=over 4 |
|
499
|
|
|
|
|
|
|
|
|
500
|
|
|
|
|
|
|
=item global parameters |
|
501
|
|
|
|
|
|
|
|
|
502
|
|
|
|
|
|
|
Lines coming before the first news item can set global parameters. |
|
503
|
|
|
|
|
|
|
|
|
504
|
|
|
|
|
|
|
=over 4 |
|
505
|
|
|
|
|
|
|
|
|
506
|
|
|
|
|
|
|
=item categories |
|
507
|
|
|
|
|
|
|
|
|
508
|
|
|
|
|
|
|
A line before the first news item beginning with "categories:" contains a |
|
509
|
|
|
|
|
|
|
whitespace-delimited list of news category names in order from highest |
|
510
|
|
|
|
|
|
|
to lowest priority. |
|
511
|
|
|
|
|
|
|
These priority names are used by the news item attributes and |
|
512
|
|
|
|
|
|
|
then for sorting "short news" list items. |
|
513
|
|
|
|
|
|
|
|
|
514
|
|
|
|
|
|
|
=back |
|
515
|
|
|
|
|
|
|
|
|
516
|
|
|
|
|
|
|
=item data lines |
|
517
|
|
|
|
|
|
|
|
|
518
|
|
|
|
|
|
|
Non-blank non-indented non-comment lines are I<data lines>. |
|
519
|
|
|
|
|
|
|
Each data line contains a I<name=value> pair. |
|
520
|
|
|
|
|
|
|
Each group of consecutive data lines is followed by an arbitrary number |
|
521
|
|
|
|
|
|
|
of indented lines which contain HTML text for the news entry. |
|
522
|
|
|
|
|
|
|
|
|
523
|
|
|
|
|
|
|
The recognized attributes are as follows: |
|
524
|
|
|
|
|
|
|
|
|
525
|
|
|
|
|
|
|
=over 4 |
|
526
|
|
|
|
|
|
|
|
|
527
|
|
|
|
|
|
|
=item category |
|
528
|
|
|
|
|
|
|
|
|
529
|
|
|
|
|
|
|
used for prioritization, values are set by the categories global parameter |
|
530
|
|
|
|
|
|
|
(required) |
|
531
|
|
|
|
|
|
|
|
|
532
|
|
|
|
|
|
|
=item posted |
|
533
|
|
|
|
|
|
|
|
|
534
|
|
|
|
|
|
|
date posted, format is a numerical date YYYYMMDD (required) |
|
535
|
|
|
|
|
|
|
|
|
536
|
|
|
|
|
|
|
=item expires |
|
537
|
|
|
|
|
|
|
|
|
538
|
|
|
|
|
|
|
expiration date, format is a numerical date YYYYMMDD (optional) |
|
539
|
|
|
|
|
|
|
|
|
540
|
|
|
|
|
|
|
=item title |
|
541
|
|
|
|
|
|
|
|
|
542
|
|
|
|
|
|
|
shorter title for use in news exports to other sites, |
|
543
|
|
|
|
|
|
|
otherwise the whole news text will be used (optional) |
|
544
|
|
|
|
|
|
|
|
|
545
|
|
|
|
|
|
|
=back |
|
546
|
|
|
|
|
|
|
|
|
547
|
|
|
|
|
|
|
=item text lines |
|
548
|
|
|
|
|
|
|
|
|
549
|
|
|
|
|
|
|
Intended lines are HTML text for the news item. |
|
550
|
|
|
|
|
|
|
|
|
551
|
|
|
|
|
|
|
=item comments |
|
552
|
|
|
|
|
|
|
|
|
553
|
|
|
|
|
|
|
Comments are lines beginning with "#". |
|
554
|
|
|
|
|
|
|
They are ignored so they can be used for human-readable information. |
|
555
|
|
|
|
|
|
|
|
|
556
|
|
|
|
|
|
|
=back |
|
557
|
|
|
|
|
|
|
|
|
558
|
|
|
|
|
|
|
Note that the "short news" list has some modifications to |
|
559
|
|
|
|
|
|
|
priorities based on the age of the news item, |
|
560
|
|
|
|
|
|
|
so that the short list will favor newer items when |
|
561
|
|
|
|
|
|
|
they're the same priority. |
|
562
|
|
|
|
|
|
|
There is a sorting "priority bonus" for items less than a |
|
563
|
|
|
|
|
|
|
day old, which increases their priority by two priority levels. |
|
564
|
|
|
|
|
|
|
Day-old news items get a bonus of one priority level. |
|
565
|
|
|
|
|
|
|
All news items also "decay" in priority slightly every day, |
|
566
|
|
|
|
|
|
|
dropping a whole priority level every 40 days. |
|
567
|
|
|
|
|
|
|
|
|
568
|
|
|
|
|
|
|
=head1 SEE ALSO |
|
569
|
|
|
|
|
|
|
|
|
570
|
|
|
|
|
|
|
L<WebFetch> |
|
571
|
|
|
|
|
|
|
L<https://github.com/ikluft/WebFetch> |
|
572
|
|
|
|
|
|
|
|
|
573
|
|
|
|
|
|
|
=head1 BUGS AND LIMITATIONS |
|
574
|
|
|
|
|
|
|
|
|
575
|
|
|
|
|
|
|
Please report bugs via GitHub at L<https://github.com/ikluft/WebFetch/issues> |
|
576
|
|
|
|
|
|
|
|
|
577
|
|
|
|
|
|
|
Patches and enhancements may be submitted via a pull request at L<https://github.com/ikluft/WebFetch/pulls> |
|
578
|
|
|
|
|
|
|
|
|
579
|
|
|
|
|
|
|
=head1 AUTHOR |
|
580
|
|
|
|
|
|
|
|
|
581
|
|
|
|
|
|
|
Ian Kluft <https://github.com/ikluft> |
|
582
|
|
|
|
|
|
|
|
|
583
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
|
584
|
|
|
|
|
|
|
|
|
585
|
|
|
|
|
|
|
This software is Copyright (c) 1998-2023 by Ian Kluft. |
|
586
|
|
|
|
|
|
|
|
|
587
|
|
|
|
|
|
|
This is free software, licensed under: |
|
588
|
|
|
|
|
|
|
|
|
589
|
|
|
|
|
|
|
The GNU General Public License, Version 3, June 2007 |
|
590
|
|
|
|
|
|
|
|
|
591
|
|
|
|
|
|
|
=cut |
|
592
|
|
|
|
|
|
|
|
|
593
|
|
|
|
|
|
|
__END__ |
|
594
|
|
|
|
|
|
|
# POD docs follow |
|
595
|
|
|
|
|
|
|
|