| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Sweat::Article; |
|
2
|
|
|
|
|
|
|
|
|
3
|
3
|
|
|
3
|
|
25
|
use warnings; |
|
|
3
|
|
|
|
|
8
|
|
|
|
3
|
|
|
|
|
115
|
|
|
4
|
3
|
|
|
3
|
|
20
|
use strict; |
|
|
3
|
|
|
|
|
6
|
|
|
|
3
|
|
|
|
|
86
|
|
|
5
|
3
|
|
|
3
|
|
19
|
use Moo; |
|
|
3
|
|
|
|
|
5
|
|
|
|
3
|
|
|
|
|
18
|
|
|
6
|
3
|
|
|
3
|
|
7554
|
use namespace::clean; |
|
|
3
|
|
|
|
|
8
|
|
|
|
3
|
|
|
|
|
22
|
|
|
7
|
3
|
|
|
3
|
|
719
|
use utf8::all; |
|
|
3
|
|
|
|
|
6
|
|
|
|
3
|
|
|
|
|
36
|
|
|
8
|
|
|
|
|
|
|
|
|
9
|
3
|
|
|
3
|
|
4859
|
use Types::Standard qw( Str Maybe ); |
|
|
3
|
|
|
|
|
8
|
|
|
|
3
|
|
|
|
|
32
|
|
|
10
|
|
|
|
|
|
|
|
|
11
|
3
|
|
|
3
|
|
2242
|
use Scalar::Util qw( blessed ); |
|
|
3
|
|
|
|
|
9
|
|
|
|
3
|
|
|
|
|
228
|
|
|
12
|
3
|
|
|
3
|
|
1826
|
use HTML::Strip; |
|
|
3
|
|
|
|
|
23599
|
|
|
|
3
|
|
|
|
|
150
|
|
|
13
|
3
|
|
|
3
|
|
24
|
use List::Util qw( shuffle ); |
|
|
3
|
|
|
|
|
6
|
|
|
|
3
|
|
|
|
|
201
|
|
|
14
|
3
|
|
|
3
|
|
23
|
use MediaWiki::API; |
|
|
3
|
|
|
|
|
6
|
|
|
|
3
|
|
|
|
|
4956
|
|
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
has 'text' => ( |
|
17
|
|
|
|
|
|
|
is => 'ro', |
|
18
|
|
|
|
|
|
|
required => 1, |
|
19
|
|
|
|
|
|
|
isa => Str, |
|
20
|
|
|
|
|
|
|
); |
|
21
|
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
has 'title' => ( |
|
23
|
|
|
|
|
|
|
is => 'ro', |
|
24
|
|
|
|
|
|
|
required => 1, |
|
25
|
|
|
|
|
|
|
isa => Str, |
|
26
|
|
|
|
|
|
|
); |
|
27
|
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
has 'url' => ( |
|
29
|
|
|
|
|
|
|
is => 'ro', |
|
30
|
|
|
|
|
|
|
required => 1, |
|
31
|
|
|
|
|
|
|
); |
|
32
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
our $stripper = HTML::Strip->new; |
|
34
|
|
|
|
|
|
|
our $mw = MediaWiki::API->new; |
|
35
|
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
$mw->{config}->{max_lag} = 5; |
|
37
|
|
|
|
|
|
|
$mw->{config}->{max_lag_delay} = 1; |
|
38
|
|
|
|
|
|
|
$mw->{ua}->timeout( 10 ); |
|
39
|
|
|
|
|
|
|
our $language; |
|
40
|
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
our %seen_titles; |
|
42
|
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
sub new_from_newsapi_article { |
|
44
|
0
|
|
|
0
|
0
|
|
my ( $class, $newsapi_article ) = @_; |
|
45
|
|
|
|
|
|
|
|
|
46
|
0
|
0
|
0
|
|
|
|
die "Expected a NewsAPI article, got $newsapi_article" |
|
47
|
|
|
|
|
|
|
unless blessed($newsapi_article) |
|
48
|
|
|
|
|
|
|
&& $newsapi_article->isa( 'Web::NewsAPI::Article' ); |
|
49
|
|
|
|
|
|
|
|
|
50
|
0
|
|
0
|
|
|
|
my $sweat_article = $class->new( |
|
|
|
|
0
|
|
|
|
|
|
51
|
|
|
|
|
|
|
text => ($newsapi_article->title // q{}) |
|
52
|
|
|
|
|
|
|
. q{. } |
|
53
|
|
|
|
|
|
|
. ($newsapi_article->description // q{}), |
|
54
|
|
|
|
|
|
|
url => $newsapi_article->url, |
|
55
|
|
|
|
|
|
|
title => $newsapi_article->title, |
|
56
|
|
|
|
|
|
|
); |
|
57
|
|
|
|
|
|
|
|
|
58
|
0
|
|
|
|
|
|
return $sweat_article; |
|
59
|
|
|
|
|
|
|
} |
|
60
|
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
sub new_from_random_wikipedia_article { |
|
62
|
0
|
|
|
0
|
0
|
|
my ($class) = @_; |
|
63
|
|
|
|
|
|
|
|
|
64
|
0
|
|
|
|
|
|
my $title = _get_random_title(); |
|
65
|
0
|
|
|
|
|
|
return $class->new_from_wikipedia_title($title); |
|
66
|
|
|
|
|
|
|
} |
|
67
|
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
sub new_from_linked_wikipedia_article { |
|
69
|
0
|
|
|
0
|
0
|
|
my ($class, $article) = @_; |
|
70
|
|
|
|
|
|
|
|
|
71
|
0
|
|
|
|
|
|
my $title = _get_random_title_linked_from_title($article->title); |
|
72
|
0
|
|
|
|
|
|
return $class->new_from_wikipedia_title($title); |
|
73
|
|
|
|
|
|
|
} |
|
74
|
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
sub new_from_wikipedia_title { |
|
76
|
0
|
|
|
0
|
0
|
|
my ($class, $title) = @_; |
|
77
|
|
|
|
|
|
|
|
|
78
|
0
|
|
|
|
|
|
my $summary = _get_summary_for_title($title); |
|
79
|
0
|
|
|
|
|
|
my $tries = 0; |
|
80
|
0
|
|
0
|
|
|
|
until ($summary || ($tries >= 3) ) { |
|
81
|
0
|
|
|
|
|
|
$tries++; |
|
82
|
0
|
|
|
|
|
|
$title = _get_random_title_linked_from_title($title); |
|
83
|
0
|
|
|
|
|
|
$summary = _get_summary_for_title($title); |
|
84
|
|
|
|
|
|
|
} |
|
85
|
0
|
0
|
|
|
|
|
unless ( $summary ) { |
|
86
|
0
|
|
|
|
|
|
$title = _get_random_title(); |
|
87
|
0
|
|
|
|
|
|
$summary = _get_summary_for_title($title); |
|
88
|
|
|
|
|
|
|
} |
|
89
|
|
|
|
|
|
|
|
|
90
|
0
|
|
|
|
|
|
return $class->new( |
|
91
|
|
|
|
|
|
|
title => $title, |
|
92
|
|
|
|
|
|
|
text => $summary, |
|
93
|
|
|
|
|
|
|
url => "https://$language.wikipedia.org/wiki/$title", |
|
94
|
|
|
|
|
|
|
); |
|
95
|
|
|
|
|
|
|
} |
|
96
|
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
sub _get_random_title { |
|
98
|
0
|
|
|
0
|
|
|
my $result = $mw->api( { |
|
99
|
|
|
|
|
|
|
list => 'random', |
|
100
|
|
|
|
|
|
|
action => 'query', |
|
101
|
|
|
|
|
|
|
rnnamespace => 0, |
|
102
|
|
|
|
|
|
|
} ); |
|
103
|
|
|
|
|
|
|
|
|
104
|
0
|
|
|
|
|
|
return $result->{query}->{random}->[0]->{title}; |
|
105
|
|
|
|
|
|
|
} |
|
106
|
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
sub _get_summary_for_title { |
|
108
|
0
|
|
|
0
|
|
|
my ($title) = @_; |
|
109
|
|
|
|
|
|
|
|
|
110
|
0
|
|
|
|
|
|
my $result = $mw->api( { |
|
111
|
|
|
|
|
|
|
action => 'query', |
|
112
|
|
|
|
|
|
|
prop => 'extracts', |
|
113
|
|
|
|
|
|
|
exintro => undef, |
|
114
|
|
|
|
|
|
|
titles => $title, |
|
115
|
|
|
|
|
|
|
} ); |
|
116
|
|
|
|
|
|
|
|
|
117
|
0
|
|
|
|
|
|
my $summary = (values(%{$result->{query}->{pages}}))[0]->{extract}; |
|
|
0
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
|
|
119
|
0
|
0
|
|
|
|
|
if (defined $summary) { |
|
120
|
0
|
|
|
|
|
|
$summary = $stripper->parse( $summary ); |
|
121
|
|
|
|
|
|
|
# Eliminate all parentheticals (birth/death dates, alternate-language |
|
122
|
|
|
|
|
|
|
# representations, and so on) because they don't read out loud well. |
|
123
|
0
|
|
|
|
|
|
my $found_some; |
|
124
|
0
|
|
0
|
|
|
|
until ( defined($found_some) && not($found_some) ) { |
|
125
|
0
|
|
|
|
|
|
$found_some = $summary =~ s/\([^\(]*?\)//g; |
|
126
|
|
|
|
|
|
|
} |
|
127
|
|
|
|
|
|
|
# Clean up redundant whitespace, and whitespace before punctuation. |
|
128
|
0
|
|
|
|
|
|
$summary =~ s/\s+([,.!?;:])/$1/g; |
|
129
|
0
|
|
|
|
|
|
$summary =~ s/ {2,}/ /g; |
|
130
|
|
|
|
|
|
|
} |
|
131
|
0
|
0
|
0
|
|
|
|
if ( $summary && $summary =~ /\S/ ) { |
|
132
|
0
|
|
|
|
|
|
return $summary; |
|
133
|
|
|
|
|
|
|
} |
|
134
|
|
|
|
|
|
|
else { |
|
135
|
0
|
|
|
|
|
|
return undef; |
|
136
|
|
|
|
|
|
|
} |
|
137
|
|
|
|
|
|
|
} |
|
138
|
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
sub _erase_parentheticals { |
|
140
|
0
|
|
|
0
|
|
|
my ( $summary, $opener, $closer ) = @_; |
|
141
|
0
|
|
|
|
|
|
my $found_some; |
|
142
|
0
|
|
0
|
|
|
|
until ( defined($found_some) && not($found_some) ) { |
|
143
|
0
|
|
|
|
|
|
$found_some = $summary =~ s/\([^\(]*?\)//g; |
|
144
|
0
|
|
|
|
|
|
warn "Found some: $found_some\n"; |
|
145
|
|
|
|
|
|
|
} |
|
146
|
0
|
|
|
|
|
|
return $summary; |
|
147
|
|
|
|
|
|
|
} |
|
148
|
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
sub _get_random_title_linked_from_title { |
|
151
|
0
|
|
|
0
|
|
|
my ($title) = @_; |
|
152
|
|
|
|
|
|
|
|
|
153
|
0
|
|
|
|
|
|
my $result = $mw->api( { |
|
154
|
|
|
|
|
|
|
action => 'query', |
|
155
|
|
|
|
|
|
|
prop => 'links', |
|
156
|
|
|
|
|
|
|
titles => $title, |
|
157
|
|
|
|
|
|
|
plnamespace => 0, |
|
158
|
|
|
|
|
|
|
pllimit => 100, |
|
159
|
|
|
|
|
|
|
} ); |
|
160
|
|
|
|
|
|
|
|
|
161
|
0
|
|
|
|
|
|
my $links_ref = (values(%{$result->{query}->{pages}}))[0]->{links}; |
|
|
0
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
|
|
163
|
0
|
|
|
|
|
|
my @links = shuffle(@$links_ref); |
|
164
|
|
|
|
|
|
|
|
|
165
|
0
|
|
|
|
|
|
my $linked_title; |
|
166
|
|
|
|
|
|
|
|
|
167
|
0
|
|
0
|
|
|
|
until ($linked_title || (@links == 0 )) { |
|
168
|
0
|
0
|
|
|
|
|
if (defined $links[0]) { |
|
169
|
0
|
|
|
|
|
|
my $proposed_title = $links[0]->{title}; |
|
170
|
|
|
|
|
|
|
# Skip: |
|
171
|
|
|
|
|
|
|
# * Any title we've already seen |
|
172
|
|
|
|
|
|
|
# * Any title with a numeral in it (to stay away from annual- |
|
173
|
|
|
|
|
|
|
# statistics gravity wells) |
|
174
|
|
|
|
|
|
|
# * Any title with a word suggesting it's a just a list or table |
|
175
|
0
|
0
|
0
|
|
|
|
unless ( |
|
176
|
|
|
|
|
|
|
$seen_titles{$proposed_title} |
|
177
|
|
|
|
|
|
|
|| |
|
178
|
|
|
|
|
|
|
$proposed_title =~ /\d|^list of\s|^comparison of\s|^table of\s/i |
|
179
|
|
|
|
|
|
|
) { |
|
180
|
0
|
|
|
|
|
|
$linked_title = $proposed_title; |
|
181
|
0
|
|
|
|
|
|
$seen_titles{$proposed_title} = 1; |
|
182
|
|
|
|
|
|
|
} |
|
183
|
|
|
|
|
|
|
} |
|
184
|
0
|
|
|
|
|
|
shift @links; |
|
185
|
|
|
|
|
|
|
} |
|
186
|
|
|
|
|
|
|
|
|
187
|
0
|
0
|
|
|
|
|
if ($linked_title) { |
|
188
|
0
|
|
|
|
|
|
return $linked_title; |
|
189
|
|
|
|
|
|
|
} |
|
190
|
|
|
|
|
|
|
else { |
|
191
|
0
|
|
|
|
|
|
return _get_random_title(); |
|
192
|
|
|
|
|
|
|
} |
|
193
|
|
|
|
|
|
|
} |
|
194
|
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
1; |
|
197
|
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
=head1 Sweat::Article - Library for the `sweat` command-line program |
|
199
|
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
201
|
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
This library is intended for internal use by the L<sweat> command-line program, |
|
203
|
|
|
|
|
|
|
and as such offers no publicly documented methods. |
|
204
|
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
=head1 SEE ALSO |
|
206
|
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
L<sweat> |
|
208
|
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
=head1 AUTHOR |
|
210
|
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
Jason McIntosh <jmac@jmac.org> |