File Coverage

lib/Whim/Core.pm
Criterion Covered Total %
statement 158 194 81.4
branch 43 58 74.1
condition 2 3 66.6
subroutine 24 27 88.8
pod 0 6 0.0
total 227 288 78.8


line stmt bran cond sub pod time code
1             package Whim::Core;
2              
3 4     4   2414614 use warnings;
  4         28  
  4         132  
4 4     4   34 use strict;
  4         16  
  4         79  
5 4     4   80 use v5.24;
  4         18  
6              
7 4     4   1797 use Moo;
  4         35048  
  4         21  
8 4     4   12229 use DBI;
  4         75308  
  4         226  
9 4     4   1052 use Path::Tiny;
  4         13062  
  4         228  
10 4     4   33 use Scalar::Util qw(blessed);
  4         19  
  4         188  
11 4     4   2291 use DateTime::Format::ISO8601;
  4         3016799  
  4         244  
12 4     4   2808 use LWP::UserAgent;
  4         136621  
  4         160  
13 4     4   1277 use Digest::SHA qw(sha256_hex);
  4         5722  
  4         332  
14              
15 4     4   967 use lib '/Users/jmac/Documents/Plerd/indieweb/webmention-perl/lib';
  4         1404  
  4         30  
16              
17 4     4   2548 use Whim::Mention;
  4         16  
  4         1409  
18              
19             # Specifying $TRANSIENT_DB for data_directory tells SQLite to use an in-memory database rather than persist to disk.
20             # Helpful for automated tests and maybe future non-persistent uses of whim
21             our $TRANSIENT_DB = ":memory:";
22              
23             has 'home' => (
24             is => 'rw',
25             required => 1,
26             coerce => sub { path( $_[0] ) },
27             trigger => sub {
28             my ( $self, $dir ) = @_;
29             $self->_make_homedir($dir);
30             },
31             );
32              
33             has 'data_directory' => (
34             is => 'lazy',
35             isa => sub {
36             unless ( blessed( $_[0] ) && $_[0]->isa('Path::Tiny') ) {
37              
38             # $TRANSIENT_DB can be coerced to a Path::Tiny,
39             # so this check still works.
40             die "data_directory must be a valid path or Path::Tiny object\n";
41             }
42             },
43             coerce => sub { path( $_[0] ) },
44             );
45              
46             has 'dbh' => ( is => 'lazy', );
47              
48             has 'author_photo_directory' => (
49             is => 'lazy',
50             isa => sub {
51             unless ( blessed( $_[0] ) && $_[0]->isa('Path::Tiny') ) {
52             die "author_photo_directory must be a valid path or "
53             . "Path::Tiny object\n";
54             }
55             },
56             coerce => sub { path( $_[0] ) },
57             );
58              
59             has 'ua' => (
60             is => 'ro',
61             default => sub {
62             LWP::UserAgent->new( agent => "Whim/$Whim::VERSION" );
63             },
64             );
65              
66 4     4   43 use Readonly;
  4         21  
  4         374  
67             Readonly my $IMAGEDIR_NAME => 'images';
68              
69 4     4   37 no warnings "experimental::signatures";
  4         35  
  4         178  
70 4     4   33 use feature "signatures";
  4         9  
  4         10526  
71              
72 0     0 0 0 sub unblock_sources ( $self, @sources ) {
  0         0  
  0         0  
  0         0  
73 0         0 my @failures;
74 0         0 for my $source (@sources) {
75 0         0 my ($extant) = $self->dbh->selectrow_array(
76             'select * from block where source = ?',
77             {}, $source );
78              
79 0 0       0 if ($extant) {
80 0         0 $self->dbh->do( 'delete from block where source = ?',
81             {}, $source );
82             }
83             else {
84 0         0 push @failures, $source;
85             }
86             }
87 0         0 return @failures;
88             }
89              
90 0     0 0 0 sub block_sources ( $self, @sources ) {
  0         0  
  0         0  
  0         0  
91 0         0 for my $source (@sources) {
92 0         0 $self->dbh->do( 'insert into block values (?)', {}, $source );
93             }
94             }
95              
96 0     0 0 0 sub blocked_sources ($self) {
  0         0  
  0         0  
97 0         0 my @sources;
98 0         0 my $sth = $self->dbh->prepare('select source from block order by source');
99 0         0 $sth->execute;
100 0         0 while ( my ($source) = $sth->fetchrow_array ) {
101 0         0 push @sources, $source;
102             }
103 0         0 return @sources;
104             }
105              
106 32     32 0 5331 sub fetch_webmentions ( $self, $args ) {
  32         60  
  32         53  
  32         44  
107              
108             # This complex query lets us flexibly use the contents of the `block`
109             # table as a blocklist.
110 32         61 my $query =
111             'select wm.* from wm where original_source not in '
112             . '(select original_source from wm '
113             . 'inner join (select source from block) b on '
114             . ' wm.original_source like \'%\' || b.source || \'%\') ';
115 32         57 my @wheres;
116             my @bind_args;
117              
118 32 50       102 if ( $args->{before} ) {
119 0         0 push @wheres, "time_received <= ?";
120 0         0 push @bind_args, $args->{before};
121             }
122 32 50       76 if ( $args->{after} ) {
123 0         0 push @wheres, "time_received >= ?";
124 0         0 push @bind_args, $args->{after};
125             }
126 32 100       86 if ( $args->{source} ) {
127 21         69 foreach ( $args->{source}->@* ) {
128 21         53 push @wheres, "original_source like ?";
129 21         69 push @bind_args, "\%$_\%";
130             }
131             }
132 32 50       79 if ( $args->{'not-source'} ) {
133 0         0 foreach ( $args->{'not-source'}->@* ) {
134 0         0 push @wheres, "original_source not like ?";
135 0         0 push @bind_args, "\%$_\%";
136             }
137             }
138 32 100       86 if ( $args->{target} ) {
139 25         48 push @wheres, "target like ?";
140 25         61 push @bind_args, "\%$args->{target}\%";
141             }
142 32 50       71 if ( $args->{type} ) {
143 0         0 push @wheres, "type like ?";
144 0         0 push @bind_args, $args->{type};
145             }
146              
147             # Unless we're processing WMs, we want only verified ones.
148 32 100       77 if ( $args->{process} ) {
149 3         10 push @wheres, "is_tested != 1";
150             }
151             else {
152 29         52 push @wheres, "is_verified = 1";
153             }
154              
155 32         54 my $where_clause = '';
156 32 50       70 if (@wheres) {
157 32         90 $where_clause = 'and ' . join( ' and ', @wheres );
158             }
159              
160 32         75 $query .= "$where_clause order by time_received";
161              
162 32         782 my $sth = $self->dbh->prepare($query);
163 32         11414 $sth->execute(@bind_args);
164              
165 32         162 my @wms;
166 32         1274 while ( my $row = $sth->fetchrow_hashref ) {
167             my %args = (
168             source => URI->new( $row->{source} ),
169             target => URI->new( $row->{target} ),
170             original_source => $row->{original_source}
171             ? URI->new( $row->{original_source} )
172             : undef,
173             title => $row->{title},
174             content => $row->{content},
175             source_html => $row->{html},
176             is_verified => $row->{is_verified},
177             is_tested => $row->{is_tested},
178             type => $row->{type},
179             time_received => DateTime::Format::ISO8601->parse_datetime(
180             $row->{time_received}
181             ),
182             author_photo_hash => $row->{author_photo_hash},
183             time_verified => $row->{time_verified}
184             ? DateTime::Format::ISO8601->parse_datetime(
185             $row->{time_verified}
186             )
187 53 100       333 : undef,
    100          
188             );
189              
190             # Delete keys that, if undef, we want the webmention object to
191             # lazily re-derive
192 53         56815 foreach (
193             qw(time_verified is_verified original_source content title type))
194             {
195 318 100       707 delete $args{$_} unless defined $args{$_};
196             }
197              
198 53 100       141 if ( $row->{author_name} ) {
199 39         66 my %author_args;
200 39         96 $author_args{name} = $row->{author_name};
201 39         73 foreach (qw(url photo)) {
202 78 50       208 if ( $row->{"author_$_"} ) {
203 78         198 $author_args{$_} = $row->{"author_$_"};
204             }
205             }
206              
207 39         947 $args{author} = Web::Mention::Author->new( \%author_args );
208             }
209 53         11390 my $wm = Whim::Mention->new( \%args );
210 53         27893 push @wms, $wm;
211              
212             }
213              
214 32         1066 return @wms;
215              
216             }
217              
218             # process_webmentions: Verify all untested WMs.
219 3     3 0 5652 sub process_webmentions ($self) {
  3         9  
  3         7  
220 3         8 my $verified_count = 0;
221 3         6 my $total_count = 0;
222 3         77 my $sth = $self->dbh->prepare(
223             'update wm set is_tested = 1, is_verified = ?, '
224             . 'author_name = ?, author_url = ?, author_photo = ?, '
225             . 'time_verified = ?, html = ?, author_photo_hash = ?, '
226             . 'type = ?, original_source = ?, content = ?, title = ? '
227             . 'where source = ? and target = ? and time_received = ?' );
228              
229 3         321 for my $stored_wm ( $self->fetch_webmentions( { process => 1 } ) ) {
230 21         1798 $total_count++;
231              
232             # Verify a new, minimal wm based on the stored one.
233             # This allows us to update re-sent wms that might have new content
234             # (or might be deleted, or otherwise no longer valid).
235 21         785 my $wm = Whim::Mention->new(
236             { source => $stored_wm->source,
237             target => $stored_wm->target,
238             time_received => $stored_wm->time_received,
239             }
240             );
241              
242             # Grab the author image
243 21 100 66     9055 if ( $wm->author && $wm->author->photo ) {
244 20         947174 my $url = $wm->author->photo->abs( $wm->source )->as_string;
245 20         5872 my $response = $self->ua->get($url);
246 20         45478 my $photo_hash = $self->_process_author_photo_tx($response);
247 20         541 $wm->author_photo_hash($photo_hash);
248             }
249              
250 21 50       3712 my @bind_values = (
    50          
    50          
    100          
    100          
    100          
251             $wm->author ? $wm->author->name : undef,
252             $wm->author ? $wm->author->url : undef,
253             $wm->author ? $wm->author->photo : undef,
254             $wm->is_verified ? $wm->time_verified->iso8601 : undef,
255             $wm->source_html,
256             $wm->author_photo_hash,
257             $wm->type,
258             $wm->original_source->as_string,
259             $wm->source_html ? $wm->content : undef,
260             $wm->source_html ? $wm->title : undef,
261             $wm->source->as_string,
262             $wm->target->as_string,
263             $wm->time_received->iso8601,
264             );
265              
266 21 100       120732 if ( $wm->is_verified ) {
267 20         253 $verified_count++;
268 20         874 $sth->execute( 1, @bind_values );
269             }
270             else {
271 1         54 $sth->execute( 0, @bind_values );
272             }
273             }
274 3         454 return [ $verified_count, $total_count ];
275             }
276              
277 21     21 0 52607 sub receive_webmention ( $self, $wm ) {
  21         47  
  21         29  
  21         33  
278              
279             # If a wm with this source+target already exists in the db, then set it
280             # to be re-verified, but don't modify its present verification state.
281             # Otherwise, store a new wm, unverified and untested.
282 21         103 my ($existing_wm) = $self->fetch_webmentions(
283             { source => [ $wm->source->as_string ],
284             target => $wm->target->as_string,
285             }
286             );
287 21 100       98 if ($existing_wm) {
288 7         163 $self->dbh->do(
289             'update wm set is_tested = 0 where source = ? and target = ?',
290             {},
291             $wm->source->as_string,
292             $wm->target->as_string,
293             );
294             }
295             else {
296 14         374 $self->dbh->do(
297             'insert into wm '
298             . '(source, target, time_received, is_tested ) '
299             . 'values (?, ?, ?, ? )',
300             {},
301             $wm->source->as_string,
302             $wm->target->as_string,
303             $wm->time_received->iso8601,
304             0,
305             0,
306             );
307             }
308             }
309              
310 4     4   2705 sub _build_dbh ($self) {
  4         8  
  4         7  
311 4         79 my $dir = $self->data_directory;
312 4         40 my $db_needs_initialization = 1;
313 4         10 my $db_file;
314              
315 4 100       30 if ( $dir eq $TRANSIENT_DB ) {
316 3         28 $db_file = $dir;
317             }
318             else {
319 1         10 $db_file = $dir->child('wm.db');
320 1 50       65 $db_needs_initialization = 0 if $db_file->exists;
321             }
322              
323 4 50       58 my $dbh =
324             DBI->connect( "dbi:SQLite:$db_file", "", "", { sqlite_unicode => 1, },
325             ) or die "Can't create or use a database file in $dir: $DBI::errtr\n";
326              
327 4 50       40947 _initialize_database($dbh) if $db_needs_initialization;
328              
329 4         15776 return $dbh;
330             }
331              
332 2     2   40 sub _build_data_directory ($self) {
  2         10  
  2         2  
333 2         32 return $self->home->child('data');
334             }
335              
336 6     6   163 sub _build_author_photo_directory ($self) {
  6         16  
  6         11  
337 6         126 return $self->home->child('public')->child('author_photos');
338             }
339              
340 20     20   56 sub _process_author_photo_tx ( $self, $response ) {
  20         49  
  20         36  
  20         36  
341 20 50       89 if ( $response->is_success ) {
342 20         263 my $photo_hash = sha256_hex( $response->decoded_content );
343 20         5848 my $photo_file = $self->author_photo_directory->child($photo_hash);
344 20 100       1678 unless ( -e $photo_file ) {
345 2         156 $photo_file->spew( $response->decoded_content );
346             }
347 20         2774 return $photo_hash;
348             }
349             else {
350 0         0 return undef;
351             }
352             }
353              
354 4     4   11 sub _initialize_database ($dbh) {
  4         10  
  4         7  
355 4         39 my @statements = (
356             "CREATE TABLE wm (source char(128), original_source char(128), target "
357             . "char(128), time_received text, is_verified int, is_tested int, "
358             . "html text, content text, time_verified text, type char(16), "
359             . "author_name char(64), author_url char(128), author_photo "
360             . "char(128), author_photo_hash char(128), title char(255))",
361             "CREATE UNIQUE INDEX source_target on wm(source, target)",
362             "CREATE TABLE block (source char(128))",
363             "CREATE UNIQUE INDEX source on block(source)",
364             );
365              
366 4         14 foreach (@statements) {
367 16         52503 $dbh->do($_);
368             }
369             }
370              
371 6     6   19 sub _make_homedir ( $self, $dir ) {
  6         18  
  6         10  
  6         13  
372              
373             # Path::Tiny's mkpath() method executes without complaint if the path
374             # already exists, so let's just create-if-needed every expected subdir.
375 6         45 $dir->mkpath;
376 6         891 $dir->child('log')->mkpath;
377 6         1266 $self->data_directory->mkpath;
378 6         962 $self->author_photo_directory->mkpath;
379             }
380              
381             1;
382              
383             =head1 NAME
384              
385             Whim::Core - A code library used by the Whim webmention multitool
386              
387             =head1 DESCRIPTION
388              
389             This is a code library used by the C<whim> executable. It doesn't have a
390             public interface!
391              
392             =head1 SEE ALSO
393              
394             L<whim>
395              
396             =head1 AUTHOR
397              
398             Jason McIntosh E<lt>jmac@jmac.orgE<gt>
399