File Coverage

lib/Whim/Core.pm
Criterion Covered Total %
statement 158 194 81.4
branch 43 58 74.1
condition 2 3 66.6
subroutine 24 27 88.8
pod 0 6 0.0
total 227 288 78.8


line stmt bran cond sub pod time code
1             package Whim::Core;
2              
3 4     4   1996278 use warnings;
  4         24  
  4         141  
4 4     4   21 use strict;
  4         8  
  4         94  
5 4     4   60 use v5.24;
  4         18  
6              
7 4     4   1740 use Moo;
  4         34105  
  4         24  
8 4     4   11368 use DBI;
  4         74577  
  4         242  
9 4     4   866 use Path::Tiny;
  4         11433  
  4         206  
10 4     4   40 use Scalar::Util qw(blessed);
  4         8  
  4         187  
11 4     4   2381 use DateTime::Format::ISO8601;
  4         1850396  
  4         163  
12 4     4   2511 use LWP::UserAgent;
  4         125504  
  4         151  
13 4     4   1228 use Digest::SHA qw(sha256_hex);
  4         5870  
  4         321  
14              
15 4     4   1056 use lib '/Users/jmac/Documents/Plerd/indieweb/webmention-perl/lib';
  4         1417  
  4         25  
16              
17 4     4   2585 use Whim::Mention;
  4         16  
  4         1391  
18              
19             # Specifying $TRANSIENT_DB for data_directory tells SQLite to use an in-memory database rather than persist to disk.
20             # Helpful for automated tests and maybe future non-persistent uses of whim
21             our $TRANSIENT_DB = ":memory:";
22              
23             has 'home' => (
24             is => 'rw',
25             required => 1,
26             coerce => sub { path( $_[0] ) },
27             trigger => sub {
28             my ( $self, $dir ) = @_;
29             $self->_make_homedir($dir);
30             },
31             );
32              
33             has 'data_directory' => (
34             is => 'lazy',
35             isa => sub {
36             unless ( blessed( $_[0] ) && $_[0]->isa('Path::Tiny') ) {
37              
38             # $TRANSIENT_DB can be coerced to a Path::Tiny,
39             # so this check still works.
40             die "data_directory must be a valid path or Path::Tiny object\n";
41             }
42             },
43             coerce => sub { path( $_[0] ) },
44             );
45              
46             has 'dbh' => ( is => 'lazy', );
47              
48             has 'author_photo_directory' => (
49             is => 'lazy',
50             isa => sub {
51             unless ( blessed( $_[0] ) && $_[0]->isa('Path::Tiny') ) {
52             die "author_photo_directory must be a valid path or "
53             . "Path::Tiny object\n";
54             }
55             },
56             coerce => sub { path( $_[0] ) },
57             );
58              
59             has 'ua' => (
60             is => 'ro',
61             default => sub { LWP::UserAgent->new },
62             );
63              
64 4     4   38 use Readonly;
  4         8  
  4         516  
65             Readonly my $IMAGEDIR_NAME => 'images';
66              
67 4     4   31 no warnings "experimental::signatures";
  4         15  
  4         190  
68 4     4   52 use feature "signatures";
  4         26  
  4         10507  
69              
70 0     0 0 0 sub unblock_sources ( $self, @sources ) {
  0         0  
  0         0  
  0         0  
71 0         0 my @failures;
72 0         0 for my $source (@sources) {
73 0         0 my ($extant) = $self->dbh->selectrow_array(
74             'select * from block where source = ?',
75             {}, $source );
76              
77 0 0       0 if ($extant) {
78 0         0 $self->dbh->do( 'delete from block where source = ?',
79             {}, $source );
80             }
81             else {
82 0         0 push @failures, $source;
83             }
84             }
85 0         0 return @failures;
86             }
87              
88 0     0 0 0 sub block_sources ( $self, @sources ) {
  0         0  
  0         0  
  0         0  
89 0         0 for my $source (@sources) {
90 0         0 $self->dbh->do( 'insert into block values (?)', {}, $source );
91             }
92             }
93              
94 0     0 0 0 sub blocked_sources ($self) {
  0         0  
  0         0  
95 0         0 my @sources;
96 0         0 my $sth = $self->dbh->prepare('select source from block order by source');
97 0         0 $sth->execute;
98 0         0 while ( my ($source) = $sth->fetchrow_array ) {
99 0         0 push @sources, $source;
100             }
101 0         0 return @sources;
102             }
103              
104 32     32 0 4901 sub fetch_webmentions ( $self, $args ) {
  32         51  
  32         53  
  32         45  
105              
106             # This complex query lets us flexibly use the contents of the `block`
107             # table as a blocklist.
108 32         64 my $query =
109             'select wm.* from wm where original_source not in '
110             . '(select original_source from wm '
111             . 'inner join (select source from block) b on '
112             . ' wm.original_source like \'%\' || b.source || \'%\') ';
113 32         61 my @wheres;
114             my @bind_args;
115              
116 32 50       93 if ( $args->{before} ) {
117 0         0 push @wheres, "time_received <= ?";
118 0         0 push @bind_args, $args->{before};
119             }
120 32 50       133 if ( $args->{after} ) {
121 0         0 push @wheres, "time_received >= ?";
122 0         0 push @bind_args, $args->{after};
123             }
124 32 100       78 if ( $args->{source} ) {
125 21         56 foreach ( $args->{source}->@* ) {
126 21         42 push @wheres, "original_source like ?";
127 21         58 push @bind_args, "\%$_\%";
128             }
129             }
130 32 50       83 if ( $args->{'not-source'} ) {
131 0         0 foreach ( $args->{'not-source'}->@* ) {
132 0         0 push @wheres, "original_source not like ?";
133 0         0 push @bind_args, "\%$_\%";
134             }
135             }
136 32 100       76 if ( $args->{target} ) {
137 25         45 push @wheres, "target like ?";
138 25         63 push @bind_args, "\%$args->{target}\%";
139             }
140 32 50       67 if ( $args->{type} ) {
141 0         0 push @wheres, "type like ?";
142 0         0 push @bind_args, $args->{type};
143             }
144              
145             # Unless we're processing WMs, we want only verified ones.
146 32 100       82 if ( $args->{process} ) {
147 3         13 push @wheres, "is_tested != 1";
148             }
149             else {
150 29         53 push @wheres, "is_verified = 1";
151             }
152              
153 32         53 my $where_clause = '';
154 32 50       74 if (@wheres) {
155 32         83 $where_clause = 'and ' . join( ' and ', @wheres );
156             }
157              
158 32         79 $query .= "$where_clause order by time_received";
159              
160 32         828 my $sth = $self->dbh->prepare($query);
161 32         9283 $sth->execute(@bind_args);
162              
163 32         177 my @wms;
164 32         1171 while ( my $row = $sth->fetchrow_hashref ) {
165             my %args = (
166             source => URI->new( $row->{source} ),
167             target => URI->new( $row->{target} ),
168             original_source => $row->{original_source}
169             ? URI->new( $row->{original_source} )
170             : undef,
171             title => $row->{title},
172             content => $row->{content},
173             source_html => $row->{html},
174             is_verified => $row->{is_verified},
175             is_tested => $row->{is_tested},
176             type => $row->{type},
177             time_received => DateTime::Format::ISO8601->parse_datetime(
178             $row->{time_received}
179             ),
180             author_photo_hash => $row->{author_photo_hash},
181             time_verified => $row->{time_verified}
182             ? DateTime::Format::ISO8601->parse_datetime(
183             $row->{time_verified}
184             )
185 53 100       289 : undef,
    100          
186             );
187              
188             # Delete keys that, if undef, we want the webmention object to
189             # lazily re-derive
190 53         51244 foreach (
191             qw(time_verified is_verified original_source content title type))
192             {
193 318 100       709 delete $args{$_} unless defined $args{$_};
194             }
195              
196 53 100       132 if ( $row->{author_name} ) {
197 39         56 my %author_args;
198 39         86 $author_args{name} = $row->{author_name};
199 39         69 foreach (qw(url photo)) {
200 78 50       205 if ( $row->{"author_$_"} ) {
201 78         178 $author_args{$_} = $row->{"author_$_"};
202             }
203             }
204              
205 39         1272 $args{author} = Web::Mention::Author->new( \%author_args );
206             }
207 53         11465 my $wm = Whim::Mention->new( \%args );
208 53         27633 push @wms, $wm;
209              
210             }
211              
212 32         988 return @wms;
213              
214             }
215              
216             # process_webmentions: Verify all untested WMs.
217 3     3 0 5778 sub process_webmentions ($self) {
  3         8  
  3         8  
218 3         7 my $verified_count = 0;
219 3         8 my $total_count = 0;
220 3         79 my $sth = $self->dbh->prepare(
221             'update wm set is_tested = 1, is_verified = ?, '
222             . 'author_name = ?, author_url = ?, author_photo = ?, '
223             . 'time_verified = ?, html = ?, author_photo_hash = ?, '
224             . 'type = ?, original_source = ?, content = ?, title = ? '
225             . 'where source = ? and target = ? and time_received = ?' );
226              
227 3         370 for my $stored_wm ( $self->fetch_webmentions( { process => 1 } ) ) {
228 21         1600 $total_count++;
229              
230             # Verify a new, minimal wm based on the stored one.
231             # This allows us to update re-sent wms that might have new content
232             # (or might be deleted, or otherwise no longer valid).
233 21         770 my $wm = Whim::Mention->new(
234             { source => $stored_wm->source,
235             target => $stored_wm->target,
236             time_received => $stored_wm->time_received,
237             }
238             );
239              
240             # Grab the author image
241 21 100 66     8313 if ( $wm->author && $wm->author->photo ) {
242 20         929029 my $url = $wm->author->photo->abs( $wm->source )->as_string;
243 20         5490 my $response = $self->ua->get($url);
244 20         43918 my $photo_hash = $self->_process_author_photo_tx($response);
245 20         563 $wm->author_photo_hash($photo_hash);
246             }
247              
248 21 50       3485 my @bind_values = (
    50          
    50          
    100          
    100          
    100          
249             $wm->author ? $wm->author->name : undef,
250             $wm->author ? $wm->author->url : undef,
251             $wm->author ? $wm->author->photo : undef,
252             $wm->is_verified ? $wm->time_verified->iso8601 : undef,
253             $wm->source_html,
254             $wm->author_photo_hash,
255             $wm->type,
256             $wm->original_source->as_string,
257             $wm->source_html ? $wm->content : undef,
258             $wm->source_html ? $wm->title : undef,
259             $wm->source->as_string,
260             $wm->target->as_string,
261             $wm->time_received->iso8601,
262             );
263              
264 21 100       116274 if ( $wm->is_verified ) {
265 20         226 $verified_count++;
266 20         727 $sth->execute( 1, @bind_values );
267             }
268             else {
269 1         396 $sth->execute( 0, @bind_values );
270             }
271             }
272 3         448 return [ $verified_count, $total_count ];
273             }
274              
275 21     21 0 51909 sub receive_webmention ( $self, $wm ) {
  21         39  
  21         30  
  21         28  
276              
277             # If a wm with this source+target already exists in the db, then set it
278             # to be re-verified, but don't modify its present verification state.
279             # Otherwise, store a new wm, unverified and untested.
280 21         102 my ($existing_wm) = $self->fetch_webmentions(
281             { source => [ $wm->source->as_string ],
282             target => $wm->target->as_string,
283             }
284             );
285 21 100       100 if ($existing_wm) {
286 7         168 $self->dbh->do(
287             'update wm set is_tested = 0 where source = ? and target = ?',
288             {},
289             $wm->source->as_string,
290             $wm->target->as_string,
291             );
292             }
293             else {
294 14         372 $self->dbh->do(
295             'insert into wm '
296             . '(source, target, time_received, is_tested ) '
297             . 'values (?, ?, ?, ? )',
298             {},
299             $wm->source->as_string,
300             $wm->target->as_string,
301             $wm->time_received->iso8601,
302             0,
303             0,
304             );
305             }
306             }
307              
308 4     4   2939 sub _build_dbh ($self) {
  4         11  
  4         8  
309 4         76 my $dir = $self->data_directory;
310 4         49 my $db_needs_initialization = 1;
311 4         10 my $db_file;
312              
313 4 100       27 if ( $dir eq $TRANSIENT_DB ) {
314 3         21 $db_file = $dir;
315             }
316             else {
317 1         10 $db_file = $dir->child('wm.db');
318 1 50       48 $db_needs_initialization = 0 if $db_file->exists;
319             }
320              
321 4 50       73 my $dbh =
322             DBI->connect( "dbi:SQLite:$db_file", "", "", { sqlite_unicode => 1, },
323             ) or die "Can't create or use a database file in $dir: $DBI::errtr\n";
324              
325 4 50       40987 _initialize_database($dbh) if $db_needs_initialization;
326              
327 4         15070 return $dbh;
328             }
329              
330 2     2   39 sub _build_data_directory ($self) {
  2         3  
  2         3  
331 2         34 return $self->home->child('data');
332             }
333              
334 6     6   140 sub _build_author_photo_directory ($self) {
  6         13  
  6         11  
335 6         110 return $self->home->child('public')->child('author_photos');
336             }
337              
338 20     20   86 sub _process_author_photo_tx ( $self, $response ) {
  20         45  
  20         35  
  20         32  
339 20 50       82 if ( $response->is_success ) {
340 20         257 my $photo_hash = sha256_hex( $response->decoded_content );
341 20         5558 my $photo_file = $self->author_photo_directory->child($photo_hash);
342 20 100       1503 unless ( -e $photo_file ) {
343 2         149 $photo_file->spew( $response->decoded_content );
344             }
345 20         2403 return $photo_hash;
346             }
347             else {
348 0         0 return undef;
349             }
350             }
351              
352 4     4   8 sub _initialize_database ($dbh) {
  4         10  
  4         8  
353 4         26 my @statements = (
354             "CREATE TABLE wm (source char(128), original_source char(128), target "
355             . "char(128), time_received text, is_verified int, is_tested int, "
356             . "html text, content text, time_verified text, type char(16), "
357             . "author_name char(64), author_url char(128), author_photo "
358             . "char(128), author_photo_hash char(128), title char(255))",
359             "CREATE UNIQUE INDEX source_target on wm(source, target)",
360             "CREATE TABLE block (source char(128))",
361             "CREATE UNIQUE INDEX source on block(source)",
362             );
363              
364 4         17 foreach (@statements) {
365 16         49460 $dbh->do($_);
366             }
367             }
368              
369 6     6   16 sub _make_homedir ( $self, $dir ) {
  6         14  
  6         8  
  6         14  
370              
371             # Path::Tiny's mkpath() method executes without complaint if the path
372             # already exists, so let's just create-if-needed every expected subdir.
373 6         32 $dir->mkpath;
374 6         834 $dir->child('log')->mkpath;
375 6         1196 $self->data_directory->mkpath;
376 6         876 $self->author_photo_directory->mkpath;
377             }
378              
379             1;
380              
381             =head1 NAME
382              
383             Whim::Core - A code library used by the Whim webmention multitool
384              
385             =head1 DESCRIPTION
386              
387             This is a code library used by the C<whim> executable. It doesn't have a
388             public interface!
389              
390             =head1 SEE ALSO
391              
392             L<whim>
393              
394             =head1 AUTHOR
395              
396             Jason McIntosh E<lt>jmac@jmac.orgE<gt>
397