File Coverage

lib/Whim/Core.pm
Criterion Covered Total %
statement 158 194 81.4
branch 43 58 74.1
condition 2 3 66.6
subroutine 24 27 88.8
pod 0 6 0.0
total 227 288 78.8


line stmt bran cond sub pod time code
1             package Whim::Core;
2              
3 4     4   2014562 use warnings;
  4         32  
  4         163  
4 4     4   23 use strict;
  4         10  
  4         115  
5 4     4   61 use v5.24;
  4         15  
6              
7 4     4   1821 use Moo;
  4         35813  
  4         24  
8 4     4   11798 use DBI;
  4         74570  
  4         297  
9 4     4   1116 use Path::Tiny;
  4         12901  
  4         245  
10 4     4   40 use Scalar::Util qw(blessed);
  4         8  
  4         212  
11 4     4   2370 use DateTime::Format::ISO8601;
  4         1946072  
  4         231  
12 4     4   2822 use LWP::UserAgent;
  4         131056  
  4         172  
13 4     4   1259 use Digest::SHA qw(sha256_hex);
  4         5577  
  4         363  
14              
15 4     4   1258 use lib '/Users/jmac/Documents/Plerd/indieweb/webmention-perl/lib';
  4         1374  
  4         31  
16              
17 4     4   2561 use Whim::Mention;
  4         19  
  4         1481  
18              
19             # Specifying $TRANSIENT_DB for data_directory tells SQLite to use an in-memory database rather than persist to disk.
20             # Helpful for automated tests and maybe future non-persistent uses of whim
21             our $TRANSIENT_DB = ":memory:";
22              
23             has 'home' => (
24             is => 'rw',
25             required => 1,
26             coerce => sub { path( $_[0] ) },
27             trigger => sub {
28             my ( $self, $dir ) = @_;
29             $self->_make_homedir($dir);
30             },
31             );
32              
33             has 'data_directory' => (
34             is => 'lazy',
35             isa => sub {
36             unless ( blessed( $_[0] ) && $_[0]->isa('Path::Tiny') ) {
37              
38             # $TRANSIENT_DB can be coerced to a Path::Tiny,
39             # so this check still works.
40             die "data_directory must be a valid path or Path::Tiny object\n";
41             }
42             },
43             coerce => sub { path( $_[0] ) },
44             );
45              
46             has 'dbh' => ( is => 'lazy', );
47              
48             has 'author_photo_directory' => (
49             is => 'lazy',
50             isa => sub {
51             unless ( blessed( $_[0] ) && $_[0]->isa('Path::Tiny') ) {
52             die "author_photo_directory must be a valid path or "
53             . "Path::Tiny object\n";
54             }
55             },
56             coerce => sub { path( $_[0] ) },
57             );
58              
59             has 'ua' => (
60             is => 'ro',
61             default => sub { LWP::UserAgent->new },
62             );
63              
64 4     4   43 use Readonly;
  4         8  
  4         475  
65             Readonly my $IMAGEDIR_NAME => 'images';
66              
67 4     4   33 no warnings "experimental::signatures";
  4         17  
  4         188  
68 4     4   41 use feature "signatures";
  4         9  
  4         10324  
69              
70 0     0 0 0 sub unblock_sources ( $self, @sources ) {
  0         0  
  0         0  
  0         0  
71 0         0 my @failures;
72 0         0 for my $source (@sources) {
73 0         0 my ($extant) = $self->dbh->selectrow_array(
74             'select * from block where source = ?',
75             {}, $source );
76              
77 0 0       0 if ($extant) {
78 0         0 $self->dbh->do( 'delete from block where source = ?',
79             {}, $source );
80             }
81             else {
82 0         0 push @failures, $source;
83             }
84             }
85 0         0 return @failures;
86             }
87              
88 0     0 0 0 sub block_sources ( $self, @sources ) {
  0         0  
  0         0  
  0         0  
89 0         0 for my $source (@sources) {
90 0         0 $self->dbh->do( 'insert into block values (?)', {}, $source );
91             }
92             }
93              
94 0     0 0 0 sub blocked_sources ($self) {
  0         0  
  0         0  
95 0         0 my @sources;
96 0         0 my $sth = $self->dbh->prepare('select source from block order by source');
97 0         0 $sth->execute;
98 0         0 while ( my ($source) = $sth->fetchrow_array ) {
99 0         0 push @sources, $source;
100             }
101 0         0 return @sources;
102             }
103              
104 32     32 0 4979 sub fetch_webmentions ( $self, $args ) {
  32         62  
  32         55  
  32         47  
105              
106             # This complex query lets us flexibly use the contents of the `block`
107             # table as a blocklist.
108 32         68 my $query =
109             'select wm.* from wm where original_source not in '
110             . '(select original_source from wm '
111             . 'inner join (select source from block) b on '
112             . ' wm.original_source like \'%\' || b.source || \'%\') ';
113 32         57 my @wheres;
114             my @bind_args;
115              
116 32 50       96 if ( $args->{before} ) {
117 0         0 push @wheres, "time_received <= ?";
118 0         0 push @bind_args, $args->{before};
119             }
120 32 50       83 if ( $args->{after} ) {
121 0         0 push @wheres, "time_received >= ?";
122 0         0 push @bind_args, $args->{after};
123             }
124 32 100       81 if ( $args->{source} ) {
125 21         56 foreach ( $args->{source}->@* ) {
126 21         46 push @wheres, "original_source like ?";
127 21         67 push @bind_args, "\%$_\%";
128             }
129             }
130 32 50       74 if ( $args->{'not-source'} ) {
131 0         0 foreach ( $args->{'not-source'}->@* ) {
132 0         0 push @wheres, "original_source not like ?";
133 0         0 push @bind_args, "\%$_\%";
134             }
135             }
136 32 100       74 if ( $args->{target} ) {
137 25         48 push @wheres, "target like ?";
138 25         67 push @bind_args, "\%$args->{target}\%";
139             }
140 32 50       77 if ( $args->{type} ) {
141 0         0 push @wheres, "type like ?";
142 0         0 push @bind_args, $args->{type};
143             }
144              
145             # Unless we're processing WMs, we want only verified ones.
146 32 100       115 if ( $args->{process} ) {
147 3         8 push @wheres, "is_tested != 1";
148             }
149             else {
150 29         59 push @wheres, "is_verified = 1";
151             }
152              
153 32         59 my $where_clause = '';
154 32 50       67 if (@wheres) {
155 32         102 $where_clause = 'and ' . join( ' and ', @wheres );
156             }
157              
158 32         79 $query .= "$where_clause order by time_received";
159              
160 32         819 my $sth = $self->dbh->prepare($query);
161 32         9175 $sth->execute(@bind_args);
162              
163 32         139 my @wms;
164 32         1232 while ( my $row = $sth->fetchrow_hashref ) {
165             my %args = (
166             source => URI->new( $row->{source} ),
167             target => URI->new( $row->{target} ),
168             original_source => $row->{original_source}
169             ? URI->new( $row->{original_source} )
170             : undef,
171             title => $row->{title},
172             content => $row->{content},
173             source_html => $row->{html},
174             is_verified => $row->{is_verified},
175             is_tested => $row->{is_tested},
176             type => $row->{type},
177             time_received => DateTime::Format::ISO8601->parse_datetime(
178             $row->{time_received}
179             ),
180             author_photo_hash => $row->{author_photo_hash},
181             time_verified => $row->{time_verified}
182             ? DateTime::Format::ISO8601->parse_datetime(
183             $row->{time_verified}
184             )
185 53 100       316 : undef,
    100          
186             );
187              
188             # Delete keys that, if undef, we want the webmention object to
189             # lazily re-derive
190 53         53098 foreach (
191             qw(time_verified is_verified original_source content title type))
192             {
193 318 100       684 delete $args{$_} unless defined $args{$_};
194             }
195              
196 53 100       136 if ( $row->{author_name} ) {
197 39         60 my %author_args;
198 39         84 $author_args{name} = $row->{author_name};
199 39         69 foreach (qw(url photo)) {
200 78 50       196 if ( $row->{"author_$_"} ) {
201 78         175 $author_args{$_} = $row->{"author_$_"};
202             }
203             }
204              
205 39         935 $args{author} = Web::Mention::Author->new( \%author_args );
206             }
207 53         11141 my $wm = Whim::Mention->new( \%args );
208 53         27699 push @wms, $wm;
209              
210             }
211              
212 32         1017 return @wms;
213              
214             }
215              
216             # process_webmentions: Verify all untested WMs.
217 3     3 0 5803 sub process_webmentions ($self) {
  3         8  
  3         5  
218 3         7 my $verified_count = 0;
219 3         8 my $total_count = 0;
220 3         82 my $sth = $self->dbh->prepare(
221             'update wm set is_tested = 1, is_verified = ?, '
222             . 'author_name = ?, author_url = ?, author_photo = ?, '
223             . 'time_verified = ?, html = ?, author_photo_hash = ?, '
224             . 'type = ?, original_source = ?, content = ?, title = ? '
225             . 'where source = ? and target = ? and time_received = ?' );
226              
227 3         318 for my $stored_wm ( $self->fetch_webmentions( { process => 1 } ) ) {
228 21         1894 $total_count++;
229              
230             # Verify a new, minimal wm based on the stored one.
231             # This allows us to update re-sent wms that might have new content
232             # (or might be deleted, or otherwise no longer valid).
233 21         809 my $wm = Whim::Mention->new(
234             { source => $stored_wm->source,
235             target => $stored_wm->target,
236             time_received => $stored_wm->time_received,
237             }
238             );
239              
240             # Grab the author image
241 21 100 66     8762 if ( $wm->author && $wm->author->photo ) {
242 20         940772 my $url = $wm->author->photo->abs( $wm->source )->as_string;
243 20         5754 my $response = $self->ua->get($url);
244 20         47842 my $photo_hash = $self->_process_author_photo_tx($response);
245 20         592 $wm->author_photo_hash($photo_hash);
246             }
247              
248 21 50       3538 my @bind_values = (
    50          
    50          
    100          
    100          
    100          
249             $wm->author ? $wm->author->name : undef,
250             $wm->author ? $wm->author->url : undef,
251             $wm->author ? $wm->author->photo : undef,
252             $wm->is_verified ? $wm->time_verified->iso8601 : undef,
253             $wm->source_html,
254             $wm->author_photo_hash,
255             $wm->type,
256             $wm->original_source->as_string,
257             $wm->source_html ? $wm->content : undef,
258             $wm->source_html ? $wm->title : undef,
259             $wm->source->as_string,
260             $wm->target->as_string,
261             $wm->time_received->iso8601,
262             );
263              
264 21 100       120759 if ( $wm->is_verified ) {
265 20         223 $verified_count++;
266 20         865 $sth->execute( 1, @bind_values );
267             }
268             else {
269 1         51 $sth->execute( 0, @bind_values );
270             }
271             }
272 3         522 return [ $verified_count, $total_count ];
273             }
274              
275 21     21 0 54041 sub receive_webmention ( $self, $wm ) {
  21         42  
  21         30  
  21         28  
276              
277             # If a wm with this source+target already exists in the db, then set it
278             # to be re-verified, but don't modify its present verification state.
279             # Otherwise, store a new wm, unverified and untested.
280 21         97 my ($existing_wm) = $self->fetch_webmentions(
281             { source => [ $wm->source->as_string ],
282             target => $wm->target->as_string,
283             }
284             );
285 21 100       96 if ($existing_wm) {
286 7         172 $self->dbh->do(
287             'update wm set is_tested = 0 where source = ? and target = ?',
288             {},
289             $wm->source->as_string,
290             $wm->target->as_string,
291             );
292             }
293             else {
294 14         401 $self->dbh->do(
295             'insert into wm '
296             . '(source, target, time_received, is_tested ) '
297             . 'values (?, ?, ?, ? )',
298             {},
299             $wm->source->as_string,
300             $wm->target->as_string,
301             $wm->time_received->iso8601,
302             0,
303             0,
304             );
305             }
306             }
307              
308 4     4   2616 sub _build_dbh ($self) {
  4         9  
  4         8  
309 4         75 my $dir = $self->data_directory;
310 4         36 my $db_needs_initialization = 1;
311 4         11 my $db_file;
312              
313 4 100       25 if ( $dir eq $TRANSIENT_DB ) {
314 3         21 $db_file = $dir;
315             }
316             else {
317 1         23 $db_file = $dir->child('wm.db');
318 1 50       44 $db_needs_initialization = 0 if $db_file->exists;
319             }
320              
321 4 50       57 my $dbh =
322             DBI->connect( "dbi:SQLite:$db_file", "", "", { sqlite_unicode => 1, },
323             ) or die "Can't create or use a database file in $dir: $DBI::errtr\n";
324              
325 4 50       40459 _initialize_database($dbh) if $db_needs_initialization;
326              
327 4         8935 return $dbh;
328             }
329              
330 2     2   45 sub _build_data_directory ($self) {
  2         3  
  2         3  
331 2         38 return $self->home->child('data');
332             }
333              
334 6     6   140 sub _build_author_photo_directory ($self) {
  6         11  
  6         11  
335 6         115 return $self->home->child('public')->child('author_photos');
336             }
337              
338 20     20   152 sub _process_author_photo_tx ( $self, $response ) {
  20         61  
  20         39  
  20         41  
339 20 50       101 if ( $response->is_success ) {
340 20         288 my $photo_hash = sha256_hex( $response->decoded_content );
341 20         5836 my $photo_file = $self->author_photo_directory->child($photo_hash);
342 20 100       1629 unless ( -e $photo_file ) {
343 2         130 $photo_file->spew( $response->decoded_content );
344             }
345 20         2367 return $photo_hash;
346             }
347             else {
348 0         0 return undef;
349             }
350             }
351              
352 4     4   8 sub _initialize_database ($dbh) {
  4         12  
  4         7  
353 4         23 my @statements = (
354             "CREATE TABLE wm (source char(128), original_source char(128), target "
355             . "char(128), time_received text, is_verified int, is_tested int, "
356             . "html text, content text, time_verified text, type char(16), "
357             . "author_name char(64), author_url char(128), author_photo "
358             . "char(128), author_photo_hash char(128), title char(255))",
359             "CREATE UNIQUE INDEX source_target on wm(source, target)",
360             "CREATE TABLE block (source char(128))",
361             "CREATE UNIQUE INDEX source on block(source)",
362             );
363              
364 4         13 foreach (@statements) {
365 16         37455 $dbh->do($_);
366             }
367             }
368              
369 6     6   13 sub _make_homedir ( $self, $dir ) {
  6         16  
  6         11  
  6         11  
370              
371             # Path::Tiny's mkpath() method executes without complaint if the path
372             # already exists, so let's just create-if-needed every expected subdir.
373 6         34 $dir->mkpath;
374 6         833 $dir->child('log')->mkpath;
375 6         1224 $self->data_directory->mkpath;
376 6         841 $self->author_photo_directory->mkpath;
377             }
378              
379             1;
380              
381             =head1 NAME
382              
383             Whim::Core - A code library used by the Whim webmention multitool
384              
385             =head1 DESCRIPTION
386              
387             This is a code library used by the C<whim> executable. It doesn't have a
388             public interface!
389              
390             =head1 SEE ALSO
391              
392             L<whim>
393              
394             =head1 AUTHOR
395              
396             Jason McIntosh E<lt>jmac@jmac.orgE<gt>
397