File Coverage

blib/lib/Statocles/Plugin/LinkCheck.pm
Criterion Covered Total %
statement 51 51 100.0
branch 23 24 95.8
condition 3 3 100.0
subroutine 5 5 100.0
pod 2 2 100.0
total 84 85 98.8


line stmt bran cond sub pod time code
1             package Statocles::Plugin::LinkCheck;
2             our $VERSION = '0.086';
3             # ABSTRACT: Check links and images for validity during build
4              
5 2     2   20353 use Statocles::Base 'Class';
  2         5  
  2         16  
6             with 'Statocles::Plugin';
7 2     2   13288 use Mojo::Util qw( url_escape url_unescape );
  2         4  
  2         1468  
8              
9             #pod =attr ignore
10             #pod
11             #pod An array of URL patterns to ignore. These are interpreted as regular expressions,
12             #pod and are anchored to the beginning of the URL.
13             #pod
14             #pod For example:
15             #pod
16             #pod /broken will match "/broken.html" "/broken/page.html" but not "/page/broken"
17             #pod .*/broken will match "/broken.html" "/broken/page.html" and "/page/broken"
18             #pod
19             #pod =cut
20              
21             has ignore => (
22             is => 'ro',
23             isa => ArrayRef[Str],
24             default => sub { [] },
25             );
26              
27             #pod =method check_pages
28             #pod
29             #pod $plugin->check_pages( $event );
30             #pod
31             #pod Check the pages inside the given
32             #pod L<Statocles::Event::Pages|Statocles::Event::Pages> event.
33             #pod
34             #pod =cut
35              
36             sub check_pages {
37 10     10 1 32 my ( $self, $event ) = @_;
38              
39 10         23 my %page_paths = ();
40 10         23 my %links = ();
41 10         24 my %empty = (); # Pages with empty links
42 10         19 for my $page ( @{ $event->pages } ) {
  10         37  
43 200         6136 $page_paths{ $page->path } = 1;
44 200 100       1938 if ( $page->DOES( 'Statocles::Page::Document' ) ) {
45 47         1719 my $dom = $page->dom;
46              
47 47         364 for my $attr ( qw( src href ) ) {
48 94         32808 for my $el ( $dom->find( "[$attr]" )->each ) {
49 462         86172 my $url = $el->attr( $attr );
50              
51 462 100       6838 if ( !$url ) {
52 3         8 push @{ $empty{ $page->path } }, $el;
  3         44  
53             }
54              
55 462         949 $url =~ s{#.*$}{};
56 462 100       880 next unless $url; # Skip checking fragment-internal links for now
57 423 100       926 next if $url =~ m{^(?:[a-z][a-z0-9+.-]*):}i;
58 366 100       632 next if $url =~ m{^//};
59 316 100       757 if ( $url !~ m{^/} ) {
60 30         446 $url = $page->path->parent->child( $url );
61             }
62              
63             # Fix ".." and ".". Path::Tiny->canonpath can't do
64             # this for us because these paths do not exist on
65             # the filesystem
66 316         2510 $url =~ s{/[^/]+/[.][.]/}{/}g; # Fix ".." to refer to parent
67 316         599 $url =~ s{/[.]/}{/}g; # Fix "." to refer to self
68              
69 316         658 $links{ url_unescape $url }{ $page->path }++;
70              
71             }
72             }
73             }
74             }
75              
76 10         224 my @missing; # Array of arrayrefs of [ link_url, page_path, link_element ]
77 10         60 for my $link_url ( keys %links ) {
78 109 100       251 $link_url .= 'index.html' if $link_url =~ m{/$};
79 109 100 100     322 next if $page_paths{ $link_url } || $page_paths{ "$link_url/index.html" };
80 21 100       30 next if grep { $link_url =~ /^$_/ } @{ $self->ignore };
  18         204  
  21         49  
81 13         17 push @missing, [ $link_url, $_ ] for keys %{ $links{ $link_url } };
  13         65  
82             }
83              
84 10         52 for my $page_url ( keys %empty ) {
85 3         5 push @missing, map { [ '', $page_url, $_ ] } @{ $empty{ $page_url } };
  3         15  
  3         7  
86             }
87              
88 10 100       72 if ( @missing ) {
89             # Sort by page url and then missing link url
90 3 50       17 for my $m ( sort { $a->[1] cmp $b->[1] || $a->[0] cmp $b->[0] } @missing ) {
  172         252  
91 46 100       3788 my $msg = $m->[0] ? sprintf( q{'%s' not found}, $m->[0] )
92             : sprintf( q{Link with text "%s" has no destination}, $m->[2]->text )
93             ;
94 46         1030 $event->emitter->log->warn( "URL broken on $m->[1]: $msg" );
95             }
96             }
97             }
98              
99             #pod =method register
100             #pod
101             #pod Register this plugin to install its event handlers. Called automatically.
102             #pod
103             #pod =cut
104              
105             sub register {
106 10     10 1 247 my ( $self, $site ) = @_;
107 10     10   86 $site->on( build => sub { $self->check_pages( @_ ) } );
  10         8778  
108             }
109              
110             1;
111              
112             __END__
113              
114             =pod
115              
116             =encoding UTF-8
117              
118             =head1 NAME
119              
120             Statocles::Plugin::LinkCheck - Check links and images for validity during build
121              
122             =head1 VERSION
123              
124             version 0.086
125              
126             =head1 SYNOPSIS
127              
128             # site.yml
129             site:
130             class: Statocles::Site
131             args:
132             plugins:
133             link_check:
134             $class: Statocles::Plugin::LinkCheck
135              
136             =head1 DESCRIPTION
137              
138             This plugin checks all of the links and images to ensure they exist. If something
139             is missing, this plugin will write a warning to the screen.
140              
141             =head1 ATTRIBUTES
142              
143             =head2 ignore
144              
145             An array of URL patterns to ignore. These are interpreted as regular expressions,
146             and are anchored to the beginning of the URL.
147              
148             For example:
149              
150             /broken will match "/broken.html" "/broken/page.html" but not "/page/broken"
151             .*/broken will match "/broken.html" "/broken/page.html" and "/page/broken"
152              
153             =head1 METHODS
154              
155             =head2 check_pages
156              
157             $plugin->check_pages( $event );
158              
159             Check the pages inside the given
160             L<Statocles::Event::Pages|Statocles::Event::Pages> event.
161              
162             =head2 register
163              
164             Register this plugin to install its event handlers. Called automatically.
165              
166             =head1 AUTHOR
167              
168             Doug Bell <preaction@cpan.org>
169              
170             =head1 COPYRIGHT AND LICENSE
171              
172             This software is copyright (c) 2016 by Doug Bell.
173              
174             This is free software; you can redistribute it and/or modify it under
175             the same terms as the Perl 5 programming language system itself.
176              
177             =cut