File Coverage

blib/lib/Statocles/Plugin/LinkCheck.pm
Criterion Covered Total %
statement 54 54 100.0
branch 23 24 95.8
condition 3 3 100.0
subroutine 6 6 100.0
pod 2 2 100.0
total 88 89 98.8


line stmt bran cond sub pod time code
1             package Statocles::Plugin::LinkCheck;
2             our $VERSION = '0.084';
3             # ABSTRACT: Check links and images for validity during build
4              
5 2     2   27278 use Statocles::Base 'Class';
  2         7  
  2         19  
6             with 'Statocles::Plugin';
7 2     2   14942 use Mojo::DOM;
  2         9946  
  2         86  
8 2     2   19 use Mojo::Util qw( url_escape url_unescape );
  2         5  
  2         1641  
9              
10             #pod =attr ignore
11             #pod
12             #pod An array of URL patterns to ignore. These are interpreted as regular expressions,
13             #pod and are anchored to the beginning of the URL.
14             #pod
15             #pod For example:
16             #pod
17             #pod /broken will match "/broken.html" "/broken/page.html" but not "/page/broken"
18             #pod .*/broken will match "/broken.html" "/broken/page.html" and "/page/broken"
19             #pod
20             #pod =cut
21              
22             has ignore => (
23             is => 'ro',
24             isa => ArrayRef[Str],
25             default => sub { [] },
26             );
27              
28             #pod =method check_pages
29             #pod
30             #pod $plugin->check_pages( $event );
31             #pod
32             #pod Check the pages inside the given
33             #pod L<Statocles::Event::Pages|Statocles::Event::Pages> event.
34             #pod
35             #pod =cut
36              
37             sub check_pages {
38 10     10 1 39 my ( $self, $event ) = @_;
39              
40 10         33 my %page_paths = ();
41 10         25 my %links = ();
42 10         26 my %empty = (); # Pages with empty links
43 10         22 for my $page ( @{ $event->pages } ) {
  10         50  
44 200         7422 $page_paths{ $page->path } = 1;
45 200 100       2474 if ( $page->DOES( 'Statocles::Page::Document' ) ) {
46 47         1753 my $dom = Mojo::DOM->new( $page->render( site => $event->emitter ) );
47              
48 47         172071 for my $attr ( qw( src href ) ) {
49 94         34236 for my $el ( $dom->find( "[$attr]" )->each ) {
50 462         94341 my $url = $el->attr( $attr );
51              
52 462 100       7695 if ( !$url ) {
53 3         8 push @{ $empty{ $page->path } }, $el;
  3         47  
54             }
55              
56 462         1083 $url =~ s{#.*$}{};
57 462 100       2013 next unless $url; # Skip checking fragment-internal links for now
58 423 100       1484 next if $url =~ m{^(?:[a-z][a-z0-9+.-]*):}i;
59 366 100       725 next if $url =~ m{^//};
60 316 100       880 if ( $url !~ m{^/} ) {
61 30         507 $url = $page->path->parent->child( $url );
62             }
63              
64             # Fix ".." and ".". Path::Tiny->canonpath can't do
65             # this for us because these paths do not exist on
66             # the filesystem
67 316         3249 $url =~ s{/[^/]+/[.][.]/}{/}g; # Fix ".." to refer to parent
68 316         646 $url =~ s{/[.]/}{/}g; # Fix "." to refer to self
69              
70 316         848 $links{ url_unescape $url }{ $page->path }++;
71              
72             }
73             }
74             }
75             }
76              
77 10         259 my @missing; # Array of arrayrefs of [ link_url, page_path, link_element ]
78 10         61 for my $link_url ( keys %links ) {
79 109 100       290 $link_url .= 'index.html' if $link_url =~ m{/$};
80 109 100 100     363 next if $page_paths{ $link_url } || $page_paths{ "$link_url/index.html" };
81 21 100       35 next if grep { $link_url =~ /^$_/ } @{ $self->ignore };
  18         281  
  21         62  
82 13         20 push @missing, [ $link_url, $_ ] for keys %{ $links{ $link_url } };
  13         70  
83             }
84              
85 10         47 for my $page_url ( keys %empty ) {
86 3         8 push @missing, map { [ '', $page_url, $_ ] } @{ $empty{ $page_url } };
  3         16  
  3         9  
87             }
88              
89 10 100       95 if ( @missing ) {
90             # Sort by page url and then missing link url
91 3 50       21 for my $m ( sort { $a->[1] cmp $b->[1] || $a->[0] cmp $b->[0] } @missing ) {
  170         270  
92 46 100       3888 my $msg = $m->[0] ? sprintf( q{'%s' not found}, $m->[0] )
93             : sprintf( q{Link with text "%s" has no destination}, $m->[2]->text )
94             ;
95 46         1088 $event->emitter->log->warn( "URL broken on $m->[1]: $msg" );
96             }
97             }
98             }
99              
100             #pod =method register
101             #pod
102             #pod Register this plugin to install its event handlers. Called automatically.
103             #pod
104             #pod =cut
105              
106             sub register {
107 10     10 1 236 my ( $self, $site ) = @_;
108 10     10   105 $site->on( build => sub { $self->check_pages( @_ ) } );
  10         8455  
109             }
110              
111             1;
112              
113             __END__
114              
115             =pod
116              
117             =encoding UTF-8
118              
119             =head1 NAME
120              
121             Statocles::Plugin::LinkCheck - Check links and images for validity during build
122              
123             =head1 VERSION
124              
125             version 0.084
126              
127             =head1 SYNOPSIS
128              
129             # site.yml
130             site:
131             class: Statocles::Site
132             args:
133             plugins:
134             link_check:
135             $class: Statocles::Plugin::LinkCheck
136              
137             =head1 DESCRIPTION
138              
139             This plugin checks all of the links and images to ensure they exist. If something
140             is missing, this plugin will write a warning to the screen.
141              
142             =head1 ATTRIBUTES
143              
144             =head2 ignore
145              
146             An array of URL patterns to ignore. These are interpreted as regular expressions,
147             and are anchored to the beginning of the URL.
148              
149             For example:
150              
151             /broken will match "/broken.html" "/broken/page.html" but not "/page/broken"
152             .*/broken will match "/broken.html" "/broken/page.html" and "/page/broken"
153              
154             =head1 METHODS
155              
156             =head2 check_pages
157              
158             $plugin->check_pages( $event );
159              
160             Check the pages inside the given
161             L<Statocles::Event::Pages|Statocles::Event::Pages> event.
162              
163             =head2 register
164              
165             Register this plugin to install its event handlers. Called automatically.
166              
167             =head1 AUTHOR
168              
169             Doug Bell <preaction@cpan.org>
170              
171             =head1 COPYRIGHT AND LICENSE
172              
173             This software is copyright (c) 2016 by Doug Bell.
174              
175             This is free software; you can redistribute it and/or modify it under
176             the same terms as the Perl 5 programming language system itself.
177              
178             =cut