File Coverage

blib/lib/Dancer/Plugin/SiteMap.pm
Criterion Covered Total %
statement 4 6 66.6
branch n/a
condition n/a
subroutine 2 2 100.0
pod n/a
total 6 8 75.0


line stmt bran cond sub pod time code
1             package Dancer::Plugin::SiteMap;
2              
3 1     1   26844 use strict;
  1         1  
  1         36  
4 1     1   468 use Dancer qw(:syntax);
  0            
  0            
5             use Dancer::Plugin;
6              
7             use Scalar::Util;
8             use XML::Simple;
9              
10             our $VERSION = '0.14';
11             my $OMIT_ROUTES = [];
12             my @sitemap_urls;
13              
14             # Add syntactic sugar for omitting routes.
15             register 'sitemap_ignore' => sub {
16              
17             # Dancer 2 keywords receive a reference to the DSL object as a first param,
18             # so if we're running under D2, we need to make sure we don't pass that on
19             # to the route gathering code.
20             shift if Scalar::Util::blessed($_[0]) && $_[0]->isa('Dancer::Core::DSL');
21             push @$Dancer::Plugin::SiteMap::OMIT_ROUTES, @_;
22             };
23              
24             # Add this plugin to Dancer, both Dancer 1 and Dancer 2 :-)
25             register_plugin( for_versions => [ qw( 1 2 ) ] );
26              
27             my $conf = plugin_setting();
28             my %routes = (
29             html => {
30             urlpath => '/sitemap',
31             coderef => \&_html_sitemap,
32             },
33             xml => {
34             urlpath => '/sitemap.xml',
35             coderef => \&_xml_sitemap,
36             },
37             );
38              
39             # If a route's key exists in the plugin settings but its value is undefined
40             # or false, the developer wishes the app to omit that particular sitemap type.
41             # If the key doesn't exist in the plugin settings at all, we go with the
42             # default urlpath for that route.
43             for my $route_type (keys %routes) {
44             my $route = $routes{$route_type};
45             my $config_key = $route_type . "_route";
46              
47             if (exists $conf->{$config_key}) {
48             $route->{urlpath} = $conf->{$config_key} || undef;
49             }
50              
51             get $route->{urlpath} => $route->{coderef} if $route->{urlpath};
52             }
53              
54             # Add omissions defined in the robots.txt file, if that option is specified in
55             # the config.
56             if ( defined $conf->{'robots_disallow'} ) {
57              
58             # Read the Disallow lines from robots.txt and add to $OMIT_ROUTES
59             my $robots_txt = $conf->{'robots_disallow'};
60             my @disallowed_list = ();
61             open my $robots_fh, '<', $robots_txt or die "Error reading $robots_txt $!";
62              
63             while (my $line = <$robots_fh>) {
64             if ($line =~ m/^\s*Disallow: \s*(\/[^\s#]*)/) {
65             push @disallowed_list, $1;
66             }
67             }
68              
69             close $robots_fh;
70             sitemap_ignore(@disallowed_list);
71             }
72              
73             # The action handler for the automagic /sitemap route. Uses the list of
74             # URLs from _retreive_get_urls and outputs a basic HTML template to the
75             # browser using the standard layout if one is defined.
76             sub _html_sitemap {
77             my @urls = _retreive_get_urls();
78              
79             my $content = qq[<h2> Site Map </h2>\n<ul class="sitemap">\n];
80             for my $url (@urls) {
81             $content .= qq[  <li><a href="$url">$url</a></li>\n];
    82             }
    83             $content .= qq[</ul>\n];
    84              
    85             # If the config specifies a HTML Wrapper for the HTML SiteMap - then use
    86             # that (which handily also stuffs it in the layout). Failing that, we need
    87             # to just take the sitemap and whack it in the site layout
    88             return ($conf->{html_template})
    89             ? template $conf->{html_template}, { sitemap => $content }
    90             : engine('template')->apply_layout($content);
    91             };
    92              
    93              
    94             # The action handler for the automagic /sitemap.xml route. Uses the list of
    95             # URLs from _retreive_get_urls and outputs an XML document to the browser.
    96             sub _xml_sitemap {
    97             my @urls = _retreive_get_urls();
    98             my @sitemap_urls;
    99              
    100             # add the "loc" key to each url so XML::Simple creates <loc></loc> tags.
    101             for my $url (@urls) {
    102             my $uri = uri_for($url);
    103             push @sitemap_urls, { loc => [ "$uri" ] }; # $uri has to be stringified
    104             }
    105              
    106             # create a hash for XML::Simple to turn into XML.
    107             my %urlset = (
    108             xmlns => 'http://www.sitemaps.org/schemas/sitemap/0.9',
    109             url => \@sitemap_urls
    110             );
    111              
    112             my $xs = new XML::Simple( KeepRoot => 1,
    113             ForceArray => 0,
    114             KeyAttr => {urlset => 'xmlns'},
    115             XMLDecl => '' );
    116             my $xml = $xs->XMLout( { urlset => \%urlset } );
    117              
    118             content_type "text/xml";
    119             return $xml;
    120             };
    121              
    122              
    123             # Obtains the list of URLs from Dancer's route registry.
    124             sub _retreive_get_urls {
    125             return @sitemap_urls if @sitemap_urls;
    126              
    127             my $version = (exists &dancer_version) ? int( dancer_version() ) : 1;
    128             my @apps = ($version == 2) ? @{ runner->server->apps }
    129             : Dancer::App->applications;
    130              
    131             my ($route, @urls);
    132             for my $app ( @apps ) {
    133             my $routes = ($version == 2) ? $app->routes
    134             : $app->{registry}->{routes};
    135              
    136             # push the static get routes into an array.
    137             get_route:
    138             for my $get_route ( @{ $routes->{get} } ) {
    139             my $pattern = ($version == 2) ? $get_route->spec_route
    140             : $get_route->{pattern};
    141              
    142             if (ref($pattern) !~ m/HASH/i) {
    143              
    144             # If the pattern is a true comprehensive regexp or the route
    145             # has a :variable element to it, then omit it. Dancer 2 also
    146             # has /** entries - we'll dump them too.
    147             next get_route if ($pattern =~ m/[()[\]|]|:\w/);
    148             next get_route if ($pattern =~ m{/\*\*});
    149              
    150             # If there is an optional ("?") modifier, then drop it and keep the
    151             # full route.
    152             $pattern =~ s/\?//g;
    153              
    154             # Only add any given route once.
    155             next get_route if grep { $_ eq $pattern } @urls;
    156              
    157             # Other than that, it's fine to add it (unless an omitted route matches).
    158             push (@urls, $pattern)
    159             if ! grep { $pattern =~ m/^$_/i }
    160             @$Dancer::Plugin::SiteMap::OMIT_ROUTES;
    161             }
    162             }
    163             }
    164              
    165             return @sitemap_urls = sort(@urls);
    166             }
    167              
    168              
    169             1; # End of Dancer::Plugin::SiteMap
    170             __END__