File Coverage

blib/lib/NNexus/Index/Mathworld.pm
Criterion Covered Total %
statement 28 37 75.6
branch 6 12 50.0
condition 1 3 33.3
subroutine 8 11 72.7
pod 5 7 71.4
total 48 70 68.5


line stmt bran cond sub pod time code
1             # /=====================================================================\ #
2             # | NNexus Autolinker | #
3             # | Indexing Plug-in, MathWorld.wolfram.com domain | #
4             # |=====================================================================| #
5             # | Part of the Planetary project: http://trac.mathweb.org/planetary | #
6             # | Research software, produced as part of work done by: | #
7             # | the KWARC group at Jacobs University | #
8             # | Copyright (c) 2012 | #
9             # | Released under the MIT License (MIT) | #
10             # |---------------------------------------------------------------------| #
11             # | Adapted from the original NNexus code by | #
12             # | James Gardner and Aaron Krowne | #
13             # |---------------------------------------------------------------------| #
14             # | Deyan Ginev #_# | #
15             # | http://kwarc.info/people/dginev (o o) | #
16             # \=========================================================ooo==U==ooo=/ #
17             package NNexus::Index::Mathworld;
18 3     3   1319 use warnings;
  3         6  
  3         132  
19 3     3   20 use strict;
  3         7  
  3         133  
20 3     3   17 use base qw(NNexus::Index::Template);
  3         6  
  3         1727  
21              
22 0     0 1 0 sub domain_root { "http://mathworld.wolfram.com/letters/"; }
23 0     0 0 0 sub domain_base { "http://mathworld.wolfram.com" }
24             sub candidate_links {
25 2     2 1 6 my ($self) = @_;
26 2         11 my $url = $self->current_url;
27 2         10 my $dom = $self->current_dom;
28             # Only a letter or a single-slashed path to a concept
29 2         10 my $directory = $dom->find('#directory')->[0];
30 2 50       13418 $directory = $dom->find('#directorysix')->[0] unless $directory; # Top level?
31 2 50       12714 return [] unless $directory; # Only index the alphabetical indices
32 0         0 my @next_jobs = $directory->find('a')->each;
33 0         0 @next_jobs = map { $self->domain_base . $_ } grep {defined } map {$_->{href}} @next_jobs;
  0         0  
  0         0  
  0         0  
34 0         0 \@next_jobs; }
35              
36             sub index_page {
37 2     2 1 3 my ($self) = @_;
38 2         7 my $url = $self->current_url;
39 2 50       6 return [] unless $self->leaf_test($url);
40 2         5 my $dom = $self->current_dom;
41             # TODO: Support multiple MSC categories in the same page, not only [0]
42 2         9 my $msc = $dom->find(':root > head > meta[scheme="MSC_2000"]');
43 2 50   2   10770 my @categories = $msc->map(sub{ $_->attr('content')})->each if $msc;
  2         21  
44 2 50       75 @categories = ('XX-XX') unless @categories;
45              
46 2         12 my $title = $dom->find(':root > head > meta[name="DC.Title"]')->[0];
47 2   33     10164 my $name = $title && $title->attr('content');
48 2 50       91 return $name ?
49             [{
50             url=>$url,
51             concept=>$name,
52             categories=>\@categories,
53             }] : []; }
54              
55 4     4 1 15 sub depth_limit {10;}
56 0     0 1 0 sub request_interval { 12; } # We'll sleep manually extra for the GET requests on the letters index
57 2     2 0 10 sub leaf_test { $_[1] !~ /letters/ }
58             1;
59             __END__