File Coverage

blib/lib/Text/Annotate/DataSource.pm
Criterion Covered Total %
statement 53 55 96.3
branch 4 6 66.6
condition 1 3 33.3
subroutine 13 14 92.8
pod 1 8 12.5
total 72 86 83.7


line stmt bran cond sub pod time code
1             package Text::Annotate::DataSource;
2 1     1   7 use strict;
  1         7  
  1         37  
3 1     1   1653 use Text::Annotate::HTMLWriter;
  1         3  
  1         142  
4 1     1   568 use Text::Annotate::WordScan;
  1         2  
  1         34  
5 1     1   7 use Carp;
  1         2  
  1         75  
6             BEGIN {
7 1     1   1 our $VERSION;
8 1         593 $VERSION = 0.01_2;
9             };
10              
11             =head1 NAME
12              
13             Text::Annotate::DataSource - parent class for annotation data sources
14              
15             =head1
16              
17             An annotation datasource provides two things:
18             a) a way to look up potential key phrases. Key phrases that match our data
19             yield URIs or similar identifiers.
20             b) a way to "explain" those URIs, producing a brief HTML description of the
21             phrase in question.
22              
23             =cut
24              
25             sub new {
26 1     1 0 2 my $class = shift;
27 1   33     8 $class = ref $class || $class;
28 1         5 my $self = bless {}, $class;
29 1         4 $self;
30             };
31              
32              
33             # "obj" is supposed to provide access to annotation objects.
34             # TODO: refactor into a superclass
35             # TODO: some classes will want to be instantiated at the beginning, shut down
36             # later ...
37             # TODO: "require" objects when needed
38             # TODO: make objects configurable, at with an easily inherited interface
39             sub obj {
40 5     5 0 14 my ($class, $name) = @_;
41 5         33 my $o = {
42             HTMLWriter => "Text::Annotate::HTMLWriter",
43             WordScan => "Text::Annotate::WordScan"
44             }->{$name};
45 5 50       18 ($o) or confess "no '$o' object recognised";
46 5         33 $o;
47             };
48              
49             =head1 METHODS
50              
51             =head2 index_lookup
52              
53             Typical call:
54              
55             $datasource->index_lookup ("get", "your", "widgets", ... "getyourwidgetsfrom", "wigentiacorp")
56              
57             index_lookup is called with a list of (already canonicalised) key phrases. Where those
58             phrases match one we know about, we return a two-element list.
59              
60             Typical output:
61              
62             (["wigentiacorp", "http://www.wigentia.com/"],
63             ["herring", "http://www.example.com/fishguide/herring/"])
64              
65             It is passed a list so we don't get overwhelmed by function-switching overhead, whilst
66             painstakingly trundling through piles of useless words in search of something interesting.
67              
68             You may well want to override this.
69              
70             =cut
71              
72             # TODO: doc interface here
73             # TODO: wouldn't references be faster?
74             sub index_lookup {
75 1     1 1 8 my ($self, @keys) = @_;
76 1         3 my $idx = $self->{idx};
77 1         2 my @out;
78 1         3 foreach my $k (@keys) {
79 45 100       87 push @out, [$k, $idx->{$k}] if $idx->{$k};
80             }
81 1         8 @out;
82             }
83              
84             sub search_html {
85 1     1 0 102 my ($self, $to_search) = @_;
86 1         11 $to_search =~ s/\<.+?\>//sg;
87 1         10 $self->search($to_search);
88             }
89              
90             sub search {
91 1     1 0 2 my $self = shift;
92 1         2 my $to_search = shift;
93 1         2 my @out;
94             my %seen;
95             my $cbk = sub {
96 1     1   2 my $in = shift;
97 1         10 my ($results) = [$self->index_lookup(@$in)];
98 1         7 while (@$results) {
99 2         4 my $r = shift @$results;
100 2 50       6 if ($seen{$r->[0]}) {
101 0         0 next; # ooh
102             };
103 2         5 $seen{$r->[0]} = 1;
104 2         3 foreach my $link (@{$r->[1]}) {
  2         5  
105 2         9 push @out, {title => $r->[0], link => $link, $self->explain_link($link)};
106             };
107             };
108 1         4 ();
109 1         7 };
110 1         7 $self->scan_words($to_search, $cbk);
111              
112 1         3 foreach my $e (@out) {
113 2         10 %$e = (%$e, $self->explain_link($e->{link}));
114             };
115              
116 1         3 return $self->obj("HTMLWriter")->format_annotations(\@out);
117             };
118              
119              
120             sub scan_words {
121 1     1 0 2 my $self = shift;
122 1         3 $self->obj("WordScan")->scan_words(@_);
123             };
124              
125             sub canonicalize_id {
126 2     2 0 4 my ($self, $word) = @_;
127 2         9 $self->obj("WordScan")->canonicalize_id($word);
128             };
129              
130 0     0 0   sub explain_link { () };
131              
132             1;