| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package App::DuckPAN::Fathead; |
|
2
|
|
|
|
|
|
|
our $AUTHORITY = 'cpan:DDG'; |
|
3
|
|
|
|
|
|
|
# ABSTRACT: Searches a given output.txt file for a query match |
|
4
|
|
|
|
|
|
|
$App::DuckPAN::Fathead::VERSION = '1018'; |
|
5
|
1
|
|
|
1
|
|
1280
|
use Moo; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
7
|
|
|
6
|
|
|
|
|
|
|
with 'App::DuckPAN::HasApp'; |
|
7
|
|
|
|
|
|
|
|
|
8
|
1
|
|
|
1
|
|
1573
|
use DBI; |
|
|
1
|
|
|
|
|
13601
|
|
|
|
1
|
|
|
|
|
67
|
|
|
9
|
1
|
|
|
1
|
|
626
|
use JSON; |
|
|
1
|
|
|
|
|
3983
|
|
|
|
1
|
|
|
|
|
4
|
|
|
10
|
1
|
|
|
1
|
|
119
|
use Path::Tiny; |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
42
|
|
|
11
|
1
|
|
|
1
|
|
6
|
use HTML::TreeBuilder; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
12
|
|
|
12
|
1
|
|
|
1
|
|
21
|
use HTML::Element; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
4
|
|
|
13
|
1
|
|
|
1
|
|
20
|
use Data::Printer return_value => 'dump'; |
|
|
1
|
|
|
|
|
6
|
|
|
|
1
|
|
|
|
|
8
|
|
|
14
|
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
# ID of the Fathead currently being worked on. Setting a value fires
# _trigger_selected (trigger => 1); has_selected (predicate => 1) reports
# whether a value has been set.
has selected => (
    is        => 'rw',
    lazy      => 1,
    required  => 0,
    predicate => 1,
    trigger   => 1
);

# Trigger for `selected`.
#
# Checks that lib/fathead/<id> is a directory containing an output.txt and
# stores that file via _set_output_txt. Either failure is reported through
# the app's emit_and_exit.
#
# Params:  $id - the value just assigned to `selected`
# Returns: the Path::Tiny object for the Fathead's directory
sub _trigger_selected {
    my ( $self, $id ) = @_;

    my $dir = path("lib/fathead/$id");
    unless ($dir->is_dir) {
        # absolute() does not consult the filesystem. realpath() throws when
        # a parent directory is missing, which would replace the message
        # below with a stack trace.
        my $full_path = $dir->absolute;
        $self->app->emit_and_exit(1, "Directory not found: $full_path");
    }

    my $file = $dir->child("output.txt");
    unless ($file->exists) {
        # Report the directory that was searched, not the missing file.
        my $full_path = $dir->absolute;
        $self->app->emit_and_exit(1, "No output.txt was found in $full_path");
    }

    $self->_set_output_txt($file);
    return $dir;
}
|
38
|
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
# Array ref of trigger words for the selected Fathead; populated on first
# access by _build__trigger_words.
has _trigger_words => (
    is      => 'ro',
    builder => 1,
    lazy    => 1,
);

# Builder for `_trigger_words`.
#
# Reads lib/fathead/<selected>/trigger_words.txt, one word (or phrase) per
# line. Returns an empty array ref when no Fathead is selected or when the
# file does not exist (the latter is reported with emit_debug).
#
# Returns: array ref of non-empty, whitespace-trimmed trigger words
sub _build__trigger_words {
    my ($self) = @_;

    my $tf = 'trigger_words.txt';
    return [] unless $self->has_selected;

    my $file = path("lib/fathead/", $self->selected, $tf);
    unless ($file->exists) {
        # absolute() cannot throw for a missing path; report the directory
        # that was searched.
        my $searched = $file->parent->absolute;
        $self->app->emit_debug("No $tf was found in $searched");
        return [];
    }

    # Trim each line rather than chomp it: chomp leaves the "\r" of CRLF
    # files attached to every word. Blank lines are dropped so they cannot
    # become empty alternatives in the trigger regex.
    my @words;
    for my $line ($file->lines) {
        $line =~ s/^\s+|\s+$//g;
        push @words, $line if length $line;
    }
    return \@words;
}
|
58
|
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
# Compiled pattern matching any of the trigger words; produced on first
# access by _build__trigger_re.
has _trigger_re => (
    builder => 1,
    lazy    => 1,
    is      => 'ro',
);
|
64
|
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
# Builder for `_trigger_re`.
#
# Compiles the trigger words into one case-insensitive alternation. Each
# word is matched literally (quotemeta) and must not be directly preceded
# or followed by a word character.
#
# Returns: a qr// object; it never matches when there are no trigger words
sub _build__trigger_re {
    my ($self) = @_;

    # Drop undefined/empty entries: an empty alternative would let the
    # pattern match the empty string.
    my @words = grep { defined $_ && length $_ } @{ $self->_trigger_words };

    # With nothing to match, return a pattern that always fails.
    return qr/(?!)/ unless @words;

    my $text = join '|', map { quotemeta $_ } @words;

    # Lookarounds are used in place of \b...\b. They behave the same for
    # words that start and end with word characters, and they also work for
    # triggers such as "c++", whose edges \b can never match next to a space.
    return qr/(?<!\w)(?:$text)(?!\w)/i;
}
|
71
|
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
# The selected Fathead's output.txt; written privately (is => 'rwp') by
# _trigger_selected through _set_output_txt.
has output_txt => (
    is       => 'rwp',
    required => 0,
    lazy     => 1
);

# DBI handle over output.txt; created on first access by _build_dbh.
has dbh => (
    is       => 'rw',
    builder  => 1,
    required => 0,
    lazy     => 1
);

# Builder for `dbh`.
#
# Opens the directory holding output.txt through the DBI CSV driver and
# exposes the file as a table named "output". Fields are tab-separated with
# quoting and escaping disabled, and the column names are supplied
# explicitly below. Connection errors are raised (RaiseError => 1).
#
# Returns: the connected DBI database handle
sub _build_dbh {
    my ( $self ) = @_;

    # Column names for output.txt, in file order.
    my @columns = (
        "title",
        "type",
        "redirect",
        "col4",
        "categories",
        "col6",
        "related_topics",
        "col8",
        "external_links",
        "disambiguation",
        "images",
        "abstract",
        "abstract_url",
    );

    my %connect_attr = (
        f_dir                => $self->output_txt->parent,
        f_ext                => ".txt/r",
        csv_sep_char         => "\t",
        csv_quote_char       => undef,
        csv_escape_char      => undef,
        csv_allow_whitespace => 1,
        csv_allow_quotes     => 1,
        RaiseError           => 1,
        PrintError           => 0,
        csv_tables           => {
            output => {
                file      => 'output.txt',
                col_names => \@columns,
            },
        },
    );

    my $dbh = DBI->connect("dbi:CSV:", undef, undef, \%connect_attr)
        or die $DBI::errstr;

    return $dbh;
}
|
125
|
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
# Look up a query in output.txt and return the matching row.
#
# One leading or trailing trigger word (with its adjoining whitespace) is
# stripped from the query first. Rows of type 'R' are redirects: the lookup
# is repeated with the row's `redirect` value until a non-redirect row is
# found or the lookup comes back empty.
#
# Params:  $query - the raw query string
# Returns: hash ref for the matching row, or undef when nothing matches or
#          the redirects form a loop
sub _search_output {

    my ($self, $query) = @_;

    my $trigger_re = $self->_trigger_re;
    $query =~ s/^$trigger_re\s+|\s+$trigger_re$//;
    my $result = $self->_db_lookup($query);

    # Titles already followed, lower-cased to match the case-insensitive
    # lookup. Without this a self-redirect or an A -> B -> A chain would
    # spin forever.
    my %followed;

    while ($result && defined $result->{type} && $result->{type} eq 'R') {
        my $title    = defined $result->{title} ? $result->{title} : '';
        my $redirect = $result->{redirect};

        if ($followed{ lc $title }++) {
            $self->app->emit_error("Redirect loop detected at '$title'");
            $result = undef;
            last;
        }

        $self->app->emit_notice("Following Redirect: '$result->{title}' -> '$redirect'");
        $result = $self->_db_lookup($redirect);
    }
    return $result;
}
|
143
|
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
# Fetch the row of output.txt whose title equals $query, ignoring case.
#
# When several rows match, the last one fetched wins. Any exception raised
# while preparing, executing or fetching is caught and reported through
# emit_error instead of propagating.
#
# Params:  $query - title to look for
# Returns: hash ref for the matching row, or undef when there is no match
#          (or the lookup failed before any row was fetched)
sub _db_lookup {
    my ($self, $query) = @_;

    my $result;
    my $error;

    {
        # Keep the caller's $@ intact, and rely on the eval's own return
        # value to detect failure, not on $@ being true afterwards.
        local $@;
        my $ok = eval {
            # TODO lowercase all titles first
            my $sth = $self->dbh->prepare("SELECT * FROM output WHERE lower(title) = ?");
            $sth->execute(lc $query);
            while (my $row = $sth->fetchrow_hashref) {
                $result = $row;
            }
            $sth->finish();
            1;
        };
        $error = $@ || 'unknown error' unless $ok;
    }

    # Report outside the local-$@ scope so that an emit_error which throws
    # is not affected by the restore of $@.
    $self->app->emit_error("SQL database error: $error") if defined $error;
    return $result;
}
|
164
|
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
# Public entry point: search output.txt for $query and turn the matching
# row into a Structured Answer hash.
#
# Params:  $query - the raw query string
# Returns: the hash ref built by _build_structured_answer, or undef when
#          the search finds nothing
sub structured_answer_for_query {
    my ($self, $query) = @_;

    my $match = $self->_search_output($query);
    return undef unless $match;

    return $self->_build_structured_answer($match);
}
|
170
|
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
# Assemble the Structured Answer hash for one output.txt row.
#
# The answer always carries `id`, `signal`, `meta` and `data`. The
# `duckbar_topic`, `model` and `templates` keys, and the extra template
# properties merged into `data`, depend on the row's type:
#   'A' article, 'D' disambiguation, 'C' category page.
#
# Params:  $data - hash ref for the row (as returned by the DB lookup)
# Returns: hash ref describing the Structured Answer
sub _build_structured_answer {
    my ($self, $data) = @_;

    # IA metadata is looked up by ID, on the assumption that `selected`
    # holds an ID. Missing metadata is reported but is not fatal.
    my $metadata = DDG::Meta::Data->get_ia(id => $self->selected) // {};
    unless (keys %$metadata) {
        $self->app->emit_error("No Metadata found for ID: ".$self->selected);
    }

    my $out = {
        id     => $self->selected,
        signal => "high",
        meta   => $metadata,
    };

    # DBD::Csv ignores col_names letter casing, so the columns are mapped
    # by hand onto the property names the templates expect.
    # TODO update info_detail template to use lowercase variable names
    my %extra_data;

    if ($data->{type} eq 'A') {
        # Article Result
        @{$out}{qw(duckbar_topic model templates)}
            = ('About', 'FatheadArticle', { detail => 'info_detail' });
        %extra_data = (
            Heading     => $data->{title},
            Abstract    => $self->_replace_newlines($data->{abstract}),
            AbstractURL => $data->{abstract_url},
            FirstURL    => $metadata->{src_url},
            Image       => $self->_get_image($data->{images}),
        );
    }
    elsif ($data->{type} eq 'D') {
        # Disambiguation Result
        @{$out}{qw(duckbar_topic model templates)}
            = ('Meanings', 'FatheadListItem', { item => 'meanings_item' });
        %extra_data = (
            Heading       => $data->{title}." (".$metadata->{name}.")",
            RelatedTopics => $self->_parse_disambiguations($data->{disambiguation}, $out)
        );
    }
    elsif ($data->{type} eq 'C') {
        # Category Pages Result
        @{$out}{qw(duckbar_topic model templates)}
            = ('List', 'FatheadListItem', { item => 'categories_item' });
    }

    # Row columns first, so the mapped properties win on any key clash.
    $out->{data} = { %$data, %extra_data };
    return $out;
}
|
227
|
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
# Convert the `disambiguation` column into a list of related-topic hashes,
# emulating the internal processing that produces the DDG API result format.
#
# The column holds entries separated by a literal backslash-n sequence,
# each shaped like `*[[Title]],<html description>`. For every such entry
# the description is reduced to plain text and a link to `/?q=Title&ia=about`
# is generated. Entries that do not have this shape contribute an empty
# hash, so the output has one element per input entry.
#
# Params:  $disambiguations - raw column value (may be undef)
#          $out             - the answer being built (currently unused)
# Returns: array ref of hash refs with Result, FirstURL and Text keys
sub _parse_disambiguations {
    my ($self, $disambiguations, $out) = @_;

    my @out;

    # An undefined column is treated as having no entries.
    my @entries = split /\\n/, (defined $disambiguations ? $disambiguations : '');

    foreach my $entry (@entries) {
        my $result = {};

        # Non-greedy title: stop at the first "]]," so a later "]]," inside
        # the description is not swallowed into the title.
        if ($entry =~ m/^\*\[\[(.+?)\]\],(.+)$/) {
            my ($title, $html) = ($1, $2);

            # Parse HTML into plaintext
            my $root = HTML::TreeBuilder->new_from_content($html);
            $root->elementify();
            my $text = $root->as_trimmed_text;
            # Parsed trees hold parent<->child references; delete the tree
            # explicitly so it is freed even when weak refs are not in use.
            $root->delete;

            # Build URL Path
            my $href = "/?q=$title&ia=about";
            my $link = HTML::Element->new('a', href => $href);
            $link->push_content($title);

            $result = {
                # generates `<a href="$url">$title</a>$text` which gets parsed by template helpers
                Result   => $link->as_HTML . "$text",
                FirstURL => $href,
                Text     => $text
            };
            $link->delete;
        }
        push @out, $result;
    }
    return \@out;
}
|
261
|
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
# Extract the image URL from an `images` column value.
#
# The value is expected to look like `[[Image:<url>]]`; anything else
# yields the empty string.
#
# Params:  $image - raw column value
# Returns: the text between `[[Image:` and `]]`, or "" when the value does
#          not have that form
sub _get_image {
    my ($self, $image) = @_;

    my ($url) = $image =~ m/^\[\[Image:(.+)\]\]$/;
    return defined $url ? $url : "";
}
|
272
|
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
# Replace every literal backslash-n sequence (the two characters `\` and
# `n`, not a real newline) in an abstract with an HTML <br> tag.
#
# Params:  $abstract - raw abstract text
# Returns: the abstract with each sequence replaced
sub _replace_newlines {
    my ($self, $abstract) = @_;

    (my $with_breaks = $abstract) =~ s/\\n/<br>/g;
    return $with_breaks;
}
|
279
|
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
1; |
|
282
|
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
__END__ |
|
284
|
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
=pod |
|
286
|
|
|
|
|
|
|
|
|
287
|
|
|
|
|
|
|
=head1 NAME |
|
288
|
|
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
App::DuckPAN::Fathead - Searches a given output.txt file for a query match |
|
290
|
|
|
|
|
|
|
|
|
291
|
|
|
|
|
|
|
=head1 VERSION |
|
292
|
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
version 1018 |
|
294
|
|
|
|
|
|
|
|
|
295
|
|
|
|
|
|
|
=head1 AUTHOR |
|
296
|
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
DuckDuckGo <open@duckduckgo.com>, Zach Thompson <zach@duckduckgo.com>, Zaahir Moolla <moollaza@duckduckgo.com>, Torsten Raudssus <torsten@raudss.us> L<https://raudss.us/> |
|
298
|
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
|
300
|
|
|
|
|
|
|
|
|
301
|
|
|
|
|
|
|
This software is Copyright (c) 2013 by DuckDuckGo, Inc. L<https://duckduckgo.com/>. |
|
302
|
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
This is free software, licensed under: |
|
304
|
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
The Apache License, Version 2.0, January 2004 |
|
306
|
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
=cut |