| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
##-*- Mode: CPerl -*- |
|
2
|
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
## File: DDC::Format::Kwic.pm |
|
4
|
|
|
|
|
|
|
## Author: Bryan Jurish |
|
5
|
|
|
|
|
|
|
## Description: |
|
6
|
|
|
|
|
|
|
## + DDC Query utilities: output formatting: keywords-in-context |
|
7
|
|
|
|
|
|
|
##====================================================================== |
|
8
|
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
package DDC::Format::Kwic; |
|
10
|
26
|
|
|
26
|
|
198
|
use File::Basename; |
|
|
26
|
|
|
|
|
60
|
|
|
|
26
|
|
|
|
|
3264
|
|
|
11
|
26
|
|
|
26
|
|
233
|
use Carp; |
|
|
26
|
|
|
|
|
60
|
|
|
|
26
|
|
|
|
|
1613
|
|
|
12
|
26
|
|
|
26
|
|
174
|
use strict; |
|
|
26
|
|
|
|
|
81
|
|
|
|
26
|
|
|
|
|
1439
|
|
|
13
|
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
##====================================================================== |
|
15
|
|
|
|
|
|
|
## Globals |
|
16
|
|
|
|
|
|
|
our @ISA = qw(DDC::Format); |
|
17
|
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
BEGIN { |
|
19
|
26
|
|
|
26
|
|
27883
|
*isa = \&UNIVERSAL::isa; |
|
20
|
|
|
|
|
|
|
} |
|
21
|
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
##====================================================================== |
|
23
|
|
|
|
|
|
|
## Constructors, etc. |
|
24
|
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
## $fmt = $CLASS_OR_OBJ->new(%args) |
|
26
|
|
|
|
|
|
|
## + %args: |
|
27
|
|
|
|
|
|
|
## ( |
|
28
|
|
|
|
|
|
|
## start=>$previous_hit_num, ##-- pre-initial hit number (default=0) |
|
29
|
|
|
|
|
|
|
## highlight=>[$pre,$post], ##-- highlighting substrings |
|
30
|
|
|
|
|
|
|
## width=>$nchars, ##-- context width; default=32 |
|
31
|
|
|
|
|
|
|
## useMatchIds=>$bool, ##-- whether to use match-ids if available; undef (default) if non-trivial match-ids are specified |
|
32
|
|
|
|
|
|
|
## ) |
|
33
|
|
|
|
|
|
|
sub new { |
|
34
|
0
|
|
|
0
|
1
|
|
my $that = shift; |
|
35
|
0
|
|
0
|
|
|
|
return bless { |
|
36
|
|
|
|
|
|
|
highlight=>['__','__'], |
|
37
|
|
|
|
|
|
|
width=>32, |
|
38
|
|
|
|
|
|
|
useMatchIds=>undef, |
|
39
|
|
|
|
|
|
|
@_ |
|
40
|
|
|
|
|
|
|
}, ref($that)||$that; |
|
41
|
|
|
|
|
|
|
} |
|
42
|
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
## $fmt = $fmt->reset() |
|
44
|
|
|
|
|
|
|
## + reset counters, etc. |
|
45
|
|
|
|
|
|
|
sub reset { |
|
46
|
0
|
|
|
0
|
1
|
|
$_[0]{start}=0; |
|
47
|
0
|
|
|
|
|
|
return $_[0]->SUPER::reset(); |
|
48
|
|
|
|
|
|
|
} |
|
49
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
##====================================================================== |
|
51
|
|
|
|
|
|
|
## Helper functions |
|
52
|
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
## $len = maxlen(@strings) |
|
54
|
|
|
|
|
|
|
sub maxlen { |
|
55
|
0
|
|
|
0
|
0
|
|
my $l = 0; |
|
56
|
0
|
0
|
|
|
|
|
do { $l=length($_) if (length($_) > $l) } foreach (@_); |
|
|
0
|
|
|
|
|
|
|
|
57
|
0
|
|
|
|
|
|
return $l; |
|
58
|
|
|
|
|
|
|
} |
|
59
|
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
##====================================================================== |
|
61
|
|
|
|
|
|
|
## API |
|
62
|
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
## $str = $fmt->toString($hitList) |
|
64
|
|
|
|
|
|
|
sub toString { |
|
65
|
0
|
|
|
0
|
1
|
|
my ($fmt,$hits) = @_; |
|
66
|
|
|
|
|
|
|
|
|
67
|
0
|
0
|
0
|
|
|
|
if ($hits->{counts_} && @{$hits->{counts_}}) { |
|
|
0
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
##-- count-query: format as text |
|
69
|
0
|
|
|
|
|
|
my ($i); |
|
70
|
0
|
|
|
|
|
|
my @lens = map {$i=$_; maxlen(map {$_->[$i]} @{$hits->{counts_}})} (0..$#{$hits->{counts_}[0]}); |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
71
|
0
|
|
|
|
|
|
my $fmt = join("\t", map {"%-${_}s"} @lens)."\n"; |
|
|
0
|
|
|
|
|
|
|
|
72
|
0
|
|
|
|
|
|
return join('', map {sprintf($fmt,@$_)} @{$hits->{counts_}}); |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
} |
|
74
|
|
|
|
|
|
|
|
|
75
|
0
|
|
0
|
|
|
|
my $xlen = $fmt->{width} || 2**31; |
|
76
|
0
|
|
|
|
|
|
my $hnum = $hits->{start}; |
|
77
|
0
|
0
|
|
|
|
|
my $useMatchIds = defined($fmt->{useMatchIds}) ? $fmt->{useMatchIds} : (grep {$_>0 && $_!=1} map {$_->{hl_}} map {@{$_->{ctx_}[1]}} @{$hits->{hits_}}); |
|
|
0
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
|
|
79
|
0
|
|
|
|
|
|
my (@hits); |
|
80
|
0
|
|
|
|
|
|
foreach my $hit (@{$hits->{hits_}}) { |
|
|
0
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
##-- hit key: number + file basename + page |
|
82
|
0
|
|
|
|
|
|
my $f = basename($hit->{meta_}{file_}); |
|
83
|
0
|
|
|
|
|
|
$f =~ s/\..*$//; |
|
84
|
0
|
0
|
|
|
|
|
my $p = defined($hit->{meta_}{page_}) ? $hit->{meta_}{page_} : 0; |
|
85
|
0
|
|
|
|
|
|
my $pagei = (grep {$hit->{meta_}{indices_}[$_] eq 'page'} (0..$#{$hit->{meta_}{indices_}}))[0]; |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
86
|
0
|
0
|
|
|
|
|
my $targetMatchId = $useMatchIds ? (sort {$a<=>$b} grep {$_} map {$_->{hl_}} @{$hit->{ctx_}[1]})[0] : undef; |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
##-- hit context |
|
89
|
0
|
|
0
|
|
|
|
my $fkey = $hits->{defaultField} || $hit->{meta_}{indices_}[0] || 'w'; |
|
90
|
0
|
|
|
|
|
|
my (@l,@c,@r); |
|
91
|
0
|
|
|
|
|
|
my $ary = \@l; |
|
92
|
0
|
|
|
|
|
|
my $hl = '__'; |
|
93
|
0
|
|
|
|
|
|
foreach (map {@$_} @{$hit->{ctx_}}) { |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
94
|
0
|
0
|
0
|
|
|
|
if ($ary eq \@l && ref($_) && ($useMatchIds ? $_->{hl_}==$targetMatchId : $_->{hl_})) { |
|
|
|
0
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
95
|
0
|
|
|
|
|
|
$ary=\@c; |
|
96
|
0
|
0
|
0
|
|
|
|
$p=$_->{page} if (!$p && defined($_->{page})); |
|
97
|
|
|
|
|
|
|
} |
|
98
|
|
|
|
|
|
|
elsif ($ary eq \@c) { |
|
99
|
0
|
|
|
|
|
|
$ary = \@r; |
|
100
|
|
|
|
|
|
|
} |
|
101
|
0
|
0
|
|
|
|
|
$hl = ($ary eq \@c ? '__' : '_'); |
|
102
|
|
|
|
|
|
|
push(@$ary, (ref($_) |
|
103
|
|
|
|
|
|
|
? ($_->{hl_} |
|
104
|
|
|
|
|
|
|
? ($hl.$_->{$fkey}.$hl.($useMatchIds ? "/$_->{hl_}" : '')) |
|
105
|
0
|
0
|
|
|
|
|
: $_->{$fkey}) |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
: $_)); |
|
107
|
|
|
|
|
|
|
} |
|
108
|
|
|
|
|
|
|
|
|
109
|
0
|
|
|
|
|
|
my $ls = join(' ', @l); |
|
110
|
0
|
|
|
|
|
|
my $rs = join(' ', @r); |
|
111
|
0
|
0
|
|
|
|
|
substr($ls, 0, length($ls)-$xlen+3, '...') if (length($ls) > $xlen); |
|
112
|
0
|
0
|
|
|
|
|
substr($rs, $xlen-3, length($rs)-$xlen+3, '...') if (length($rs) > $xlen); |
|
113
|
|
|
|
|
|
|
|
|
114
|
0
|
|
|
|
|
|
push(@hits,[$hnum++, "[$f:$p]", $ls, join(' ',@c), $rs]); |
|
115
|
|
|
|
|
|
|
} |
|
116
|
|
|
|
|
|
|
|
|
117
|
0
|
|
|
|
|
|
my $ln = maxlen(map {$_->[0]} @hits); |
|
|
0
|
|
|
|
|
|
|
|
118
|
0
|
|
|
|
|
|
my $lf = maxlen(map {$_->[1]} @hits); |
|
|
0
|
|
|
|
|
|
|
|
119
|
0
|
|
|
|
|
|
my $ll = maxlen(map {$_->[2]} @hits); |
|
|
0
|
|
|
|
|
|
|
|
120
|
0
|
|
|
|
|
|
my $lc = maxlen(map {$_->[3]} @hits); |
|
|
0
|
|
|
|
|
|
|
|
121
|
0
|
|
|
|
|
|
my $lr = maxlen(map {$_->[4]} @hits); |
|
|
0
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
return ( |
|
123
|
|
|
|
|
|
|
"# Hit(s) $hits[0][0]-$hits[$#hits][0] of $hits->{nhits_}" |
|
124
|
|
|
|
|
|
|
.($hits->{hint_} ? " {hint_}>" : '') |
|
125
|
|
|
|
|
|
|
."\n" |
|
126
|
0
|
0
|
|
|
|
|
.join('', map {sprintf("%${ln}d: %-${lf}s %${ll}s %-${lc}s %-${lr}s\n", @$_)} @hits) |
|
|
0
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
); |
|
128
|
|
|
|
|
|
|
} |
|
129
|
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
1; ##-- be happy |
|
131
|
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
__END__ |