| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Text::Context::Para; |
|
2
|
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
=head1 NAME |
|
4
|
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
Text::Context::Para - A paragraph in context |
|
6
|
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
8
|
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
This is a paragraph being used by Text::Context. |
|
10
|
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
=cut |
|
12
|
|
|
|
|
|
|
|
|
13
|
4
|
|
|
4
|
|
26
|
use strict; |
|
|
4
|
|
|
|
|
9
|
|
|
|
4
|
|
|
|
|
160
|
|
|
14
|
4
|
|
|
4
|
|
22
|
use warnings; |
|
|
4
|
|
|
|
|
9
|
|
|
|
4
|
|
|
|
|
126
|
|
|
15
|
|
|
|
|
|
|
|
|
16
|
4
|
|
|
4
|
|
5454
|
use HTML::Entities; |
|
|
4
|
|
|
|
|
54326
|
|
|
|
4
|
|
|
|
|
513
|
|
|
17
|
4
|
|
|
4
|
|
22281
|
use Text::Context::EitherSide qw(get_context); |
|
|
4
|
|
|
|
|
39831
|
|
|
|
4
|
|
|
|
|
338
|
|
|
18
|
|
|
|
|
|
|
|
|
19
|
4
|
|
|
4
|
|
35
|
use constant DEFAULT_START_TAG => ''; |
|
|
4
|
|
|
|
|
10
|
|
|
|
4
|
|
|
|
|
222
|
|
|
20
|
4
|
|
|
4
|
|
21
|
use constant DEFAULT_END_TAG => ""; |
|
|
4
|
|
|
|
|
10
|
|
|
|
4
|
|
|
|
|
2448
|
|
|
21
|
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
=head1 CONSTRUCTOR |
|
23
|
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
=head2 new |
|
25
|
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
my $para = Text::Context::Para->new($content, $order); |
|
27
|
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
=cut |
|
29
|
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
sub new { |
|
31
|
83
|
|
|
83
|
1
|
311
|
my ($class, $content, $order) = @_; |
|
32
|
83
|
|
|
|
|
1144
|
return bless { |
|
33
|
|
|
|
|
|
|
content => $content, |
|
34
|
|
|
|
|
|
|
scoretable => [], |
|
35
|
|
|
|
|
|
|
marked_words => [], |
|
36
|
|
|
|
|
|
|
final_score => 0, |
|
37
|
|
|
|
|
|
|
order => $order |
|
38
|
|
|
|
|
|
|
}, $class; |
|
39
|
|
|
|
|
|
|
} |
|
40
|
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
=head1 METHODS |
|
42
|
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
=head2 best_keywords / slim |
|
44
|
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
=head2 as_text / marked_up |
|
46
|
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
You can override DEFAULT_START_TAG and DEFAULT_END_TAG. These default to |
|
48
|
|
|
|
|
|
|
and |
|
49
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
=cut |
|
51
|
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
sub best_keywords { |
|
54
|
20
|
|
|
20
|
1
|
621
|
my $self = shift; |
|
55
|
20
|
100
|
|
|
|
25
|
return @{ $self->{scoretable}->[-1] || [] }; |
|
|
20
|
|
|
|
|
147
|
|
|
56
|
|
|
|
|
|
|
} |
|
57
|
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
sub slim { |
|
59
|
8
|
|
|
8
|
1
|
13
|
my ($self, $max_weight) = @_; |
|
60
|
8
|
|
|
|
|
45
|
$self->{content} =~ s/^\s+//; |
|
61
|
8
|
|
|
|
|
188
|
$self->{content} =~ s/\s+$//; |
|
62
|
8
|
100
|
|
|
|
49
|
return $self if length $self->{content} <= $max_weight; |
|
63
|
4
|
|
|
|
|
228
|
my @words = split /\s+/, $self->{content}; |
|
64
|
4
|
|
|
|
|
29
|
for (reverse(0 .. @words / 2)) { |
|
65
|
70
|
|
|
|
|
1109
|
my $trial = get_context($_, $self->{content}, @{ $self->{marked_words} }); |
|
|
70
|
|
|
|
|
272
|
|
|
66
|
70
|
100
|
|
|
|
93340
|
if (length $trial < $max_weight) { |
|
67
|
4
|
|
|
|
|
14
|
$self->{content} = $trial; |
|
68
|
4
|
|
|
|
|
75
|
return $self; |
|
69
|
|
|
|
|
|
|
} |
|
70
|
|
|
|
|
|
|
} |
|
71
|
0
|
|
|
|
|
0
|
$self->{content} = join " ... ", @{ $self->{marked_words} }; |
|
|
0
|
|
|
|
|
0
|
|
|
72
|
0
|
|
|
|
|
0
|
return $self; # Should not happen. |
|
73
|
|
|
|
|
|
|
} |
|
74
|
|
|
|
|
|
|
|
|
75
|
10
|
|
|
10
|
1
|
1504
|
sub as_text { return $_[0]->{content} } |
|
76
|
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
sub marked_up { |
|
78
|
4
|
|
|
4
|
1
|
10
|
my $self = shift; |
|
79
|
4
|
|
100
|
|
|
286
|
my $start_tag = shift || DEFAULT_START_TAG; |
|
80
|
4
|
|
100
|
|
|
27
|
my $end_tag = shift || DEFAULT_END_TAG; |
|
81
|
4
|
|
|
|
|
15
|
my $content = $self->as_text; |
|
82
|
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
# Need to escape entities in here. |
|
84
|
4
|
|
|
|
|
19
|
my $re = join "|", map { qr/\Q$_\E/i } @{ $self->{marked_words} }; |
|
|
7
|
|
|
|
|
109
|
|
|
|
4
|
|
|
|
|
12
|
|
|
85
|
4
|
|
|
|
|
193
|
my $re2 = qr/\b($re)\b/i; |
|
86
|
4
|
|
|
|
|
57
|
my @fragments = split /$re2/i, $content; |
|
87
|
4
|
|
|
|
|
11
|
my $output; |
|
88
|
4
|
|
|
|
|
13
|
for my $orig_frag (@fragments) { |
|
89
|
17
|
|
|
|
|
57
|
my $frag = encode_entities($orig_frag); |
|
90
|
17
|
100
|
|
|
|
1731
|
if ($orig_frag =~ /$re2/i) { |
|
91
|
7
|
|
|
|
|
18
|
$frag = $start_tag . $frag . $end_tag; |
|
92
|
|
|
|
|
|
|
} |
|
93
|
17
|
|
|
|
|
45
|
$output .= $frag; |
|
94
|
|
|
|
|
|
|
} |
|
95
|
4
|
|
|
|
|
55
|
return $output; |
|
96
|
|
|
|
|
|
|
} |
|
97
|
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
1; |