line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package String::Normal::Type::Title; |
2
|
9
|
|
|
9
|
|
32
|
use strict; |
|
9
|
|
|
|
|
11
|
|
|
9
|
|
|
|
|
182
|
|
3
|
9
|
|
|
9
|
|
23
|
use warnings; |
|
9
|
|
|
|
|
9
|
|
|
9
|
|
|
|
|
143
|
|
4
|
9
|
|
|
9
|
|
27
|
use String::Normal::Type; |
|
9
|
|
|
|
|
7
|
|
|
9
|
|
|
|
|
108
|
|
5
|
9
|
|
|
9
|
|
21
|
use String::Normal::Config; |
|
9
|
|
|
|
|
8
|
|
|
9
|
|
|
|
|
123
|
|
6
|
|
|
|
|
|
|
|
7
|
9
|
|
|
9
|
|
23
|
use Lingua::Stem; |
|
9
|
|
|
|
|
8
|
|
|
9
|
|
|
|
|
2971
|
|
8
|
|
|
|
|
|
|
our $STEM; |
9
|
|
|
|
|
|
|
our $title_stem; |
10
|
|
|
|
|
|
|
our $title_stop; |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
sub transform { |
13
|
6
|
|
|
6
|
1
|
7
|
my ($self,$value) = @_; |
14
|
|
|
|
|
|
|
|
15
|
6
|
50
|
|
|
|
22
|
$value =~ s/\([^)]*\)/ /g if $value =~ /^[^(]|[^)]$/; |
16
|
|
|
|
|
|
|
|
17
|
6
|
|
|
|
|
12
|
$value = String::Normal::Type::_scrub_value( $value ); |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
# tokenize and stem |
20
|
6
|
|
|
|
|
8
|
my @tokens = (); |
21
|
6
|
|
|
|
|
15
|
for my $token (split ' ', $value) { |
22
|
|
|
|
|
|
|
#$token = defined( $title_stem->{$token} ) ? $title_stem->{$token} : $token; |
23
|
24
|
|
|
|
|
27
|
push @tokens, $token; |
24
|
|
|
|
|
|
|
} |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
# Remove all middle stop words that are safe to remove, based on the number of |
27
|
|
|
|
|
|
|
# tokens, of course. |
28
|
|
|
|
|
|
|
my @filtered = map { |
29
|
6
|
|
100
|
|
|
9
|
my $count = $title_stop->{middle}{$_} || ''; |
|
24
|
|
|
|
|
56
|
|
30
|
24
|
100
|
66
|
|
|
71
|
(length $count and @tokens >= $count) ? () : $_; |
31
|
|
|
|
|
|
|
} @tokens; |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
# stem, but override if Stemmer "blanks out" token |
34
|
6
|
|
|
|
|
10
|
my @copy = @filtered; |
35
|
6
|
|
|
|
|
20
|
$STEM->stem_in_place( @copy ); |
36
|
6
|
|
|
|
|
2976
|
for my $i (0 .. $#copy) { |
37
|
13
|
100
|
|
|
|
32
|
$filtered[$i] = $copy[$i] unless $filtered[$i] =~ /\d/; |
38
|
|
|
|
|
|
|
} |
39
|
|
|
|
|
|
|
|
40
|
6
|
|
|
|
|
34
|
return join ' ', @filtered; |
41
|
|
|
|
|
|
|
} |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
sub new { |
44
|
1
|
|
|
1
|
1
|
2
|
my $self = shift; |
45
|
1
|
|
|
|
|
4
|
$title_stem = String::Normal::Config::TitleStem::_data( @_ ); |
46
|
1
|
|
|
|
|
4
|
$title_stop = String::Normal::Config::TitleStop::_data( @_ ); |
47
|
1
|
|
|
|
|
7
|
$STEM = Lingua::Stem->new; |
48
|
1
|
|
|
|
|
39
|
$STEM->add_exceptions( $title_stem ); |
49
|
1
|
|
|
|
|
51
|
return bless {@_}, $self; |
50
|
|
|
|
|
|
|
} |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
1; |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
__END__ |