| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package My::Tagger;

use strict;
use warnings;

# Subclass of Lingua::EN::Tagger that overrides stem() so that every
# word -> stem mapping it produces is remembered.  stems() exposes the
# remembered mappings in reverse (stem -> word) so that a stemmed term can
# be turned back into a readable word.
#
# NOTE: this package does not load Lingua::EN::Tagger or Lingua::Stem::En
# itself; both must already be loaded before its methods are called.
@My::Tagger::ISA = qw(Lingua::EN::Tagger);

# word => stem, shared by every My::Tagger instance in the process.
my %known_stems;

# stem($word)
#
# Returns the stem of $word, or $word itself when stemming is switched off
# for this tagger ($self->{'stem'} is false), when $word is undefined, or
# when the stemmer produces no result.  Successful lookups are cached in
# %known_stems.
sub stem {
    my ( $self, $word ) = @_;

    return $word unless defined $word && $self->{'stem'};
    return $known_stems{$word} if exists $known_stems{$word};

    my $stemref = Lingua::Stem::En::stem( -words => [$word] );

    # Without an explicit return here the sub used to hand back the (false)
    # value of the trailing condition whenever the stemmer returned nothing,
    # silently replacing the word with an empty string.
    return $word
        unless ref($stemref) eq 'ARRAY' && defined $stemref->[0];

    return $known_stems{$word} = $stemref->[0];
}

# stems()
#
# Returns the cache inverted into a flat stem => word list.  When several
# words share a stem only one of them survives the inversion; which one
# depends on hash iteration order.
sub stems {
    return reverse %known_stems;
}
|
14
|
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
# To test: |
|
16
|
|
|
|
|
|
|
package Lingua::EN::Keywords;

require 5.005_62;
use strict;
use warnings;

use Lingua::EN::Tagger;

require Exporter;
our @ISA     = qw(Exporter);
our @EXPORT  = qw(keywords);
our $VERSION = '2.0';

# Upper bound on the number of keywords handed back by keywords().
my $MAX_KEYWORDS = 6;

# One tagger shared by every call; it is built when this file is loaded.
my $t = My::Tagger->new( longest_noun_phrase => 5, weight_noun_phrases => 0 );

# keywords($text)
#
# Asks the tagger for the weighted terms of $text, maps each term back to a
# remembered unstemmed word, sums the weights of terms that map to the same
# word, and returns the highest-scoring words, best first.
#
# Returns at most $MAX_KEYWORDS words.  When fewer are available only those
# are returned (the list is never padded with undef), and an empty list is
# returned when the tagger finds nothing.  Equal scores are ordered
# alphabetically so that the result is reproducible.
sub keywords {
    my ($text) = @_;

    my %weight_of = $t->get_words($text);

    # Invert the stem table once per call rather than once per term.
    my %word_for_stem = $t->stems;

    my %score_for;
    for my $term ( keys %weight_of ) {
        $score_for{ unstem( $term, \%word_for_stem ) } += $weight_of{$term};
    }

    my @ranked =
        sort { $score_for{$b} <=> $score_for{$a} or $a cmp $b }
        keys %score_for;

    # Clamp the slice: indexing past the end of the list would yield undef
    # entries.
    my $last = $#ranked < $MAX_KEYWORDS - 1 ? $#ranked : $MAX_KEYWORDS - 1;
    return @ranked[ 0 .. $last ];
}

# unstem($word [, \%word_for_stem])
#
# Returns the word remembered for the stem $word, or $word itself when
# nothing (or only a false value) is remembered for it.  The optional
# second argument is a ready-made stem => word hash reference; when it is
# omitted the table is fetched from the shared tagger.
sub unstem {
    my ( $word, $word_for_stem ) = @_;

    $word_for_stem ||= { $t->stems };

    return $word_for_stem->{$word} || $word;
}

# Manual smoke test, left disabled.  The input expression was missing from
# the copy this was taken from; <STDIN> is the presumed source -- TODO confirm.
#undef $/;
#my $in = <STDIN>;
#print ((join " ", ((),keywords($in))),"\n");

1;
|
43
|
|
|
|
|
|
|
__END__ |