| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Lingua::JA::Summarize::Extract::Plugin::Parser::NgramSimple; |
|
2
|
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
7
|
use strict; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
43
|
|
|
4
|
1
|
|
|
1
|
|
6
|
use base qw( Lingua::JA::Summarize::Extract::Plugin ); |
|
|
1
|
|
|
|
|
50
|
|
|
|
1
|
|
|
|
|
204
|
|
|
5
|
|
|
|
|
|
|
__PACKAGE__->mk_accessors(qw/ gram /); |
|
6
|
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
sub parse { |
|
8
|
0
|
|
|
0
|
0
|
|
my ($self) = @_; |
|
9
|
0
|
|
0
|
|
|
|
my $gram = $self->gram || 2; |
|
10
|
|
|
|
|
|
|
|
|
11
|
0
|
|
|
|
|
|
my $term_list = {}; |
|
12
|
0
|
|
|
|
|
|
my $text = $self->text; |
|
13
|
1
|
|
|
1
|
|
1093
|
while ($text =~ /([^\p{Common}]+)/g) { |
|
|
1
|
|
|
|
|
10
|
|
|
|
1
|
|
|
|
|
12
|
|
|
|
0
|
|
|
|
|
|
|
|
14
|
0
|
|
|
|
|
|
my $word = $1; |
|
15
|
0
|
|
|
|
|
|
my @part; |
|
16
|
0
|
|
|
|
|
|
for (my $i = 0;$i + $gram <= length $word;$i++) { |
|
17
|
0
|
|
|
|
|
|
push @part, substr $word, $i, $gram; |
|
18
|
|
|
|
|
|
|
} |
|
19
|
0
|
0
|
|
|
|
|
$term_list->{join ' ', @part}++ if @part; |
|
20
|
|
|
|
|
|
|
} |
|
21
|
0
|
|
|
|
|
|
$term_list; |
|
22
|
|
|
|
|
|
|
} |
|
23
|
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
1; |
|
25
|
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
__END__ |