File Coverage

blib/lib/Lingua/TFIDF/WordSegmenter/LetterNgram.pm

Criterion	Covered	Total	%
statement	28	28	100.0
branch	7	8	87.5
condition			n/a
subroutine	8	8	100.0
pod	2	3	66.6
total	45	47	95.7

line	stmt	bran	sub	pod	time	code
1						package Lingua::TFIDF::WordSegmenter::LetterNgram;
2
3						# ABSTRACT: Letter N-gram segmenter
4
5	2		2		2730	use strict;
	2				4
	2				81
6	2		2		13	use warnings;
	2				4
	2				66
7	2		2		11	use Carp qw//;
	2				7
	2				36
8	2		2		1748	use Smart::Args;
	2				50683
	2				555
9
10						sub new {
11	2		2	1	1616	args
12						my $class => 'ClassName',
13						my $n => 'Int';
14
15	2	50			271	Carp::croak('Word length must be 1+.') if $n <= 0;
16
17	2				18	bless +{ n => $n } => $class;
18						}
19
20	9		9	0	30	sub n { $_[0]->{n} }
21
22						sub segment {
23	9		9	1	1113	args_pos
24						my $self,
25						my $document => 'Ref | Str';
26
27	9	100			799	$document = \"$document" unless ref $document;
28
29	9				32	my $length = length $$document;
30	9				14	my $index = -1;
31	9				22	my $n = $self->n;
32						sub {
33	615				625	GET_NEXT_NGRAM:
34						{
35	480		480		1010	++$index;
36	615	100			1240	return if $index + $n > $length;
37
38	606				1338	my $ngram = substr $$document, $index, $n;
39	606	100			1511	redo GET_NEXT_NGRAM if $ngram =~ /\s/;
40	471				1660	return $ngram;
41						}
42	9				55	};
43						}
44
45						1;
46
47						__END__
48
49						=pod
50
51						=encoding UTF-8
52
53						=head1 NAME
54
55						Lingua::TFIDF::WordSegmenter::LetterNgram - Letter N-gram segmenter
56
57						=head1 VERSION
58
59						version 0.01
60
61						=head1 SYNOPSIS
62
63						use Lingua::TFIDF::WordSegmenter::LetterNgram;
64
65						my $segmenter = Lingua::TFIDF::WordSegmenter::LetterNgram->new(n => 2);
66						my $iter = $segmenter->segment('ロンドン橋落ちた　落ちた　落ちた...');
67						while (defined(my $word = $iter->())) { ... }
68
69						=head1 DESCRIPTION
70
71						This class provides an N-gram word segmenter.
72
73						=head1 METHODS
74
75						=head2 new(n => $n)
76
77						Constructor.
78
79						=head2 segment($document | \$document)
80
81						Executes word segmentation on given C<$document> and returns a word iterator.
82
83						=head1 AUTHOR
84
85						Koichi SATOH <sekia@cpan.org>
86
87						=head1 COPYRIGHT AND LICENSE
88
89						This software is Copyright (c) 2014 by Koichi SATOH.
90
91						This is free software, licensed under:
92
93						The MIT (X11) License
94
95						=cut