File Coverage

blib/lib/Hailo/Role/Tokenizer.pm
Criterion Covered Total %
statement 16 16 100.0
branch n/a
condition n/a
subroutine 5 5 100.0
pod 0 1 0.0
total 21 22 95.4


line stmt bran cond sub pod time code
1             package Hailo::Role::Tokenizer;
2             our $AUTHORITY = 'cpan:AVAR';
3             $Hailo::Role::Tokenizer::VERSION = '0.75';
4 30     30   20877 use v5.10.0;
  30         120  
5 30     30   221 use Moose::Role;
  30         69  
  30         263  
6 30     30   109015 use MooseX::Types::Moose ':all';
  30         68  
  30         288  
7 30     30   245541 use namespace::clean -except => 'meta';
  30         76  
  30         287  
8              
9             has spacing => (  # maps each spacing-type name to the integer code stored with a token
10             isa => HashRef[Int],
11             is => 'rw',
12             default => sub { {
13             normal => 0,  # normal token
14             prefix => 1,  # no whitespace follows it
15             postfix => 2,  # no whitespace precedes it
16             infix => 3,  # no whitespace follows or precedes it
17             } },
18             );
19              
20             sub BUILD {
21 262     262 0 570 my ($self) = @_;
22              
23             # Performance hack: calling $self->spacing->{...} was a significant
24             # part of Tokenizer execution time (~20s / ~1200s), since each call
25             # costs a method call plus a hash dereference. Copy every value
26             # into a flat "_spacing_<name>" slot on the object instead.
27              
28 262         7401 my $spacing = $self->spacing;
29 262         1115 while (my ($k, $v) = each %$spacing) {
30 1048         3339 $self->{"_spacing_$k"} = $v;
31             }
32              
33 262         6619 return;
34             }
35              
36             requires 'make_tokens';  # see "make_tokens" in the POD below
37             requires 'make_output';  # see "make_output" in the POD below
38              
39             1;
40              
41             =encoding utf8
42              
43             =head1 NAME
44              
45             Hailo::Role::Tokenizer - A role representing a L<Hailo|Hailo> tokenizer
46              
47             =head1 METHODS
48              
49             =head2 C<new>
50              
51             This is the constructor. It takes no arguments.
52              
53             =head2 C<make_tokens>
54              
55             Takes a line of input and returns an array reference of tokens. A token is
56             an array reference containing two elements: a I<spacing attribute> and the
57             I<token text>. The spacing attribute is an integer which will be stored along
58             with the token text in the database. The following values are currently being
59             used:
60              
61             =over
62              
63             =item C<0> - normal token
64              
65             =item C<1> - prefix token (no whitespace follows it)
66              
67             =item C<2> - postfix token (no whitespace precedes it)
68              
69             =item C<3> - infix token (no whitespace follows or precedes it)
70              
71             =back
72              
73             =head2 C<make_output>
74              
75             Takes an array reference of tokens and returns a line of output. A token is
76             an array reference as described in L<C<make_tokens>|/make_tokens>. The tokens
77             will be joined together into a sentence according to the spacing
78             attributes associated with the tokens, as well as any formatting provided by
79             the tokenizer implementation.
80              
81             =head1 AUTHORS
82              
83             Hinrik E<Ouml>rn SigurE<eth>sson, hinrik.sig@gmail.com
84              
85             E<AElig>var ArnfjE<ouml>rE<eth> Bjarmason <avar@cpan.org>
86              
87             =head1 LICENSE AND COPYRIGHT
88              
89             Copyright 2010 Hinrik E<Ouml>rn SigurE<eth>sson and
90             E<AElig>var ArnfjE<ouml>rE<eth> Bjarmason <avar@cpan.org>
91              
92             This program is free software; you can redistribute it and/or modify
93             it under the same terms as Perl itself.
94              
95             =cut