File Coverage

blib/lib/Treex/Tool/Parser/MSTperl/ModelUnlabelled.pm
Criterion Covered Total %
statement 1 3 33.3
branch n/a
condition n/a
subroutine 1 1 100.0
pod n/a
total 2 4 50.0


line stmt bran cond sub pod time code
1             package Treex::Tool::Parser::MSTperl::ModelUnlabelled;
2             {
3             $Treex::Tool::Parser::MSTperl::ModelUnlabelled::VERSION = '0.11949';
4             }
5              
6 1     1   2606 use Moose;
  0            
  0            
7              
8             extends 'Treex::Tool::Parser::MSTperl::ModelBase';
9              
10             # TODO: features indexed? (i.e. weights would be an ArrayRef etc.)
11             # It would help to push down the size of edge_features_cache
12             # (no speedup or slowdown is expected).
# The model itself: a hash reference holding the weight of every feature.
# Each instance starts with its own fresh, empty hash.
has weights => (
    isa     => 'HashRef',
    is      => 'rw',
    default => sub { return {}; },
);
18              
# Moose construction hook: takes the unlabelled features control from the
# configuration and installs it as the features control of this model.
sub BUILD {
    my $self = shift;

    my $features_control = $self->config->unlabelledFeaturesControl;
    $self->featuresControl($features_control);

    return;
}
26              
27             # STORING AND LOADING
28              
# Returns the data to be written when the model is stored: the weights hash
# reference itself (no copy is made).
sub get_data_to_store {
    my $self = shift;

    my $weights = $self->weights;

    return $weights;
}
34              
# Serialises the model into TSV lines.
#
# Keys of the weights hash have the form "<feature index>:<value>". Each key
# is written as "<feature code>:<value>\t<weight>", where the feature code is
# looked up in featuresControl->feature_codes by the feature index.
#
# Returns: an array reference of lines (without trailing newlines), ordered
# by the sorted weights keys so that dumping the same model twice gives the
# same output.
# Keys that are not in the expected format, or whose index has no feature
# code, are reported on STDERR and left out of the result.
sub get_data_to_store_tsv {
    my ($self) = @_;

    my $weights       = $self->weights;
    my $feature_codes = $self->featuresControl->feature_codes;

    my @result;
    foreach my $feature ( sort keys %{$weights} ) {
        if ( $feature =~ /^([0-9]+):(.*)$/ ) {
            my ( $index, $value ) = ( $1, $2 );
            my $code = $feature_codes->[$index];

            # an index without a code would produce a ":value" line
            # that cannot be mapped back to an index when loading
            if ( !defined $code ) {
                print STDERR
                    "Feature $feature has unknown feature index $index!\n";
                next;
            }

            push @result, "$code:$value" . "\t" . $weights->{$feature};
        } else {
            print STDERR "Feature $feature is not in correct format!\n";
        }
    }

    return \@result;
}
53              
# Replaces the weights of the model with $data and reports whether the model
# now contains anything.
# Returns 1 if the weights hash is non-empty after loading, 0 otherwise.
sub load_data {
    my $self = shift;
    my $data = shift;

    $self->weights($data);

    my $loaded_count = keys %{ $self->weights };

    return $loaded_count ? 1 : 0;
}
66              
# Loads the model from TSV lines.
#
# $data is an array reference of lines of the form
# "<feature code>:<value>\t<weight>". Each feature code is translated to its
# index in featuresControl->feature_codes and the weight is stored under the
# key "<feature index>:<value>". The weights of the model are replaced by
# the result.
#
# Blank lines are skipped. Lines without a weight, features that are not in
# the expected format and features with an unknown feature code are reported
# on STDERR and skipped. A trailing line terminator is not made part of the
# weight. The lines in $data are not modified.
#
# Returns 1 if at least one weight was loaded, 0 otherwise.
sub load_data_tsv {

    my ( $self, $data ) = @_;

    # precompute feature code to feature index translation table
    my %code2index;
    my $feature_codes = $self->featuresControl->feature_codes;
    my $feature_num   = $self->featuresControl->feature_count;
    foreach my $feature_index ( 0 .. $feature_num - 1 ) {
        my $code = $feature_codes->[$feature_index];
        next if !defined $code;
        $code2index{$code} = $feature_index;
    }

    my %weights;
    foreach my $line ( @{$data} ) {
        next if !defined $line;

        # work on a copy, $line is an alias into the caller's array
        my $record = $line;
        $record =~ s/[\r\n]+$//;
        next if $record eq '';

        my ( $feature, $weight ) = split /\t/, $record;
        if ( !defined $weight ) {
            print STDERR "Line '$record' does not contain a weight!\n";
            next;
        }

        if ( $feature =~ /^([^:]+):(.*)$/ ) {
            my ( $code, $value ) = ( $1, $2 );
            my $index = $code2index{$code};

            # without this check the weight would end up under ":$value"
            if ( !defined $index ) {
                print STDERR
                    "Feature $feature has unknown feature code $code!\n";
                next;
            }

            $weights{"$index:$value"} = $weight;
        } else {
            print STDERR "Feature $feature is not in correct format!\n";
        }
    }

    $self->weights( \%weights );

    return scalar( keys %{ $self->weights } ) ? 1 : 0;
}
107              
108             # FEATURE WEIGHTS
109              
# Scores a single edge: asks the features control for all features of the
# edge and passes them to score_features.
# (Treex::Tool::Parser::MSTperl::Edge $edge)
sub score_edge {
    my ( $self, $edge ) = ( shift, shift );

    my $features_control = $self->featuresControl;
    my $edge_features    = $features_control->get_all_features($edge);

    return $self->score_features($edge_features);
}
118              
# Scores a whole sentence by passing the features of the sentence to
# score_features.
# (Treex::Tool::Parser::MSTperl::Sentence $sentence)
sub score_sentence {
    my $self     = shift;
    my $sentence = shift;

    return scalar $self->score_features( $sentence->features );
}
128              
# Sums up the weights (as given by get_feature_weight) of all the features
# in the array reference; an empty list of features scores 0.
# (ArrayRef[Str] $features)
sub score_features {
    my $self     = shift;
    my $features = shift;

    my $total = 0;
    for my $feature_name ( @{$features} ) {
        my $weight = $self->get_feature_weight($feature_name);
        $total += $weight;
    }

    return $total;
}
141              
# Returns the weight stored for the feature, or 0 when there is no (true)
# weight stored for it.
# (Str $feature)
sub get_feature_weight {
    my $self    = shift;
    my $feature = shift;

    return $self->weights->{$feature} || 0;
}
154              
# Returns 1 if there is no true weight stored for the feature, 0 otherwise.
# Note that the test is on the truth of the weight, so a feature stored with
# a weight of 0 is reported as unknown as well.
# (Str $feature)
sub feature_is_unknown {
    my $self    = shift;
    my $feature = shift;

    return $self->weights->{$feature} ? 0 : 1;
}
167              
# Stores $weight as the weight of $feature, overwriting any previous weight.
# Returns nothing.
# (Str $feature, Num $weight)
sub set_feature_weight {
    my $self = shift;
    my ( $feature, $weight ) = @_;

    $self->weights->{$feature} = $weight;

    return;
}
177              
# Adds $update (which may be negative) to the weight of $feature; a feature
# without a weight so far ends up with $update as its weight.
# Returns nothing.
# (Str $feature, Num $update)
sub update_feature_weight {
    my $self = shift;
    my ( $feature, $update ) = @_;

    $self->weights->{$feature} += $update;

    return;
}
187              
# Returns the number of features in the model, i.e. the number of keys in
# the weights hash.
sub get_feature_count {
    my $self = shift;

    my $feature_total = keys %{ $self->weights };

    return $feature_total;
}
194              
195             1;
196              
197             __END__
198              
199             =pod
200              
201             =for Pod::Coverage BUILD
202              
203             =encoding utf-8
204              
205             =head1 NAME
206              
207             Treex::Tool::Parser::MSTperl::ModelUnlabelled
208              
209             =head1 VERSION
210              
211             version 0.11949
212              
213             =head1 DESCRIPTION
214              
215             This is an in-memory representation of a parsing model,
216             extended from L<Treex::Tool::Parser::MSTperl::ModelBase>.
217              
218             The model is represented by features and their weights.
219              
220             =head1 FIELDS
221              
222             =head2 Feature weights
223              
224             =over 4
225              
226             =item weights
227              
228             A hash reference containing weights of all features. This is the actual model.
229              
230             =back
231              
232             =head1 METHODS
233              
234             =head2 Access to feature weights
235              
236             =over 4
237              
238             =item my $edge_score = $model->score_edge($edge);
239              
240             Computes the score of an edge by summing up weights of all of its features.
241              
242             =item my $sentence_score = $model->score_sentence($sentence)
243              
244             Returns score of the sentence (by calling
245             C<score_features> on the sentence features).
246              
247             =item my $score = $model->score_features(['0:být|VB', '1:pes|N1', ...]);
248              
249             Computes the score of an edge or sentence by summing up weights of all of its
250             features, which are passed as an array reference.
251              
252             =item my $feature_weight = $model->get_feature_weight('1:pes|N1');
253              
254             Returns the weight of a given feature,
255             or C<0> if the feature is not contained in the model.
256              
257             =item $model->set_feature_weight('1:pes|N1', 0.0021);
258              
259             Sets a new weight for a given feature.
260              
261             =item $model->update_feature_weight('1:pes|N1', 0.0042);
262              
263             Adds the update value to current feature weight - e.g. if the weight of the
264             feature C<'1:pes|N1'> is currently C<0.0021>, it will be C<0.0063> after the
265             call.
266             The update can also be negative - then the weight of the feature decreases.
267              
268             =back
269              
270             =head1 AUTHORS
271              
272             Rudolf Rosa <rosa@ufal.mff.cuni.cz>
273              
274             =head1 COPYRIGHT AND LICENSE
275              
276             Copyright © 2011 by Institute of Formal and Applied Linguistics, Charles
277             University in Prague
278              
279             This module is free software; you can redistribute it and/or modify it under
280             the same terms as Perl itself.