File Coverage

blib/lib/Treex/Tool/Parser/MSTperl/ModelLabelling.pm
Criterion Covered Total %
statement 1 3 33.3
branch n/a
condition n/a
subroutine 1 1 100.0
pod n/a
total 2 4 50.0


line stmt bran cond sub pod time code
1             package Treex::Tool::Parser::MSTperl::ModelLabelling;
2             {
3             $Treex::Tool::Parser::MSTperl::ModelLabelling::VERSION = '0.11949';
4             }
5              
6 1     1   4198 use Moose;
  0            
  0            
7             use Carp;
8              
9             extends 'Treex::Tool::Parser::MSTperl::ModelBase';
10              
# Unigram statistics obtained by plain MLE from the data:
#   unigrams->{label} = prob
# Meant for smoothing and/or backoff; usable for both emissions and
# transitions. The SEQUENCE_BOUNDARY_LABEL is included as well (it is
# counted once for each sequence), which might be inappropriate in some
# cases (e.g. for emission probabilities).
has unigrams => (
    is      => 'rw',
    isa     => 'HashRef',
    default => sub { return {}; },
);

# Transition scores for Viterbi.
# If computed by MIRA, the structure is
#   transitions->{feature}->{label_prev}->{label_this} = score
# if obtained by MLE, it holds probabilities:
#   transitions->{label_prev}->{label_this} = prob
# (With MLE, raw counts are stored here temporarily during the
# precomputing phase; they are converted to probabilities
# when prepare_for_mira() is called.)
has transitions => (
    is      => 'rw',
    isa     => 'HashRef',
    default => sub { return {}; },
);

# Smoothing parameters (lambdas) of the transition probabilities,
# to be computed by the EM algorithm:
#   PROB(label|prev_label) =
#       smooth_bigrams  * transitions->{prev_label}->{label} +
#       smooth_unigrams * unigrams->{label} +
#       smooth_uniform  * uniform_prob

has smooth_bigrams => (
    is      => 'rw',
    isa     => 'Num',
    default => 0.6,
);

has smooth_unigrams => (
    is      => 'rw',
    isa     => 'Num',
    default => 0.3,
);

has smooth_uniform => (
    is      => 'rw',
    isa     => 'Num',
    default => 0.1,
);

# Uniform label probability, i.e. 1 / (number of keys in unigrams);
# it is set in compute_smoothing_params().
has uniform_prob => (
    is      => 'rw',
    isa     => 'Num',
    default => 0.02,
);

# Emission scores for Viterbi:
#   emissions->{feature}->{label} = score
has emissions => (
    is      => 'rw',
    isa     => 'HashRef',
    default => sub { return {}; },
);

# Array ref of the sentences forming the heldout data; kept here so that
# the EM algorithm can be run from prepare_for_mira().
has EM_heldout_data => (
    is      => 'rw',
    isa     => 'ArrayRef[Treex::Tool::Parser::MSTperl::Sentence]',
    default => sub { return []; },
);
85              
# Moose post-construction hook: selects the labelled features control
# from the config as the features control of this model.
sub BUILD {
    my $self = shift;

    my $labelled_control = $self->config->labelledFeaturesControl;
    $self->featuresControl($labelled_control);

    return;
}
93              
94             # STORING AND LOADING
95              
# Returns a hash ref with everything that has to be stored to be able
# to restore the model later via load_data(): the unigrams, transitions
# and emissions tables plus the smoothing parameters.
sub get_data_to_store {
    my $self = shift;

    my @stored_fields = qw(
        unigrams
        transitions
        emissions
        smooth_uniform
        smooth_unigrams
        smooth_bigrams
        uniform_prob
    );

    my %data;
    foreach my $field (@stored_fields) {
        $data{$field} = $self->$field;
    }

    return \%data;
}
109              
# Fills the model from a hash ref produced by get_data_to_store()
# and performs a sanity check of what has been loaded.
# Returns 1 if the loaded model looks usable, 0 otherwise.
sub load_data {
    my ( $self, $data ) = @_;

    # copy the stored tables and smoothing parameters into the attributes
    foreach my $field (
        qw( unigrams transitions emissions ),
        qw( smooth_uniform smooth_unigrams smooth_bigrams uniform_prob )
        )
    {
        $self->$field( $data->{$field} );
    }

    # each table must contain at least one entry
    my $has_unigrams    = scalar keys %{ $self->unigrams };
    my $has_transitions = scalar keys %{ $self->transitions };
    my $has_emissions   = scalar keys %{ $self->emissions };

    # the lambdas should sum to 1 (a small deviation is tolerated)
    # and the uniform probability must lie strictly between 0 and 1
    my $lambda_total = $self->smooth_uniform + $self->smooth_unigrams
        + $self->smooth_bigrams;
    my $lambdas_valid = (
        $lambda_total > 0.999
            && $lambda_total < 1.001
            && $self->uniform_prob > 0
            && $self->uniform_prob < 1
    );

    my $algorithm = $self->config->labeller_algorithm;

    # these algorithms do not use lambda smoothing
    # (smoothing is kind of part of the learning)
    if ( grep { $algorithm == $_ } ( 0 .. 4, 8 .. 11, 14 ) ) {
        $lambdas_valid = 1;
    }

    # these algorithms do not use separate transitions
    # (transitions are included in emissions)
    if ( $algorithm >= 20 ) {
        $has_transitions = 1;
    }

    my $all_ok = $has_unigrams
        && $has_transitions
        && $has_emissions
        && $lambdas_valid;

    return $all_ok ? 1 : 0;
}
174              
175             # TRANSITION AND EMISSION COUNTS AND PROBABILITIES
176             # (more or less standard MLE)
177              
# Increments the count of the given label in the unigrams table.
sub add_unigram {
    my ( $self, $label ) = @_;

    print "add_unigram($label)\n" if $self->config->DEBUG >= 2;

    # one more occurrence of this label
    my $counts = $self->unigrams;
    $counts->{$label} += 1;

    return;
}
190              
# Increments the count of the transition $label_prev -> $label_this.
# Arguments: (Str $label_this, Str $label_prev, Maybe[Str] $feature)
# If $feature is defined, the count is kept per feature
# (transitions->{feature}->{label_prev}->{label_this}),
# otherwise directly in transitions->{label_prev}->{label_this}.
sub add_transition {
    my ( $self, $label_this, $label_prev, $feature ) = @_;

    my $verbose = ( $self->config->DEBUG >= 2 );

    if ( !defined $feature ) {
        print "add_transition($label_this, $label_prev)\n" if $verbose;

        # plain bigram count
        $self->transitions->{$label_prev}->{$label_this} += 1;

        return;
    }

    print "add_transition($label_this, $label_prev, $feature)\n" if $verbose;

    # bigram count conditioned on the feature
    $self->transitions->{$feature}->{$label_prev}->{$label_this} += 1;

    return;
}
214              
# Increments the count of $label being emitted together with $feature
# (emissions->{feature}->{label}).
sub add_emission {
    my ( $self, $feature, $label ) = @_;

    print "add_emission($feature, $label)\n" if $self->config->DEBUG >= 3;

    my $emissions = $self->emissions;
    $emissions->{$feature}->{$label} += 1;

    return;
}
226              
# Called after preprocessing the training data, before entering the MIRA
# phase. Depending on the labeller algorithm it either
#  - initializes the summed weights of the trainer from the counts (alg 9),
#  - does nothing (alg 1, 8 and 20+),
#  - or converts the collected counts to probabilities and, for some
#    algorithms, runs the EM estimation of the smoothing parameters.
# Croaks for any other algorithm number.
# $trainer is used only by algorithm no. 9.
sub prepare_for_mira {
    my ( $self, $trainer ) = @_;

    my $algorithm = $self->config->labeller_algorithm;

    # algorithms that turn the counts into MLE probabilities
    my @mle_algorithms = ( 0, 2, 3, 4, 5, 10 .. 19 );

    if ( $algorithm == 9 ) {

        # the counts themselves are kept as they are, but emissions_summed
        # and transitions_summed of the trainer must be set accordingly
        my $weight = $trainer->number_of_inner_iterations;

        # emissions->{feature}->{label}
        my $emissions = $self->emissions;
        foreach my $feature ( keys %{$emissions} ) {
            my $by_label = $emissions->{$feature};
            foreach my $label ( keys %{$by_label} ) {
                $trainer->emissions_summed->{$feature}->{$label}
                    = $weight * $by_label->{$label};
            }
        }

        # transitions->{feature}->{label_prev}->{label_this}
        my $transitions = $self->transitions;
        foreach my $feature ( keys %{$transitions} ) {
            my $by_prev = $transitions->{$feature};
            foreach my $label_prev ( keys %{$by_prev} ) {
                my $by_this = $by_prev->{$label_prev};
                foreach my $label_this ( keys %{$by_this} ) {
                    $trainer->transitions_summed
                        ->{$feature}->{$label_prev}->{$label_this}
                        = $weight * $by_this->{$label_this};
                }
            }
        }

    } elsif ( $algorithm == 1 || $algorithm == 8 || $algorithm >= 20 ) {

        # nothing to recompute for these algorithms

    } elsif ( grep { $algorithm == $_ } @mle_algorithms ) {

        # unigram counts -> probabilities
        $self->compute_probs_from_counts( $self->unigrams );

        # transition counts -> probabilities
        foreach my $label ( keys %{ $self->transitions } ) {
            $self->compute_probs_from_counts( $self->transitions->{$label} );
        }

        # algorithms 4 and 5: emission probabilities by MLE as well
        if ( $algorithm == 4 || $algorithm == 5 ) {
            foreach my $feature ( keys %{ $self->emissions } ) {
                $self->compute_probs_from_counts(
                    $self->emissions->{$feature}
                );
            }
        }

        # algorithms 5, 12, 13 and 15-19: run the EM algorithm to estimate
        # the smoothing parameters of the transition probabilities
        if ( grep { $algorithm == $_ } ( 5, 12, 13, 15 .. 19 ) ) {
            $self->compute_smoothing_params();
        }

    } else {

        # any algorithm number not handled above (e.g. 6 or 7)
        croak "ModelLabelling->prepare_for_mira not implemented"
            . " for algorithm no. $algorithm!";
    }

    return;
}    # end prepare_for_mira
337              
# Basic MLE: converts the counts stored as values of the given hash
# into relative frequencies, in place.
#   $hashref - hash ref of the form { key => count }
# Returns nothing; $hashref is modified.
# If the counts sum to zero (an empty hash or only zero counts), there is
# nothing to normalize by, so the hash is left untouched instead of dying
# with a division by zero.
sub compute_probs_from_counts {
    my ( $self, $hashref ) = @_;

    my $sum = 0;
    foreach my $count ( values %{$hashref} ) {
        $sum += $count;
    }

    # avoid "Illegal division by zero" on degenerate input
    return if $sum == 0;

    foreach my $key ( keys %{$hashref} ) {
        $hashref->{$key} = $hashref->{$key} / $sum;
    }

    return;
}
352              
# EM algorithm estimating the linear interpolation smoothing parameters
# (lambdas) used for smoothing of the transition probabilities.
# Sets uniform_prob to 1 / (number of different labels in unigrams) and
# then iterates until the sum of changes of the lambdas drops to
# config->EM_EPSILON or below, storing the result in smooth_uniform,
# smooth_unigrams and smooth_bigrams.
# Croaks if there are no unigrams (the uniform probability is undefined).
# If the expected counts sum to zero (e.g. no heldout data), the lambdas
# cannot be re-estimated: a warning is issued and the current lambdas
# are kept.
sub compute_smoothing_params {
    my ($self) = @_;

    # only progress and/or debug info
    if ( $self->config->DEBUG >= 1 ) {
        print "Running EM algorithm to estimate lambdas...\n";
    }

    # uniform probability is 1 / number of different labels
    my $labels_count = scalar keys %{ $self->unigrams };
    if ( $labels_count == 0 ) {
        croak "ModelLabelling->compute_smoothing_params:"
            . " no unigrams available, cannot compute uniform probability!";
    }
    $self->uniform_prob( 1 / $labels_count );

    my $change = 1;
    while ( $change > $self->config->EM_EPSILON ) {

        # compute "expected counts"
        my $expectedCounts    = $self->count_expected_counts_all();
        my $expectedCountsSum = $expectedCounts->[0] + $expectedCounts->[1]
            + $expectedCounts->[2];

        # nothing to normalize by (would be a division by zero)
        if ( $expectedCountsSum == 0 ) {
            carp "ModelLabelling->compute_smoothing_params:"
                . " expected counts sum to zero,"
                . " keeping the current lambdas";
            last;
        }

        # compute new lambdas
        my @new_lambdas = map { $_ / $expectedCountsSum } @{$expectedCounts};

        # compute the change (sum of changes of lambdas)
        $change = abs( $self->smooth_uniform - $new_lambdas[0] )
            + abs( $self->smooth_unigrams - $new_lambdas[1] )
            + abs( $self->smooth_bigrams - $new_lambdas[2] );

        # set new lambdas
        $self->smooth_uniform( $new_lambdas[0] );
        $self->smooth_unigrams( $new_lambdas[1] );
        $self->smooth_bigrams( $new_lambdas[2] );

        # only progress and/or debug info
        if ( $self->config->DEBUG >= 2 ) {
            print "Last change: $change\n";
        }
    }

    # only progress and/or debug info
    # (each lambda on its own line; the values used to run together)
    if ( $self->config->DEBUG >= 2 ) {
        print "Final lambdas:\n"
            . "uniform: " . $self->smooth_uniform . "\n"
            . "unigram: " . $self->smooth_unigrams . "\n"
            . "bigram: " . $self->smooth_bigrams . "\n";
    }
    if ( $self->config->DEBUG >= 1 ) {
        print "Done.\n";
    }

    return;
}
406              
# Counts the "expected counts" of the lambdas over all heldout sentences.
# Returns an array ref [ uniform, unigram, bigram ] holding the sums of
# the counts computed by count_expected_counts_tree() for the first node
# of nodes_with_root of each sentence in EM_heldout_data.
sub count_expected_counts_all {
    my $self = shift;

    my @totals = ( 0, 0, 0 );

    foreach my $sentence ( @{ $self->EM_heldout_data } ) {
        my $root    = $sentence->nodes_with_root->[0];
        my $partial = $self->count_expected_counts_tree($root);
        foreach my $i ( 0 .. 2 ) {
            $totals[$i] += $partial->[$i];
        }
    }

    return \@totals;
}
425              
# Counts the "expected counts" of the lambdas for a parse (sub)tree,
# recursively: the labels of the children of $root_node form one label
# sequence, and each child is then processed as the root of its own
# subtree. Returns an array ref [ uniform, unigram, bigram ].
sub count_expected_counts_tree {
    my ( $self, $root_node ) = @_;

    my @child_edges = @{ $root_node->children };

    # the sequence of labels of the children
    my @child_labels;
    foreach my $edge (@child_edges) {
        push @child_labels, $edge->child->label;
    }

    # counts for this sequence
    my $totals = $self->count_expected_counts_sequence( \@child_labels );

    # add the counts of the subtrees
    foreach my $edge (@child_edges) {
        my $partial = $self->count_expected_counts_tree( $edge->child );
        foreach my $i ( 0 .. 2 ) {
            $totals->[$i] += $partial->[$i];
        }
    }

    return $totals;
}
449              
# Counts the "expected counts" of the lambdas for a sequence of labels,
# including the sequence boundaries at the beginning and at the end.
#   $labels_sequence - array ref of labels (it is NOT modified; the end
#                      boundary label is appended to a local copy only)
# Returns an array ref [ uniform, unigram, bigram ].
# A position whose smoothed probability is zero cannot be attributed to
# any of the three components and is skipped (this also avoids a division
# by zero).
sub count_expected_counts_sequence {

    my ( $self, $labels_sequence ) = @_;

    # to be computed here
    my $expectedCounts = [ 0, 0, 0 ];

    my $boundary = $self->config->SEQUENCE_BOUNDARY_LABEL;

    # boundary at the beginning
    my $label_prev = $boundary;

    # boundary at the end (the caller's array is left untouched)
    foreach my $label_this ( @{$labels_sequence}, $boundary ) {

        # get probs
        my $ngramProbs =
            $self->get_transition_probs_array( $label_this, $label_prev );

        # contributions of the individual components
        my $uniform_part = $self->smooth_uniform * $ngramProbs->[0];
        my $unigram_part = $self->smooth_unigrams * $ngramProbs->[1];
        my $bigram_part  = $self->smooth_bigrams * $ngramProbs->[2];
        my $finalProb    = $uniform_part + $unigram_part + $bigram_part;

        # update expected counts
        if ( $finalProb != 0 ) {
            $expectedCounts->[0] += $uniform_part / $finalProb;
            $expectedCounts->[1] += $unigram_part / $finalProb;
            $expectedCounts->[2] += $bigram_part / $finalProb;
        }

        $label_prev = $label_this;
    }

    return $expectedCounts;
}
487              
# Returns an array ref with all labels known to the model,
# i.e. the keys of the unigrams table (in no particular order).
sub get_all_labels {
    my $self = shift;

    return [ keys %{ $self->unigrams } ];
}
495              
496             # ACCESS TO SCORES
497              
# Computes the score of assigning $label to an edge, given the label of
# the previous edge and the features of the edge.
# Arguments: (Str $label, Str $label_prev, ArrayRef[Str] $features)
# The way emission and transition scores are combined depends on the
# labeller algorithm; croaks for algorithms where a single label cannot
# be scored on its own (usually because the scores of all possible labels
# are needed to normalize them properly).
sub get_label_score {
    my ( $self, $label, $label_prev, $features ) = @_;

    my $algorithm = $self->config->labeller_algorithm;

    # sum of the emission scores of $label over all the features
    my $sum_emissions = sub {
        my $sum = 0;
        foreach my $feature ( @{$features} ) {
            $sum += $self->get_emission_score( $label, $feature );
        }
        return $sum;
    };

    if ( $algorithm == 8 || $algorithm == 9 ) {

        # for each present feature, add both its "emission score"
        # and its "transition score"
        my $score = 0;
        foreach my $feature ( @{$features} ) {
            my $emission = $self->get_emission_score( $label, $feature );
            my $transition =
                $self->get_transition_score( $label, $label_prev, $feature );
            $score += $emission + $transition;
        }

        return $score;
    }

    if ( $algorithm == 14 || $algorithm == 15 ) {

        # emission score of the label (0 if the label got no score)
        my $all_scores = $self->get_emission_scores($features);
        my $score =
            defined $all_scores->{$label} ? $all_scores->{$label} : 0;

        # multiply by transitions score
        return $score * $self->get_transition_score( $label, $label_prev );
    }

    if ( $algorithm == 16 || $algorithm == 18 ) {

        # sum of emission scores multiplied by transitions score
        my $score = $sum_emissions->();

        return $score * $self->get_transition_score( $label, $label_prev );
    }

    if ( $algorithm == 19 ) {

        # sigmoid transformation of the sum of emission scores
        my $score = $sum_emissions->();
        $score = 1 / ( 1 + exp( -$score * $self->config->SIGM_LAMBDA ) );

        # multiply by transitions score
        return $score * $self->get_transition_score( $label, $label_prev );
    }

    if ( $algorithm == 17 ) {

        my $score = $sum_emissions->();

        # Only positive sums are multiplied by the transitions score.
        # For negative scores plain multiplication would work the other
        # way round: two labels with emission score -5 and transition
        # probs 0.2 and 0.9 would get -1.0 and -4.5, inverting the order.
        # Using (1-p) instead of p was considered (giving -4.0 and -0.5),
        # but even so a label with a negative emission score could never
        # outscore a label with a positive one; normalizing the scores to
        # be non-negative would be necessary for that, as in alg 0 and
        # similar.
        # TODO trying new variant - setting negative scores to 0
        if ( $score > 0 ) {
            return $score
                * $self->get_transition_score( $label, $label_prev );
        }

        return 0;
    }

    if ( $algorithm >= 20 ) {

        # just the sum of emission scores
        # TODO: could also compute using $label_prev,
        # using transitions to store these;
        # would allow to use full Viterbi
        return $sum_emissions->();
    }

    croak "ModelLabelling->get_label_score not implemented"
        . " for algorithm no. $algorithm!";
}
629              
# Returns the emission score stored for the given feature and label,
# or 0 if there is none (or if it is false).
# Arguments: (Str $label, Str $feature)
# Implemented for algorithms 8, 9, 16-19 and 20+; croaks otherwise.
sub get_emission_score {
    my ( $self, $label, $feature ) = @_;

    my $algorithm = $self->config->labeller_algorithm;

    my $is_supported = ( $algorithm >= 20 )
        || scalar( grep { $algorithm == $_ } ( 8, 9, 16 .. 19 ) );

    if ( !$is_supported ) {
        croak "ModelLabelling->get_emission_score not implemented"
            . " for algorithm no. $algorithm!";
    }

    # scores of all labels seen with this feature (if any)
    my $by_label = $self->emissions->{$feature};
    return 0 if !$by_label;

    my $score = $by_label->{$label};

    return $score ? $score : 0;
}
661              
# Returns the score of the transition $label_prev -> $label_this.
# Arguments: (Str $label_this, Str $label_prev, Maybe[Str] $feature)
# Depending on the labeller algorithm this is
#  - alg 8, 9: the score stored for the given feature (0 if none),
#  - alg 5, 12, 13, 15-19: probability smoothed by linear interpolation,
#  - alg 0-4, 10, 11, 14: the stored probability, or 0.00001 if none.
# Croaks for any other algorithm.
sub get_transition_score {
    my ( $self, $label_this, $label_prev, $feature ) = @_;

    my $algorithm = $self->config->labeller_algorithm;

    if ( $algorithm == 8 || $algorithm == 9 ) {

        # walk down transitions->{feature}->{label_prev}->{label_this},
        # stopping at the first missing level
        my $by_prev = $self->transitions->{$feature};
        my $by_this = $by_prev && $by_prev->{$label_prev};
        my $score   = $by_this && $by_this->{$label_this};

        # no smoothing as it is used in addition, not in multiplication
        return $score ? $score : 0;
    }

    if ( grep { $algorithm == $_ } ( 5, 12, 13, 15 .. 19 ) ) {

        # smoothing by linear combination
        # PROB(label|prev_label) =
        #    smooth_bigrams * transitions->{prev_label}->{label} +
        #    smooth_unigrams * unigrams->{label} +
        #    smooth_uniform * uniform_prob
        my $probs =
            $self->get_transition_probs_array( $label_this, $label_prev );

        return $probs->[0] * $self->smooth_uniform
            + $probs->[1] * $self->smooth_unigrams
            + $probs->[2] * $self->smooth_bigrams;
    }

    if ( grep { $algorithm == $_ } ( 0 .. 4, 10, 11, 14 ) ) {

        # no real smoothing, only a small constant for unseen transitions
        my $by_this = $self->transitions->{$label_prev};
        my $prob    = $by_this && $by_this->{$label_this};

        return $prob ? $prob : 0.00001;
    }

    croak "ModelLabelling->get_transition_score not implemented"
        . " for algorithm no. $algorithm!";
}    # end get_transition_score
731              
# Returns the three probabilities that are combined when smoothing
# the transition $label_prev -> $label_this:
#   $result->[0] = uniform prob
#   $result->[1] = unigram prob of $label_this (0 if unknown)
#   $result->[2] = bigram prob of $label_this following $label_prev
#                  (0 if unknown or if the unigram prob is unknown)
# Arguments: (Str $label_this, Str $label_prev)
# This is a pure lookup: the transitions table is never modified, i.e. an
# unseen $label_prev does not get an empty entry autovivified in the model.
sub get_transition_probs_array {

    my ( $self, $label_this, $label_prev ) = @_;

    # uniform
    my $result = [ $self->uniform_prob, 0, 0 ];

    my $unigram_prob = $self->unigrams->{$label_this};
    if ($unigram_prob) {

        # unigram
        $result->[1] = $unigram_prob;

        # bigram; the outer level is checked first so that the lookup
        # does not create transitions->{$label_prev} as a side effect
        my $following = $self->transitions->{$label_prev};
        if ( $following && $following->{$label_this} ) {
            $result->[2] = $following->{$label_this};
        }
    }

    return $result;
}
759              
# Gets the scores of all possible labels based on all the features.
# Arguments: (ArrayRef[Str] $features)
# Returns a hash ref of the form { label => prob } where prob might or
# might not be a real probability (i.e. may or may not fulfill
# 0 <= prob <= 1 & sum(probs) == 1), depending on the algorithm used
# (but always a higher prob means a better scoring (more probable) label
# and all of the probs are non-negative) TODO does it hold?
# The sequence boundary label is never included in the result.
# Croaks for algorithms other than 0-5 and 10-15.
sub get_emission_scores {
    my ( $self, $features ) = @_;

    my $algorithm = $self->config->labeller_algorithm;

    my $scores;
    if ( grep { $algorithm == $_ } ( 0 .. 3, 10 .. 15 ) ) {
        $scores = $self->get_emission_scores_basic_MIRA($features);
    } elsif ( $algorithm == 4 || $algorithm == 5 ) {
        $scores = $self->get_emission_scores_no_MIRA($features);
    } else {
        croak "ModelLabelling->get_emission_scores not implemented"
            . " for algorithm no. $algorithm!";
    }

    # the boundary label is NOT a valid label
    my $boundary = $self->config->SEQUENCE_BOUNDARY_LABEL;
    delete $scores->{$boundary};

    return $scores;
}
803              
# Emission scores for the algorithms whose emission scores are stored
# per feature and label in $self->emissions and are summed up here.
#
# (ArrayRef[Str] $features)
#
# Returns a hashref $result->{label} = score. What the score is depends on
# the labeller algorithm:
#   0, 10-15  scores shifted by the minimum and normalized to sum up to 1
#             (a uniform distribution if all scores are equal)
#   1, 2      scores shifted by the minimum, not normalized
#   other (3) raw sums, labels with a score <= 0 are thrown away
# If no label gets any score (algorithms 0-2 and 10-15), a copy of the
# unigrams is returned as a backoff.
sub get_emission_scores_basic_MIRA {

    my ( $self, $features ) = @_;

    my $ALGORITHM = $self->config->labeller_algorithm;

    my $result = {};

    my $warnNoEmissionProbs = "!!! WARNING !!! "
        . "Based on the training data, no possible label was found"
        . " for an edge. This usually means that either"
        . " your training data are not big enough or that"
        . " the set of features you are using"
        . " is not well constructed - either it is too small"
        . " or it lacks features that would be general enough"
        . " to cover all possible sentences."
        . " Using blind emission probabilities instead.\n";

    # "pure MIRA", i.e. no MLE

    if ( $ALGORITHM == 11 || $ALGORITHM == 13 ) {

        # initialize all label scores with 0 (so that all labels get some score)
        my $all_labels = $self->get_all_labels();
        foreach my $label (@$all_labels) {
            $result->{$label} = 0;
        }
    }

    # get scores: for each label, sum its scores over all features
    # that are known to the model
    foreach my $feature (@$features) {
        if ( $self->emissions->{$feature} ) {
            foreach my $label ( keys %{ $self->emissions->{$feature} } ) {
                $result->{$label} += $self->emissions->{$feature}->{$label};
            }
        }
    }

    # subtracting the minimum from the score
    if ($ALGORITHM == 0
        || $ALGORITHM == 1
        || $ALGORITHM == 2
        || $ALGORITHM == 10
        || $ALGORITHM == 11
        || $ALGORITHM == 12
        || $ALGORITHM == 13
        || $ALGORITHM == 14
        || $ALGORITHM == 15
        )
    {

        # find min and max score
        # (the initial values make $min > $max hold iff there are no scores)
        my $min = 1e300;
        my $max = -1e300;
        foreach my $label ( keys %$result ) {
            if ( $result->{$label} < $min ) {
                $min = $result->{$label};
            }
            if ( $result->{$label} > $max ) {
                $max = $result->{$label};
            }

            # else is between $min and $max -> keep the values as they are
        }

        if ( $min > $max ) {

            # $min > $max, i.e. nothing has been generated -> backoff
            if ( $self->config->DEBUG >= 2 ) {
                print $warnNoEmissionProbs;
            }

            # backoff by using unigram probabilities
            # (or unigram counts in some algorithms);
            # a shallow copy is returned so that changes made to the result
            # (get_emission_scores deletes the boundary label from it)
            # do not alter the unigrams stored in the model
            $result = { %{ $self->unigrams } };
        } else {

            # something has been generated, now 0 and 1 start to differ
            if ($ALGORITHM == 0
                || $ALGORITHM == 10
                || $ALGORITHM == 11
                || $ALGORITHM == 12
                || $ALGORITHM == 13
                || $ALGORITHM == 14
                || $ALGORITHM == 15
                )
            {

                # 0 MIRA-trained scores recomputed by +abs(min)
                # and converted to probs
                if ( $min < $max ) {

                    # the typical case: shift by the minimum and normalize;
                    # the divisor is positive because $max - $min > 0
                    my $divisor = 0;

                    foreach my $label ( keys %$result ) {
                        $result->{$label} = ( $result->{$label} - $min );
                        $divisor += $result->{$label};
                    }
                    foreach my $label ( keys %$result ) {
                        $result->{$label} = $result->{$label} / $divisor;
                    }
                } else {

                    # $min == $max

                    # uniform prob distribution
                    my $prob = 1 / scalar( keys %$result );
                    foreach my $label ( keys %$result ) {
                        $result->{$label} = $prob;
                    }
                }

                # end $ALGORITHM == 0|10|11|12|13|14|15
            } else {

                # $ALGORITHM == 1|2
                # 1 ditto, NOT converted to probs
                #   (but should behave the same as 0)
                # 2 ditto, sum in Viterbi instead of product
                #   (new_prob = old_prob + emiss*trans)
                # (for 1 and 2 the emission probs are completely the same,
                # they are just handled differently by the Labeller)

                if ( $min < $max ) {

                    # the typical case: shift by the minimum
                    foreach my $label ( keys %$result ) {
                        $result->{$label} = ( $result->{$label} - $min );
                    }
                } else {

                    # $min == $max
                    # uniform prob distribution

                    if ( $min <= 0 ) {

                        # we would like to keep the values
                        # but this is not possible in this case
                        foreach my $label ( keys %$result ) {

                            # so let's just assign ones
                            $result->{$label} = 1;
                        }
                    }

                    # else there is already a uniform distribution
                    # so let's keep it as it is
                }

                # end $ALGORITHM == 1|2
            }
        }

        # end $ALGORITHM == 0|1|2|10|11|12|13|14|15
    } else {

        # $ALGORITHM == 3
        # no subtraction of minimum, just throw away <= 0

        foreach my $label ( keys %$result ) {
            if ( $result->{$label} <= 0 ) {
                delete $result->{$label};
            }

            # else > 0 -> just keep it there and that's it
        }
    }    # end $ALGORITHM == 3

    return $result;
}    # end get_emission_scores_basic_MIRA
978              
# Emission scores computed only from the per-feature values stored in
# $self->emissions (basic or full MLE, no MIRA).
#
# (ArrayRef[Str] $features)
#
# Returns a hashref $result->{label} = score, where the score of a label
# is the average of its emission values over the features that contain
# the label. If no feature yields any label, a copy of the unigrams
# is returned as a backoff.
sub get_emission_scores_no_MIRA {

    my ( $self, $features ) = @_;

    my $result = {};

    my $warnNoEmissionProbs = "!!! WARNING !!! "
        . "Based on the training data, no possible label was found"
        . " for an edge. This usually means that either"
        . " your training data are not big enough or that"
        . " the set of features you are using"
        . " is not well constructed - either it is too small"
        . " or it lacks features that would be general enough"
        . " to cover all possible sentences."
        . " Using blind emission probabilities instead.\n";

    # basic or full MLE, no MIRA

    # $prob_sums{label} = sum of the emission values of the label
    # $counts{label}    = number of features that contributed to the sum
    my %counts    = ();
    my %prob_sums = ();

    # get scores
    foreach my $feature (@$features) {
        if ( $self->emissions->{$feature} ) {

            # !!! TODO there should be a product here !!!
            foreach my $label ( keys %{ $self->emissions->{$feature} } ) {
                $prob_sums{$label} +=
                    $self->emissions->{$feature}->{$label};
                $counts{$label}++;
            }
        }
    }

    if ( keys %prob_sums ) {
        foreach my $label ( keys %prob_sums ) {

            # something like average probability
            # = all features have the score of 1
            # (or more precisely 1/number_of_features)
            $result->{$label} = $prob_sums{$label} / $counts{$label};
        }
    } else {

        # backoff
        if ( $self->config->DEBUG >= 2 ) {
            print $warnNoEmissionProbs;
        }

        # backoff by using unigram probabilities
        # (or unigram counts in some algorithms);
        # a shallow copy is returned so that changes made to the result
        # (get_emission_scores deletes the boundary label from it)
        # do not alter the unigrams stored in the model
        $result = { %{ $self->unigrams } };
    }

    return $result;
}    # end get_emission_scores_no_MIRA
1035              
# Overwrites a score of $feature with $score:
# the emission score of $label if $label_prev is not defined,
# the transition score from $label_prev to $label if it is.
sub set_feature_score {

    # (Str $feature, Num $score, Str $label, Maybe[Str] $label_prev)
    my ( $self, $feature, $score, $label, $label_prev ) = @_;

    # no previous label -> emissions->{feature}->{label}
    if ( !defined $label_prev ) {
        $self->emissions->{$feature}->{$label} = $score;
        return;
    }

    # previous label given -> transitions->{feature}->{label_prev}->{label}
    $self->transitions->{$feature}->{$label_prev}->{$label} = $score;

    return;
}
1052              
# Adds $update to a score of $feature:
# to the emission score of $label if $label_prev is not defined,
# to the transition score from $label_prev to $label if it is.
sub update_feature_score {

    # (Str $feature, Num $update, Str $label, Maybe[Str] $label_prev)
    my ( $self, $feature, $update, $label, $label_prev ) = @_;

    # no previous label -> emissions->{feature}->{label}
    if ( !defined $label_prev ) {
        $self->emissions->{$feature}->{$label} += $update;
        return;
    }

    # previous label given -> transitions->{feature}->{label_prev}->{label}
    $self->transitions->{$feature}->{$label_prev}->{$label} += $update;

    return;
}
1069              
# Returns the number of features in the model, i.e. the number of stored
# emission entries plus the number of stored transition entries
# (what a "feature" stands for depends on the algorithm used).
sub get_feature_count {

    my ($self) = @_;

    my $ALGORITHM = $self->config->labeller_algorithm;

    my $feature_count = 0;

    # emissions->{feature}->{label}: count the labels of each feature
    foreach my $labels ( values %{ $self->emissions } ) {
        $feature_count += scalar( keys %{$labels} );
    }

    my $transitions_have_features = ( $ALGORITHM == 8 || $ALGORITHM == 9 );

    if ($transitions_have_features) {

        # transitions->{feature}->{label_prev}->{label}:
        # count the labels of each previous label of each feature
        foreach my $labels_prev ( values %{ $self->transitions } ) {
            foreach my $labels ( values %{$labels_prev} ) {
                $feature_count += scalar( keys %{$labels} );
            }
        }
    } else {

        # transitions->{label_prev}->{label}:
        # count the labels of each previous label
        foreach my $labels ( values %{ $self->transitions } ) {
            $feature_count += scalar( keys %{$labels} );
        }
    }

    return $feature_count;

}    # end get_feature_count
1119              
1120             1;
1121              
1122             __END__
1123              
1124             =pod
1125              
1126             =for Pod::Coverage BUILD
1127              
1128             =encoding utf-8
1129              
1130             =head1 NAME
1131              
1132             Treex::Tool::Parser::MSTperl::ModelLabelling
1133              
1134             =head1 VERSION
1135              
1136             version 0.11949
1137              
1138             =head1 DESCRIPTION
1139              
1140             This is an in-memory representation of a labelling model,
1141             extended from L<Treex::Tool::Parser::MSTperl::ModelBase>.
1142              
1143             =head1 FIELDS
1144              
1145             =head2 Inherited from base package
1146              
1147             Fields inherited from L<Treex::Tool::Parser::MSTperl::ModelBase>.
1148              
1149             =over 4
1150              
1151             =item config
1152              
1153             Instance of L<Treex::Tool::Parser::MSTperl::Config> containing settings to be
1154             used for the model.
1155              
1156             Currently the settings most relevant to the model are the following:
1157              
1158             =over 8
1159              
1160             =item EM_EPSILON
1161              
1162             See L<Treex::Tool::Parser::MSTperl::Config/EM_EPSILON>.
1163              
1164             =item labeller_algorithm
1165              
1166             See L<Treex::Tool::Parser::MSTperl::Config/labeller_algorithm>.
1167              
1168             =item labelledFeaturesControl
1169              
1170             See L<Treex::Tool::Parser::MSTperl::Config/labelledFeaturesControl>.
1171              
1172             =item SEQUENCE_BOUNDARY_LABEL
1173              
1174             See L<Treex::Tool::Parser::MSTperl::Config/SEQUENCE_BOUNDARY_LABEL>.
1175              
1176             =back
1177              
1178             =item featuresControl
1179              
1180             Provides access to labeller features, especially enabling their computation.
1181             Instance of L<Treex::Tool::Parser::MSTperl::FeaturesControl>.
1182              
1183             =back
1184              
1185             =head2 Label scoring
1186              
1187             =over 4
1188              
1189             =item emissions
1190              
1191             Emission scores for Viterbi. They follow the edge-based factorization
1192             and provide scores for various labels for an edge based on its features.
1193              
1194             The structure is:
1195              
1196             emissions->{feature}->{label} = score
1197              
1198             Scores may or may not be probabilities, based on the algorithm used.
1199             Also based on the algorithm they may be MIRA-computed
1200             or they might be obtained by standard MLE.
1201              
1202             =item transitions
1203              
1204             Transition scores for Viterbi. They follow the
1205             first order Markov chain edge-based factorization
1206             and provide scores for various labels for an edge
1207             probably based on its features
1208             and always based on previous edge label.
1209              
1210             Scores may or may not be probabilities, based on the algorithm used.
1211             Also based on the algorithm they may be obtained by standard MLE
1212             or they might be MIRA-computed.
1213              
1214             The structure is:
1215              
1216             transitions->{label_prev}->{label_this} = prob
1217              
1218             or
1219              
1220             transitions->{feature}->{label_prev}->{label_this} = score
1221              
1222             =back
1223              
1224             =head2 Transitions smoothing
1225              
1226             In some algorithms linear combination smoothing is used
1227             for transition probabilities.
1228             The resulting transition probability is then obtained as:
1229              
1230             PROB(label|prev_label) =
1231             smooth_bigrams * transitions->{prev_label}->{label} +
1232             smooth_unigrams * unigrams->{label} +
1233             smooth_uniform
1234              
1235             =over 4
1236              
1237             =item smooth_bigrams
1238              
1239             =item smooth_unigrams
1240              
1241             =item smooth_uniform
1242              
1243             The actual smoothing parameters computed by EM algorithm.
1244             Each of them is between 0 and 1 and together they sum up to 1.
1245              
1246             =item uniform_prob
1247              
1248             Uniform probability of a label, computed as
1249             C<1 / ( keys %{ $self->unigrams } )>.
1250              
1251             Set in C<compute_smoothing_params>.
1252              
1253             =item unigrams
1254              
1255             Basic MLE from data, the structure is
1256              
1257             unigrams->{label} = prob
1258              
1259             To be used for transitions smoothing and/or backoff
1260             (can be used both for emissions and transitions)
1261             It also contains the C<SEQUENCE_BOUNDARY_LABEL> prob
1262             (the SEQUENCE_BOUNDARY_LABEL is counted once for each sequence)
1263             which might be inappropriate in some cases (eg. for emission probs).
1264              
1265             =item EM_heldout_data
1266              
1267             Just an array ref with the sentences that represent the heldout data
1268             to be able to run the EM algorithm in C<prepare_for_mira()>.
1269             Used only in training.
1270              
1271             =back
1272              
1273             =head1 METHODS
1274              
1275             =head2 Inherited
1276              
1277             Subroutines inherited from L<Treex::Tool::Parser::MSTperl::ModelBase>.
1278              
1279             =head3 Load and store
1280              
1281             =over 4
1282              
1283             =item store
1284              
1285             See L<Treex::Tool::Parser::MSTperl::ModelBase/store>.
1286              
1287             =item store_tsv
1288              
1289             See L<Treex::Tool::Parser::MSTperl::ModelBase/store_tsv>.
1290              
1291             =item load
1292              
1293             See L<Treex::Tool::Parser::MSTperl::ModelBase/load>.
1294              
1295             =item load_tsv
1296              
1297             See L<Treex::Tool::Parser::MSTperl::ModelBase/load_tsv>.
1298              
1299             =back
1300              
1301             =head2 Overridden
1302              
1303             Subroutines overriding stubs in L<Treex::Tool::Parser::MSTperl::ModelBase>.
1304              
1305             =head3 Load and store
1306              
1307             =over 4
1308              
1309             =item $data = get_data_to_store(), $data = get_data_to_store_tsv()
1310              
1311             Returns the model data, containing the following fields:
1312             C<unigrams>,
1313             C<transitions>,
1314             C<emissions>,
1315             C<smooth_uniform>,
1316             C<smooth_unigrams>,
1317             C<smooth_bigrams>,
1318             C<uniform_prob>
1319              
1320             =item load_data($data), load_data_tsv($data)
1321              
1322             Tries to get all necessary data from C<$data>
1323             (see C<get_data_to_store> to see what data are stored).
1324             Also does basic checks on the data, eg. for non-emptiness, but nothing
1325             sophisticated. Is algorithm-sensitive, i.e. if some data are not needed
1326             for the algorithm used, they do not have to be contained in the hash.
1327              
1328             =back
1329              
1330             =head3 Training support
1331              
1332             =over 4
1333              
1334             =item prepare_for_mira
1335              
1336             Called after preprocessing training data, before entering the MIRA phase.
1337              
1338             Function varies depending on algorithm used.
1339             Usually recomputes counts stored in C<emissions>, C<transitions> and C<unigrams>
1340             to probabilities that have been computed by C<add_emission>,
1341             C<add_transition> and C<add_unigram>.
1342             Also calls C<compute_smoothing_params> to estimate smoothing parameters
1343             for smoothing of transition probabilities.
1344              
1345             =item get_feature_count
1346              
1347             Only to provide information about the model.
1348             Returns number of features in the model (where a "feature" can stand for
1349             various things depending on the algorithm used).
1350              
1351             =back
1352              
1353             =head2 Technical methods
1354              
1355             =over 4
1356              
1357             =item BUILD
1358              
1359             my $model = Treex::Tool::Parser::MSTperl::ModelLabelling->new(
1360             config => $config,
1361             );
1362              
1363             Creates an empty model. If you are training the model, this is probably what you
1364             want, otherwise you can use C<load> or C<load_tsv>
1365             to load an existing labelling model from a file.
1366              
1367             However, most often you would probably use a model for a labeller
1368             (L<Treex::Tool::Parser::MSTperl::Labeller>)
1369             or a labelling trainer
1370             (L<Treex::Tool::Parser::MSTperl::TrainerLabelling>)
1371             which both automatically create the model on build.
1372             The labeller also provides wrapping methods
1373             L<Treex::Tool::Parser::MSTperl::Labeller/load_model>
1374             and
1375             L<Treex::Tool::Parser::MSTperl::Labeller/load_model_tsv>
1376             which you can call to load the model from a file.
1377             (Btw. as you might expect, the trainer provides methods
1378             L<Treex::Tool::Parser::MSTperl::TrainerLabelling/store_model>
1379             and
1380             L<Treex::Tool::Parser::MSTperl::TrainerLabelling/store_model_tsv>.)
1381              
1382             =back
1383              
1384             =head2 MLE on training data
1385              
1386             C<emissions> and C<transitions> can be either MIRA-trained
1387             or estimated directly from training data using MLE
1388             (Maximum Likelihood Estimate).
1389             C<unigrams> are always estimated by MLE.
1390              
1391             =over 4
1392              
1393             =item add_unigram ($label)
1394              
1395             Increment count for the label in C<unigrams>.
1396              
1397             =item add_transition ($label_this, $label_prev)
1398              
1399             =item add_transition ($label_this, $label_prev, $feature)
1400              
1401             Increment count for the transition in C<transitions>, possibly including a
1402             feature on "this" edge if the algorithm uses features with transitions.
1403              
1404             =item add_emission ($feature, $label)
1405              
1406             Increment count for this label on an edge with this feature in C<emissions>.
1407              
1408             =item compute_probs_from_counts ($self->emissions)
1409              
1410             Takes a hash reference with label counts and changes the counts
1411             to probabilities (this is the actual MLE).
1412             May be called in C<prepare_for_mira> on
1413             C<emissions>, C<transitions> and C<unigrams>.
1414              
1415             =back
1416              
1417             =head2 EM algorithm
1418              
1419             =over 4
1420              
1421             =item compute_smoothing_params()
1422              
1423             The main method containing an implementation of the Expectation Maximization
1424             Algorithm to compute smoothing parameters (C<smooth_bigrams>,
1425             C<smooth_unigrams>, C<smooth_uniform>) for transition probabilities
1426             smoothing by linear combination of bigram, unigram and uniform probability.
1427             Iteratively tries to find
1428             such parameters that the probabilities from training data
1429             (C<transitions>, C<unigrams> and C<uniform_prob>) combined together by
1430             the smoothing parameters model well enough the heldout data
1431             (C<EM_heldout_data>), i.e. tries to maximize the probability of the heldout
1432             data given the training data probabilities by adjusting the smoothing
1433             parameters values.
1434              
1435             Uses C<EM_EPSILON> as a stopping criterion, i.e. stops when the sum of
1436             absolute values of changes to all smoothing parameters are lower
1437             than the value of C<EM_EPSILON>.
1438              
1439             =item count_expected_counts_all()
1440              
1441             =item count_expected_counts_tree($root_node)
1442              
1443             =item count_expected_counts_sequence($labels_sequence)
1444              
1445             Support methods to C<compute_smoothing_params>, in the order in which they
1446             call each other.
1447              
1448             =back
1449              
1450             =head2 Scoring
1451              
1452             A bunch of methods to score the likelihood of a label being assigned to an
1453             edge based on the edge's features and the label assigned to the previous
1454             edge.
1455              
1456             =over 4
1457              
1458             =item get_all_labels()
1459              
1460             Returns (a reference to) an array of all labels found in the training data
1461             (eg. C<['Subj', 'Obj', 'Atr']>).
1462              
1463             =item get_label_score($label, $label_prev, $features)
1464              
1465             Computes a score of assigning the given label to an edge,
1466             given the features of the edge and the label assigned to the previous edge.
1467              
1468             Always a higher score means a more likely label for the edge.
1469             Some algorithms may give a negative score.
1470              
1471             Is semantically equivalent to calling C<get_emission_score>
1472             and C<get_transition_score> and then combining it together somehow.
1473              
1474             =item get_emission_score($label, $feature)
1475              
1476             Computes the "emission score" of assigning the given label to an edge,
1477             given one of the features of the edge and disregarding
1478             the label assigned to the previous edge.
1479              
1480             =item get_transition_score($label_this, $label_prev, $feature)
1481              
1482             Computes the "transition score" of assigning the given label to an edge,
1483             given the label assigned to the previous edge
1484             and possibly also one of the features of the edge
1485             but NOT including the emission score returned by C<get_emission_score>.
1486              
1487             =item $result = get_transition_probs_array ($label_this, $label_prev)
1488              
1489             Returns (a reference to) an array of the probabilities of the transition
1490             from label_prev to label_this (to be smoothed together),
1491             having the following structure:
1492              
1493             $result->[0] = uniform prob
1494             $result->[1] = unigram prob
1495             $result->[2] = bigram prob
1496              
1497             =item $result = get_emission_scores($features)
1498              
1499             Get scores of assigning each of the possible labels to an edge
1500             based on all the features of the edge. Is semantically equivalent
1501             to doing:
1502              
1503             foreach label
1504             foreach feature
1505             get_emission_score(label, feature)
1506              
1507             The structure is:
1508              
1509             $result->{label} = score
1510              
1511             Actually only serves as a switch for several implementations of the method
1512             (C<get_emission_scores_basic_MIRA> and C<get_emission_scores_no_MIRA>);
1513             the method to be used is selected based on the algorithm being used.
1514              
1515             =item get_emission_scores_basic_MIRA($features)
1516              
1517             A C<get_emission_scores> implementation used with algorithms
1518             where the emission scores are computed by MIRA (this is currently
1519             the most successful implementation).
1520              
1521             =item get_emission_scores_no_MIRA($features)
1522              
1523             A C<get_emission_scores> implementation using only MLE. Probably obsolete now.
1524              
1525             =back
1526              
1527             =head2 Changing the scores
1528              
1529             Methods used by the trainer
1530             (L<Treex::Tool::Parser::MSTperl::TrainerLabelling>)
1531             to adjust the scores to whatever seems to be
1532             the best idea at the moment. Used only in MIRA training
1533             (MLE uses C<add_unigram>, C<add_emission>, C<add_transition>
1534             and C<compute_probs_from_counts> instead).
1535              
1536             =over 4
1537              
1538             =item set_feature_score($feature, $score, $label, $label_prev)
1539              
1540             Sets the specified emission score (if label_prev is not set)
1541             or transition score (if it is)
1542             to the given value (C<$score>).
1543              
1544             =item update_feature_score($feature, $update, $label, $label_prev)
1545              
1546             Updates the specified emission score (if label_prev is not set)
1547             or transition score (if it is)
1548             by the given value (C<$update>), i.e. adds that value to the
1549             current value.
1550              
1551             =back
1552              
1553             =head1 AUTHORS
1554              
1555             Rudolf Rosa <rosa@ufal.mff.cuni.cz>
1556              
1557             =head1 COPYRIGHT AND LICENSE
1558              
1559             Copyright © 2011 by Institute of Formal and Applied Linguistics, Charles
1560             University in Prague
1561              
1562             This module is free software; you can redistribute it and/or modify it under
1563             the same terms as Perl itself.