File Coverage

blib/lib/Treex/Tool/Parser/MSTperl/TrainerLabelling.pm
Criterion Covered Total %
statement 1 3 33.3
branch n/a
condition n/a
subroutine 1 1 100.0
pod n/a
total 2 4 50.0


line stmt bran cond sub pod time code
1             package Treex::Tool::Parser::MSTperl::TrainerLabelling;
2             {
3             $Treex::Tool::Parser::MSTperl::TrainerLabelling::VERSION = '0.11949';
4             }
5              
6 1     1   2890 use Moose;
  0            
  0            
7             use Carp;
8              
9             extends 'Treex::Tool::Parser::MSTperl::TrainerBase';
10              
11             use Treex::Tool::Parser::MSTperl::Labeller;
12              
# The labelling model that is being trained
# (BUILD takes it over from the labeller).
has model => (
    is  => 'rw',
    isa => 'Treex::Tool::Parser::MSTperl::ModelLabelling',
);

# The labeller that is used to relabel the training sentences.
has labeller => (
    is  => 'rw',
    isa => 'Treex::Tool::Parser::MSTperl::Labeller',
);

# Sum of all emission scores used during the training; the sums are turned
# into averages at the end, as averaging is reported to help avoid
# overtraining
# (M. Collins. 2002. Discriminative training methods for
# hidden Markov models: Theory and experiments with
# perceptron algorithms. In Proc. EMNLP.)
# Structure:
# emissions_summed->{feature}->{label} = score
has emissions_summed => (
    is      => 'rw',
    isa     => 'HashRef',
    default => sub { return {}; },
);

# The counterpart of emissions_summed for pairs of labels.
# Depending on the algorithm used, the structure is either
# transitions_summed->{feature}->{label_prev}->{label} = score
# or
# transitions_summed->{label_prev}->{label} = score
has transitions_summed => (
    is      => 'rw',
    isa     => 'HashRef',
    default => sub { return {}; },
);
46              
# Moose construction hook: creates the labeller from the trainer's config,
# takes over the labeller's model and copies the labeller-specific
# features control and number of iterations from the config.
sub BUILD {
    my ($self) = @_;

    my $config   = $self->config;
    my $labeller = Treex::Tool::Parser::MSTperl::Labeller->new(
        config => $config,
    );

    $self->labeller($labeller);
    $self->model( $self->labeller->model );
    $self->featuresControl( $config->labelledFeaturesControl );
    $self->number_of_iterations( $config->labeller_number_of_iterations );

    return;
}
59              
60             # LABELLING TRAINING
61              
# Prepares one training sentence: fills its fields needed for labelling,
# adds its labels to the unigram counts and, depending on the algorithm,
# adds its transition counts (or stores the sentence as EM heldout data)
# and its emission counts.
sub preprocess_sentence {

    # (Treex::Tool::Parser::MSTperl::Sentence $sentence, Num $progress)
    my ( $self, $sentence, $progress ) = @_;

    # compute edges and their features
    $sentence->fill_fields_before_labelling();

    $self->compute_unigram_counts($sentence);

    my $ALGORITHM = $self->config->labeller_algorithm;

    # true iff the algorithm number is (numerically) one of the given ones
    my $algorithm_in = sub {
        my @numbers = @_;
        return scalar grep { $ALGORITHM == $_ } @numbers;
    };

    # transition counts
    if ( $algorithm_in->( 5, 12, 13, 15, 16, 17, 18, 19 ) ) {

        # MLE transitions with smoothing
        if ( $progress < $self->config->EM_heldout_data_at ) {
            $self->compute_transition_counts( $sentence->getNodeByOrd(0) );
        } else {

            # this sentence is not used for transition counts,
            # it is kept for the EM algorithm that computes
            # the smoothing parameters
            push @{ $self->model->EM_heldout_data }, $sentence;
        }
    } elsif ( $algorithm_in->( 0, 1, 2, 3, 4, 9, 10, 11, 14 ) ) {

        # MLE transitions without smoothing
        $self->compute_transition_counts( $sentence->getNodeByOrd(0) );
    }

    # else (8) MLE transition counts not used

    # emission counts
    if ( $algorithm_in->( 4, 5, 9 ) ) {

        # MLE emission counts for Viterbi
        $self->compute_emission_counts($sentence);
    }

    return;
}
132              
# Adds the label of the child node of each edge of the sentence
# to the unigram counts of the model, followed by one occurrence
# of the sequence boundary label.
sub compute_unigram_counts {

    # (Treex::Tool::Parser::MSTperl::Sentence $sentence)
    my ( $self, $sentence ) = @_;

    my @labels = map { scalar $_->child->label } @{ $sentence->edges };

    # TODO: do this?
    push @labels, scalar $self->config->SEQUENCE_BOUNDARY_LABEL;

    foreach my $label (@labels) {
        $self->model->add_unigram($label);
    }

    return;
}
149              
# Adds the transition counts for the sequence of children of the given node
# to the model and recurses on each of the children.
# For algorithm 9 one transition is added per feature of each edge;
# for the other algorithms one transition is added per edge.
sub compute_transition_counts {

    # (Treex::Tool::Parser::MSTperl::Node $parent_node)
    my ( $self, $parent_node ) = @_;

    # stopping condition: a node without children
    my @child_edges = @{ $parent_node->children };
    return if !@child_edges;

    my $per_feature = ( $self->config->labeller_algorithm == 9 );
    my $boundary    = $self->config->SEQUENCE_BOUNDARY_LABEL;

    # the sequence starts with the boundary label
    my $previous_label = $boundary;
    foreach my $edge (@child_edges) {
        my $current_label = $edge->child->label;

        if ($per_feature) {
            foreach my $feature ( @{ $edge->features } ) {
                $self->model->add_transition(
                    $current_label, $previous_label, $feature
                );
            }
        } else {
            $self->model->add_transition( $current_label, $previous_label );
        }

        $previous_label = $current_label;
        $self->compute_transition_counts( $edge->child );
    }

    # the boundary label is added to the end of the sequence as well
    # TODO: currently not used in Viterbi
    # TODO: for algorithm 9 nothing is added here since there is actually
    # no such edge and therefore there are no features to add it with
    if ( !$per_feature ) {
        $self->model->add_transition( $boundary, $previous_label );
    }

    return;
}
203              
# Adds one emission count to the model for each feature of each edge
# of the sentence, paired with the label of the child node of the edge.
# algs 4, 5, 9
sub compute_emission_counts {

    # (Treex::Tool::Parser::MSTperl::Sentence $sentence)
    my ( $self, $sentence ) = @_;

    foreach my $edge ( @{ $sentence->edges } ) {
        my $edge_label = $edge->child->label;
        my @features   = @{ $edge->features };
        foreach my $feature (@features) {
            $self->model->add_emission( $feature, $edge_label );
        }
    }

    return;
}
219              
# Performs one training update on one sentence: relabels the sentence
# with the labeller and passes the correct and the best scoring labelling
# to mira_update. Does nothing for algorithms 4 and 5.
sub update {

    # (Treex::Tool::Parser::MSTperl::Sentence $sentence_correct_labelling,
    # Int $sumUpdateWeight)
    my ( $self, $sentence_correct_labelling, $sumUpdateWeight ) = @_;

    my $ALGORITHM = $self->config->labeller_algorithm;

    # algorithms 4 and 5 are pure MLE setups with no use of MIRA
    return if $ALGORITHM == 4 || $ALGORITHM == 5;

    # relabel the sentence
    # l' = argmax_l' s(l', x_t, y_t)
    my $sentence_best_labelling =
        $self->labeller->label_sentence_internal($sentence_correct_labelling);

    # nothing to do now in fill_fields_after_labelling()
    # $sentence_best_labelling->fill_fields_after_labelling();

    # only progress and/or debug info
    if ( $self->config->DEBUG >= 2 ) {
        my @listings = (
            [ "CORRECT LABELS:\n",      $sentence_correct_labelling ],
            [ "BEST SCORING LABELS:\n", $sentence_best_labelling ],
        );
        foreach my $listing (@listings) {
            my ( $heading, $sentence ) = @{$listing};
            print $heading;
            foreach my $node ( @{ $sentence->nodes_with_root } ) {
                my $line = $node->ord . "/" . $node->label . "\n";
                print $line;
            }
        }
    }

    # min ||w_i+1 - w_i|| s.t. ...
    $self->mira_update(
        $sentence_correct_labelling,
        $sentence_best_labelling,
        $sumUpdateWeight
    );

    return;
}
269              
# Dispatches the MIRA update for one sentence: the tree-based algorithms
# are handled by mira_tree_update started at the first node of
# nodes_with_root; for the other algorithms mira_edge_update is called
# for each edge whose child is labelled differently in the two labellings.
sub mira_update {

    # (Treex::Tool::Parser::MSTperl::Sentence $sentence_correct_labelling,
    # Treex::Tool::Parser::MSTperl::Sentence $sentence_best_labelling,
    # Int $sumUpdateWeight)
    my (
        $self,
        $sentence_correct_labelling,
        $sentence_best_labelling,
        $sumUpdateWeight
    ) = @_;

    my $ALGORITHM = $self->config->labeller_algorithm;

    # alg in 8 9 14 15 16 17 18 19 >=20
    if ( $ALGORITHM >= 20 || grep { $ALGORITHM == $_ } 8, 9, 14 .. 19 ) {
        $self->mira_tree_update(
            $sentence_correct_labelling->nodes_with_root->[0],
            $sentence_best_labelling,
            $sumUpdateWeight,
        );
        return;
    }

    # alg in 1 2 3 4 5 6 7 10 11 12 13

    # labels of all nodes, indexed in the order of nodes_with_root
    my @correct_labels;
    foreach my $node ( @{ $sentence_correct_labelling->nodes_with_root } ) {
        push @correct_labels, $node->label;
    }
    my @best_labels;
    foreach my $node ( @{ $sentence_best_labelling->nodes_with_root } ) {
        push @best_labels, $node->label;
    }

    foreach my $edge ( @{ $sentence_correct_labelling->edges } ) {

        my $ord = $edge->child->ord;

        if ( $correct_labels[$ord] eq $best_labels[$ord] ) {
            if ( $self->config->DEBUG >= 3 ) {
                print "label on $ord is correct, no need to optimize\n";
            }
            next;
        }

        # the labels differ, the scores have to be updated
        $self->mira_edge_update(
            $edge, $correct_labels[$ord], $best_labels[$ord], $sumUpdateWeight
        );
    }

    return;
}
329              
# Makes a MIRA update on the sequence of edges leading from $correct_parent
# to its children and then recurses on the children.
# The correct label of each edge is read from its child node; the label
# assigned by the labeller is looked up in $sentence_best_labelling
# by the ord of that child node.
# The previous labels (correct and best) start as the sequence boundary
# label and are shifted after EVERY edge, no matter whether the edge was
# updated, skipped, or labelled correctly, so that each edge is always
# scored against the labels of its real left neighbour.
# alg in 8 9 14 15 16 17 18 19 >=20
sub mira_tree_update {

    # (Treex::Tool::Parser::MSTperl::Node $correct_parent,
    # Treex::Tool::Parser::MSTperl::Sentence $sentence_best_labelling,
    # Int $sumUpdateWeight)
    my (
        $self,
        $correct_parent,
        $sentence_best_labelling,
        $sumUpdateWeight
    ) = @_;

    my @correct_edges = @{ $correct_parent->children };

    # stopping condition of the recursion
    if ( @correct_edges == 0 ) {
        return;
    }

    my $label_prev_correct = $self->config->SEQUENCE_BOUNDARY_LABEL;
    my $label_prev_best    = $self->config->SEQUENCE_BOUNDARY_LABEL;
    foreach my $correct_edge (@correct_edges) {

        my $features      = $correct_edge->features_all_labeller();
        my $label_correct = $correct_edge->child->label;
        my $label_best    = (
            $sentence_best_labelling->getNodeByOrd(
                $correct_edge->child->ord
            )
        )->label;

        if ( $label_correct ne $label_best ) {

            # label is incorrect, we have to update the scores
            # (the helper returns early when no update is to be made)
            $self->_mira_tree_edge_update(
                $correct_edge,
                $features,
                $label_correct,
                $label_best,
                $label_prev_correct,
                $label_prev_best,
                $sumUpdateWeight,
            );
        }

        # else label is correct, do not update

        # shift (must not be skipped, otherwise the following edges
        # would be scored against stale previous labels)
        $label_prev_correct = $label_correct;
        $label_prev_best    = $label_best;

    }    # end foreach $correct_edge

    # TODO: add SEQUENCE_BOUNDARY_LABEL at the end?

    # recursion
    foreach my $correct_edge (@correct_edges) {
        $self->mira_tree_update(
            $correct_edge->child,
            $sentence_best_labelling,
            $sumUpdateWeight,
        );
    }

    return;
}

# Private helper of mira_tree_update: updates the feature scores for one
# edge whose best scoring label differs from the correct one.
# Returns without any update if the correct label already scores well
# enough, or (algorithm 19) if a score cannot be passed to the inverse
# sigmoid. Croaks if the algorithm is not supported or if the edge has
# no features among which the error could be distributed.
sub _mira_tree_edge_update {

    # (Treex::Tool::Parser::MSTperl::Edge $correct_edge,
    # ArrayRef $features,
    # Str $label_correct, Str $label_best,
    # Str $label_prev_correct, Str $label_prev_best,
    # Int $sumUpdateWeight)
    my (
        $self,
        $correct_edge,
        $features,
        $label_correct,
        $label_best,
        $label_prev_correct,
        $label_prev_best,
        $sumUpdateWeight
    ) = @_;

    my $ALGORITHM = $self->config->labeller_algorithm;

    my $score_correct = $self->model->get_label_score(
        $label_correct, $label_prev_correct, $features
    );
    my $score_best = $self->model->get_label_score(
        $label_best, $label_prev_best, $features
    );

    my $margin = 1;

    # error to be distributed among features
    my $error;

    if ( $ALGORITHM == 19 ) {

        if ( $score_correct > $score_best ) {
            if ( $self->config->DEBUG >= 2 ) {
                print "correct label $label_correct on "
                    . ( $correct_edge->child->ord )
                    . " has higher score than incorrect $label_best "
                    . "but transition scores preferred "
                    . "the incorrect one\n";
            }
            return;
        }

        # the inverse sigmoid -log(1/s - 1) is defined only for 0 < s < 1;
        # any other score would make the division or the log die
        if ($score_correct <= 0
            || $score_correct >= 1
            || $score_best <= 0
            || $score_best >= 1
            )
        {
            if ( $self->config->DEBUG >= 2 ) {
                print "correct label $label_correct on "
                    . ( $correct_edge->child->ord )
                    . " score correct: $score_correct\n"
                    . "score best: $score_best\n";
            }
            return;
        }

        # inverse sigmoid
        my $em_correct = -log( 1 / $score_correct - 1 )
            / $self->config->SIGM_LAMBDA;
        my $em_best = -log( 1 / $score_best - 1 )
            / $self->config->SIGM_LAMBDA;

        $error = $em_best - $em_correct + $margin;

    } else {

        # this is actually a simple accuracy loss function (number of wrong
        # labels in the sequence, here for one edge only) as described in
        # Kevin Gimpel and Shay Cohen (2007):
        # Discriminative Online Algorithms for
        # Sequence Labeling - A Comparative Study
        # which they show gave the best performance from all loss functions
        # that they had tried
        $error = $score_best - $score_correct + $margin;

        if ( $error < 0 ) {
            if ( $self->config->DEBUG >= 3 ) {
                print "correct label $label_correct on "
                    . ( $correct_edge->child->ord )
                    . " has higher score than incorrect $label_best "
                    . "but transition scores preferred "
                    . "the incorrect one\n";
            }
            return;
        }

        my $is_supported = (
            $ALGORITHM == 8
                || $ALGORITHM == 9
                || $ALGORITHM == 14
                || $ALGORITHM == 15
                || $ALGORITHM == 16
                || $ALGORITHM == 17
                || $ALGORITHM == 18
                || $ALGORITHM >= 20
        );
        if ( !$is_supported ) {
            croak "TrainerLabelling->mira_tree_update not implemented"
                . " for algorithm no. $ALGORITHM!";
        }
    }

    # algorithms 8 and 9 update the transition scores as well
    my $update_transitions = ( $ALGORITHM == 8 || $ALGORITHM == 9 );

    my $features_count = scalar( @{$features} );
    if ( $features_count == 0 ) {

        # report the problem explicitly instead of dying
        # on a division by zero below
        croak "TrainerLabelling->mira_tree_update: the edge to node "
            . ( $correct_edge->child->ord )
            . " has no features, the update cannot be distributed!";
    }

    # the same update is done twice with each feature (emissions only)
    # or four times with each feature (emissions and transitions)
    my $update = $error / $features_count / ( $update_transitions ? 4 : 2 );

    foreach my $feature ( @{$features} ) {

        # TODO: which labels to use in transitions updates?
        # none of the articles I have read
        # mentions that specifically
        # but according to their definitions they use
        # $label_prev_correct for positive updates
        # and $label_prev_best for negative updates
        # (which makes some sense but
        # several other combinations would
        # make some sense as well -> let's try them, later)

        # positive emission update
        $self->update_feature_score(
            $feature,
            $update,
            $sumUpdateWeight,
            $label_correct,
        );

        if ($update_transitions) {

            # positive transition update
            $self->update_feature_score(
                $feature,
                $update,
                $sumUpdateWeight,
                $label_correct,
                $label_prev_correct,
            );
        }

        # negative emission update
        $self->update_feature_score(
            $feature,
            -$update,
            $sumUpdateWeight,
            $label_best,
        );

        if ($update_transitions) {

            # negative transition update
            $self->update_feature_score(
                $feature,
                -$update,
                $sumUpdateWeight,
                $label_best,
                $label_prev_best,
            );
        }
    }

    return;
}
576              
# Makes a MIRA update of the emission scores for one edge whose best
# scoring label differs from the correct one: the error
# (score of best - score of correct + margin) is split evenly among
# the features of the edge, added for the correct label and subtracted
# for the best scoring label. Croaks if the edge has no features.
# alg in 1 2 3 4 5 6 7 10 11 12 13
sub mira_edge_update {

    my ( $self, $edge, $correct_label, $best_label, $sumUpdateWeight ) = @_;

    my $label_scores = $self->model->get_emission_scores( $edge->features );

    # s(l_t, x_t, y_t), a missing score counts as 0
    my $score_correct = $label_scores->{$correct_label};
    $score_correct = 0 if !defined $score_correct;

    # s(l', x_t, y_t), a missing score counts as 0
    my $score_best = $label_scores->{$best_label};
    $score_best = 0 if !defined $score_best;

    # difference in scores should be greater than the margin:

    # L(l_t, l') number of incorrectly assigned labels
    # edge-based factorization -> always one error
    # (in case of an error)
    my $margin = 1;

    # L(l_t, l') - [s(l_t, x_t, y_t) - s(l', x_t, y_t)]
    my $error = $score_best - $score_correct + $margin;

    unless ( $error > 0 ) {

        # (correct is better but transition ruled it out)
        # TODO: incorporate transition scores?
        if ( $self->config->DEBUG >= 3 ) {
            print "correct label $correct_label "
                . "has higher score than incorrect $best_label "
                . "but transition scores preferred "
                . "the incorrect one\n";
        }
        return;
    }

    # features do not depend on sentence labelling
    # (TODO: actually they may depend on parent labelling,
    # but we chose to ignore this as we can assume that the
    # parent is labelled correctly;
    # that's why we use edges
    # from $sentence_correct_labelling here)
    my $features_diff       = $edge->features;
    my $features_diff_count = scalar( @{$features_diff} );

    unless ( $features_diff_count > 0 ) {
        croak "It seems that there are no features!" .
            "This is somewhat weird.";
    }

    # min ||w_i+1 - w_i||
    # s.t. s(x_t, y_t) - s(x_t, y') >= L(y_t, y')
    my $update = $error / $features_diff_count;

    foreach my $feature ( @{$features_diff} ) {

        # the correct labelling gains $update ...
        $self->update_feature_score(
            $feature, $update, $sumUpdateWeight, $correct_label
        );

        # ... and the "best" labelling loses it
        $self->update_feature_score(
            $feature, -$update, $sumUpdateWeight, $best_label
        );
    }

    if ( $self->config->DEBUG >= 3 ) {
        print "alpha: $update on $features_diff_count"
            . " features (correct $correct_label,"
            . " best $best_label)\n";
    }

    return;
}
671              
# Adds $update to the score of the feature in the model and adds
# $sumUpdateWeight * $update to the corresponding summed score:
# to transitions_summed if $label_prev is defined,
# to emissions_summed otherwise.
# Returns whatever the update_feature_score method of the model returns.
sub update_feature_score {

    # (Str $feature, Num $update, Num $sumUpdateWeight,
    # Str $label, Maybe[Str] $label_prev)
    my ( $self, $feature, $update, $sumUpdateWeight, $label, $label_prev ) = @_;

    # the current score of the feature in the model
    my $result = $self->model->update_feature_score(
        $feature, $update, $label, $label_prev
    );

    # v = v + w_{i+1}
    # $sumUpdateWeight denotes number of summands
    # in which the score would appear
    # if it were computed according to the definition
    my $summed_update = $sumUpdateWeight * $update;

    # pick the hash of summed scores keyed by $label:
    # transition scores if the previous label is given,
    # emission scores otherwise
    my $scores_of_label =
        defined $label_prev
        ? ( $self->transitions_summed->{$feature}->{$label_prev} ||= {} )
        : ( $self->emissions_summed->{$feature} ||= {} );

    $scores_of_label->{$label} += $summed_update;

    return $result;
}
703              
704             # SCORES AVERAGING
705              
# Recomputes the feature scores as averages
# using emissions_summed and transitions_summed
# ( w = v/(N * T) ).
# Nothing is done for algorithms 4 and 5; the transitions are averaged
# only for algorithms 8 and 9.
sub scores_averaging {

    my ($self) = @_;

    my $ALGORITHM = $self->config->labeller_algorithm;

    # nothing to do (MIRA not used)
    return if $ALGORITHM == 4 || $ALGORITHM == 5;

    # emissions computed by MIRA
    $self->scores_averaging_emissions();

    # transitions also computed by MIRA
    $self->scores_averaging_transitions()
        if $ALGORITHM == 8 || $ALGORITHM == 9;

    return;
}
733              
# Sets each emission score in the model to its summed score
# from emissions_summed divided by number_of_inner_iterations.
sub scores_averaging_emissions {

    my ($self) = @_;

    my $divisor = $self->number_of_inner_iterations;
    my $summed  = $self->emissions_summed;

    foreach my $feature ( keys %{$summed} ) {

        my $sum_of_label = $summed->{$feature};
        foreach my $label ( keys %{$sum_of_label} ) {

            # the average score is the sum of scores over the divisor
            my $new_score = $sum_of_label->{$label} / $divisor;

            $self->model->set_feature_score( $feature, $new_score, $label );

            # only progress and/or debug info
            if ( $self->config->DEBUG >= 2 ) {
                print "$feature\t$label\t$new_score\n";
            }
        }
    }

    return;
}
764              
# Sets each transition score in the model to its summed score
# from transitions_summed->{feature}->{label_prev}->{label}
# divided by number_of_inner_iterations.
sub scores_averaging_transitions {

    my ($self) = @_;

    my $divisor = $self->number_of_inner_iterations;
    my $summed  = $self->transitions_summed;

    foreach my $feature ( keys %{$summed} ) {

        my $sums_of_prev = $summed->{$feature};
        foreach my $label_prev ( keys %{$sums_of_prev} ) {

            my $sum_of_label = $sums_of_prev->{$label_prev};
            foreach my $label ( keys %{$sum_of_label} ) {

                # the average score is the sum of scores over the divisor
                my $new_score = $sum_of_label->{$label} / $divisor;

                $self->model->set_feature_score(
                    $feature, $new_score, $label, $label_prev
                );

                # only progress and/or debug info
                if ( $self->config->DEBUG >= 2 ) {
                    print "$feature\t$label_prev\t$label\t$new_score\n";
                }
            }
        }
    }

    return;
}
804              
805             1;
806              
807             __END__
808              
809             =pod
810              
811             =for Pod::Coverage BUILD
812              
813             =encoding utf-8
814              
815             =head1 NAME
816              
817             Treex::Tool::Parser::MSTperl::TrainerLabelling
818              
819             =head1 VERSION
820              
821             version 0.11949
822              
823             =head1 DESCRIPTION
824              
825             Trains on correctly labelled sentences and so creates and tunes the model.
826             Uses single-best MIRA (McDonald et al., 2005, Proc. HLT/EMNLP).
827              
828             =head1 FIELDS
829              
830             =over 4
831              
832             =item labeller
833              
834             Reference to an instance of L<Treex::Tool::Parser::MSTperl::Labeller> which is
835             used for the training.
836              
837             =item model
838              
839             Reference to an instance of L<Treex::Tool::Parser::MSTperl::ModelLabelling>
840             which is being trained.
841              
842             =back
843              
844             =head1 METHODS
845              
846             =over 4
847              
848             =item $trainer->train($training_data);
849              
850             Trains the model, using the settings from C<config> and the training
851             data in the form of a reference to an array of labelled sentences
852             (L<Treex::Tool::Parser::MSTperl::Sentence>), which can be obtained by the
853             L<Treex::Tool::Parser::MSTperl::Reader>.
854              
855             =item $self->mira_update($sentence_correct, $sentence_best, $sumUpdateWeight)
856              
857             Performs one update of the MIRA (Margin-Infused Relaxed Algorithm) on one
858             sentence from the training data. Its input is the correct labelling of the
859             sentence (from the training data) and the best scoring labelling created by
860             the labeller.
861              
862             =back
863              
864             =head1 AUTHORS
865              
866             Rudolf Rosa <rosa@ufal.mff.cuni.cz>
867              
868             =head1 COPYRIGHT AND LICENSE
869              
870             Copyright © 2011 by Institute of Formal and Applied Linguistics, Charles
871             University in Prague
872              
873             This module is free software; you can redistribute it and/or modify it under
874             the same terms as Perl itself.