File Coverage

blib/lib/Treex/Tool/Parser/MSTperl/Labeller.pm
Criterion    Covered  Total      %
statement          1      3   33.3
branch           n/a
condition        n/a
subroutine         1      1  100.0
pod              n/a
total              2      4   50.0


line stmt bran cond sub pod time code
1             package Treex::Tool::Parser::MSTperl::Labeller;
2             {
3             $Treex::Tool::Parser::MSTperl::Labeller::VERSION = '0.11949';
4             }
5              
6 1     1   2552 use Moose;
  0            
  0            
7             use Carp;
8              
9             use Treex::Tool::Parser::MSTperl::Sentence;
10             use Treex::Tool::Parser::MSTperl::Edge;
11             use Treex::Tool::Parser::MSTperl::ModelLabelling;
12              
13             has config => (
14             isa => 'Treex::Tool::Parser::MSTperl::Config',
15             is => 'ro',
16             required => '1',
17             );
18              
19             has model => (
20             isa => 'Maybe[Treex::Tool::Parser::MSTperl::ModelLabelling]',
21             is => 'rw',
22             );
23              
24             sub BUILD {
25             my ($self) = @_;
26              
27             $self->model(
28             Treex::Tool::Parser::MSTperl::ModelLabelling->new(
29             config => $self->config,
30             )
31             );
32              
33             return;
34             }
35              
36             sub load_model {
37              
38             # (Str $filename)
39             my ( $self, $filename ) = @_;
40              
41             return $self->model->load($filename);
42             }
43              
44             sub load_model_tsv {
45              
46             # (Str $filename)
47             my ( $self, $filename ) = @_;
48              
49             return $self->model->load_tsv($filename);
50             }
51              
52             sub label_sentence {
53              
54             # (Treex::Tool::Parser::MSTperl::Sentence $sentence)
55             my ( $self, $sentence ) = @_;
56              
57             # parse sentence (does not modify $sentence)
58             my $sentence_labelled = $self->label_sentence_internal($sentence);
59              
60             return $sentence_labelled->toLabelsArray();
61             }
62              
63             sub label_sentence_internal {
64              
65             # (Treex::Tool::Parser::MSTperl::Sentence $sentence)
66             my ( $self, $sentence ) = @_;
67              
68             if ( !$self->model ) {
69             croak "MSTperl parser error: There is no model for labelling!";
70             }
71              
72             # copy the sentence (do not modify $sentence directly)
73             my $sentence_working_copy = $sentence->copy_nonlabelled();
74             $sentence_working_copy->fill_fields_before_labelling();
75              
76             if ( $self->config->DEBUG >= 2 ) {
77             print "Labelling sentence: "
78             . ( join ' ', map { $_->fields->[1] } @{ $sentence->nodes } )
79             . " \n";
80             }
81              
82             # take root's children, find best scoring labelling sequence, recursion
83             my $root = $sentence_working_copy->getNodeByOrd(0);
84             $self->label_subtree($root);
85              
86             return $sentence_working_copy;
87             }
88              
89             # assign labels to edges going from the $parent node to its children
90             # (and recurse on the children)
91             # directly modifies the sentence (or better: the nodes within the sentence)
92             sub label_subtree {
93              
94             # (Treex::Tool::Parser::MSTperl::Node $parent)
95             my ( $self, $parent ) = @_;
96              
97             my $ALGORITHM = $self->config->labeller_algorithm;
98              
99             if ( $self->config->DEBUG >= 2 ) {
100             print "Label subtree of node number " . $parent->ord . ' '
101             . $parent->fields->[1] . "\n";
102             }
103              
104             my @edges = @{ $parent->children };
105             if ( $self->config->DEBUG >= 3 ) {
106             print "There are " . scalar(@edges) . " children edges \n";
107             }
108              
109             if ( @edges == 0 ) {
110             return;
111             }
112              
113             # label the nodes using the Viterbi algorithm
114             # (this is my own implementation of Viterbi, fitted for this task)
115              
116             # States structure: for each state (its key being a label)
117             # there is an array with the current best path to it as 'path' (append-only)
118             # and its current score 'score'
119             # (computed differently for different algorithms)
120             my $states = {};
121             my $starting_state_key = $self->config->SEQUENCE_BOUNDARY_LABEL;
122              
123             # the indices correspond to the algorithm numbers
124             my @starting_scores = (
125              
126             # 0 1 2 3 4 5 6 7 8 9
127             1e300, 1, 1, 1e300, 1e300, 1e300, -1, -1, 0, 0,
128              
129             # 10 11 12 13 14 15 16 17 18 19 20 21
130             1e300, 1e300, 1e300, 1e300, 1e300, 1e300, 0, 0, 0, 1, 0, 0,
131             );
132              
133             # path could be constructed by backpointers
134             # but one would have to keep all the states the whole time
135             $states->{$starting_state_key} = {
136             'path' => [ $self->config->SEQUENCE_BOUNDARY_LABEL ],
137             'score' => $starting_scores[$ALGORITHM],
138             };
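                # illustrative only (an assumed example, not from a real run):
                # with algorithm 1 and SEQUENCE_BOUNDARY_LABEL configured to e.g. '###',
                # $states would now be:
                #   { '###' => { 'path' => ['###'], 'score' => 1 } }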
139              
140             # run Viterbi
141             # In each cycle a new set of states is generated and assigned to $states,
142             # so at the end it suffices to find the state with the best score in $states
143             # and use its path as the result.
144             my $prev_edge = undef;
145             foreach my $edge (@edges) {
146              
147             # only progress and/or debug info
148             if ( $self->config->DEBUG >= 3 ) {
149             print " Labelling edge to node "
150             . $edge->child->ord . ' ' . $edge->child->fields->[1] . "\n";
151             print " Currently there are "
152             . ( keys %$states ) . " states\n";
153             }
154              
155             # do one Viterbi step - assign possible labels to $edge
156             # (including appropriate scores of course)
157             $states = $self->label_edge( $edge, $states, $prev_edge );
158              
159             if ( $ALGORITHM == 20 ) {
160              
161             # set the best label
162             my $best_state_label = $self->find_best_state_label($states);
163             $edge->child->label($best_state_label);
164             }
165              
166             $prev_edge = $edge;
167             }
168              
169             # TODO: foreach last state multiply its score
170             # by the label->sequence_boundary probability
171              
172             if ( $ALGORITHM != 20 ) {
173              
174             # End - find the state with the best score - this is the result
175             my $best_state_label = $self->find_best_state_label($states);
176              
177             if ($best_state_label) {
178              
179             my @labels = @{ $states->{$best_state_label}->{'path'} };
180              
181             # get rid of SEQUENCE_BOUNDARY_LABEL
182             shift @labels;
183              
184             # only progress and/or debug info
185             if ( $self->config->DEBUG >= 2 ) {
186             print "best state $best_state_label score: "
187             . $states->{$best_state_label}->{'score'} . "\n";
187             print "best path: "
188             . ( join ' ', @labels )
189             . "\n";
190             }
191              
192             foreach my $edge (@edges) {
193             my $label = shift @labels;
194             $edge->child->label($label)
195             }
196              
197             } else {
198              
199             # TODO do not die, provide some backoff instead
200             # (do some smoothing, at least when no states are generated)
201             print "No best state generated, cannot label the sentence!"
202             . " (This is weird.)\n";
203             }
204             } # else: edges are labelled in each step
205              
206             # end of Viterbi
207              
208             # recursion
209             foreach my $edge (@edges) {
210             $self->label_subtree( $edge->child );
211             }
212              
213             return;
214             }
215              
216             sub find_best_state_label {
217              
218             my ( $self, $states ) = @_;
219              
220             # "negative infinity" (works both with real probs and with their logs)
221             my $best_state_score = -999999999;
222             my $best_state_label = undef;
223              
224             foreach my $state_label ( keys %$states ) {
225             if ( $self->config->DEBUG >= 4 ) {
226             print "state $state_label score: "
227             . $states->{$state_label}->{'score'} . "\n";
228             }
229             if ( $states->{$state_label}->{'score'} > $best_state_score ) {
230             $best_state_label = $state_label;
231             $best_state_score = $states->{$state_label}->{'score'};
232             }
233             }
234              
235             # only progress and/or debug info
236             if ( $self->config->DEBUG >= 2 ) {
237             print "best state $best_state_label score: "
238             . $best_state_score . "\n";
239             }
240              
241             return $best_state_label;
242             }
243              
244             # used as an internal part of label_subtree
245             # to get all probable labels for an edge
246             # i.e. make one step of the Viterbi algorithm
247             sub label_edge {
248              
249             my ( $self, $edge, $states, $prev_edge ) = @_;
250              
251             my $ALGORITHM = $self->config->labeller_algorithm;
252             my $new_states = {};
253             foreach my $last_state ( keys %$states ) {
254              
255             if ( $ALGORITHM == 21 && defined $prev_edge ) {
256              
257             # set last label
258             my $best_prev_state_label = $self->find_best_state_label($states);
259             $prev_edge->child->label($best_prev_state_label);
260             }
261              
262             # only progress and/or debug info
263             if ( $self->config->DEBUG >= 4 ) {
264             print " Processing state $last_state (score "
265             . $states->{$last_state}->{'score'} . ")\n";
266             }
267              
268             # compute the possible labels scores (typically probabilities),
269             # i.e. products of emission and transition probs
270             # possible_labels{label} = score
271             my %possible_labels = %{
272             $self->get_possible_labels(
273             $edge,
274             $last_state,
275             $states->{$last_state}->{'score'},
276             )
277             };
278              
279             # only progress and/or debug info
280             if ( $self->config->DEBUG >= 4 ) {
281             print " " . scalar( keys %possible_labels )
282             . " possible labels are "
283             . ( join ' ', keys %possible_labels )
284             . "\n";
285             }
286              
287             foreach my $new_label ( keys %possible_labels ) {
288             my $new_state_score = $possible_labels{$new_label};
289              
290             # only progress and/or debug info
291             if ( $self->config->DEBUG >= 5 ) {
292             print " Trying label $new_label, "
293             . "score $new_state_score\n";
294             print " Old state path "
295             . ( join ' ', @{ $states->{$last_state}->{'path'} } )
296             . " \n";
297             print " Old states: "
298             . (
299             join ' ',
300             map {
301             "$_ (" . (
302             join ' ', @{ $states->{$_}->{'path'} }
303             ) . ")"
304             } keys %$states
305             )
306             . " \n";
307             print " New states: "
308             . (
309             join ' ',
310             map {
311             "$_ (" . (
312             join ' ', @{ $new_states->{$_}->{'path'} }
313             ) . ")"
314             } keys %$new_states
315             )
316             . " \n";
317             }
318              
319             # test if this is the best
320             if (defined $new_states->{$new_label}
321             && $new_states->{$new_label}->{'score'} > $new_state_score
322             )
323             {
324              
325             # there is already a state with the same label
326             # and a higher score
327             next;
328             }
329              
330             # else either such a state is not there yet, or it is but its score
331             # is lower than $new_state_score -> set it (or replace it)
332              
333             my @new_state_path = @{ $states->{$last_state}->{'path'} };
334             push @new_state_path, $new_label;
335             $new_states->{$new_label} = {
336             'path' => \@new_state_path,
337             'score' => $new_state_score,
338             };
339              
340             # only progress and/or debug info
341             if ( $self->config->DEBUG >= 5 ) {
342             print " New state path "
343             . ( join ' ', @new_state_path )
344             . " \n";
345             print " Old states: "
346             . (
347             join ' ',
348             map {
349             "$_ (" . (
350             join ' ', @{ $states->{$_}->{'path'} }
351             ) . ")"
352             } keys %$states
353             )
354             . " \n";
355             print " New states: "
356             . (
357             join ' ',
358             map {
359             "$_ (" . (
360             join ' ', @{ $new_states->{$_}->{'path'} }
361             ) . ")"
362             } keys %$new_states
363             )
364             . " \n";
365             }
366             } # foreach $new_label
367              
368             # only progress and/or debug info
369             if ( $self->config->DEBUG >= 4 ) {
370             print " Now there are "
371             . ( keys %$new_states ) . " new states\n";
372             }
373             } # foreach $last_state
374              
375             $new_states = $self->prune($new_states);
376              
377             return $new_states;
378             }
379              
380             # prune the states (keep only some of the best)
381             sub prune {
382              
383             my ( $self, $states ) = @_;
384              
385             # pruning
386              
387             # pruning type 1 commented out because emission scores are not probs
388             # (they do not sum up to 1)
389             #
390             # pruning: keep as many best states so that their normed prob
391             # sums up to at least $VITERBI_STATES_PROB_SUM_THRESHOLD
392             # %states = ();
393             # my @best_states = sort {
394             # $new_states{$b}->{'score'}
395             # <=> $new_states{$a}->{'score'}
396             # } keys %new_states;
397             # my $prob_sum = 0;
398             # foreach my $state (@best_states) {
399             # $prob_sum += $new_states{$state}->{'score'};
400             # }
401             # my $threshold = $prob_sum
402             # * $VITERBI_STATES_PROB_SUM_THRESHOLD;
403             # my $best_prob_sum = 0;
404             # while ($best_prob_sum < $threshold) {
405             # my $state = shift @best_states;
406             # $states{$state} = $new_states{$state};
407             # $best_prob_sum += $new_states{$state}->{'score'};
408             # }
409              
410             # states going from pruning phase 1 to pruning phase 2
411             # %new_states = %states;
412              
413             # simple pruning: keep n best states
414             my $new_states = {};
415             my @best_states
416             = sort {
417             $states->{$b}->{'score'} <=> $states->{$a}->{'score'}
418             } keys %$states;
419             for (
420             my $i = 0;
421             @best_states && $i < $self->config->VITERBI_STATES_NUM_THRESHOLD;
422             $i++
423             )
424             {
425             my $state = shift @best_states;
426             $new_states->{$state} = $states->{$state};
427              
428             # only progress and/or debug info
429             if ( $self->config->DEBUG >= 5 ) {
430             print " Pruning let through the state $state"
431             . "\n";
432             }
433             }
434              
435             # only progress and/or debug info
436             if ( $self->config->DEBUG >= 4 ) {
437             print " After pruning there are "
438             . ( keys %$new_states ) . " states\n";
439             }
440              
441             return $new_states;
442             }
443              
444             # computes possible labels for an edge, using info about
445             # the emission scores, transition scores and last state's score
446             sub get_possible_labels {
447             my ( $self, $edge, $previous_label, $previous_label_score ) = @_;
448              
449             my $ALGORITHM = $self->config->labeller_algorithm;
450              
451             if ($ALGORITHM == 8
452             || $ALGORITHM == 9
453             || $ALGORITHM == 16
454             || $ALGORITHM == 17
455             || $ALGORITHM == 18
456             || $ALGORITHM == 19
457             || $ALGORITHM >= 20
458             )
459             {
460              
461             my $result = {};
462             my $all_labels = $self->model->get_all_labels();
463              
464             # foreach possible label (actually foreach existing label)
465             foreach my $label ( @{$all_labels} ) {
466              
467             # score = previous score + new score
468              
469             if ( $ALGORITHM == 20 ) {
470             if ( $self->config->DEBUG >= 4 ) {
471             print " Score for label $label: "
472             . (
473             $self->model->get_label_score(
474             $label, $previous_label, $edge->features_all_labeller()
475             )
476             ) . "\n";
477             }
478             $result->{$label} =
479             $self->model->get_label_score(
480             $label, $previous_label, $edge->features_all_labeller()
481             )
482             ;
483             } elsif ( $ALGORITHM == 19 ) {
484             if ( $self->config->DEBUG >= 4 ) {
485             print " Score for label $label: $previous_label_score + "
486             . (
487             $self->model->get_label_score(
488             $label, $previous_label, $edge->features_all_labeller()
489             )
490             ) . "\n";
491             }
492             $result->{$label} =
493             $previous_label_score
494             * $self->model->get_label_score(
495             $label, $previous_label, $edge->features
496             )
497             ;
498             } elsif ( $ALGORITHM == 21 ) {
499             if ( $self->config->DEBUG >= 4 ) {
500             print " Score for label $label: $previous_label_score + "
501             . (
502             $self->model->get_label_score(
503             $label, $previous_label, $edge->features_all_labeller()
504             )
505             ) . "\n";
506             }
507             $result->{$label} =
508             $previous_label_score
509             + $self->model->get_label_score(
510             $label, $previous_label, $edge->features_all_labeller()
511             )
512             ;
513             } elsif ( $ALGORITHM == 18 ) {
514             $result->{$label} =
515             $self->model->get_label_score(
516             $label, $previous_label, $edge->features
517             )
518             ;
519             } else {
520             if ( $self->config->DEBUG >= 4 ) {
521             print " Score for label $label: $previous_label_score + "
522             . (
523             $self->model->get_label_score(
524             $label, $previous_label, $edge->features
525             )
526             ) . "\n";
527             }
528             $result->{$label} =
529             $previous_label_score
530             + $self->model->get_label_score(
531             $label, $previous_label, $edge->features
532             )
533             ;
534             }
535              
536             } # end foreach $label
537              
538             return $result;
539              
540             } else { # $ALGORITHM not in 8, 9 or >= 16
541              
542             my $emission_scores =
543             $self->model->get_emission_scores( $edge->features );
544              
545             my $transition_scores = {};
546             foreach my $possible_label ( keys %$emission_scores ) {
547             $transition_scores->{$possible_label} =
548             $self->model->get_transition_score(
549             $possible_label, $previous_label
550             );
551             }
552              
553             my $possible_labels = $self->get_possible_labels_internal(
554             $emission_scores,
555             $transition_scores,
556             $previous_label_score,
557             );
558              
559             if ( scalar( keys %$possible_labels ) > 0 ) {
560             return $possible_labels;
561             } else {
562              
563             # no possible states generated -> backoff
564             my %unigrams_copy = %{ $self->model->unigrams };
565             my $blind_scores = \%unigrams_copy;
566              
567             # TODO: smoothing (now a very stupid way is used
568             # just to lower the scores below the unblind scores)
569             foreach my $label ( keys %$blind_scores ) {
570             $blind_scores->{$label} *= 0.01;
571             }
572              
573             # fall back to unigram distribution for transitions
574             # TODO: smoothing of bigram, unigram and uniform distros
575             # for transitions
576             # (this fallback should then become obsolete)
577             $possible_labels = $self->get_possible_labels_internal(
578             $emission_scores,
579             $blind_scores,
580             $previous_label_score,
581             );
582              
583             if ( scalar( keys %$possible_labels ) > 0 ) {
584             return $possible_labels;
585             } else {
586             warn "Based on the training data, no possible label was found"
587             . " for an edge. This usually means that either"
588             . " your training data are not big enough or"
589             . " the set of features you are using"
590             . " is not well constructed - either it is too small"
591             . " or it lacks features that would be general enough"
592             . " to cover all possible sentences."
593             . " Using blind emission probabilities instead.\n"
594             . " get_possible_labels ($edge, $previous_label,"
595             . " $previous_label_score ) \n"
596             . $edge->sentence->id
597             . ': '
598             . $edge->parent->fields->[1]
599             . " -> "
600             . $edge->child->fields->[1]
601             . "\n";
602              
603             # TODO: these are more or less probabilities, which might
604             # be inappropriate for some of the
605             # algorithms -> recompute somehow;
606             # also they do contain the SEQUENCE_BOUNDARY_LABEL prob
607              
608             $possible_labels = $self->get_possible_labels_internal(
609             $blind_scores,
610             $blind_scores,
611             $previous_label_score,
612             );
613              
614             if ( scalar( keys %$possible_labels ) > 0 ) {
615             return $possible_labels;
616             } else {
617             warn "no possible labels generated, no fallback helped:"
618             . " probably there is a bug in the code!"
619             . " get_possible_labels ($edge, $previous_label,"
620             . " $previous_label_score ) "
621             . $edge->parent->fields->[1]
622             . " -> "
623             . $edge->child->fields->[1]
624             . "\n";
625             return {};
626             } # end no backoff helped
627             } # end backoff by unigrams for emission and transition scores
628             } # end backoff by unigrams for transition scores
629             } # end $ALGORITHM not in 8, 9 or >= 16
630             }
631              
632             sub get_possible_labels_internal {
633             my ( $self, $emission_scores, $transition_scores, $last_state_score ) = @_;
634              
635             # $emission_scores are often not really probs but scores
636              
637             my $ALGORITHM = $self->config->labeller_algorithm;
638              
639             if ($ALGORITHM == 8
640             || $ALGORITHM == 9
641             || $ALGORITHM == 16
642             || $ALGORITHM == 17
643             || $ALGORITHM == 18
644             || $ALGORITHM == 19
645             )
646             {
647              
648             # these algorithms have such a simple way of computing possible labels
649             # that they do not need to have it split into two subroutines
650             croak "Labeller->get_possible_labels_internal not implemented"
651             . " for algorithm no $ALGORITHM!";
652             }
653              
654             my %possible_labels;
655             foreach my $possible_label ( keys %$emission_scores ) {
656             my $emission_score = $emission_scores->{$possible_label};
657             my $transition_score = $transition_scores->{$possible_label};
658             if ( !defined $transition_score ) {
659             next;
660             }
661              
662             my $possible_state_score;
663             if ( $ALGORITHM == 2 ) {
664             $possible_state_score
665             = $last_state_score + $emission_score * $transition_score;
666             } else {
667             $possible_state_score
668             = $last_state_score * $emission_score * $transition_score;
669             }
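                # illustrative only (made-up numbers): with the multiplicative
                # variant, a last state score of 0.5, an emission score of 0.2
                # and a transition score of 0.3 give 0.5 * 0.2 * 0.3 = 0.03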
670              
671             # if no state like that yet or better than current max,
672             # use this new state
673             if ($possible_state_score > 0
674             && (!$possible_labels{$possible_label}
675             || $possible_labels{$possible_label}
676             < $possible_state_score
677             )
678             )
679             {
680             $possible_labels{$possible_label} = $possible_state_score;
681             }
682              
683             # else we already have a state with the same key but higher score
684             } # end foreach $possible_label
685              
686             return \%possible_labels;
687             }
688              
689             1;
690              
691             __END__
692              
693              
694             =pod
695              
696             =for Pod::Coverage BUILD
697              
698             =encoding utf-8
699              
700             =head1 NAME
701              
702             Treex::Tool::Parser::MSTperl::Labeller - pure Perl implementation
703             of a dependency tree labeller for the MST parser
704              
705             =head1 VERSION
706              
707             version 0.11949
708              
709             =head1 DESCRIPTION
710              
711             This is a Perl implementation of the labeller for the MST Parser
712             which is (most probably) described in
713             McDonald, Ryan: Discriminative Learning And Spanning Tree Algorithms
714             For Dependency Parsing, 2006 (chapter 3.3.3 Two-Stage Labelling).
715              
716             For a dependency parse tree - presumably, but not necessarily, obtained using
717             the MST parser (L<Treex::Tool::Parser::MSTperl::Parser>), possibly
718             non-projective - assigns the most probable labels to the edges of the tree,
719             using the given model (L<Treex::Tool::Parser::MSTperl::ModelLabelling>).
720              
721             Assigning labels is implemented as sequence labelling, where the sequence to
722             be labelled is each sequence of edges between a parent node and its children.
723             First-order Markov factorization is used, but because we do the labelling as a
724             separate second stage to the parsing, many non-local features can be used,
725             exploiting the knowledge of the structure of the whole tree; also we go from
726             the root downwards and are therefore able to use the knowledge of labels
727             assigned to ancestor edges (especially the edge leading to the common parent
728             node).
729              
730             Each label is technically stored with the child node of the edge it belongs to,
731             so sometimes I will talk about a node label, which actually means the label
732             of the edge between the node and its parent.
733              
734             A variant of the Viterbi algorithm is used to find the best scoring sequence
735             of labels. However, instead of probabilities we use (real-valued) scores and
736             therefore instead of multiplication addition is used in Viterbi.
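
                A minimal sketch of one Viterbi step in the additive (score-based) variants;
                the variable names here are illustrative only, the actual computation is done
                in C<get_possible_labels>:

                    # score of extending the path ending in $previous_label by $new_label
                    my $new_score = $previous_state_score
                        + $model->get_label_score( $new_label, $previous_label, $features );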
737              
738             For detail on the features and training see
739             L<Treex::Tool::Parser::MSTperl::TrainerLabelling>.
740              
741              
742             I have used several sources of information to implement it, especially:
743              
744             Kevin Gimpel and Shay Cohen:
745             Discriminative Online Algorithms for Sequence Labeling - A Comparative Study
746             (2007)
747              
748             And also parts of these:
749              
750             Jun’ichi Kazama and Kentaro Torisawa:
751             A New Perceptron Algorithm for Sequence Labeling with Non-local Features
752             (2007)
753              
754             Wenliang Chen, Yujie Zhang, Hitoshi Isahara:
755             A Two-stage Parser for Multilingual Dependency Parsing
756             (2007)
757              
758             Binyamin Rozenfeld, Ronen Feldman, Moshe Fresko:
759             A Systematic Cross-Comparison of Sequence Classifiers
760             (2004?)
761              
762             =head1 FIELDS
763              
764             =over 4
765              
766             =item config
767              
768             Instance of L<Treex::Tool::Parser::MSTperl::Config> containing settings to be
769             used with the labeller.
770              
771             Currently the settings most relevant to the Labeller are the following:
772              
773             =over 8
774              
775             =item VITERBI_STATES_NUM_THRESHOLD
776              
777             See L<Treex::Tool::Parser::MSTperl::Config/VITERBI_STATES_NUM_THRESHOLD>.
778              
779             =item labeller_algorithm
780              
781             See L<Treex::Tool::Parser::MSTperl::Config/labeller_algorithm>.
782              
783             =item labelledFeaturesControl
784              
785             See L<Treex::Tool::Parser::MSTperl::Config/labelledFeaturesControl>.
786              
787             =item SEQUENCE_BOUNDARY_LABEL
788              
789             See L<Treex::Tool::Parser::MSTperl::Config/SEQUENCE_BOUNDARY_LABEL>.
790              
791             =back
792              
793             =item model
794              
795             An instance of L<Treex::Tool::Parser::MSTperl::ModelLabelling>
796             used to label the trees. It can be changed if needed (it is usually needed
797             when training the labeller).
798              
799             =back
800              
801             =head1 METHODS
802              
803             =over 4
804              
805             =item my $labeller = Treex::Tool::Parser::MSTperl::Labeller->new(
806             config => $self->config);
807              
808             Creates an instance of the labeller using the given configuration, also
809             initializing the model.
810              
811             =item $labeller->load_model('modelfile.lmodel');
812              
813             =item $labeller->load_model_tsv('modelfile.tsv');
814              
815             Loads a labelling model
816             using L<Treex::Tool::Parser::MSTperl::ModelBase/load>
817             or L<Treex::Tool::Parser::MSTperl::ModelBase/load_tsv>.
818              
819             A model has to be loaded or created in some other way
820             before sentences can be labelled.
821              
822             =item $labeller->label_sentence($sentence);
823              
824             Labels a sentence (instance of L<Treex::Tool::Parser::MSTperl::Sentence>). It
825             finds the best labels for the sentence and returns them as an array reference.
826              
827             The given sentence is left intact and any labelling information already
828             contained in the sentence is disregarded.
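
                A typical usage sketch (file names are illustrative; the C<config_file>
                constructor parameter is assumed here, see L<Treex::Tool::Parser::MSTperl::Config>
                for the actual interface; C<$sentence> is an already built
                L<Treex::Tool::Parser::MSTperl::Sentence>):

                    use Treex::Tool::Parser::MSTperl::Config;
                    use Treex::Tool::Parser::MSTperl::Labeller;

                    my $config = Treex::Tool::Parser::MSTperl::Config->new(
                        config_file => 'english.config',
                    );
                    my $labeller = Treex::Tool::Parser::MSTperl::Labeller->new(
                        config => $config,
                    );
                    $labeller->load_model('english.lmodel');
                    my $labels = $labeller->label_sentence($sentence);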
829              
830             =item $labeller->label_sentence_internal($sentence);
831              
832             Does the actual labelling, returning a labelled instance of
833             L<Treex::Tool::Parser::MSTperl::Sentence>. The C<label_sentence> sub is
834             actually only a wrapper for this method which extracts the labels of the
835             nodes and returns these. If you are only interested in getting the labels,
836             use C<label_sentence>, but if it is handy for you to get a whole instance of
837             L<Treex::Tool::Parser::MSTperl::Sentence> (which is labelled), use
838             C<label_sentence_internal>. The subroutines are otherwise equivalent.
839              
840             The given sentence is left intact and any labelling information already
841             contained in the sentence is disregarded. All other information from the
842             given sentence is copied to the returned sentence; the two sentences differ
843             only in the labels assigned.
844              
845             =item $labeller->label_subtree($parent);
846              
847             Assign labels to edges going from the $parent node to its children
848             (and recurse on the children).
849             Directly modifies the sentence
850             (or more precisely: the nodes within the sentence).
851              
852             =item $labeller->label_edge($edge, $states, $prev_edge);
853              
854             Used as an internal part of C<label_subtree>
855             to get all probable labels for the edge $edge,
856             i.e. to make one step of the Viterbi algorithm.
857             $states is a hash reference of all currently active Viterbi states; $prev_edge is the previously processed edge, if any.
858              
859             =item $labeller->prune($states);
860              
861             Prunes the states (passed as a hash ref); currently does n-best pruning
862             with n = C<< config->VITERBI_STATES_NUM_THRESHOLD >>, keeping at most the
863             n best scoring states (all states if there are fewer than n). It is called
864             after each Viterbi step, i.e. at the end of each call of C<label_edge>.
865             Returns the new pruned states (does not modify its input).
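
                The idea in a simplified sketch (not the exact code above; C<$n> stands for
                the configured threshold):

                    my @sorted = sort { $states->{$b}->{'score'} <=> $states->{$a}->{'score'} }
                        keys %$states;
                    my $last   = $#sorted < $n - 1 ? $#sorted : $n - 1;
                    my %pruned = map { $_ => $states->{$_} } @sorted[ 0 .. $last ];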
866              
867             =item $labeller->get_possible_labels($edge, $previous_label,
868             $previous_label_score);
869              
870             Computes possible labels for an edge, using info about the emission scores,
871             transition scores and last state's score. (Because of the first order Markov
872             factorization used, each Viterbi state corresponds to the label assigned to
873             the edge it represents.)
874              
875             =item $labeller->get_possible_labels_internal
876             ($emission_scores, $transition_scores, $last_state_score);
877              
878             Does the actual generation of possible labels for an edge, where the emission
879             and transition scores are already computed for this particular edge.
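
                For the probability-based algorithms this boils down to the following
                (a simplified restatement of the computation, with an illustrative hash name;
                algorithm 2 adds the last state score instead of multiplying by it):

                    $score_of{$possible_label} = $last_state_score
                        * $emission_scores->{$possible_label}
                        * $transition_scores->{$possible_label};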
880              
881             =back
882              
883             =head1 AUTHORS
884              
885             Rudolf Rosa <rosa@ufal.mff.cuni.cz>
886              
887             =head1 COPYRIGHT AND LICENSE
888              
889             Copyright © 2011 by Institute of Formal and Applied Linguistics, Charles
890             University in Prague
891              
892             This module is free software; you can redistribute it and/or modify it under
893             the same terms as Perl itself.