File Coverage

blib/lib/Treex/Tool/Parser/MSTperl/Parser.pm
Criterion Covered Total %
statement 1 3 33.3
branch n/a
condition n/a
subroutine 1 1 100.0
pod n/a
total 2 4 50.0


line stmt bran cond sub pod time code
1             package Treex::Tool::Parser::MSTperl::Parser;
2             {
3             $Treex::Tool::Parser::MSTperl::Parser::VERSION = '0.11949';
4             }
5              
6 1     1   2335 use Moose;
  0            
  0            
7             use Carp;
8              
9             use Treex::Tool::Parser::MSTperl::Sentence;
10             use Treex::Tool::Parser::MSTperl::Edge;
11             use Treex::Tool::Parser::MSTperl::ModelUnlabelled;
12              
13             use Graph 0.94;
14             use Graph::ChuLiuEdmonds 0.05; #returns MINIMUM spanning tree
15              
16             has config => (
17             isa => 'Treex::Tool::Parser::MSTperl::Config',
18             is => 'ro',
19             required => '1',
20             );
21              
22             has model => (
23             isa => 'Maybe[Treex::Tool::Parser::MSTperl::ModelUnlabelled]',
24             is => 'rw',
25             );
26              
27             sub BUILD {
28             my ($self) = @_;
29              
30             $self->model(
31             Treex::Tool::Parser::MSTperl::ModelUnlabelled->new(
32             config => $self->config,
33             )
34             );
35              
36             return;
37             }
38              
39             sub load_model {
40              
41             # (Str $filename)
42             my ( $self, $filename ) = @_;
43              
44             return $self->model->load($filename);
45             }
46              
47             sub parse_sentence {
48              
49             # (Treex::Tool::Parser::MSTperl::Sentence $sentence)
50             my ( $self, $sentence ) = @_;
51              
52             # parse sentence (does not modify $sentence)
53             my $sentence_parsed = $self->parse_sentence_internal($sentence);
54              
55             return $sentence_parsed->toParentOrdsArray();
56             }
57              
58             sub parse_sentence_internal {
59              
60             # (Treex::Tool::Parser::MSTperl::Sentence $sentence)
61             my ( $self, $sentence ) = @_;
62              
63             if ( !$self->model ) {
64             croak "MSTperl parser error: There is no model for unlabelled parsing!";
65             }
66              
67             # copy the sentence (do not modify $sentence directly)
68             my $sentence_working_copy = $sentence->copy_nonparsed();
69             my $sentence_length = $sentence_working_copy->len();
70              
71             my $graph = Graph->new(
72             vertices => [ ( 0 .. $sentence_length ) ]
73             );
74             my @weighted_edges;
75             if ( $self->config->DEBUG >= 2 ) { print "EDGES (parent -> child):\n"; }
76             foreach my $child ( @{ $sentence_working_copy->nodes } ) {
77             foreach my $parent ( @{ $sentence_working_copy->nodes_with_root } ) {
78             if ( $child == $parent ) {
79             next;
80             }
81              
82             my $edge = Treex::Tool::Parser::MSTperl::Edge->new(
83             child => $child,
84             parent => $parent,
85             sentence => $sentence_working_copy
86             );
87              
88             my $features = $self->config->unlabelledFeaturesControl
89             ->get_all_features($edge);
90             my $score = $self->model->score_features($features);
91              
92             # only progress and/or debug info
93             if ( $self->config->DEBUG >= 2 ) {
94             print $parent->ord . ' ' . $parent->fields->[1] .
95             ' -> ' . $child->ord . ' ' . $child->fields->[1] .
96             ' score: ' . $score . "\n";
97             print $parent->ord .
98             ' -> ' . $child->ord .
99             ' score: ' . $score . "\n";
100             foreach my $feature ( @{$features} ) {
101             print $feature . ", ";
102             }
103             print "\n";
104             print "\n";
105             }
106              
107             # MaxST needed but MinST is computed
108             # -> need to normalize score as -$score
109             push @weighted_edges, ( $parent->ord, $child->ord, -$score );
110             }
111             }
112              
113             # only progress and/or debug info
114             if ( $self->config->DEBUG >= 2 ) {
115             print "GRAPH:\n";
116             print join " ", @weighted_edges;
117             print "\n";
118             }
119              
120             $graph->add_weighted_edges(@weighted_edges);
121              
122             my $msts = $graph->MST_ChuLiuEdmonds($graph);
123              
124             if ( $self->config->DEBUG >= 2 ) { print "RESULTS (parent -> child):\n"; }
125              
126             #results
127             foreach my $edge ( $msts->edges ) {
128             my ( $parent, $child ) = @$edge;
129             $sentence_working_copy->setChildParent( $child, $parent );
130              
131             if ( $self->config->DEBUG >= 2 ) {
132             print "$parent -> $child\n";
133             }
134             }
135              
136             return $sentence_working_copy;
137             }
138              
139             1;
140              
141             __END__
142              
143              
144             =pod
145              
146             =for Pod::Coverage BUILD
147              
148             =encoding utf-8
149              
150             =head1 NAME
151              
152             Treex::Tool::Parser::MSTperl::Parser - pure Perl implementation of MST parser
153              
154             =head1 VERSION
155              
156             version 0.11949
157              
158             =head1 DESCRIPTION
159              
160             This is a Perl implementation of the MST Parser described in
161             McDonald et al.:
162             Non-projective Dependency Parsing using Spanning Tree Algorithms
163             2005
164             in Proc. HLT/EMNLP.
165              
166             =head1 METHODS
167              
168             =over 4
169              
170             =item $parser->load_model('modelfile.model');
171              
172             Loads an unlabelled and/or a labelled model (= sets feature weights)
173             using L<Treex::Tool::Parser::MSTperl::ModelBase/load>.
174              
175             A model has to be loaded before sentences can be parsed.
176              
177             =item $parser->parse_sentence($sentence);
178              
179             Parses a sentence (instance of L<Treex::Tool::Parser::MSTperl::Sentence>). It
180             sets the C<parent> field of each node (instance of
181             L<Treex::Tool::Parser::MSTperl::Node>), i.e. a word in the sentence, and also
182             returns these parents as an array reference.
183              
184             Any parse information already contained in the sentence gets discarded
185             (explicitely, by calling
186             L<Treex::Tool::Parser::MSTperl::Sentence/copy_nonparsed>).
187              
188             =item $parser->parse_sentence_internal($sentence);
189              
190             Does the actual parsing, returning a parsed instance of
191             L<Treex::Tool::Parser::MSTperl::Sentence>. The C<parse_sentence> sub is
192             actually only a wrapper for this method which extracts the parents of the
193             nodes and returns these.
194              
195             =back
196              
197             =head1 AUTHORS
198              
199             Rudolf Rosa <rosa@ufal.mff.cuni.cz>
200              
201             ZdenÄ›k Žabokrtský <zabokrtsky@ufal.mff.cuni.cz>
202              
203             =head1 COPYRIGHT AND LICENSE
204              
205             Copyright © 2011 by Institute of Formal and Applied Linguistics, Charles
206             University in Prague
207              
208             This module is free software; you can redistribute it and/or modify it under
209             the same terms as Perl itself.