File Coverage

blib/lib/Treex/Core/Node/P.pm
Criterion Covered Total %
statement 92 138 66.6
branch 18 40 45.0
condition 12 21 57.1
subroutine 11 15 73.3
pod 9 9 100.0
total 142 223 63.6


line stmt bran cond sub pod time code
1             package Treex::Core::Node::P;
2             $Treex::Core::Node::P::VERSION = '2.20210102';
3 24     24   342 use namespace::autoclean;
  24         78  
  24         225  
4              
5 24     24   1959 use Moose;
  24         62  
  24         170  
6 24     24   165032 use Treex::Core::Common;
  24         63  
  24         222  
7             extends 'Treex::Core::Node';
8              
9             # dirty: merging terminal and nonterminal nodes' attributes
10              
11             # common:
12             has [qw(is_head index coindex edgelabel)] => ( is => 'rw' );
13              
14             # terminal specific
15             has [qw(form lemma tag)] => ( is => 'rw' );
16              
17             # non-terminal specific
18             has [qw( phrase functions )] => ( is => 'rw' );
19              
20             sub get_pml_type_name {
21 136     136 1 253 my ($self) = @_;
22              
23 136 100 100     381 if ( $self->is_root() or $self->phrase or ($self->{'#name'} || '') eq 'nonterminal') {
    50 50        
      100        
      50        
      33        
24 100         521 return 'p-nonterminal.type';
25             }
26             elsif ( $self->tag or ($self->{'#name'} || '') eq 'terminal') {
27 36         131 return 'p-terminal.type';
28             }
29             else {
30 0         0 return;
31             }
32             }
33              
34             sub is_terminal {
35 61     61 1 104 my $self = shift @_;
36 61 50       143 return $self->get_pml_type_name eq 'p-terminal.type' ? 1 : 0;
37             }
38              
39             sub create_child {
40 0     0 1 0 my $self = shift;
41 0         0 log_warn 'With Treex::Core::Node::P you should you either create_terminal_child() or create_nonterminal_child() instead of create_child()';
42 0         0 return $self->SUPER::create_child(@_);
43             }
44              
45             sub create_nonterminal_child {
46 25     25 1 1368 my $self = shift @_;
47 25 50       64 log_warn 'Adding a child to a terminal p-node ' . $self->id if $self->is_terminal();
48 25         45 my $arg_ref;
49 25 100 66     122 if ( scalar @_ == 1 && ref $_[0] eq 'HASH' ) {
    50          
50 1         2 $arg_ref = $_[0];
51             }
52             elsif ( @_ % 2 ) {
53 0         0 log_fatal "Odd number of elements for create_nonterminal_child";
54             }
55             else {
56 24         50 $arg_ref = {@_};
57             }
58 25         64 $arg_ref->{'#name'} = 'nonterminal';
59 25         78 my $fs_file = $self->get_bundle->get_document()->_pmldoc;
60 25         103 my $child = $self->SUPER::create_child($arg_ref);
61 25         74 $child->set_type_by_name( $fs_file->metaData('schema'), 'p-nonterminal.type' );
62 25         912 return $child;
63             }
64              
65             sub create_terminal_child {
66 36     36 1 107 my $self = shift @_;
67 36 50       80 log_warn 'Adding a child to a terminal p-node ' . $self->id if $self->is_terminal();
68 36         65 my $arg_ref;
69 36 50 33     139 if ( scalar @_ == 1 && ref $_[0] eq 'HASH' ) {
    50          
70 0         0 $arg_ref = $_[0];
71             }
72             elsif ( @_ % 2 ) {
73 0         0 log_fatal "Odd number of elements for create_terminal_child";
74             }
75             else {
76 36         72 $arg_ref = {@_};
77             }
78 36         91 $arg_ref->{'#name'} = 'terminal';
79 36         110 my $fs_file = $self->get_bundle->get_document()->_pmldoc;
80 36         133 my $child = $self->SUPER::create_child($arg_ref);
81 36         123 $child->set_type_by_name( $fs_file->metaData('schema'), 'p-terminal.type' );
82 36         1345 return $child;
83             }
84              
85             sub create_from_mrg {
86 3     3 1 28 my ( $self, $mrg_string ) = @_;
87              
88             # normalize spaces
89 3         109 $mrg_string =~ s/([()])/ $1 /g;
90 3         84 $mrg_string =~ s/\s+/ /g;
91 3         15 $mrg_string =~ s/^ //g;
92 3         16 $mrg_string =~ s/ $//g;
93              
94             # remove back brackets (except for round)
95 3         12 $mrg_string =~ s/-LSB-/\[/g;
96 3         10 $mrg_string =~ s/-RSB-/\]/g;
97 3         9 $mrg_string =~ s/-LCB-/\{/g;
98 3         8 $mrg_string =~ s/-RCB-/\}/g;
99              
100             # mrg string should always start with "( ROOT (" as in the Stanford parser.
101             # The ROOT non-terminal has usually just one child "S",
102             # but sometimes (in parsing or in Brown treebank) it has more children,
103             # so the artificial root non-terminal "ROOT" makes sure it is formally a tree (not a forest).
104             # Charniak parser output starts with "( S1 (".
105             # PennTB trees start with "( (".
106 3         17 $mrg_string =~ s/^\( (S1 )?\(/( ROOT (/;
107            
108 3         64 my @tokens = split / /, $mrg_string;
109              
110 3         18 $self->_parse_mrg_nonterminal( \@tokens );
111            
112             # If there is just one child, we can remove the extra ROOT non-terminal.
113 3 100       22 if ($self->get_children == 1){
114 2         11 my ($child) = $self->get_children();
115 2         8 foreach my $grandchild ($child->get_children) {
116 6         19 $grandchild->set_parent($self);
117             }
118 2         63 $self->set_phrase($child->phrase);
119 2         59 $self->set_functions($child->functions);
120 2         20 $child->remove();
121             }
122              
123 3         15 return;
124             }
125              
126             sub _reduce {
127 106     106   196 my ( $self, $tokens_rf, $expected_token ) = @_;
128 106 50       239 if ( $tokens_rf->[0] eq $expected_token ) {
129 106         177 return shift @{$tokens_rf};
  106         205  
130             }
131             else {
132 0         0 log_fatal "Unparsable mrg remainder: '$expected_token' is not at the beginning of: "
133             . join( " ", @$tokens_rf );
134             }
135             }
136              
137             sub _parse_mrg_nonterminal {
138 23     23   51 my ( $self, $tokens_rf ) = @_;
139 23         76 $self->_reduce( $tokens_rf, "(" );
140              
141             # phrase type and (optionally) a list of grammatical functions
142 23         52 my $label = shift @{$tokens_rf};
  23         48  
143 23         74 my @label_components = split /-/, $label;
144 23         865 $self->set_phrase( shift @label_components );
145              
146             # TODO: handle traces correctly
147             # Delete trace indices (e.g. NP-SBJ-10 ... -NONE- *T*-10)
148             # Delete suffixes in Brown data, e.g. :"SBJ=1" -> "SBJ", "LOC=2" -> "LOC"
149 23         64 @label_components = grep { !/^\d+$/ } @label_components;
  7         36  
150 23         55 foreach my $comp (@label_components) {
151 7         24 $comp =~ s/=\d+$//;
152             }
153              
154 23 100       67 if (@label_components) {
155 7         232 $self->set_functions( \@label_components );
156             }
157              
158 23         71 while ( $tokens_rf->[0] eq "(" ) {
159 50 100       123 if ( $tokens_rf->[2] eq "(" ) {
160 20         62 my $new_nonterminal = $self->create_nonterminal_child();
161 20         99 $new_nonterminal->_parse_mrg_nonterminal($tokens_rf);
162             }
163             else {
164 30         86 my $new_terminal_child = $self->create_terminal_child();
165 30         106 $new_terminal_child->_parse_mrg_terminal($tokens_rf);
166             }
167             }
168              
169 23         74 $self->_reduce( $tokens_rf, ")" );
170 23         71 return;
171             }
172              
173             sub _parse_mrg_terminal {
174 30     30   70 my ( $self, $tokens_rf ) = @_;
175 30         102 $self->_reduce( $tokens_rf, "(" );
176              
177 30         48 my $tag = shift @{$tokens_rf};
  30         60  
178 30         51 my $form = shift @{$tokens_rf};
  30         55  
179 30         65 $form =~ s/-LRB-/\(/g;
180 30         54 $form =~ s/-RRB-/\)/g;
181 30         1019 $self->set_form($form);
182 30         877 $self->set_tag($tag);
183              
184 30         86 $self->_reduce( $tokens_rf, ")" );
185 30         114 return;
186             }
187              
188             sub stringify_as_mrg {
189 0     0 1   my ($self) = @_;
190 0           my $string;
191 0 0         if ( $self->phrase ) {
192 0 0         my @functions = $self->functions ? @{ $self->functions } : ();
  0            
193 0           $string = join '-', $self->phrase, @functions;
194 0           $string .= ' ';
195             }
196             else {
197 0 0         my $tag = defined $self->tag ? $self->tag : '?';
198 0 0         my $form = defined $self->form ? $self->form : '?';
199 0           $form =~ s/ /_/g;
200 0           $string = "$tag $form";
201 0           $string =~ s/\(/-LRB-/g;
202 0           $string =~ s/\)/-RRB-/g;
203             }
204 0 0         if ( $self->children ) {
205 0           $string .= join ' ', map { $_->stringify_as_mrg() } $self->children;
  0            
206             }
207 0           return "($string)";
208             }
209              
210             sub stringify_as_text {
211 0     0 1   my ($self) = @_;
212 0           my @children = $self->get_children();
213 0 0 0       return $self->form // '<?>' if !@children;
214 0           return join ' ', map { $_->stringify_as_text() } @children;
  0            
215             }
216              
217             #------------------------------------------------------------------------------
218             # Recursively copy children from myself to another node.
219             # This function is specific to the P layer because it contains the list of
220             # attributes. If we could figure out the list automatically, the function would
221             # become general enough to reside directly in Node.pm.
222             #------------------------------------------------------------------------------
223             sub copy_ptree
224             {
225 0     0 1   my $self = shift;
226 0           my $target = shift;
227              
228             # TODO probably we should do deepcopy
229 0           my %copy_of_wild = %{$self->wild};
  0            
230 0           $target->set_wild(\%copy_of_wild);
231              
232 0           my @children0 = $self->get_children();
233 0           foreach my $child0 (@children0)
234             {
235              
236             # Create a copy of the child node.
237 0 0         my $child1 = $child0->is_leaf() ? $target->create_terminal_child() : $target->create_nonterminal_child();
238              
239             # We should copy all attributes that the node has but it is not easy to figure out which these are.
240             # TODO: As a workaround, we list the attributes here directly.
241 0           foreach my $attribute (
242             'form', 'lemma', 'tag', # terminal
243             'phrase', 'functions', # nonterminal
244             'edgelabel', 'is_head', 'index', 'coindex' # common
245             )
246             {
247 0           my $value = $child0->get_attr($attribute);
248 0           $child1->set_attr( $attribute, $value );
249             }
250              
251             # TODO probably we should do deepcopy
252 0           %copy_of_wild = %{$child0->wild};
  0            
253 0           $child1->set_wild(\%copy_of_wild);
254              
255             # Call recursively on the subtrees of the children.
256 0           $child0->copy_ptree($child1);
257             }
258              
259 0           return;
260             }
261              
262             1;
263              
264             __END__
265              
266             =encoding utf-8
267              
268             =head1 NAME
269              
270             Treex::Core::Node::P
271              
272             =head1 VERSION
273              
274             version 2.20210102
275              
276             =head1 DESCRIPTION
277              
278             Representation of nodes of phrase structure (constituency) trees.
279              
280             =head1 METHODS
281              
282             =head2 $node->is_terminal()
283              
284             Is C<$node> a terminal node?
285             Does its C<get_pml_type_name eq 'p-terminal.type'>?
286              
287             =head2 my $child_phrase = $node->create_nonterminal_child()
288              
289             Create a new non-terminal child node,
290             i.e. a node representing a constituent (phrase).
291              
292             =head2 my $child_terminal = $node->create_terminal_child()
293              
294             Create a new terminal child node,
295             i.e. a node representing a token.
296              
297             =head2 my $node->create_from_mrg($mrg_string)
298              
299             Fill C<$node>'s attributes and create its subtree
300             from the serialized string in the PennTB C<mrg> format.
301             E.g.: I<(NP (DT a) (JJ nonexecutive) (NN director))>.
302              
303             =head2 my $mrg_string = $node->stringify_as_mrg()
304              
305             Serialize the tree structure of C<$node> and its subtree
306             as a string in the PennTB C<mrg> format.
307             E.g.: I<(NP (DT a) (JJ nonexecutive) (NN director))>.
308              
309             =head2 my $tokenized_text = $node->stringify_as_text()
310              
311             Get the text representing C<$node>'s subtree.
312             The text is tokenized, i.e. all tokens are separated by a space.
313              
314             =head2 $node->copy_ptree($target_node)
315              
316             Recursively copy children from C<$node> to C<$target_node>.
317             This method is specific to the P layer because it contains the list of
318             attributes. If we could figure out the list automatically, the method would
319             become general enough to reside directly in Node.pm.
320              
321             =head1 AUTHOR
322              
323             Martin Popel <popel@ufal.mff.cuni.cz>
324             Zdeněk Žabokrtský <zabokrtsky@ufal.mff.cuni.cz>
325             Daniel Zeman <zeman@ufal.mff.cuni.cz>
326              
327             =head1 COPYRIGHT AND LICENSE
328              
329             Copyright © 2011 by Institute of Formal and Applied Linguistics, Charles University in Prague
330              
331             This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself.