File Coverage

blib/lib/Treex/Core/Phrase/PP.pm
Criterion Covered Total %
statement 12 64 18.7
branch 0 36 0.0
condition 0 6 0.0
subroutine 4 14 28.5
pod n/a
total 16 120 13.3


line stmt bran cond sub pod time code
1             package Treex::Core::Phrase::PP;
2             $Treex::Core::Phrase::PP::VERSION = '2.20160630';
3 1     1   1735 use utf8;
  1         3  
  1         5  
4 1     1   29 use namespace::autoclean;
  1         2  
  1         6  
5              
6 1     1   51 use Moose;
  1         2  
  1         5  
7 1     1   5747 use Treex::Core::Log;
  1         2  
  1         792  
8              
9             extends 'Treex::Core::Phrase::BaseNTerm';
10              
11              
12              
13             has 'fun' => # core child phrase holding the function word (e.g. the preposition)
14             (
15             is => 'rw',
16             isa => 'Treex::Core::Phrase',
17             required => 1, # must be supplied to the constructor
18             writer => '_set_fun', # private setter; replace_core_child() uses it to swap the child
19             reader => 'fun'
20             );
21              
22             has 'arg' => # core child phrase holding the argument of the function word
23             (
24             is => 'rw',
25             isa => 'Treex::Core::Phrase',
26             required => 1, # must be supplied to the constructor
27             writer => '_set_arg', # private setter; replace_core_child() uses it to swap the child
28             reader => 'arg'
29             );
30              
31             has 'fun_is_head' => # true: head() returns fun(); false: head() returns arg()
32             (
33             is => 'rw',
34             isa => 'Bool',
35             required => 1 # no default is declared, so the constructor must receive it
36             );
37              
38             has 'deprel_at_fun' => # true: deprel()/set_deprel() use fun(); false: they use arg()
39             (
40             is => 'rw',
41             isa => 'Bool',
42             required => 1, # no default is declared, so the constructor must receive it
43             documentation =>
44             'Where (at what core child) is the label of the relation between this '.
45             'phrase and its parent? It is either at the function word or at the '.
46             'argument, regardless which of them is the head.'
47             );
48              
49             has 'core_deprel' => # label that set_deprel() writes to the core child NOT bearing the phrase's deprel
50             (
51             is => 'rw',
52             isa => 'Str',
53             required => 1,
54             default => 'case', # other values named in the documentation below: mark, AuxP, PrepArg
55             documentation =>
56             'The deprel that does not describe the relation of the PP to its parent. '.
57             'There are always two important relations in PPs, one of them is reachable via '.
58             '$self->deprel() and the other via $self->core_deprel(). '.
59             'In Prague treebanks, the PP is headed by preposition but the parent '.
60             'relation (indicated by deprel) is labeled at the argument. The label of the preposition '.
61             '(indicated by core_deprel) is always AuxP. In Universal Dependencies, '.
62             'the PP is headed by the argument which also bears the deprel '.
63             'of the parent relation. The preposition is attached to the argument and '.
64             'its label (indicated by core_deprel) is case or mark. '.
65             'Other treebanks may have the preposition as both the head and the parent-deprel bearer, '.
66             'while the argument would be attached as PrepArg (indicated by core_deprel). '
67             );
68              
69              
70              
71             #------------------------------------------------------------------------------
72             # Runs after the object is constructed. Calls log_fatal() if either core child
73             # (fun or arg) already has a parent; otherwise makes both refer to $self as parent.
74             #------------------------------------------------------------------------------
75             sub BUILD
76             {
77 0     0     my $self = shift;
78 0 0 0       if(defined($self->fun()->parent()) || defined($self->arg()->parent())) # a core child must not belong to another phrase yet
79             {
80 0           log_fatal("The core child already has another parent");
81             }
82 0           $self->fun()->_set_parent($self);
83 0           $self->arg()->_set_parent($self);
84             }
85              
86              
87              
88             #------------------------------------------------------------------------------
89             # Returns the head child of the phrase: fun() if fun_is_head() is true,
90             # otherwise arg(). Calls log_fatal('Dead') first if dead() returns true.
91             #------------------------------------------------------------------------------
92             sub head
93             {
94 0     0     my $self = shift;
95 0 0         log_fatal('Dead') if($self->dead());
96 0 0         return $self->fun_is_head() ? $self->fun() : $self->arg();
97             }
98              
99              
100              
101             #------------------------------------------------------------------------------
102             # Returns the list of non-head children of the phrase: first the core child that is
103             # currently not the head (arg() if fun_is_head(), otherwise fun()), then the dependents().
104             #------------------------------------------------------------------------------
105             sub nonhead_children
106             {
107 0     0     my $self = shift;
108 0 0         log_fatal('Dead') if($self->dead());
109 0 0         my @children = (($self->fun_is_head() ? $self->arg() : $self->fun()), $self->dependents());
110 0 0         return $self->_order_required(@_) ? $self->order_phrases(@children) : @children; # the caller's arguments decide, via _order_required(), whether the list goes through order_phrases()
111             }
112              
113              
114              
115             #------------------------------------------------------------------------------
116             # Returns the list of the children of the phrase that are not dependents, i.e.
117             # fun() followed by arg(). Calls log_fatal('Dead') first if dead() returns true.
118             #------------------------------------------------------------------------------
119             sub core_children
120             {
121 0     0     my $self = shift;
122 0 0         log_fatal('Dead') if($self->dead());
123 0           my @children = ($self->fun(), $self->arg());
124 0 0         return $self->_order_required(@_) ? $self->order_phrases(@children) : @children; # the caller's arguments decide, via _order_required(), whether the list goes through order_phrases()
125             }
126              
127              
128              
129             #------------------------------------------------------------------------------
130             # Returns a true value iff fun_is_head() and deprel_at_fun() agree (both true or both false), i.e. the phrase's deprel is stored at the head child.
131             #------------------------------------------------------------------------------
132             sub deprel_at_head
133             {
134 0     0     my $self = shift;
135 0   0       return ($self->fun_is_head() && $self->deprel_at_fun()) || (!$self->fun_is_head() && !$self->deprel_at_fun()); # unlike the neighbouring methods, no dead() check is made here
136             }
137              
138              
139              
140             #------------------------------------------------------------------------------
141             # Returns the type of the dependency relation of the phrase to the governing
142             # phrase. A prepositional phrase has the same deprel as one of its core
143             # children. Depending on the current preference it is either the function word or
144             # the argument. This is not necessarily the same child that is the current
145             # head. For example, in the Prague annotation style, the preposition is head
146             # but its deprel is always 'AuxP' while the real deprel of the whole phrase is
147             # stored at the argument.
148             #------------------------------------------------------------------------------
149             sub deprel
150             {
151 0     0     my $self = shift;
152 0 0         log_fatal('Dead') if($self->dead());
153 0 0         return $self->deprel_at_fun() ? $self->fun()->deprel() : $self->arg()->deprel();
154             }
155              
156              
157              
158             #------------------------------------------------------------------------------
159             # Sets a new type of the dependency relation of the phrase to the governing
160             # phrase. For PPs the label is propagated to one of the core children.
161             # Depending on the current preference it is either the function word or the
162             # argument. This is not necessarily the same child that is the current head.
163             # The label is not propagated to the underlying dependency tree
164             # (the project_dependencies() method would have to be called to achieve that).
165             #------------------------------------------------------------------------------
166             sub set_deprel
167             {
168 0     0     my $self = shift;
169 0 0         log_fatal('Dead') if($self->dead());
170 0 0         if($self->deprel_at_fun())
171             {
172 0           $self->fun()->set_deprel(@_);
173 0           $self->arg()->set_deprel($self->core_deprel());
174             }
175             else
176             {
177 0           $self->arg()->set_deprel(@_);
178 0           $self->fun()->set_deprel($self->core_deprel());
179             }
180             }
181              
182              
183              
184             #------------------------------------------------------------------------------
185             # Returns the deprel that should be used when the phrase tree is projected back
186             # to a dependency tree (see the method project_dependencies()). In most cases
187             # this is identical to what deprel() returns. However, for instance
188             # prepositional phrases in Prague treebanks are attached using AuxP. Their
189             # relation to the parent (returned by deprel()) is projected to the argument of
190             # the preposition.
191             #------------------------------------------------------------------------------
192             sub project_deprel
193             {
194 0     0     my $self = shift;
195 0 0         log_fatal('Dead') if($self->dead());
196             # fun_is_head && deprel_at_fun => project_deprel == deprel # many treebanks
197             # fun_is_head && !deprel_at_fun => project_deprel == core_deprel # Prague style
198             # !fun_is_head && !deprel_at_fun => project_deprel == deprel # UD style
199             # !fun_is_head && deprel_at_fun => project_deprel == core_deprel # not used anywhere
200             # Here we always return the project_deprel of the head phrase. If our main deprel is not at the head
201             # (and thus the head deprel / project deprel is the core_deprel, e.g. 'AuxP'), we have to trust the
202             # previous code that the head deprel has been set and maintained correctly. We cannot just return the
203             # core_deprel here. If the head is not a normal phrase (e.g. if it is a coordination of prepositions),
204             # then the core deprel may be buried deeper and the actual projected deprel may be Coord, not AuxP!
205             # In consequence, the only difference between this implementation of project_deprel() and
206             # that of the ancestor class BaseNTerm is currently this comment.
207 0           return $self->head()->project_deprel();
208             }
209              
210              
211              
212             #------------------------------------------------------------------------------
213             # Replaces one of the core children (function word or argument) by another
214             # phrase. This is used when we want to transform the child to a different class
215             # of phrase. The replacement must not have a parent yet.
216             #------------------------------------------------------------------------------
217             sub replace_core_child
218             {
219 0     0     my $self = shift;
220 0           my $old_child = shift; # Treex::Core::Phrase
221 0           my $new_child = shift; # Treex::Core::Phrase
222 0 0         log_fatal('Dead') if($self->dead());
223 0           $self->_check_old_new_child($old_child, $new_child);
224 0           $old_child->_set_parent(undef);
225 0           $new_child->_set_parent($self);
226 0 0         if($old_child == $self->fun())
    0          
227             {
228 0           $self->_set_fun($new_child);
229             }
230             elsif($old_child == $self->arg())
231             {
232 0           $self->_set_arg($new_child);
233             }
234             else
235             {
236 0           log_fatal("The child to be replaced is not in my core");
237             }
238             }
239              
240              
241              
242             #------------------------------------------------------------------------------
243             # Returns a textual representation of the phrase and all subphrases. Useful for
244             # debugging.
245             #------------------------------------------------------------------------------
246             sub as_string
247             {
248 0     0     my $self = shift;
249 0           my $fun = 'FUN '.$self->fun()->as_string();
250 0           my $arg = 'ARG '.$self->arg()->as_string();
251 0           my @dependents = $self->dependents('ordered' => 1);
252 0           my $deps = join(', ', map {$_->as_string()} (@dependents));
  0            
253 0 0         $deps = 'DEPS '.$deps if($deps);
254 0           my $subtree = join(' ', ($fun, $arg, $deps));
255 0 0         $subtree .= ' _M' if($self->is_member());
256 0           return "(PP $subtree)";
257             }
258              
259              
260              
261             __PACKAGE__->meta->make_immutable();
262              
263             1;
264              
265              
266              
267             =for Pod::Coverage BUILD
268              
269             =encoding utf-8
270              
271             =head1 NAME
272              
273             Treex::Core::Phrase::PP
274              
275             =head1 VERSION
276              
277             version 2.20160630
278              
279             =head1 SYNOPSIS
280              
281             use Treex::Core::Document;
282             use Treex::Core::Phrase::Term;
283             use Treex::Core::Phrase::PP;
284              
285             my $document = new Treex::Core::Document;
286             my $bundle = $document->create_bundle();
287             my $zone = $bundle->create_zone('en');
288             my $root = $zone->create_atree();
289             my $prep = $root->create_child();
290             my $noun = $prep->create_child();
291             $prep->set_deprel('AuxP');
292             $noun->set_deprel('Adv');
293             my $prepphr = new Treex::Core::Phrase::Term ('node' => $prep);
294             my $argphr = new Treex::Core::Phrase::Term ('node' => $noun);
295             my $pphrase = new Treex::Core::Phrase::PP ('fun' => $prepphr, 'arg' => $argphr, 'fun_is_head' => 1, 'deprel_at_fun' => 0, 'core_deprel' => 'AuxP');
296              
297             =head1 DESCRIPTION
298              
299             C<Phrase::PP> (standing for I<prepositional phrase>) is a special case of
300             C<Phrase::NTerm>. The model example is a preposition (possibly compound) and
301             its argument (typically a noun phrase), plus possible dependents of the whole,
302             such as emphasizers or punctuation. However, it can also be used for
303             subordinating conjunctions plus relative clauses, or for any pair of a function
304             word and its (one) argument.
305              
306             While we know the two key children (let's call them the preposition and the
307             argument), we do not treat it as fixed which one of them is the head (but the head
308             is indeed one of these two, and not any other child). Depending on the
309             preferred annotation style, we can pick the preposition or the argument as the
310             current head.
311              
312             =head1 ATTRIBUTES
313              
314             =over
315              
316             =item fun
317              
318             A sub-C<Phrase> of this phrase that contains the preposition (or another
319             function word if this is not a true prepositional phrase).
320              
321             =item arg
322              
323             A sub-C<Phrase> (typically a noun phrase) of this phrase that contains the
324             argument of the preposition (or of the other function word if this is not
325             a true prepositional phrase).
326              
327             =item fun_is_head
328              
329             Boolean attribute that defines the currently preferred annotation style.
330             C<True> means that the function word is considered the head of the phrase.
331             C<False> means that the argument is the head.
332              
333             =item deprel_at_fun
334              
335             Where (at what core child) is the label of the relation between this phrase and
336             its parent? It is either at the function word or at the argument, regardless
337             which of them is the head.
338              
339             =item core_deprel
340              
341             The deprel that does not describe the relation of the PP to its parent. There
342             are always two important relations in PPs, one of them is reachable via
343             deprel() and the other via core_deprel(). In Prague treebanks, the PP is headed
344             by the preposition but the parent relation (indicated by deprel) is labeled at the
345             argument. The label of the preposition (indicated by core_deprel) is always
346             C<AuxP>. In Universal Dependencies, the PP is headed by the argument which also
347             bears the deprel of the parent relation. The preposition is attached to the
348             argument and its label (indicated by core_deprel) is C<case> or C<mark>. Other
349             treebanks may have the preposition as both the head and the parent-deprel
350             bearer, while the argument would be attached as PrepArg (indicated by
351             core_deprel).
352              
353             =back
354              
355             =head1 METHODS
356              
357             =over
358              
359             =item head
360              
361             A sub-C<Phrase> of this phrase that is at the moment considered the head phrase
362             (in the sense of dependency syntax).
363             Depending on the current preference, it is either the function word or its
364             argument.
365              
366             =item nonhead_children
367              
368             Returns the list of non-head children of the phrase, i.e. the dependents plus either
369             the function word or the argument (whichever is currently not the head).
370              
371             =item core_children
372              
373             Returns the list of the children of the phrase that are not dependents, i.e. both the
374             function word and the argument.
375              
376             =item deprel
377              
378             Returns the type of the dependency relation of the phrase to the governing
379             phrase. A prepositional phrase has the same deprel as one of its core
380             children. Depending on the current preference it is either the function word or
381             the argument. This is not necessarily the same child that is the current
382             head. For example, in the Prague annotation style, the preposition is head
383             but its deprel is always C<AuxP> while the real deprel of the whole phrase is
384             stored at the argument.
385              
386             =item set_deprel
387              
388             Sets a new type of the dependency relation of the phrase to the governing
389             phrase. For PPs the label is propagated to one of the core children.
390             Depending on the current preference it is either the function word or the
391             argument. This is not necessarily the same child that is the current head.
392             The label is not propagated to the underlying dependency tree
393             (the project_dependencies() method would have to be called to achieve that).
394              
395             =item project_deprel
396              
397             Returns the deprel that should be used when the phrase tree is projected back
398             to a dependency tree (see the method project_dependencies()). In most cases
399             this is identical to what deprel() returns. However, for instance
400             prepositional phrases in Prague treebanks are attached using C<AuxP>. Their
401             relation to the parent (returned by deprel()) is projected as the label of
402             the dependency between the preposition and its argument.
403              
404             =item replace_core_child
405              
406             Replaces one of the core children (function word or argument) by another
407             phrase. This is used when we want to transform the child to a different class
408             of phrase. The replacement must not have a parent yet.
409              
410             =item as_string
411              
412             Returns a textual representation of the phrase and all subphrases. Useful for
413             debugging.
414              
415             =back
416              
417             =head1 AUTHORS
418              
419             Daniel Zeman <zeman@ufal.mff.cuni.cz>
420              
421             =head1 COPYRIGHT AND LICENSE
422              
423             Copyright © 2013, 2015 by Institute of Formal and Applied Linguistics, Charles University in Prague
424             This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself.