File Coverage

blib/lib/Treex/Core/Phrase/Term.pm
Criterion Covered Total %
statement 13 15 86.6
branch n/a
condition n/a
subroutine 5 5 100.0
pod n/a
total 18 20 90.0


line stmt bran cond sub pod time code
1             package Treex::Core::Phrase::Term;
2             $Treex::Core::Phrase::Term::VERSION = '2.20160630';
3 1     1   43762 use utf8;
  1         2  
  1         6  
4 1     1   30 use namespace::autoclean;
  1         2  
  1         6  
5              
6 1     1   91 use Moose;
  1         3  
  1         8  
7 1     1   6519 use Treex::Core::Log;
  1         3  
  1         57  
8 1     1   21 use Treex::Core::Node;
  0            
  0            
9              
10             extends 'Treex::Core::Phrase'; # Term is a subclass of the generic Phrase class.
11              
12              
13              
14             has 'node' => # The node wrapped by this terminal phrase.
15             (
16             is => 'ro', # Read-only: it can be set only through the constructor.
17             isa => 'Treex::Core::Node',
18             required => 1 # The constructor fails if no node is supplied.
19             );
20              
21             has 'deprel' => # Label of the dependency relation of this phrase.
22             (
23             is => 'rw', # Read-write: the label may be changed after construction.
24             isa => 'Str',
25             required => 1 # Required, but BUILDARGS fills it in from the node (or with 'NR') if the caller omits it.
26             );
27              
28              
29              
30             #------------------------------------------------------------------------------
31             # This wrapper runs before object construction. If a node has been supplied
32             # but no deprel, the deprel is taken from the node: deprel(), else afun(), else
33             # conll_deprel(), else 'NR'. It also sets is_member if the node's flag is true.
34             #------------------------------------------------------------------------------
35             around BUILDARGS => sub
36             {
37             my $orig = shift;
38             my $class = shift;
39             # Let the default BUILDARGS in Moose::Object normalize the arguments: it accepts both a hash reference and a plain hash.
40             my $attr = $class->$orig(@_);
41             if(defined($attr->{node}))
42             {
43             my $node = $attr->{node};
44             # Fill in deprel only if the caller has not supplied a defined value.
45             if(!defined($attr->{deprel}))
46             {
47             if(defined($node->deprel()))
48             {
49             $attr->{deprel} = $node->deprel();
50             }
51             elsif(defined($node->afun()))
52             {
53             $attr->{deprel} = $node->afun();
54             }
55             elsif(defined($node->conll_deprel()))
56             {
57             $attr->{deprel} = $node->conll_deprel();
58             }
59             else
60             {
61             $attr->{deprel} = 'NR'; # Last resort: the node carries no relation label at all.
62             }
63             }
64             # Set is_member on the phrase if the caller did not define it and the node's flag is true.
65             if(!defined($attr->{is_member}) && $node->is_member())
66             {
67             $attr->{is_member} = 1;
68             }
69             }
70             return $attr;
71             };
72              
73              
74              
75             #------------------------------------------------------------------------------
76             # Tells whether this phrase is terminal. A Term always is, so this returns 1
77             # unconditionally. It is a convenient alternative to querying the class name.
78             #------------------------------------------------------------------------------
79             sub is_terminal
80             {
81             my $self = shift;
82             return 1;
83             }
84              
85              
86              
87             #------------------------------------------------------------------------------
88             # Returns the list of all nodes covered by the phrase. For a terminal phrase
89             # this is a one-element list containing the wrapped node.
90             #------------------------------------------------------------------------------
91             sub nodes
92             {
93             my $self = shift;
94             my $node = $self->node();
95             return ($node);
96             }
97              
98              
99              
100             #------------------------------------------------------------------------------
101             # Returns a one-element list containing this phrase itself. This method is
102             # used to collect all terminal descendants of a phrase. It is similar to
103             # nodes() but it returns the phrase that wraps the node, not the Node object.
104             #------------------------------------------------------------------------------
105             sub terminals
106             {
107             my $self = shift;
108             return ($self);
109             }
110              
111              
112              
113             #------------------------------------------------------------------------------
114             # Returns the list of dependents of the phrase. A terminal phrase has none, so
115             # the result is always the empty list.
116             #------------------------------------------------------------------------------
117             sub dependents
118             {
119             my $self = shift;
120             return ();
121             }
122              
123              
124              
125             #------------------------------------------------------------------------------
126             # Returns the list of children of the phrase. A terminal phrase has none, so
127             # the result is always the empty list.
128             #------------------------------------------------------------------------------
129             sub children
130             {
131             my $self = shift;
132             return ();
133             }
134              
135              
136              
137             #------------------------------------------------------------------------------
138             # Returns the deprel that should be used when the phrase tree is projected back
139             # to a dependency tree (see the method project_dependencies()). In most cases
140             # this is identical to what deprel() returns. However, for instance
141             # prepositional phrases in Prague treebanks are attached using AuxP. Their
142             # relation to the parent (returned by deprel()) is projected to the argument of
143             # the preposition. This terminal implementation simply returns deprel().
144             #------------------------------------------------------------------------------
145             sub project_deprel
146             {
147             my $self = shift;
148             return $self->deprel();
149             }
150              
151              
152              
153             #------------------------------------------------------------------------------
154             # Returns the lowest and the highest ord values of the nodes covered by this
155             # phrase (always a pair of scalar values; they will be identical for terminal
156             # phrases). Note that there is no guarantee that all nodes within the span are
157             # covered by this phrase. There may be gaps!
158             #------------------------------------------------------------------------------
159             sub span
160             {
161             my $self = shift;
162             my $ord = $self->ord(); # ord() is not defined in this file; presumably inherited from Treex::Core::Phrase (to be confirmed).
163             return ($ord, $ord); # The same value serves as both the lower and the upper bound.
164             }
165              
166              
167              
168             #------------------------------------------------------------------------------
169             # Projects dependencies between the head and the dependents back to the
170             # underlying dependency structure. There is not much to do in the terminal
171             # phrase as it does not have any dependents. However, we attach the node to the
172             # root of its tree, to prevent temporary cycles during the tree construction.
173             #------------------------------------------------------------------------------
174             sub project_dependencies
175             {
176             my $self = shift;
177             my $node = $self->node();
178             unless($node->is_root()) # The root itself is left where it is.
179             {
180             my $root = $node->get_root();
181             $node->set_parent($root);
182             }
183             # Clear the is_member flag of the node (it is set to undef).
184             # If we are converting to the Prague style, the flag will be set again where needed.
185             $node->set_is_member(undef);
186             }
187              
188              
189              
190             #------------------------------------------------------------------------------
191             # Returns a textual representation of the phrase, useful for debugging. The
192             # format is "[ FORM-ORD-DEPREL ]", with "_M" appended if is_member() is true.
193             #------------------------------------------------------------------------------
194             sub as_string
195             {
196             my $self = shift;
197             my $node = $self->node();
198             my $form = '_'; # Placeholder used when the node is not the root and has no form.
199             if($node->is_root())
200             {
201             $form = 'ROOT';
202             }
203             elsif(defined($node->form()))
204             {
205             $form = $node->form();
206             }
207             my $ord = $node->ord();
208             my $deprel = defined($self->deprel()) ? '-'.$self->deprel() : ''; # The label is separated from the ord by a hyphen.
209             $deprel .= '_M' if($self->is_member());
210             return "[ $form-$ord$deprel ]";
211             }
212              
213              
214              
215             __PACKAGE__->meta->make_immutable(); # Finalize the Moose class definition.
216              
217             1; # A Perl module must end with a true value.
218              
219              
220              
221             =for Pod::Coverage BUILD
222              
223             =encoding utf-8
224              
225             =head1 NAME
226              
227             Treex::Core::Phrase::Term
228              
229             =head1 VERSION
230              
231             version 2.20160630
232              
233             =head1 SYNOPSIS
234              
235             use Treex::Core::Document;
236             use Treex::Core::Phrase::Term;
237              
238             my $document = Treex::Core::Document->new();
239             my $bundle = $document->create_bundle();
240             my $zone = $bundle->create_zone('en');
241             my $root = $zone->create_atree();
242             my $phrase = Treex::Core::Phrase::Term->new('node' => $root);
243              
244             =head1 DESCRIPTION
245              
246             C<Term> is a terminal C<Phrase>. It contains (refers to) one C<Node> and it can
247             be part of nonterminal phrases (C<NTerm>).
248             See L<Treex::Core::Phrase> for more details.
249              
250             =head1 ATTRIBUTES
251              
252             =over
253              
254             =item node
255              
256             Refers to the C<Node> wrapped in this terminal phrase.
257              
258             =item deprel
259              
260             Any label describing the type of the dependency relation between this phrase
261             (its node) and the governing phrase (node of the first ancestor phrase where
262             this one does not act as head). This label is typically taken from the
263             underlying node when the phrase is built, but it may be translated or modified
264             and it is not kept synchronized with the underlying dependency tree during
265             transformations of the phrase structure. Nevertheless it is assumed that once
266             the transformations are done, the final dependency relations will be projected
267             back to the dependency tree.
268              
269             The C<deprel> attribute can also be supplied separately when creating the
270             C<Phrase::Term>. If it is not supplied, it will be copied from the C<Node>
271             to which the C<node> attribute refers.
272              
273             =item nodes
274              
275             Returns the list of all nodes covered by the phrase, which in the case of
276             a terminal phrase means just the node wrapped in it.
277              
278             =item terminals
279              
280             Returns a one-element list containing this phrase. This method is used to
281             collect all terminal descendants of a phrase. It is similar to C<nodes()> but
282             instead of C<Node> objects, it returns phrases that wrap the nodes.
283              
284             =item dependents
285              
286             Returns the list of dependents of the phrase. Terminal phrases return an
287             empty list by definition.
288              
289             =item children
290              
291             Returns the list of children of the phrase. Terminal phrases return an
292             empty list by definition.
293              
294             =item span
295              
296             Returns the lowest and the highest ord values of the nodes covered by this
297             phrase. For a terminal phrase, the result is just the C<ord> repeated twice.
298              
299             =item project_dependencies
300              
301             Projects dependencies between the head and the dependents back to the
302             underlying dependency structure. A terminal phrase has no dependents, so it
303             only attaches its node to the root (unless the node is the root itself), to
304             prevent temporary cycles, and resets the C<is_member> flag of the node.
305              
306             =item as_string
307              
308             Returns a textual representation of the phrase and all subphrases. Useful for
309             debugging.
310              
311             =back
312              
313             =head1 AUTHORS
314              
315             Daniel Zeman <zeman@ufal.mff.cuni.cz>
316              
317             =head1 COPYRIGHT AND LICENSE
318              
319             Copyright © 2013, 2015 by Institute of Formal and Applied Linguistics, Charles University in Prague
320             This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself.