| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Treex::Core::Phrase; |
|
2
|
|
|
|
|
|
|
$Treex::Core::Phrase::VERSION = '2.20160630'; |
|
3
|
1
|
|
|
1
|
|
14200
|
use utf8; |
|
|
1
|
|
|
|
|
13
|
|
|
|
1
|
|
|
|
|
4
|
|
|
4
|
1
|
|
|
1
|
|
417
|
use namespace::autoclean; |
|
|
1
|
|
|
|
|
14398
|
|
|
|
1
|
|
|
|
|
5
|
|
|
5
|
|
|
|
|
|
|
|
|
6
|
1
|
|
|
1
|
|
464
|
use Moose; |
|
|
1
|
|
|
|
|
379982
|
|
|
|
1
|
|
|
|
|
6
|
|
|
7
|
1
|
|
|
1
|
|
7403
|
use MooseX::SemiAffordanceAccessor; # attribute x is written using set_x($value) and read using x() |
|
|
1
|
|
|
|
|
11057
|
|
|
|
1
|
|
|
|
|
4
|
|
|
8
|
1
|
|
|
1
|
|
8258
|
use List::MoreUtils qw(any); |
|
|
1
|
|
|
|
|
6542
|
|
|
|
1
|
|
|
|
|
9
|
|
|
9
|
1
|
|
|
1
|
|
954
|
use Treex::Core::Log; |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
62
|
|
|
10
|
1
|
|
|
1
|
|
408
|
use Treex::Core::Node; |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
# Link to the phrase that directly contains this one; undef while the phrase
# is not attached anywhere. The accessor names are given explicitly: parent()
# reads the value and the raw writer is _set_parent(), which only stores the
# value it is given.
has parent => (
    is      => 'rw',
    isa     => 'Maybe[Treex::Core::Phrase]',
    reader  => 'parent',
    writer  => '_set_parent',
    default => undef,
);
|
22
|
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
# Boolean flag marking the phrase as a member of a paratactic structure
# (a conjunct of a coordination, or a member of an apposition).
has is_member => (
    is            => 'rw',
    isa           => 'Bool',
    documentation => 'Is this phrase a member of a coordination (i.e. conjunct) or apposition?',
);
|
29
|
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
# Attaches this phrase to a new parent (or detaches it when the new parent is
# undef). In contrast to the bare writer _set_parent(), this public method also
# maintains the reverse links: the old parent is asked to _remove_child() and
# the new parent is asked to _add_child().
#
# Arguments: the new parent phrase (Treex::Core::Phrase::NTerm) or undef.
# Returns:   the previous parent, or undef if there was none.
# Fatal:     when called with a wrong number of arguments, or when the new
#            parent is this phrase itself or one of its descendants (either
#            would create a cycle in the phrase tree).
#------------------------------------------------------------------------------
sub set_parent
{
    log_fatal('Incorrect number of arguments') if(scalar(@_) != 2);
    my ($self, $new_parent) = @_; # $new_parent: Treex::Core::Phrase::NTerm or undef
    # is_descendant_of() inspects only the ancestors of $new_parent, so the
    # degenerate case of a phrase becoming its own parent must be tested
    # separately; it is the shortest possible cycle.
    if(defined($new_parent) && ($new_parent == $self || $new_parent->is_descendant_of($self)))
    {
        log_info($self->as_string());
        log_fatal('Cannot set parent phrase because it would create a cycle');
    }
    my $old_parent = $self->parent();
    # Say the old parent good bye.
    if(defined($old_parent))
    {
        $old_parent->_remove_child($self);
    }
    # Set the new parent before we call its _add_child() method so that it can verify it has been called from here.
    $self->_set_parent($new_parent);
    # Say the new parent hello.
    if(defined($new_parent))
    {
        $new_parent->_add_child($self);
    }
    return $old_parent;
}
|
62
|
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
# Abstract method: the list of dependents of the phrase. Every derived class
# has to provide its own version. Nonterminal phrases keep their (possibly
# empty) list of dependents as an attribute; terminal phrases return an empty
# list by definition. The base-class version only reports a fatal error.
#------------------------------------------------------------------------------
sub dependents
{
    my ($self) = @_;
    log_fatal("The dependents() method is not implemented");
}
|
76
|
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
# Abstract method: the list of children of the phrase. Every derived class has
# to provide its own version. Nonterminal phrases distinguish core children
# from dependents and should return both here; terminal phrases return an empty
# list by definition. The base-class version only reports a fatal error.
#------------------------------------------------------------------------------
sub children
{
    my ($self) = @_;
    log_fatal("The children() method is not implemented");
}
|
90
|
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
# Follows the parent links upwards from this phrase and reports whether the
# given phrase is found among the ancestors. Returns 1 if so, 0 otherwise (in
# particular, a phrase is not considered its own descendant). set_parent()
# relies on this test to refuse attachments that would create a cycle.
#------------------------------------------------------------------------------
sub is_descendant_of
{
    if(scalar(@_) != 2)
    {
        log_fatal('Incorrect number of arguments');
    }
    my ($self, $on_phrase) = @_; # $on_phrase: Treex::Core::Phrase
    for(my $ancestor = $self->parent(); defined($ancestor); $ancestor = $ancestor->parent())
    {
        return 1 if($ancestor == $on_phrase);
    }
    return 0;
}
|
110
|
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
# Abstract predicate: is this phrase terminal? Asking the object directly is
# more convenient than querying the class name through Moose. The base-class
# version only reports a fatal error.
#------------------------------------------------------------------------------
sub is_terminal
{
    my ($self) = @_;
    log_fatal("The is_terminal() method is not implemented");
}
|
122
|
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
# Predicate: is this phrase a coordination? Asking the object directly is more
# convenient than querying the class name through Moose. The base class always
# answers 0; the Coordination class is expected to override the method.
#------------------------------------------------------------------------------
sub is_coordination
{
    my ($self) = @_;
    # A generic phrase is not a coordination.
    return 0;
}
|
135
|
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
# Predicate: is this phrase one of the core children of its parent? The answer
# matters because core children cannot be moved around as freely as ordinary
# dependents. Returns 0 for a phrase without a parent; otherwise the result of
# looking this phrase up in the parent's core_children() list.
#------------------------------------------------------------------------------
sub is_core_child
{
    my ($self) = @_;
    my $governor = $self->parent();
    unless(defined($governor))
    {
        return 0;
    }
    my @core = $governor->core_children();
    return any { $_ == $self } @core;
}
|
149
|
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
# Abstract method: the head node of the phrase. A terminal phrase should return
# its node attribute, a nonterminal phrase should return the node of its head
# child. Every derived class has to provide its own version; the base-class
# version only reports a fatal error.
#------------------------------------------------------------------------------
sub node
{
    my ($self) = @_;
    log_fatal("The node() method is not implemented");
}
|
163
|
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
# Abstract method: the list of all nodes covered by the phrase, i.e. the head
# node of this phrase and the head nodes of all its descendants. The base-class
# version only reports a fatal error.
#------------------------------------------------------------------------------
sub nodes
{
    my ($self) = @_;
    log_fatal("The nodes() method is not implemented");
}
|
175
|
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
# Abstract method: the list of all terminal descendants of this phrase. It is
# the counterpart of nodes() that yields the Phrase::Term objects wrapping the
# nodes rather than the Node objects themselves. The base-class version only
# reports a fatal error.
#------------------------------------------------------------------------------
sub terminals
{
    my ($self) = @_;
    log_fatal("The terminals() method is not implemented");
}
|
188
|
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
# Abstract method: the type of the dependency relation between this phrase and
# its governing phrase. Every derived class has to provide its own version.
# When the phrase structure is built around a dependency tree, the relations
# will probably be taken from (or based on) the deprels of the underlying
# nodes. They may be modified while the phrase tree is transformed to the
# desired style, and at the end they can be projected to the dependency tree
# again. A general nonterminal phrase typically shares the deprel of its head
# child; terminal phrases store the deprel as an attribute. The base-class
# version only reports a fatal error.
#------------------------------------------------------------------------------
sub deprel
{
    my ($self) = @_;
    log_fatal("The deprel() method is not implemented");
}
|
206
|
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
# Abstract method: the deprel to be used when the phrase tree is projected back
# to a dependency tree (see project_dependencies()). Mostly this equals the
# value of deprel(), but not always: prepositional phrases in Prague treebanks,
# for instance, are attached using AuxP, and their relation to the parent (the
# value of deprel()) is projected to the argument of the preposition. The
# base-class version only reports a fatal error.
#------------------------------------------------------------------------------
sub project_deprel
{
    my ($self) = @_;
    log_fatal("The project_deprel() method is not implemented");
}
|
222
|
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
# Returns the ord attribute of the head node of the phrase, as delivered by
# node()->ord(). Consequently, nodes that do not implement the Ordered role
# cannot be wrapped in phrases. The value is needed when child phrases are to
# be ordered by the word order of their head nodes.
#------------------------------------------------------------------------------
sub ord
{
    my ($self) = @_;
    my $head_node = $self->node();
    return $head_node->ord();
}
|
235
|
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
# Abstract method: the lowest and the highest ord value among the nodes covered
# by this phrase. The result is always a pair of scalars (two identical values
# for a terminal phrase). Beware that the span may contain gaps: nothing
# guarantees that every node inside the span is covered by this phrase. The
# base-class version only reports a fatal error.
#------------------------------------------------------------------------------
sub span
{
    my ($self) = @_;
    log_fatal("The span() method is not implemented");
}
|
249
|
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
# Abstract method: projects the dependencies between the head and the
# dependents back to the underlying dependency structure. Derived classes have
# to provide their own version; the base-class version only reports a fatal
# error.
#------------------------------------------------------------------------------
sub project_dependencies
{
    my ($self) = @_;
    log_fatal("The project_dependencies() method is not implemented");
}
|
262
|
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
# Abstract method: a textual representation of the phrase and all its
# subphrases, intended for debugging. Derived classes have to provide their own
# version; the base-class version only reports a fatal error.
#------------------------------------------------------------------------------
sub as_string
{
    my ($self) = @_;
    log_fatal("The as_string() method is not implemented");
}
|
275
|
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
__PACKAGE__->meta->make_immutable(); |
|
279
|
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
1; |
|
281
|
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
=for Pod::Coverage BUILD |
|
285
|
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
=encoding utf-8 |
|
287
|
|
|
|
|
|
|
|
|
288
|
|
|
|
|
|
|
=head1 NAME |
|
289
|
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
Treex::Core::Phrase |
|
291
|
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
=head1 VERSION |
|
293
|
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
version 2.20160630 |
|
295
|
|
|
|
|
|
|
|
|
296
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
297
|
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
A C<Phrase> is a concept defined on top of dependency trees and subtrees |
|
299
|
|
|
|
|
|
|
(where a subtree contains a node and all its descendants, not just any arbitrary subset of nodes). |
|
300
|
|
|
|
|
|
|
Similarly to Chomsky's hierarchy of formal grammars, there are two main types of phrases: |
|
301
|
|
|
|
|
|
|
I<terminal> and I<nonterminal>. |
|
302
|
|
|
|
|
|
|
Furthermore, there may be subtypes of the nonterminal type with special behavior. |
|
303
|
|
|
|
|
|
|
|
|
304
|
|
|
|
|
|
|
A B<terminal phrase> contains just one C<Node> (which typically corresponds to a surface token). |
|
305
|
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
A B<nonterminal phrase> does not directly contain any C<Node> but it contains |
|
307
|
|
|
|
|
|
|
one or more (usually at least two) sub-phrases. |
|
308
|
|
|
|
|
|
|
The hierarchy of phrases and their sub-phrases is also a tree structure. |
|
309
|
|
|
|
|
|
|
In the typical case there is a relation between the tree of phrases and the underlying dependency |
|
310
|
|
|
|
|
|
|
tree, but the rules governing this relation are not fixed. |
|
311
|
|
|
|
|
|
|
|
|
312
|
|
|
|
|
|
|
Phrases help us model situations that are difficult to model in the dependency tree alone. |
|
313
|
|
|
|
|
|
|
We can encode multiple levels of “tightness” of relations between governors and dependents. |
|
314
|
|
|
|
|
|
|
In particular we can distinguish between dependents that modify the whole phrase (shared modifiers) |
|
315
|
|
|
|
|
|
|
and those that modify only the head of the phrase (private modifiers). |
|
316
|
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
This is particularly useful for various tree transformations and conversions between annotation |
|
318
|
|
|
|
|
|
|
styles (such as in the HamleDT blocks). |
|
319
|
|
|
|
|
|
|
The idea is that we will first construct a phrase tree based on the existing dependency tree, |
|
320
|
|
|
|
|
|
|
then we will perform transformations on the phrase tree |
|
321
|
|
|
|
|
|
|
and finally we will create new dependency relations based on the phrase tree and |
|
322
|
|
|
|
|
|
|
on the rules defined by the desired annotation style. |
|
323
|
|
|
|
|
|
|
Phrase is a temporary internal structure that will not be saved in the Treex format on the disk. |
|
324
|
|
|
|
|
|
|
|
|
325
|
|
|
|
|
|
|
Every phrase knows its parent (superphrase) and, if it is nonterminal, its children (subphrases). |
|
326
|
|
|
|
|
|
|
It also knows which of the children is the I<head> (as long as there are children, there is always |
|
327
|
|
|
|
|
|
|
one and only one head child). |
|
328
|
|
|
|
|
|
|
The phrase can also return its head node. For terminal phrases, this is the node they enwrap. |
|
329
|
|
|
|
|
|
|
For nonterminal phrases, this is defined recursively as the head node of their head child phrase. |
|
330
|
|
|
|
|
|
|
|
|
331
|
|
|
|
|
|
|
Every phrase also has a dependency relation label I<(deprel)>. |
|
332
|
|
|
|
|
|
|
These labels are analogous to deprels of nodes in dependency trees. |
|
333
|
|
|
|
|
|
|
Most of them are just taken from the underlying dependency tree and they are propagated back when |
|
334
|
|
|
|
|
|
|
new dependency structure is shaped after the phrases; however, some labels may have special |
|
335
|
|
|
|
|
|
|
meaning even for the C<Phrase> objects. They help recognize special types of nonterminal phrases, |
|
336
|
|
|
|
|
|
|
such as coordinations. |
|
337
|
|
|
|
|
|
|
If the phrase is the head of its parent phrase, its deprel is identical to the deprel of its parent. |
|
338
|
|
|
|
|
|
|
Otherwise, the deprel represents the dependency relation between the phrase and the head of its parent. |
|
339
|
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
=head1 ATTRIBUTES |
|
341
|
|
|
|
|
|
|
|
|
342
|
|
|
|
|
|
|
=over |
|
343
|
|
|
|
|
|
|
|
|
344
|
|
|
|
|
|
|
=item parent |
|
345
|
|
|
|
|
|
|
|
|
346
|
|
|
|
|
|
|
Refers to the parent C<Phrase>, if any. |
|
347
|
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
=item is_member |
|
349
|
|
|
|
|
|
|
|
|
350
|
|
|
|
|
|
|
Is this phrase a member of a paratactic structure such as coordination (where |
|
351
|
|
|
|
|
|
|
members are known as conjuncts) or apposition? We need this attribute because |
|
352
|
|
|
|
|
|
|
of the Prague-style dependency trees. We need it only during the building phase |
|
353
|
|
|
|
|
|
|
of the phrase tree. |
|
354
|
|
|
|
|
|
|
|
|
355
|
|
|
|
|
|
|
We could encode this attribute in C<deprel> but it would not be practical |
|
356
|
|
|
|
|
|
|
because it acts independently of C<deprel>. Unlike C<deprel>, C<is_member> is |
|
357
|
|
|
|
|
|
|
less tied to the underlying nodes; it is really an attribute of the whole |
|
358
|
|
|
|
|
|
|
phrase. If we decide to change the C<deprel> of the phrase (which is propagated |
|
359
|
|
|
|
|
|
|
to selected core children), we do not necessarily want to change C<is_member> |
|
360
|
|
|
|
|
|
|
too. And we do not want to decode C<is_member> from C<deprel>, shuffle and |
|
361
|
|
|
|
|
|
|
encode elsewhere again. |
|
362
|
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
When a terminal phrase is created around a C<Node>, it takes its C<is_member> |
|
364
|
|
|
|
|
|
|
value from the node. When the phrase receives a parent, the C<is_member> flag |
|
365
|
|
|
|
|
|
|
will be typically moved to the parent (and erased at the child). However, this |
|
366
|
|
|
|
|
|
|
does not happen automatically and the C<Builder> has to do that when desired. |
|
367
|
|
|
|
|
|
|
Similarly, when the type of the phrase is changed (e.g. a new C<Phrase::PP> is |
|
368
|
|
|
|
|
|
|
created, the contents of the old C<Phrase::NTerm> is moved to it and the old |
|
369
|
|
|
|
|
|
|
phrase is destroyed), the surrounding code should make sure that the |
|
370
|
|
|
|
|
|
|
C<is_member> flag is carried over, too. Finally, the value will be used when |
|
371
|
|
|
|
|
|
|
a C<Phrase::Coordination> is recognized. At that point the C<is_member> flag |
|
372
|
|
|
|
|
|
|
can be erased for all newly identified conjuncts because now they can be |
|
373
|
|
|
|
|
|
|
recognized without the flag. However, if the C<Phrase::Coordination> itself (or its |
|
374
|
|
|
|
|
|
|
C<Phrase::NTerm> predecessor) is a member of a larger paratactic structure, then it |
|
375
|
|
|
|
|
|
|
must keep the flag for its parent to see and use. |
|
376
|
|
|
|
|
|
|
|
|
377
|
|
|
|
|
|
|
=back |
|
378
|
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
=head1 METHODS |
|
380
|
|
|
|
|
|
|
|
|
381
|
|
|
|
|
|
|
=over |
|
382
|
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
=item $phrase->set_parent ($nonterminal_phrase); |
|
384
|
|
|
|
|
|
|
|
|
385
|
|
|
|
|
|
|
Sets a new parent for this phrase. The parent phrase must be a L<nonterminal|Treex::Core::Phrase::NTerm>. |
|
386
|
|
|
|
|
|
|
This phrase will become its new I<non-head> child. |
|
387
|
|
|
|
|
|
|
The new parent may also be undefined, which means that the current phrase will |
|
388
|
|
|
|
|
|
|
be disconnected from the phrase structure (but it will keep its own children, |
|
389
|
|
|
|
|
|
|
if any). |
|
390
|
|
|
|
|
|
|
The method returns the old parent. |
|
391
|
|
|
|
|
|
|
|
|
392
|
|
|
|
|
|
|
=item my @dependents = $phrase->dependents(); |
|
393
|
|
|
|
|
|
|
|
|
394
|
|
|
|
|
|
|
Returns the list of dependents of the phrase. This is an abstract method that |
|
395
|
|
|
|
|
|
|
must be implemented in every derived class. Nonterminal phrases have a list |
|
396
|
|
|
|
|
|
|
of dependents (possibly empty) as their attribute. Terminal phrases return an |
|
397
|
|
|
|
|
|
|
empty list by definition. |
|
398
|
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
=item my @children = $phrase->children(); |
|
400
|
|
|
|
|
|
|
|
|
401
|
|
|
|
|
|
|
Returns the list of children of the phrase. This is an abstract method that |
|
402
|
|
|
|
|
|
|
must be implemented in every derived class. Nonterminal phrases distinguish |
|
403
|
|
|
|
|
|
|
between core children and dependents, and this method should return both. |
|
404
|
|
|
|
|
|
|
Terminal phrases return an empty list by definition. |
|
405
|
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
=item if( $phrase->is_descendant_of ($another_phrase) ) {...} |
|
407
|
|
|
|
|
|
|
|
|
408
|
|
|
|
|
|
|
Tests whether this phrase depends on another phrase via the parent links. |
|
409
|
|
|
|
|
|
|
This method is used to prevent cycles when setting a new parent. |
|
410
|
|
|
|
|
|
|
|
|
411
|
|
|
|
|
|
|
=item my $ist = $phrase->is_terminal(); |
|
412
|
|
|
|
|
|
|
|
|
413
|
|
|
|
|
|
|
Tells whether this phrase is terminal, that is, it does not have children |
|
414
|
|
|
|
|
|
|
(subphrases). |
|
415
|
|
|
|
|
|
|
|
|
416
|
|
|
|
|
|
|
=item my $isc = $phrase->is_coordination(); |
|
417
|
|
|
|
|
|
|
|
|
418
|
|
|
|
|
|
|
Tells whether this phrase is L<Treex::Core::Phrase::Coordination> or its |
|
419
|
|
|
|
|
|
|
descendant. |
|
420
|
|
|
|
|
|
|
|
|
421
|
|
|
|
|
|
|
=item my $iscc = $phrase->is_core_child(); |
|
422
|
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
Tells whether this phrase is core child of another phrase. That is sometimes |
|
424
|
|
|
|
|
|
|
important to know because core children cannot be easily moved around. |
|
425
|
|
|
|
|
|
|
|
|
426
|
|
|
|
|
|
|
=item my $node = $phrase->node(); |
|
427
|
|
|
|
|
|
|
|
|
428
|
|
|
|
|
|
|
Returns the head node of the phrase. For terminal phrases this should just |
|
429
|
|
|
|
|
|
|
return their node attribute. For nonterminal phrases this should return the |
|
430
|
|
|
|
|
|
|
node of their head child. This is an abstract method that must be defined in |
|
431
|
|
|
|
|
|
|
every derived class. |
|
432
|
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
=item my @nodes = $phrase->nodes(); |
|
434
|
|
|
|
|
|
|
|
|
435
|
|
|
|
|
|
|
Returns the list of all nodes covered by the phrase, i.e. the head node of |
|
436
|
|
|
|
|
|
|
this phrase and of all its descendants. |
|
437
|
|
|
|
|
|
|
|
|
438
|
|
|
|
|
|
|
=item my @phrases = $phrase->terminals(); |
|
439
|
|
|
|
|
|
|
|
|
440
|
|
|
|
|
|
|
Returns the list of all terminal descendants of this phrase. Similar to |
|
441
|
|
|
|
|
|
|
C<nodes()>, but instead of C<Node> objects returns the C<Phrase::Term> objects, in |
|
442
|
|
|
|
|
|
|
which the nodes are wrapped. |
|
443
|
|
|
|
|
|
|
|
|
444
|
|
|
|
|
|
|
=item my $deprel = $phrase->deprel(); |
|
445
|
|
|
|
|
|
|
|
|
446
|
|
|
|
|
|
|
Returns the type of the dependency relation of the phrase to the governing |
|
447
|
|
|
|
|
|
|
phrase. This is an abstract method that must be defined in every derived |
|
448
|
|
|
|
|
|
|
class. When the phrase structure is built around a dependency tree, the |
|
449
|
|
|
|
|
|
|
relations will be probably taken from (or based on) the deprels of the |
|
450
|
|
|
|
|
|
|
underlying nodes. When the phrase tree is transformed to the desired style, |
|
451
|
|
|
|
|
|
|
the relations may be modified; at the end, they can be projected to the |
|
452
|
|
|
|
|
|
|
dependency tree again. A general nonterminal phrase typically has the same |
|
453
|
|
|
|
|
|
|
deprel as its head child. Terminal phrases store deprels as attributes. |
|
454
|
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
=item my $deprel = $phrase->project_deprel(); |
|
456
|
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
Returns the deprel that should be used when the phrase tree is projected back |
|
458
|
|
|
|
|
|
|
to a dependency tree (see the method project_dependencies()). In most cases |
|
459
|
|
|
|
|
|
|
this is identical to what deprel() returns. However, for instance |
|
460
|
|
|
|
|
|
|
prepositional phrases in Prague treebanks are attached using C<AuxP>. Their |
|
461
|
|
|
|
|
|
|
relation to the parent (returned by deprel()) is projected as the label of |
|
462
|
|
|
|
|
|
|
the dependency between the preposition and its argument. |
|
463
|
|
|
|
|
|
|
|
|
464
|
|
|
|
|
|
|
=item my $ord = $phrase->ord(); |
|
465
|
|
|
|
|
|
|
|
|
466
|
|
|
|
|
|
|
Returns the head node's ord attribute. This means that nodes that do not implement |
|
467
|
|
|
|
|
|
|
the L<Treex::Core::Node::Ordered|Ordered> role cannot be wrapped in phrases. We sometimes need to order |
|
468
|
|
|
|
|
|
|
child phrases according to the word order of their head nodes. |
|
469
|
|
|
|
|
|
|
|
|
470
|
|
|
|
|
|
|
=item my ($left, $right) = $phrase->span(); |
|
471
|
|
|
|
|
|
|
|
|
472
|
|
|
|
|
|
|
Returns the lowest and the highest ord values of the nodes covered by this |
|
473
|
|
|
|
|
|
|
phrase (always a pair of scalar values; they will be identical for terminal |
|
474
|
|
|
|
|
|
|
phrases). Note that there is no guarantee that all nodes within the span are |
|
475
|
|
|
|
|
|
|
covered by this phrase. There may be gaps! |
|
476
|
|
|
|
|
|
|
|
|
477
|
|
|
|
|
|
|
=item $phrase->project_dependencies(); |
|
478
|
|
|
|
|
|
|
|
|
479
|
|
|
|
|
|
|
Recursively projects dependencies between the head and the dependents back to the |
|
480
|
|
|
|
|
|
|
underlying dependency structure. |
|
481
|
|
|
|
|
|
|
|
|
482
|
|
|
|
|
|
|
=item my $phrase_string = $phrase->as_string(); |
|
483
|
|
|
|
|
|
|
|
|
484
|
|
|
|
|
|
|
Returns a textual representation of the phrase and all subphrases. Useful for |
|
485
|
|
|
|
|
|
|
debugging. |
|
486
|
|
|
|
|
|
|
|
|
487
|
|
|
|
|
|
|
=back |
|
488
|
|
|
|
|
|
|
|
|
489
|
|
|
|
|
|
|
=head1 AUTHORS |
|
490
|
|
|
|
|
|
|
|
|
491
|
|
|
|
|
|
|
Daniel Zeman <zeman@ufal.mff.cuni.cz> |
|
492
|
|
|
|
|
|
|
|
|
493
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
|
494
|
|
|
|
|
|
|
|
|
495
|
|
|
|
|
|
|
Copyright © 2013, 2015 by Institute of Formal and Applied Linguistics, Charles University in Prague |
|
496
|
|
|
|
|
|
|
This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself. |