| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Lingua::Treebank::Const; |
|
2
|
|
|
|
|
|
|
|
|
3
|
3
|
|
|
3
|
|
52
|
use 5.008; |
|
|
3
|
|
|
|
|
9
|
|
|
|
3
|
|
|
|
|
125
|
|
|
4
|
3
|
|
|
3
|
|
17
|
use strict; |
|
|
3
|
|
|
|
|
7
|
|
|
|
3
|
|
|
|
|
103
|
|
|
5
|
3
|
|
|
3
|
|
13
|
use warnings; |
|
|
3
|
|
|
|
|
6
|
|
|
|
3
|
|
|
|
|
73
|
|
|
6
|
3
|
|
|
3
|
|
13
|
use Carp; |
|
|
3
|
|
|
|
|
7
|
|
|
|
3
|
|
|
|
|
164
|
|
|
7
|
|
|
|
|
|
|
|
|
8
|
3
|
|
|
3
|
|
15
|
use Lingua::Treebank; |
|
|
3
|
|
|
|
|
13
|
|
|
|
3
|
|
|
|
|
276
|
|
|
9
|
|
|
|
|
|
|
# Version of this module.
our $VERSION = '0.16'; # stay in sync automatically

# Verbosity flag: a one-time copy of the parent module's setting, taken
# when this file is loaded.
our $VERBOSE = $Lingua::Treebank::VERBOSE;

# Default traversal order for walk() when its caller does not pass one:
# true means breadth-first, anything else depth-first.
our $BF_TRAVERSAL;

##################################################################
# Indices into the array behind each constituent object (numerify(),
# for example, reads $self->[NUM]).
use constant {
    TAG       => 1,
    ANNOT     => 2,
    WORD      => 3,
    PARENT    => 4,
    CHILDREN  => 5,
    NUM       => 6,
    HEADCHILD => 7,    # only used after Headfinder
};

# String context goes through stringify(), numeric context through
# numerify(); fallback lets perl derive the remaining operators
# (such as == and !=) from those two conversions.
use overload
    '""'     => \&stringify,
    '0+'     => \&numerify,    # yields the value held in the NUM slot
    fallback => 1,             # numeric tests compare those NUM values
    ;

##################################################################
# Text-layout settings.
# NOTE(review): presumably read by the rendering methods further down
# the file -- they are not referenced in this part of it.
our $INDENT_CHAR  = ' ' x 4;
our $CHILD_PROLOG = "\n";
our $CHILD_EPILOG = "";

# Selects what stringify() produces: 'as_penn_text', 'words' or
# 'preterm_tags'.
our $STRINGIFY = 'as_penn_text';
|
32
|
|
|
|
|
|
|
################################################################## |
|
33
|
|
|
|
|
|
|
sub numerify {
    # Overload handler for numeric context ('0+').  Returns whatever is
    # stored in the NUM slot of the object; if that slot is undefined
    # it dies with a full stack trace (confess).
    my ($self) = @_;

    my $id = $self->[NUM];
    defined $id
        or confess "no numeric value!?";

    return $id;
}
|
39
|
|
|
|
|
|
|
## the approach below is not portable. assign a new number from |
|
40
|
|
|
|
|
|
|
## $__NUMID for every constituent at new() instead. |
|
41
|
|
|
|
|
|
|
# sub numerify { |
|
42
|
|
|
|
|
|
|
# my $self = shift; |
|
43
|
|
|
|
|
|
|
# if (not defined $self->[NUM]) { |
|
44
|
|
|
|
|
|
|
# # fetch out the number indicating the location in memory |
|
45
|
|
|
|
|
|
|
# my $refstr= overload::StrVal( $self ); |
|
46
|
|
|
|
|
|
|
# if ($refstr =~ m{\( 0x ([0-9a-fA-F]+) \) $}x) { # |
|
47
|
|
|
|
|
|
|
# # } |
|
48
|
|
|
|
|
|
|
# # cache it for later to save the regex |
|
49
|
|
|
|
|
|
|
# $self->[NUM] = hex $1; |
|
50
|
|
|
|
|
|
|
# } |
|
51
|
|
|
|
|
|
|
# else { |
|
52
|
|
|
|
|
|
|
# confess "numerify wasn't able to extract a numeric ref"; |
|
53
|
|
|
|
|
|
|
# } |
|
54
|
|
|
|
|
|
|
# } |
|
55
|
|
|
|
|
|
|
# return $self->[NUM]; |
|
56
|
|
|
|
|
|
|
#} |
|
57
|
|
|
|
|
|
|
# Counter behind the ids handed out by _next_numid().  It starts at 100
# so that no id is ever a small number.
our $__NUMID = 100; # never be small, just to be sure
sub _next_numid {
    # Returns the next unique numeric value for a new constituent.
    # The original notes say this is invoked from the ->new() method.
    # The invocant is accepted for method-call syntax but is not used.
    my ($class) = @_;

    $__NUMID += 1;
    return $__NUMID;
}
|
65
|
|
|
|
|
|
|
################################################################## |
|
66
|
|
|
|
|
|
|
sub stringify {
    # Overload handler for string context ('""').  The package variable
    # $STRINGIFY selects the rendering:
    #   'as_penn_text' -- the result of as_penn_text()
    #   'words'        -- word() of every terminal, joined by spaces
    #   'preterm_tags' -- tag() of every terminal, joined by spaces
    # Any other value produces a warning and then falls back to
    # as_penn_text().  Previously the sub fell off its end in that
    # case, so the object stringified to carp's own return value.
    my ($self) = @_;

    if ($STRINGIFY eq 'as_penn_text') {
        return $self->as_penn_text();
    }
    elsif ($STRINGIFY eq 'words') {
        return join ' ', map { $_->word() } $self->get_all_terminals();
    }
    elsif ($STRINGIFY eq 'preterm_tags') {
        return join ' ', map { $_->tag() } $self->get_all_terminals();
    }

    carp "don't recognize \$",
        __PACKAGE__, "::STRINGIFY value of $STRINGIFY";

    # still hand back a usable rendering after warning
    return $self->as_penn_text();
}
|
82
|
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
################################################################## |
|
84
|
|
|
|
|
|
|
sub edges {
    # String form of edges_data(): every edge record it returns is
    # flattened to one comma-joined string.  All arguments after the
    # invocant are passed through to edges_data() unchanged.
    my ($self, @options) = @_;

    return map { join ',', @{$_} } $self->edges_data(@options);
}
|
90
|
|
|
|
|
|
|
sub edges_data {
    # Returns the edge records produced by _edges_driver(0) -- array
    # refs of [label, start, end] -- after filtering and re-indexing.
    # Named options:
    #   keepterminal -- when defined and false, drop edges whose label
    #                   contains '::' (the terminal edges); default 1
    #   ignore       -- array ref of labels whose edges are dropped
    #   coindex      -- hash ref; a start or end index that appears as
    #                   a key with a defined value is replaced by it
    my ($self, %opt) = @_;

    my $keep_terminals =
        defined $opt{'keepterminal'} ? $opt{'keepterminal'} : 1;

    my %skip_label;
    %skip_label = map { $_ => 1 } @{ $opt{ignore} }
        if defined $opt{ignore};

    my %remap;
    %remap = %{ $opt{coindex} }
        if defined $opt{coindex};

    my @kept;
    EDGE:
    for my $edge ($self->_edges_driver(0)) {
        next EDGE if $skip_label{ $edge->[0] };
        next EDGE if !$keep_terminals && $edge->[0] =~ /::/;

        # rewrite the start index, then the end index
        for my $slot (1, 2) {
            my $replacement = $remap{ $edge->[$slot] };
            $edge->[$slot] = $replacement if defined $replacement;
        }

        push @kept, $edge;
    }

    return @kept;
}
|
115
|
|
|
|
|
|
|
sub _edges_driver {
    # Recursive worker behind edges_data().  Given the index at which
    # this constituent starts, returns a list of [label, start, end]
    # records: the records of every descendant first, then one for this
    # constituent itself.  A terminal yields the single record
    # ["tag::word", start, start + 1].
    my ($self, $start_index) = @_;

    if ($self->is_terminal()) {
        my $label = $self->tag() . '::' . $self->word();
        return ([ $label, $start_index, $start_index + 1 ]);
    }

    my @edges;
    my $right_edge = $start_index;
    for my $kid (@{ $self->children() }) {
        push @edges, $kid->_edges_driver($right_edge);

        # the record just added ends where the next child begins
        $right_edge = $edges[-1][-1];
    }

    # don't forget the edge for myself
    push @edges, [ $self->tag(), $start_index, $right_edge ];

    return @edges;
}
|
135
|
|
|
|
|
|
|
################################################################## |
|
136
|
|
|
|
|
|
|
sub shared_edges {
    # Returns the edge strings (see edges()) that this tree and $other
    # have in common.  An edge occurring several times is returned as
    # many times as it occurs in the tree where it is rarer.
    # Options are forwarded to edges_data()/edges().  In addition,
    # 'phantom' is an array ref of labels: each edge of THIS tree
    # carrying such a label contributes a start => end entry to the
    # coindex map that is applied to both trees.
    use List::Util 'min';

    my ($self, $other, %args) = @_;

    my %is_phantom = map { $_ => 1 } @{ $args{phantom} };

    # those trees that need coindexation
    my %coindex;
    if (keys %is_phantom) {
        for my $edge ($self->edges_data(%args)) {
            next unless $is_phantom{ $edge->[0] };
            $coindex{ $edge->[1] } = $edge->[2];
        }
    }

    # tally the edge strings on each side
    my %edges;
    $edges{$_}++ for $self->edges(%args, coindex => \%coindex);

    my %other_edges;
    $other_edges{$_}++ for $other->edges(%args, coindex => \%coindex);

    my @to_return;
    for my $edge (keys %edges) {
        my $in_both = min($edges{$edge} || 0, $other_edges{$edge} || 0);
        push @to_return, ($edge) x $in_both;
    }

    return @to_return;
}
|
167
|
|
|
|
|
|
|
################################################################## |
|
168
|
|
|
|
|
|
|
sub list_constituents {
    # Returns this constituent followed by everything that
    # list_constituents() returns for each of its children, in child
    # order.  A terminal returns just itself.
    my ($self) = @_;

    return ($self) if $self->is_terminal();

    my @below;
    push @below, $_->list_constituents() for @{ $self->children() };

    return $self, @below;
}
|
181
|
|
|
|
|
|
|
################################################################## |
|
182
|
|
|
|
|
|
|
# High-power generic function for crawling the tree. Most of the other
# functions could probably be implemented in terms of this one.
#
# Arguments after the invocant:
#   $action         -- code ref, or the name of a method the invocant
#                      can(); called as $action->($node, $state) for
#                      every node visited, starting with the invocant
#   $stop_criterion -- optional code ref or method name; when it returns
#                      true for a node, that node's children are skipped
#   $state          -- passed through untouched to both callbacks
#   $bf_traversal   -- true: breadth-first; false: depth-first; when
#                      undefined, $BF_TRAVERSAL decides
# Returns nothing.  Croaks when a callback is missing, is not a code
# ref, or names a method that cannot be found.
sub walk {
    my ($self, $action, $stop_criterion, $state, $bf_traversal) = @_;

    croak "walk()'s first non-self arg not defined"
        unless (defined $action);

    if (ref $action eq '') {
        # remember the name: can() returns undef on failure, and the
        # error message still has to say which method was missing
        my $method_name = $action;
        $action = $self->can($method_name);
        croak "couldn't find method $method_name to call from within walk"
            if (not defined $action);
    }
    elsif (ref $action ne 'CODE') {
        croak "walk()'s first non-self arg not CODE-ref";
    }

    if (defined $stop_criterion) {
        if (ref $stop_criterion eq '') {
            my $method_name = $stop_criterion;
            $stop_criterion = $self->can($method_name);
            croak "couldn't find method $method_name ",
                " to call from within walk()"
                    if (not defined $stop_criterion);
        }
        elsif (ref $stop_criterion ne 'CODE') {
            croak "walk()'s stop criterion arg not a CODE-ref";
        }
    }

    if (not defined $bf_traversal) {
        $bf_traversal = $BF_TRAVERSAL;
    }

    my @agenda = ($self);

    NODE:
    while (1) {
        # take one off the front of the line
        my $node = shift @agenda;
        return if not defined $node;

        $action->($node, $state);

        if (defined $stop_criterion
            and $stop_criterion->($node, $state)) {
            # don't put the children on the agenda
            next NODE;
        }

        # else include node's children
        if ($bf_traversal) {
            # children go in the back of the line
            push @agenda, @{ $node->children() };
        }
        else { # depth-first traversal
            # children go in the front of the line
            unshift @agenda, @{ $node->children() };
        }
    }
}
|
243
|
|
|
|
|
|
|
################################################################## |
|
244
|
|
|
|
|
|
|
sub find_common_ancestor {
    # Returns the lowest constituent that is an ancestor of (or equal
    # to) both the invocant and $cousin, or nothing when the two have
    # different roots.  Croaks when $cousin is undefined or is not an
    # object of this package.
    my ($self, $cousin) = @_;

    # error checking
    defined $cousin
        or croak "cousin arg not defined";
    UNIVERSAL::isa($cousin, __PACKAGE__)
        or croak "cousin not a " . __PACKAGE__;

    my $shared = $self->root();
    return if $cousin->root() != $shared;    # no common ancestor

    my @self_lineage   = $self->path_up_to($shared);
    my @cousin_lineage = $cousin->path_up_to($shared);

    # Both paths end at the root, so compare them from that end and
    # keep the last node they agree on.  Once they differ there is no
    # point looking further down.
    while (@self_lineage and @cousin_lineage) {
        my $from_self   = pop @self_lineage;
        my $from_cousin = pop @cousin_lineage;
        last unless $from_self == $from_cousin;
        $shared = $from_self;
    }

    return $shared;
}
|
279
|
|
|
|
|
|
|
################################################################## |
|
280
|
|
|
|
|
|
|
sub equiv_to {
    # Structural comparison with $other; returns 1 or 0.
    # Two terminals are equivalent when tag() and word() are both
    # string-equal.  Two non-terminals are equivalent when they have
    # the same number of children and each pair of children at the same
    # position is equivalent.  A terminal never equals a non-terminal.
    # NOTE(review): the tag of a non-terminal is not compared here --
    # confirm that this is intended.
    my ($self, $other) = @_;

    if ($self->is_terminal()) {
        return 0 unless $other->is_terminal();
        return 0 if $self->tag() ne $other->tag();
        return 0 if $self->word() ne $other->word();

        # otherwise it all passes:
        return 1;
    }

    # self non-terminal
    return 0 if $other->is_terminal();

    # different number of children
    return 0 if (@{ $self->children() } != @{ $other->children() });

    for my $position (0 .. $#{ $self->children() }) {
        my $mine   = $self->children($position);
        my $theirs = $other->children($position);
        return 0 unless $mine->equiv_to($theirs);
    }

    # otherwise it all passes
    return 1;
}
|
315
|
|
|
|
|
|
|
################################################################## |
|
316
|
|
|
|
|
|
|
sub equiv_tags {
    # Placeholder: no implementation exists yet, so every call croaks.
    croak("not implemented\n");
}
|
319
|
|
|
|
|
|
|
sub equiv_words {
    # Placeholder: no implementation exists yet, so every call croaks.
    croak("not implemented\n");
}
|
322
|
|
|
|
|
|
|
################################################################## |
|
323
|
|
|
|
|
|
|
# height/depth functions |
|
324
|
|
|
|
|
|
|
################################################################## |
|
325
|
|
|
|
|
|
|
sub depth_from {
    # Returns the number of parent links between the invocant and
    # $target: 0 when they are the same node, 1 when $target is the
    # parent, and so on.  When $target is not an ancestor of the
    # invocant, warns and returns nothing (undef in scalar context).
    #
    # The climb is done with a loop.  The earlier recursive form added
    # 1 to the undefined result of the failing call, so a non-ancestor
    # target produced a number instead of undef.
    my ($self, $target) = @_;

    my $steps = 0;
    my $node  = $self;

    until ($node == $target) {
        if ($node->is_root()) {
            carp "depth_from argument not an ancestor of instance";
            # we could check this explicitly up front, but users may
            # already know this isn't going to happen, so let's not
            # waste cycles
            return;    # not defined
        }
        $node = $node->parent;
        $steps++;
    }

    return $steps;
}
|
345
|
|
|
|
|
|
|
################################################################## |
|
346
|
|
|
|
|
|
|
sub depth {
    # Returns how many steps lie between the invocant and the root of
    # its tree, as computed by depth_from().
    #
    # This goes up the tree twice: once to find the root and once to
    # count.  If benchmarking turns up a problem here, count the
    # parent links directly instead, either with simple recursion
    # (0 at the root, otherwise the parent's depth plus 1) or with a
    # loop that follows parent until is_root() holds.  The loop
    # involves the fewest stack operations, so it is probably the
    # fastest.
    my ($self) = @_;

    my ($top) = $self->root();
    return $self->depth_from($top);
}
|
379
|
|
|
|
|
|
|
################################################################## |
|
380
|
|
|
|
|
|
|
sub height {
    # Returns the longest distance from the invocant down to any leaf:
    # 0 for a terminal, otherwise one more than the greatest height
    # among its children.
    #
    # could be re-implemented with get_all_terminals, path_up_to and
    # array lengths, but that seems unnecessary
    my ($self) = @_;

    return 0 if $self->is_terminal();

    my $tallest = 0;
    for my $kid (@{ $self->children() }) {
        my $kid_height = $kid->height();
        $tallest = $kid_height if $kid_height > $tallest;
    }

    return $tallest + 1;
}
|
404
|
|
|
|
|
|
|
################################################################## |
|
405
|
|
|
|
|
|
|
sub get_index {
    # Returns the position (counting from 0) of $daughter among the
    # invocant's children.  Warns and returns nothing when the invocant
    # is a terminal, when $daughter's parent is not the invocant, or
    # when $daughter names the invocant as parent but is not found in
    # its child list.
    my ($self, $daughter) = @_;

    if ($self->is_terminal) {
        carp "get_index called on terminal node, can't get_index";
        return;
    }

    unless ($self == $daughter->parent) {
        carp "argument not daughter of instance, can't get index";
        return;
    }

    for my $position (0 .. $#{ $self->children }) {
        return $position
            if $self->children($position) == $daughter;
    }

    carp "malformed tree:",
        " daughter identifies instance as parent, but parent does ",
        "not claim daughter";
    return;
}
|
430
|
|
|
|
|
|
|
################################################################## |
|
431
|
|
|
|
|
|
|
# node retrieval functions |
|
432
|
|
|
|
|
|
|
################################################################## |
|
433
|
|
|
|
|
|
|
sub path_up_to {
    # Returns the chain of nodes from the invocant up to $terminus,
    # both ends included, invocant first.  When the root is reached
    # without meeting $terminus, warns and returns nothing.
    #
    # Done recursively: this is grammatical structure, so the heights
    # involved are very small.
    my ($self, $terminus) = @_;

    return ($self) if $self == $terminus;

    if ($self->is_root()) {
        carp "terminus argument not an ancestor of instance!";
        return;
    }

    my @rest_of_path = $self->parent->path_up_to($terminus);
    return unless @rest_of_path;    # not found

    return ($self, @rest_of_path);
}
|
458
|
|
|
|
|
|
|
################################################################## |
|
459
|
|
|
|
|
|
|
sub root {
    # Returns the root of the tree the invocant belongs to: the
    # invocant itself when is_root() holds, otherwise whatever its
    # parent reports as root.
    my ($self) = @_;

    return $self if $self->is_root();
    return $self->parent->root();
}
|
469
|
|
|
|
|
|
|
################################################################## |
|
470
|
|
|
|
|
|
|
# Return a list of ancestors of a node matching a criteria given in a |
|
471
|
|
|
|
|
|
|
# function parameter. |
|
472
|
|
|
|
|
|
|
# my $text = <<EOTREE;
|
|
473
|
|
|
|
|
|
|
# (NP |
|
474
|
|
|
|
|
|
|
# (NP |
|
475
|
|
|
|
|
|
|
# (VP |
|
476
|
|
|
|
|
|
|
# (N dog)))) |
|
477
|
|
|
|
|
|
|
# EOTREE |
|
478
|
|
|
|
|
|
|
# |
|
479
|
|
|
|
|
|
|
# my $node = Lingua::Treebank::TB3Const->new->from_penn_string($text); |
|
480
|
|
|
|
|
|
|
# my @terms = $node->get_all_terminals(); |
|
481
|
|
|
|
|
|
|
# my $node = shift @terms; |
|
482
|
|
|
|
|
|
|
# my @ancestors = $node->select_ancestors(sub{$_[0]->tag() eq "NP"}); |
|
483
|
|
|
|
|
|
|
sub select_ancestors {
    # Returns the ancestors of the invocant for which $criteria returns
    # true, nearest ancestor first.  $criteria is a code ref called
    # with the ancestor as its only argument.  The invocant itself is
    # never tested.
    my ($self, $criteria) = @_;

    my @matching;

    # climb until parent() stops returning a defined value
    for (my $node = $self->parent(); defined $node; $node = $node->parent()) {
        push @matching, $node if $criteria->($node);
    }

    return @matching;
}
|
498
|
|
|
|
|
|
|
################################################################## |
|
499
|
|
|
|
|
|
|
# Return a list of the nodes at or below a node that match a criteria
# given in a function parameter; the node itself is tested as well.
# Nodes are visited depth-first, taking the LAST child of each node
# first (children are pushed onto a stack and popped off its end), so
# the matches are returned in that order, not breadth-first.
sub select_children {
    my ($self, $criteria) = @_;

    my @matching;
    my @stack = ($self);

    # Stop on an empty stack (or an undefined entry).  The test is
    # defined() rather than plain truth so that the overloaded
    # conversions are not run on every node just to see if it exists.
    while (defined(my $node = pop @stack)) {
        push @matching, $node if $criteria->($node);
        push @stack, @{ $node->children() };
    }

    return @matching;
}
|
518
|
|
|
|
|
|
|
################################################################## |
|
519
|
|
|
|
|
|
|
sub get_all_terminals {
    # Returns all leaves below the invocant in a left-right traversal.
    # A terminal returns a one-element list holding itself.
    my ($self) = @_;

    my @leaves =
        $self->is_terminal()
        ? ($self)
        : map { $_->get_all_terminals } @{ $self->children };

    return @leaves;
}
|
536
|
|
|
|
|
|
|
################################################################## |
|
537
|
|
|
|
|
|
|
sub next_sib {
    # Return the sibling immediately to the right of this node.
    # Returns nothing (undef in scalar context) when the node is the
    # root or is already the rightmost child of its parent.
    my __PACKAGE__ $self = shift;

    return if $self->is_root;    # a root has no siblings

    my __PACKAGE__ $mother = $self->parent;
    my $here = $mother->get_index($self);
    my $last = $#{ $mother->children };

    return if $here == $last;    # rightmost sibling: nothing further right

    return $mother->children($here + 1);
}
|
552
|
|
|
|
|
|
|
################################################################## |
|
553
|
|
|
|
|
|
|
sub prev_sib {
    # Return the sibling immediately to the left of this node.
    # Returns nothing (undef in scalar context) when the node is the
    # root or is already the leftmost child of its parent.
    my __PACKAGE__ $self = shift;

    return if $self->is_root;    # a root has no siblings

    my __PACKAGE__ $mother = $self->parent;
    my $here = $mother->get_index($self);

    return if $here == 0;        # leftmost sibling: nothing further left

    return $mother->children($here - 1);
}
|
568
|
|
|
|
|
|
|
################################################################## |
|
569
|
|
|
|
|
|
|
sub right_leaf {
    # Returns the rightmost leaf dominated by this node; a terminal
    # node is its own rightmost leaf.
    my __PACKAGE__ $self = shift;

    return $self if $self->is_terminal;

    # descend through the last daughter until a terminal is reached
    my __PACKAGE__ $last_kid = $self->children(-1);
    return $last_kid->right_leaf();
}
|
581
|
|
|
|
|
|
|
################################################################## |
|
582
|
|
|
|
|
|
|
sub left_leaf {
    # Returns the leftmost leaf dominated by this node; a terminal
    # node is its own leftmost leaf.
    my __PACKAGE__ $self = shift;

    return $self if $self->is_terminal;

    # descend through the first daughter until a terminal is reached
    my __PACKAGE__ $first_kid = $self->children(0);
    return $first_kid->left_leaf();
}
|
594
|
|
|
|
|
|
|
################################################################## |
|
595
|
|
|
|
|
|
|
sub prev_leaf {
    # Return the nearest leaf to the left (back in time) that is not
    # dominated by the current node.  Works on non-terminals too.
    # Returns nothing when there is no such leaf.
    my __PACKAGE__ $self = shift;

    my __PACKAGE__ $left_sib = $self->prev_sib;

    # a left sibling exists: the wanted leaf is its rightmost one
    return $left_sib->right_leaf() if defined $left_sib;

    # no left sibling and nowhere to climb: no previous leaves
    return if $self->is_root();

    # otherwise ask the parent, which repeats the search one level up
    return $self->parent->prev_leaf();
}
|
619
|
|
|
|
|
|
|
################################################################## |
|
620
|
|
|
|
|
|
|
sub next_leaf {
    # Return the nearest leaf to the right (forward in time) that is
    # not dominated by the current node.  Works on non-terminals too.
    # Returns nothing when there is no such leaf.
    my __PACKAGE__ $self = shift;

    my __PACKAGE__ $right_sib = $self->next_sib;

    # a right sibling exists: the wanted leaf is its leftmost one
    return $right_sib->left_leaf() if defined $right_sib;

    # no right sibling and nowhere to climb: no following leaves
    return if $self->is_root();

    # otherwise ask the parent, which repeats the search one level up
    return $self->parent->next_leaf();
}
|
643
|
|
|
|
|
|
|
################################################################## |
|
644
|
|
|
|
|
|
|
# boolean requests (one additional argument) |
|
645
|
|
|
|
|
|
|
################################################################## |
|
646
|
|
|
|
|
|
|
sub is_descendant_of {
    # True (1) when the argument node is this node or one of its
    # ancestors, false (0) otherwise.  Nodes are compared with the
    # numeric == operator, which this class overloads.
    my __PACKAGE__ $self = shift;
    my __PACKAGE__ $grandma = shift;

    # every node counts as its own descendant
    return 1 if $self == $grandma;

    # ran out of ancestors without meeting $grandma
    return 0 if $self->is_root;

    # keep climbing
    return $self->parent->is_descendant_of($grandma);
}
|
660
|
|
|
|
|
|
|
################################################################## |
|
661
|
|
|
|
|
|
|
sub is_ancestor_of {
    # Mirror image of is_descendant_of: true when the argument node
    # is a descendant of (or the same node as) the instance.
    my __PACKAGE__ $self = shift;
    my __PACKAGE__ $candidate = shift;

    my $verdict = $candidate->is_descendant_of($self);
    return $verdict;
}
|
666
|
|
|
|
|
|
|
################################################################## |
|
667
|
|
|
|
|
|
|
# Are the two nodes siblings? |
|
668
|
|
|
|
|
|
|
# |
|
669
|
|
|
|
|
|
|
# my $sibling = <<EOTREE; |
|
670
|
|
|
|
|
|
|
# (S |
|
671
|
|
|
|
|
|
|
# (NP |
|
672
|
|
|
|
|
|
|
# (D the) |
|
673
|
|
|
|
|
|
|
# (N boy)) |
|
674
|
|
|
|
|
|
|
# (VP |
|
675
|
|
|
|
|
|
|
# ran)) |
|
676
|
|
|
|
|
|
|
# EOTREE |
|
677
|
|
|
|
|
|
|
# |
|
678
|
|
|
|
|
|
|
# my $node = Lingua::Treebank::TB3Const->new()->from_penn_string($sibling); |
|
679
|
|
|
|
|
|
|
# my @child = @{$node->children()}; |
|
680
|
|
|
|
|
|
|
# my $np = $child[0]; |
|
681
|
|
|
|
|
|
|
# my $vp = $child[1]; |
|
682
|
|
|
|
|
|
|
# print "This is true." if ($np->is_sibling($vp)); |
|
683
|
|
|
|
|
|
|
sub is_sibling {
    # True when the instance and the argument node hang directly off
    # the same parent.  A root node is nobody's sibling.
    my __PACKAGE__ $self = shift;
    my __PACKAGE__ $other = shift;

    if ($self->is_root() or $other->is_root()) {
        return 0;
    }

    my __PACKAGE__ $shared = $self->find_common_ancestor($other);
    if (not defined $shared) {
        return 0;
    }

    # siblings only if the common ancestor is the direct parent of both
    return ($shared == $self->parent() && $shared == $other->parent());
}
|
694
|
|
|
|
|
|
|
################################################################## |
|
695
|
|
|
|
|
|
|
# I/O methods (to/from text) |
|
696
|
|
|
|
|
|
|
################################################################## |
|
697
|
|
|
|
|
|
|
sub as_penn_text {
    # Render this node and everything below it as bracketed Penn
    # treebank text.
    #
    # Optional positional arguments:
    #   step         - nesting depth of this node (default 0)
    #   indentChar   - string repeated once per depth level in front
    #                  of each child (default $INDENT_CHAR)
    #   child_prolog - text emitted before each child
    #                  (default $CHILD_PROLOG)
    #   child_epilog - text emitted after each child
    #                  (default $CHILD_EPILOG)
    #   am_head      - when true the tag is wrapped in '*'; undef or
    #                  false leaves the tag untouched
    # Returns the text as a single string.
    my __PACKAGE__ $self = shift;
    my ($step, $indentChar, $child_prolog, $child_epilog, $am_head) = @_;

    # fill in defaults for anything the caller left out
    $step         = 0             unless defined $step;
    $indentChar   = $INDENT_CHAR  unless defined $indentChar;
    $child_prolog = $CHILD_PROLOG unless defined $child_prolog;
    $child_epilog = $CHILD_EPILOG unless defined $child_epilog;

    # build the label: tag, optional head marking, optional annotation
    my $label = $self->tag();
    $label = '*'.$label.'*' if $am_head;
    $label .= '-' . $self->annot() if defined $self->annot();

    my $text = '(' . $label . ' ';

    # a terminal is just its word
    if ($self->is_terminal) {
        return $text . $self->word() . ')';
    }

    # non-terminal: one rendered child after another
    my $head   = $self->headchild();
    my $indent = $indentChar x ($step + 1);

    foreach my __PACKAGE__ $d ( @{$self->children} ) {
        # stays undef (no head marking at all) when no head is known
        my $child_is_head;
        if (defined $head) {
            $child_is_head = ($head == $d ? 1 : 0);
        }

        $text .= $child_prolog
            . $indent
            . $d->as_penn_text($step + 1, $indentChar, $child_prolog, $child_epilog, $child_is_head)
            . $child_epilog;
    }

    return $text . ')';
}
|
749
|
|
|
|
|
|
|
################################################################## |
|
750
|
|
|
|
|
|
|
sub from_cnf_string {
    # Populate this node from a string in which a non-terminal is
    # written "(TAG child child ...)" and a terminal is written
    # "word_TAG".  Children are built recursively with new() of the
    # instance's class and attached with append().
    # Croaks when the text matches neither form; returns the instance.
    my __PACKAGE__ $self = shift;
    my $class = ref $self;
    local $_ = shift;

    # Strip leading and trailing whitespace.
    s/^\s+//;
    s/\s+$//;

    # Try to remove the outermost parenthesis pair; failure means the
    # text is not a non-terminal.
    if ( not s/^ \( \s* (.*) \s* \) $/$1/x ) {
        # This should then be a terminal node.
        /^([^_]+)_(\S+)$/ or croak "can't parse '$_'";
        my ($word, $tag) = ($1, $2);
        $self->word($word);
        $self->tag($tag);
        return $self;
    }

    # This is a non-terminal node: the first token is its tag.
    s/^(\S+)\s*//;
    my $tag = $1;
    $self->tag($tag);

    # Enumerate all the children of this node.
    while (length $_) {
        my $piece;
        if ( /^\(/ ) {
            # The child is a non-terminal node: take its balanced
            # bracket span off the front of the text.
            $piece = $class->find_brackets($_);
            substr($_, 0, length $piece, '');
        }
        else {
            # The child is a terminal node: take one token.
            s/^(\S+)\s*// or carp "couldn't find text in $_\n";
            $piece = $1;
        }

        # Create the child node and let it parse its own text.
        my __PACKAGE__ $kid = $class->new();
        $kid->from_cnf_string($piece);
        $self->append($kid);

        # Skip whitespace delimiting children.
        s/^\s+//;
    }

    return $self;
}
|
798
|
|
|
|
|
|
|
################################################################## |
|
799
|
|
|
|
|
|
|
sub from_penn_string {
    # Populate this node from the text of one complete constituent in
    # Penn treebank bracket notation, e.g. "(NP (D the) (N boy))".
    #
    # The tag (and annotation, if the tag carries one after '-' or
    # '=') is recorded on the instance.  Bracketed sub-constituents
    # become child nodes built recursively with new() of the
    # instance's class; bare text becomes the instance's word.
    # Returns the instance.
    my __PACKAGE__ $self = shift;
    my $class = ref $self;
    my $text = shift;

    # strip off front and back parens and whitespace
    $text =~ s/^ \s* \( \s* //x;
    $text =~ s/ \s* \) \s* $//x;

    # handle perverse cases where the brackets are the text, like
    # (NP (-LRB- () (NNP Joe) (-RRB- )))
    # by swapping them for placeholders that contain no parentheses
    $text =~ s/\(-LRB- \(\)/__LPRN__/g;
    $text =~ s/\(-RRB- \)\)/__RPRN__/g;

    # tag is everything up to the first whitespace or
    # parenthesis.  Children are everything else.
    my ($tag, $childrentext) =
        ($text =~ /^ ([^\s\(]*) \s* (.*) $/sx);

    croak "couldn't find a constituent in '$text'"
        unless (defined $tag and defined $childrentext);

    # split an annotated tag such as NP-SBJ into tag and annotation
    if ($tag =~ m/ ^ ( [^-]+? ) ([-=]) ( .* ) $/x ) {
        my ($short_tag, $annot) = ($1, $3);
        warn "found '$2' (should be '-') separating annotation ".
            "in tag $tag\n" if ($2 ne '-' and $VERBOSE);
        $self->annot( $annot );
        $self->tag( $short_tag );
    }
    else {
        $self->tag($tag);
    }

  CHILD:
    while (length $childrentext) {

        # a placeholder at the front stands for a literal-parenthesis
        # leaf; work out which one, if any
        my ($paren_tag, $paren_word);
        if ($childrentext =~ s/^\s*__LPRN__\s*//) {
            ($paren_tag, $paren_word) = ('-LRB-', '(');
        }
        elsif ($childrentext =~ s/^\s*__RPRN__\s*//) {
            ($paren_tag, $paren_word) = ('-RRB-', ')');
        }

        if (defined $paren_tag) {
            my __PACKAGE__ $leaf = $class->new();
            $leaf->tag($paren_tag);
            $leaf->word($paren_word);
            $self->append($leaf);
            next CHILD;
        }

        my $childtext = $class->find_brackets($childrentext);

        if (not defined $childtext) {
            # no bracketed span: the remainder is this node's word
            if ($childrentext =~ tr {()} {()} ) {
                carp "found a parenthesis in word '$childrentext'; ",
                    " this suggests that the data had unbalanced parens";
            }

            $self->word($childrentext);

            # eliminate text so that we can exit the while loop
            $childrentext = '';

            warn "trouble -- word found in token that "
                . "already had child constituents\n"
                    if @{$self->children};

            next CHILD;
        }

        # child is itself a constituent
        my __PACKAGE__ $child = $class->new();
        $child->from_penn_string($childtext);
        $self->append($child);

        # chop the consumed child, and the whitespace after it, out of
        # the childrentext
        substr($childrentext, 0, length $childtext, "");
        $childrentext =~ s/^\s+//;

        warn "trouble -- child constituent found " .
            "in token that already had word\n"
                if defined $self->word;
    }

    return $self;
}
|
901
|
|
|
|
|
|
|
my $bracket_error; |
|
902
|
|
|
|
|
|
|
sub find_brackets {
    # Class method.  Scan the text for its first balanced
    # parenthesised span.
    #
    # Arguments: the invocant (unused) and the text to scan.
    # Returns:   the prefix of the text running from its first
    #            character through the ')' that balances the first
    #            '(' ; or nothing (undef in scalar context) when the
    #            text contains no '(' at all.
    # Croaks:    when a ')' occurs before the first '(' , or when the
    #            parentheses never balance.
    my $class = shift;
    my $text = shift;

    my $first_open  = index $text, '(';
    my $first_close = index $text, ')';

    # no opening bracket at all: nothing to find (a stray ')' on its
    # own is left for the caller to deal with)
    return if $first_open == -1;

    # an opening bracket that is never closed is a missing-paren
    # problem, not a mis-nesting one, so report it as such
    croak "missing close parens in $text" if $first_close == -1;

    croak ("I found a right bracket before a left-bracket. ",
           "Brackets mis-nested. Are you using .psd files instead of .mrg?")
        if $first_close < $first_open;

    # walk bracket by bracket, tracking the nesting depth, until the
    # bracket opened at $first_open has been closed
    my $depth = 1;
    my $posn  = $first_open;

    while ($depth > 0) {
        my $next_open  = index $text, '(', $posn + 1;
        my $next_close = index $text, ')', $posn + 1;

        croak "missing close parens in $text" if $next_close == -1;

        if ($next_open == -1 or $next_close < $next_open) {
            # the nearest bracket closes one level
            $depth--;
            $posn = $next_close;
        }
        else {
            # the nearest bracket opens a deeper level
            $depth++;
            $posn = $next_open;
        }
    }

    return substr $text, 0, $posn + 1;
}
|
942
|
|
|
|
|
|
|
################################################################## |
|
943
|
|
|
|
|
|
|
# Tree modification methods |
|
944
|
|
|
|
|
|
|
################################################################## |
|
945
|
|
|
|
|
|
|
sub flatten {
    # pull up all terminals to be children of the instance here,
    # regardless of how deep they are
    #
    # Carps and returns nothing when called on a terminal node;
    # otherwise returns the instance.
    #
    # see POD below for better details

    my __PACKAGE__ $self = shift;

    if ($self->is_terminal) {
        carp "flatten called on terminal node";
        return;
    }

    # Iterate over a snapshot of the daughters: retract() splices the
    # live children array, and changing an array while foreach is
    # walking it is not safe.
    my @daughters = @{ $self->children };

    foreach my __PACKAGE__ $daughter (@daughters) {

        next if $daughter->is_terminal; # this child's done

        # pull up all descendants of non-terminal daughter to depend
        # directly on the daughter
        $daughter->flatten();

        # now reparent all the grandchildren to self, by retracting
        # the daughter
        $self->retract($daughter);
    }

    return $self;

    # could probably be reimplemented by "get_all_terminals" and
    # judicious use of insert, but this recursive strategy is more
    # elegant and takes advantage of brains of retract() method
}
|
977
|
|
|
|
|
|
|
################################################################## |
|
978
|
|
|
|
|
|
|
sub retract {
    # Remove one non-terminal daughter and attach that daughter's
    # children directly to the instance in its place, keeping their
    # surface order.
    #
    # Carps and returns nothing when the argument does not name the
    # instance as its parent, or when it is a terminal; otherwise
    # returns the instance.
    #
    # see POD for more details

    my __PACKAGE__ $self = shift;
    my __PACKAGE__ $daughter = shift;

    my $not_mine = ( $daughter->parent() != $self );
    if ($not_mine) {
        carp "argument daughter does not claim instance as mother,",
            " can't retract!";
        return;
    }

    if ( $daughter->is_terminal() ) {
        carp "daughter is a terminal node, can't retract!";
        return;
    }

    # swap the daughter for the list of her own children
    my @grandkids = @{ $daughter->children };
    $self->replace( $daughter, @grandkids );

    return $self;
}
|
1004
|
|
|
|
|
|
|
################################################################## |
|
1005
|
|
|
|
|
|
|
sub replace {
    # replace target arg with replacement list
    #
    # Arguments: the child to remove, followed by the nodes to put in
    #            its position (possibly none).
    # Returns:   whatever insert_at() returns; nothing when the target
    #            does not name the instance as its parent.
    my __PACKAGE__ $self = shift;
    my __PACKAGE__ $target = shift;
    my @replacements = @_;

    # Refuse to go on once we have said we can't: this matches the
    # carp-and-return handling of the same situation in retract().
    unless ($target->parent == $self) {
        carp "argument not a child of instance, can't replace!";
        return;
    }

    my $index = $self->get_index($target);

    $self->detach_at($index);

    return $self->insert_at($index, @replacements);
}
|
1020
|
|
|
|
|
|
|
################################################################## |
|
1021
|
|
|
|
|
|
|
sub wither {
    # Detach this node from its parent and, if it was the parent's
    # only child, wither the parent as well (and so on upwards).
    # Does nothing on a root node.
    my __PACKAGE__ $self = shift;

    return if $self->is_root();

    my __PACKAGE__ $mother = $self->parent();

    # must be measured before the detach changes the count
    my $was_only_child = ($mother->num_children() == 1);

    $mother->detach($self);

    # unary parent, wither it too
    $mother->wither() if $was_only_child;
}
|
1035
|
|
|
|
|
|
|
################################################################## |
|
1036
|
|
|
|
|
|
|
sub detach {
    # Removes an entire subtree: looks up the position of the given
    # daughter with get_index() and hands over to detach_at().
    my __PACKAGE__ $self = shift;
    my __PACKAGE__ $daughter = shift;

    my $where = $self->get_index($daughter);

    return $self->detach_at($where);
}
|
1047
|
|
|
|
|
|
|
################################################################## |
|
1048
|
|
|
|
|
|
|
sub detach_at {
    # Remove the one daughter node found at the given index.
    #
    # Croaks when no index is supplied; carps and returns nothing when
    # there is no daughter at that index.  Otherwise the daughter's
    # parent link is cleared, the instance's headchild is cleared if
    # it was that daughter, and the daughter is spliced out of the
    # children array.
    my __PACKAGE__ $self = shift;
    my $index = shift;

    croak "no index provided to detach_at method"
        if (not defined $index);

    my __PACKAGE__ $d = $self->children($index);

    unless (defined $d) {
        carp "no daughter at index $index";
        return;
    }

    # remove links
    $d->clear_parent();

    if (defined $self->headchild() and $self->headchild() == $d) {
        $self->clear_headchild();
    }

    # cut the single element out, putting nothing in its place
    return splice @{$self->children}, $index, 1, ();
}
|
1073
|
|
|
|
|
|
|
################################################################## |
|
1074
|
|
|
|
|
|
|
sub prepend {
    # Insert the given nodes, in order, in front of the instance's
    # existing children (that is, at index 0).
    my __PACKAGE__ $self = shift;

    return $self->insert_at(0, @_);
}
|
1079
|
|
|
|
|
|
|
################################################################## |
|
1080
|
|
|
|
|
|
|
sub append {
    # Insert the given nodes, in order, after the instance's existing
    # children (that is, at the index one past the current last child).
    my __PACKAGE__ $self = shift;

    my $end = scalar @{$self->children};

    return $self->insert_at($end, @_);
}
|
1085
|
|
|
|
|
|
|
################################################################## |
|
1086
|
|
|
|
|
|
|
sub insert_before {
    # Insert the given nodes as siblings immediately to the left of
    # the instance, under the instance's parent.  Returns the instance.
    my __PACKAGE__ $self = shift;
    my @sibs = @_;

    my $mother = $self->parent();
    my $here   = $mother->get_index($self);

    $mother->insert_at($here, @sibs);

    return $self;
}
|
1097
|
|
|
|
|
|
|
################################################################## |
|
1098
|
|
|
|
|
|
|
sub insert_after {
    # Insert the given nodes as siblings immediately to the right of
    # the instance, under the instance's parent.  Returns the instance.
    my __PACKAGE__ $self = shift;
    my @sibs = @_;

    my $mother = $self->parent();
    my $here   = $mother->get_index($self);

    $mother->insert_at($here + 1, @sibs);

    return $self;
}
|
1109
|
|
|
|
|
|
|
################################################################## |
|
1110
|
|
|
|
|
|
|
sub insert_at {
    # Make the given nodes children of the instance, placing them at
    # the given position in the children array without removing any
    # existing child.  Each new node's parent is set to the instance.
    # Returns the instance.
    my __PACKAGE__ $self = shift;
    my ($position, @daughters) = @_;

    # point every newcomer at its new mother first ...
    foreach my __PACKAGE__ $newcomer (@daughters) {
        $newcomer->parent($self);
    }

    # ... then slot them in (removing zero existing elements)
    my $kids = $self->children;
    splice @{$kids}, $position, 0, @daughters;

    return $self;
}
|
1122
|
|
|
|
|
|
|
################################################################## |
|
1123
|
|
|
|
|
|
|
# FEATURES OF THE CURRENT NODE |
|
1124
|
|
|
|
|
|
|
################################################################## |
|
1125
|
|
|
|
|
|
|
sub is_root {
    # A node is a root exactly when its PARENT slot is undefined.
    my __PACKAGE__ $self = shift;

    my $mother = $self->[PARENT];
    return ( !defined $mother );
}
|
1129
|
|
|
|
|
|
|
################################################################## |
|
1130
|
|
|
|
|
|
|
# Is this an empty root node? |
|
1131
|
|
|
|
|
|
|
# |
|
1132
|
|
|
|
|
|
|
# my $text = <<EOTREE; |
|
1133
|
|
|
|
|
|
|
# ( |
|
1134
|
|
|
|
|
|
|
# (INTJ |
|
1135
|
|
|
|
|
|
|
# (UH Okay) |
|
1136
|
|
|
|
|
|
|
# (. .) |
|
1137
|
|
|
|
|
|
|
# (-DFL- E_S))) |
|
1138
|
|
|
|
|
|
|
# EOTREE |
|
1139
|
|
|
|
|
|
|
# |
|
1140
|
|
|
|
|
|
|
# my $node = Lingua::Treebank::TB3Const->new->from_penn_string($text) |
|
1141
|
|
|
|
|
|
|
# print "This is true." if ($node->is_empty_root()); |
|
1142
|
|
|
|
|
|
|
# |
|
1143
|
|
|
|
|
|
|
sub is_empty_root {
    # True when the instance is a root, has no (true) tag, and has
    # exactly one child.
    my __PACKAGE__ $self = shift;

    return ( $self->is_root()
             && !$self->tag()
             && @{ $self->children() } == 1 );
}
|
1150
|
|
|
|
|
|
|
################################################################# |
|
1151
|
|
|
|
|
|
|
sub is_terminal {
    # Returns 1 when the node carries a word, 0 when it carries
    # children instead.  A node with both only earns a carp (and still
    # counts as terminal); a node with neither is fatal.
    my __PACKAGE__ $self = shift;

    if (defined $self->[WORD]) {
        carp "how did I get children AND a word?"
            if @{ $self->children() };
        return 1;
    }

    croak "how did I get neither a word NOR children?"
        if not @{ $self->children() };

    return 0;
}
|
1167
|
|
|
|
|
|
|
################################################################## |
|
1168
|
|
|
|
|
|
|
sub children {
    # Combined getter/setter for the node's children.
    #
    #   $d->children()                 returns the children arrayref
    #   $d->children(2)                returns the child at index 2
    #   $d->children([ $foo, $bar ])   replaces the whole array, then
    #                                  returns the children arrayref
    #   $d->children(1, $foo_child)    stores one child at index 1 and
    #                                  returns it
    #
    # Croaks when called with more than two arguments, or when a node
    # being stored is not an instance of this package.
    my $self = shift;

    if (@_ > 2) {
        croak "children() called with >2 args";
    }

    if (@_ == 2) {
        # e.g. $d->children(1, $foo_child);
        croak "wrong package type: ", ref($_[1]),
            " . Expecting ", __PACKAGE__
                unless UNIVERSAL::isa($_[1], __PACKAGE__);

        return $self->[ CHILDREN ][ $_[0] ] = $_[1];
    }

    if (@_ == 1) {
        if (ref $_[0] eq 'ARRAY') {
            # reset entire array,
            # e.g. $d->children([ $foo, $bar ])
            #
            # The arrayref is the first (and only) argument.  Reading
            # $_[1] here, which does not exist in a one-argument call,
            # would skip the validation and lose the caller's array.
            foreach (@{$_[0]}) {
                if (not UNIVERSAL::isa($_, __PACKAGE__)) {
                    croak "ref ", ref $_, " in arrayref not a ",
                        __PACKAGE__;
                }
            }
            $self->[ CHILDREN ] = $_[0];
        }
        else {
            # getting single element
            # e.g. $d->children(2);
            return $self->[ CHILDREN ][ $_[0] ];
        }
    }

    # no args, or the whole array was just reset
    return $self->[ CHILDREN ];
}
|
1203
|
|
|
|
|
|
|
################################################################## |
|
1204
|
|
|
|
|
|
|
# Return the number of entries in this node's CHILDREN array.
sub num_children {
    my ($self) = @_;

    return scalar @{ $self->[ CHILDREN ] };
}
|
1208
|
|
|
|
|
|
|
################################################################## |
|
1209
|
|
|
|
|
|
|
# Functions for headed trees |
|
1210
|
|
|
|
|
|
|
################################################################## |
|
1211
|
|
|
|
|
|
|
# Recursively re-case the tags beneath this node according to headedness:
# the child that is this node's headchild() gets its tag upper-cased,
# every other child gets its tag lower-cased, and the same procedure is
# then applied to each child.  Terminals are left alone.
sub capitalize_headed {
    my __PACKAGE__ $self = shift;

    # terminals have no children to re-case
    return if $self->is_terminal;

    my $head = $self->headchild();

    foreach my $child (@{ $self->children }) {
        # numeric comparison goes through the class's '0+' overload
        my $is_head = ($child == $head);
        my $label   = $child->tag();

        $child->tag( $is_head ? uc($label) : lc($label) );
        $child->capitalize_headed();
    }
}
|
1227
|
|
|
|
|
|
|
|
|
1228
|
|
|
|
|
|
|
# Starting from this node (usually a leaf), walk up through parent()
# links for as long as each parent's headterminal() is this node, and
# return the highest node reached.  Returns the node itself when it has
# no parent or is not its parent's head terminal.
sub maximal_projection {
    my __PACKAGE__ $self = shift;

    my $top = $self;

    CLIMB:
    while (defined( my $above = $top->parent() )) {
        # stop as soon as the next level up is headed by something else
        last CLIMB if $above->headterminal() != $self;
        $top = $above;
    }

    return $top;
}
|
1244
|
|
|
|
|
|
|
################################################################## |
|
1245
|
|
|
|
|
|
|
# Forget any recorded head child by resetting the HEADCHILD slot to undef.
sub clear_headchild {
    my ($self) = @_;

    $self->[HEADCHILD] = undef;
}
|
1249
|
|
|
|
|
|
|
################################################################## |
|
1250
|
|
|
|
|
|
|
# Follow headchild() links downward from this node until a terminal is
# reached, and return that terminal.  A terminal returns itself.
# Returns undef when a non-terminal on the path has no head child set.
sub headterminal {
    my __PACKAGE__ $self = shift;

    return $self if $self->is_terminal();

    my $head = $self->headchild();

    # recurse into the head child, or give up if none was recorded
    return defined $head ? $head->headterminal() : undef;
}
|
1261
|
|
|
|
|
|
|
################################################################## |
|
1262
|
|
|
|
|
|
|
# Combined getter/setter for the HEADCHILD slot.
#
#   $node->headchild()       returns the stored head child (may be undef)
#   $node->headchild($kid)   records $kid as head child and returns it
#
# When setting, croaks if more than one argument is passed, if the
# argument is not an instance of this package, or if it is not currently
# one of this node's children.
sub headchild {
    my __PACKAGE__ $self = shift;

    # getting
    return $self->[HEADCHILD] unless @_;

    # setting
    croak "->headchild() called with >1 argument" if @_ > 1;

    my ($candidate) = @_;

    croak "->headchild() argument wrong class"
      unless UNIVERSAL::isa($candidate, __PACKAGE__);

    # membership test uses numeric (identity) comparison on each child
    my $matches = grep { $candidate == $_ } @{ $self->[ CHILDREN ] };
    croak "->headchild() setting used value that wasn't ",
      "one of its kids"
        unless $matches;

    return $self->[HEADCHILD] = $candidate;
}
|
1284
|
|
|
|
|
|
|
################################################################## |
|
1285
|
|
|
|
|
|
|
# Combined getter/setter for the PARENT slot.
#
#   $node->parent()          returns the stored parent (may be undef)
#   $node->parent($other)    records $other as parent and returns it
#
# When setting, croaks if more than one argument is passed or if the
# argument is not an instance of this package.
sub parent {
    my __PACKAGE__ $self = shift;

    # getting
    return $self->[PARENT] unless @_;

    # setting
    croak "parent called with >1 argument" if @_ > 1;

    my ($new_parent) = @_;
    croak "parent argument wrong class"
      unless UNIVERSAL::isa($new_parent, __PACKAGE__);

    return $self->[PARENT] = $new_parent;
}
|
1302
|
|
|
|
|
|
|
################################################################## |
|
1303
|
|
|
|
|
|
|
# Detach this node from its parent by resetting the PARENT slot to undef.
# Only this node's own slot is touched.
sub clear_parent {
    my ($self) = @_;

    $self->[PARENT] = undef;
}
|
1307
|
|
|
|
|
|
|
################################################################## |
|
1308
|
|
|
|
|
|
|
# Combined getter/setter for the TAG slot.
#
#   $node->tag()         returns the stored tag
#   $node->tag($label)   stores $label and returns it
#
# When setting, croaks if more than one argument is passed and warns
# (carp) if the value is a reference; the value is stored either way.
sub tag {
    my __PACKAGE__ $self = shift;

    # getting
    return $self->[TAG] unless @_;

    # setting
    croak "tag() called with >1 argument" if @_ > 1;

    my ($label) = @_;
    carp "tag() passed a reference!" if ref($label);

    return $self->[TAG] = $label;
}
|
1323
|
|
|
|
|
|
|
################################################################## |
|
1324
|
|
|
|
|
|
|
# Combined getter/setter for the ANNOT slot.
#
#   $node->annot()          returns the stored annotation
#   $node->annot($value)    stores $value and returns it
#
# When setting, croaks if more than one argument is passed and warns
# (carp) if the value is a reference; the value is stored either way.
sub annot {
    my __PACKAGE__ $self = shift;

    # getting
    return $self->[ANNOT] unless @_;

    # setting
    croak "annot() called with >1 argument" if @_ > 1;

    my ($value) = @_;
    carp "annot() passed a reference!" if ref($value);

    return $self->[ANNOT] = $value;
}
|
1339
|
|
|
|
|
|
|
################################################################## |
|
1340
|
|
|
|
|
|
|
# Combined getter/setter for the WORD slot.
#
#   $node->word()         returns the stored word (undef if none)
#   $node->word($text)    stores $text and returns it
#
# When setting, croaks if more than one argument is passed or if the
# node already has children, and warns (carp) if the value is a
# reference; in the warning case the value is still stored.
sub word {
    my __PACKAGE__ $self = shift;

    if (@_) {
        # setting
        if (@_ > 1) {
            croak "word() called with >1 argument";
        }

        # croak throws, so the bare "return" that used to follow it was
        # unreachable and has been removed.
        if (@{ $self->[CHILDREN] }) {
            croak "can't assign a word when children exist, failing!";
        }

        carp "word() passed a reference!" if ref($_[0]);
        $self->[WORD] = $_[0];
    }
    else {
        # getting
        return $self->[WORD];
    }
}
|
1361
|
|
|
|
|
|
|
################################################################## |
|
1362
|
|
|
|
|
|
|
# Return the words under this node as a single space-separated string.
# Terminals are taken from get_all_terminals(); those whose tag is
# '-NONE-' are left out.
sub text {
    my __PACKAGE__ $self = shift;

    my @kept  = grep { $_->tag ne '-NONE-' } $self->get_all_terminals();
    my @words = map  { $_->word() } @kept;

    return join(" ", @words);
}
|
1369
|
|
|
|
|
|
|
################################################################## |
|
1370
|
|
|
|
|
|
|
# Constructor.  Creates an array-based node blessed into $class with an
# empty child list and a numeric id obtained from $class->_next_numid().
#
# Any further arguments are treated as name => value pairs: for each
# name the object can() respond to, that method is called with the value;
# any other name produces a warning (carp) and is otherwise ignored.
# Pairs are processed in hash-key order.
sub new {
    my ($class, %args) = @_;

    my $self = bless [], $class;
    $self->[CHILDREN] = [];
    $self->[NUM]      = $class->_next_numid();

    for my $name (keys %args) {
        unless ($self->can($name)) {
            carp "unknown argument $name";
            next;
        }
        $self->$name( $args{$name} );
    }

    return $self;
}
|
1386
|
|
|
|
|
|
|
################################################################## |
|
1387
|
|
|
|
|
|
|
|
|
1388
|
|
|
|
|
|
|
1; |
|
1389
|
|
|
|
|
|
|
|
|
1390
|
|
|
|
|
|
|
__END__ |