File Coverage

blib/lib/Treex/Core/Node/Aligned.pm
Criterion Covered Total %
statement 118 142 83.1
branch 27 40 67.5
condition 19 37 51.3
subroutine 19 21 90.4
pod 11 11 100.0
total 194 251 77.2


line stmt bran cond sub pod time code
1             package Treex::Core::Node::Aligned;
2             $Treex::Core::Node::Aligned::VERSION = '2.20210102';
3 24     24   17009 use Moose::Role;
  24         79  
  24         209  
4              
5             # with Moose >= 2.00, this must be present also in roles
6 24     24   129078 use MooseX::SemiAffordanceAccessor;
  24         72  
  24         279  
7 24     24   69737 use Treex::Core::Common;
  24         105  
  24         267  
8              
9             sub _set_directed_as_default {
10 25     25   62 my ($filter) = @_;
11            
12 25   50     61 my $new_filter = $filter // {};
13 25 100       66 if (!defined $new_filter->{directed}) {
14 1         9 log_warn "You called \$node->get_aligned_nodes(\$filter) without determining the \"directed\" parameter in the \$filter. "
15             . "For the time being, it returns links only in the specified direction, but this will be changed soon.";
16 1         4 $new_filter->{directed} = 1;
17             }
18 25         55 return $new_filter;
19             }
20              
21             sub get_aligned_nodes {
22 25     25 1 3199 my ($self, $filter) = @_;
23              
24             # return self if both language and selector of a counterpart should be the same as the self's ones
25             return ([$self], ["self"]) if (defined $filter->{language} && defined $filter->{selector} &&
26 25 0 66     100 $filter->{language} eq $self->language && $filter->{selector} eq $self->selector);
      33        
      33        
27              
28 25         62 $filter = _set_directed_as_default($filter);
29             # retrieve aligned nodes and its types outcoming links
30            
31 25         77 my ($aligned_to, $aligned_to_types) = $self->_get_direct_aligned_nodes();
32             #log_info "ALITO: " . Dumper($aligned_to_types);
33             #log_info "ALITOIDS: " . (join ",", map {$_->id} @$aligned_to);
34 25 100       71 my @aligned = $aligned_to ? @$aligned_to : ();
35 25 100       61 my @aligned_types = $aligned_to_types ? @$aligned_to_types : ();
36            
37             # retrieve aligned nodes and its types outcoming links
38            
39 25         62 my $directed = delete $filter->{directed};
40 25 100       64 if (!$directed) {
41 12         58 my @aligned_from = sort {$a->id cmp $b->id} $self->get_referencing_nodes('alignment');
  1         28  
42 12         26 my %seen_ids = ();
43 12         24 my @aligned_from_types = map {$_->_get_alignment_types($self)} grep {!$seen_ids{$_->id}++} @aligned_from;
  11         46  
  11         268  
44             #log_info "ALIFROM: " . Dumper(\@aligned_from_types);
45             #log_info "ALIFROMIDS: " . (join ",", map {$_->id} @aligned_from);
46 12         30 push @aligned, @aligned_from;
47 12         38 push @aligned_types, @aligned_from_types;
48             }
49              
50             # filter the retrieved nodes and links
51              
52 25         57 my ($final_nodes, $final_types) = (\@aligned, \@aligned_types);
53 25 100       97 if (%$filter) {
54 15         47 ($final_nodes, $final_types) = _edge_filter_out($final_nodes, $final_types, $filter);
55 15         46 ($final_nodes, $final_types) = _node_filter_out($final_nodes, $final_types, $filter);
56             }
57            
58 25         159 log_debug "[Core::Node::Aligned::get_aligned_nodes]\tfiltered: " . (join " ", @$final_types), 1;
59 25         120 return ($final_nodes, $final_types);
60             }
61              
62             sub get_undirected_aligned_nodes {
63 10     10 1 12626 my ($self, $filter) = @_;
64 10   100     46 $filter //= {};
65 10         28 $filter->{directed} = 0;
66              
67 10         34 return $self->get_aligned_nodes($filter);
68             }
69              
70             sub get_directed_aligned_nodes {
71 7     7 1 2441 my ($self, $filter) = @_;
72 7   100     40 $filter //= {};
73 7         20 $filter->{directed} = 1;
74              
75 7         24 return $self->get_aligned_nodes($filter);
76             }
77              
78             sub _node_filter_out {
79 15     15   30 my ($nodes, $types, $filter) = @_;
80 15         28 my $lang = $filter->{language};
81 15         27 my $sel = $filter->{selector};
82              
83             my @idx = grep {
84 15 100 33     32 (!defined $lang || ($lang eq $nodes->[$_]->language)) &&
  19   100     117  
85             (!defined $sel || ($sel eq $nodes->[$_]->selector))
86             } 0 .. $#$nodes;
87              
88 15         61 return ([@$nodes[@idx]], [@$types[@idx]]);
89             }
90              
91             sub _value_of_type {
92 15     15   30 my ($type, $type_list) = @_;
93 15         24 my $i = 0;
94 15         31 foreach my $type_re (@$type_list) {
95 18 100       63 if ($type_re =~ /^!(.*)/) {
96 5 100       53 return undef if ($type =~ /$1/);
97             }
98             else {
99 13 100       162 return $i if ($type =~ /$type_re/);
100             }
101 4         9 $i++;
102             }
103 1         5 return undef;
104             }
105              
106             sub _edge_filter_out {
107 15     15   28 my ($nodes, $types, $filter) = @_;
108 15         35 my $rel_types = $filter->{rel_types};
109 15 100       46 return ($nodes, $types) if (!defined $rel_types);
110            
111             #log_info 'ALITYPES: ' . Dumper($types);
112 11         22 my @values = map {_value_of_type($_, $rel_types)} @$types;
  15         37  
113             #log_info 'ALITYPES: ' . Dumper(\@values);
114 11         34 my @idx = grep { defined $values[$_] } 0 .. $#$nodes;
  15         43  
115 11         38 @idx = sort {$values[$a] <=> $values[$b]} @idx;
  3         14  
116              
117 11         64 return ([@$nodes[@idx]], [@$types[@idx]]);
118             }
119              
120             sub _get_direct_aligned_nodes {
121 36     36   75 my ($self) = @_;
122 36         130 my $links_rf = $self->get_attr('alignment');
123 36 100       98 if ($links_rf) {
124 29         78 my $document = $self->get_document;
125 29         79 my @nodes = map { $document->get_node_by_id( $_->{'counterpart.rf'} ) } @$links_rf;
  31         130  
126 29         73 my @types = map { $_->{'type'} } @$links_rf;
  31         84  
127 29         95 return ( \@nodes, \@types );
128             }
129 7         17 return ( undef, undef );
130             }
131              
132             sub _get_alignment_types {
133 11     11   29 my ($from, $to, $both_dir) = @_;
134              
135 11         54 my @all_types;
136             my @types_idx;
137            
138 11         33 my ($nodes, $types) = $from->_get_direct_aligned_nodes();
139 11 50       29 if (defined $nodes) {
140 11         31 @types_idx = grep {$nodes->[$_] == $to} 0 .. scalar(@$nodes)-1;
  11         40  
141             }
142 11         33 push @all_types, @$types[@types_idx];
143            
144             # try the opposite link
145 11 50       30 if ($both_dir) {
146 0         0 ($nodes, $types) = $to->_get_direct_aligned_nodes();
147 0 0       0 if (defined $nodes) {
148 0         0 @types_idx = grep {$nodes->[$_] == $from} 0 .. scalar(@$nodes)-1;
  0         0  
149             }
150 0         0 push @all_types, @$types[@types_idx];
151             }
152            
153 11         76 return @all_types;
154             }
155              
156              
157             sub get_aligned_nodes_of_type {
158 1     1 1 4 my ( $self, $type_regex, $lang, $selector ) = @_;
159              
160 1 50       7 if ($type_regex =~ /^!/) {
161 0         0 log_warn "Note that a alignment type regex starting with ! has a special meaning.";
162             }
163              
164 1         9 my ($ali_nodes) = $self->get_directed_aligned_nodes({
165             language => $lang,
166             selector => $selector,
167             rel_types => [ $type_regex ],
168             });
169 1         10 return @$ali_nodes;
170             }
171              
172             sub is_aligned_to {
173 7     7 1 19 my ($node1, $node2, $filter) = @_;
174 7         21 my ($nodes, $types) = $node1->get_aligned_nodes($filter);
175 7     5   64 return any {$_ == $node2} @$nodes;
  5         51  
176             }
177              
178             sub is_undirected_aligned_to {
179 2     2 1 7 my ($node1, $node2, $filter) = @_;
180 2   50     9 $filter //= {};
181 2         6 $filter->{directed} = 0;
182 2         38 return $node1->is_aligned_to($node2, $filter);
183             }
184              
185             sub is_directed_aligned_to {
186 5     5 1 796 my ($node1, $node2, $filter) = @_;
187 5   50     18 $filter //= {};
188 5         13 $filter->{directed} = 1;
189 5         20 return $node1->is_aligned_to($node2, $filter);
190             }
191              
192             sub delete_aligned_nodes_by_filter {
193 0     0 1 0 my ($node, $filter) = @_;
194              
195 0   0     0 $filter //= {};
196 0   0     0 $filter->{directed} //= 0;
197 0         0 my ($nodes, $types) = $node->get_aligned_nodes($filter);
198 0         0 for (my $i = 0; $i < @$nodes; $i++) {
199 0         0 log_debug "[Core::Node::Aligned::delete_aligned_nodes_by_filter]\tremoving: " . $types->[$i] . " " . $nodes->[$i]->id, 1;
200 0 0       0 if ($node->is_directed_aligned_to($nodes->[$i], {rel_types => ['^'.$types->[$i].'$']})) {
201 0         0 $node->delete_aligned_node($nodes->[$i], $types->[$i]);
202             }
203             else {
204 0         0 $nodes->[$i]->delete_aligned_node($node, $types->[$i]);
205             }
206             }
207             }
208              
209             sub delete_aligned_node {
210 2     2 1 1511 my ( $self, $node, $type ) = @_;
211 2         11 my $links_rf = $self->get_attr('alignment');
212 2         6 my @links = ();
213 2 50       6 if ($links_rf) {
214             @links = grep {
215 2         6 $_->{'counterpart.rf'} ne $node->id
216 5 50 33     135 || ( defined($type) && defined( $_->{'type'} ) && $_->{'type'} ne $type )
      66        
217             }
218             @$links_rf;
219             }
220 2         11 $self->set_attr( 'alignment', \@links );
221 2         16 return;
222             }
223              
224             sub add_aligned_node {
225 6     6 1 31 my ( $self, $node, $type ) = @_;
226 6         26 my $links_rf = $self->get_attr('alignment');
227 6   50     200 my %new_link = ( 'counterpart.rf' => $node->id, 'type' => $type // ''); #/ so we have no undefs
228 6         21 push( @$links_rf, \%new_link );
229 6         35 $self->set_attr( 'alignment', $links_rf );
230 6         22 return;
231             }
232              
233             # remove invalid alignment links (leading to unindexed nodes)
234             sub update_aligned_nodes {
235 0     0 1   my ($self) = @_;
236 0           my $doc = $self->get_document();
237 0           my $links_rf = $self->get_attr('alignment');
238 0           my @new_links;
239              
240 0           foreach my $link ( @{$links_rf} ) {
  0            
241 0 0         push @new_links, $link if ( $doc->id_is_indexed( $link->{'counterpart.rf'} ) );
242             }
243 0           $self->set_attr( 'alignment', \@new_links );
244 0           return;
245             }
246             1;
247              
248             __END__
249              
250             =encoding utf-8
251              
252             =head1 NAME
253              
254             Treex::Core::Node::Aligned
255              
256             =head1 VERSION
257              
258             version 2.20210102
259              
260             =head1 DESCRIPTION
261              
262             Moose role with methods to access alignment.
263              
264             =head1 METHODS
265              
266             =over
267              
268             =item ($ali_nodes, $ali_types) = $node->get_aligned_nodes($filter)
269              
270             This is the main getter method. It returns all nodes aligned to a specified node C<$node>,
271             and types of these alignment links as two list references -- C<$ali_nodes>, and C<$ali_types>,
272             respectively.
273              
274             By the optional parameter C<$filter>, one may specify a filter to be applied to the nodes
275             and links. The filter is a hash reference, with the following possible keys:
276            
277             C<language> - the language of the aligned nodes (e.g. C<en>)
278             C<selector> - the selector of the aligned nodes (e.g. C<src>)
279             C<directed> - return only the links originating from the C<$node> (possible values: C<0> and C<1>,
280             by default equals to C<1>)
281             C<rel_types> - filter the alignment types. The value of this parameter must be a reference to
282             a list of regular expression strings. The expressions starting with the C<!> sign represent negative
283             filters. The actual link type is compared to these regexps one after another, skipping the rest
284             if the type matches a current regexp. If the type matches no regexps in the list, it is filtered out.
285             Therefore, negative rules should be at the beginning of the list, followed by at least one positive
286             rule. For instance, C<['^a$','^b$']> returns only links of type C<a> or C<b>. On the other hand,
287             C<['!^a$','!^b$','.*']> returns everything except for C<a> and C<b>. The filter C<['!^ab.*','^a.*']>
288             accepts only the types starting with C<a>, except for those starting with C<ab>.
289              
290             For the time being, C<directed = 1> is the default if it is not specified in the filter. However,
291             this will probably change soon, so you had better use C<get_directed_aligned_nodes> for this purpose,
292             or specify the C<directed> parameter, explicitly.
293              
294             Both returned list references -- C<$ali_nodes> and C<$ali_types>, are always defined. If the
295             C<$node> has no alignment link that satisfies the filter constraints, a reference to an empty
296             list is returned.
297              
298             =item ($ali_nodes, $ali_types) = $node->get_undirected_aligned_nodes($filter)
299              
300             Return counterparts of the links in both the specified and opposite direction.
301             It calls C<get_aligned_nodes> with C<directed> equal to 0.
302              
303             =item ($ali_nodes, $ali_types) = $node->get_directed_aligned_nodes($filter)
304              
305             Return only counterparts of the links in the specified direction.
306             Calls C<get_aligned_nodes> with C<directed> equal to 1.
307             With undefined C<filter>, it corresponds to the original version of the
308             C<get_aligned_nodes> method.
309              
310             =item my @nodes = $node->get_aligned_nodes_of_type($regex_constraint_on_type)
311              
312             Returns a list of nodes aligned to the $node by the specified alignment type.
313              
314             =item my $is_aligned = $node1->is_aligned_to($node2, $filter)
315              
316             An indicator function of whether the nodes C<$node1> and C<$node2> are aligned under the conditions
317             specified by the filter C<$filter> (see more in the C<get_aligned_nodes> function description).
318             For the time being, C<directed = 1> is the default if it is not specified in the filter. However,
319             this will probably change soon, so you had better use C<is_directed_aligned_to> for this purpose,
320             or specify the C<directed> parameter, explicitly.
321              
322             =item my $is_aligned = $node1->is_undirected_aligned_to($node2, $filter)
323              
324             The same as C<is_aligned_to>, accepting links in both the specified and the opposite direction.
325              
326             =item my $is_aligned = $node1->is_directed_aligned_to($node2, $filter)
327              
328             The same as C<is_aligned_to>, accepting links only in the specified direction.
329              
330             =item $node->delete_aligned_node($target, $type)
331              
332             All alignments of the $target to $node are deleted, if their types equal $type.
333              
334             =item $node->delete_aligned_nodes_by_filter($filter)
335              
336             This deletes the alignment links pointing from/to the node C<$node>. Only the links satisfying
337             the C<$filter> constraints are removed.
338             If the parameter C<directed> in the C<filter> is not specified, C<directed = 0> is the default.
339              
340             =item $node->add_aligned_node($target, $type)
341              
342             Aligns $target node to $node. The prior existence of the link is not checked.
343              
344             =item $node->update_aligned_nodes()
345              
346             Removes all alignment links leading to nodes which have been deleted.
347              
348             =back
349              
350             =head1 AUTHOR
351              
352             Michal Novák <mnovak@ufal.mff.cuni.cz>
353              
354             =head1 COPYRIGHT AND LICENSE
355              
356             Copyright © 2015 by Institute of Formal and Applied Linguistics, Charles University in Prague
357              
358             This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself.