File Coverage

blib/lib/Treex/Core/Node.pm
Criterion Covered Total %
statement 19 21 90.4
branch n/a
condition n/a
subroutine 7 7 100.0
pod n/a
total 26 28 92.8


line stmt bran cond sub pod time code
1             package Treex::Core::Node;
2             $Treex::Core::Node::VERSION = '2.20160630';
3 24     24   14494 use namespace::autoclean;
  24         14901  
  24         228  
4              
5 24     24   2390 use Moose;
  24         387653  
  24         226  
6 24     24   161284 use MooseX::NonMoose;
  24         1907  
  24         216  
7 24     24   230464 use Treex::Core::Common;
  24         77  
  24         350  
8 24     24   183174 use Cwd;
  24         66  
  24         2047  
9 24     24   174 use Scalar::Util qw(refaddr);
  24         68  
  24         1166  
10 24     24   15232 use Treex::PML;
  0            
  0            
11              
12             extends 'Treex::PML::Node';
13             with 'Treex::Core::WildAttr';
14             with 'Treex::Core::Node::Aligned';
15              
16             # overloading does not work with namespace::autoclean
17             # see https://rt.cpan.org/Public/Bug/Display.html?id=50938
18             # We may want to use https://metacpan.org/module/namespace::sweep instead.
19             #
20             # use overload
21             # '""' => 'to_string',
22             # '==' => 'equals',
23             # '!=' => '_not_equals',
24             # 'eq' => 'equals', # deprecated
25             # 'ne' => '_not_equals', # deprecated
26             # 'bool' => sub{1},
27             #
28             # # We can A) let Magic Autogeneration to build "derived" overloadings,
29             # # or B) we can disable this feature (via fallback=>0)
30             # # and define only the needed overloadings
31             # # (so all other overloadings will result in fatal errors).
32             # # See perldoc overload.
33             # # I decided for A, but uncommenting the following lines can catch some misuses.
34             # #'!' => sub{0},
35             # #'.' => sub{$_[2] ? $_[1] . $_[0]->to_string : $_[0]->to_string . $_[1]},
36             # #fallback => 0,
37             # ;
38             # # TODO: '<' => 'precedes' (or better '<=>' => ...)
39             # # 'eq' => sub {log_warn 'You should use ==' && return $_[0]==$_[1]} # similarly for 'ne'
40              
41             Readonly my $_SWITCHES_REGEX => qr/^(ordered|add_self|(preceding|following|first|last)_only)$/x;
42             my $CHECK_FOR_CYCLES = 1;
43              
44             our $LOG_NEW = 0;
45             our $LOG_EDITS = 0;
46             # tip: you can use Util::Eval doc='$Treex::Core::Node::LOG_EDITS=1;' in your scenario
47             # Note that most attributes are not set by set_attr. See TODO below.
48              
49             has _zone => (
50             is => 'rw',
51             writer => '_set_zone',
52             reader => '_get_zone',
53             weak_ref => 1,
54             );
55              
56             has id => (
57             is => 'rw',
58             trigger => \&_index_my_id,
59             );
60              
61             sub BUILD {
62             my ( $self, $arg_ref ) = @_;
63              
64             if (( not defined $arg_ref or not defined $arg_ref->{_called_from_core_} )
65             and not $Treex::Core::Config::running_in_tred
66             )
67             {
68             log_fatal 'Because of node indexing, no nodes can be created outside of documents. '
69             . 'You have to use $zone->create_tree(...) or $node->create_child() '
70             . 'instead of Treex::Core::Node...->new().';
71             }
72             return;
73             }
74              
75             sub to_string {
76             my ($self) = @_;
77             return $self->id // 'node_without_id(addr=' . refaddr($self) . ')';
78             }
79              
80             # Since we have overloaded stringification, we must overload == as well,
81             # so you can use "if ($nodeA == $nodeB){...}".
82             sub equals {
83             my ($self, $node) = @_;
84             #return ref($node) && $node->id eq $self->id;
85             return ref($node) && refaddr($node) == refaddr($self);
86             }
87              
88             sub _not_equals {
89             my ($self, $node) = @_;
90             return !$self->equals($node);
91             }
92              
93             sub _index_my_id {
94             my $self = shift;
95             $self->get_document->index_node_by_id( $self->id, $self );
96             return;
97             }
98              
99             sub _caller_signature {
100             my $level = 1;
101             my ($package, $filename, $line) = caller;
102             while ($package =~ /^Treex::Core/){
103             ($package, $filename, $line) = caller $level++;
104             }
105             $package =~ s/^Treex::Block:://;
106             return "$package#$line";
107             }
108              
109             # ---- access to attributes ----
110              
111             # unlike attr (implemented in Treex::PML::Instance::get_data)
112             # get_attr implements only "plain" and "nested hash" attribute names,
113             # i.e. no XPath-like expressions (a/aux.rf[3]) are allowed.
114             # This results in much faster code.
115             sub get_attr {
116             my ( $self, $attr_name ) = @_;
117             log_fatal('Incorrect number of arguments') if @_ != 2;
118             my $val = $self;
119             for my $step ( split /\//, $attr_name ) {
120             if ( !defined $val ) {
121             log_fatal "Attribute '$attr_name' contains strange symbols."
122             . " For XPath like constructs (e.g. 'a/aux.rf[3]') use the 'attr' method."
123             if $attr_name =~ /[^-\w\/.]/;
124             }
125             $val = $val->{$step};
126             }
127             return $val;
128             }
129              
130             use Treex::PML::Factory;
131              
132             sub set_attr {
133             my ( $self, $attr_name, $attr_value ) = @_;
134             log_fatal('Incorrect number of arguments') if @_ != 3;
135             if ( $attr_name eq 'id' ) {
136             if ( not defined $attr_value or $attr_value eq '' ) {
137             log_fatal 'Setting undefined or empty ID is not allowed';
138             }
139             $self->get_document->index_node_by_id( $attr_value, $self );
140             }
141             elsif ( ref($attr_value) eq 'ARRAY' ) {
142             $attr_value = Treex::PML::List->new( @{$attr_value} );
143             }
144              
145             if ($attr_name =~ /\.rf$/){
146             my $document = $self->get_document();
147              
148             # Delete previous back references
149             my $old_value = $self->get_attr($attr_name);
150             if ($old_value) {
151             if ( ref $old_value eq 'Treex::PML::List' && @$old_value ) {
152             $document->remove_backref( $attr_name, $self->id, $old_value );
153             }
154             else {
155             $document->remove_backref( $attr_name, $self->id, [$old_value] );
156             }
157             }
158              
159             # Set new back references
160             my $ids = ref($attr_value) eq 'Treex::PML::List' ? $attr_value : [$attr_value];
161             $document->index_backref( $attr_name, $self->id, $ids );
162             }
163             elsif ($attr_name eq 'alignment'){
164             my $document = $self->get_document();
165             if ($self->{alignment}){
166             my @old_ids = map { $_->{'counterpart.rf'} } @{$self->{alignment}};
167             $document->remove_backref( 'alignment', $self->id, \@old_ids );
168             }
169             if ($attr_value && @$attr_value){
170             my @new_ids = map { $_->{'counterpart.rf'} } @$attr_value;
171             $document->index_backref( $attr_name, $self->id, \@new_ids );
172             }
173             }
174              
175             # TODO: most attributes are set by Moose setters,
176             # e.g. $anode->set_form("Hi") does not call set_attr.
177             # We would need to redefine all the setter to fill wild->{edited_by}.
178             if ($LOG_EDITS){
179             my $signature = $self->wild->{edited_by};
180             if ($signature) {$signature .= "\n";}
181             else {$signature = '';}
182             my $a_value = $attr_value // 'undef';
183             $signature .= "$attr_name=$a_value ". $self->_caller_signature();
184             $self->wild->{edited_by} = $signature;
185             }
186              
187             #simple attributes can be accessed directly
188             return $self->{$attr_name} = $attr_value if $attr_name =~ /^[\w\.]+$/ || $attr_name eq '#name';
189             log_fatal "Attribute '$attr_name' contains strange symbols."
190             . " No XPath like constructs (e.g. 'a/aux.rf[3]') are allowed."
191             if $attr_name =~ /[^-\w\/.]/;
192              
193             my $val = $self;
194             my @steps = split /\//, $attr_name;
195             while (1) {
196             my $step = shift @steps;
197             if (@steps) {
198             if ( !defined( $val->{$step} ) ) {
199             $val->{$step} = Treex::PML::Factory->createStructure();
200             }
201             $val = $val->{$step};
202             }
203             else {
204             return $val->{$step} = $attr_value;
205             }
206             }
207             return;
208             }
209              
210             sub get_deref_attr {
211             my ( $self, $attr_name ) = @_;
212             log_fatal('Incorrect number of arguments') if @_ != 2;
213             my $attr_value = $self->get_attr($attr_name);
214              
215             return if !$attr_value;
216             my $document = $self->get_document();
217             return [ map { $document->get_node_by_id($_) } @{$attr_value} ]
218             if ref($attr_value) eq 'Treex::PML::List';
219             return $document->get_node_by_id($attr_value);
220             }
221              
222             sub set_deref_attr {
223             my ( $self, $attr_name, $attr_value ) = @_;
224             log_fatal('Incorrect number of arguments') if @_ != 3;
225              
226             # If $attr_value is an array of nodes
227             if ( ref($attr_value) eq 'ARRAY' ) {
228             my @list = map { $_->id } @{$attr_value};
229             $attr_value = Treex::PML::List->new(@list);
230             }
231              
232             # If $attr_value is just one node
233             else {
234             $attr_value = $attr_value->id;
235             }
236              
237             # Set the new reference(s)
238             $self->set_attr( $attr_name, $attr_value );
239             return;
240             }
241              
242             sub get_bundle {
243             log_fatal 'Incorrect number of arguments' if @_ != 1;
244             my $self = shift;
245             return $self->get_zone->get_bundle;
246             }
247              
248             # reference to embedding zone is stored only with tree root, not with nodes
249             sub get_zone {
250             log_fatal 'Incorrect number of arguments' if @_ != 1;
251             my $self = shift;
252             my $zone;
253             if ( $self->is_root ) {
254             $zone = $self->_get_zone;
255             }
256             else {
257             $zone = $self->get_root->_get_zone; ## no critic (ProtectPrivateSubs)
258             }
259              
260             log_fatal "a node (" . $self->id . ") can't reveal its zone" if !$zone;
261             return $zone;
262              
263             }
264              
265             sub remove {
266             my ($self, $arg_ref) = @_;
267             if ( $self->is_root ) {
268             log_fatal 'Tree root cannot be removed using $root->remove().'
269             . ' Use $zone->remove_tree($layer) instead';
270             }
271             my $root = $self->get_root();
272             my $document = $self->get_document();
273            
274             my @children = $self->get_children();
275             if (@children){
276             my $what_to_do = 'remove';
277             if ($arg_ref && $arg_ref->{children}){
278             $what_to_do = $arg_ref->{children};
279             }
280             if ($what_to_do =~ /^rehang/){
281             foreach my $child (@children){
282             $child->set_parent($self->get_parent);
283             }
284             }
285             if ($what_to_do =~ /warn$/){
286             log_warn $self->get_address . " is being removed by remove({children=>$what_to_do}), but it has (unexpected) children";
287             }
288             }
289              
290             # Remove the subtree from the document's indexing table
291             my @to_remove = ( $self, $self->get_descendants );
292             foreach my $node ( @to_remove) {
293             if ( defined $node->id ) {
294             $document->_remove_references_to_node( $node );
295             $document->index_node_by_id( $node->id, undef );
296             }
297             }
298              
299             # Disconnect the node from its parent (& siblings) and delete all attributes
300             # It actually does: $self->cut(); undef %$_ for ($self->descendants(), $self);
301             $self->destroy;
302              
303             # TODO: order normalizing can be done in a more efficient way
304             # (update just the following ords)
305             $root->_normalize_node_ordering();
306              
307             # By reblessing we make sure that
308             # all methods called on removed nodes will result in fatal errors.
309             foreach my $node (@to_remove){
310             bless $node, 'Treex::Core::Node::Deleted';
311             }
312             return;
313             }
314              
315             # Return all nodes that have a reference of the given type (e.g. 'alignment', 'a/lex.rf') to this node
316             sub get_referencing_nodes {
317             my ( $self, $type, $lang, $sel ) = @_;
318             my $doc = $self->get_document;
319             my $refs = $doc->get_references_to_id( $self->id );
320             return if ( !$refs || !$refs->{$type} );
321             if ((defined $lang) && (defined $sel)) {
322             my @ref_filtered_by_tree;
323             if ($sel eq q() ) {
324             @ref_filtered_by_tree = grep { /(a|t)\_tree\-$lang\-.+/; }@{ $refs->{$type} };
325             }
326             else {
327             @ref_filtered_by_tree = grep { /(a|t)\_tree\-$lang\_$sel\-.+/; }@{ $refs->{$type} };
328             }
329             return map { $doc->get_node_by_id($_) } @ref_filtered_by_tree;
330             }
331             return map { $doc->get_node_by_id($_) } @{ $refs->{$type} };
332             }
333              
334             # Remove a reference of the given type to the given node. This will not remove a reverse reference from document
335             # index, since it is itself called when removing reverse references; use the API methods for the individual
336             # references if you want to keep reverse references up-to-date.
337             sub remove_reference {
338             my ( $self, $type, $id ) = @_;
339              
340             if ( $type eq 'alignment' ) { # handle alignment links separately
341              
342             my $links = $self->get_attr('alignment');
343              
344             if ($links) {
345             $self->set_attr( 'alignment', [ grep { $_->{'counterpart.rf'} ne $id } @{$links} ] );
346             }
347             }
348             else {
349             my $attr = $self->get_attr($type);
350             log_fatal "undefined attr $type (id=$id)" if !defined $attr;
351              
352             if ( $attr eq $id || scalar( @{$attr} ) <= 1 ) { # single-value attributes
353             $self->set_attr( $type, undef );
354             }
355             else {
356             $attr->delete_value($id); # TODO : will it be always a Treex::PML::List? Looks like it works.
357             }
358             }
359             return;
360             }
361              
362              
363             sub fix_pml_type {
364             log_fatal 'Incorrect number of arguments' if @_ != 1;
365             my $self = shift;
366             if ( not $self->type() ) {
367             my $type = $self->get_pml_type_name();
368             if ( not $type ) {
369             log_warn "No PML type recognized for node $self";
370             return;
371             }
372             my $fs_file = $self->get_document()->_pmldoc;
373             $self->set_type_by_name( $fs_file->metaData('schema'), $type );
374             }
375             return;
376             }
377              
378             sub get_pml_type_name {
379             log_fatal 'Incorrect number of arguments' if @_ != 1;
380             my $self = shift;
381             return;
382             }
383              
384             sub get_layer {
385             log_fatal 'Incorrect number of arguments' if @_ != 1;
386             my $self = shift;
387             if ( ref($self) =~ /Node::(\w)$/ ) {
388             return lc($1);
389             }
390             else {
391             log_fatal "Cannot recognize node's layer: $self";
392             }
393             }
394              
395             sub language {
396             log_fatal 'Incorrect number of arguments' if @_ != 1;
397             my $self = shift;
398             return $self->get_zone()->language;
399             }
400              
401             sub selector {
402             log_fatal 'Incorrect number of arguments' if @_ != 1;
403             my $self = shift;
404             return $self->get_zone()->selector;
405             }
406              
407             sub create_child {
408             my $self = shift;
409              
410             #NOT VALIDATED INTENTIONALLY - passing args to to new (and it's also black magic, so I'm not touching it)
411              
412             # TODO:
413             #my $new_node = ( ref $self )->new(@_);
414             # Previous line is very strange and causes errors which are hard to debug.
415             # Magically, it works on UFAL machines, but nowhere else - I don't know why.
416             # Substituting the hash by hashref is a workaround,
417             # but the black magic is still there.
418             my $arg_ref;
419             if ( scalar @_ == 1 && ref $_[0] eq 'HASH' ) {
420             $arg_ref = $_[0];
421             }
422             elsif ( @_ % 2 ) {
423             log_fatal "Odd number of elements for create_child";
424             }
425             else {
426             $arg_ref = {@_};
427             }
428              
429             # Structured attributes (e.g. morphcat/pos) must be handled separately
430             # TODO: And also attributes which don't have accessors (those are not Moose attributes).
431             # Note: mlayer_pos was not added to Treex::Core::Node::T because it goes
432             # against the "tectogrammatical ideology" and we use it as a temporary hack.
433             my %structured_attrs;
434             foreach my $attr ( keys %{$arg_ref} ) {
435             if ( $attr =~ m{/} || $attr eq 'mlayer_pos' || $attr eq '#name') {
436             $structured_attrs{$attr} = delete $arg_ref->{$attr};
437             }
438             }
439              
440             $arg_ref->{_called_from_core_} = 1;
441             my $new_node = ( ref $self )->new($arg_ref);
442             $new_node->set_parent($self);
443              
444             my $new_id = $self->generate_new_id();
445             $new_node->set_id($new_id);
446              
447             foreach my $attr ( keys %structured_attrs ) {
448             $new_node->set_attr( $attr, $structured_attrs{$attr} );
449             }
450              
451             # my $type = $new_node->get_pml_type_name();
452             # return $new_node if !defined $type;
453             # my $fs_file = $self->get_bundle->get_document()->_pmldoc;
454             # $self->set_type_by_name( $fs_file->metaData('schema'), $type );
455              
456             $new_node->fix_pml_type();
457              
458             # Remember which module (Treex block) and line number in its source code are responsible for creating this node.
459             if ($LOG_NEW){
460             $new_node->wild->{created_by} = $self->_caller_signature();
461             }
462              
463             return $new_node;
464             }
465              
466             #************************************
467             #---- TREE NAVIGATION ------
468              
469             sub get_document {
470             log_fatal 'Incorrect number of arguments' if @_ != 1;
471             my $self = shift;
472             my $bundle = $self->get_bundle();
473             log_fatal('Cannot call get_document on a node which is in no bundle') if !defined $bundle;
474             return $bundle->get_document();
475             }
476              
477             sub get_root {
478             log_fatal 'Incorrect number of arguments' if @_ != 1;
479             my $self = shift;
480             return $self->root();
481             }
482              
483             sub is_root {
484             log_fatal 'Incorrect number of arguments' if @_ != 1;
485             my $self = shift;
486             return !$self->parent;
487             }
488              
489             sub is_leaf {
490             log_fatal 'Incorrect number of arguments' if @_ != 1;
491             my $self = shift;
492             return not $self->firstson;
493             }
494              
495             sub get_parent {
496             log_fatal 'Incorrect number of arguments' if @_ != 1;
497             my $self = shift;
498             return $self->parent;
499             }
500              
501             sub set_parent {
502             my $self = shift;
503             my ($parent) = pos_validated_list(
504             \@_,
505             { isa => 'Treex::Core::Node' },
506             );
507              
508             # TODO check for this (but set_parent is called also from create_child)
509             #if ($self->get_document() != $parent->get_document()) {
510             # log_fatal("Cannot move a node from one document to another");
511             #}
512              
513             # We cannot detach a node by setting an undefined parent. The if statement below will die.
514             # Let's inform the user where the bad call is.
515             log_fatal( 'Cannot attach the node ' . $self->id . ' to an undefined parent' ) if ( !defined($parent) );
516             if ( $self == $parent || $CHECK_FOR_CYCLES && $parent->is_descendant_of($self) ) {
517             my $id = $self->id;
518             my $p_id = $parent->id;
519             log_fatal("Attempt to set parent of $id to the node $p_id, which would lead to a cycle.");
520             }
521              
522             # TODO: Too much FSlib (aka Treex::PML) here
523             $self->cut();
524             my $fsfile = $parent->get_document()->_pmldoc;
525             my @fschildren = $parent->children();
526             if (@fschildren) {
527             Treex::PML::PasteAfter( $self, $fschildren[-1] );
528             }
529             else {
530             Treex::PML::Paste( $self, $parent, $fsfile->FS() );
531             }
532              
533             return;
534             }
535              
536             sub _check_switches {
537              
538             #This method may be replaced by subtype and checked as parameter
539             my $self = shift;
540             my ($arg_ref) = pos_validated_list(
541             \@_,
542             { isa => 'Maybe[HashRef]' },
543             );
544              
545             # Check for role Ordered
546             log_fatal('This type of node does not support ordering')
547             if (
548             ( $arg_ref->{ordered} || any { $arg_ref->{ $_ . '_only' } } qw(first last preceding following) )
549             &&
550             !$self->does('Treex::Core::Node::Ordered')
551             );
552              
553             # Check switches for not allowed combinations
554             log_fatal('Specified both preceding_only and following_only.')
555             if $arg_ref->{preceding_only} && $arg_ref->{following_only};
556             log_fatal('Specified both first_only and last_only.')
557             if $arg_ref->{first_only} && $arg_ref->{last_only};
558              
559             # Check for explicit "ordered" when not needed (possible typo)
560             log_warn('Specifying (first|last|preceding|following)_only implies ordered.')
561             if $arg_ref->{ordered}
562             && any { $arg_ref->{ $_ . '_only' } } qw(first last preceding following);
563              
564             # Check for unknown switches
565             my $unknown = first { $_ !~ $_SWITCHES_REGEX } keys %{$arg_ref};
566             log_warn("Unknown switch $unknown") if defined $unknown;
567              
568             return;
569             }
570              
571             # Shared processing of switches: ordered, (preceding|following|first|last)_only
572             # for subs get_children, get_descendants and get_siblings.
573             # This is quite an uneffective implementation in case of e.g. first_only
574             sub _process_switches {
575             my $self = shift;
576             my ( $arg_ref, @nodes ) = pos_validated_list(
577             \@_,
578             { isa => 'Maybe[HashRef]' },
579             MX_PARAMS_VALIDATE_ALLOW_EXTRA => 1,
580             );
581              
582             # Check for unknown switches and not allowed combinations
583             $self->_check_switches($arg_ref);
584              
585             # Add this node if add_self
586             if ( $arg_ref->{add_self} ) {
587             push @nodes, $self;
588             }
589              
590             # Sort nodes if needed
591             if (( $arg_ref->{ordered} || any { $arg_ref->{ $_ . '_only' } } qw(first last preceding following) )
592             && @nodes && defined $nodes[0]->ord
593             )
594             {
595             @nodes = sort { $a->ord() <=> $b->ord() } @nodes;
596             }
597              
598             # Leave preceding/following only if needed
599             if ( $arg_ref->{preceding_only} ) {
600             @nodes = grep { $_->ord() <= $self->ord } @nodes;
601             }
602             elsif ( $arg_ref->{following_only} ) {
603             @nodes = grep { $_->ord() >= $self->ord } @nodes;
604             }
605              
606             # first_only / last_only
607             return $nodes[0] if $arg_ref->{first_only};
608             return $nodes[-1] if $arg_ref->{last_only};
609             return @nodes;
610             }
611              
612             sub get_children {
613             my $self = shift;
614             my ($arg_ref) = pos_validated_list(
615             \@_,
616             { isa => 'Maybe[HashRef]', optional => 1 },
617             );
618              
619             my @children = $self->children();
620             return @children if !$arg_ref;
621             return $self->_process_switches( $arg_ref, @children );
622             }
623              
624             sub get_descendants {
625             my $self = shift;
626             my ($arg_ref) = pos_validated_list(
627             \@_,
628             { isa => 'Maybe[HashRef]', optional => 1 },
629             );
630              
631             my @descendants;
632             if ( $arg_ref && $arg_ref->{except} ) {
633             my $except_node = delete $arg_ref->{except};
634             return () if $self == $except_node;
635             @descendants = map {
636             $_->get_descendants( { except => $except_node, add_self => 1 } )
637             } $self->get_children();
638             }
639             else {
640             @descendants = $self->descendants();
641             }
642             return @descendants if !$arg_ref;
643             return $self->_process_switches( $arg_ref, @descendants );
644             }
645              
646             sub get_siblings {
647             my $self = shift;
648             my ($arg_ref) = pos_validated_list(
649             \@_,
650             { isa => 'Maybe[HashRef]', optional => 1 },
651             );
652             my $parent = $self->get_parent();
653             return () if !$parent;
654             my @siblings = grep { $_ ne $self } $parent->get_children();
655             return @siblings if !$arg_ref;
656             return $self->_process_switches( $arg_ref, @siblings );
657             }
658              
659             sub get_left_neighbor { return $_[0]->get_siblings( { preceding_only => 1, last_only => 1 } ); }
660             sub get_right_neighbor { return $_[0]->get_siblings( { following_only => 1, first_only => 1 } ); }
661              
662             sub is_descendant_of {
663             my $self = shift;
664             my ($another_node) = pos_validated_list(
665             \@_,
666             { isa => 'Treex::Core::Node' },
667             );
668              
669             my $parent = $self->get_parent();
670             while ($parent) {
671             return 1 if $parent == $another_node;
672             $parent = $parent->get_parent();
673             }
674             return 0;
675             }
676             sub dominates {
677             my $self = shift;
678             my $another_node = shift;
679             return $another_node->is_descendant_of($self);
680             }
681              
682             #************************************
683             #---- OTHER ------
684              
685             sub get_depth {
686             log_fatal 'Incorrect number of arguments' if @_ != 1;
687             my $self = shift;
688             my $depth = 0;
689             while ( $self = $self->get_parent() ) {
690             $depth++;
691             }
692             return $depth;
693             }
694              
695             # This is called from $node->remove()
696             # so it must be defined in this class,
697             # but it is overriden in Treex::Core::Node::Ordered.
698             sub _normalize_node_ordering {
699             }
700              
701             sub get_address {
702             log_fatal 'Incorrect number of arguments' if @_ != 1;
703             my $self = shift;
704             my $id = $self->id;
705             my $bundle = $self->get_bundle();
706             my $doc = $bundle->get_document();
707             my $file = $doc->loaded_from || ( $doc->full_filename . '.treex' );
708             my $position = $bundle->get_position() + 1;
709              
710             #my $filename = Cwd::abs_path($file);
711             return "$file##$position.$id";
712             }
713              
714             # Empty DESTROY method is a hack to get rid of the "Deep recursion warning"
715             # in Treex::PML::Node::DESTROY and MooseX::NonMoose::Meta::Role::Class::_check_superclass_destructor.
716             # Without this hack, you get the warning after creating a node with 99 or more children.
717             # Deep recursion on subroutine "Class::MOP::Method::execute" at .../5.12.2/MooseX/NonMoose/Meta/Role/Class.pm line 183.
718             sub DESTROY {
719             }
720              
721             #*************************************
722             #---- DEPRECATED & QUESTIONABLE ------
723              
724             sub generate_new_id { #TODO move to Core::Document?
725             log_fatal 'Incorrect number of arguments' if @_ != 1;
726             my $self = shift;
727             my $doc = $self->get_document;
728              
729             my $latest_node_number = $doc->_latest_node_number;
730              
731             my $new_id;
732              
733             #$self->get_root->id =~ /(.+)root/;
734             #my $id_base = $1 || "";
735             my $id_base;
736             if ( $self->get_root->id =~ /(.+)root/ ) {
737             $id_base = $1;
738             }
739             else {
740             $id_base = q();
741             }
742              
743             while (1) {
744             $latest_node_number++;
745             $new_id = "${id_base}n$latest_node_number";
746             last if !$doc->id_is_indexed($new_id);
747              
748             }
749              
750             $doc->_set_latest_node_number($latest_node_number);
751              
752             return $new_id;
753             }
754              
755             sub add_to_listattr {
756             my $self = shift;
757             my ( $attr_name, $attr_value ) = pos_validated_list(
758             \@_,
759             { isa => 'Str' },
760             { isa => 'Any' },
761             );
762              
763             my $list = $self->attr($attr_name);
764             log_fatal("Attribute $attr_name is not a list!")
765             if !defined $list || ref($list) ne 'Treex::PML::List';
766             my @new_list = @{$list};
767             if ( ref($attr_value) eq 'ARRAY' ) {
768             push @new_list, @{$attr_value};
769             }
770             else {
771             push @new_list, $attr_value;
772             }
773             return $self->set_attr( $attr_name, Treex::PML::List->new(@new_list) );
774             }
775              
776             # Get more attributes at once
777             sub get_attrs {
778             my $self = shift;
779             my @attr_names = pos_validated_list(
780             \@_,
781             { isa => 'Any' }, #at least one parameter
782             MX_PARAMS_VALIDATE_ALLOW_EXTRA => 1,
783             );
784              
785             my @attr_values;
786             if ( ref $attr_names[-1] ) {
787             my $arg_ref = pop @attr_names;
788             my $change_undefs_to = $arg_ref->{undefs};
789             @attr_values = map {
790             defined $self->get_attr($_) ? $self->get_attr($_) : $change_undefs_to
791             } @attr_names;
792             }
793             else {
794             @attr_values = map { $self->get_attr($_) } @attr_names;
795             }
796              
797             return @attr_values;
798             }
799              
800             # Return all attributes of the given node (sub)type that contain references
801             sub _get_reference_attrs {
802             my ($self) = @_;
803             return ();
804             }
805              
806             # Return IDs of all nodes to which there are reference links from this node (must be overridden in
807             # the respective node types)
808             sub _get_referenced_ids {
809             my ($self) = @_;
810             my $ret = {};
811              
812             # handle alignment separately
813             my $links_rf = $self->get_attr('alignment');
814             $ret->{alignment} = [ map { $_->{'counterpart.rf'} } @{$links_rf} ] if ($links_rf);
815              
816             # all other references
817             foreach my $ref_attr ( $self->_get_reference_attrs() ) {
818             my $val = $self->get_attr($ref_attr) or next;
819             if ( !ref $val ) { # single-valued
820             $ret->{$ref_attr} = [$val];
821             }
822             else {
823             $ret->{$ref_attr} = $val;
824             }
825             }
826             return $ret;
827             }
828              
829              
830             # ---------------------
831              
832             # changing the functionality of Treex::PML::Node's following() so that it traverses all
833             # nodes in all trees in all zones (needed for search in TrEd)
834              
835             sub following {
836             my ( $self ) = @_;
837              
838             my $pml_following = Treex::PML::Node::following(@_);
839              
840             if ( $pml_following ) {
841             return $pml_following;
842             }
843              
844             else {
845             my $bundle = ( ref($self) eq 'Treex::Core::Bundle' ) ? $self : $self->get_bundle;
846              
847             my @all_trees = map {
848             ref($_) ne 'Treex::PML::Struct'
849             ? $_->get_all_trees
850             : ()
851             } $bundle->get_all_zones;
852              
853             if ( ref($self) eq 'Treex::Core::Bundle' ) {
854             return $all_trees[0];
855             }
856              
857             else {
858             my $my_root = $self->get_root;
859             foreach my $index ( 0..$#all_trees ) {
860             if ( $all_trees[$index] eq $my_root ) {
861             return $all_trees[$index+1];
862             }
863             }
864             log_fatal "Node belongs to no tree: this should never happen";
865             }
866             }
867             }
868              
869             # This is copied from Treex::PML::Node.
870             # Using Treex::PML::Node::following is faster than recursion
871             # and it does not cause "deep recursion" warnings.
872             sub descendants {
873             my $self = $_[0];
874             my @kin = ();
875             my $desc = $self->Treex::PML::Node::following($self);
876             while ($desc) {
877             push @kin, $desc;
878             $desc = $desc->Treex::PML::Node::following($self);
879             }
880             return @kin;
881             }
882              
883             # TODO: How to do this in an elegant way?
884             # Unless we find a better way, we must disable two perlcritics
885             package Treex::Core::Node::Removed; ## no critic (ProhibitMultiplePackages)
886             $Treex::Core::Node::Removed::VERSION = '2.20160630';
887             use Treex::Core::Log;
888              
889             sub AUTOLOAD { ## no critic (ProhibitAutoloading)
890             our $AUTOLOAD;
891             if ( $AUTOLOAD !~ /DESTROY$/ ) {
892             log_fatal("You cannot call any methods on removed nodes, but have called $AUTOLOAD");
893             }
894             }
895              
896             package Treex::Core::Node; ## no critic (ProhibitMultiplePackages)
897              
898             __PACKAGE__->meta->make_immutable;
899              
900             1;
901              
902             __END__
903              
904             ##-- begin proposal
905             # Example usage:
906             # Treex::Core::Node::T methods get_lex_anode and get_aux_anodes could use:
907             # my $a_lex = $t_node->get_r_attr('a/lex.rf'); # returns the node or undef
908             # my @a_aux = $t_node->get_r_attr('a/aux.rf'); # returns the nodes or ()
909             sub get_r_attr {
910             my ( $self, $attr_name ) = @_;
911             log_fatal('Incorrect number of arguments') if @_ != 2;
912             my $attr_value = $self->get_attr($attr_name);
913              
914             return if !$attr_value;
915             my $document = $self->get_document();
916             if (wantarray) {
917             log_fatal("Attribute '$attr_name' is not a list, but get_r_attr() called in a list context.")
918             if ref($attr_value) ne 'Treex::PML::List';
919             return map { $document->get_node_by_id($_) } @{$attr_value};
920             }
921              
922             log_fatal("Attribute $attr_name is a list, but get_r_attr() not called in a list context.")
923             if ref($attr_value) eq 'Treex::PML::List';
924             return $document->get_node_by_id($attr_value);
925             }
926              
927             # Example usage:
928             # $t_node->set_r_attr('a/lex.rf', $a_lex);
929             # $t_node->set_r_attr('a/aux.rf', @a_aux);
930             sub set_r_attr {
931             my ( $self, $attr_name, @attr_values ) = @_;
932             log_fatal('Incorrect number of arguments') if @_ < 3;
933             my $fs = $self;
934              
935             # TODO $fs->type nefunguje - asi protoze se v konstruktorech nenastavuje typ
936             if ( $fs->type($attr_name) eq 'Treex::PML::List' ) {
937             my @list = map { $_->id } @attr_values;
938              
939             # TODO: overriden Node::N::set_attr is bypassed by this call
940             return $fs->set_attr( $attr_name, Treex::PML::List->new(@list) );
941             }
942             log_fatal("Attribute '$attr_name' is not a list, but set_r_attr() called with @attr_values values.")
943             if @attr_values > 1;
944              
945             # TODO: overriden Node::N::set_attr is bypassed by this call
946             return $fs->set_attr( $attr_name, $attr_values[0]->id );
947             }
948              
949              
950              
951             =for Pod::Coverage BUILD
952              
953              
954             =encoding utf-8
955              
956             =head1 NAME
957              
958             Treex::Core::Node - smallest unit that holds information in Treex
959              
960             =head1 VERSION
961              
962             version 2.20160630
963              
964             =head1 DESCRIPTION
965              
966             This class represents a Treex node.
967             Treex trees (contained in bundles) are formed by nodes and edges.
968             Attributes can be attached only to nodes. Edge's attributes must
969             be stored as the lower node's attributes.
970             Tree's attributes must be stored as attributes of the root node.
971              
972             =head1 METHODS
973              
974             =head2 Construction
975              
976             =over 4
977              
978             =item my $new_node = $existing_node->create_child({lemma=>'house', tag=>'NN' });
979              
980             Creates a new node as a child of an existing node. Some of its attribute
981             can be filled. Direct calls of node constructors (C<< ->new >>) should be avoided.
982              
983              
984             =back
985              
986              
987              
988             =head2 Access to the containers
989              
990             =over 4
991              
992             =item my $bundle = $node->get_bundle();
993              
994             Returns the L<Treex::Core::Bundle> object in which the node's tree is contained.
995              
996             =item my $document = $node->get_document();
997              
998             Returns the L<Treex::Core::Document> object in which the node's tree is contained.
999              
1000             =item get_layer
1001              
1002             Return the layer of this node (I<a>, I<t>, I<n> or I<p>).
1003              
1004             =item get_zone
1005              
1006             Return the zone (L<Treex::Core::BundleZone>) to which this node
1007             (and the whole tree) belongs.
1008              
1009             =item $lang_code = $node->language
1010              
1011             shortcut for C<< $lang_code = $node->get_zone()->language >>
1012              
1013             =item $selector = $node->selector
1014              
1015             shortcut for C<< $selector = $node->get_zone()->selector >>
1016              
1017             =back
1018              
1019              
1020             =head2 Access to attributes
1021              
1022             =over 4
1023              
1024             =item my $value = $node->get_attr($name);
1025              
1026             Returns the value of the node attribute of the given name.
1027              
1028             =item my $node->set_attr($name,$value);
1029              
1030             Sets the given attribute of the node with the given value.
1031             If the attribute name is C<id>, then the document's indexing table
1032             is updated. If value of the type C<List> is to be filled,
1033             then C<$value> must be a reference to the array of values.
1034              
1035             =item my $node2 = $node1->get_deref_attr($name);
1036              
1037             If value of the given attribute is reference (or list of references),
1038             it returns the appropriate node (or a reference to the list of nodes).
1039              
1040             =item my $node1->set_deref_attr($name, $node2);
1041              
1042             Sets the given attribute with C<id> (list of C<id>s) of the given node (list of nodes).
1043              
1044             =item my $node->add_to_listattr($name, $value);
1045              
1046             If the given attribute is list, the given value is appended to it.
1047              
1048             =item my $node->get_attrs(qw(name_of_attr1 name_of_attr2 ...));
1049              
1050             Get more attributes at once.
1051             If the last argument is C<{undefs=E<gt>$value}>, all undefs are substituted
1052             by a C<$value> (typically the value is an empty string).
1053              
1054             =back
1055              
1056              
1057              
1058              
1059             =head2 Access to tree topology
1060              
1061             =over 4
1062              
1063             =item my $parent_node = $node->get_parent();
1064              
1065             Returns the parent node, or C<undef> if there is none (if C<$node> itself is the root)
1066              
1067             =item $node->set_parent($parent_node);
1068              
1069             Makes C<$node> a child of C<$parent_node>.
1070              
1071             =item $node->remove({children=>remove});
1072              
1073             Deletes a node.
1074             Node identifier is removed from the document indexing table.
1075             The removed node cannot be further used.
1076              
1077             Optional argument C<children> in C<$arg_ref> can specify
1078             what to do with children (and all descendants,
1079             i.e. the subtree rooted by the given node) if present:
1080             C<remove>, C<remove_warn>, C<rehang>, C<rehang_warn>.
1081             The default is C<remove> -- remove recursively.
1082             C<rehang> means reattach the children of C<$node> to the parent of C<$node>.
1083             The C<_warn> variants will in addition produce a warning.
1084              
1085             =item my $root_node = $node->get_root();
1086              
1087             Returns the root of the node's tree.
1088              
1089             =item $node->is_root();
1090              
1091             Returns C<true> if the node has no parent.
1092              
1093             =item $node->is_leaf();
1094              
1095             Returns C<true> if the node has no children.
1096              
1097             =item $node1->is_descendant_of($node2);
1098              
1099             Tests whether C<$node1> is among transitive descendants of C<$node2>;
1100              
1101             =back
1102              
1103             Next three methods (for access to children / descendants / siblings)
1104             have an optional argument C<$arg_ref> for specifying switches.
1105             By adding some switches, you can modify the behavior of these methods.
1106             See L<"Switches"> for examples.
1107              
1108             =over
1109              
1110             =item my @child_nodes = $node->get_children($arg_ref);
1111              
1112             Returns an array of child nodes.
1113              
1114             =item my @descendant_nodes = $node->get_descendants($arg_ref);
1115              
1116             Returns an array of descendant nodes ('transitive children').
1117              
1118             =item my @sibling_nodes = $node->get_siblings($arg_ref);
1119              
1120             Returns an array of nodes sharing the parent with the current node.
1121              
1122             =back
1123              
1124             =head3 Switches
1125              
1126             Currently there are 6 switches:
1127              
1128             =over
1129              
1130             =item * ordered
1131              
1132             =item * preceding_only, following_only
1133              
1134             =item * first_only, last_only
1135              
1136             =item * add_self
1137              
1138             =back
1139              
1140             =head4 Examples of usage
1141              
1142             Names of variables in the examples suppose a language with left-to-right script.
1143              
1144             my @ordered_descendants = $node->get_descendants({ordered=>1});
1145             my @self_and_left_children = $node->get_children({preceding_only=>1, add_self=>1});
1146             my @ordered_self_and_children = $node->get_children({ordered=>1, add_self=>1});
1147             my $leftmost_child = $node->get_children({first_only=>1});
1148             my @ordered_siblings = $node->get_siblings({ordered=>1});
1149             my $left_neighbor = $node->get_siblings({preceding_only=>1, last_only=>1});
1150             my $right_neighbor = $node->get_siblings({following_only=>1, first_only=>1});
1151             my $leftmost_sibling_or_self = $node->get_siblings({add_self=>1, first_only=>1});
1152              
1153             =head4 Restrictions
1154              
1155             =over
1156              
1157             =item *
1158              
1159             C<first_only> and C<last_only> switches makes the method return just one item -
1160             a scalar, even if combined with the C<add_self> switch.
1161              
1162             =item *
1163              
1164             Specifying C<(first|last|preceding|following)_only> implies C<ordered>,
1165             so explicit addition of C<ordered> gives a warning.
1166              
1167             =item *
1168              
1169             Specifying both C<preceding_only> and C<following_only> gives an error
1170             (same for combining C<first_only> and C<last_only>).
1171              
1172             =back
1173              
1174             =head4 Shortcuts
1175              
1176             There are shortcuts for comfort of those who use B<left-to-right> scripts:
1177              
1178             =over
1179              
1180             =item my $left_neighbor_node = $node->get_left_neighbor();
1181              
1182             Returns the rightmost node from the set of left siblings (the nearest left sibling).
1183             Actually, this is shortcut for C<$node-E<gt>get_siblings({preceding_only=E<gt>1, last_only=E<gt>1})>.
1184              
1185             =item my $right_neighbor_node = $node->get_right_neighbor();
1186              
1187             Returns the leftmost node from the set of right siblings (the nearest right sibling).
1188             Actually, this is shortcut for C<$node-E<gt>get_siblings({following_only=E<gt>1, first_only=E<gt>1})>.
1189              
1190             =back
1191              
1192             =head2 PML-related methods
1193              
1194             =over
1195              
1196             =item my $type = $node->get_pml_type_name;
1197              
1198             =item $node->fix_pml_type();
1199              
1200             If a node has no PML type, then its type is detected (according
1201             to the node's location) and filled by the PML interface.
1202              
1203             =back
1204              
1205             =head2 References (alignment and other references depending on node subtype)
1206              
1207             =over
1208              
1209             =item my @refnodes = $node->get_referencing_nodes($ref_type);
1210              
1211             Returns an array of nodes referencing this node with the given reference type (e.g. 'alignment', 'a/lex.rf' etc.).
1212              
1213             =back
1214              
1215             =head2 Other methods
1216              
1217             =over 4
1218              
1219             =item $node->generate_new_id();
1220              
1221             Generate new (= so far unindexed) identifier (to be used when creating new
1222             nodes). The new identifier is derived from the identifier of the root
1223             (C<< $node->root >>), by adding suffix C<x1> (or C<x2>, if C<...x1> has already
1224             been indexed, etc.) to the root's C<id>.
1225              
1226             =item my $levels = $node->get_depth();
1227              
1228             Return the depth of the node. The root has depth = 0, its children have depth = 1 etc.
1229              
1230             =item my $address = $node->get_address();
1231              
1232             Return the node address, i.e. file name and node's position within the file,
1233             similarly to TrEd's C<FPosition()> (but the value is only returned, not printed).
1234              
1235             =item $node->equals($another_node)
1236              
1237             This is the internal implementation of overloaded C<==> operator,
1238             which checks whether C<$node == $another_node> (the object instance must be identical).
1239              
1240             =item my $string = $node->to_string()
1241              
1242             This is the internal implementation of overloaded stringification,
1243             so you can use e.g. C<print "There is a node $node.">.
1244             It returns the id (C<$node->id>), but the behavior may be overridden in subclasses.
1245             See L<overload> pragma for details about overloading operators in Perl.
1246              
1247             =back
1248              
1249              
1250             =head1 AUTHORS
1251              
1252             ZdenÄ›k Žabokrtský <zabokrtsky@ufal.mff.cuni.cz>
1253              
1254             Martin Popel <popel@ufal.mff.cuni.cz>
1255              
1256             David Mareček <marecek@ufal.mff.cuni.cz>
1257              
1258             Daniel Zeman <zeman@ufal.mff.cuni.cz>
1259              
1260             OndÅ™ej DuÅ¡ek <odusek@ufal.mff.cuni.cz>
1261              
1262             =head1 COPYRIGHT AND LICENSE
1263              
1264             Copyright © 2011-2012 by Institute of Formal and Applied Linguistics, Charles University in Prague
1265              
1266             This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself.