File Coverage

blib/lib/Treex/Core/Document.pm
Criterion Covered Total %
statement 13 15 86.6
branch n/a
condition n/a
subroutine 5 5 100.0
pod n/a
total 18 20 90.0


line stmt bran cond sub pod time code
1             package Treex::Core::Document;
2             $Treex::Core::Document::VERSION = '2.20160630';
3 24     24   360604 use Moose;
  24         7513134  
  24         184  
4 24     24   175954 use Treex::Core::Common;
  24         84  
  24         155  
5 24     24   143786 use Treex::Core::Config;
  24         63  
  24         724  
6 24     24   13139 use Treex::Core::DocZone;
  24         128  
  24         1120  
7 24     24   218329 use Treex::Core::Bundle;
  0            
  0            
8              
9             use Treex::PML;
10             Treex::PML::UseBackends('PMLBackend');
11             Treex::PML::AddResourcePath( Treex::Core::Config->pml_schema_dir() );
12              
13             with 'Treex::Core::WildAttr';
14              
15             use Scalar::Util qw( weaken reftype );
16              
17             use PerlIO::via::gzip;
18             use Storable;
19             use Digest::MD5 qw(md5_hex);
20             use Lingua::Interset::FeatureStructure;
21              
# --- Where the document came from and how it is named on disk ---------------
has loaded_from => (
    is      => 'rw',
    isa     => 'Str',
    default => '',
);
has path => (
    is  => 'rw',
    isa => 'Str',
);
has file_stem => (
    is      => 'rw',
    isa     => 'Str',
    default => 'noname',
);

# The default value is produced by build_file_number().
has file_number => (
    is      => 'rw',
    isa     => 'Str',
    builder => 'build_file_number',
);

# --- Output format switches ---------------------------------------------------
has compress => (
    is            => 'rw',
    isa           => 'Bool',
    default       => undef,
    documentation => 'compression to .gz',
);
has storable => (
    is            => 'rw',
    isa           => 'Bool',
    default       => undef,
    documentation => 'using Storable with gz compression instead of Treex::PML',
);

# Digest of the document; filled in by get_hash() or set_hash().
has _hash => (
    is  => 'rw',
    isa => 'Str',
);
35              
# Return an MD5 hex digest of the document.
#
# On the first call the digest is computed from the Storable::nfreeze
# serialization of the whole object and stored in the private _hash
# attribute; later calls return the stored value.
sub get_hash {
    my $self = shift;

    if ( !defined $self->_hash ) {

        # Canonical mode makes Storable serialize hash keys in sorted order.
        # "local" restores whatever value the caller had (instead of forcing
        # 0) and does so even if nfreeze dies.
        local $Storable::canonical = 1;
        $self->_set_hash( md5_hex( Storable::nfreeze($self) ) );
    }

    return $self->_hash;
}
45              
# Store the given digest in the private _hash attribute.
# Returns nothing.
sub set_hash {
    my $self = shift;
    my ($digest) = @_;
    $self->_set_hash($digest);
    return;
}
53              
# The wrapped Treex::PML::Document. It is supplied through the 'pml_doc'
# constructor argument or built by _create_empty_pml_doc(). The listed
# methods are delegated to it under the same name; set_filename is an extra
# alias for changeFilename.
has _pmldoc => (
    is       => 'rw',
    isa      => 'Treex::PML::Document',
    init_arg => 'pml_doc',
    writer   => '_set_pmldoc',
    builder  => '_create_empty_pml_doc',
    handles  => {
        set_filename => 'changeFilename',
        map { ( $_ => $_ ) }
            qw( clone writeFile writeTo filename URL
            changeFilename changeURL fileFormat changeFileFormat
            backend changeBackend encoding changeEncoding userData
            changeUserData metaData changeMetaData listMetaData
            appData changeAppData listAppData

            documentRootData

            FS changeFS

            hint changeHint pattern_count pattern patterns
            changePatterns tail changeTail

            trees changeTrees treeList tree delete_tree lastTreeNo notSaved
            currentTreeNo currentNode nodes value_line value_line_list
            determine_node_type )
    },
);
81              
# id => node lookup table maintained by index_node_by_id()
has _index => (
    is      => 'rw',
    default => sub { return +{}; },
);

# target id => { reference type => [ source ids ] }, maintained by
# index_backref() and remove_backref()
has _backref => (
    is      => 'rw',
    default => sub { return +{}; },
);

# for generating document-unique IDs
has _latest_node_number => (
    is      => 'rw',
    default => 0,
);
96              
97             use Treex::PML::Factory;
# Factory used by BUILD to create PML documents from files.
my $factory = Treex::PML::Factory->new;

# Next number handed out by build_file_number().
my $highest_file_number = 1;
101              
102             # the description attribute is stored inside the meta structures of pml documents,
103             # that is why it is not realized as a regular Moose attribute
104              
# Set the 'description' attribute in the 'meta' structure of the PML root.
# Returns whatever Treex::PML::Node::set_attr returns.
sub set_description {
    my $self        = shift;
    my ($new_value) = @_;
    my $meta        = $self->metaData('pml_root')->{meta};
    return Treex::PML::Node::set_attr( $meta, 'description', $new_value );
}
113              
# Read the 'description' attribute from the 'meta' structure of the PML root.
sub description {
    my ($self) = @_;
    my $meta = $self->metaData('pml_root')->{meta};
    return Treex::PML::Node::attr( $meta, 'description' );
}
118              
# Builder of the file_number attribute: returns the current value of the
# file-level counter as a zero-padded three-digit string and advances the
# counter by one.
sub build_file_number {
    my $number = $highest_file_number;
    $highest_file_number += 1;
    return sprintf '%03d', $number;
}
122              
# Full filename without the extension: the path (with a trailing slash added
# when it is missing), followed by the file stem and the file number.
# An undefined or empty path contributes nothing.
sub full_filename {
    log_fatal 'Incorrect number of arguments' if @_ != 1;
    my ($self) = @_;

    my $dir = $self->path;
    if ( !defined $dir || $dir eq '' ) {
        $dir = '';
    }
    elsif ( $dir !~ m{/$} ) {
        $dir .= '/';
    }

    return $dir . $self->file_stem . $self->file_number;
}
134              
# Moose construction hook.
#
# Reads two optional keys from the constructor parameter hashref:
#   pmldoc   - an existing Treex::PML::Document instance to wrap
#   filename - a file to load through the Treex::PML factory; names ending in
#              ".streex" are rejected with log_fatal, because such files must
#              be read with retrieve_storable()
# When a PML document was obtained, it is installed, its zones, bundles and
# nodes are turned into Treex::Core objects and the nodes are indexed.
# Finally wild attributes (and Interset features where supported) are
# deserialized and the ord values of a- and t-trees are renumbered without
# gaps. Returns nothing.
sub BUILD {
    my $self = shift;
    my ($params_rf) = @_;
    my $pmldoc;

    if ( defined $params_rf ) {

        # creating Treex::Core::Document from an already existing Treex::PML::Document instance
        if ( $params_rf->{pmldoc} ) {
            $pmldoc = $params_rf->{pmldoc};
        }

        # loading Treex::Core::Document from a file
        elsif ( $params_rf->{filename} ) {

            # The dot must be escaped: an unescaped "." matches any character,
            # so e.g. "foo_streex" would be rejected as well.
            if ( $params_rf->{filename} =~ /\.streex$/ ) {
                log_fatal 'Storable (.streex) docs must be retrieved by Treex::Core::Document->retrieve_storable($filename)';
            }

            else {

                # If the file contains invalid PML (e.g. unknown afun value)
                # Treex::PML fails with die.
                # TODO: we should rather catch the die message and report it via log_fatal
                $pmldoc = eval {

                    # In r10421, ZŽ added here recover => 1:
                    # $factory->createDocumentFromFile( $params_rf->{filename}, { recover => 1 });
                    # However, if the file contains invalid PML (e.g. unknown afun value), the recover=>1 option
                    # results in returning a $pmldoc which seems to be OK, but it contains no bundles,
                    # so Treex crashes on subsequent blocks which is misleading for users.
                    # If we really want to be fault-tolerant, it seems we would need to set Treex::PML::Instance::Reader::STRICT=0,
                    # but I don't know enough about PML internals and I think it's better to make such errors fatal.
                    # Martin Popel
                    $factory->createDocumentFromFile( $params_rf->{filename} );
                };
                log_fatal "Error while loading " . $params_rf->{filename} . ( $@ ? "\n$@" : '' )
                    if !defined $pmldoc;
            }
        }
    }

    # constructing treex document from an existing file
    if ($pmldoc) {
        $self->_set_pmldoc($pmldoc);

        # ensuring Treex::Core types (partially copied from the factory)
        # $doczone hashref will be reused as the blessed instance variable
        for my $doczone ( $self->get_all_zones() ) {
            Treex::Core::DocZone->new($doczone);
        }

        $self->_rebless_and_index();
    }

    $self->deserialize_wild;
    foreach my $bundle ( $self->get_bundles ) {
        $bundle->deserialize_wild;
        foreach my $bundlezone ( $bundle->get_all_zones ) {
            foreach my $tree ( $bundlezone->get_all_trees ) {

                # only a- and t-layer trees get their ord values normalized
                my $ordered     = $tree->type->get_structure_name =~ /[at]-(root|node)/ ? 1 : 0;
                my $correct_ord = 0;
                my @nodes       = $tree->get_descendants( { add_self => 1, ( $ordered ? ( ordered => 1 ) : () ) } );
                foreach my $node (@nodes) {

                    # normalize ord, so there are no gaps
                    if ($ordered) {
                        $node->_set_ord($correct_ord);
                        $correct_ord++;
                    }
                    $node->deserialize_wild;
                    if ( $node->DOES('Treex::Core::Node::Interset') ) {
                        $node->deserialize_iset;
                    }
                }
            }
        }
    }

    return;
}
214              
# Turn the raw PML structures of a loaded document into Treex::Core objects:
# every bundle is blessed into Treex::Core::Bundle and linked to this
# document, every bundle zone is passed to Treex::Core::BundleZone->new and
# linked to its bundle, and every node is blessed into
# Treex::Core::Node::<LAYER> and registered in the id index. The layer letter
# is taken from the tree's PML structure name. Returns nothing.
sub _rebless_and_index {
    my ($self) = @_;

    BUNDLE:
    for my $bundle ( $self->get_bundles ) {
        bless $bundle, 'Treex::Core::Bundle';
        $bundle->_set_document($self);

        next BUNDLE if !defined $bundle->{zones};

        my @zones = map { $_->value() } $bundle->{zones}->elements;
        for my $zone (@zones) {

            # $zone hashref will be reused as the blessed instance variable
            Treex::Core::BundleZone->new($zone);
            $zone->_set_bundle($bundle);

            for my $tree ( $zone->get_all_trees ) {
                my $structure_name = $tree->type->get_structure_name;
                my ($layer) = $structure_name =~ /(\S)-(root|node|nonterminal|terminal)/;
                if ( !defined $layer ) {
                    log_fatal "Unexpected member in zone structure: " . $structure_name;
                }
                $layer = uc $layer;

                # must still call Treex::PML::Node's API
                for my $node ( $tree, $tree->descendants ) {
                    bless $node, "Treex::Core::Node::$layer";
                    $self->index_node_by_id( $node->get_id, $node );

                    # on the A layer a non-empty iset value is wrapped in a feature structure object
                    if ( $layer eq 'A' && $node->{iset} ) {
                        $node->{iset} = Lingua::Interset::FeatureStructure->new( %{ $node->{iset} } );
                    }
                }
                $tree->_set_zone($zone);
            }
        }
    }

    return;
}
251              
# Return the 'meta' member of the document's 'pml_root' metadata.
sub _pml_attribute_hash {
    my ($self) = @_;
    my $pml_root = $self->metaData('pml_root');
    return $pml_root->{meta};
}
256              
257             #my $_treex_schema_file = Treex::PML::ResolvePath( '.', 'treex_schema.xml', 1 );
# Locate the Treex PML schema in the configured schema directory; loading of
# the module fails with log_fatal when the file is not there.
my $_treex_schema_file = Treex::Core::Config->pml_schema_dir() . '/treex_schema.xml';
log_fatal "Can't find PML schema $_treex_schema_file" if !-f $_treex_schema_file;

# Parsed schema shared by all documents created by _create_empty_pml_doc().
my $_treex_schema = Treex::PML::Schema->new( { filename => $_treex_schema_file } );
264              
# Builder of the _pmldoc attribute: create a Treex::PML::Document with no
# trees, using the PML backend and utf-8 encoding, and attach the Treex schema
# and an empty 'pml_root' structure to its metadata.
sub _create_empty_pml_doc {    ## no critic (ProhibitUnusedPrivateSubroutines)
    my $fs_format = Treex::PML::FSFormat->new( { 'deepord' => ' N' } );    # ???

    my $pml_document = Treex::PML::Document->create(
        name     => "x",          #$filename, ???
        FS       => $fs_format,
        trees    => [],
        backend  => 'PMLBackend',
        encoding => "utf-8",
    );

    my @metadata = (
        [ 'schema-url', 'treex_schema.xml' ],
        [ 'schema',     $_treex_schema ],
        [ 'pml_root',   { meta => {}, bundles => undef, } ],
    );
    for my $pair (@metadata) {
        $pml_document->changeMetaData( $pair->[0], $pair->[1] );
    }

    return $pml_document;
}
284              
285             # --- INDEXING
286              
# Register $node under $id in the document index, or remove the entry for $id
# when $node is undef. The index holds only a weak reference to the node. When
# a node is registered, all ids it refers to are also recorded in the
# backreference table. Returns nothing.
sub index_node_by_id {
    my $self = shift;
    my ( $id, $node ) = pos_validated_list(
        \@_,
        { isa => 'Treex::Type::Id' },
        { isa => 'Maybe[Treex::Core::Node]' },    # (original author's note: does it work like this?)
    );
    my $index = $self->_index;

    if ( !defined $node ) {
        delete $index->{$id};
        return;
    }

    $index->{$id} = $node;
    weaken $index->{$id};

    my $referenced = $node->_get_referenced_ids;
    for my $type ( keys %{$referenced} ) {
        $self->index_backref( $type, $id, $referenced->{$type} );
    }
    return;
}
309              
# Record in the reversed-references table that $source refers, with reference
# type $type, to every id in the arrayref $targets. Undefined targets are
# skipped. Returns nothing.
sub index_backref {
    my $self = shift;
    my ( $type, $source, $targets ) = @_;
    my $table = $self->_backref;

    for my $target_id ( @{$targets} ) {
        next if !defined $target_id;

        my $by_type = ( $table->{$target_id} //= {} );
        $by_type->{$type} ||= [];
        push @{ $by_type->{$type} }, $source;
    }
    return;
}
325              
# Remove $source from the reversed-references table for reference type $type
# and every target id in the arrayref $targets. Undefined targets, targets
# without any recorded backreferences and targets without backreferences of
# this type are skipped, so the call never adds entries to the table.
# Returns nothing.
sub remove_backref {
    my ( $self, $type, $source, $targets ) = @_;
    my $backref = $self->_backref;

    foreach my $target ( @{$targets} ) {
        next if !defined $target;

        my $target_backrefs = $backref->{$target};
        next if !$target_backrefs;

        # Nothing of this type was ever indexed for the target: skip it rather
        # than dereferencing an undefined value and leaving an empty list behind.
        my $sources = $target_backrefs->{$type};
        next if !$sources;

        $target_backrefs->{$type} = [ grep { $_ ne $source } @{$sources} ];
    }
    return;
}
340              
# Return the hashref ( type => [source ids] ) of references leading to the
# node with the given id, or nothing when no such references are recorded.
# The returned hashref is the table's own entry, not a copy.
sub get_references_to_id {
    my $self = shift;
    my ($id) = @_;

    my $entry = $self->_backref->{$id};
    return unless $entry;
    return $entry;    # TODO clone ?
}
349              
# Remove all references and backreferences that involve $node:
#   1. the node's own outgoing references are dropped from the backreference
#      table,
#   2. remove_reference() is called on every node that refers to $node,
#   3. the backreference entry of $node itself is deleted.
# Steps 2 and 3 are skipped when nothing refers to $node. Returns nothing.
sub _remove_references_to_node {
    my $self = shift;
    my ($node) = @_;

    my $node_id = $node->id;
    my $table   = $self->_backref;

    # First, delete backreferences to the $node
    my $outgoing = $node->_get_referenced_ids();
    for my $type ( keys %{$outgoing} ) {
        $self->remove_backref( $type, $node_id, $outgoing->{$type} );
    }

    # Second, delete references to the $node
    my $incoming = $table->{$node_id};
    return if !$incoming;

    for my $type ( keys %{$incoming} ) {
        for my $source_id ( @{ $incoming->{$type} } ) {
            my $source_node = $self->get_node_by_id($source_id);
            $source_node->remove_reference( $type, $node_id );
        }
    }

    # Third, delete backreferences from the $node
    delete $table->{$node_id};
    return;
}
376              
# Tell whether the document index holds a defined entry for the given id.
sub id_is_indexed {
    my $self = shift;
    my ($id) = pos_validated_list(
        \@_,
        { isa => 'Treex::Type::Id' },
    );
    my $index = $self->_index;
    return ( defined $index->{$id} );
}
385              
# Return the node registered under the given id in the document index.
# An id without a defined index entry is reported with log_fatal.
sub get_node_by_id {
    my $self = shift;
    my ($id) = pos_validated_list(
        \@_,
        { isa => 'Treex::Type::Id' },
    );

    my $node = $self->_index->{$id};
    return $node if defined $node;

    # This is something very fatal. Treex assumes every node ID to
    # be valid and pointing to a node *in the given document*.
    # (It is fine to have a node with no a/lex.rf
    # attribute, but if the attribute is there, the value
    # has to be an ID within the document.)
    #
    # If your data violates the requirement and your IDs point to
    # a different document, the only hack we suggest is to drop such
    # references...
    log_fatal "ID not indexed: id=\"$id\"";

    return;
}
410              
# Return the ids (keys) of the document index. Takes no arguments besides the
# invocant; any other argument count is reported with log_fatal.
sub get_all_node_ids {
    log_fatal('Incorrect number of arguments') if @_ != 1;
    my ($self) = @_;
    my $index = $self->_index;
    return ( keys %{$index} );
}
416              
417             # -------------------------------------- ACCESS TO BUNDLES -------------------
418              
# Return the document's bundles, i.e. whatever the delegated trees() method
# returns. Takes no arguments besides the invocant; any other argument count
# is reported with log_fatal.
sub get_bundles {
    log_fatal('Incorrect number of arguments') if @_ != 1;
    my ($self) = @_;
    return $self->trees;
}
424              
# Create a new empty bundle and return it.
#
# $arg_ref is an optional hashref; with a true 'after' or 'before' value (an
# existing bundle) the new bundle is inserted right after / at the position of
# that bundle ('after' wins when both are given). Otherwise the bundle is
# appended at the end of the document. The bundle gets the 'bundle.type' PML
# type, is linked to this document and receives the id "s<N>", where N is
# lastTreeNo + 1 after the insertion.
sub create_bundle {
    my $self = shift;
    my ($arg_ref) = @_;
    my $pml_document = $self->_pmldoc();

    my $insert_at;
    my $anchor = $arg_ref && ( $arg_ref->{after} || $arg_ref->{before} );
    if ($anchor) {
        my $shift_by = $arg_ref->{after} ? 1 : 0;
        $insert_at = $anchor->get_position + $shift_by;
    }
    else {

        # default: append at the end of the document
        $insert_at = scalar( $self->get_bundles() );
    }

    my $bundle = $pml_document->new_tree($insert_at);
    $bundle->set_type_by_name( $pml_document->metaData('schema'), 'bundle.type' );
    bless $bundle, "Treex::Core::Bundle";    # is this correct/sufficient with Moose ????
    $bundle->_set_document($self);

    my $serial = $pml_document->lastTreeNo + 1;
    $bundle->set_id( "s" . $serial );

    return $bundle;
}
450              
451             # ----------------------- ACCESS TO ZONES ------------------------------------
452              
# Create a document zone for the given language code and optional selector
# (default ''), put it at the front of the zone sequence stored in the 'meta'
# structure of the PML root (creating the sequence when there is none yet),
# and return the new zone.
sub create_zone {
    my $self = shift;
    my ( $language, $selector ) = pos_validated_list(
        \@_,
        { isa => 'Treex::Type::LangCode' },
        { isa => 'Treex::Type::Selector', default => '' },
    );

    my $zone = Treex::Core::DocZone->new(
        {
            'language' => $language,
            'selector' => $selector
        }
    );
    my $element = Treex::PML::Seq::Element->new( 'zone', $zone );

    my $meta = $self->metaData('pml_root')->{meta};
    if ( !defined $meta->{zones} ) {
        $meta->{zones} = Treex::PML::Seq->new( [$element] );
    }
    else {
        $meta->{zones}->unshift_element_obj($element);
    }

    return $zone;
}
480              
# Return all document zones stored in the 'meta' structure of the PML root,
# or nothing when there is no zone sequence.
sub get_all_zones {
    my ($self) = @_;
    my $meta = $self->metaData('pml_root')->{meta};
    return if !$meta->{zones};

    # Each element is a pair [$name, $value]. We need just the values.
    my @zones;
    for my $pair ( $meta->{zones}->elements ) {
        push @zones, $pair->[1];
    }
    return @zones;
}
489              
# Return the first document zone whose language and selector both equal the
# given values (selector defaults to ''), or nothing when there is none.
sub get_zone {
    my $self = shift;
    my ( $language, $selector ) = pos_validated_list(
        \@_,
        { isa => 'Treex::Type::LangCode' },
        { isa => 'Treex::Type::Selector', default => '' },
    );

    for my $candidate ( $self->get_all_zones() ) {
        next if $candidate->language ne $language;
        next if $candidate->selector ne $selector;
        return $candidate;
    }
    return;
}
503              
# Return the document zone for the given language and selector (default ''),
# creating it with create_zone() when get_zone() finds none.
sub get_or_create_zone {
    my $self = shift;
    my ( $language, $selector ) = pos_validated_list(
        \@_,
        { isa => 'Treex::Type::LangCode' },
        { isa => 'Treex::Type::Selector', default => '' },
    );

    my $zone = $self->get_zone( $language, $selector );
    return $zone if defined $zone;

    $zone = $self->create_zone( $language, $selector );
    return $zone;
}
518              
519             # -------------------- LOADING AND SAVING ------------------------------------
520              
# Forward all arguments to the load() method of the wrapped PML document and
# return its result.
# TODO: this is unfinished: should be somehow connected with the code in BUILD
sub load {
    my $pml_document = shift->_pmldoc;
    return $pml_document->load(@_);
}
527              
# Save the document.
#
# A file name ending in ".streex" selects the Storable format: the whole
# object is serialized with Storable::nfreeze and written through the gzip
# PerlIO layer; nothing is returned. Any other file name (or none) first
# serializes all wild attributes and then delegates to the save() method of
# the wrapped PML document with the original arguments, returning its result.
# Failures to open, write or close the .streex file are reported with
# log_fatal.
sub save {
    my $self = shift;
    my ($filename) = @_;

    if ( defined $filename && $filename =~ /\.streex$/ ) {
        open( my $fh, ">:via(gzip)", $filename )
            or log_fatal "Cannot open $filename for writing: $!";

        # using Storable::nstore_fd($self,*$F) emits 'Inappropriate ioctl for device'
        print {$fh} Storable::nfreeze($self)
            or log_fatal "Cannot write to $filename: $!";

        # buffered write errors surface only at close time, so check it
        close $fh
            or log_fatal "Cannot close $filename: $!";

        return;
    }

    $self->_serialize_all_wild();
    return $self->_pmldoc->save(@_);
}
547              
# Serialize the wild attributes of the document, of every bundle and of every
# node (including the root) of every tree in every bundle zone. Nodes that do
# the Treex::Core::Node::Interset role also get their Interset features
# serialized. Returns nothing.
sub _serialize_all_wild {
    my $self = shift;

    $self->serialize_wild;
    for my $bundle ( $self->get_bundles ) {
        $bundle->serialize_wild;

        for my $bundlezone ( $bundle->get_all_zones ) {

            # collect the nodes of all trees of the zone before touching any of them
            my @zone_nodes = map { $_->get_descendants( { add_self => 1 } ) } $bundlezone->get_all_trees;

            for my $node (@zone_nodes) {
                $node->serialize_wild;
                next if !$node->DOES('Treex::Core::Node::Interset');
                $node->serialize_iset;
            }
        }
    }
    return;
}
564              
# Load a document from the .streex (gzip-compressed Storable) format.
#
# $file is normally a file name, which must end in ".streex" (anything else
# is reported with log_fatal). It may also be an already opened file handle
# (a GLOB reference), which is needed by the TrEd backend for .streex; such a
# handle is read but not closed here.
# When a file name was given, the document's loaded_from and path attributes
# are set from it. Finally fix_pml_type() is called on every non-root node.
# Returns the retrieved document.
sub retrieve_storable {
    my ( $class, $file ) = @_;

    my $FILEHANDLE;
    my $opened = 0;

    if ( ref($file) and reftype($file) eq 'GLOB' ) {
        $FILEHANDLE = $file;
    }
    else {
        log_fatal "filename=$file, but Treex::Core::Document->retrieve_storable(\$filename) can be used only for .streex files"
            unless $file =~ /\.streex$/;
        open $FILEHANDLE, "<:via(gzip)", $file or log_fatal($!);
        $opened = 1;
    }

    my $serialized = '';

    # reading it this way is silly, but both slurping the file or
    # using Storable::retrieve_fd lead to errors when used with via(gzip).
    # A lexical loop variable is used so that the caller's $_ is not clobbered.
    while ( defined( my $chunk = <$FILEHANDLE> ) ) {
        $serialized .= $chunk;
    }

    if ($opened) {
        close($FILEHANDLE);
    }

    # my $retrieved_doc = Storable::retrieve_fd(*$FILEHANDLE) or log_fatal($!);
    my $retrieved_doc = Storable::thaw($serialized) or log_fatal $!;

    if ( not ref($file) ) {

        # File::Spec is not loaded anywhere else in this file, so make sure
        # it is available before using it.
        require File::Spec;

        $retrieved_doc->set_loaded_from($file);
        my ( $volume, $dirs, $file_name ) = File::Spec->splitpath($file);
        $retrieved_doc->set_path( $volume . $dirs );

        # $retrieved_doc->changeFilename($file); # why this doesn't affect the name displayed in TrEd?
    }

    # *.streex files saved before r8789 (2012-05-29) have no PML types with nodes, let's fix it
    # TODO: delete this hack as soon as no such old streex files are needed.
    foreach my $bundle ( $retrieved_doc->get_bundles() ) {
        foreach my $bundlezone ( $bundle->get_all_zones() ) {
            foreach my $node ( map { $_->get_descendants() } $bundlezone->get_all_trees() ) {

                # skip this hack if we are dealing with a new streex file
                #return $retrieved_doc if $node->type;
                # This shortcut does not work since old files have only *some* nodes without types
                $node->fix_pml_type();
            }
        }
    }

    return $retrieved_doc;
}
620              
621             __PACKAGE__->meta->make_immutable;
622              
623             1;
624              
625             __END__
626              
627              
628              
629             =for Pod::Coverage BUILD build_file_number description set_description
630              
631             =encoding utf-8
632              
633             =head1 NAME
634              
635             Treex::Core::Document - representation of a text and its linguistic analyses in the Treex framework
636              
637             =head1 VERSION
638              
639             version 2.20160630
640              
641             =head1 DESCRIPTION
642              
643             A document consists of a sequence of bundles, mirroring a sequence
644             of natural language sentences (typically, but not necessarily,
645             originating from the same text). Attributes (attribute-value pairs)
646             can be attached to a document as a whole.
647              
648             Note that the references from the bundles to the containing document are weak,
649             so make sure you always keep a reference to the document in scope to prevent
650             the contents of the document from being garbage-collected.
651              
652             =head1 ATTRIBUTES
653              
654             C<Treex::Core::Document>'s instances have the following attributes:
655              
656             =over 4
657              
658             =item description
659              
660             Textual description of the file's content that is stored in the file.
661              
662             =item loaded_from
663              
664             =item path
665              
666             =item file_stem
667              
668             =item file_number
669              
670             =back
671              
672             The attributes can be accessed using semi-affordance accessors:
673             getters have the same names as attributes, while setters start with
674             C<set_>. For example, the attribute C<path> has a getter C<path()> and a setter C<set_path($path)>
675              
676              
677              
678             =head1 METHODS
679              
680             =head2 Constructor
681              
682             =over 4
683              
684             =item my $new_document = Treex::Core::Document->new;
685              
686             creates a new empty document object.
687              
688             =item my $new_document = Treex::Core::Document->new( { pmldoc => $pmldoc } );
689              
690             creates a C<Treex::Core::Document> instance from an already existing L<Treex::PML::Document> instance
691              
692             =item my $new_document = Treex::Core::Document->new( { filename => $filename } );
693              
694             loads a C<Treex::Core::Document> instance from a .treex file
695              
696             =back
697              
698              
699             =head2 Access to zones
700              
701             Document zones are instances of L<Treex::Core::DocZone>, parametrized
702             by language code and possibly also by another free label
703             called selector, whose purpose is to distinguish zones for the same language
704             but from a different source.
705              
706             =over 4
707              
708             =item my $zone = $doc->create_zone( $langcode, ?$selector );
709              
710             =item my $zone = $doc->get_zone( $langcode, ?$selector );
711              
712             =item my $zone = $doc->get_or_create_zone( $langcode, ?$selector );
713              
714             =back
715              
716              
717             =head2 Access to bundles
718              
719             =over 4
720              
721             =item my @bundles = $document->get_bundles();
722              
723             Returns the array of bundles contained in the document.
724              
725              
726             =item my $new_bundle = $document->create_bundle();
727              
728             Creates a new empty bundle and appends it
729             at the end of the document.
730              
731             =item my $new_bundle = $document->create_bundle( { before => $existing_bundle } );
732              
733             Creates a new empty bundle and inserts it
734             in front of the existing bundle.
735              
736             =item my $new_bundle = $document->create_bundle( { after => $existing_bundle } );
737              
738             Creates a new empty bundle and inserts it
739             after the existing bundle.
740              
741             =back
742              
743              
744             =head2 Node indexing
745              
746             =over 4
747              
748             =item $document->index_node_by_id( $id, $node );
749              
750             The node is added to the document's indexing table C<id2node> (it is done
751             automatically in L<Treex::Core::Node::set_attr()|Treex::Core::Node/set_attr>
752             if the attribute name is 'C<id>'). When using C<undef> in the place of the
753             second argument, the entry for the given id is deleted from the hash.
754              
755              
756             =item my $node = $document->get_node_by_id( $id );
757              
758             Return the node which has the value C<$id> in its 'C<id>' attribute,
759             regardless of which tree and which bundle in the given document
760             the node belongs to.
761              
762             It is prohibited in Treex for IDs to point outside of the current document.
763             In rare cases where your data has such links, we recommend you to split the
764             documents differently or hack it by dropping the problematic links.
765              
766             =item $document->id_is_indexed( $id );
767              
768             Return C<true> if the given C<id> is already present in the indexing table.
769              
770             =item $document->get_all_node_ids();
771              
772             Return the array of all node identifiers indexed in the document.
773              
774             =item $document->get_references_to_id( $id );
775              
776             Return all references leading to the given node id in a hash (keys are reference types, e.g. 'alignment',
777             'a/lex.rf' etc., values are arrays of nodes referencing this node).
778              
779             =item $document->_remove_references_to_node( $node );
780              
781             Remove all references to the given node id (calls remove_reference() on each referencing node).
782              
783             =back
784              
785             =head2 Serializing
786              
787             =over 4
788              
789             =item my $document = load($filename, \%opts)
790              
791             Loads document from C<$filename> given C<%opts> using L<Treex::PML::Document::load()>
792              
793             =item $document->save($filename)
794              
795             Saves document to C<$filename> using L<Treex::PML::Document::save()>,
796             or by the Storable module (with gzip compression) if the file's extension is .streex.
797              
798             =item Treex::Core::Document->retrieve_storable($filename)
799              
800             Loading a document from the .streex (Storable) format.
801              
802             =back
803              
804             =head2 Other
805              
806             =over 4
807              
808             =item my $filename = $doc->full_filename;
809              
810             full filename without the extension
811              
812             =back
813              
814              
815             =head1 AUTHOR
816              
817             Zdeněk Žabokrtský <zabokrtsky@ufal.mff.cuni.cz>
818              
819             Martin Popel <popel@ufal.mff.cuni.cz>
820              
821             Ondřej Dušek <odusek@ufal.mff.cuni.cz>
822              
823             =head1 COPYRIGHT AND LICENSE
824              
825             Copyright © 2011-2012 by Institute of Formal and Applied Linguistics, Charles University in Prague
826              
827             This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself.