File Coverage

blib/lib/Treex/PML/Document.pm
Criterion Covered Total %
statement 276 619 44.5
branch 91 368 24.7
condition 17 143 11.8
subroutine 60 100 60.0
pod 76 77 98.7
total 520 1307 39.7


line stmt bran cond sub pod time code
1             package Treex::PML::Document;
2              
3             ############################################################
4             #
5             # FS File
6             # =========
7             #
8             #
9 6     6   2765 use Treex::PML::Schema;
  6         22  
  6         626  
10 6     6   38 use Carp;
  6         16  
  6         266  
11 6     6   36 use strict;
  6         12  
  6         137  
12              
13 6     6   30 use vars qw($VERSION);
  6         12  
  6         290  
14             BEGIN {
15 6     6   95 $VERSION='2.24'; # version template
16             }
17 6     6   31 use URI;
  6         14  
  6         104  
18 6     6   25 use URI::file;
  6         12  
  6         97  
19 6     6   28 use Cwd;
  6         14  
  6         293  
20 6     6   3161 use Treex::PML::FSFormat;
  6         17  
  6         170  
21 6     6   2532 use Treex::PML::Backend::FS;
  6         16  
  6         193  
22 6     6   2713 use Treex::PML::Node;
  6         24  
  6         248  
23 6     6   44 use Treex::PML::Factory;
  6         12  
  6         148  
24              
25 6     6   33 use Scalar::Util qw(blessed weaken);
  6         12  
  6         360  
26 6     6   35 use UNIVERSAL::DOES;
  6         13  
  6         6512  
27              
28             =head1 NAME
29              
30             Treex::PML::Document - Treex::PML class representing a document consisting of a set of trees.
31              
32             =head1 DESCRIPTION
33              
34             This class implements a document consisting of a set of trees. The
35             document may be associated with a FS format and a PML schema and can
36             contain additional meta data, application data, and user data
37             (implemented as name/value paris).
38              
39             For backward compatibility, a the document may also contain data
40             related with the FS format, e.g. a patterns and tail.
41              
42             =head1 METHODS
43              
44             =over 4
45              
46             =cut
47              
48             =item Treex::PML::Document->load (filename,\%opts ?)
49              
50             NOTE: Don't call this method as a constructor directly, use Treex::PML::Factory->createDocumentFromFile() instead!
51              
52             Load a Treex::PML::Document object from a given file. If called as a class
53             method, a new instance is created, otherwise the current instance is
54             reinitialized and reused. The method returns the instance or dies
55             (using Carp::croak) if loading fails (unless option C is set,
56             see below).
57              
58             Loading options can be passed as a HASH reference in the second
59             argument. The following keys are supported:
60              
61             =over 8
62              
63             =item backends
64              
65             An ARRAY reference of IO backend names (previously imported using
66             C). These backends are tried additionally to
67             Treex::PML::Backend::FS. If not given, the backends previously selected using
68             C or C are used instead.
69              
70             =item encoding
71              
72             A name of character set (encoding) to be used by text-based I/O
73             backends such as Treex::PML::Backend::FS.
74              
75             =item recover
76              
77             If true, the method returns normally in case of loading failure, but
78             sets the global variable C<$Treex::PML::FSError> to the value return value
79             of C, indicating the error.
80              
81             =back
82              
83             =cut
84              
85             sub load {
86 25     25 1 93 my ($class,$filename,$opts) = @_;
87 25   50     147 $opts||={};
88 25 50       87 my $new=ref($class) ? $class : $class->new();
89             # the second arg may/may not be encoding string
90 25 50       112 $new->changeEncoding($opts->{encoding}) if $opts->{encoding};
91 25 50       55 my $error = $new->readFile($filename,@{$opts->{backends} || \@Treex::PML::BACKENDS});
  25         207  
92 25 50       155 if ($opts->{recover}) {
    50          
    100          
93 0         0 $Treex::PML::FSError = $error;
94 0         0 return $new;
95             } elsif ($error == 1) {
96 0         0 croak("Loading file '$filename' failed: no suitable backend!");
97             } elsif ($error) {
98 1         268 croak("Loading file '$filename' failed, possible error: $!");
99             } else {
100 24         159 return $new;
101             }
102             }
103              
104              
105              
106             # # Treex::PML::Document->newFSFile (filename,encoding?,\@backends)
107              
108             # This is an obsolete interface for loading a Treex::PML::Document from file.
109             # It is recommended to use Treex::PML::Document->load() instad.
110              
111             # This method retruns the new instance. The value of $Treex::PML::FSError
112             # contains the return value of $document->readFile and should be used to
113             # check for errors.
114              
115             # #
116              
117             sub newFSFile {
118 0     0 0 0 my ($self,$filename) = (shift,shift);
119 0         0 my $new=$self->new();
120             # the second arg may/may not be encoding string
121 0 0       0 $new->changeEncoding(shift) unless ref($_[0]);
122 0         0 $Treex::PML::FSError=$new->readFile($filename,@_);
123 0         0 return $new;
124             }
125              
126             =pod
127              
128             =item Treex::PML::Document->new (name?, file_format?, FS?, hint_pattern?, attribs_patterns?, unparsed_tail?, trees?, save_status?, backend?, encoding?, user_data?, meta_data?, app_data?)
129              
130             Creates and returns a new FS file object based on the given values
131             (optional). For use with arguments, it is more convenient to use the
132             method C instead.
133              
134             NOTE: Don't call this constructor directly, use Treex::PML::Factory->createDocument() instead!
135              
136             =cut
137              
138             sub new {
139 25     25 1 69 my $self = shift;
140 25 50 33     145 if (@_==1 and ref($_[0]) eq 'HASH') {
141 0         0 return $self->create($_[0]);
142             }
143 25   33     155 my $class = ref($self) || $self;
144 25         82 my $new = [];
145 25         71 bless $new, $class;
146 25         131 $new->initialize(@_);
147 25         135 return $new;
148             }
149              
150             =pod
151              
152             =item Treex::PML::Document->new({ argument => value, ... })
153              
154             or
155              
156             =item Treex::PML::Document->create({ argument => value, ... })
157              
158             NOTE: Don't call this constructor directly, use Treex::PML::Factory->createDocument() instead!
159              
160             Creates and returns a new empty Treex::PML::Document object based on the
161             given parameters. This method accepts argument => value pairs as
162             arguments. The following arguments are available:
163              
164             name, format, FS, hint, patterns, tail, trees, save_status, backend
165              
166             See C for more details.
167              
168              
169             =cut
170              
171             sub create {
172 0     0 1 0 my $self = shift;
173 0 0 0     0 my $args = (@_==1 and ref($_[0])) ? $_[0] : { @_ };
174 0 0       0 if (exists $args->{filename}) {
175 0         0 croak(__PACKAGE__."->create: Unknown parameter 'filename'\n");
176             }
177 0         0 return $self->new(@{$args}{qw(name format FS hint patterns tail trees save_status backend encoding user_data meta_data app_data)});
  0         0  
178             }
179              
180              
181             =item $document->clone ($clone_trees)
182              
183             Create a new Treex::PML::Document object with the same file name, file
184             format, meta data, FSFormat, backend, encoding, patterns, hint and
185             tail as the current Treex::PML::Document. If $clone_trees is true,
186             populate the new Treex::PML::Document object with clones of all trees
187             from the current Treex::PML::Document.
188              
189             =cut
190              
191             sub clone {
192 0     0 1 0 my ($self, $deep)=@_;
193 0         0 my $fs=$self->FS;
194 0         0 my $new = ref($self)->create(
195             name => $self->filename,
196             format => $self->fileFormat,
197             FS => $fs->clone,
198             trees => [],
199             backend => $self->backend,
200             encoding => $self->encoding,
201             hint => $self->hint,
202             patterns => [ $self->patterns() ],
203             tail => $self->tail
204             );
205             # clone metadata
206 0 0       0 if (ref($self->[13])) {
207 0         0 $new->[13] = Treex::PML::CloneValue($self->[13]);
208             }
209 0 0       0 if ($deep) {
210 0         0 @{$new->treeList} = map { $fs->clone_subtree($_) } $self->trees();
  0         0  
  0         0  
211             }
212 0         0 return $new;
213             }
214              
215             sub _weakenLinks {
216 0     0   0 my ($self) = @_;
217 0         0 foreach my $tree (@{$self->treeList}) {
  0         0  
218 0         0 Treex::PML::_WeakenLinks($tree);
219             }
220             }
221              
222             sub DESTROY {
223 25     25   3444420 my ($self) = @_;
224 25 50       106 return unless ref($self);
225             # this is not needed if all links are weak
226 25         61 $_->destroy() for (@{$self->treeList});
  25         107  
227 25         9887 undef @$self;
228             }
229              
230             =pod
231              
232             =item $document->initialize (name?, file_format?, FS?, hint_pattern?, attribs_patterns?, unparsed_tail?, trees?, save_status?, backend?, encoding?, user_data?, meta_data?, app_data?)
233              
234             Initialize a FS file object. Argument description:
235              
236             =over 4
237              
238             =item name (scalar)
239              
240             File name
241              
242             =item file_format (scalar)
243              
244             File format identifier (user-defined string). TrEd, for example, uses
245             C, C and C strings as identifiers.
246              
247             =item FS (FSFormat)
248              
249             FSFormat object associated with the file
250              
251             =item hint_pattern (scalar)
252              
253             hint pattern definition (used by TrEd)
254              
255             =item attribs_patterns (list reference)
256              
257             embedded stylesheet patterns (used by TrEd)
258              
259             =item unparsed_tail (list reference)
260              
261             The rest of the file, which is not parsed by Treex::PML, i.e. Graph's embedded macros
262              
263             =item trees (list reference)
264              
265             List of FSNode objects representing root nodes of all trees in the Treex::PML::Document.
266              
267             =item save_status (scalar)
268              
269             File save status indicator, 0=file is saved, 1=file is not saved (TrEd
270             uses this field).
271              
272             =item backend (scalar)
273              
274             IO Backend used to open/save the file.
275              
276             =item encoding (scalar)
277              
278             IO character encoding for perl 5.8 I/O filters
279              
280             =item user_data (arbitrary scalar type)
281              
282             Reserved for the user. Content of this slot is not persistent.
283              
284             =item meta_data (hashref)
285              
286             Meta data (usually used by IO Backends to store additional information
287             about the file - i.e. other than encoding, trees, patterns, etc).
288              
289             =item app_data (hashref)
290              
291             Non-persistent application specific data associated with the file (by
292             default this is an empty hash reference). Applications may store
293             temporary data associated with the file into this hash.
294              
295             =back
296              
297              
298             =cut
299              
300             sub initialize {
301 25     25 1 48 my $self = shift;
302             # what will we do here ?
303 25         93 $self->[1] = $_[1]; # file format (scalar)
304 25 50       157 $self->[2] = ref($_[2]) ? $_[2] : Treex::PML::Factory->createFSFormat(); # FS format (FSFormat object)
305 25         58 $self->[3] = $_[3]; # hint pattern
306 25 50       113 $self->[4] = ref($_[4]) eq 'ARRAY' ? $_[4] : []; # list of attribute patterns
307 25 50       110 $self->[5] = ref($_[5]) eq 'ARRAY' ? $_[5] : []; # unparsed rest of a file
308 25 50       208 $self->[6] = UNIVERSAL::isa($_[6],'ARRAY') ?
309             Treex::PML::Factory->createList($_[6],1) :
310             Treex::PML::Factory->createList(); # trees
311 25 50       112 $self->[7] = $_[7] ? $_[7] : 0; # notsaved
312 25         67 $self->[8] = undef; # storage for current tree number
313 25         54 $self->[9] = undef; # storage for current node
314 25 50       108 $self->[10] = $_[8] ? $_[8] : 'Treex::PML::Backend::FS'; # backend;
315 25 50       86 $self->[11] = $_[9] ? $_[9] : undef; # encoding;
316 25 50       88 $self->[12] = $_[10] ? $_[10] : {}; # user data
317 25 50       88 $self->[13] = $_[11] ? $_[11] : {}; # meta data
318 25 50       90 $self->[14] = $_[12] ? $_[12] : {}; # app data
319              
320 25         92 $self->[15] = undef;
321 25 50       77 if (defined $_[0]) {
322 0         0 $self->changeURL($_[0]);
323             } else {
324 25         55 $self->[0] = undef;
325             }
326 25 50       84 return ref($self) ? $self : undef;
327             }
328              
329             =pod
330              
331             =item $document->readFile ($filename, \@backends)
332              
333             NOTE: Don't call this constructor directly, use Treex::PML::Factory->createDocumentFromFile() instead!
334              
335             Read a document from a given file. The first argument
336             must be a file-name. The second argument may be a list reference
337             consisting of names of I/O backends. If no backends are given, only
338             the Treex::PML::Backend::FS is used. For each I/O backend, C tries to
339             execute the C function from the appropriate class in the order
340             in which the backends were specified, passing it the filename as an
341             argument. The first I/O backend whose C function returns 1 is
342             then used to read the file.
343              
344             Note: this function sets noSaved to zero.
345              
346             Return values:
347             0 - succes
348             1 - no suitable backend
349             -1 - backend failed
350              
351             =cut
352              
353             sub readFile {
354 25     25 1 83 my ($self,$url) = (shift,shift);
355 25 50       240 my @backends = UNIVERSAL::isa($_[0],'ARRAY') ? @{$_[0]} : scalar(@_) ? @_ : qw(Treex::PML::Backend::FS);
  0 50       0  
356 25         59 my $ret = 1;
357 25 50       96 croak("readFile is not a class method") unless ref($self);
358 25         323 $url =~ s/^\s*|\s*$//g;
359 25         122 my ($file,$remove_file) = eval { Treex::PML::IO::fetch_file($url) };
  25         142  
360 25 50       119 print STDERR "Actual file: $file\n" if $Treex::PML::Debug;
361 25 100       102 return -1 if $@;
362 24         74 foreach my $backend (@backends) {
363 24 50       85 print STDERR "Trying backend $backend: " if $Treex::PML::Debug;
364 24         123 $backend = Treex::PML::BackendCanRead($backend);
365 24 50 33     95 if ($backend &&
366             eval {
367 6     6   52 no strict 'refs';
  6         12  
  6         486  
368 24         119 &{"${backend}::test"}($file,$self->encoding);
  24         157  
369             }) {
370 24         128 $self->changeBackend($backend);
371 24         109 $self->changeFilename($url);
372 24 50       72 print STDERR "success\n" if $Treex::PML::Debug;
373 24         54 eval {
374 6     6   39 no strict 'refs';
  6         12  
  6         2215  
375 24         50 my $fh;
376 24 50       71 print STDERR "calling ${backend}::open_backend\n" if $Treex::PML::Debug;
377 24         83 $fh = &{"${backend}::open_backend"}($file,"r",$self->encoding);
  24         165  
378 24         60 &{"${backend}::read"}($fh,$self);
  24         169  
379 24 50       30487 &{"${backend}::close_backend"}($fh) || warn "Close failed.\n";
  24         279  
380             };
381 24 50       86 if ($@) {
382 0         0 print STDERR "Error occured while reading '$url' using backend ${backend}:\n";
383 0         0 my $err = $@; chomp $err;
  0         0  
384 0         0 print STDERR "$err\n";
385 0         0 $ret = -1;
386             } else {
387 24         64 $ret = 0;
388             }
389 24         145 $self->notSaved(0);
390 24         70 last;
391             }
392 0 0       0 print STDERR "fail\n" if $Treex::PML::Debug;
393             # eval {
394             # no strict 'refs';
395             # print STDERR "TEST",$backend->can('test'),"\n";
396             # print STDERR "READ",$backend->can('read'),"\n";
397             # print STDERR "OPEN",$backend->can('open_backend'),"\n";
398             # print STDERR "REAL_TEST($file): ",&{"${backend}::test"}($file,$self->encoding),"\n";
399             # } if $Treex::PML::Debug;
400 0 0       0 if ($@) {
401 0         0 my $err = $@; chomp $err;
  0         0  
402 0         0 print STDERR "$err\n";
403             }
404             }
405 24 50       112 if ($ret == 1) {
406 0         0 my $err = "Unknown file type (all IO backends failed): $url\n";
407 0         0 $@.="\n".$err;
408             }
409 24 100 100     168 if ($url ne $file and $remove_file) {
410 5         42 local $!;
411 5   33     989 unlink $file || warn "couldn't unlink tmp file $file: $!\n";
412             }
413 24         105 return $ret;
414             }
415              
416             =pod
417              
418             =item $document->save ($filename?)
419              
420             Save Treex::PML::Document object to a given file using the corresponding I/O backend
421             (see $document->changeBackend) and set noSaved to zero.
422              
423             =item $document->writeFile ($filename?)
424              
425             This is just an alias for $document->save($filename).
426              
427             =cut
428              
429             sub writeFile {
430 11     11 1 107 my ($self,$filename) = @_;
431 11 50       53 return unless ref($self);
432              
433 11 100 66     82 $filename = $self->filename unless (defined($filename) and $filename ne "");
434 11   50     716 my $backend=$self->backend || 'Treex::PML::Backend::FS';
435 11 50       40 print STDERR "Writing to $filename using backend $backend\n" if $Treex::PML::Debug;
436 11         36 my $ret;
437             #eval {
438 6     6   44 no strict 'refs';
  6         12  
  6         1064  
439              
440             my $fh;
441 11   50     69 $backend = Treex::PML::BackendCanWrite($backend) || die "Backend $backend is not loaded or does not support writing\n";
442 11 50       51 ($fh=&{"${backend}::open_backend"}($filename,"w",$self->encoding)) || die "Open failed on '$filename' using backend $backend\n";
  11         73  
443 11   50     31 $ret=&{"${backend}::write"}($fh,$self) || die "Write to '$filename' failed using backend $backend\n";
444 11 50       42 &{"${backend}::close_backend"}($fh) || die "Closing file '$filename' failed using backend $backend\n";
  11         127  
445             #};
446             #if ($@) {
447             # print STDERR "Error: $@\n";
448             # return 0;
449             #}
450 11 50       92 $self->notSaved(0) if $ret;
451 11         256 return $ret;
452             }
453              
454             BEGIN {
455 6     6   504 *save = \&writeFile;
456             }
457              
458             =item $document->writeTo (glob_ref)
459              
460             Write FS declaration, trees and unparsed tail to a given file (file handle open for
461             reading must be passed as a GLOB reference). Sets noSaved to zero.
462              
463             =cut
464              
465             sub writeTo {
466 0     0 1 0 my ($self,$fileref) = @_;
467 0 0       0 return unless ref($self);
468              
469 0   0     0 my $backend=$self->backend || 'Treex::PML::Backend::FS';
470 0 0       0 print STDERR "Writing using backend $backend\n" if $Treex::PML::Debug;
471 0         0 my $ret;
472 0         0 eval {
473 6     6   41 no strict 'refs';
  6         19  
  6         16831  
474             # require $backend;
475 0   0     0 $ret=$backend->can('write') && &{"${backend}::write"}($fileref,$self);
476             };
477 0 0       0 print STDERR "$@\n" if $@;
478 0         0 return $ret;
479             }
480              
481             =pod
482              
483             =item $document->filename
484              
485             Return the FS file's file name. If the actual file name is a file:// URL,
486             convert it to system path and return it. If it is a different type of URL,
487             return the corresponding URI object.
488              
489             =cut
490              
491              
492             #
493             # since URI::file->file is expensive, we cache the value in $self->[15]
494             #
495             # $self->[0] should always be an URI object (if not, we upgrade it)
496             #
497             #
498              
499              
500             sub filename {
501 42     42 1 92 my ($self) = @_;
502 42 50       130 return unless $self;
503              
504 42         92 my $filename = $self->[15]; # cached filename
505 42 100       118 if (defined $filename) {
506 7         38 return $filename
507             }
508 35 50       158 $filename = $self->[0] or return undef; # URI
509 35 50       348 if (!ref($filename)) {
510 0         0 $self->[15] = undef; # clear cache
511 0         0 $filename = $self->[0] = Treex::PML::IO::make_URI($filename);
512             }
513 35 50 33     426 if ((blessed($filename) and $filename->isa('URI::file'))) {
514 35         143 return ($self->[15] = $filename->file);
515             }
516 0         0 return $filename;
517             }
518              
519             =item $document->URL
520              
521             Return the FS file's URL as URI object.
522              
523             =cut
524              
525              
526             sub URL {
527 0     0 1 0 my ($self) = @_;
528 0         0 my $filename = $self->[0];
529 0 0 0     0 if ($filename and !(blessed($filename) and $filename->isa('URI'))) {
      0        
530 0         0 $self->[15]=undef;
531 0         0 return ($self->[0] = Treex::PML::IO::make_URI($filename));
532             }
533 0         0 return $filename;
534             }
535              
536             =pod
537              
538             =item $document->changeFilename (new_filename)
539              
540             Change the FS file's file name.
541              
542             =cut
543              
544              
545             sub changeFilename {
546 24     24 1 65 my ($self,$val) = @_;
547 24 50       59 return unless ref($self);
548 24         99 my $uri = $self->[0] = Treex::PML::IO::make_abs_URI($val);
549 24         3737 $self->[15]=undef; # clear cache
550 24         57 return $uri;
551             }
552              
553             =item $document->changeURL (uri)
554              
555             Like changeFilename, but does not attempt to absoultize the filename.
556             The argument must be an absolute URL (preferably URI object).
557              
558             =cut
559              
560              
561             sub changeURL {
562 40     40 1 2440 my ($self,$val) = @_;
563 40 50       118 return unless ref($self);
564 40         169 my $url = $self->[0] = Treex::PML::IO::make_URI($val);
565 40         94 $self->[15]=undef;
566 40         94 return $url;
567             }
568              
569             =pod
570              
571             =item $document->fileFormat
572              
573             Return file format identifier (user-defined string). TrEd, for
574             example, uses C, C and C
575             non-specific format> strings as identifiers.
576              
577             =cut
578              
579             sub fileFormat {
580 0     0 1 0 my ($self) = @_;
581 0 0       0 return ref($self) ? $self->[1] : undef;
582             }
583              
584             =pod
585              
586             =item $document->changeFileFormat (string)
587              
588             Change file format identifier.
589              
590             =cut
591              
592             sub changeFileFormat {
593 0     0 1 0 my ($self,$val) = @_;
594 0 0       0 return unless ref($self);
595 0         0 return $self->[1]=$val;
596             }
597              
598             =pod
599              
600             =item $document->backend
601              
602             Return IO backend module name. The default backend is Treex::PML::Backend::FS, used
603             to save files in the FS format.
604              
605             =cut
606              
607             sub backend {
608 11     11 1 33 my ($self) = @_;
609 11 50       68 return ref($self) ? $self->[10] : undef;
610             }
611              
612             =pod
613              
614             =item $document->changeBackend (string)
615              
616             Change file backend.
617              
618             =cut
619              
620             sub changeBackend {
621 24     24 1 72 my ($self,$val) = @_;
622 24 50       94 return unless ref($self);
623 24         68 return $self->[10]=$val;
624             }
625              
626             =pod
627              
628             =item $document->encoding
629              
630             Return file character encoding (used by Perl 5.8 input/output filters).
631              
632             =cut
633              
634             sub encoding {
635 60     60 1 132 my ($self) = @_;
636 60 50       229 return ref($self) ? $self->[11] : undef;
637             }
638              
639             =pod
640              
641             =item $document->changeEncoding (string)
642              
643             Change file character encoding (used by Perl 5.8 input/output filters).
644              
645             =cut
646              
647             sub changeEncoding {
648 22     22 1 77 my ($self,$val) = @_;
649 22 50       67 return unless ref($self);
650 22         61 return $self->[11]=$val;
651             }
652              
653              
654             =pod
655              
656             =item $document->userData
657              
658             Return user data associated with the file (by default this is an empty
659             hash reference). User data are not supposed to be persistent and IO
660             backends should ignore it.
661              
662             =cut
663              
664             sub userData {
665 0     0 1 0 my ($self) = @_;
666 0 0       0 return ref($self) ? $self->[12] : undef;
667             }
668              
669             =pod
670              
671             =item $document->changeUserData (value)
672              
673             Change user data associated with the file. User data are not supposed
674             to be persistent and IO backends should ignore it.
675              
676             =cut
677              
678             sub changeUserData {
679 0     0 1 0 my ($self,$val) = @_;
680 0 0       0 return unless ref($self);
681 0         0 return $self->[12]=$val;
682             }
683              
684             =pod
685              
686             =item $document->metaData (name)
687              
688             Return meta data stored into the object usually by IO backends. Meta
689             data are supposed to be persistent, i.e. they are saved together with
690             the file (at least by some IO backends).
691              
692             =cut
693              
694             sub metaData {
695 119     119 1 231 my ($self,$name) = @_;
696 119 50       505 return ref($self) ? $self->[13]->{$name} : undef;
697             }
698              
699             =pod
700              
701             =item $document->changeMetaData (name,value)
702              
703             Change meta information (usually used by IO backends). Meta data are
704             supposed to be persistent, i.e. they are saved together with the file
705             (at least by some IO backends).
706              
707             =cut
708              
709             sub changeMetaData {
710 254     254 1 668 my ($self,$name,$val) = @_;
711 254 50       515 return unless ref($self);
712 254         767 return $self->[13]->{$name}=$val;
713             }
714              
715             =item $document->listMetaData (name)
716              
717             In array context, return the list of metaData keys. In scalar context
718             return the hash reference where metaData are stored.
719              
720             =cut
721              
722             sub listMetaData {
723 0     0 1 0 my ($self) = @_;
724 0 0       0 return unless ref($self);
725 0 0       0 return wantarray ? keys(%{$self->[13]}) : $self->[13];
  0         0  
726             }
727              
728             =item $document->appData (name)
729              
730             Return application specific information associated with the
731             file. Application data are not persistent, i.e. they are not saved
732             together with the file by IO backends.
733              
734             =cut
735              
736             sub appData {
737 48     48 1 117 my ($self,$name) = @_;
738 48 50       244 return ref($self) ? $self->[14]->{$name} : undef;
739             }
740              
741             =pod
742              
743             =item $document->changeAppData (name,value)
744              
745             Change application specific information associated with the
746             file. Application data are not persistent, i.e. they are not saved
747             together with the file by IO backends.
748              
749             =cut
750              
751             sub changeAppData {
752 56     56 1 159 my ($self,$name,$val) = @_;
753 56 50       168 return unless ref($self);
754 56         199 return $self->[14]->{$name}=$val;
755             }
756              
757             =item $document->listAppData (name)
758              
759             In array context, return the list of appData keys. In scalar context
760             return the hash reference where appData are stored.
761              
762             =cut
763              
764             sub listAppData {
765 0     0 1 0 my ($self) = @_;
766 0 0       0 return unless ref($self);
767 0 0       0 return wantarray ? keys(%{$self->[14]}) : $self->[13];
  0         0  
768             }
769              
770             =pod
771              
772              
773             =item $document->schema
774              
775             Return a reference to the associated PML schema (if any). Note: The
776             pointer to the schema is stored in the metaData field 'schema'.
777              
778             =cut
779              
780             sub schema {
781 3     3 1 17 my($self)=@_;
782 3         13 return $self->metaData('schema');
783             }
784              
785             =item $document->schemaURL
786              
787             Return URL of the PML schema the document is associated with (if any).
788             Note that unlike $document->schema->get_url, the URL is not resolved
789             and is returned exactly as referenced in the document PML header.
790              
791             Note: The URL is stored in the metaData field 'schema-url'.
792              
793             =cut
794              
795             sub schemaURL {
796 1     1 1 596 my($self)=@_;
797 1         4 return $self->metaData('schema-url');
798             }
799              
800             =item $document->changeSchemaURL($newURL)
801              
802             Return URL of the PML schema the document is associated with (if any).
803             Note: The URL is stored in the metaData field 'schema-url'.
804              
805             =cut
806              
807             sub changeSchemaURL {
808 0     0 1 0 my($self,$url)=@_;
809 0         0 return $self->changeMetaData('schema-url',Treex::PML::IO::make_URI($url));
810             }
811              
812             =item $document->documentRootData()
813              
814             Return the root data structure of the PML instance (with trees, prolog and epilog taken out)
815             Note: The URL is stored in the metaData field 'pml_root'.
816              
817             =cut
818              
819             sub documentRootData {
820 0     0 1 0 my($self,$url)=@_;
821 0         0 return $self->metaData('pml_root');
822             }
823              
824             =item $document->treesProlog()
825              
826             Return a sequence of non-tree elements preceding trees in the PML
827             sequence (with role #TREES) from which trees were extracted (if any).
828             Note: The prolog is stored in the the metaData field 'pml_prolog'.
829              
830             =cut
831              
832             sub treesProlog {
833 0     0 1 0 my($self,$url)=@_;
834 0         0 return $self->metaData('pml_prolog');
835             }
836              
837             =item $document->treesEpilog()
838              
839             Return a sequence of non-tree elements following trees in the PML
840             sequence (with role #TREES) from which trees were extracted (if any).
841             Note: The epilog is stored in the the metaData field 'pml_epilog'.
842              
843             =cut
844              
845             sub treesEpilog {
846 0     0 1 0 my($self,$url)=@_;
847 0         0 return $self->metaData('pml_epilog');
848             }
849              
850             =item $document->lookupNodeByID($id)
851              
852             Lookup a node by its #ID. Note that the ID-hash is created when the
853             document is loaded (and if not, when first queried), but is not
854             maintained by this class. It must therefore be maintained by the
855             application.
856              
857             =cut
858              
859             sub lookupNodeByID {
860 1     1 1 4 my ($self,$id)=@_;
861 1 50       4 if (defined($id)) {
862 1         5 return $self->nodeIDHash()->{$id};
863             }
864 0         0 return;
865             }
866              
867             =item $document->deleteNodeIDHashEntry($node)
868              
869             Remove a given node from the ID-hash. Returns the value removed from
870             the ID hash (note: the function does not check if the entry for the
871             given node's ID actually was mapped to the given node) or undef if the
872             node's ID was not hashed.
873              
874             =cut
875              
876             sub deleteNodeIDHashEntry {
877 0     0 1 0 my ($self,$node)=@_;
878 0         0 my $id_hash = $self->appData('id-hash');
879 0 0       0 if (ref($id_hash)) {
880 0         0 my $id =$node->get_id;
881 0 0       0 if (defined $id) {
882 0         0 return delete $id_hash->{$id};
883             }
884             }
885 0         0 return undef;
886             }
887              
888             =item $document->deleteIDHashEntry($id)
889              
890             Remove a given ID from the ID-hash. Returns the removed hash entry (or
891             undef if ID was not hashed).
892              
893             =cut
894              
895             sub deleteIDHashEntry {
896 0     0 1 0 my ($self,$id)=@_;
897 0         0 my $id_hash = $self->appData('id-hash');
898 0 0       0 if (ref($id_hash)) {
899 0         0 return delete $id_hash->{$id};
900             }
901 0         0 return undef;
902             }
903              
904              
905             =item $document->hashNodeByID($node)
906              
907             Hash a node by its #ID. Note that the ID-hash is created when the
908             document is loaded (and if not, when first queried), but is not
909             maintained by this class. It must therefore be maintained by the
910             application.
911              
912             =cut
913              
914             sub hashNodeByID {
915 0     0 1 0 my ($self,$node)=@_;
916 0         0 my $id = $node->get_id;
917 0 0       0 if (defined $id) {
918 0         0 weaken( $self->nodeIDHash()->{$id} = $node );
919             }
920 0         0 return $id;
921             }
922              
923             =item $document->nodeIDHash()
924              
925             Return a hash reference mapping node IDs to node objects. If the ID
926             hash did not exist, it is rebuilt. Note: the ID hash, if exists, is
927             stored in the 'id-hash' appData entry.
928              
929             =cut
930              
931             sub nodeIDHash {
932 1     1 1 3 my ($self,$id)=@_;
933              
934 1         4 my $id_hash = $self->appData('id-hash');
935 1 50       5 if (ref($id_hash)) {
936 1         4 return $id_hash;
937             } else {
938 0         0 return $self->rebuildIDHash();
939             }
940             }
941              
942             =item $document->hasIDHash()
943              
944             Returns 1 if the document has an ID-to-node hash map, 0 otherwise.
945              
946             =cut
947              
948             sub hasIDHash {
949 0     0 1 0 my ($self)=@_;
950 0 0       0 if (ref($self->appData('id-hash'))) {
951 0         0 return 1;
952             } else {
953 0         0 return 0;
954             }
955             }
956              
957             =item $document->rebuildIDHash()
958              
959             Empty and rebuild document's ID-to-node hash.
960              
961             =cut
962              
963             sub rebuildIDHash {
964 0     0 1 0 my ($self)=@_;
965              
966 0         0 my $id_hash = $self->appData('id-hash');
967 0 0       0 if (ref($id_hash)) {
968 0         0 %$id_hash=();
969             } else {
970 0         0 $id_hash = {};
971 0         0 $self->changeAppData('id-hash',$id_hash);
972             }
973              
974 0         0 my %id_member;
975 0         0 for my $root ($self->trees) {
976 0         0 my $node = $root;
977 0         0 while ($node) {
978 0   0     0 my $member = $id_member{$node->type} ||= $node->get_id_member_name;
979 0 0       0 if ($member) {
980 0         0 weaken($id_hash->{ $node->{$member} } = $node);
981             }
982 0         0 $node = $node->following;
983             }
984             }
985 0         0 return $id_hash;
986             }
987              
988             =item $document->referenceURLHash
989              
990             Returns a HASHref mapping file reference IDs to URLs.
991              
992             =cut
993              
994             sub referenceURLHash {
995 5     5 1 637 my ($self)=@_;
996 5   50     25 return $self->metaData('references') || {};
997             }
998              
999             =item $document->referenceNameHash
1000              
1001             Returns a HASHref mapping file reference names to reference IDs. Each
1002             value of the hash is either a ID string (if there is just one
1003             reference with a given name) or a L containing all IDs
1004             associated with a given name.
1005              
1006             =cut
1007              
1008             sub referenceNameHash {
1009 4     4 1 6195 my ($self)=@_;
1010 4   50     16 return $self->metaData('refnames') || {};
1011             }
1012              
1013             =item $document->referenceObjectHash()
1014              
1015             Returns a HASH whose keys are reference IDs and whose values are
1016             either DOM or C representations of the
1017             corresponding related resources. Unless related tree documents were
1018             loaded with loadRequiredDocuments(), this hash only contains resources
1019             declared as readas='dom' or readas='pml' in the PML schema.
1020              
1021              
1022             Note: the hash is stored in the document's appData entry 'ref'.
1023              
1024             =cut
1025              
1026             sub referenceObjectHash {
1027 11     11 1 5430 my ($self)=@_;
1028 11         60 return $self->appData('ref');
1029             }
1030              
1031             =item $document->relatedDocuments()
1032              
1033             Returns a list of [id, URL] pairs of related tree documents declared
1034             in the PML schema of this document as C (if any).
1035             Note that C does not load related tree documents
1036             automatically.
1037              
1038             Note: the hash is stored in the document's metaData entry
1039             'fs-require'.
1040              
1041             =cut
1042              
1043             sub relatedDocuments {
1044 4     4 1 15 my ($self)=@_;
1045 4 50       9 return @{$self->metaData('fs-require') || []};
  4         20  
1046             }
1047              
1048              
1049             =item $document->loadRelatedDocuments($recurse,$callback)
1050              
1051             Loads related tree documents declared in the PML schema of this
1052             document as C (if any), unless already loaded.
1053              
1054             Both arguments are optional:
1055              
1056             the $recurse argument is a boolean flag indicating whether the
1057             loadRelatedDocuments() should be called on the loaded related
1058             docuemnts as well.
1059              
1060             the $calback may contain a callback (anonymouse subroutine) which will
1061             then be invoked before retrieveing a related tree document. The
1062             callback will receive two arguments; the current $document and an URL of
1063             the related tree document to retrieve.
1064              
1065             If the callback returns undef or empty list), the related document
1066             will be retrieved in a standard way (using
1067             C<< Treex::PML::Factory->createDocumentFromFile >>). If it returns a
1068             defined but false value (e.g. 0) the related document will not be
1069             retrieved at all. If it returns a defined value which is either a
1070             string or an URI object, the related document will be retrieved from
1071             that address. Finally, if the callback returns an object implementing
1072             the C interface, the object will be associated
1073             with the current docment.
1074              
1075             =cut
1076              
1077             sub loadRelatedDocuments {
1078 4     4 1 4675 my ($self,$recurse,$callback)=@_;
1079 4         22 my @requires = $self->relatedDocuments();
1080 4         16 my $ref = $self->referenceObjectHash();
1081 4         12 my @loaded;
1082 4         16 for my $req (@requires) {
1083 4 50       21 next if ref($ref->{$req->[0]});
1084 4         20 my $req_URL = Treex::PML::ResolvePath($self->filename,$req->[1]);
1085 4         14 my $req_fs;
1086 4 100       24 if (ref($callback) eq 'CODE') {
1087 3         16 my $result = $callback->($self,$req_URL);
1088 3 50       21 if (defined $result) {
1089 3 100 33     24 if (!$result) {
    100          
    50          
1090 1         4 next;
1091             } elsif (UNIVERSAL::DOES::does($result,'Treex::PML::Document')) {
1092 1         20 $req_fs=$result;
1093             } elsif (blessed($result) and $result->isa('URI')) {
1094 0         0 $req_URL = $result->as_string;
1095             } else {
1096 1         53 $req_URL = $result;
1097             }
1098             }
1099             }
1100 3 100       15 if (!defined $req_fs) {
1101 2 50       10 warn "Pre-loading dependent $req_URL ($req->[1]) as appData('ref')->{$req->[0]}\n" if $Treex::PML::Debug;
1102 2         16 $req_fs = Treex::PML::Factory->createDocumentFromFile($req_URL);
1103             }
1104 2         8 push @loaded,$req_fs;
1105 2         11 my $part_of = $req_fs->appData('fs-part-of');
1106 2 50       9 if (!ref($part_of)) {
1107 2         6 $part_of = [];
1108 2         8 $req_fs->changeAppData('fs-part-of',$part_of);
1109             }
1110 2         7 push @$part_of, $self;
1111 2         12 weaken($part_of->[-1]); # we rather weaken the back reference
1112 2         7 $self->appData('ref')->{$req->[0]}=$req_fs;
1113 2 50       11 push @loaded, $req_fs->loadRelatedDocuments(1,$callback) if $recurse;
1114             }
1115 3         15 return @loaded;
1116             }
1117              
1118             =item $document->relatedSuperDocuments()
1119              
1120             Returns a list of C objects representing related
1121             superior documents (i.e. documents that loaded the current documents
1122             using loadRelatedDocuments()).
1123              
1124             Note: these documents are stored in the document's appData entry
1125             'fs-part-of'.
1126              
1127             =cut
1128              
1129             sub relatedSuperDocuments {
1130 2     2 1 7 my ($self)=@_;
1131 2 50       6 return @{ $self->appData('fs-part-of')||[] };
  2         9  
1132             }
1133              
1134             =item $document->FS
1135              
1136             Return a reference to the associated FSFormat object.
1137              
1138             =cut
1139              
1140             sub FS {
1141 32     32 1 227 return $_[0]->[2];
1142             # my ($self) = @_;
1143             # return ref($self) ? $self->[2] : undef;
1144             }
1145              
1146             =pod
1147              
1148             =item $document->changeFS (FSFormat_object)
1149              
1150             Associate FS file with a new FSFormat object.
1151              
1152             =cut
1153              
1154             sub changeFS {
1155 2     2 1 6 my ($self,$val) = @_;
1156 2 50       5 return unless ref($self);
1157 2         13 $self->[2]=$val;
1158            
1159 2         8 my $enc = $val->special('E');
1160 2 50       6 if ($enc) {
1161 0         0 $self->changeEncoding($enc);
1162 0         0 delete $val->specials->{E};
1163             }
1164 2         14 return $self->[2];
1165             }
1166              
1167             =pod
1168              
1169             =item $document->hint
1170              
1171             Return the Tred's hint pattern declared in the Treex::PML::Document.
1172              
1173             =cut
1174              
1175              
1176             sub hint {
1177 11     11 1 34 my ($self) = @_;
1178 11 50       51 return ref($self) ? $self->[3] : undef;
1179             }
1180              
1181             =pod
1182              
1183             =item $document->changeHint (string)
1184              
1185             Change the Tred's hint pattern associated with this Treex::PML::Document.
1186              
1187             =cut
1188              
1189              
1190             sub changeHint {
1191 2     2 1 7 my ($self,$val) = @_;
1192 2 50       6 return unless ref($self);
1193 2         5 return $self->[3]=$val;
1194             }
1195              
1196             =pod
1197              
1198             =item $document->pattern_count
1199              
1200             Return the number of display attribute patterns associated with this Treex::PML::Document.
1201              
1202             =cut
1203              
1204             sub pattern_count {
1205 0     0 1 0 my ($self) = @_;
1206 0 0       0 return ref($self) ? scalar(@{ $self->[4] }) : undef;
  0         0  
1207             }
1208              
1209             =item $document->pattern (n)
1210              
1211             Return n'th the display pattern associated with this Treex::PML::Document.
1212              
1213             =cut
1214              
1215              
1216             sub pattern {
1217 0     0 1 0 my ($self,$index) = @_;
1218 0 0       0 return ref($self) ? $self->[4]->[$index] : undef;
1219             }
1220              
1221             =item $document->patterns
1222              
1223             Return a list of display attribute patterns associated with this Treex::PML::Document.
1224              
1225             =cut
1226              
1227             sub patterns {
1228 11     11 1 30 my ($self) = @_;
1229 11 50       35 return ref($self) ? @{$self->[4]} : undef;
  11         45  
1230             }
1231              
1232             =pod
1233              
1234             =item $document->changePatterns (list)
1235              
1236             Change the list of display attribute patterns associated with this Treex::PML::Document.
1237              
1238             =cut
1239              
1240             sub changePatterns {
1241 2     2 1 4 my $self = shift;
1242 2 50       7 return unless ref($self);
1243 2         4 return @{$self->[4]}=@_;
  2         7  
1244             }
1245              
1246             =pod
1247              
1248             =item $document->tail
1249              
1250             Return the unparsed tail of the FS file (i.e. Graph's embedded macros).
1251              
1252             =cut
1253              
1254              
1255             sub tail {
1256 6     6 1 10 my ($self) = @_;
1257 6 50       13 return ref($self) ? @{$self->[5]} : undef;
  6         22  
1258             }
1259              
1260             =pod
1261              
1262             =item $document->changeTail (list)
1263              
1264             Modify the unparsed tail of the FS file (i.e. Graph's embedded macros).
1265              
1266             =cut
1267              
1268              
1269             sub changeTail {
1270 3     3 1 9 my $self = shift;
1271 3 50       10 return unless ref($self);
1272 3         5 return @{$self->[5]}=@_;
  3         11  
1273             }
1274              
1275             =pod
1276              
1277             =item $document->trees
1278              
1279             Return a list of all trees (i.e. their roots represented by FSNode objects).
1280              
1281             =cut
1282              
1283             ## Two methods to work with trees (for convenience)
1284             sub trees {
1285 6     6 1 3171 my ($self) = @_;
1286 6 50       24 return ref($self) ? @{$self->treeList} : undef;
  6         19  
1287             }
1288              
1289             =pod
1290              
1291             =item $document->changeTrees (list)
1292              
1293             Assign a new list of trees.
1294              
1295             =cut
1296              
1297             sub changeTrees {
1298 24     24 1 63 my $self = shift;
1299 24 50       80 return unless ref($self);
1300 24         70 return @{$self->treeList}=@_;
  24         83  
1301             }
1302              
1303             =pod
1304              
1305             =item $document->treeList
1306              
1307             Return a reference to the internal array of all trees (e.g. their
1308             roots represented by FSNode objects).
1309              
1310             =cut
1311              
1312             # returns a reference!!!
1313             sub treeList {
1314 70     70 1 182 my ($self) = @_;
1315 70 50       635 return ref($self) ? $self->[6] : undef;
1316             }
1317              
1318             =pod
1319              
1320             =item $document->tree (n)
1321              
1322             Return a reference to the tree number n.
1323              
1324             =cut
1325              
1326             # returns a reference!!!
1327             sub tree {
1328 2     2 1 593 my ($self,$n) = @_;
1329 2 50       17 return ref($self) ? $self->[6]->[$n] : undef;
1330             }
1331              
1332              
1333             =pod
1334              
1335             =item $document->lastTreeNo
1336              
1337             Return number of associated trees minus one.
1338              
1339             =cut
1340              
1341             sub lastTreeNo {
1342 0     0 1 0 my ($self) = @_;
1343 0 0       0 return ref($self) ? $#{$self->treeList} : undef;
  0         0  
1344             }
1345              
1346             =pod
1347              
1348             =item $document->notSaved (value?)
1349              
1350             Return/assign file saving status (this is completely user-driven).
1351              
1352             =cut
1353              
1354             sub notSaved {
1355 35     35 1 129 my ($self,$val) = @_;
1356              
1357 35 50       130 return unless ref($self);
1358 35 50       147 return $self->[7]=$val if (defined $val);
1359 0           return $self->[7];
1360             }
1361              
1362             =item $document->currentTreeNo (value?)
1363              
1364             Return/assign index of current tree (this is completely user-driven).
1365              
1366             =cut
1367              
1368             sub currentTreeNo {
1369 0     0 1   my ($self,$val) = @_;
1370              
1371 0 0         return unless ref($self);
1372 0 0         return $self->[8]=$val if (defined $val);
1373 0           return $self->[8];
1374             }
1375              
1376             =item $document->currentNode (value?)
1377              
1378             Return/assign current node (this is completely user-driven).
1379              
1380             =cut
1381              
1382             sub currentNode {
1383 0     0 1   my ($self,$val) = @_;
1384              
1385 0 0         return unless ref($self);
1386 0 0         return $self->[9]=$val if (defined $val);
1387 0           return $self->[9];
1388             }
1389              
1390             =pod
1391              
1392             =item $document->nodes (tree_no, prev_current, include_hidden)
1393              
1394             Get list of nodes for given tree. Returns two value list
1395             ($nodes,$current), where $nodes is a reference to a list of nodes for
1396             the tree and current is either root of the tree or the same node as
1397             prev_current if prev_current belongs to the tree. The list is sorted
1398             according to the ordering attribute (obtained from FS->order) and
1399             inclusion of hidden nodes (in the sense of FSFormat's hiding attribute
1400             FS->hide) depends on the boolean value of include_hidden.
1401              
1402             =cut
1403              
1404             sub nodes {
1405             # prepare value line and node list with deleted/saved hidden
1406             # and ordered by real Ord
1407              
1408 0     0 1   my ($document,$tree_no,$prevcurrent,$show_hidden)=@_;
1409 0           my @nodes=();
1410 0 0         return \@nodes unless ref($document);
1411              
1412              
1413 0 0         $tree_no=0 if ($tree_no<0);
1414 0 0         $tree_no=$document->lastTreeNo() if ($tree_no>$document->lastTreeNo());
1415              
1416 0           my $root=$document->treeList->[$tree_no];
1417 0           my $node=$root;
1418 0           my $current=$root;
1419              
1420 0           while($node) {
1421 0           push @nodes, $node;
1422 0 0         $current=$node if ($prevcurrent eq $node);
1423 0 0         $node=$show_hidden ? $node->following() : $node->following_visible($document->FS);
1424             }
1425              
1426 0           my $attr=$document->FS->order();
1427             # schwartzian transform
1428 0 0 0       if (defined($attr) or length($attr)) {
1429 6     6   3343 use sort 'stable';
  6         3225  
  6         40  
1430             @nodes =
1431 0           map { $_->[0] }
1432 0           sort { $a->[1] <=> $b->[1] }
1433 0           map { [$_, $_->get_member($attr) ] } @nodes;
  0            
1434             }
1435 0           return (\@nodes,$current);
1436             }
1437              
1438             =pod
1439              
1440             =item $document->value_line (tree_no, no_tree_numbers?)
1441              
1442             Return a sentence string for the given tree. Sentence string is a
1443             string of chained value attributes (FS->value) ordered according to
1444             the FS->sentord or FS->order if FS->sentord attribute is not defined.
1445              
1446             Unless no_tree_numbers is non-zero, prepend the resulting string with
1447             a "tree number/tree count: " prefix.
1448              
1449             =cut
1450              
1451             sub value_line {
1452 0     0 1   my ($document,$tree_no,$no_numbers)=@_;
1453 0 0         return unless $document;
1454              
1455 0 0         return ($no_numbers ? "" : ($tree_no+1)."/".($document->lastTreeNo+1).": ").
1456             join(" ",$document->value_line_list($tree_no));
1457             }
1458              
1459             =item $document->value_line_list (tree_no)
1460              
1461             Return a list of value (FS->value) attributes for the given tree
1462             ordered according to the FS->sentord or FS->order if FS->sentord
1463             attribute is not defined.
1464              
1465             =cut
1466              
1467             sub value_line_list {
1468 0     0 1   my ($document,$tree_no,$no_numbers,$wantnodes)=@_;
1469 0 0         return unless $document;
1470              
1471 0           my $node=$document->treeList->[$tree_no];
1472 0           my @sent=();
1473              
1474 0           my $sentord=$document->FS->sentord();
1475 0           my $val=$document->FS->value();
1476 0 0         $sentord=$document->FS->order() unless (defined($sentord));
1477              
1478             # if PML schemas are in use and one of the attributes
1479             # is an attr-path, we have to use $node->attr(...) instead of $node->{...}
1480             # (otherwise we optimize and use hash keys).
1481 0 0 0       if (($val=~m{/} or $sentord=~m{/}) and ref($document->metaData('schema'))) {
      0        
1482 0           while ($node) {
1483 0           my $value = $node->attr($val);
1484 0 0 0       push @sent,$node
      0        
1485             unless ($value eq '' or
1486             $value eq '???' or
1487             $node->attr($sentord)>=999); # this is a PDT-TR specific hack
1488 0           $node=$node->following();
1489             }
1490 0           @sent = sort { $a->attr($sentord) <=> $b->attr($sentord) } @sent;
  0            
1491 0 0         if ($wantnodes) {
1492 0           return (map { [$_->attr($val),$_] } @sent);
  0            
1493             } else {
1494 0           return (map { $_->attr($val) } @sent);
  0            
1495             }
1496             } else {
1497 0           while ($node) {
1498             push @sent,$node
1499             unless ($node->{$val} eq '' or
1500             $node->{$val} eq '???' or
1501 0 0 0       $node->{$sentord}>=999); # this is a PDT-TR specific hack
      0        
1502 0           $node=$node->following();
1503             }
1504 0           @sent = sort { $a->{$sentord} <=> $b->{$sentord} } @sent;
  0            
1505 0 0         if ($wantnodes) {
1506 0           return (map { [$_->{$val},$_] } @sent);
  0            
1507             } else {
1508 0           return (map { $_->{$val} } @sent);
  0            
1509             }
1510             }
1511             }
1512              
1513              
1514             =pod
1515              
1516             =item $document->insert_tree (root,position)
1517              
1518             Insert new tree at given position.
1519              
1520             =cut
1521              
1522             sub insert_tree {
1523 0     0 1   my ($self,$nr,$pos)=@_;
1524 0 0         splice(@{$self->treeList}, $pos, 0, $nr) if $nr;
  0            
1525 0           return $nr;
1526             }
1527              
1528             =pod
1529              
1530             =item $document->set_tree (root,pos)
1531              
1532             Set tree at given position.
1533              
1534             =cut
1535              
1536             sub set_tree {
1537 0     0 1   my ($self,$nr,$pos)=@_;
1538 0 0 0       croak('Usage: $document->set_tree(root,pos)') if !ref($nr) or ref($pos);
1539 0           $self->treeList->[$pos]=$nr;
1540 0           return $nr;
1541             }
1542              
1543             =item $document->append_tree (root)
1544              
1545             Append tree at given position.
1546              
1547             =cut
1548              
1549             sub append_tree {
1550 0     0 1   my ($self,$nr)=@_;
1551 0 0         croak('Usage: $document->append_tree(root,pos)') if !ref($nr);
1552 0           push @{$self->treeList},$nr;
  0            
1553 0           return $nr;
1554             }
1555              
1556              
1557             =pod
1558              
1559             =item $document->new_tree (position)
1560              
1561             Create a new tree at given position and return pointer to its root.
1562              
1563             =cut
1564              
1565             sub new_tree {
1566 0     0 1   my ($self,$pos)=@_;
1567              
1568 0           my $nr=Treex::PML::Factory->createNode(); # creating new root
1569 0           $self->insert_tree($nr,$pos);
1570 0           return $nr;
1571              
1572             }
1573              
1574             =item $document->delete_tree (position)
1575              
1576             Delete the tree at given position and return pointer to its root.
1577              
1578             =cut
1579              
1580             sub delete_tree {
1581 0     0 1   my ($self,$pos)=@_;
1582 0           my ($root)=splice(@{$self->treeList}, $pos, 1);
  0            
1583 0           return $root;
1584             }
1585              
1586             =item $document->destroy_tree (position)
1587              
1588             Delete the tree on a given position and destroy its content (the root and all its descendant nodes).
1589              
1590             =cut
1591              
1592             sub destroy_tree {
1593 0     0 1   my ($self,$pos)=@_;
1594 0           my $root=$self->delete_tree($pos);
1595 0 0         return unless $root;
1596 0           $root->destroy;
1597 0           return 1;
1598             }
1599              
1600             =item $document->swap_trees (position1,position2)
1601              
1602             Swap the trees on given positions in the tree list.
1603             The positions must be between 0 and lastTreeNo inclusive.
1604              
1605             =cut
1606              
1607             sub swap_trees {
1608 0     0 1   my ($self,$pos1,$pos2)=@_;
1609 0           my $tree_list = $self->treeList;
1610 0 0 0       unless (defined($pos1) and 0<=$pos1 and $pos1<=$self->lastTreeNo and
      0        
      0        
      0        
      0        
1611             defined($pos2) and 0<=$pos2 and $pos2<=$self->lastTreeNo) {
1612 0           croak("Fsfile->delete_tree(position1,position2): The positions must be between 0 and lastTreeNo inclusive!");
1613             }
1614 0 0         return if $pos1 == $pos2;
1615 0           my $root1 = $tree_list->[$pos1];
1616 0           $tree_list->[$pos1]=$tree_list->[$pos2];
1617 0           $tree_list->[$pos2]=$root1;
1618 0           return;
1619             }
1620              
1621             =item $document->move_tree_to (position1,position2)
1622              
1623             Move the tree on position1 in the tree list so that its position after
1624             the move is position2.
1625             The positions must be between 0 and lastTreeNo inclusive.
1626              
1627             =cut
1628              
1629             sub move_tree_to {
1630 0     0 1   my ($self,$pos1,$pos2)=@_;
1631 0 0 0       unless (defined($pos1) and 0<=$pos1 and $pos1<=$self->lastTreeNo and
      0        
      0        
      0        
      0        
1632             defined($pos2) and 0<=$pos2 and $pos2<=$self->lastTreeNo) {
1633 0           croak("Fsfile->delete_tree(position1,position2): The positions must be between 0 and lastTreeNo inclusive!");
1634             }
1635 0 0         return if $pos1 == $pos2;
1636 0           my $root = $self->delete_tree($pos1);
1637 0           $self->insert_tree($root,$pos2);
1638 0           return $root;
1639             }
1640              
1641             =item $document->test_tree_type ( root_type )
1642              
1643             This method can be used before a C or a similar operation
1644             to test if the root node provided as an argument is of a type valid
1645             for this Treex::PML::Document. More specifically, return 1 if the current file is
1646             not associated with a PML schema or if the tree list represented by
1647             PML list or sequence with the role #TREES permits members of the type
1648             of C. Otherwise return 0.
1649              
1650             A type-declaration object can be passed directly instead of
1651             C.
1652              
1653             =cut
1654              
1655             sub test_tree_type {
1656 0     0 1   my ($self, $obj) = @_;
1657 0 0         die 'Usage: $document->test_tree_type($node_or_decl)' unless ref($obj);
1658 0           my $type = $self->metaData('pml_trees_type');
1659 0 0         return 1 unless $type;
1660 0 0         if (UNIVERSAL::DOES::does($obj,'Treex::PML::Schema::Decl')) {
1661 0 0         if ($obj->get_decl_type == PML_TYPE_DECL) {
1662             # a named type decl passed, no problem
1663 0           $obj = $obj->get_content_decl;
1664             }
1665             } else {
1666             # assume it's a node
1667 0           $obj = $obj->type;
1668 0 0         return 0 unless $obj;
1669             }
1670 0           my $type_is = $type->get_decl_type;
1671 0 0         if ($type_is == PML_ELEMENT_DECL) {
    0          
1672 0           $type = $type->get_content_decl;
1673 0           $type_is = $type->get_decl_type;
1674             } elsif ($type_is == PML_MEMBER_DECL) {
1675 0           $type = $type->get_content_decl;
1676 0           $type_is = $type->get_decl_type;
1677             }
1678              
1679 0 0         if ($type_is == PML_SEQUENCE_DECL) {
    0          
1680 0 0         return 1 if $type->find_elements_by_content_decl($obj);
1681             } elsif ($type_is == PML_LIST_DECL) {
1682 0 0         return 1 if $type->get_content_decl == $obj;
1683             }
1684             }
1685              
1686             sub _can_have_children {
1687 0     0     my ($parent_decl)=@_;
1688 0 0         return unless $parent_decl;
1689 0           my $parent_decl_type = $parent_decl->get_decl_type;
1690 0 0         if ($parent_decl_type == PML_ELEMENT_DECL()) {
1691 0           $parent_decl = $parent_decl->get_content_decl;
1692 0           $parent_decl_type = $parent_decl->get_decl_type;
1693             }
1694 0 0         if ($parent_decl_type == PML_STRUCTURE_DECL()) {
    0          
1695 0 0         return 1 if $parent_decl->find_members_by_role('#CHILDNODES');
1696             } elsif ($parent_decl_type == PML_CONTAINER_DECL()) {
1697 0           my $content_decl = $parent_decl->get_content_decl;
1698 0 0 0       return 1 if $content_decl and $content_decl->get_role eq '#CHILDNODES';
1699             }
1700 0           return 0;
1701             }
1702              
1703              
1704              
1705             =item $document->determine_node_type ( node, { choose_command => sub{...} } )
1706              
1707             If the node passed already has a PML type, the type is returned.
1708              
1709             Otherwise this method tries to determine and set the PML type of the current
1710             node based on the type of its parent and possibly the node's '#name'
1711             attribute.
1712              
1713             If the node type cannot be determined, the method dies.
1714              
1715             If more than one type is possible for the node, the method first tries
1716             to run a callback routine passed in the choose_command option (if
1717             available) passing it three arguments: the $document, $node and an ARRAY
1718             reference of possible types. If the callback returns back one of the
1719             types, it is assigned to the node. Otherwise no type is assigned and
1720             the method returns a list of possible node types.
1721              
1722             =cut
1723              
1724             sub determine_node_type {
1725 0     0 1   my ($document,$node,$opts)=@_;
1726 0           my $type = $node->type;
1727 0 0         return $type if $type;
1728 0           my $ntype;
1729             my @ntypes;
1730 0 0         my $has_children = $node->firstson ? 1 : 0;
1731 0 0         if ($node->parent) {
1732             # is parent's type known?
1733 0           my $parent_decl = $node->parent->type;
1734 0 0         if (ref($parent_decl)) {
1735             # ok, find #CHILDNODES
1736 0           my $parent_decl_type = $parent_decl->get_decl_type;
1737 0           my $member_decl;
1738 0 0         if ($parent_decl_type == PML_STRUCTURE_DECL()) {
    0          
1739 0           ($member_decl) = map { $_->get_content_decl }
  0            
1740             $parent_decl->find_members_by_role('#CHILDNODES');
1741             } elsif ($parent_decl_type == PML_CONTAINER_DECL()) {
1742 0           $member_decl = $parent_decl->get_content_decl;
1743 0 0 0       undef $member_decl unless $member_decl and $member_decl->get_role eq '#CHILDNODES';
1744             }
1745 0 0         if ($member_decl) {
1746 0           my $member_decl_type = $member_decl->get_decl_type;
1747 0 0         if ($member_decl_type == PML_LIST_DECL()) {
    0          
1748 0           $ntype = $member_decl->get_content_decl;
1749 0 0 0       undef $ntype unless $ntype and $ntype->get_role eq '#NODE'
      0        
      0        
1750             and (!$has_children or _can_have_children($ntype));
1751             } elsif ($member_decl_type == PML_SEQUENCE_DECL()) {
1752             my $elements =
1753             @ntypes =
1754 0 0         grep { !$has_children or _can_have_children($_->[1]) }
1755 0           grep { $_->[1]->get_role eq '#NODE' }
1756 0           map { [ $_->get_name, $_->get_content_decl ] }
  0            
1757             $member_decl->get_elements;
1758 0 0         if (defined $node->{'#name'}) {
1759 0           ($ntype) = grep { $_->[0] eq $node->{'#name'} } @ntypes;
  0            
1760 0 0         $ntype=$ntype->[1] if $ntype;
1761             }
1762             } else {
1763 0           die "I'm confused - found role #CHILDNODES on a ".$member_decl->get_decl_path().", which is neither a list nor a sequence...\n";
1764             }
1765             }
1766             } else {
1767             # ask the user to set the type of the parent first
1768 0           die("Parent node type is unknown.\nYou must assign node-type to the parent node first!");
1769 0           return;
1770             }
1771             } else {
1772             # find #TREES sequence representing the tree list
1773 0           my @tree_types;
1774 0 0         if (ref $document) {
1775 0           my $pml_trees_type = $document->metaData('pml_trees_type');
1776 0 0         if (ref $pml_trees_type) {
1777 0           @tree_types = ($pml_trees_type);
1778             } else {
1779 0           my $schema = $document->metaData('schema');
1780 0           @tree_types = $schema->find_types_by_role('#TREES');
1781             }
1782             }
1783 0           foreach my $tt (@tree_types) {
1784 0 0         if (!ref($tt)) {
1785 0           die("I'm confused - found role #TREES on something which is neither a list nor a sequence: $tt\n");
1786             }
1787 0           my $tt_is = $tt->get_decl_type;
1788 0 0 0       if ($tt_is == PML_ELEMENT_DECL or $tt_is == PML_MEMBER_DECL or $tt_is == PML_TYPE_DECL) {
      0        
1789 0           $tt = $tt->get_content_decl;
1790 0           $tt_is = $tt->get_decl_type;
1791             }
1792              
1793 0 0         if ($tt_is == PML_LIST_DECL()) {
    0          
1794 0           $ntype = $tt->get_content_decl;
1795 0 0 0       undef $ntype unless $ntype and $ntype->get_role eq '#NODE'
      0        
      0        
1796             and (!$has_children or _can_have_children($ntype));
1797             } elsif ($tt_is == PML_SEQUENCE_DECL()) {
1798             my $elements =
1799             @ntypes =
1800 0 0         grep { !$has_children or _can_have_children($_->[1]) }
1801 0           grep { $_->[1]->get_role eq '#NODE' }
1802 0           map { [ $_->get_name, $_->get_content_decl ] }
  0            
1803             $tt->get_elements;
1804 0 0         if (defined $node->{'#name'}) {
1805 0           ($ntype) = grep { $_->[0] eq $node->{'#name'} } @ntypes;
  0            
1806 0 0         $ntype=$ntype->[1] if $ntype;
1807             }
1808             } else {
1809 0           die ("I'm confused - found role #TREES on something which is neither a list nor a sequence: $tt\n");
1810             }
1811             }
1812             }
1813 0           my $base_type;
1814 0 0         if ($ntype) {
    0          
    0          
1815 0           $base_type = $ntype;
1816 0           $node->set_type($base_type);
1817             } elsif (@ntypes == 1) {
1818 0           $node->{'#name'} = $ntypes[0][0];
1819 0           $base_type = $ntypes[0][1];
1820 0           $node->set_type($base_type);
1821             } elsif (@ntypes > 1) {
1822 0           my $i = 1;
1823 0 0 0       if (ref($opts) and $opts->{choose_command}) {
1824 0           my $type = $opts->{choose_command}->($document,$node,[@ntypes]);
1825 0 0 0       if ($type and grep { $_==$type } @ntypes) {
  0            
1826 0           $node->set_type($type->[1]);
1827 0           $node->{'#name'} = $type->[0];
1828 0           $base_type=$node->type;
1829             } else {
1830 0           return;
1831             }
1832             }
1833             } else {
1834 0           die("Cannot determine node type: schema does not allow nodes on this level...\n");
1835 0           return;
1836             }
1837 0           return $node->type;
1838             }
1839              
1840             =back
1841              
1842             =cut
1843              
1844             =head1 SEE ALSO
1845              
1846             L, L, L, L
1847              
1848             =head1 COPYRIGHT AND LICENSE
1849              
1850             Copyright (C) 2006-2010 by Petr Pajas
1851              
1852             This library is free software; you can redistribute it and/or modify
1853             it under the same terms as Perl itself, either Perl version 5.8.2 or,
1854             at your option, any later version of Perl 5 you may have available.
1855              
1856             =cut
1857              
1858              
1859             1;