File Coverage

blib/lib/Treex/PML/Document.pm
Criterion Covered Total %
statement 1 3 33.3
branch n/a
condition n/a
subroutine 1 1 100.0
pod n/a
total 2 4 50.0


line stmt bran cond sub pod time code
1             package Treex::PML::Document;
2              
3             ############################################################
4             #
5             # FS File
6             # =========
7             #
8             #
9 1     1   434 use Treex::PML::Schema;
  0            
  0            
10             use Carp;
11             use strict;
12              
13             use vars qw($VERSION);
14             BEGIN {
15             $VERSION='2.21'; # version template
16             }
17             use URI;
18             use URI::file;
19             use Cwd;
20             use Treex::PML::FSFormat;
21             use Treex::PML::Backend::FS;
22             use Treex::PML::Node;
23             use Treex::PML::Factory;
24              
25             use Scalar::Util qw(blessed weaken);
26             use UNIVERSAL::DOES;
27              
28             =head1 NAME
29              
30             Treex::PML::Document - Treex::PML class representing a document consisting of a set of trees.
31              
32             =head1 DESCRIPTION
33              
34             This class implements a document consisting of a set of trees. The
35             document may be associated with a FS format and a PML schema and can
36             contain additional meta data, application data, and user data
37             (implemented as name/value paris).
38              
39             For backward compatibility, a the document may also contain data
40             related with the FS format, e.g. a patterns and tail.
41              
42             =head1 METHODS
43              
44             =over 4
45              
46             =cut
47              
48             =item Treex::PML::Document->load (filename,\%opts ?)
49              
50             NOTE: Don't call this method as a constructor directly, use Treex::PML::Factory->createDocumentFromFile() instead!
51              
52             Load a Treex::PML::Document object from a given file. If called as a class
53             method, a new instance is created, otherwise the current instance is
54             reinitialized and reused. The method returns the instance or dies
55             (using Carp::croak) if loading fails (unless option C is set,
56             see below).
57              
58             Loading options can be passed as a HASH reference in the second
59             argument. The following keys are supported:
60              
61             =over 8
62              
63             =item backends
64              
65             An ARRAY reference of IO backend names (previously imported using
66             C). These backends are tried additionally to
67             Treex::PML::Backend::FS. If not given, the backends previously selected using
68             C or C are used instead.
69              
70             =item encoding
71              
72             A name of character set (encoding) to be used by text-based I/O
73             backends such as Treex::PML::Backend::FS.
74              
75             =item recover
76              
77             If true, the method returns normally in case of loading failure, but
78             sets the global variable C<$Treex::PML::FSError> to the value return value
79             of C, indicating the error.
80              
81             =back
82              
83             =cut
84              
85             sub load {
86             my ($class,$filename,$opts) = @_;
87             $opts||={};
88             my $new=ref($class) ? $class : $class->new();
89             # the second arg may/may not be encoding string
90             $new->changeEncoding($opts->{encoding}) if $opts->{encoding};
91             my $error = $new->readFile($filename,@{$opts->{backends} || \@Treex::PML::BACKENDS});
92             if ($opts->{recover}) {
93             $Treex::PML::FSError = $error;
94             return $new;
95             } elsif ($error == 1) {
96             croak("Loading file '$filename' failed: no suitable backend!");
97             } elsif ($error) {
98             croak("Loading file '$filename' failed, possible error: $!");
99             } else {
100             return $new;
101             }
102             }
103              
104              
105              
106             # # Treex::PML::Document->newFSFile (filename,encoding?,\@backends)
107              
108             # This is an obsolete interface for loading a Treex::PML::Document from file.
109             # It is recommended to use Treex::PML::Document->load() instad.
110              
111             # This method retruns the new instance. The value of $Treex::PML::FSError
112             # contains the return value of $document->readFile and should be used to
113             # check for errors.
114              
115             # #
116              
117             sub newFSFile {
118             my ($self,$filename) = (shift,shift);
119             my $new=$self->new();
120             # the second arg may/may not be encoding string
121             $new->changeEncoding(shift) unless ref($_[0]);
122             $Treex::PML::FSError=$new->readFile($filename,@_);
123             return $new;
124             }
125              
126             =pod
127              
128             =item Treex::PML::Document->new (name?, file_format?, FS?, hint_pattern?, attribs_patterns?, unparsed_tail?, trees?, save_status?, backend?, encoding?, user_data?, meta_data?, app_data?)
129              
130             Creates and returns a new FS file object based on the given values
131             (optional). For use with arguments, it is more convenient to use the
132             method C instead.
133              
134             NOTE: Don't call this constructor directly, use Treex::PML::Factory->createDocument() instead!
135              
136             =cut
137              
138             sub new {
139             my $self = shift;
140             if (@_==1 and ref($_[0]) eq 'HASH') {
141             return $self->create($_[0]);
142             }
143             my $class = ref($self) || $self;
144             my $new = [];
145             bless $new, $class;
146             $new->initialize(@_);
147             return $new;
148             }
149              
150             =pod
151              
152             =item Treex::PML::Document->new({ argument => value, ... })
153              
154             or
155              
156             =item Treex::PML::Document->create({ argument => value, ... })
157              
158             NOTE: Don't call this constructor directly, use Treex::PML::Factory->createDocument() instead!
159              
160             Creates and returns a new empty Treex::PML::Document object based on the
161             given parameters. This method accepts argument => value pairs as
162             arguments. The following arguments are available:
163              
164             name, format, FS, hint, patterns, tail, trees, save_status, backend
165              
166             See C for more details.
167              
168              
169             =cut
170              
171             sub create {
172             my $self = shift;
173             my $args = (@_==1 and ref($_[0])) ? $_[0] : { @_ };
174             if (exists $args->{filename}) {
175             croak(__PACKAGE__."->create: Unknown parameter 'filename'\n");
176             }
177             return $self->new(@{$args}{qw(name format FS hint patterns tail trees save_status backend encoding user_data meta_data app_data)});
178             }
179              
180              
181             =item $document->clone ($clone_trees)
182              
183             Create a new Treex::PML::Document object with the same file name, file
184             format, meta data, FSFormat, backend, encoding, patterns, hint and
185             tail as the current Treex::PML::Document. If $clone_trees is true,
186             populate the new Treex::PML::Document object with clones of all trees
187             from the current Treex::PML::Document.
188              
189             =cut
190              
191             sub clone {
192             my ($self, $deep)=@_;
193             my $fs=$self->FS;
194             my $new = ref($self)->create(
195             name => $self->filename,
196             format => $self->fileFormat,
197             FS => $fs->clone,
198             trees => [],
199             backend => $self->backend,
200             encoding => $self->encoding,
201             hint => $self->hint,
202             patterns => [ $self->patterns() ],
203             tail => $self->tail
204             );
205             # clone metadata
206             if (ref($self->[13])) {
207             $new->[13] = Treex::PML::CloneValue($self->[13]);
208             }
209             if ($deep) {
210             @{$new->treeList} = map { $fs->clone_subtree($_) } $self->trees();
211             }
212             return $new;
213             }
214              
215             sub _weakenLinks {
216             my ($self) = @_;
217             foreach my $tree (@{$self->treeList}) {
218             Treex::PML::_WeakenLinks($tree);
219             }
220             }
221              
222             sub DESTROY {
223             my ($self) = @_;
224             return unless ref($self);
225             # this is not needed if all links are weak
226             $_->destroy() for (@{$self->treeList});
227             undef @$self;
228             }
229              
230             =pod
231              
232             =item $document->initialize (name?, file_format?, FS?, hint_pattern?, attribs_patterns?, unparsed_tail?, trees?, save_status?, backend?, encoding?, user_data?, meta_data?, app_data?)
233              
234             Initialize a FS file object. Argument description:
235              
236             =over 4
237              
238             =item name (scalar)
239              
240             File name
241              
242             =item file_format (scalar)
243              
244             File format identifier (user-defined string). TrEd, for example, uses
245             C, C and C strings as identifiers.
246              
247             =item FS (FSFormat)
248              
249             FSFormat object associated with the file
250              
251             =item hint_pattern (scalar)
252              
253             hint pattern definition (used by TrEd)
254              
255             =item attribs_patterns (list reference)
256              
257             embedded stylesheet patterns (used by TrEd)
258              
259             =item unparsed_tail (list reference)
260              
261             The rest of the file, which is not parsed by Treex::PML, i.e. Graph's embedded macros
262              
263             =item trees (list reference)
264              
265             List of FSNode objects representing root nodes of all trees in the Treex::PML::Document.
266              
267             =item save_status (scalar)
268              
269             File save status indicator, 0=file is saved, 1=file is not saved (TrEd
270             uses this field).
271              
272             =item backend (scalar)
273              
274             IO Backend used to open/save the file.
275              
276             =item encoding (scalar)
277              
278             IO character encoding for perl 5.8 I/O filters
279              
280             =item user_data (arbitrary scalar type)
281              
282             Reserved for the user. Content of this slot is not persistent.
283              
284             =item meta_data (hashref)
285              
286             Meta data (usually used by IO Backends to store additional information
287             about the file - i.e. other than encoding, trees, patterns, etc).
288              
289             =item app_data (hashref)
290              
291             Non-persistent application specific data associated with the file (by
292             default this is an empty hash reference). Applications may store
293             temporary data associated with the file into this hash.
294              
295             =back
296              
297              
298             =cut
299              
300             sub initialize {
301             my $self = shift;
302             # what will we do here ?
303             $self->[1] = $_[1]; # file format (scalar)
304             $self->[2] = ref($_[2]) ? $_[2] : Treex::PML::Factory->createFSFormat(); # FS format (FSFormat object)
305             $self->[3] = $_[3]; # hint pattern
306             $self->[4] = ref($_[4]) eq 'ARRAY' ? $_[4] : []; # list of attribute patterns
307             $self->[5] = ref($_[5]) eq 'ARRAY' ? $_[5] : []; # unparsed rest of a file
308             $self->[6] = UNIVERSAL::isa($_[6],'ARRAY') ?
309             Treex::PML::Factory->createList($_[6],1) :
310             Treex::PML::Factory->createList(); # trees
311             $self->[7] = $_[7] ? $_[7] : 0; # notsaved
312             $self->[8] = undef; # storage for current tree number
313             $self->[9] = undef; # storage for current node
314             $self->[10] = $_[8] ? $_[8] : 'Treex::PML::Backend::FS'; # backend;
315             $self->[11] = $_[9] ? $_[9] : undef; # encoding;
316             $self->[12] = $_[10] ? $_[10] : {}; # user data
317             $self->[13] = $_[11] ? $_[11] : {}; # meta data
318             $self->[14] = $_[12] ? $_[12] : {}; # app data
319              
320             $self->[15] = undef;
321             if (defined $_[0]) {
322             $self->changeURL($_[0]);
323             } else {
324             $self->[0] = undef;
325             }
326             return ref($self) ? $self : undef;
327             }
328              
329             =pod
330              
331             =item $document->readFile ($filename, \@backends)
332              
333             NOTE: Don't call this constructor directly, use Treex::PML::Factory->createDocumentFromFile() instead!
334              
335             Read a document from a given file. The first argument
336             must be a file-name. The second argument may be a list reference
337             consisting of names of I/O backends. If no backends are given, only
338             the Treex::PML::Backend::FS is used. For each I/O backend, C tries to
339             execute the C function from the appropriate class in the order
340             in which the backends were specified, passing it the filename as an
341             argument. The first I/O backend whose C function returns 1 is
342             then used to read the file.
343              
344             Note: this function sets noSaved to zero.
345              
346             Return values:
347             0 - succes
348             1 - no suitable backend
349             -1 - backend failed
350              
351             =cut
352              
353             sub readFile {
354             my ($self,$url) = (shift,shift);
355             my @backends = UNIVERSAL::isa($_[0],'ARRAY') ? @{$_[0]} : scalar(@_) ? @_ : qw(Treex::PML::Backend::FS);
356             my $ret = 1;
357             croak("readFile is not a class method") unless ref($self);
358             $url =~ s/^\s*|\s*$//g;
359             my ($file,$remove_file) = eval { Treex::PML::IO::fetch_file($url) };
360             print STDERR "Actual file: $file\n" if $Treex::PML::Debug;
361             return -1 if $@;
362             foreach my $backend (@backends) {
363             print STDERR "Trying backend $backend: " if $Treex::PML::Debug;
364             $backend = Treex::PML::BackendCanRead($backend);
365             if ($backend &&
366             eval {
367             no strict 'refs';
368             &{"${backend}::test"}($file,$self->encoding);
369             }) {
370             $self->changeBackend($backend);
371             $self->changeFilename($url);
372             print STDERR "success\n" if $Treex::PML::Debug;
373             eval {
374             no strict 'refs';
375             my $fh;
376             print STDERR "calling ${backend}::open_backend\n" if $Treex::PML::Debug;
377             $fh = &{"${backend}::open_backend"}($file,"r",$self->encoding);
378             &{"${backend}::read"}($fh,$self);
379             &{"${backend}::close_backend"}($fh) || warn "Close failed.\n";
380             };
381             if ($@) {
382             print STDERR "Error occured while reading '$url' using backend ${backend}:\n";
383             my $err = $@; chomp $err;
384             print STDERR "$err\n";
385             $ret = -1;
386             } else {
387             $ret = 0;
388             }
389             $self->notSaved(0);
390             last;
391             }
392             print STDERR "fail\n" if $Treex::PML::Debug;
393             # eval {
394             # no strict 'refs';
395             # print STDERR "TEST",$backend->can('test'),"\n";
396             # print STDERR "READ",$backend->can('read'),"\n";
397             # print STDERR "OPEN",$backend->can('open_backend'),"\n";
398             # print STDERR "REAL_TEST($file): ",&{"${backend}::test"}($file,$self->encoding),"\n";
399             # } if $Treex::PML::Debug;
400             if ($@) {
401             my $err = $@; chomp $err;
402             print STDERR "$err\n";
403             }
404             }
405             if ($ret == 1) {
406             my $err = "Unknown file type (all IO backends failed): $url\n";
407             $@.="\n".$err;
408             }
409             if ($url ne $file and $remove_file) {
410             local $!;
411             unlink $file || warn "couldn't unlink tmp file $file: $!\n";
412             }
413             return $ret;
414             }
415              
416             =pod
417              
418             =item $document->save ($filename?)
419              
420             Save Treex::PML::Document object to a given file using the corresponding I/O backend
421             (see $document->changeBackend) and set noSaved to zero.
422              
423             =item $document->writeFile ($filename?)
424              
425             This is just an alias for $document->save($filename).
426              
427             =cut
428              
429             sub writeFile {
430             my ($self,$filename) = @_;
431             return unless ref($self);
432              
433             $filename = $self->filename unless (defined($filename) and $filename ne "");
434             my $backend=$self->backend || 'Treex::PML::Backend::FS';
435             print STDERR "Writing to $filename using backend $backend\n" if $Treex::PML::Debug;
436             my $ret;
437             #eval {
438             no strict 'refs';
439              
440             my $fh;
441             $backend = Treex::PML::BackendCanWrite($backend) || die "Backend $backend is not loaded or does not support writing\n";
442             ($fh=&{"${backend}::open_backend"}($filename,"w",$self->encoding)) || die "Open failed on '$filename' using backend $backend\n";
443             $ret=&{"${backend}::write"}($fh,$self) || die "Write to '$filename' failed using backend $backend\n";
444             &{"${backend}::close_backend"}($fh) || die "Closing file '$filename' failed using backend $backend\n";
445             #};
446             #if ($@) {
447             # print STDERR "Error: $@\n";
448             # return 0;
449             #}
450             $self->notSaved(0) if $ret;
451             return $ret;
452             }
453              
454             BEGIN {
455             *save = \&writeFile;
456             }
457              
458             =item $document->writeTo (glob_ref)
459              
460             Write FS declaration, trees and unparsed tail to a given file (file handle open for
461             reading must be passed as a GLOB reference). Sets noSaved to zero.
462              
463             =cut
464              
465             sub writeTo {
466             my ($self,$fileref) = @_;
467             return unless ref($self);
468              
469             my $backend=$self->backend || 'Treex::PML::Backend::FS';
470             print STDERR "Writing using backend $backend\n" if $Treex::PML::Debug;
471             my $ret;
472             eval {
473             no strict 'refs';
474             # require $backend;
475             $ret=$backend->can('write') && &{"${backend}::write"}($fileref,$self);
476             };
477             print STDERR "$@\n" if $@;
478             return $ret;
479             }
480              
481             =pod
482              
483             =item $document->filename
484              
485             Return the FS file's file name. If the actual file name is a file:// URL,
486             convert it to system path and return it. If it is a different type of URL,
487             return the corresponding URI object.
488              
489             =cut
490              
491              
492             #
493             # since URI::file->file is expensive, we cache the value in $self->[15]
494             #
495             # $self->[0] should always be an URI object (if not, we upgrade it)
496             #
497             #
498              
499              
500             sub filename {
501             my ($self) = @_;
502             return unless $self;
503              
504             my $filename = $self->[15]; # cached filename
505             if (defined $filename) {
506             return $filename
507             }
508             $filename = $self->[0] or return undef; # URI
509             if (!ref($filename)) {
510             $self->[15] = undef; # clear cache
511             $filename = $self->[0] = Treex::PML::IO::make_URI($filename);
512             }
513             if ((blessed($filename) and $filename->isa('URI::file'))) {
514             return ($self->[15] = $filename->file);
515             }
516             return $filename;
517             }
518              
519             =item $document->URL
520              
521             Return the FS file's URL as URI object.
522              
523             =cut
524              
525              
526             sub URL {
527             my ($self) = @_;
528             my $filename = $self->[0];
529             if ($filename and !(blessed($filename) and $filename->isa('URI'))) {
530             $self->[15]=undef;
531             return ($self->[0] = Treex::PML::IO::make_URI($filename));
532             }
533             return $filename;
534             }
535              
536             =pod
537              
538             =item $document->changeFilename (new_filename)
539              
540             Change the FS file's file name.
541              
542             =cut
543              
544              
545             sub changeFilename {
546             my ($self,$val) = @_;
547             return unless ref($self);
548             my $uri = $self->[0] = Treex::PML::IO::make_abs_URI($val);
549             $self->[15]=undef; # clear cache
550             return $uri;
551             }
552              
553             =item $document->changeURL (uri)
554              
555             Like changeFilename, but does not attempt to absoultize the filename.
556             The argument must be an absolute URL (preferably URI object).
557              
558             =cut
559              
560              
561             sub changeURL {
562             my ($self,$val) = @_;
563             return unless ref($self);
564             my $url = $self->[0] = Treex::PML::IO::make_URI($val);
565             $self->[15]=undef;
566             return $url;
567             }
568              
569             =pod
570              
571             =item $document->fileFormat
572              
573             Return file format identifier (user-defined string). TrEd, for
574             example, uses C, C and C
575             non-specific format> strings as identifiers.
576              
577             =cut
578              
579             sub fileFormat {
580             my ($self) = @_;
581             return ref($self) ? $self->[1] : undef;
582             }
583              
584             =pod
585              
586             =item $document->changeFileFormat (string)
587              
588             Change file format identifier.
589              
590             =cut
591              
592             sub changeFileFormat {
593             my ($self,$val) = @_;
594             return unless ref($self);
595             return $self->[1]=$val;
596             }
597              
598             =pod
599              
600             =item $document->backend
601              
602             Return IO backend module name. The default backend is Treex::PML::Backend::FS, used
603             to save files in the FS format.
604              
605             =cut
606              
607             sub backend {
608             my ($self) = @_;
609             return ref($self) ? $self->[10] : undef;
610             }
611              
612             =pod
613              
614             =item $document->changeBackend (string)
615              
616             Change file backend.
617              
618             =cut
619              
620             sub changeBackend {
621             my ($self,$val) = @_;
622             return unless ref($self);
623             return $self->[10]=$val;
624             }
625              
626             =pod
627              
628             =item $document->encoding
629              
630             Return file character encoding (used by Perl 5.8 input/output filters).
631              
632             =cut
633              
634             sub encoding {
635             my ($self) = @_;
636             return ref($self) ? $self->[11] : undef;
637             }
638              
639             =pod
640              
641             =item $document->changeEncoding (string)
642              
643             Change file character encoding (used by Perl 5.8 input/output filters).
644              
645             =cut
646              
647             sub changeEncoding {
648             my ($self,$val) = @_;
649             return unless ref($self);
650             return $self->[11]=$val;
651             }
652              
653              
654             =pod
655              
656             =item $document->userData
657              
658             Return user data associated with the file (by default this is an empty
659             hash reference). User data are not supposed to be persistent and IO
660             backends should ignore it.
661              
662             =cut
663              
664             sub userData {
665             my ($self) = @_;
666             return ref($self) ? $self->[12] : undef;
667             }
668              
669             =pod
670              
671             =item $document->changeUserData (value)
672              
673             Change user data associated with the file. User data are not supposed
674             to be persistent and IO backends should ignore it.
675              
676             =cut
677              
678             sub changeUserData {
679             my ($self,$val) = @_;
680             return unless ref($self);
681             return $self->[12]=$val;
682             }
683              
684             =pod
685              
686             =item $document->metaData (name)
687              
688             Return meta data stored into the object usually by IO backends. Meta
689             data are supposed to be persistent, i.e. they are saved together with
690             the file (at least by some IO backends).
691              
692             =cut
693              
694             sub metaData {
695             my ($self,$name) = @_;
696             return ref($self) ? $self->[13]->{$name} : undef;
697             }
698              
699             =pod
700              
701             =item $document->changeMetaData (name,value)
702              
703             Change meta information (usually used by IO backends). Meta data are
704             supposed to be persistent, i.e. they are saved together with the file
705             (at least by some IO backends).
706              
707             =cut
708              
709             sub changeMetaData {
710             my ($self,$name,$val) = @_;
711             return unless ref($self);
712             return $self->[13]->{$name}=$val;
713             }
714              
715             =item $document->listMetaData (name)
716              
717             In array context, return the list of metaData keys. In scalar context
718             return the hash reference where metaData are stored.
719              
720             =cut
721              
722             sub listMetaData {
723             my ($self) = @_;
724             return unless ref($self);
725             return wantarray ? keys(%{$self->[13]}) : $self->[13];
726             }
727              
728             =item $document->appData (name)
729              
730             Return application specific information associated with the
731             file. Application data are not persistent, i.e. they are not saved
732             together with the file by IO backends.
733              
734             =cut
735              
736             sub appData {
737             my ($self,$name) = @_;
738             return ref($self) ? $self->[14]->{$name} : undef;
739             }
740              
741             =pod
742              
743             =item $document->changeAppData (name,value)
744              
745             Change application specific information associated with the
746             file. Application data are not persistent, i.e. they are not saved
747             together with the file by IO backends.
748              
749             =cut
750              
751             sub changeAppData {
752             my ($self,$name,$val) = @_;
753             return unless ref($self);
754             return $self->[14]->{$name}=$val;
755             }
756              
757             =item $document->listAppData (name)
758              
759             In array context, return the list of appData keys. In scalar context
760             return the hash reference where appData are stored.
761              
762             =cut
763              
764             sub listAppData {
765             my ($self) = @_;
766             return unless ref($self);
767             return wantarray ? keys(%{$self->[14]}) : $self->[13];
768             }
769              
770             =pod
771              
772              
773             =item $document->schema
774              
775             Return a reference to the associated PML schema (if any). Note: The
776             pointer to the schema is stored in the metaData field 'schema'.
777              
778             =cut
779              
780             sub schema {
781             my($self)=@_;
782             return $self->metaData('schema');
783             }
784              
785             =item $document->schemaURL
786              
787             Return URL of the PML schema the document is associated with (if any).
788             Note that unlike $document->schema->get_url, the URL is not resolved
789             and is returned exactly as referenced in the document PML header.
790              
791             Note: The URL is stored in the metaData field 'schema-url'.
792              
793             =cut
794              
795             sub schemaURL {
796             my($self)=@_;
797             return $self->metaData('schema-url');
798             }
799              
800             =item $document->changeSchemaURL($newURL)
801              
802             Return URL of the PML schema the document is associated with (if any).
803             Note: The URL is stored in the metaData field 'schema-url'.
804              
805             =cut
806              
807             sub changeSchemaURL {
808             my($self,$url)=@_;
809             return $self->changeMetaData('schema-url',Treex::PML::IO::make_URI($url));
810             }
811              
812             =item $document->documentRootData()
813              
814             Return the root data structure of the PML instance (with trees, prolog and epilog taken out)
815             Note: The URL is stored in the metaData field 'pml_root'.
816              
817             =cut
818              
819             sub documentRootData {
820             my($self,$url)=@_;
821             return $self->metaData('pml_root');
822             }
823              
824             =item $document->treesProlog()
825              
826             Return a sequence of non-tree elements preceding trees in the PML
827             sequence (with role #TREES) from which trees were extracted (if any).
828             Note: The prolog is stored in the the metaData field 'pml_prolog'.
829              
830             =cut
831              
832             sub treesProlog {
833             my($self,$url)=@_;
834             return $self->metaData('pml_prolog');
835             }
836              
837             =item $document->treesEpilog()
838              
839             Return a sequence of non-tree elements following trees in the PML
840             sequence (with role #TREES) from which trees were extracted (if any).
841             Note: The epilog is stored in the the metaData field 'pml_epilog'.
842              
843             =cut
844              
845             sub treesEpilog {
846             my($self,$url)=@_;
847             return $self->metaData('pml_epilog');
848             }
849              
850             =item $document->lookupNodeByID($id)
851              
852             Lookup a node by its #ID. Note that the ID-hash is created when the
853             document is loaded (and if not, when first queried), but is not
854             maintained by this class. It must therefore be maintained by the
855             application.
856              
857             =cut
858              
859             sub lookupNodeByID {
860             my ($self,$id)=@_;
861             if (defined($id)) {
862             return $self->nodeIDHash()->{$id};
863             }
864             return;
865             }
866              
867             =item $document->deleteNodeIDHashEntry($node)
868              
869             Remove a given node from the ID-hash. Returns the value removed from
870             the ID hash (note: the function does not check if the entry for the
871             given node's ID actually was mapped to the given node) or undef if the
872             node's ID was not hashed.
873              
874             =cut
875              
876             sub deleteNodeIDHashEntry {
877             my ($self,$node)=@_;
878             my $id_hash = $self->appData('id-hash');
879             if (ref($id_hash)) {
880             my $id =$node->get_id;
881             if (defined $id) {
882             return delete $id_hash->{$id};
883             }
884             }
885             return undef;
886             }
887              
888             =item $document->deleteIDHashEntry($id)
889              
890             Remove a given ID from the ID-hash. Returns the removed hash entry (or
891             undef if ID was not hashed).
892              
893             =cut
894              
895             sub deleteIDHashEntry {
896             my ($self,$id)=@_;
897             my $id_hash = $self->appData('id-hash');
898             if (ref($id_hash)) {
899             return delete $id_hash->{$id};
900             }
901             return undef;
902             }
903              
904              
905             =item $document->hashNodeByID($node)
906              
907             Hash a node by its #ID. Note that the ID-hash is created when the
908             document is loaded (and if not, when first queried), but is not
909             maintained by this class. It must therefore be maintained by the
910             application.
911              
912             =cut
913              
914             sub hashNodeByID {
915             my ($self,$node)=@_;
916             my $id = $node->get_id;
917             if (defined $id) {
918             weaken( $self->nodeIDHash()->{$id} = $node );
919             }
920             return $id;
921             }
922              
923             =item $document->nodeIDHash()
924              
925             Return a hash reference mapping node IDs to node objects. If the ID
926             hash did not exist, it is rebuilt. Note: the ID hash, if exists, is
927             stored in the 'id-hash' appData entry.
928              
929             =cut
930              
931             sub nodeIDHash {
932             my ($self,$id)=@_;
933              
934             my $id_hash = $self->appData('id-hash');
935             if (ref($id_hash)) {
936             return $id_hash;
937             } else {
938             return $self->rebuildIDHash();
939             }
940             }
941              
942             =item $document->hasIDHash()
943              
944             Returns 1 if the document has an ID-to-node hash map, 0 otherwise.
945              
946             =cut
947              
948             sub hasIDHash {
949             my ($self)=@_;
950             if (ref($self->appData('id-hash'))) {
951             return 1;
952             } else {
953             return 0;
954             }
955             }
956              
957             =item $document->rebuildIDHash()
958              
959             Empty and rebuild document's ID-to-node hash.
960              
961             =cut
962              
963             sub rebuildIDHash {
964             my ($self)=@_;
965              
966             my $id_hash = $self->appData('id-hash');
967             if (ref($id_hash)) {
968             %$id_hash=();
969             } else {
970             $id_hash = {};
971             $self->changeAppData('id-hash',$id_hash);
972             }
973              
974             my %id_member;
975             for my $root ($self->trees) {
976             my $node = $root;
977             while ($node) {
978             my $member = $id_member{$node->type} ||= $node->get_id_member_name;
979             if ($member) {
980             weaken($id_hash->{ $node->{$member} } = $node);
981             }
982             $node = $node->following;
983             }
984             }
985             return $id_hash;
986             }
987              
988             =item $document->referenceURLHash
989              
990             Returns a HASHref mapping file reference IDs to URLs.
991              
992             =cut
993              
994             sub referenceURLHash {
995             my ($self)=@_;
996             return $self->metaData('references') || {};
997             }
998              
999             =item $document->referenceNameHash
1000              
1001             Returns a HASHref mapping file reference names to reference IDs. Each
1002             value of the hash is either a ID string (if there is just one
1003             reference with a given name) or a L containing all IDs
1004             associated with a given name.
1005              
1006             =cut
1007              
1008             sub referenceNameHash {
1009             my ($self)=@_;
1010             return $self->metaData('refnames') || {};
1011             }
1012              
1013             =item $document->referenceObjectHash()
1014              
1015             Returns a HASH whose keys are reference IDs and whose values are
1016             either DOM or C representations of the
1017             corresponding related resources. Unless related tree documents were
1018             loaded with loadRequiredDocuments(), this hash only contains resources
1019             declared as readas='dom' or readas='pml' in the PML schema.
1020              
1021              
1022             Note: the hash is stored in the document's appData entry 'ref'.
1023              
1024             =cut
1025              
1026             sub referenceObjectHash {
1027             my ($self)=@_;
1028             return $self->appData('ref');
1029             }
1030              
1031             =item $document->relatedDocuments()
1032              
1033             Returns a list of [id, URL] pairs of related tree documents declared
1034             in the PML schema of this document as C (if any).
1035             Note that C does not load related tree documents
1036             automatically.
1037              
1038             Note: the hash is stored in the document's metaData entry
1039             'fs-require'.
1040              
1041             =cut
1042              
1043             sub relatedDocuments {
1044             my ($self)=@_;
1045             return @{$self->metaData('fs-require') || []};
1046             }
1047              
1048              
1049             =item $document->loadRelatedDocuments($recurse,$callback)
1050              
1051             Loads related tree documents declared in the PML schema of this
1052             document as C (if any), unless already loaded.
1053              
1054             Both arguments are optional:
1055              
1056             the $recurse argument is a boolean flag indicating whether the
1057             loadRelatedDocuments() should be called on the loaded related
1058             docuemnts as well.
1059              
1060             the $calback may contain a callback (anonymouse subroutine) which will
1061             then be invoked before retrieveing a related tree document. The
1062             callback will receive two arguments; the current $document and an URL of
1063             the related tree document to retrieve.
1064              
1065             If the callback returns undef or empty list), the related document
1066             will be retrieved in a standard way (using
1067             CcreateDocumentFromFile>). If it returns a
1068             defined but false value (e.g. 0) the related document will not be
1069             retrieved at all. If it returns a defined value which is either a
1070             string or an URI object, the related document will be retrieved from
1071             that address. Finally, if the callback returns an object implementing
1072             the C interface, the object will be associated
1073             with the current docment.
1074              
1075             =cut
1076              
1077             sub loadRelatedDocuments {
1078             my ($self,$recurse,$callback)=@_;
1079             my @requires = $self->relatedDocuments();
1080             my $ref = $self->referenceObjectHash();
1081             my @loaded;
1082             for my $req (@requires) {
1083             next if ref($ref->{$req->[0]});
1084             my $req_URL = Treex::PML::ResolvePath($self->filename,$req->[1]);
1085             my $req_fs;
1086             if (ref($callback) eq 'CODE') {
1087             my $result = $callback->($self,$req_URL);
1088             if (defined $result) {
1089             if (!$result) {
1090             next;
1091             } elsif (UNIVERSAL::DOES::does($result,'Treex::PML::Document')) {
1092             $req_fs=$result;
1093             } elsif (blessed($result) and $result->isa('URI')) {
1094             $req_URL = $result->as_string;
1095             } else {
1096             $req_URL = $result;
1097             }
1098             }
1099             }
1100             if (!defined $req_fs) {
1101             warn "Pre-loading dependent $req_URL ($req->[1]) as appData('ref')->{$req->[0]}\n" if $Treex::PML::Debug;
1102             $req_fs = Treex::PML::Factory->createDocumentFromFile($req_URL);
1103             }
1104             push @loaded,$req_fs;
1105             my $part_of = $req_fs->appData('fs-part-of');
1106             if (!ref($part_of)) {
1107             $part_of = [];
1108             $req_fs->changeAppData('fs-part-of',$part_of);
1109             }
1110             push @$part_of, $self;
1111             weaken($part_of->[-1]); # we rather weaken the back reference
1112             $self->appData('ref')->{$req->[0]}=$req_fs;
1113             push @loaded, $req_fs->loadRelatedDocuments(1,$callback) if $recurse;
1114             }
1115             return @loaded;
1116             }
1117              
1118             =item $document->relatedSuperDocuments()
1119              
1120             Returns a list of C objects representing related
1121             superior documents (i.e. documents that loaded the current documents
1122             using loadRelatedDocuments()).
1123              
1124             Note: these documents are stored in the document's appData entry
1125             'fs-part-of'.
1126              
1127             =cut
1128              
1129             sub relatedSuperDocuments {
1130             my ($self)=@_;
1131             return @{ $self->appData('fs-part-of')||[] };
1132             }
1133              
1134             =item $document->FS
1135              
1136             Return a reference to the associated FSFormat object.
1137              
1138             =cut
1139              
1140             sub FS {
1141             return $_[0]->[2];
1142             # my ($self) = @_;
1143             # return ref($self) ? $self->[2] : undef;
1144             }
1145              
1146             =pod
1147              
1148             =item $document->changeFS (FSFormat_object)
1149              
1150             Associate FS file with a new FSFormat object.
1151              
1152             =cut
1153              
1154             sub changeFS {
1155             my ($self,$val) = @_;
1156             return unless ref($self);
1157             $self->[2]=$val;
1158            
1159             my $enc = $val->special('E');
1160             if ($enc) {
1161             $self->changeEncoding($enc);
1162             delete $val->specials->{E};
1163             }
1164             return $self->[2];
1165             }
1166              
1167             =pod
1168              
1169             =item $document->hint
1170              
1171             Return the Tred's hint pattern declared in the Treex::PML::Document.
1172              
1173             =cut
1174              
1175              
1176             sub hint {
1177             my ($self) = @_;
1178             return ref($self) ? $self->[3] : undef;
1179             }
1180              
1181             =pod
1182              
1183             =item $document->changeHint (string)
1184              
1185             Change the Tred's hint pattern associated with this Treex::PML::Document.
1186              
1187             =cut
1188              
1189              
1190             sub changeHint {
1191             my ($self,$val) = @_;
1192             return unless ref($self);
1193             return $self->[3]=$val;
1194             }
1195              
1196             =pod
1197              
1198             =item $document->pattern_count
1199              
1200             Return the number of display attribute patterns associated with this Treex::PML::Document.
1201              
1202             =cut
1203              
1204             sub pattern_count {
1205             my ($self) = @_;
1206             return ref($self) ? scalar(@{ $self->[4] }) : undef;
1207             }
1208              
1209             =item $document->pattern (n)
1210              
1211             Return n'th the display pattern associated with this Treex::PML::Document.
1212              
1213             =cut
1214              
1215              
1216             sub pattern {
1217             my ($self,$index) = @_;
1218             return ref($self) ? $self->[4]->[$index] : undef;
1219             }
1220              
1221             =item $document->patterns
1222              
1223             Return a list of display attribute patterns associated with this Treex::PML::Document.
1224              
1225             =cut
1226              
1227             sub patterns {
1228             my ($self) = @_;
1229             return ref($self) ? @{$self->[4]} : undef;
1230             }
1231              
1232             =pod
1233              
1234             =item $document->changePatterns (list)
1235              
1236             Change the list of display attribute patterns associated with this Treex::PML::Document.
1237              
1238             =cut
1239              
1240             sub changePatterns {
1241             my $self = shift;
1242             return unless ref($self);
1243             return @{$self->[4]}=@_;
1244             }
1245              
1246             =pod
1247              
1248             =item $document->tail
1249              
1250             Return the unparsed tail of the FS file (i.e. Graph's embedded macros).
1251              
1252             =cut
1253              
1254              
1255             sub tail {
1256             my ($self) = @_;
1257             return ref($self) ? @{$self->[5]} : undef;
1258             }
1259              
1260             =pod
1261              
1262             =item $document->changeTail (list)
1263              
1264             Modify the unparsed tail of the FS file (i.e. Graph's embedded macros).
1265              
1266             =cut
1267              
1268              
1269             sub changeTail {
1270             my $self = shift;
1271             return unless ref($self);
1272             return @{$self->[5]}=@_;
1273             }
1274              
1275             =pod
1276              
1277             =item $document->trees
1278              
1279             Return a list of all trees (i.e. their roots represented by FSNode objects).
1280              
1281             =cut
1282              
1283             ## Two methods to work with trees (for convenience)
1284             sub trees {
1285             my ($self) = @_;
1286             return ref($self) ? @{$self->treeList} : undef;
1287             }
1288              
1289             =pod
1290              
1291             =item $document->changeTrees (list)
1292              
1293             Assign a new list of trees.
1294              
1295             =cut
1296              
1297             sub changeTrees {
1298             my $self = shift;
1299             return unless ref($self);
1300             return @{$self->treeList}=@_;
1301             }
1302              
1303             =pod
1304              
1305             =item $document->treeList
1306              
1307             Return a reference to the internal array of all trees (e.g. their
1308             roots represented by FSNode objects).
1309              
1310             =cut
1311              
1312             # returns a reference!!!
1313             sub treeList {
1314             my ($self) = @_;
1315             return ref($self) ? $self->[6] : undef;
1316             }
1317              
1318             =pod
1319              
1320             =item $document->tree (n)
1321              
1322             Return a reference to the tree number n.
1323              
1324             =cut
1325              
1326             # returns a reference!!!
1327             sub tree {
1328             my ($self,$n) = @_;
1329             return ref($self) ? $self->[6]->[$n] : undef;
1330             }
1331              
1332              
1333             =pod
1334              
1335             =item $document->lastTreeNo
1336              
1337             Return number of associated trees minus one.
1338              
1339             =cut
1340              
1341             sub lastTreeNo {
1342             my ($self) = @_;
1343             return ref($self) ? $#{$self->treeList} : undef;
1344             }
1345              
1346             =pod
1347              
1348             =item $document->notSaved (value?)
1349              
1350             Return/assign file saving status (this is completely user-driven).
1351              
1352             =cut
1353              
1354             sub notSaved {
1355             my ($self,$val) = @_;
1356              
1357             return unless ref($self);
1358             return $self->[7]=$val if (defined $val);
1359             return $self->[7];
1360             }
1361              
1362             =item $document->currentTreeNo (value?)
1363              
1364             Return/assign index of current tree (this is completely user-driven).
1365              
1366             =cut
1367              
1368             sub currentTreeNo {
1369             my ($self,$val) = @_;
1370              
1371             return unless ref($self);
1372             return $self->[8]=$val if (defined $val);
1373             return $self->[8];
1374             }
1375              
1376             =item $document->currentNode (value?)
1377              
1378             Return/assign current node (this is completely user-driven).
1379              
1380             =cut
1381              
1382             sub currentNode {
1383             my ($self,$val) = @_;
1384              
1385             return unless ref($self);
1386             return $self->[9]=$val if (defined $val);
1387             return $self->[9];
1388             }
1389              
1390             =pod
1391              
1392             =item $document->nodes (tree_no, prev_current, include_hidden)
1393              
1394             Get list of nodes for given tree. Returns two value list
1395             ($nodes,$current), where $nodes is a reference to a list of nodes for
1396             the tree and current is either root of the tree or the same node as
1397             prev_current if prev_current belongs to the tree. The list is sorted
1398             according to the ordering attribute (obtained from FS->order) and
1399             inclusion of hidden nodes (in the sense of FSFormat's hiding attribute
1400             FS->hide) depends on the boolean value of include_hidden.
1401              
1402             =cut
1403              
1404             sub nodes {
1405             # prepare value line and node list with deleted/saved hidden
1406             # and ordered by real Ord
1407              
1408             my ($document,$tree_no,$prevcurrent,$show_hidden)=@_;
1409             my @nodes=();
1410             return \@nodes unless ref($document);
1411              
1412              
1413             $tree_no=0 if ($tree_no<0);
1414             $tree_no=$document->lastTreeNo() if ($tree_no>$document->lastTreeNo());
1415              
1416             my $root=$document->treeList->[$tree_no];
1417             my $node=$root;
1418             my $current=$root;
1419              
1420             while($node) {
1421             push @nodes, $node;
1422             $current=$node if ($prevcurrent eq $node);
1423             $node=$show_hidden ? $node->following() : $node->following_visible($document->FS);
1424             }
1425              
1426             my $attr=$document->FS->order();
1427             # schwartzian transform
1428             if (defined($attr) or length($attr)) {
1429             use sort 'stable';
1430             @nodes =
1431             map { $_->[0] }
1432             sort { $a->[1] <=> $b->[1] }
1433             map { [$_, $_->get_member($attr) ] } @nodes;
1434             }
1435             return (\@nodes,$current);
1436             }
1437              
1438             =pod
1439              
1440             =item $document->value_line (tree_no, no_tree_numbers?)
1441              
1442             Return a sentence string for the given tree. Sentence string is a
1443             string of chained value attributes (FS->value) ordered according to
1444             the FS->sentord or FS->order if FS->sentord attribute is not defined.
1445              
1446             Unless no_tree_numbers is non-zero, prepend the resulting string with
1447             a "tree number/tree count: " prefix.
1448              
1449             =cut
1450              
1451             sub value_line {
1452             my ($document,$tree_no,$no_numbers)=@_;
1453             return unless $document;
1454              
1455             return ($no_numbers ? "" : ($tree_no+1)."/".($document->lastTreeNo+1).": ").
1456             join(" ",$document->value_line_list($tree_no));
1457             }
1458              
1459             =item $document->value_line_list (tree_no)
1460              
1461             Return a list of value (FS->value) attributes for the given tree
1462             ordered according to the FS->sentord or FS->order if FS->sentord
1463             attribute is not defined.
1464              
1465             =cut
1466              
1467             sub value_line_list {
1468             my ($document,$tree_no,$no_numbers,$wantnodes)=@_;
1469             return unless $document;
1470              
1471             my $node=$document->treeList->[$tree_no];
1472             my @sent=();
1473              
1474             my $sentord=$document->FS->sentord();
1475             my $val=$document->FS->value();
1476             $sentord=$document->FS->order() unless (defined($sentord));
1477              
1478             # if PML schemas are in use and one of the attributes
1479             # is an attr-path, we have to use $node->attr(...) instead of $node->{...}
1480             # (otherwise we optimize and use hash keys).
1481             if (($val=~m{/} or $sentord=~m{/}) and ref($document->metaData('schema'))) {
1482             while ($node) {
1483             my $value = $node->attr($val);
1484             push @sent,$node
1485             unless ($value eq '' or
1486             $value eq '???' or
1487             $node->attr($sentord)>=999); # this is a PDT-TR specific hack
1488             $node=$node->following();
1489             }
1490             @sent = sort { $a->attr($sentord) <=> $b->attr($sentord) } @sent;
1491             if ($wantnodes) {
1492             return (map { [$_->attr($val),$_] } @sent);
1493             } else {
1494             return (map { $_->attr($val) } @sent);
1495             }
1496             } else {
1497             while ($node) {
1498             push @sent,$node
1499             unless ($node->{$val} eq '' or
1500             $node->{$val} eq '???' or
1501             $node->{$sentord}>=999); # this is a PDT-TR specific hack
1502             $node=$node->following();
1503             }
1504             @sent = sort { $a->{$sentord} <=> $b->{$sentord} } @sent;
1505             if ($wantnodes) {
1506             return (map { [$_->{$val},$_] } @sent);
1507             } else {
1508             return (map { $_->{$val} } @sent);
1509             }
1510             }
1511             }
1512              
1513              
1514             =pod
1515              
1516             =item $document->insert_tree (root,position)
1517              
1518             Insert new tree at given position.
1519              
1520             =cut
1521              
1522             sub insert_tree {
1523             my ($self,$nr,$pos)=@_;
1524             splice(@{$self->treeList}, $pos, 0, $nr) if $nr;
1525             return $nr;
1526             }
1527              
1528             =pod
1529              
1530             =item $document->set_tree (root,pos)
1531              
1532             Set tree at given position.
1533              
1534             =cut
1535              
1536             sub set_tree {
1537             my ($self,$nr,$pos)=@_;
1538             croak('Usage: $document->set_tree(root,pos)') if !ref($nr) or ref($pos);
1539             $self->treeList->[$pos]=$nr;
1540             return $nr;
1541             }
1542              
1543             =item $document->append_tree (root)
1544              
1545             Append tree at given position.
1546              
1547             =cut
1548              
1549             sub append_tree {
1550             my ($self,$nr)=@_;
1551             croak('Usage: $document->append_tree(root,pos)') if !ref($nr);
1552             push @{$self->treeList},$nr;
1553             return $nr;
1554             }
1555              
1556              
1557             =pod
1558              
1559             =item $document->new_tree (position)
1560              
1561             Create a new tree at given position and return pointer to its root.
1562              
1563             =cut
1564              
1565             sub new_tree {
1566             my ($self,$pos)=@_;
1567              
1568             my $nr=Treex::PML::Factory->createNode(); # creating new root
1569             $self->insert_tree($nr,$pos);
1570             return $nr;
1571              
1572             }
1573              
1574             =item $document->delete_tree (position)
1575              
1576             Delete the tree at given position and return pointer to its root.
1577              
1578             =cut
1579              
1580             sub delete_tree {
1581             my ($self,$pos)=@_;
1582             my ($root)=splice(@{$self->treeList}, $pos, 1);
1583             return $root;
1584             }
1585              
1586             =item $document->destroy_tree (position)
1587              
1588             Delete the tree on a given position and destroy its content (the root and all its descendant nodes).
1589              
1590             =cut
1591              
1592             sub destroy_tree {
1593             my ($self,$pos)=@_;
1594             my $root=$self->delete_tree($pos);
1595             return unless $root;
1596             $root->destroy;
1597             return 1;
1598             }
1599              
1600             =item $document->swap_trees (position1,position2)
1601              
1602             Swap the trees on given positions in the tree list.
1603             The positions must be between 0 and lastTreeNo inclusive.
1604              
1605             =cut
1606              
1607             sub swap_trees {
1608             my ($self,$pos1,$pos2)=@_;
1609             my $tree_list = $self->treeList;
1610             unless (defined($pos1) and 0<=$pos1 and $pos1<=$self->lastTreeNo and
1611             defined($pos2) and 0<=$pos2 and $pos2<=$self->lastTreeNo) {
1612             croak("Fsfile->delete_tree(position1,position2): The positions must be between 0 and lastTreeNo inclusive!");
1613             }
1614             return if $pos1 == $pos2;
1615             my $root1 = $tree_list->[$pos1];
1616             $tree_list->[$pos1]=$tree_list->[$pos2];
1617             $tree_list->[$pos2]=$root1;
1618             return;
1619             }
1620              
1621             =item $document->move_tree_to (position1,position2)
1622              
1623             Move the tree on position1 in the tree list so that its position after
1624             the move is position2.
1625             The positions must be between 0 and lastTreeNo inclusive.
1626              
1627             =cut
1628              
1629             sub move_tree_to {
1630             my ($self,$pos1,$pos2)=@_;
1631             unless (defined($pos1) and 0<=$pos1 and $pos1<=$self->lastTreeNo and
1632             defined($pos2) and 0<=$pos2 and $pos2<=$self->lastTreeNo) {
1633             croak("Fsfile->delete_tree(position1,position2): The positions must be between 0 and lastTreeNo inclusive!");
1634             }
1635             return if $pos1 == $pos2;
1636             my $root = $self->delete_tree($pos1);
1637             $self->insert_tree($root,$pos2);
1638             return $root;
1639             }
1640              
1641             =item $document->test_tree_type ( root_type )
1642              
1643             This method can be used before a C or a similar operation
1644             to test if the root node provided as an argument is of a type valid
1645             for this Treex::PML::Document. More specifically, return 1 if the current file is
1646             not associated with a PML schema or if the tree list represented by
1647             PML list or sequence with the role #TREES permits members of the type
1648             of C. Otherwise return 0.
1649              
1650             A type-declaration object can be passed directly instead of
1651             C.
1652              
1653             =cut
1654              
1655             sub test_tree_type {
1656             my ($self, $obj) = @_;
1657             die 'Usage: $document->test_tree_type($node_or_decl)' unless ref($obj);
1658             my $type = $self->metaData('pml_trees_type');
1659             return 1 unless $type;
1660             if (UNIVERSAL::DOES::does($obj,'Treex::PML::Schema::Decl')) {
1661             if ($obj->get_decl_type == PML_TYPE_DECL) {
1662             # a named type decl passed, no problem
1663             $obj = $obj->get_content_decl;
1664             }
1665             } else {
1666             # assume it's a node
1667             $obj = $obj->type;
1668             return 0 unless $obj;
1669             }
1670             my $type_is = $type->get_decl_type;
1671             if ($type_is == PML_ELEMENT_DECL) {
1672             $type = $type->get_content_decl;
1673             $type_is = $type->get_decl_type;
1674             } elsif ($type_is == PML_MEMBER_DECL) {
1675             $type = $type->get_content_decl;
1676             $type_is = $type->get_decl_type;
1677             }
1678              
1679             if ($type_is == PML_SEQUENCE_DECL) {
1680             return 1 if $type->find_elements_by_content_decl($obj);
1681             } elsif ($type_is == PML_LIST_DECL) {
1682             return 1 if $type->get_content_decl == $obj;
1683             }
1684             }
1685              
1686             sub _can_have_children {
1687             my ($parent_decl)=@_;
1688             return unless $parent_decl;
1689             my $parent_decl_type = $parent_decl->get_decl_type;
1690             if ($parent_decl_type == PML_ELEMENT_DECL()) {
1691             $parent_decl = $parent_decl->get_content_decl;
1692             $parent_decl_type = $parent_decl->get_decl_type;
1693             }
1694             if ($parent_decl_type == PML_STRUCTURE_DECL()) {
1695             return 1 if $parent_decl->find_members_by_role('#CHILDNODES');
1696             } elsif ($parent_decl_type == PML_CONTAINER_DECL()) {
1697             my $content_decl = $parent_decl->get_content_decl;
1698             return 1 if $content_decl and $content_decl->get_role eq '#CHILDNODES';
1699             }
1700             return 0;
1701             }
1702              
1703              
1704              
1705             =item $document->determine_node_type ( node, { choose_command => sub{...} } )
1706              
1707             If the node passed already has a PML type, the type is returned.
1708              
1709             Otherwise this method tries to determine and set the PML type of the current
1710             node based on the type of its parent and possibly the node's '#name'
1711             attribute.
1712              
1713             If the node type cannot be determined, the method dies.
1714              
1715             If more than one type is possible for the node, the method first tries
1716             to run a callback routine passed in the choose_command option (if
1717             available) passing it three arguments: the $document, $node and an ARRAY
1718             reference of possible types. If the callback returns back one of the
1719             types, it is assigned to the node. Otherwise no type is assigned and
1720             the method returns a list of possible node types.
1721              
1722             =cut
1723              
1724             sub determine_node_type {
1725             my ($document,$node,$opts)=@_;
1726             my $type = $node->type;
1727             return $type if $type;
1728             my $ntype;
1729             my @ntypes;
1730             my $has_children = $node->firstson ? 1 : 0;
1731             if ($node->parent) {
1732             # is parent's type known?
1733             my $parent_decl = $node->parent->type;
1734             if (ref($parent_decl)) {
1735             # ok, find #CHILDNODES
1736             my $parent_decl_type = $parent_decl->get_decl_type;
1737             my $member_decl;
1738             if ($parent_decl_type == PML_STRUCTURE_DECL()) {
1739             ($member_decl) = map { $_->get_content_decl }
1740             $parent_decl->find_members_by_role('#CHILDNODES');
1741             } elsif ($parent_decl_type == PML_CONTAINER_DECL()) {
1742             $member_decl = $parent_decl->get_content_decl;
1743             undef $member_decl unless $member_decl and $member_decl->get_role eq '#CHILDNODES';
1744             }
1745             if ($member_decl) {
1746             my $member_decl_type = $member_decl->get_decl_type;
1747             if ($member_decl_type == PML_LIST_DECL()) {
1748             $ntype = $member_decl->get_content_decl;
1749             undef $ntype unless $ntype and $ntype->get_role eq '#NODE'
1750             and (!$has_children or _can_have_children($ntype));
1751             } elsif ($member_decl_type == PML_SEQUENCE_DECL()) {
1752             my $elements =
1753             @ntypes =
1754             grep { !$has_children or _can_have_children($_->[1]) }
1755             grep { $_->[1]->get_role eq '#NODE' }
1756             map { [ $_->get_name, $_->get_content_decl ] }
1757             $member_decl->get_elements;
1758             if (defined $node->{'#name'}) {
1759             ($ntype) = grep { $_->[0] eq $node->{'#name'} } @ntypes;
1760             $ntype=$ntype->[1] if $ntype;
1761             }
1762             } else {
1763             die "I'm confused - found role #CHILDNODES on a ".$member_decl->get_decl_path().", which is neither a list nor a sequence...\n";
1764             }
1765             }
1766             } else {
1767             # ask the user to set the type of the parent first
1768             die("Parent node type is unknown.\nYou must assign node-type to the parent node first!");
1769             return;
1770             }
1771             } else {
1772             # find #TREES sequence representing the tree list
1773             my @tree_types;
1774             if (ref $document) {
1775             my $pml_trees_type = $document->metaData('pml_trees_type');
1776             if (ref $pml_trees_type) {
1777             @tree_types = ($pml_trees_type);
1778             } else {
1779             my $schema = $document->metaData('schema');
1780             @tree_types = $schema->find_types_by_role('#TREES');
1781             }
1782             }
1783             foreach my $tt (@tree_types) {
1784             if (!ref($tt)) {
1785             die("I'm confused - found role #TREES on something which is neither a list nor a sequence: $tt\n");
1786             }
1787             my $tt_is = $tt->get_decl_type;
1788             if ($tt_is == PML_ELEMENT_DECL or $tt_is == PML_MEMBER_DECL or $tt_is == PML_TYPE_DECL) {
1789             $tt = $tt->get_content_decl;
1790             $tt_is = $tt->get_decl_type;
1791             }
1792              
1793             if ($tt_is == PML_LIST_DECL()) {
1794             $ntype = $tt->get_content_decl;
1795             undef $ntype unless $ntype and $ntype->get_role eq '#NODE'
1796             and (!$has_children or _can_have_children($ntype));
1797             } elsif ($tt_is == PML_SEQUENCE_DECL()) {
1798             my $elements =
1799             @ntypes =
1800             grep { !$has_children or _can_have_children($_->[1]) }
1801             grep { $_->[1]->get_role eq '#NODE' }
1802             map { [ $_->get_name, $_->get_content_decl ] }
1803             $tt->get_elements;
1804             if (defined $node->{'#name'}) {
1805             ($ntype) = grep { $_->[0] eq $node->{'#name'} } @ntypes;
1806             $ntype=$ntype->[1] if $ntype;
1807             }
1808             } else {
1809             die ("I'm confused - found role #TREES on something which is neither a list nor a sequence: $tt\n");
1810             }
1811             }
1812             }
1813             my $base_type;
1814             if ($ntype) {
1815             $base_type = $ntype;
1816             $node->set_type($base_type);
1817             } elsif (@ntypes == 1) {
1818             $node->{'#name'} = $ntypes[0][0];
1819             $base_type = $ntypes[0][1];
1820             $node->set_type($base_type);
1821             } elsif (@ntypes > 1) {
1822             my $i = 1;
1823             if (ref($opts) and $opts->{choose_command}) {
1824             my $type = $opts->{choose_command}->($document,$node,[@ntypes]);
1825             if ($type and grep { $_==$type } @ntypes) {
1826             $node->set_type($type->[1]);
1827             $node->{'#name'} = $type->[0];
1828             $base_type=$node->type;
1829             } else {
1830             return;
1831             }
1832             }
1833             } else {
1834             die("Cannot determine node type: schema does not allow nodes on this level...\n");
1835             return;
1836             }
1837             return $node->type;
1838             }
1839              
1840             =back
1841              
1842             =cut
1843              
1844             =head1 SEE ALSO
1845              
1846             L, L, L, L
1847              
1848             =head1 COPYRIGHT AND LICENSE
1849              
1850             Copyright (C) 2006-2010 by Petr Pajas
1851              
1852             This library is free software; you can redistribute it and/or modify
1853             it under the same terms as Perl itself, either Perl version 5.8.2 or,
1854             at your option, any later version of Perl 5 you may have available.
1855              
1856             =cut
1857              
1858              
1859             1;