File Coverage

lib/PMLTQ/Suggest/Utils.pm
Criterion Covered Total %
statement 80 177 45.2
branch 21 84 25.0
condition 9 36 25.0
subroutine 16 21 76.1
pod 0 8 0.0
total 126 326 38.6


line stmt bran cond sub pod time code
1             package PMLTQ::Suggest::Utils;
2             our $AUTHORITY = 'cpan:MATY';
3             $PMLTQ::Suggest::Utils::VERSION = '1.1.0';
4 2     2   14 use strict;
  2         5  
  2         60  
5 2     2   9 use warnings;
  2         4  
  2         52  
6              
7 2     2   606 use Treex::PML::Document;
  2         232504  
  2         73  
8 2     2   16 use List::MoreUtils 'uniq';
  2         5  
  2         21  
9 2     2   1604 use File::Basename 'basename';
  2         5  
  2         99  
10 2     2   11 use UNIVERSAL::DOES;
  2         4  
  2         71  
11              
12 2     2   14 use Encode ();
  2         14  
  2         44  
13 2     2   12 use Treex::PML::Schema::CDATA;
  2         4  
  2         50  
14 2     2   11 use Treex::PML::Factory;
  2         5  
  2         58  
15 2     2   1069 use UNIVERSAL;
  2         27  
  2         11  
16              
17             #######################################################################################
18             # Usage : first(\&sub, @list)
19             # Purpose : Return the first element of list for which the sub returns true
20             # (no arguments are passed to the sub, it has to use $_);
21             # Return undef otherwise (or empty list in list context)
22             # Returns : see Purpose
23             # Parameters : anonymous_sub \&sub -- subroutine that does not take any arguments and
24             # returns values which can be evaluated to true or false
25             # list @list -- first element from the @list, which is accepted by \&sub is then returned
26             # Throws : no exceptions
27             # Comments : Prototyped function
28             sub first (&@) {
29 15     15 0 30 my $code = shift;
30              
31 15         45 foreach (@_) {
32 59 100       83 return $_ if &{$code}();
  59         93  
33             }
34              
35 0         0 return;
36             }
37             #######################################################################################
38             ### from TrEd::Utils
39             # Usage : apply_file_suffix($win, $goto)
40             # Purpose : Set current tree and node positions to positions described by
41             # $goto suffix in file displayed in $win window
42             # Returns : 1 if the new position was found and set, 0 otherwise
43             # Parameters : TrEd::Window $win -- reference to TrEd::Window object
44             # string $goto -- suffix of the file (or a position in the file)
45             # Throws : no exceptions
46             # Comments : Possible suffix formats:
47             # ##123.2 -- tree number 123 (if counting from 1) and its second node
48             # #123.3 -- tree whose $root->{form} equals to #123 and its third node
49             # (only hint found in Treex/PML/Backend/CSTS/Csts2fs.pm)
50             # #a123 -- finds node with id #a123 and the tree it belongs to
51             # The node's id can also be placed after the '.', e.g. ##123.#a123, in
52             # which case the sub searches for node with id #a123 inside tree no 123
53             #
54             # Sets $win->{treeNo} and $win->{currentNode} if appropriate.
55             # See Also : parse_file_suffix()
56             sub apply_file_suffix {
57 15     15 0 43 my ( $win, $goto ) = @_;
58 15 50       52 return if ( !defined $win );
59 15         33 my $fsfile = $win->{FSFile};
60 15 50 33     163 return if !( defined $fsfile && defined $goto && $goto ne ''); # $EMPTY_STR );
      33        
61              
62 15 50       147 if ( $goto =~ m/^##([0-9]+)/ ) {
    50          
    50          
63              
64             # handle cases like '##123'
65 0         0 my $no = int( $1 - 1 );
66 0         0 $win->{treeNo} = min( max( 0, $no ), $fsfile->lastTreeNo() );
67 0 0       0 return 0 if $win->{treeNo} != $no;
68             }
69             elsif ( $goto =~ /^#([0-9]+)/ ) {
70              
71             # handle cases like '#123'
72             # this is PDT 1.0-specific code, sorry
73 0         0 my $no;
74 0         0 for ( my $i = 0; $i <= $fsfile->lastTreeNo(); $i++ ) {
75 0 0       0 if ( $fsfile->treeList()->[$i]->{form} eq "#$1" ) {
76 0         0 $no = $i;
77 0         0 last;
78             }
79             }
80 0 0       0 return 0 if ( !defined $no );
81 0         0 $win->{treeNo} = $no;
82             }
83             elsif ( $goto =~ /^#([^#]+)$/ ) {
84              
85             # handle cases like '#a123'
86 15         49 my $id = $1;
87 15 50       126 if ( Treex::PML::Schema::CDATA->check_string_format( $id, 'ID' ) ) {
88 15         496 my $id_hash = $fsfile->appData('id-hash');
89 15 50 33     194 if ( UNIVERSAL::isa( $id_hash, 'HASH' )
90             && exists $id_hash->{$id} )
91             {
92 15         35 my $node = $id_hash->{$id};
93              
94             # we would like to use Treex::PML::Index() here, but can't
95             # and why we can not?
96 15         75 my $list = $fsfile->treeList();
97 15   33     153 my $root = UNIVERSAL::can( $node, 'root' ) && $node->root();
98             my $n = defined($root) && first {
99 59     59   204 $list->[$_] == $root;
100             }
101 15   66     365 0 .. $#$list;
102              
103 15 50 33     114 if ( defined $root and !defined($n) ) {
104 0         0 $n = _find_tree_no( $fsfile, $root, $list );
105              
106             # exit from _find_tree_no() function
107 0 0 0     0 if ( !defined $n || $n == -1 ) {
108 0         0 return 0;
109             }
110             }
111 15 50       45 if ( defined($n) ) {
112 15         43 $win->{treeNo} = $n;
113 15         37 $win->{currentNode} = $node;
114 15         109 return 1;
115             }
116             else {
117 0         0 return 0;
118             }
119             }
120             }
121             }
122              
123             # new: we're the dot in .[0-9]+ (TM)
124 0 0       0 if ( $goto =~ /\.([0-9]+)$/ ) {
    0          
125 0         0 my $root = get_node_by_no( $win, $1 );
126 0 0       0 if ($root) {
127 0         0 $win->{currentNode} = $root;
128 0         0 return 1;
129             }
130             else {
131 0         0 return 0;
132             }
133             }
134             elsif ( $goto =~ /\.([^0-9#][^#]*)$/ ) {
135 0         0 my $id = $1;
136 0 0       0 if ( Treex::PML::Schema::CDATA->check_string_format( $id, 'ID' ) ) {
137 0         0 my $id_hash = $fsfile->appData('id-hash');
138 0 0 0     0 if ( UNIVERSAL::isa( $id_hash, 'HASH' )
139             && exists( $id_hash->{$id} ) )
140             {
141             return 1
142 0 0       0 if ( $win->{currentNode} = $id_hash->{$id} ); # assignment
143             }
144             else {
145 0         0 return 0;
146             }
147             }
148             }
149 0         0 return 1;
150              
151             # hey, caller, you should redraw after this!
152             }
153              
154             #TODO: document & test this unclear function
155             sub _find_tree_no {
156 0     0   0 my ( $fsfile, $root, $list ) = @_;
157 0         0 my $n = undef;
158              
159             # hm, we have a node, but don't know to which tree
160             # it belongs
161 0         0 my $trees_type = $fsfile->metaData('pml_trees_type');
162 0         0 my $root_type = $root->type();
163              
164             #TODO: empty? or defined???
165 0 0 0     0 if ( $trees_type and $root_type ) {
166 0         0 my $trees_type_is = $trees_type->get_decl_type();
167 0         0 my %paths;
168             my $is_sequence;
169 0         0 my $found;
170 0         0 my @elements;
171 0 0       0 if ( $trees_type_is == Treex::PML::Schema::PML_LIST_DECL() ) {
    0          
172 0         0 @elements = [ 'LM', $trees_type->get_content_decl() ];
173             }
174             elsif ( $trees_type_is == Treex::PML::Schema::PML_SEQUENCE_DECL() ) {
175              
176             # Treex::PML::Schema::Element::get_name(),
177             # ::Schema::Decl::get_content_decl()
178 0         0 @elements = map { [ $_->get_name(), $_->get_content_decl() ] }
  0         0  
179             $trees_type->get_elements();
180 0         0 $is_sequence = 1;
181             }
182             else {
183 0         0 return -1;
184             }
185              
186 0         0 for my $el (@elements) {
187             $paths{ $el->[0] } = [
188             $trees_type->get_schema->find_decl(
189             sub {
190 0     0   0 $_[0] == $root_type;
191             },
192 0         0 $el->[1],
193             {}
194             )
195             ];
196 0 0       0 if ( @{ $paths{ $el->[0] } } ) {
  0         0  
197 0         0 $found = 1;
198             }
199             }
200 0 0       0 return -1 if !$found;
201             TREE:
202 0         0 for my $i ( 0 .. $#$list ) {
203 0         0 my $tree = $list->[$i];
204             my $paths
205             = $is_sequence
206             ? $paths{ $tree->{'#name'} }
207 0 0       0 : $paths{LM};
208 0 0       0 for my $p ( @{ $paths || [] } ) {
  0         0  
209 0         0 for my $value ( $tree->all($p) ) {
210 0 0       0 if ( $value == $root ) {
211 0         0 $n = $i;
212 0         0 last TREE;
213             }
214             }
215             }
216             }
217             }
218 0         0 return $n;
219             }
220              
221             #######################################################################################
222             ### from TrEd::Utils
223             # Usage : parse_file_suffix($filename)
224             # Purpose : Split file name into file name itself and its suffix
225             # Returns : List which contains file name and its suffix, if there is no suffix,
226             # second list element is undef
227             # Parameters : scalar $filename -- name of the file
228             # Throws : no exceptions
229             # Comments : File suffix can be of the following forms:
230             # a) 1 or 2 #-signs, upper-case characters or numbers, and optionally followed by
231             # optional dash, full stop and at least one number
232             # b) 2 #-signs, at least one number, full stop, followed by
233             # one non-numeric not-# character and any number of not-# chars
234             # c) 1 #-sign followed by any number of not-# characters
235             # See Also :
236             sub parse_file_suffix {
237 15     15 0 38 my ($filename) = @_;
238             #
239 15 50       44 return if ( !defined $filename );
240 15 50 33     293 if ( $filename =~ s/(##?[0-9A-Z]+(?:-?\.[0-9]+)?)$// ) {
    50 33        
    50          
241 0         0 return ( $filename, $1 );
242             }
243             elsif (
244             $filename =~ m{^
245             (.*) # file name with any characters followed by
246             (\#\#[0-9]+\.) # 2x#, at least one number and full stop
247             ([^0-9\#][^\#]*) # followed by one non-numeric not-# character and any number of not-# chars
248             $
249             }x
250             and Treex::PML::Schema::CDATA->check_string_format( $3, 'ID' )
251             )
252             {
253 0         0 return ( $1, $2 . $3 );
254             }
255             elsif (
256             $filename =~ m{^
257             (.*) # file name with any characters followed by
258             \# # one hash followed by
259             ([^\#]+) # any number of not-# characters
260             $
261             }x
262             and Treex::PML::Schema::CDATA->check_string_format( $2, 'ID' )
263             )
264             {
265 15         509 return ( $1, '#' . $2 );
266             }
267             else {
268 0         0 return ( $filename, undef );
269             }
270             }
271              
272             ######################################
273              
274              
275              
276             # open a data file and related files on lower layers
277             sub open_file {
278 19     19 0 57 my $filename = shift;
279             # TODO fsfile caching and closing !!!
280 19         138 my $fsfile = Treex::PML::Factory->createDocumentFromFile($filename);
281 19 50       23351335 if ($Treex::PML::FSError) {
282 0         0 die "Error loading file $filename: $Treex::PML::FSError ($!)\n";
283             }
284 19         119 my $requires = $fsfile->metaData('fs-require');
285 19 50       258 if ($requires) {
286 19         71 for my $req (@$requires) {
287 9         65 my $req_filename = $req->[1]->abs( $fsfile->URL );
288 9         646 warn("REQUIRES $req_filename");
289 9         527 my $secondary = $fsfile->appData('ref');
290 9 50       120 unless ($secondary) {
291 0         0 $secondary = {};
292 0         0 $fsfile->changeAppData( 'ref', $secondary );
293             }
294 9         50 my $sf = open_file($req_filename);
295 9         61 $secondary->{ $req->[0] } = $sf;
296             }
297             }
298 19         91 return $fsfile;
299             }
300             #############################################
301              
302              
303             sub GetSecondaryFiles {
304 10     10 0 34 my ($fsfile) = @_;
305             # is probably the same as Treex::PML::Document->relatedDocuments()
306             # a reference to a list of pairs (id, URL)
307 10         36 my $requires = $fsfile->metaData('fs-require');
308 10         79 my @secondary;
309 10 50       43 if ($requires) {
310 10         40 foreach my $req (@$requires) {
311 9         28 my $id = $req->[0];
312             my $req_fs
313             = ref( $fsfile->appData('ref') )
314 9 50       43 ? $fsfile->appData('ref')->{$id}
315             : undef;
316 9 50       155 if ( UNIVERSAL::DOES::does( $req_fs, 'Treex::PML::Document' ) ) {
317 9         174 push( @secondary, $req_fs );
318             }
319             }
320             }
321 10         103 return uniq(@secondary);
322             }
323              
324              
325             sub OpenSecondaryFiles {
326 0     0 0   my ( $fsfile ) = @_;
327 0           my $win = undef;
328 0           my $status = 1;
329 0 0         return $status if $fsfile->appData('fs-require-loaded');
330 0           $fsfile->changeAppData( 'fs-require-loaded', 1 );
331 0           my $requires = $fsfile->metaData('fs-require'); #$fsfile->relatedDocuments()
332 0 0         if (defined $requires) {
333 0           for my $req (@$requires) {
334 0 0         next if ref( $fsfile->appData('ref')->{ $req->[0] } );
335 0           my $req_filename
336             = Treex::PML::ResolvePath( $fsfile->filename, $req->[1] );
337 0           print STDERR "Pre-loading dependent $req_filename ($req->[1]) as appData('ref')->{$req->[0]}\n";
338 0           my ( $req_fs, $status2 ) = open_file( # TODO simplify Tred::File::open_file() subrutine
339             $win, $req_filename,
340             -preload => 1,
341             -norecent => 1
342             );
343 0           _merge_status( $status, $status2 );
344 0 0         if ( !$status2->{ok} ) {
345 0           close_file( $win, -fsfile => $req_fs, -no_update => 1 );
346 0           return $status2;
347             }
348             else { #zaznac do zavisleho, ze je zavisly na nadradenom
349 0           push @{ $req_fs->appData('fs-part-of') },
  0            
350             $fsfile; # is this a good idea?
351 0           main::__debug("Setting appData('ref')->{$req->[0]} to $req_fs");
352 0           $fsfile->appData('ref')->{ $req->[0] } = $req_fs;
353             }
354             }
355             }
356 0           return $status;
357             }
358              
359             sub ThisAddress {
360 0     0 0   my ($node, $fsfile) = @_;
361 0           my $type = $node->type;
362 0   0       my ($id_attr) = $type && $type->find_members_by_role('#ID');
363              
364 0           return basename($fsfile->filename) . '#' . $node->{ $id_attr->get_name }
365             }
366              
367             sub GetNodeIndex {
368 0     0 0   my $node = shift;
369 0           my $i = -1;
370 0           while ($node) {
371 0           $node = $node->previous();
372 0           $i++;
373             }
374 0           return $i;
375             }
376              
377              
378             1;