File Coverage

lib/UR/DataSource/Filesystem.pm
Criterion Covered Total %
statement 738 969 76.1
branch 207 398 52.0
condition 57 114 50.0
subroutine 72 98 73.4
pod 5 13 38.4
total 1079 1592 67.7


line stmt bran cond sub pod time code
1             package UR::DataSource::Filesystem;
2              
3 9     9   488 use UR;
  9         13  
  9         94  
4 9     9   37 use strict;
  9         13  
  9         196  
5 9     9   30 use warnings;
  9         10  
  9         406  
6             our $VERSION = "0.46"; # UR $VERSION;
7              
8 9     9   34 use File::Basename;
  9         10  
  9         567  
9 9     9   33 use File::Path;
  9         11  
  9         318  
10 9     9   34 use List::Util;
  9         10  
  9         342  
11 9     9   31 use Scalar::Util;
  9         13  
  9         273  
12 9     9   33 use Errno qw(EINTR EAGAIN EOPNOTSUPP);
  9         9  
  9         19408  
13              
14             # lets you specify the server in several ways:
15             # path => '/path/name'
16             # means there is one file storing the data
17             # path => [ '/path1/name', '/path2/name' ]
18             # means the first tile we need to open the file, pick one (for load balancing)
19             # path => '/path/to/directory/'
20             # means that directory contains one or more files, and the classes using
21             # this datasource can have table_name metadata to pick the file
22             # path => '/path/$param1/${param2}.ext'
23             # means the values for $param1 and $param2 should come from the input rule.
24             # If the rule doesn't specify the param, then it should glob for the possible
25             # names at that point in the filesystem
26             # path => '/path/&method/filename'
27             # means the value for that part of the path should come from a method call
28             # run as $subject_class_name->$method($rule)
29             # path => '/path/*/path/$name/
30             # means it should glob at the appropriate time for the '*', but no use the
31             # paths found matching the glob to infer any values
32              
33             # maybe suppert a URI scheme like
34             # file:/path/$to/File.ext?columns=[a,b,c]&sorted_columns=[a,b]
35              
36             # TODO
37             # * Support non-equality operators for properties that are part of the path spec
38              
39              
40             class UR::DataSource::Filesystem {
41             is => 'UR::DataSource',
42             has => [
43             path => { doc => 'Path spec for the path on the filesystem containing the data' },
44             delimiter => { is => 'String', default_value => '\s*,\s*', doc => 'Delimiter between columns on the same line' },
45             record_separator => { is => 'String', default_value => "\n", doc => 'Delimiter between lines in the file' },
46             header_lines => { is => 'Integer', default_value => 0, doc => 'Number of lines at the start of the file to skip' },
47             columns_from_header => { is => 'Boolean', default_value => 0, doc => 'The column names are in the first line of the file' },
48             handle_class => { is => 'String', default_value => 'IO::File', doc => 'Class to use for new file handles' },
49             ],
50             has_optional => [
51             columns => { is => 'ARRAY', doc => 'Names of the columns in the file, in order' },
52             sorted_columns => { is => 'ARRAY', doc => 'Names of the columns by which the data file is sorted' },
53             ],
54             doc => 'A data source for treating files as relational data',
55             };
56              
57 2     2 0 4 sub can_savepoint { 0;} # Doesn't support savepoints
58              
59             # Filesystem datasources don't have a "default_handle"
60 0     0 0 0 sub create_default_handle { undef }
61              
62             sub _regex {
63 107     107   174 my $self = shift;
64              
65 107 100       329 unless ($self->{'_regex'}) {
66 8         32 my $delimiter = $self->delimiter;
67 8         16 my $r = eval { qr($delimiter) };
  8         143  
68 8 50 33     52 if ($@ || !$r) {
69 0         0 $self->error_message("Unable to interepret delimiter '".$self->delimiter.": $@");
70 0         0 return;
71             }
72 8         21 $self->{'_regex'} = $r;
73             }
74 107         207 return $self->{'_regex'};
75             }
76              
77              
78             sub _logger {
79 104     104   173 my $self = shift;
80 104         148 my $varname = shift;
81 104 50       363 if ($ENV{$varname}) {
82 0         0 my $log_fh = UR::DBI->sql_fh;
83             return sub {
84 0     0   0 my $msg = shift;
85 0         0 my $time = time();
86 0         0 $msg =~ s/\b\$time\b/$time/g;
87 0         0 my $localtime = scalar(localtime $time);
88 0         0 $msg =~ s/\b\$localtime\b/$localtime/;
89              
90 0         0 $log_fh->print($msg);
91 0         0 };
92             } else {
93 104         246 return \&UR::Util::null_sub;
94             }
95             }
96              
97             # The behavior for handling the filehandles after fork is contained in
98             # the read_record_from_file closure. There's nothing special for the
99             # data source to do
100             sub prepare_for_fork {
101 0     0 0 0 return 1;
102             }
103             sub finish_up_after_fork {
104 0     0 0 0 return 1;
105             }
106              
107             # Like UR::BoolExpr::specifies_value_for, but works on either a BoolExpr
108             # or another object. In the latter case, it returns true if the object's
109             # class has the given property
110             sub __specifies_value_for {
111 50     50   68 my($self, $thing, $property_name) = @_;
112              
113 50 50       213 return $thing->isa('UR::BoolExpr')
114             ? $thing->specifies_value_for($property_name)
115             : $thing->__meta__->property_meta_for_name($property_name);
116             }
117              
118             # Like UR::BoolExpr::value_for, but works on either a BoolExpr
119             # or another object.
120             sub __value_for {
121 18     18   21 my($self, $thing, $property_name) = @_;
122              
123 18 50       87 return $thing->isa('UR::BoolExpr')
124             ? $thing->value_for($property_name)
125             : $thing->$property_name;
126             }
127              
128             # Like UR::BoolExpr::subject_class_name, but works on either a BoolExpr
129             # or another object.
130             sub __subject_class_name {
131 181     181   201 my($self, $thing) = @_;
132              
133 181 100       874 return $thing->isa('UR::BoolExpr')
134             ? $thing->subject_class_name()
135             : $thing->class;
136             }
137              
138              
139             sub _replace_vars_with_values_in_pathname {
140 173     173   2897 my($self, $rule_or_obj, $string, $prop_values_hash) = @_;
141              
142 173   100     618 $prop_values_hash ||= {};
143              
144             # Match something like /some/path/$var/name or /some/path${var}.ext/name
145 173 100       612 if ($string =~ m/\$\{?(\w+)\}?/) {
146 50         103 my $varname = $1;
147 50         112 my $subject_class_name = $self->__subject_class_name($rule_or_obj);
148 50 50       142 unless ($subject_class_name->__meta__->property_meta_for_name($varname)) {
149 0         0 Carp::croak("Invalid 'server' for data source ".$self->id
150             . ": Path spec $string requires a value for property $varname "
151             . " which is not a property of class $subject_class_name");
152             }
153 50         63 my @string_replacement_values;
154              
155 50 100       126 if ($self->__specifies_value_for($rule_or_obj, $varname)) {
156 18         45 my @property_values = $self->__value_for($rule_or_obj, $varname);
157 18 100 66     88 if (@property_values == 1 and ref($property_values[0]) eq 'ARRAY') {
158 3         4 @property_values = @{$property_values[0]};
  3         5  
159             }
160             # Make a listref that has one element per value for that property in the rule (in-clause
161             # rules may have more than one value)
162             # Each element has 2 parts, first is the value, second is the accumulated prop_values_hash
163             # where we've added the occurance of this property havine one of the values
164 18         30 @property_values = map { [ $_, { %$prop_values_hash, $varname => $_ } ] } @property_values;
  21         80  
165              
166             # Escape any shell glob characters in the values: [ ] { } ~ ? * and \
167             # we don't want a property with value '?' to be a glob wildcard
168 18         23 @string_replacement_values = map { $_->[0] =~ s/([[\]{}~?*\\])/\\$1/; $_ } @property_values;
  21         48  
  21         36  
169              
170             } else {
171             # The rule doesn't have a value for this property.
172             # Put a shell wildcard in here, and a later glob will match things
173             # The '.__glob_positions__' key holds a list of places we've inserted shell globs.
174             # Each element is a 2-element list: index 0 is the string position, element 1 if the variable name.
175             # This is needed so the later glob expansion can tell the difference between globs
176             # that are part of the original path spec, and globs put in here
177 32 100       34 my @glob_positions = @{ $prop_values_hash->{'.__glob_positions__'} || [] };
  32         138  
178              
179 32         138 my $glob_pos = $-[0];
180 32         73 push @glob_positions, [$glob_pos, $varname];
181 32         154 @string_replacement_values = ([ '*', { %$prop_values_hash, '.__glob_positions__' => \@glob_positions} ]);
182             }
183              
184             my @return = map {
185 50         71 my $s = $string;
  53         55  
186 53         243 substr($s, $-[0], $+[0] - $-[0], $_->[0]);
187 53         154 [ $s, $_->[1] ];
188             }
189             @string_replacement_values;
190              
191             # recursion to process the next variable replacement
192 50         59 return map { $self->_replace_vars_with_values_in_pathname($rule_or_obj, @$_) } @return;
  53         152  
193              
194             } else {
195 123         411 return [ $string, $prop_values_hash ];
196             }
197             }
198              
199             sub _replace_subs_with_values_in_pathname {
200 131     131   1294 my($self, $rule_or_obj, $string, $prop_values_hash) = @_;
201              
202 131   100     263 $prop_values_hash ||= {};
203 131         313 my $subject_class_name = $self->__subject_class_name($rule_or_obj);
204              
205             # Match something like /some/path/&sub/name or /some/path&{sub}.ext/name
206 131 100       346 if ($string =~ m/\&\{?(\w+)\}?/) {
207 13         18 my $subname = $1;
208 13 50       41 unless ($subject_class_name->can($subname)) {
209 0         0 Carp::croak("Invalid 'server' for data source ".$self->id
210             . ": Path spec $string requires a value for method $subname "
211             . " which is not a method of class " . $self->__subject_class_name($rule_or_obj));
212             }
213            
214 13         74 my @property_values = eval { $subject_class_name->$subname($rule_or_obj) };
  13         32  
215 13 50       56 if ($@) {
216 0         0 Carp::croak("Can't resolve final path for 'server' for data source ".$self->id
217             . ": Method call to ${subject_class_name}::${subname} died with: $@");
218             }
219 13 50 66     44 if (@property_values == 1 and ref($property_values[0]) eq 'ARRAY') {
220 0         0 @property_values = @{$property_values[0]};
  0         0  
221             }
222             # Make a listref that has one element per value for that property in the rule (in-clause
223             # rules may have more than one value)
224             # Each element has 2 parts, first is the value, second is the accumulated prop_values_hash
225             # where we've added the occurance of this property havine one of the values
226 13         13 @property_values = map { [ $_, { %$prop_values_hash } ] } @property_values;
  15         50  
227              
228             # Escape any shell glob characters in the values: [ ] { } ~ ? * and \
229             # we don't want a return value '?' or '*' to be a glob wildcard
230 13         16 my @string_replacement_values = map { $_->[0] =~ s/([[\]{}~?*\\])/\\$1/; $_ } @property_values;
  15         23  
  15         21  
231              
232             # Given a pathname returned from the glob, return a new glob_position_list
233             # that has fixed up the position information accounting for the fact that
234             # the globbed pathname is a different length than the original spec
235 13         11 my $original_path_length = length($string);
236 13         14 my $glob_position_list = $prop_values_hash->{'.__glob_positions__'};
237 13         21 my $subname_replacement_position = $-[0];
238             my $fix_offsets_in_glob_list = sub {
239 15     15   13 my $pathname = shift;
240             # alter the position only if it is greater than the position of
241             # the subname we're replacing
242 15 100       24 return map { [ $_->[0] < $subname_replacement_position
  6         17  
243             ? $_->[0]
244             : $_->[0] + length($pathname) - $original_path_length,
245             $_->[1] ]
246             }
247             @$glob_position_list;
248 13         43 };
249              
250             my @return = map {
251 13         13 my $s = $string;
  15         13  
252 15         51 substr($s, $-[0], $+[0] - $-[0], $_->[0]);
253 15         24 $_->[1]->{'.__glob_positions__'} = [ $fix_offsets_in_glob_list->($s) ];
254 15         33 [ $s, $_->[1] ];
255             }
256             @string_replacement_values;
257              
258             # recursion to process the next method call
259 13         12 return map { $self->_replace_subs_with_values_in_pathname($rule_or_obj, @$_) } @return;
  15         34  
260              
261             } else {
262 118         383 return [ $string, $prop_values_hash ];
263             }
264             }
265              
266             sub _replace_glob_with_values_in_pathname {
267 195     195   7732 my($self, $string, $prop_values_hash) = @_;
268              
269             # a * not preceeded by a backslash, delimited by /
270 195 100       953 if ($string =~ m#([^/]*?[^\\/]?(\*)[^/]*)#) {
271 42         92 my $glob_pos = $-[2];
272              
273 42         102 my $path_segment_including_glob = substr($string, 0, $+[0]);
274 42         72 my $remaining_path = substr($string, $+[0]);
275 42         3419 my @glob_matches = map { $_ . $remaining_path }
  78         201  
276             glob($path_segment_including_glob);
277              
278 42         58 my $resolve_glob_values_for_each_result;
279 42         58 my $glob_position_list = $prop_values_hash->{'.__glob_positions__'};
280              
281             # Given a pathname returned from the glob, return a new glob_position_list
282             # that has fixed up the position information accounting for the fact that
283             # the globbed pathname is a different length than the original spec
284 42         52 my $original_path_length = length($string);
285             my $fix_offsets_in_glob_list = sub {
286 78     78   75 my $pathname = shift;
287 78         131 return map { [ $_->[0] + length($pathname) - $original_path_length, $_->[1] ] } @$glob_position_list;
  26         71  
288 42         192 };
289              
290 42 100       93 if ($glob_position_list->[0]->[0] == $glob_pos) {
291             # This * was put in previously by a $propname in the spec that wasn't mentioned in the rule
292              
293 40         61 my $path_delim_pos = index($path_segment_including_glob, '/', $glob_pos);
294 40 50       84 $path_delim_pos = length($path_segment_including_glob) if ($path_delim_pos == -1); # No more /s
295              
296 40         41 my $regex_as_str = $path_segment_including_glob;
297             # Find out just how many *s we're dealing with and where they are, up to the next /
298             # remove them from the glob_position_list because we're going to resolve their values
299 40         69 my(@glob_positions, @property_names);
300 40   100     252 while (@$glob_position_list
301             and
302             $glob_position_list->[0]->[0] < $path_delim_pos
303             ) {
304 42         58 my $this_glob_info = shift @{$glob_position_list};
  42         106  
305 42         84 push @glob_positions, $this_glob_info->[0];
306 42         156 push @property_names, $this_glob_info->[1];
307             }
308             # Replace the *s found with regex captures
309 40         71 my $glob_replacement = '([^/]*)';
310 40         53 my $glob_rpl_offset = 0;
311 40         90 my $offset_inc = length($glob_replacement) - 1; # replacing a 1-char string '*' with a 7-char string '([^/]*)'
312             $regex_as_str = List::Util::reduce( sub {
313 42     42   84 substr($a, $b + $glob_rpl_offset, 1, $glob_replacement);
314 42         53 $glob_rpl_offset += $offset_inc;
315 42         59 $a;
316             },
317 40         281 ($regex_as_str, @glob_positions) );
318              
319 40         689 my $regex = qr{$regex_as_str};
320 40         73 my @property_values_for_each_glob_match = map { [ $_, [ $_ =~ $regex] ] } @glob_matches;
  76         493  
321              
322             # Fill in the property names into .__glob_positions__
323             # we've resolved in this iteration, and apply offset fixups for the
324             # difference in string length between the pre- and post-glob pathnames
325              
326             $resolve_glob_values_for_each_result = sub {
327             return map {
328 40     40   45 my %h = %$prop_values_hash;
  76         186  
329 76         71 @h{@property_names} = @{$_->[1]};
  76         130  
330 76         106 $h{'.__glob_positions__'} = [ $fix_offsets_in_glob_list->($_->[0]) ];
331 76         174 [$_->[0], \%h];
332             }
333             @property_values_for_each_glob_match;
334 40         202 };
335              
336             } else {
337             # This is a glob put in the original path spec
338             # The new path comes from the @glob_matches list.
339             # Apply offset fixups for the difference in string length between the
340             # pre- and post-glob pathnames
341             $resolve_glob_values_for_each_result = sub {
342 2     2   3 return map { [
343 2         5 $_,
344             { %$prop_values_hash,
345             '.__glob_positions__' => [ $fix_offsets_in_glob_list->($_) ]
346             }
347             ]
348             }
349             @glob_matches;
350 2         6 };
351             }
352              
353 42         69 my @resolved_paths_and_property_values = $resolve_glob_values_for_each_result->();
354              
355             # Recursion to process the next glob
356 42         49 return map { $self->_replace_glob_with_values_in_pathname( @$_ ) }
  78         177  
357             @resolved_paths_and_property_values;
358              
359             } else {
360 153         196 delete $prop_values_hash->{'.__glob_positions__'};
361 153         723 return [ $string, $prop_values_hash ];
362             }
363             }
364              
365              
366             sub resolve_file_info_for_rule_and_path_spec {
367 112     112 0 168 my($self, $rule, $path_spec) = @_;
368              
369 112   66     497 $path_spec ||= $self->path;
370              
371 112         322 return map { $self->_replace_glob_with_values_in_pathname(@$_) }
372 112         369 map { $self->_replace_subs_with_values_in_pathname($rule, @$_) }
  112         351  
373             $self->_replace_vars_with_values_in_pathname($rule, $path_spec);
374             }
375              
376              
377             # We're overriding path() so the first time it's called, it will
378             # pick one from the list and then stay with that one for the life
379             # of the program
380             sub path {
381 105     105 1 153 my $self = shift;
382              
383 105 50       283 unless ($self->{'__cached_path'}) {
384 105         331 my $path = $self->__path();
385 105 50 33     298 if (ref($path) and ref($path) eq 'ARRAY') {
386 0         0 my $count = @$path;
387 0         0 my $idx = $$ % $count;
388 0         0 $self->{'_cached_path'} = $path->[$idx];
389             } else {
390 105         225 $self->{'_cached_path'} = $path;
391             }
392             }
393 105         261 return $self->{'_cached_path'};
394             }
395              
396             # Names of creation params that we should force to be listrefs
397             our %creation_param_is_list = map { $_ => 1 } qw( columns sorted_columns );
398             sub create_from_inline_class_data {
399 4     4 1 9 my($class, $class_data, $ds_data) = @_;
400              
401             #unless (exists $ds_data->{'columns'}) {
402             # User didn't specify columns in the file. Assumme every property is a column, and in the same order
403             # We'll have to ask the class object for the column list the first time there's a query
404             #}
405              
406 4         5 my %ds_creation_params;
407 4         9 foreach my $param ( qw( path delimiter record_separator columns header_lines
408             columns_from_header handle_class sorted_columns )
409             ) {
410 32 100       62 if (exists $ds_data->{$param}) {
411 13 50 66     129 if ($creation_param_is_list{$param} and ref($ds_data->{$param}) ne 'ARRAY') {
412 0         0 $ds_creation_params{$param} = \( $ds_data->{$param} );
413             } else {
414 13         26 $ds_creation_params{$param} = $ds_data->{$param};
415             }
416             }
417             }
418              
419 4         24 my $ds_id = UR::Object::Type->autogenerate_new_object_id_uuid();
420 4   50     25 my $ds_type = delete $ds_data->{'is'} || __PACKAGE__;
421 4         50 my $ds = $ds_type->create( %ds_creation_params, id => $ds_id );
422 4         22 return $ds;
423             }
424              
425              
426              
427             sub _things_in_list_are_numeric {
428 13     13   19 my $self = shift;
429              
430 13         26 foreach ( @{$_[0]} ) {
  13         40  
431 14 50       70 return 0 if (! Scalar::Util::looks_like_number($_));
432             }
433 13         45 return 1;
434             }
435              
436             # Construct a closure to perform an operator test against the given value
437             # The closures return 0 is the test is successful, -1 if unsuccessful but
438             # the file's value was less than $value, and 1 if unsuccessful and greater.
439             # The iterator that churns through the file knows that if it's comparing an
440             # ID/sorted column, and the comparator returns 1 then we've gone past the
441             # point where we can expect to ever find another successful match and we
442             # should stop looking
443             my $ALWAYS_FALSE = sub { -1 };
444             sub _comparator_for_operator_and_property {
445 36     36   72 my($self,$property,$operator,$value) = @_;
446              
447 9     9   49 no warnings 'uninitialized'; # we're handling ''/undef/null specially below where it matters
  9         12  
  9         21184  
448              
449 36 100 66     286 if ($operator eq 'between') {
    50          
    50          
    50          
    50          
    100          
450 2 50 33     27 if ($value->[0] eq '' or $value->[1] eq '') {
451 0         0 return $ALWAYS_FALSE;
452             }
453              
454 2 100 66     12 if ($property->is_numeric and $self->_things_in_list_are_numeric($value)) {
455 1 50       4 if ($value->[0] > $value->[1]) {
456             # Will never be true
457 0         0 Carp::carp "'between' comparison will never be true with values ".$value->[0]," and ".$value->[1];
458 0         0 return $ALWAYS_FALSE;
459             }
460              
461             # numeric 'between' comparison
462             return sub {
463 5 50   5   6 return -1 if (${$_[0]} eq '');
  5         9  
464 5 100       5 if (${$_[0]} < $value->[0]) {
  5 50       11  
465 1         2 return -1;
466 4         6 } elsif (${$_[0]} > $value->[1]) {
467 0         0 return 1;
468             } else {
469 4         5 return 0;
470             }
471 1         8 };
472             } else {
473 1 50       5 if ($value->[0] gt $value->[1]) {
474 0         0 Carp::carp "'between' comparison will never be true with values ".$value->[0]," and ".$value->[1];
475 0         0 return $ALWAYS_FALSE;
476             }
477              
478             # A string 'between' comparison
479             return sub {
480 5 50   5   4 return -1 if (${$_[0]} eq '');
  5         10  
481 5 100       4 if (${$_[0]} lt $value->[0]) {
  5 100       10  
482 1         3 return -1;
483 4         7 } elsif (${$_[0]} gt $value->[1]) {
484 1         2 return 1;
485             } else {
486 3         4 return 0;
487             }
488 1         6 };
489             }
490              
491             } elsif ($operator eq 'in') {
492 0 0       0 if (! @$value) {
493 0         0 return $ALWAYS_FALSE;
494             }
495              
496 0 0 0     0 if ($property->is_numeric and $self->_things_in_list_are_numeric($value)) {
497             # Numeric 'in' comparison returns undef if we're within the range of the list
498             # but don't actually match any of the items in the list
499 0         0 @$value = sort { $a <=> $b } @$value; # sort the values first
  0         0  
500             return sub {
501 0 0   0   0 return -1 if (${$_[0]} eq '');
  0         0  
502 0 0       0 if (${$_[0]} < $value->[0]) {
  0 0       0  
503 0         0 return -1;
504 0         0 } elsif (${$_[0]} > $value->[-1]) {
505 0         0 return 1;
506             } else {
507 0         0 foreach ( @$value ) {
508 0 0       0 return 0 if ${$_[0]} == $_;
  0         0  
509             }
510 0         0 return -1;
511             }
512 0         0 };
513              
514             } else {
515             # A string 'in' comparison
516 0         0 @$value = sort { $a cmp $b } @$value;
  0         0  
517             return sub {
518 0 0   0   0 if (${$_[0]} lt $value->[0]) {
  0 0       0  
519 0         0 return -1;
520 0         0 } elsif (${$_[0]} gt $value->[-1]) {
521 0         0 return 1;
522             } else {
523 0         0 foreach ( @$value ) {
524 0 0       0 return 0 if ${$_[0]} eq $_;
  0         0  
525             }
526 0         0 return -1;
527             }
528 0         0 };
529              
530             }
531              
532             } elsif ($operator eq 'not in') {
533 0 0       0 if (! @$value) {
534 0         0 return $ALWAYS_FALSE;
535             }
536              
537 0 0 0     0 if ($property->is_numeric and $self->_things_in_list_are_numeric($value)) {
538             return sub {
539 0 0   0   0 return -1 if (${$_[0]} eq '');
  0         0  
540 0         0 foreach ( @$value ) {
541 0 0       0 return -1 if ${$_[0]} == $_;
  0         0  
542             }
543 0         0 return 0;
544             }
545              
546 0         0 } else {
547             return sub {
548 0     0   0 foreach ( @$value ) {
549 0 0       0 return -1 if ${$_[0]} eq $_;
  0         0  
550             }
551 0         0 return 0;
552             }
553 0         0 }
554              
555             } elsif ($operator eq 'like') {
556             # 'like' is always a string comparison. In addition, we can't know if we're ahead
557             # or behind in the file's ID columns, so the only two return values are 0 and 1
558              
559 0 0       0 return $ALWAYS_FALSE if ($value eq ''); # property like NULL is always false
560              
561             # Convert SQL-type wildcards to Perl-type wildcards
562             # Convert a % to a *, and _ to ., unless they're preceeded by \ to escape them.
563             # Not that this isn't precisely correct, as \\% should really mean a literal \
564             # followed by a wildcard, but we can't be correct in all cases without including
565             # a real parser. This will catch most cases.
566              
567 0         0 $value =~ s/(?
568 0         0 $value =~ s/(?
569 0         0 my $regex = qr($value);
570             return sub {
571 0 0   0   0 return -1 if (${$_[0]} eq '');
  0         0  
572 0 0       0 if (${$_[0]} =~ $regex) {
  0         0  
573 0         0 return 0;
574             } else {
575 0         0 return 1;
576             }
577 0         0 };
578              
579             } elsif ($operator eq 'not like') {
580 0 0       0 return $ALWAYS_FALSE if ($value eq ''); # property like NULL is always false
581 0         0 $value =~ s/(?
582 0         0 $value =~ s/(?
583 0         0 my $regex = qr($value);
584             return sub {
585 0 0   0   0 return -1 if (${$_[0]} eq '');
  0         0  
586 0 0       0 if (${$_[0]} =~ $regex) {
  0         0  
587 0         0 return 1;
588             } else {
589 0         0 return 0;
590             }
591 0         0 };
592              
593              
594             # FIXME - should we only be testing the numericness of the property?
595             } elsif ($property->is_numeric and $self->_things_in_list_are_numeric([$value])) {
596             # Basic numeric comparisons
597 12 100 0     41 if ($operator eq '=') {
    50          
    0          
    0          
    0          
    0          
    0          
    0          
598             return sub {
599 49 50   49   47 return -1 if (${$_[0]} eq ''); # null always != a number
  49         101  
600 49         43 return ${$_[0]} <=> $value;
  49         84  
601 9         45 };
602             } elsif ($operator eq '<') {
603             return sub {
604 14 50   14   14 return -1 if (${$_[0]} eq ''); # null always != a number
  14         37  
605 14 100       31 ${$_[0]} < $value ? 0 : 1;
  14         39  
606 3         16 };
607             } elsif ($operator eq '<=') {
608             return sub {
609 0 0   0   0 return -1 if (${$_[0]} eq ''); # null always != a number
  0         0  
610 0 0       0 ${$_[0]} <= $value ? 0 : 1;
  0         0  
611 0         0 };
612             } elsif ($operator eq '>') {
613             return sub {
614 0 0   0   0 return -1 if (${$_[0]} eq ''); # null always != a number
  0         0  
615 0 0       0 ${$_[0]} > $value ? 0 : -1;
  0         0  
616 0         0 };
617             } elsif ($operator eq '>=') {
618             return sub {
619 0 0   0   0 return -1 if (${$_[0]} eq ''); # null always != a number
  0         0  
620 0 0       0 ${$_[0]} >= $value ? 0 : -1;
  0         0  
621 0         0 };
622             } elsif ($operator eq 'true') {
623             return sub {
624 0 0   0   0 ${$_[0]} ? 0 : -1;
  0         0  
625 0         0 };
626             } elsif ($operator eq 'false') {
627             return sub {
628 0 0   0   0 ${$_[0]} ? -1 : 0;
  0         0  
629 0         0 };
630             } elsif ($operator eq '!=' or $operator eq 'ne') {
631             return sub {
632 0 0   0   0 return 0 if (${$_[0]} eq ''); # null always != a number
  0         0  
633 0 0       0 ${$_[0]} != $value ? 0 : -1;
  0         0  
634             }
635 0         0 }
636              
637             } else {
638             # Basic string comparisons
639 22 100 0     65 if ($operator eq '=') {
    50          
    50          
    50          
    0          
    0          
    0          
    0          
640             return sub {
641 93 50 25 93   76 return -1 if (${$_[0]} eq '' xor $value eq '');
  93         341  
642 93         75 return ${$_[0]} cmp $value;
  93         109  
643 21         98 };
644             } elsif ($operator eq '<') {
645             return sub {
646 0 0   0   0 ${$_[0]} lt $value ? 0 : 1;
  0         0  
647 0         0 };
648             } elsif ($operator eq '<=') {
649             return sub {
650 0 0 0 0   0 return -1 if (${$_[0]} eq '' or $value eq '');
  0         0  
651 0 0       0 ${$_[0]} le $value ? 0 : 1;
  0         0  
652 0         0 };
653             } elsif ($operator eq '>') {
654             return sub {
655 6 100   6   4 ${$_[0]} gt $value ? 0 : -1;
  6         15  
656 1         5 };
657             } elsif ($operator eq '>=') {
658             return sub {
659 0 0 0 0   0 return -1 if (${$_[0]} eq '' or $value eq '');
  0         0  
660 0 0       0 ${$_[0]} ge $value ? 0 : -1;
  0         0  
661 0         0 };
662             } elsif ($operator eq 'true') {
663             return sub {
664 0 0   0   0 ${$_[0]} ? 0 : -1;
  0         0  
665 0         0 };
666             } elsif ($operator eq 'false') {
667             return sub {
668 0 0   0   0 ${$_[0]} ? -1 : 0;
  0         0  
669 0         0 };
670             } elsif ($operator eq '!=' or $operator eq 'ne') {
671             return sub {
672 0 0   0   0 ${$_[0]} ne $value ? 0 : -1;
  0         0  
673             }
674 0         0 }
675             }
676             }
677              
678              
679              
680             sub _properties_from_path_spec {
681 0     0   0 my($self) = @_;
682              
683 0 0       0 unless (exists $self->{'__properties_from_path_spec'}) {
684 0         0 my $path = $self->path;
685 0 0       0 $path = $path->[0] if ref($path);
686              
687 0         0 my @property_names;
688 0         0 while($path =~ m/\G\$\{?(\w+)\}?/) {
689 0         0 push @property_names, $1;
690             }
691 0         0 $self->{'__properties_from_path_spec'} = \@property_names;
692             }
693 0         0 return @{ $self->{'__properties_from_path_spec'} };
  0         0  
694             }
695              
696              
697             sub _generate_loading_templates_arrayref {
698 33     33   50 my($self, $old_sql_cols) = @_;
699              
700             # Each elt in @$column_data is a quad:
701             # [ $class_meta, $property_meta, $table_name, $object_num ]
702             # Keep only the properties with columns (mostly just to remove UR::Object::id
703 33         81 my @sql_cols = grep { $_->[1]->column_name }
  150         225  
704             @$old_sql_cols;
705              
706 33         186 my $template_data = $self->SUPER::_generate_loading_templates_arrayref(\@sql_cols);
707 33         100 return $template_data;
708             }
709              
710              
711              
712             sub _resolve_column_names_from_pathname {
713 116     116   209 my($self,$pathname,$fh) = @_;
714              
715 116 100       406 unless (exists($self->{'__column_names_from_pathname'}->{$pathname})) {
716 13 100       68 if (my $column_names_in_order = $self->columns) {
717 10         32 $self->{'__column_names_from_pathname'}->{$pathname} = $column_names_in_order;
718              
719             } else {
720 3         12 my $record_separator = $self->record_separator();
721 3         87 my $line = $fh->getline();
722 3         138 $line =~ s/$record_separator$//; # chomp, but for any value
723             # FIXME - to support record-oriented files, we need some replacement for this...
724 3         13 my $split_regex = $self->_regex();
725 3         15 my @headers = split($split_regex, $line);
726 3         10 $self->{'__column_names_from_pathname'}->{$pathname} = \@headers;
727             }
728             }
729 116         285 return $self->{'__column_names_from_pathname'}->{$pathname};
730             }
731              
732              
733             sub file_is_sorted_as_requested {
734 115     115 0 160 my($self, $query_plan) = @_;
735              
736 115   100     297 my $sorted_columns = $self->sorted_columns || [];
737              
738 115         347 my $order_by_columns = $query_plan->order_by_columns();
739 115         359 for (my $i = 0; $i < @$order_by_columns; $i++) {
740 126 100       318 next if ($order_by_columns->[$i] eq '$.'); # input line number is always sorted
741 121 100       290 next if ($order_by_columns->[$i] eq '__FILE__');
742              
743 116 100       308 return 0 if $i > $#$sorted_columns;
744 79 100       220 if ($sorted_columns->[$i] ne $order_by_columns->[$i]) {
745 68         194 return 0;
746             }
747             }
748 10         26 return 1;
749             }
750              
751              
752             # FIXME - this is a copy of parts of _generate_class_data_for_loading from UR::DS::RDBMS
753             sub _generate_class_data_for_loading {
754 169     169   192 my ($self, $class_meta) = @_;
755              
756 169         521 my $parent_class_data = $self->SUPER::_generate_class_data_for_loading($class_meta);
757              
758 169         485 my @class_hierarchy = ($class_meta->class_name,$class_meta->ancestry_class_names);
759 169         208 my $order_by_columns;
760 169         140 do {
761 169         172 my @id_column_names;
762 169         275 for my $inheritance_class_name (@class_hierarchy) {
763 169         436 my $inheritance_class_object = UR::Object::Type->get($inheritance_class_name);
764 169 50       499 unless ($inheritance_class_object->table_name) {
765 0         0 next;
766             }
767             @id_column_names =
768             #map {
769             # my $t = $inheritance_class_object->table_name;
770             # ($t) = ($t =~ /(\S+)\s*$/);
771             # $t . '.' . $_
772             #}
773 182         464 grep { defined }
774             map {
775 182         588 my $p = $inheritance_class_object->property_meta_for_name($_);
776 182 50       392 die ("No property $_ found for " . $inheritance_class_object->class_name . "?") unless $p;
777 182         374 $p->column_name;
778             }
779 182         444 map { $_->property_name }
780 169         619 grep { $_->column_name }
  182         587  
781             $inheritance_class_object->direct_id_property_metas;
782              
783 169 50       462 last if (@id_column_names);
784             }
785 169         253 $order_by_columns = \@id_column_names;
786             };
787              
788 169         227 my(@all_table_properties, @direct_table_properties, $first_table_name, $subclassify_by);
789 169         196 for my $co ( $class_meta, @{ $parent_class_data->{parent_class_objects} } ) {
  169         377  
790 360         782 my $table_name = $co->table_name;
791 360 100       732 next unless $table_name;
792              
793 169   33     550 $first_table_name ||= $co->table_name;
794             # $sub_classification_method_name ||= $co->sub_classification_method_name;
795             # $sub_classification_meta_class_name ||= $co->sub_classification_meta_class_name;
796 169   33     560 $subclassify_by ||= $co->subclassify_by;
797              
798 169     634   527 my $sort_sub = sub ($$) { return $_[0]->property_name cmp $_[1]->property_name };
  634         1008  
799             push @all_table_properties,
800 625         1105 map { [$co, $_, $table_name, 0 ] }
801             sort $sort_sub
802 169 100       543 grep { defined $_->column_name && $_->column_name ne '' }
  649         1089  
803             UR::Object::Property->get( class_name => $co->class_name );
804              
805 169 50       1215 @direct_table_properties = @all_table_properties if $class_meta eq $co;
806             }
807              
808              
809 169         1951 my $class_data = {
810             %$parent_class_data,
811              
812             order_by_columns => $order_by_columns,
813             direct_table_properties => \@direct_table_properties,
814             all_table_properties => \@all_table_properties,
815             };
816 169         854 return $class_data;
817             }
818              
819              
820             # Needed for the QueryPlan's processing of order-by params
821             # Params are a list of the 4-tuples [class-meta, prop-meta, table-name, object-num]
822             sub _select_clause_columns_for_table_property_data {
823 16     16   21 my $self = shift;
824              
825              
826 16         22 return [ map { $_->[1]->column_name } @_ ];
  16         31  
827             }
828              
829             # Used to populate the %value_extractor_for_column_name hash
830             # It should return a sub that, when given a row of data from the source,
831             # returns the proper data from that row.
832             #
833             # It's expected to return a sub that accepts ($self, $row, $fh, $filename)
834             # and return a reference to the right data. In most cases, it'll just pluck
835             # out the $column_idx'th element from $@row, but we're using it
836             # to attach special meaning to the $. token
837             sub _create_value_extractor_for_column_name {
838 617     617   592 my($self, $rule, $column_name, $column_idx) = @_;
839              
840 617 100       1139 if ($column_name eq '$.') {
    100          
841             return sub {
842 30     30   39 my($self, $row, $fh, $filename) = @_;
843 30         61 my $line_no = $fh->input_line_number();
844 30         329 return \$line_no;
845 115         388 };
846             } elsif ($column_name eq '__FILE__') {
847             return sub {
848 0     0   0 my($self,$row,$fh,$filename) = @_;
849 0         0 return \$filename;
850 115         410 };
851             } else {
852             return sub {
853 3129     3129   2471 my($self, $row, $fh, $filename) = @_;
854 3129         3750 return \$row->[$column_idx];
855 387         1550 };
856             }
857             }
858              
859              
860             sub create_iterator_closure_for_rule {
861 103     103 1 129 my($self,$rule) = @_;
862              
863 103         305 my $class_name = $rule->subject_class_name;
864 103         284 my $class_meta = $class_name->__meta__;
865 103         231 my $rule_template = $rule->template;
866              
867             # We're defering to the class metadata here because we don't yet know the
868             # pathnames of the files we'll be reading from. If the columns_from_header flag
869             # is set, then there's no way of knowing what the columns are until then
870 391         607 my @column_names = grep { defined }
871 103         448 map { $class_meta->column_for_property($_) }
  395         771  
872             $class_meta->all_property_names;
873              
874             # FIXME - leaning on the sorted_columns property here means:
875             # 1) It's useless when used where the path spec is a directory and
876             # classes have table_names, since each file is likely to have different
877             # columns
878             # 2) If we ultimately end up reading from more than one file, all the files
879             # must be sorted in the same way. It's possible the user has sorted each
880             # file differently, though in practice it would make for a lot of trouble
881 103         169 my %column_is_sorted_descending;
882 83 100       253 my @sorted_column_names = map { if (index($_, '-') == 0) {
883 38         77 my $col = $_;
884 38         91 substr($col, 0, 1, '');
885 38         106 $column_is_sorted_descending{$col} = 1;
886 38         107 $col;
887             } else {
888 45         112 $_;
889             }
890             }
891 103 100       184 @{ $self->sorted_columns || [] };
  103         349  
892 103         190 my %sorted_column_names = map { $_ => 1 } @sorted_column_names;
  83         212  
893 103         194 my @unsorted_column_names = grep { ! exists $sorted_column_names{$_} } @column_names;
  391         509  
894              
895 103         120 my @rule_column_names_in_order; # The order we should perform rule matches on - value is the name of the column in the file
896             my @comparison_for_column; # closures to call to perform the match - same order as @rule_column_names_in_order
897 0         0 my %rule_column_name_to_comparison_index;
898              
899 0         0 my(%property_for_column, %operator_for_column, %value_for_column); # These are used for logging
900              
901             my $resolve_comparator_for_column_name = sub {
902 391     391   347 my $column_name = shift;
903              
904 391         872 my $property_name = $class_meta->property_for_column($column_name);
905 391 100       849 return unless $rule->specifies_value_for($property_name);
906              
907 36   50     129 my $operator = $rule->operator_for($property_name)
908             || '=';
909 36         109 my $rule_value = $rule->value_for($property_name);
910              
911 36         65 $property_for_column{$column_name} = $property_name;
912 36         61 $operator_for_column{$column_name} = $operator;
913 36         49 $value_for_column{$column_name} = $rule_value;
914              
915 36         218 my $comp_function = $self->_comparator_for_operator_and_property(
916             $class_meta->property($property_name),
917             $operator,
918             $rule_value);
919              
920 36         66 push @rule_column_names_in_order, $column_name;
921 36         43 push @comparison_for_column, $comp_function;
922 36         64 $rule_column_name_to_comparison_index{$column_name} = $#comparison_for_column;
923 36         80 return 1;
924 103         524 };
925              
926 103         125 my $sorted_columns_in_rule_count; # How many columns we can consider when trying "the shortcut" for sorted data
927             my %column_is_used_in_sorted_capacity;
928 103         158 foreach my $column_name ( @sorted_column_names ) {
929 83 100 66     179 if (! $resolve_comparator_for_column_name->($column_name)
930             and ! defined($sorted_columns_in_rule_count)
931             ) {
932             # The first time we don't match a sorted column, record the index
933 76         135 $sorted_columns_in_rule_count = scalar(@rule_column_names_in_order);
934             } else {
935 7         19 $column_is_used_in_sorted_capacity{$column_name} = ' (sorted)';
936             }
937             }
938 103   100     329 $sorted_columns_in_rule_count ||= scalar(@rule_column_names_in_order);
939              
940 103         165 foreach my $column_name ( @unsorted_column_names ) {
941 308         423 $resolve_comparator_for_column_name->($column_name);
942             }
943              
944             # sort them by filename
945 103         400 my @possible_file_info_list = sort { $a->[0] cmp $b->[0] }
  18         31  
946             $self->resolve_file_info_for_rule_and_path_spec($rule);
947              
948 103         367 my $table_name = $class_meta->table_name;
949 103 100 66     500 if (defined($table_name) and $table_name ne '__default__') {
950             # Tack the final file name onto the end if the class has a table name
951 8         13 @possible_file_info_list = map { [ $_->[0] . "/$table_name", $_->[1] ] } @possible_file_info_list;
  8         43  
952             }
953              
954 103         356 my $handle_class = $self->handle_class;
955 103         194 my $use_quick_read = $handle_class eq 'IO::Handle';
956 103         330 my $split_regex = $self->_regex();
957 103         284 my $logger = $self->_logger('UR_DBI_MONITOR_SQL');
958 103         584 my $record_separator = $self->record_separator;
959              
960 103         334 my $monitor_start_time = Time::HiRes::time();
961              
962 9     9   48 { no warnings 'uninitialized';
  9         16  
  9         15964  
  103         97  
963             $logger->("\nFILE: starting query covering " . scalar(@possible_file_info_list)." files:\n\t"
964 115         623 . join("\n\t", map { $_->[0] } @possible_file_info_list )
965             . "\nFILTERS: "
966             . (scalar(@rule_column_names_in_order)
967             ? join("\n\t", map {
968 103 100       360 $_ . $column_is_used_in_sorted_capacity{$_}
969             . " $operator_for_column{$_} "
970             . (ref($value_for_column{$_}) eq 'ARRAY'
971 2         15 ? '[' . join(',',@{$value_for_column{$_}}) .']'
972 36 100       290 : $value_for_column{$_} )
973             }
974             @rule_column_names_in_order)
975             : '*none*')
976             . "\n\n"
977             );
978             }
979              
980 103         392 my $query_plan = $self->_resolve_query_plan($rule_template);
981 103 50       124 if (@{ $query_plan->{'loading_templates'} } > 1) {
  103         309  
982 0         0 Carp::croak(__PACKAGE__ . " does not support joins. The rule was $rule");
983             }
984 103         191 my $loading_template = $query_plan->{loading_templates}->[0];
985 103         123 my @property_names_in_loading_template_order = @{ $loading_template->{'property_names'} };
  103         301  
986 103         170 my @column_names_in_loading_template_order = map { $class_meta->column_for_property($_) }
  391         702  
987             @property_names_in_loading_template_order;
988              
989 103         161 my %property_name_to_resultset_index_map;
990             my %column_name_to_resultset_index_map;
991 103         314 for (my $i = 0; $i < @property_names_in_loading_template_order; $i++) {
992 391         336 my $property_name = $property_names_in_loading_template_order[$i];
993 391         411 $property_name_to_resultset_index_map{$property_name} = $i;
994 391         577 $column_name_to_resultset_index_map{$class_meta->column_for_property($property_name)} = $i;
995             }
996              
997 103         136 my @iterator_for_each_file;
998 103         200 foreach ( @possible_file_info_list ) {
999 115         192 my $pathname = $_->[0];
1000 115         125 my $property_values_from_path_spec = $_->[1];
1001              
1002 115         293 my @properties_from_path_spec = keys %$property_values_from_path_spec;
1003 115         202 my @values_from_path_spec = values %$property_values_from_path_spec;
1004              
1005 115         286 my $pid = $$; # For tracking whether there's been a fork()
1006 115         852 my $fh = $handle_class->new($pathname);
1007 115 50       12916 unless ($fh) {
1008 0         0 $logger->("FILE: Skipping $pathname because it did not open: $!\n");
1009 0         0 next; # missing or unopenable files is not fatal
1010             }
1011              
1012 115         485 my $column_names_in_order = $self->_resolve_column_names_from_pathname($pathname,$fh);
1013             # %value_for_column_name holds subs that return the value for that column. For values
1014             # determined from the path resolver, save that value here. Most other values get plucked out
1015             # of the line read from the file. The remaining values are special tokens like $. and __FILE__.
1016             # These subs are used both for testing whether values read from the data source pass the rule
1017             # and for constructing the resultset passed up to the Context
1018 115         166 my %value_for_column_name;
1019             my %column_name_to_index_map;
1020 115         193 my $ordered_column_names_count = scalar(@$column_names_in_order);
1021 115         407 for (my $i = 0; $i < $ordered_column_names_count; $i++) {
1022 388         434 my $column_name = $column_names_in_order->[$i];
1023 388 100       733 next unless (defined $column_name);
1024 387         467 $column_name_to_index_map{$column_name} = $i;
1025 387         670 $value_for_column_name{$column_name}
1026             = $self->_create_value_extractor_for_column_name($rule, $column_name, $i);
1027             }
1028 115         244 foreach ( '$.', '__FILE__' ) {
1029 230         364 $value_for_column_name{$_} = $self->_create_value_extractor_for_column_name($rule, $_, undef);
1030 230         377 $column_name_to_index_map{$_} = undef;
1031             }
1032 115         451 while (my($prop, $value) = each %$property_values_from_path_spec) {
1033 35         103 my $column = $class_meta->column_for_property($prop);
1034 35     62   111 $value_for_column_name{$column} = sub { return \$value };
  62         86  
1035 35         117 $column_name_to_index_map{$column} = undef;
1036             }
1037              
1038             # Convert the column_name keys here to indexes into the comparison list
1039             my %column_for_this_comparison_is_sorted_descending =
1040 1         5 map { $rule_column_name_to_comparison_index{$_} => $column_is_sorted_descending{$_} }
1041 115         334 grep { exists $rule_column_name_to_comparison_index{$_} }
  38         153  
1042             keys %column_is_sorted_descending;
1043              
1044             # rule properties that aren't actually columns in the file should be
1045             # satisfied by the path resolution already, so we can strip them out of the
1046             # list of columns to test
1047 48         95 my @rule_columns_in_order = map { $column_name_to_index_map{$_} }
1048 115         233 grep { exists $column_name_to_index_map{$_} }
  48         122  
1049             @rule_column_names_in_order;
1050             # And also strip out any items in @comparison_for_column for non-column data
1051 48         98 my @comparison_for_column_this_file = map { $comparison_for_column[ $rule_column_name_to_comparison_index{$_} ] }
1052 115         196 grep { exists $column_name_to_index_map{$_} }
  48         74  
1053             @rule_column_names_in_order;
1054              
1055             # Burn through the requsite number of header lines
1056 115         471 my $lines_read = $fh->input_line_number;
1057 115         2536 my $throwaway_line_count = $self->header_lines;
1058 115         320 while($throwaway_line_count > $lines_read) {
1059 19         248 $lines_read++;
1060 19         351 scalar($fh->getline());
1061             }
1062              
1063 115         343 my $lines_matched = 0;
1064              
1065 115         126 my $log_first_fetch;
1066             $log_first_fetch = sub {
1067 115     115   2307 $logger->(sprintf("FILE: $pathname FIRST FETCH TIME: %.4f s\n\n", Time::HiRes::time() - $monitor_start_time));
1068 115         532 $log_first_fetch = \&UR::Util::null_sub;
1069 115         411 };
1070 115         137 my $log_first_match;
1071             $log_first_match = sub {
1072 102     102   452 $logger->("FILE: $pathname First match after reading $lines_read lines\n\n");
1073 102         409 $log_first_match = \&UR::Util::null_sub;
1074 115         378 };
1075              
1076              
1077 115         133 my $next_record;
1078              
1079             # This sub reads the next record (line) from the file, splits the line into
1080             # columns and puts the data into @$next_record
1081 115         877 my $record_separator_re = qr($record_separator$);
1082             my $read_record_from_file = sub {
1083              
1084             # Make sure some wise guy hasn't changed this out from under us
1085 983     983   2044 local $/ = $record_separator;
1086              
1087 983 50       1700 if ($pid != $$) {
1088             # There's been a fork() between the original opening and now
1089             # This filehandle is no longer valid to read from, but tell()
1090             # should still report the right position
1091 0         0 my $pos = $fh->tell();
1092 0         0 $logger->("FILE: reopening file $pathname and seeking to position $pos after fork()\n");
1093 0         0 my $fh = $handle_class->new($pathname);
1094 0 0       0 unless ($fh) {
1095 0         0 $logger->("FILE: Reopening $pathname after fork() failed: $!\n");
1096 0         0 return; # behave if we're at EOF
1097             }
1098 0         0 $fh->seek($pos, 0); # fast-forward to the old position
1099 0         0 $pid = $$;
1100             }
1101              
1102 983         674 my $line;
1103             READ_LINE_FROM_FILE:
1104 983         1335 while(! defined($line)) {
1105             # Hack for OSX 10.5.
1106             # At EOF, the getline below will return undef. Most builds of Perl
1107             # will also set $! to 0 at EOF so you can distinguish between the cases
1108             # of EOF (which may have actually happened a while ago because of buffering)
1109             # and an actual read error. OSX 10.5's Perl does not, and so $!
1110             # retains whatever value it had after the last failed syscall, likely
1111             # a stat() while looking for a Perl module. This should have no effect
1112             # other platforms where you can't trust $! at arbitrary points in time
1113             # anyway
1114 983         1055 $! = 0;
1115 983 50       16118 $line = $use_quick_read ? <$fh> : $fh->getline();
1116              
1117 983 100 100     23858 if ($line and $line !~ $record_separator_re) {
1118             # Was a short read - probably at EOF
1119             # If the record_separator is a multi-char string, and the last
1120             # characters of $line are the first characters of the
1121             # record_separator, it's likely (though not certain) that the right
1122             # Thing to do is to remove the partial record separator.
1123 1         5 for (my $keep_chars = length($record_separator); $keep_chars > 0; $keep_chars--) {
1124 4         5 my $match_rs = substr($record_separator, 0, $keep_chars);
1125 4 100       36 if ($line =~ m/$match_rs$/) {
1126 1         3 substr($line, 0 - $keep_chars) = '';
1127 1         2 last;
1128             }
1129             }
1130             }
1131              
1132 983 100       1995 unless (defined $line) {
1133 109 50 33     358 if ($! && ! $fh->eof()) {
1134 0 0 0     0 redo READ_LINE_FROM_FILE if ($! == EAGAIN or $! == EINTR);
1135 0         0 Carp::croak("read failed for file $pathname: $!");
1136             }
1137              
1138             # at EOF. Close up shop and remove this fh from the list
1139             #flock($fh,LOCK_UN);
1140 109         140 $fh = undef;
1141 109         3421 $next_record = undef;
1142              
1143 109         1725 $logger->("FILE: $pathname at EOF\n"
1144             . "FILE: $lines_read lines read for this request. $lines_matched matches in this file\n"
1145             . sprintf("FILE: TOTAL EXECUTE-FETCH TIME: %.4f s\n\n", Time::HiRes::time() - $monitor_start_time)
1146             );
1147 109         294 return;
1148             }
1149             }
1150 874         610 $lines_read++;
1151              
1152 874         2407 $line =~ s/$record_separator$//; # chomp, but for any value
1153             # FIXME - to support record-oriented files, we need some replacement for this...
1154 874         3754 $next_record = [ split($split_regex, $line, $ordered_column_names_count) ];
1155 115         605 };
1156              
1157 115         150 my $number_of_comparisons = @comparison_for_column_this_file;
1158              
1159             # The file filter iterator.
1160             # This sub looks at @$next_record and applies the comparator functions in order.
1161             # If it passes all of them, it constructs a resultset row and passes it up to the
1162             # multiplexer iterator
1163             my $file_filter_iterator = sub {
1164 886     886   1188 $log_first_fetch->();
1165              
1166             FOR_EACH_LINE:
1167 886         971 for(1) {
1168 983         1056 $read_record_from_file->();
1169              
1170 983 100       1845 unless ($next_record) {
1171             # Done reading from this file
1172 109         314 return;
1173             }
1174              
1175 874         1567 for (my $i = 0; $i < $number_of_comparisons; $i++) {
1176             my $comparison = $comparison_for_column_this_file[$i]->(
1177 172         339 $value_for_column_name{ $rule_column_names_in_order[$i] }->($self, $next_record, $fh, $pathname)
1178             );
1179              
1180 172 100 100     1024 if ( ( ($column_for_this_comparison_is_sorted_descending{$i} and $comparison < 0) or $comparison > 0)
    100 100        
1181             and $i < $sorted_columns_in_rule_count
1182             ) {
1183             # We've gone past the last thing that could possibly match
1184 6         79 $logger->("FILE: $pathname $lines_read lines read for this request. $lines_matched matches\n"
1185             . sprintf("FILE: TOTAL EXECUTE-FETCH TIME: %.4f s\n", Time::HiRes::time() - $monitor_start_time));
1186              
1187             #flock($fh,LOCK_UN);
1188 6         18 return;
1189              
1190             } elsif ($comparison) {
1191             # comparison didn't match, read another line from the file
1192 97         116 redo FOR_EACH_LINE;
1193             }
1194              
1195             # That comparison worked... stay in the for() loop for other comparisons
1196             }
1197             }
1198             # All the comparisons return '0', meaning they passed
1199              
1200 771         1240 $log_first_match->();
1201 771         552 $lines_matched++;
1202 3049 50       4373 my @resultset = map { ref($_) ? $$_ : $_ }
1203 771         842 map { ref($value_for_column_name{$_})
1204             ? $value_for_column_name{$_}->($self, $next_record, $fh, $pathname)
1205 3049 50       4800 : $value_for_column_name{$_} # constant value from path spec
1206             }
1207             @column_names_in_loading_template_order;
1208 771         1671 return \@resultset;
1209 115         612 };
1210              
1211             # Higher layers in the loading logic require rows from the data source to be returned
1212             # in ID order. If the file contents is not sorted primarily by ID, then we need to do
1213             # the less efficient thing by first reading in all the matching rows in one go, sorting
1214             # them by ID, then iterating over the results
1215 115 100       328 unless ($self->file_is_sorted_as_requested($query_plan)) {
1216 153         308 my @resultset_indexes_to_sort = map { $column_name_to_resultset_index_map{$_} }
1217 105         140 @{ $query_plan->order_by_columns() };
  105         216  
1218 105         374 $file_filter_iterator
1219             = $self->_create_iterator_for_custom_sorted_columns($file_filter_iterator, $query_plan, \%column_name_to_resultset_index_map);
1220             }
1221              
1222 115         3291 push @iterator_for_each_file, $file_filter_iterator;
1223             }
1224              
1225 103 50       603 if (! @iterator_for_each_file) {
    100          
1226 0         0 return \&UR::Util::null_sub; # No matching files
1227             } elsif (@iterator_for_each_file == 1) {
1228 97         1435 return $iterator_for_each_file[0]; # If there's only 1 file, no need to multiplex
1229             }
1230              
1231 6         10 my @next_record_for_each_file; # in the same order as @iterator_for_each_file
1232              
1233 6         20 my %column_is_numeric = map { $_->column_name => $_->is_numeric }
1234 6         16 map { $class_meta->property_meta_for_name($_) }
1235 6         21 map { $class_meta->property_for_column($_) }
1236 6 50       26 map { index($_, '-') == 0 ? substr($_, 1) : $_ }
1237 6         8 @{ $query_plan->order_by_columns };
  6         19  
1238              
1239             my @resultset_index_sort_sub
1240 6         17 = map { &_resolve_sorter_for( is_numeric => $column_is_numeric{$_},
1241             is_descending => $column_is_sorted_descending{$_},
1242 0         0 column_index => $property_name_to_resultset_index_map{$_});
1243             }
1244             @sorted_column_names;
1245              
1246 6         15 my %resultset_idx_is_sorted_descending = map { $column_name_to_resultset_index_map{$_} => 1 }
  0         0  
1247             keys %column_is_sorted_descending;
1248             my $resultset_sorter = sub {
1249 2     2   5 my($idx_a,$idx_b) = shift;
1250              
1251 2         4 foreach my $sort_sub ( @resultset_index_sort_sub ) {
1252 0         0 my $cmp = $sort_sub->($next_record_for_each_file[$idx_a], $next_record_for_each_file[$idx_b]);
1253 0 0       0 return $cmp if $cmp; # done if they're not equal
1254             }
1255 2         4 return 0;
1256 6         26 };
1257              
1258             # This is the iterator returned to the Context, and knows about all the individual
1259             # file filter iterators. It compares the next resultset from each of them and
1260             # returns the next resultset to the Context
1261             my $multiplex_iterator = sub {
1262 12 50   12   27 return unless @iterator_for_each_file; # if they're all run out
1263              
1264 12         10 my $lowest_slot;
1265 12         31 for(my $i = 0; $i < @iterator_for_each_file; $i++) {
1266 26 100       41 unless(defined $next_record_for_each_file[$i]) {
1267 24         43 $next_record_for_each_file[$i] = $iterator_for_each_file[$i]->();
1268 24 100       36 unless (defined $next_record_for_each_file[$i]) {
1269             # That iterator is exhausted, splice it out
1270 18         23 splice(@iterator_for_each_file, $i, 1);
1271 18         25 splice(@next_record_for_each_file, $i, 1);
1272 18 100       38 return unless (@iterator_for_each_file); # This can happen here if none of the files have matching data
1273 12         16 redo;
1274             }
1275             }
1276              
1277 8 100       17 unless (defined $lowest_slot) {
1278 6         9 $lowest_slot = $i;
1279 6         10 next;
1280             }
1281              
1282 2         5 my $cmp = $resultset_sorter->($lowest_slot, $i);
1283 2 50       6 if ($cmp > 0) {
1284 0         0 $lowest_slot = $i;
1285             }
1286             }
1287              
1288 6         10 my $retval = $next_record_for_each_file[$lowest_slot];
1289 6         8 $next_record_for_each_file[$lowest_slot] = undef;
1290 6         11 return $retval;
1291 6         18 };
1292              
1293 6         112 return $multiplex_iterator;
1294             }
1295              
1296              
1297             # Constructors for subs to sort appropriately
1298             sub _resolve_sorter_for {
1299 143     143   316 my %params = @_;
1300              
1301 143         193 my $col_idx = $params{'column_index'};
1302              
1303             my $is_descending = (exists($params{'is_descending'}) && $params{'is_descending'})
1304             ||
1305 143   66     700 (exists($params{'is_ascending'}) && $params{'is_ascending'});
1306             my $is_numeric = (exists($params{'is_numeric'}) && $params{'is_numeric'})
1307             ||
1308 143   66     537 (exists($params{'is_string'}) && $params{'is_string'});
1309 143 100       238 if ($is_descending) {
1310 31 100       76 if ($is_numeric) {
1311 15     302   113 return sub($$) { $_[1]->[$col_idx] <=> $_[0]->[$col_idx] };
  302         448  
1312             } else {
1313 16     341   106 return sub($$) { $_[1]->[$col_idx] cmp $_[0]->[$col_idx] };
  341         446  
1314             }
1315             } else {
1316 112 100       171 if ($is_numeric) {
1317 95     546   554 return sub($$) { $_[0]->[$col_idx] <=> $_[1]->[$col_idx] };
  546         668  
1318             } else {
1319 17     355   108 return sub($$) { $_[0]->[$col_idx] cmp $_[1]->[$col_idx] };
  355         483  
1320             }
1321             }
1322             }
1323              
1324             # Higher layers in the loading logic require rows from the data source to be returned
1325             # in ID order. If the file contents is not sorted primarily by ID, then we need to do
1326             # the less efficient thing by first reading in all the matching rows in one go, sorting
1327             # them by ID, then iterating over the results
1328             sub _create_iterator_for_custom_sorted_columns {
1329 105     105   155 my($self, $iterator_this_file, $query_plan, $column_name_to_resultset_index_map) = @_;
1330              
1331 105         103 my @matching;
1332 105         210 while (my $row = $iterator_this_file->()) {
1333 726         1072 push @matching, $row; # save matches as [id, rowref]
1334             }
1335              
1336 105 100       334 unless (@matching) {
1337 12         45 return \&UR::Util::null_sub; # Easy, no matches
1338             }
1339              
1340 93         484 my $class_meta = $query_plan->class_name->__meta__;
1341 141         429 my %column_is_numeric = map { $_->column_name => $_->is_numeric }
1342 141         334 map { $class_meta->property_meta_for_name($_) }
1343 141         498 map { $class_meta->property_for_column($_) }
1344 141 100       773 map { index($_, '-') == 0 ? substr($_,1) : $_ }
1345 93         254 @{ $query_plan->order_by_columns };
  93         291  
1346              
1347 93         195 my @sorters;
1348 9     9   49 { no warnings 'numeric';
  9         14  
  9         313  
  93         121  
1349 9     9   36 no warnings 'uninitialized';
  9         12  
  9         10588  
1350 141         483 @sorters = map { &_resolve_sorter_for(%$_) }
1351 141         162 map { my $col_name = $_;
1352 141         155 my $descending = 0;
1353 141 100       335 if (index($col_name, '-') == 0) {
1354 31         51 $descending = 1;
1355 31         69 substr($col_name, 0, 1, ''); # remove the -
1356             }
1357 141         189 my $col_idx = $column_name_to_resultset_index_map->{$col_name};
1358 141         586 { column_index => $col_idx, is_descending => $descending, is_numeric => $column_is_numeric{$col_name} };
1359             }
1360 93         147 @{ $query_plan->order_by_columns };
  93         346  
1361             }
1362              
1363 93         224 my $sort_by_order_by_columns;
1364 93 100       205 if (@sorters == 1) {
1365 45         74 $sort_by_order_by_columns = $sorters[0];
1366             } else {
1367             $sort_by_order_by_columns
1368             = sub($$) {
1369 1004     1004   811 foreach (@sorters) {
1370 1084 100       939 if (my $rv = $_->(@_)) {
1371 1004         793 return $rv;
1372             }
1373             }
1374 0         0 return 0;
1375 48         167 };
1376             }
1377 93         288 @matching = sort $sort_by_order_by_columns
1378             @matching;
1379              
1380             return sub {
1381 818     818   1172 return shift @matching;
1382 93         729 };
1383             }
1384              
1385              
1386             sub initializer_should_create_column_name_for_class_properties {
1387 66     66 0 952 1;
1388             }
1389              
1390              
1391             # The string used to join fields of a row together when writing
1392             #
1393             # Since the 'delimiter' property is interpreted as a regex in the reading
1394             # code, we'll try to be smart about making a real string from that.
1395             #
1396             # subclasses can override this to provide a different implementation
1397             sub column_join_string {
1398 1     1 0 1 my $self = shift;
1399              
1400 1         3 my $join_pattern = $self->delimiter;
1401              
1402             # make some common substitutions...
1403 1 50       4 if ($join_pattern eq '\s*,\s*') {
1404             # The default...
1405 0         0 return ', ';
1406             }
1407              
1408 1         3 $join_pattern =~ s/\\s*//g; # Turn 0-or-more whitespaces to nothing
1409 1         1 $join_pattern =~ s/\\t/\t/; # tab
1410 1         2 $join_pattern =~ s/\\s/ /; # whitespace
1411              
1412 1         2 return $join_pattern;
1413             }
1414              
1415              
1416             sub _sync_database {
1417 1     1   3 my $self = shift;
1418 1         2 my %params = @_;
1419              
1420 1 50       3 unless (ref($self)) {
1421 0 0       0 if ($self->isa("UR::Singleton")) {
1422 0         0 $self = $self->_singleton_object;
1423             }
1424             else {
1425 0         0 Carp::croak("Cannot call _sync_database as a class method on a non-singleton class");
1426             }
1427             }
1428              
1429 1         3 $DB::single=1;
1430 1         2 my $changed_objects = delete $params{'changed_objects'};
1431              
1432 1         7 my $path_spec = $self->path;
1433              
1434             # First, bin up the changed objects by their class' table_name
1435 1         2 my %objects_for_path;
1436 1         2 foreach my $obj ( @$changed_objects ) {
1437 7         10 my @path = $self->resolve_file_info_for_rule_and_path_spec($obj, $path_spec);
1438 7 50       16 if (!@path) {
    50          
1439 0         0 $self->error_message("Couldn't resolve destination file for object "
1440             .$obj->class." ID ".$obj->id.": ".Data::Dumper::Dumper($obj));
1441 0         0 return;
1442             } elsif (@path > 1) {
1443 0         0 $self->error_message("Got multiple filenames when resolving destination file for object "
1444             . $obj->class." ID ".$obj->id.": ".join(', ', @path));
1445             }
1446 7   100     15 $objects_for_path{ $path[0]->[0] } ||= [];
1447 7         5 push @{ $objects_for_path{ $path[0]->[0] } }, $obj;
  7         11  
1448             }
1449              
1450 1         2 my %objects_for_pathname;
1451 1         3 foreach my $path ( keys %objects_for_path ) {
1452 1         2 foreach my $obj ( @{ $objects_for_path{$path} } ) {
  1         2  
1453 7         12 my $class_meta = $obj->__meta__;
1454 7         12 my $table_name = $class_meta->table_name;
1455 7         6 my $pathname = $path;
1456 7 50 33     20 if (defined($table_name) and $table_name ne '__default__') {
1457 0         0 $pathname .= '/' . $table_name;
1458             }
1459 7   100     13 $objects_for_pathname{$pathname} ||= [];
1460 7         4 push @{ $objects_for_pathname{$pathname} }, $obj;
  7         8  
1461             }
1462             }
1463              
1464 1         2 my %column_is_sorted_descending;
1465 2 50       4 my @sorted_column_names = map { if (index($_, '-') == 0) {
1466 0         0 my $s = $_;
1467 0         0 substr($s, 0, 1, '');
1468 0         0 $column_is_sorted_descending{$s} = $s;
1469             } else {
1470 2         3 $_;
1471             }
1472             }
1473 1 50       1 @{ $self->sorted_columns() || [] };
  1         4  
1474              
1475 1         7 my $handle_class = $self->handle_class;
1476 1         19 my $use_quick_read = $handle_class->isa('IO::Handle');
1477              
1478 1         197 my $join_string = $self->column_join_string;
1479 1         3 my $record_separator = $self->record_separator;
1480 1         5 my $split_regex = $self->_regex();
1481 1         4 local $/; # Make sure some wise guy hasn't changed this out from under us
1482 1         2 $/ = $record_separator;
1483              
1484 1         4 my $logger = $self->_logger('UR_DBI_MONITOR_SQL');
1485 1         5 my $total_save_time = Time::HiRes::time();
1486 1         9 $logger->("FILE: Saving changes to ".scalar(keys %objects_for_pathname) . " files:\n\t"
1487             . join("\n\t", keys(%objects_for_pathname)) . "\n\n");
1488              
1489 1         2 foreach my $pathname ( keys %objects_for_pathname ) {
1490 1         2 my $use_quick_rename;
1491 1         48 my $containing_directory = File::Basename::dirname($pathname);
1492 1 50       38 unless (-d $containing_directory) {
1493 0         0 File::Path::mkpath($containing_directory);
1494             }
1495 1 50       7 if (-w $containing_directory) {
    0          
1496 1         2 $use_quick_rename = 1;
1497             } elsif (! -w $pathname) {
1498 0         0 Carp::croak("Cannot save to file $pathname: Neither the directory nor the file are writable");
1499             }
1500              
1501 1         6 my $read_fh = $handle_class->new($pathname);
1502              
1503             # Objects going to the same file should all be of a common class
1504 1         94 my $class_meta = $objects_for_pathname{$pathname}->[0]->__meta__;
1505              
1506 1         2 my @property_names_that_are_sorted = map { $class_meta->property_for_column($_) }
  2         13  
1507             @sorted_column_names;
1508             # Returns true of the passed-in object has a change in one of the sorted columns
1509             my $object_has_changed_sorted_column = sub {
1510 2     2   3 my $obj = shift;
1511 2         3 foreach my $prop ( @property_names_that_are_sorted ) {
1512 3 100       9 if (UR::Context->_get_committed_property_value($obj, $prop) ne $obj->$prop) {
1513 1         3 return 1;
1514             }
1515             }
1516 1         3 return 0;
1517 1         5 };
1518              
1519 1         9 my $column_names_in_file = $self->_resolve_column_names_from_pathname($pathname, $read_fh);
1520 1         2 my $column_names_count = @$column_names_in_file;
1521 1         1 my %column_name_to_index;
1522 1         3 for (my $i = 0; $i < @$column_names_in_file; $i++) {
1523 3         8 $column_name_to_index{$column_names_in_file->[$i]} = $i;
1524             }
1525             # This lets us take a hash slice of the object and get a row for the file
1526 1         3 my @property_names_in_column_order = map { $class_meta->property_for_column($_) }
  3         4  
1527             @$column_names_in_file;
1528              
1529 3         9 my %column_name_is_numeric = map { $_->column_name => $_->is_numeric }
1530 3         9 map { $class_meta->property_meta_for_name($_) }
1531 1         3 map { $class_meta->property_for_column($_) }
  3         4  
1532             @$column_names_in_file;
1533              
1534 1         3 my $insert = [];
1535 1         3 my $update = {};
1536 1         1 my $delete = {};
1537 1         2 foreach my $obj ( @{ $objects_for_pathname{$pathname} } ) {
  1         4  
1538 7 100       22 if ($obj->isa('UR::Object::Ghost')) {
    100          
1539             # This should be removed from the file
1540 2         3 my $original = $obj->{'db_committed'};
1541 2         3 my $line = join($join_string, @{$original}{@property_names_in_column_order}) . $record_separator;
  2         4  
1542 2         4 $delete->{$line} = $obj;
1543              
1544             } elsif ($obj->{'db_committed'}) {
1545             # this is a changed object
1546 2         3 my $original = $obj->{'db_committed'};
1547              
1548 2 100       3 if ($object_has_changed_sorted_column->($obj)) {
1549             # One of hte sorted columns has changed. Model this as a delete and insert
1550 1         2 push @$insert, [ @{$obj}{@property_names_in_column_order} ];
  1         2  
1551 1         2 my $line = join($join_string, @{$original}{@property_names_in_column_order}) . $record_separator;
  1         3  
1552 1         3 $delete->{$line} = $obj;
1553             } else {
1554             # This object is changed since it was read in the file
1555 1         1 my $original_line = join($join_string, @{$original}{@property_names_in_column_order}) . $record_separator;
  1         4  
1556 1         2 my $changed_line = join($join_string, @{$obj}{@property_names_in_column_order}) . $record_separator;
  1         3  
1557 1         2 $update->{$original_line} = $changed_line;
1558             }
1559              
1560             } else {
1561             # This object is new and should be added to the file
1562 3         4 push @$insert, [ @{$obj}{@property_names_in_column_order} ];
  3         6  
1563             }
1564             }
1565              
1566 1         2 my %column_is_sorted_descending;
1567 2 50       4 my @sorted_column_names = map { if (index($_, '-') == 0) {
1568 0         0 my $s = $_;
1569 0         0 substr($s, 0, 1, '');
1570 0         0 $column_is_sorted_descending{$s} = $s;
1571             } else {
1572 2         4 $_;
1573             }
1574             }
1575 1 50       2 @{ $self->sorted_columns() || [] };
  1         3  
1576              
1577 1         2 my $row_sort_sub;
1578 1 50       2 if (@sorted_column_names) {
1579 1         2 my @comparison_subs = map { &_resolve_sorter_for(is_numeric => $column_name_is_numeric{$_},
1580             is_descending => $column_is_sorted_descending{$_},
1581 2         8 column_index => $column_name_to_index{$_})
1582             }
1583             @sorted_column_names;
1584              
1585             $row_sort_sub = sub ($$) {
1586 15     15   15 foreach my $comparator ( @comparison_subs ) {
1587 16         20 my $cmp = $comparator->($_[0], $_[1]);
1588 16 100       27 return $cmp if $cmp;
1589             }
1590 0         0 return 0;
1591 1         3 };
1592              
1593             # Put the rows-to-insert in sorted order
1594 1         4 my @insert_sorted = sort $row_sort_sub @$insert;
1595 1         2 $insert = \@insert_sorted;
1596             }
1597              
1598 1 50       11 my $write_fh = $use_quick_rename
1599             ? File::Temp->new(DIR => $containing_directory)
1600             : File::Temp->new();
1601 1 50       434 unless ($write_fh) {
1602 0         0 Carp::croak("Can't save changes for $pathname: Can't create temporary file for writing: $!");
1603             }
1604            
1605 1         10 my $monitor_start_rime = Time::HiRes::time();
1606 1         1 my $time = time();
1607 1         62 $logger->(sprintf("\nFILE: SYNC DATABASE AT %s [%s]. Started transaction for %s to temp file %s\n",
1608             $time, scalar(localtime($time)), $pathname, $write_fh->filename));
1609              
1610             # Write headers to the new file
1611 1         4 for (my $i = 0; $i < $self->header_lines; $i++) {
1612 0 0       0 my $line = $use_quick_read ? <$read_fh> : $read_fh->getline();
1613 0         0 $write_fh->print($line);
1614             }
1615              
1616 1         2 my $line;
1617             READ_A_LINE:
1618 1         2 while(1) {
1619 11 100       14 unless ($line) {
1620 8 50       34 $line = $use_quick_read ? <$read_fh> : $read_fh->getline();
1621 8 100       13 last unless defined $line;
1622             }
1623              
1624 10 50 50     24 if (@sorted_column_names and scalar(@$insert)) {
1625             # There are sorted things waiting to insert
1626 10         7 my $chomped = $line;
1627 10         45 $chomped =~ s/$record_separator$//; # chomp, but for any value
1628 10         26 my $row = [ split($split_regex, $chomped, $column_names_count) ];
1629 10         15 my $cmp = $row_sort_sub->($row, $insert->[0]);
1630 10 100       16 if ($cmp > 0) {
1631             # write the object's data
1632 9     9   51 no warnings 'uninitialized'; # Some of the object's data may be undef
  9         15  
  9         1608  
1633 3         5 my $new_row = shift @$insert;
1634 3         4 my $new_line = join($join_string, @$new_row) . $record_separator;
1635              
1636 3         8 $logger->("FILE: INSERT >>$new_line<<\n");
1637              
1638 3         5 $write_fh->print($new_line);
1639             # Don't undef the last line read, meaning it could still be written to the output...
1640 3         12 next READ_A_LINE;
1641             }
1642             }
1643              
1644 7 100       17 if (my $obj = delete $delete->{$line}) {
    100          
1645 2         5 $logger->("FILE: DELETE >>$line<<\n");
1646              
1647             } elsif (my $changed = delete $update->{$line}) {
1648 1         9 $logger->("FILE: UPDFATE replace >>$line<< with >>$changed<<\n");
1649 1         5 $write_fh->print($changed);
1650              
1651             } else {
1652             # This line form the file was unchanged in the app
1653 4         7 $write_fh->print($line);
1654             }
1655 7         25 $line = undef;
1656             }
1657              
1658 1 50       4 if (keys %$delete) {
1659 0         0 $self->warning_message("There were " . scalar( keys %$delete)
1660             . " deleted " . $class_meta->class_name
1661             . " objects that did not match data in file $pathname");
1662             }
1663 1 50       5 if (keys %$update) {
1664 0         0 $self->warning_message("There were " . scalar( keys %$delete)
1665             . " updated " . $class_meta->class_name
1666             . " objects that did not match data in file $pathname");
1667             }
1668              
1669             # finish out by writing the rest of the new data
1670 1         2 foreach my $new_row ( @$insert ) {
1671 9     9   38 no warnings 'uninitialized'; # Some of the object's data may be undef
  9         12  
  9         5830  
1672 1         2 my $new_line = join($join_string, @$new_row) . $record_separator;
1673 1         4 $logger->("FILE: INSERT >>$new_line<<\n");
1674 1         2 $write_fh->print($new_line);
1675             }
1676              
1677 1         5 my $changed_objects = $objects_for_pathname{$pathname};
1678 1 50       9 unless ($self->_set_specified_objects_saved_uncommitted( $changed_objects )) {
1679 0         0 Carp::croak("Error setting objects to a saved state after syncing");
1680             }
1681             # These closures will keep $write_fh in scope and delay their removal until
1682             # commit() or rollback(). Call these with no args to commit, and one arg (doesn't
1683             # matter what) to roll back
1684             my $commit = $use_quick_rename
1685             ? sub {
1686 1 50   1   2 if (@_) {
1687 0         0 $self->_set_specified_objects_saved_rolled_back($changed_objects);
1688             } else {
1689 1         3 my $temp_filename = $write_fh->filename;
1690 1         9 $logger->("FILE: COMMIT rename $temp_filename => $pathname\n");
1691 1 50       61 unless (rename($temp_filename, $pathname)) {
1692 0         0 $self->error_message("Can't rename $temp_filename to $pathname: $!");
1693 0         0 return;
1694             }
1695 1         9 $self->_set_specified_objects_saved_committed($changed_objects);
1696             }
1697 1         3 return 1;
1698             }
1699             :
1700             sub {
1701 0 0   0   0 if (@_) {
1702 0         0 $self->_set_specified_objects_saved_rolled_back($changed_objects);
1703             } else {
1704 0         0 my $temp_filename = $write_fh->filename;
1705 0         0 $logger->("FILE: COMMIT copy " . $temp_filename . " => $pathname\n");
1706 0         0 my $read_fh = IO::File->new($temp_filename);
1707 0 0       0 unless ($read_fh) {
1708 0         0 $self->error_message("Can't open file $temp_filename for reading: $!");
1709 0         0 return;
1710             }
1711 0         0 my $copy_fh = IO::File->new($pathname, 'w');
1712 0 0       0 unless ($copy_fh) {
1713 0         0 $self->error_message("Can't open file $pathname for writing: $!");
1714 0         0 return;
1715             }
1716              
1717 0         0 while(<$read_fh>) {
1718 0         0 $copy_fh->print($_);
1719             }
1720 0         0 $copy_fh->close();
1721 0         0 $read_fh->close();
1722 0         0 $self->_set_specified_objects_saved_committed($changed_objects);
1723             }
1724 0         0 return 1;
1725 1 50       7 };
1726              
1727 1         10 $write_fh->close();
1728              
1729 1   50     88 $self->{'__saved_uncommitted'} ||= [];
1730 1         1 push @{ $self->{'__saved_uncommitted'} }, $commit;
  1         4  
1731              
1732 1         2 $time = time();
1733 1         4 $logger->("\nFILE: SYNC DATABASE finished ".$write_fh->filename . "\n");
1734             }
1735              
1736             $logger->(sprintf("Saved changes to %d files in %.4f s\n",
1737 1         2 scalar(@{ $self->{'__saved_uncommitted'}}), Time::HiRes::time() - $total_save_time));
  1         19  
1738 1         7 return 1;
1739             }
1740              
1741             sub commit {
1742 1     1 1 1 my $self = shift;
1743 1 50 33     4 if (! ref($self) and $self->isa('UR::Singleton')) {
1744 0         0 $self = $self->_singleton_object;
1745             }
1746              
1747 1 50       4 if ($self->{'__saved_uncommitted'}) {
1748 1         1 foreach my $commit ( @{ $self->{'__saved_uncommitted'}}) {
  1         2  
1749 1         3 $commit->();
1750             }
1751             }
1752 1         10 delete $self->{'__saved_uncommitted'};
1753              
1754 1         4 return 1;
1755             }
1756              
1757              
1758             sub rollback {
1759 0     0 1   my $self = shift;
1760 0 0 0       if (! ref($self) and $self->isa('UR::Singleton')) {
1761 0           $self = $self->_singleton_object;
1762             }
1763              
1764 0 0         if ($self->{'__saved_uncommitted'}) {
1765 0           foreach my $commit ( @{ $self->{'__saved_uncommitted'}}) {
  0            
1766 0           $commit->('rollback');
1767             }
1768             }
1769 0           delete $self->{'__saved_uncommitted'};
1770              
1771 0           return 1;
1772             }
1773              
1774              
1775             1;
1776              
1777             __END__