File Coverage

lib/UR/DataSource/Filesystem.pm

Criterion	Covered	Total	%
statement	738	969	76.1
branch	207	398	52.0
condition	57	114	50.0
subroutine	72	98	73.4
pod	5	13	38.4
total	1079	1592	67.7

line	stmt	bran	cond	sub	pod	time	code
1							package UR::DataSource::Filesystem;
2
3	9			9		488	use UR;
	9					13
	9					94
4	9			9		37	use strict;
	9					13
	9					196
5	9			9		30	use warnings;
	9					10
	9					406
6							our $VERSION = "0.46"; # UR $VERSION;
7
8	9			9		34	use File::Basename;
	9					10
	9					567
9	9			9		33	use File::Path;
	9					11
	9					318
10	9			9		34	use List::Util;
	9					10
	9					342
11	9			9		31	use Scalar::Util;
	9					13
	9					273
12	9			9		33	use Errno qw(EINTR EAGAIN EOPNOTSUPP);
	9					9
	9					19408
13
14							# lets you specify the server in several ways:
15							# path => '/path/name'
16							# means there is one file storing the data
17							# path => [ '/path1/name', '/path2/name' ]
18							# means the first tile we need to open the file, pick one (for load balancing)
19							# path => '/path/to/directory/'
20							# means that directory contains one or more files, and the classes using
21							# this datasource can have table_name metadata to pick the file
22							# path => '/path/$param1/${param2}.ext'
23							# means the values for $param1 and $param2 should come from the input rule.
24							# If the rule doesn't specify the param, then it should glob for the possible
25							# names at that point in the filesystem
26							# path => '/path/&method/filename'
27							# means the value for that part of the path should come from a method call
28							# run as $subject_class_name->$method($rule)
29							# path => '/path/*/path/$name/
30							# means it should glob at the appropriate time for the '*', but no use the
31							# paths found matching the glob to infer any values
32
33							# maybe suppert a URI scheme like
34							# file:/path/$to/File.ext?columns=[a,b,c]&sorted_columns=[a,b]
35
36							# TODO
37							# * Support non-equality operators for properties that are part of the path spec
38
39
40							class UR::DataSource::Filesystem {
41							is => 'UR::DataSource',
42							has => [
43							path => { doc => 'Path spec for the path on the filesystem containing the data' },
44							delimiter => { is => 'String', default_value => '\s,\s', doc => 'Delimiter between columns on the same line' },
45							record_separator => { is => 'String', default_value => "\n", doc => 'Delimiter between lines in the file' },
46							header_lines => { is => 'Integer', default_value => 0, doc => 'Number of lines at the start of the file to skip' },
47							columns_from_header => { is => 'Boolean', default_value => 0, doc => 'The column names are in the first line of the file' },
48							handle_class => { is => 'String', default_value => 'IO::File', doc => 'Class to use for new file handles' },
49							],
50							has_optional => [
51							columns => { is => 'ARRAY', doc => 'Names of the columns in the file, in order' },
52							sorted_columns => { is => 'ARRAY', doc => 'Names of the columns by which the data file is sorted' },
53							],
54							doc => 'A data source for treating files as relational data',
55							};
56
57	2			2	0	4	sub can_savepoint { 0;} # Doesn't support savepoints
58
59							# Filesystem datasources don't have a "default_handle"
60	0			0	0	0	sub create_default_handle { undef }
61
62							sub _regex {
63	107			107		174	my $self = shift;
64
65	107	100				329	unless ($self->{'_regex'}) {
66	8					32	my $delimiter = $self->delimiter;
67	8					16	my $r = eval { qr($delimiter) };
	8					143
68	8	50	33			52	if ($@ \|\| !$r) {
69	0					0	$self->error_message("Unable to interepret delimiter '".$self->delimiter.": $@");
70	0					0	return;
71							}
72	8					21	$self->{'_regex'} = $r;
73							}
74	107					207	return $self->{'_regex'};
75							}
76
77
78							sub _logger {
79	104			104		173	my $self = shift;
80	104					148	my $varname = shift;
81	104	50				363	if ($ENV{$varname}) {
82	0					0	my $log_fh = UR::DBI->sql_fh;
83							return sub {
84	0			0		0	my $msg = shift;
85	0					0	my $time = time();
86	0					0	$msg =~ s/\b\$time\b/$time/g;
87	0					0	my $localtime = scalar(localtime $time);
88	0					0	$msg =~ s/\b\$localtime\b/$localtime/;
89
90	0					0	$log_fh->print($msg);
91	0					0	};
92							} else {
93	104					246	return \&UR::Util::null_sub;
94							}
95							}
96
97							# The behavior for handling the filehandles after fork is contained in
98							# the read_record_from_file closure. There's nothing special for the
99							# data source to do
100							sub prepare_for_fork {
101	0			0	0	0	return 1;
102							}
103							sub finish_up_after_fork {
104	0			0	0	0	return 1;
105							}
106
107							# Like UR::BoolExpr::specifies_value_for, but works on either a BoolExpr
108							# or another object. In the latter case, it returns true if the object's
109							# class has the given property
110							sub __specifies_value_for {
111	50			50		68	my($self, $thing, $property_name) = @_;
112
113	50	50				213	return $thing->isa('UR::BoolExpr')
114							? $thing->specifies_value_for($property_name)
115							: $thing->__meta__->property_meta_for_name($property_name);
116							}
117
118							# Like UR::BoolExpr::value_for, but works on either a BoolExpr
119							# or another object.
120							sub __value_for {
121	18			18		21	my($self, $thing, $property_name) = @_;
122
123	18	50				87	return $thing->isa('UR::BoolExpr')
124							? $thing->value_for($property_name)
125							: $thing->$property_name;
126							}
127
128							# Like UR::BoolExpr::subject_class_name, but works on either a BoolExpr
129							# or another object.
130							sub __subject_class_name {
131	181			181		201	my($self, $thing) = @_;
132
133	181	100				874	return $thing->isa('UR::BoolExpr')
134							? $thing->subject_class_name()
135							: $thing->class;
136							}
137
138
139							sub _replace_vars_with_values_in_pathname {
140	173			173		2897	my($self, $rule_or_obj, $string, $prop_values_hash) = @_;
141
142	173		100			618	$prop_values_hash \|\|= {};
143
144							# Match something like /some/path/$var/name or /some/path${var}.ext/name
145	173	100				612	if ($string =~ m/\$\{?(\w+)\}?/) {
146	50					103	my $varname = $1;
147	50					112	my $subject_class_name = $self->__subject_class_name($rule_or_obj);
148	50	50				142	unless ($subject_class_name->__meta__->property_meta_for_name($varname)) {
149	0					0	Carp::croak("Invalid 'server' for data source ".$self->id
150							. ": Path spec $string requires a value for property $varname "
151							. " which is not a property of class $subject_class_name");
152							}
153	50					63	my @string_replacement_values;
154
155	50	100				126	if ($self->__specifies_value_for($rule_or_obj, $varname)) {
156	18					45	my @property_values = $self->__value_for($rule_or_obj, $varname);
157	18	100	66			88	if (@property_values == 1 and ref($property_values[0]) eq 'ARRAY') {
158	3					4	@property_values = @{$property_values[0]};
	3					5
159							}
160							# Make a listref that has one element per value for that property in the rule (in-clause
161							# rules may have more than one value)
162							# Each element has 2 parts, first is the value, second is the accumulated prop_values_hash
163							# where we've added the occurance of this property havine one of the values
164	18					30	@property_values = map { [ $_, { %$prop_values_hash, $varname => $_ } ] } @property_values;
	21					80
165
166							# Escape any shell glob characters in the values: [ ] { } ~ ? * and \
167							# we don't want a property with value '?' to be a glob wildcard
168	18					23	@string_replacement_values = map { $_->[0] =~ s/([[\]{}~?*\\])/\\$1/; $_ } @property_values;
	21					48
	21					36
169
170							} else {
171							# The rule doesn't have a value for this property.
172							# Put a shell wildcard in here, and a later glob will match things
173							# The '.__glob_positions__' key holds a list of places we've inserted shell globs.
174							# Each element is a 2-element list: index 0 is the string position, element 1 if the variable name.
175							# This is needed so the later glob expansion can tell the difference between globs
176							# that are part of the original path spec, and globs put in here
177	32	100				34	my @glob_positions = @{ $prop_values_hash->{'.__glob_positions__'} \|\| [] };
	32					138
178
179	32					138	my $glob_pos = $-[0];
180	32					73	push @glob_positions, [$glob_pos, $varname];
181	32					154	@string_replacement_values = ([ '*', { %$prop_values_hash, '.__glob_positions__' => \@glob_positions} ]);
182							}
183
184							my @return = map {
185	50					71	my $s = $string;
	53					55
186	53					243	substr($s, $-[0], $+[0] - $-[0], $_->[0]);
187	53					154	[ $s, $_->[1] ];
188							}
189							@string_replacement_values;
190
191							# recursion to process the next variable replacement
192	50					59	return map { $self->_replace_vars_with_values_in_pathname($rule_or_obj, @$_) } @return;
	53					152
193
194							} else {
195	123					411	return [ $string, $prop_values_hash ];
196							}
197							}
198
199							sub _replace_subs_with_values_in_pathname {
200	131			131		1294	my($self, $rule_or_obj, $string, $prop_values_hash) = @_;
201
202	131		100			263	$prop_values_hash \|\|= {};
203	131					313	my $subject_class_name = $self->__subject_class_name($rule_or_obj);
204
205							# Match something like /some/path/&sub/name or /some/path&{sub}.ext/name
206	131	100				346	if ($string =~ m/\&\{?(\w+)\}?/) {
207	13					18	my $subname = $1;
208	13	50				41	unless ($subject_class_name->can($subname)) {
209	0					0	Carp::croak("Invalid 'server' for data source ".$self->id
210							. ": Path spec $string requires a value for method $subname "
211							. " which is not a method of class " . $self->__subject_class_name($rule_or_obj));
212							}
213
214	13					74	my @property_values = eval { $subject_class_name->$subname($rule_or_obj) };
	13					32
215	13	50				56	if ($@) {
216	0					0	Carp::croak("Can't resolve final path for 'server' for data source ".$self->id
217							. ": Method call to ${subject_class_name}::${subname} died with: $@");
218							}
219	13	50	66			44	if (@property_values == 1 and ref($property_values[0]) eq 'ARRAY') {
220	0					0	@property_values = @{$property_values[0]};
	0					0
221							}
222							# Make a listref that has one element per value for that property in the rule (in-clause
223							# rules may have more than one value)
224							# Each element has 2 parts, first is the value, second is the accumulated prop_values_hash
225							# where we've added the occurance of this property havine one of the values
226	13					13	@property_values = map { [ $_, { %$prop_values_hash } ] } @property_values;
	15					50
227
228							# Escape any shell glob characters in the values: [ ] { } ~ ? * and \
229							# we don't want a return value '?' or '*' to be a glob wildcard
230	13					16	my @string_replacement_values = map { $_->[0] =~ s/([[\]{}~?*\\])/\\$1/; $_ } @property_values;
	15					23
	15					21
231
232							# Given a pathname returned from the glob, return a new glob_position_list
233							# that has fixed up the position information accounting for the fact that
234							# the globbed pathname is a different length than the original spec
235	13					11	my $original_path_length = length($string);
236	13					14	my $glob_position_list = $prop_values_hash->{'.__glob_positions__'};
237	13					21	my $subname_replacement_position = $-[0];
238							my $fix_offsets_in_glob_list = sub {
239	15			15		13	my $pathname = shift;
240							# alter the position only if it is greater than the position of
241							# the subname we're replacing
242	15	100				24	return map { [ $_->[0] < $subname_replacement_position
	6					17
243							? $_->[0]
244							: $_->[0] + length($pathname) - $original_path_length,
245							$_->[1] ]
246							}
247							@$glob_position_list;
248	13					43	};
249
250							my @return = map {
251	13					13	my $s = $string;
	15					13
252	15					51	substr($s, $-[0], $+[0] - $-[0], $_->[0]);
253	15					24	$_->[1]->{'.__glob_positions__'} = [ $fix_offsets_in_glob_list->($s) ];
254	15					33	[ $s, $_->[1] ];
255							}
256							@string_replacement_values;
257
258							# recursion to process the next method call
259	13					12	return map { $self->_replace_subs_with_values_in_pathname($rule_or_obj, @$_) } @return;
	15					34
260
261							} else {
262	118					383	return [ $string, $prop_values_hash ];
263							}
264							}
265
266							sub _replace_glob_with_values_in_pathname {
267	195			195		7732	my($self, $string, $prop_values_hash) = @_;
268
269							# a * not preceeded by a backslash, delimited by /
270	195	100				953	if ($string =~ m#([^/]?[^\\/]?(\)[^/]*)#) {
271	42					92	my $glob_pos = $-[2];
272
273	42					102	my $path_segment_including_glob = substr($string, 0, $+[0]);
274	42					72	my $remaining_path = substr($string, $+[0]);
275	42					3419	my @glob_matches = map { $_ . $remaining_path }
	78					201
276							glob($path_segment_including_glob);
277
278	42					58	my $resolve_glob_values_for_each_result;
279	42					58	my $glob_position_list = $prop_values_hash->{'.__glob_positions__'};
280
281							# Given a pathname returned from the glob, return a new glob_position_list
282							# that has fixed up the position information accounting for the fact that
283							# the globbed pathname is a different length than the original spec
284	42					52	my $original_path_length = length($string);
285							my $fix_offsets_in_glob_list = sub {
286	78			78		75	my $pathname = shift;
287	78					131	return map { [ $_->[0] + length($pathname) - $original_path_length, $_->[1] ] } @$glob_position_list;
	26					71
288	42					192	};
289
290	42	100				93	if ($glob_position_list->[0]->[0] == $glob_pos) {
291							# This * was put in previously by a $propname in the spec that wasn't mentioned in the rule
292
293	40					61	my $path_delim_pos = index($path_segment_including_glob, '/', $glob_pos);
294	40	50				84	$path_delim_pos = length($path_segment_including_glob) if ($path_delim_pos == -1); # No more /s
295
296	40					41	my $regex_as_str = $path_segment_including_glob;
297							# Find out just how many *s we're dealing with and where they are, up to the next /
298							# remove them from the glob_position_list because we're going to resolve their values
299	40					69	my(@glob_positions, @property_names);
300	40		100			252	while (@$glob_position_list
301							and
302							$glob_position_list->[0]->[0] < $path_delim_pos
303							) {
304	42					58	my $this_glob_info = shift @{$glob_position_list};
	42					106
305	42					84	push @glob_positions, $this_glob_info->[0];
306	42					156	push @property_names, $this_glob_info->[1];
307							}
308							# Replace the *s found with regex captures
309	40					71	my $glob_replacement = '([^/]*)';
310	40					53	my $glob_rpl_offset = 0;
311	40					90	my $offset_inc = length($glob_replacement) - 1; # replacing a 1-char string '' with a 7-char string '([^/])'
312							$regex_as_str = List::Util::reduce( sub {
313	42			42		84	substr($a, $b + $glob_rpl_offset, 1, $glob_replacement);
314	42					53	$glob_rpl_offset += $offset_inc;
315	42					59	$a;
316							},
317	40					281	($regex_as_str, @glob_positions) );
318
319	40					689	my $regex = qr{$regex_as_str};
320	40					73	my @property_values_for_each_glob_match = map { [ $_, [ $_ =~ $regex] ] } @glob_matches;
	76					493
321
322							# Fill in the property names into .__glob_positions__
323							# we've resolved in this iteration, and apply offset fixups for the
324							# difference in string length between the pre- and post-glob pathnames
325
326							$resolve_glob_values_for_each_result = sub {
327							return map {
328	40			40		45	my %h = %$prop_values_hash;
	76					186
329	76					71	@h{@property_names} = @{$_->[1]};
	76					130
330	76					106	$h{'.__glob_positions__'} = [ $fix_offsets_in_glob_list->($_->[0]) ];
331	76					174	[$_->[0], \%h];
332							}
333							@property_values_for_each_glob_match;
334	40					202	};
335
336							} else {
337							# This is a glob put in the original path spec
338							# The new path comes from the @glob_matches list.
339							# Apply offset fixups for the difference in string length between the
340							# pre- and post-glob pathnames
341							$resolve_glob_values_for_each_result = sub {
342	2			2		3	return map { [
343	2					5	$_,
344							{ %$prop_values_hash,
345							'.__glob_positions__' => [ $fix_offsets_in_glob_list->($_) ]
346							}
347							]
348							}
349							@glob_matches;
350	2					6	};
351							}
352
353	42					69	my @resolved_paths_and_property_values = $resolve_glob_values_for_each_result->();
354
355							# Recursion to process the next glob
356	42					49	return map { $self->_replace_glob_with_values_in_pathname( @$_ ) }
	78					177
357							@resolved_paths_and_property_values;
358
359							} else {
360	153					196	delete $prop_values_hash->{'.__glob_positions__'};
361	153					723	return [ $string, $prop_values_hash ];
362							}
363							}
364
365
366							sub resolve_file_info_for_rule_and_path_spec {
367	112			112	0	168	my($self, $rule, $path_spec) = @_;
368
369	112		66			497	$path_spec \|\|= $self->path;
370
371	112					322	return map { $self->_replace_glob_with_values_in_pathname(@$_) }
372	112					369	map { $self->_replace_subs_with_values_in_pathname($rule, @$_) }
	112					351
373							$self->_replace_vars_with_values_in_pathname($rule, $path_spec);
374							}
375
376
377							# We're overriding path() so the first time it's called, it will
378							# pick one from the list and then stay with that one for the life
379							# of the program
380							sub path {
381	105			105	1	153	my $self = shift;
382
383	105	50				283	unless ($self->{'__cached_path'}) {
384	105					331	my $path = $self->__path();
385	105	50	33			298	if (ref($path) and ref($path) eq 'ARRAY') {
386	0					0	my $count = @$path;
387	0					0	my $idx = $$ % $count;
388	0					0	$self->{'_cached_path'} = $path->[$idx];
389							} else {
390	105					225	$self->{'_cached_path'} = $path;
391							}
392							}
393	105					261	return $self->{'_cached_path'};
394							}
395
396							# Names of creation params that we should force to be listrefs
397							our %creation_param_is_list = map { $_ => 1 } qw( columns sorted_columns );
398							sub create_from_inline_class_data {
399	4			4	1	9	my($class, $class_data, $ds_data) = @_;
400
401							#unless (exists $ds_data->{'columns'}) {
402							# User didn't specify columns in the file. Assumme every property is a column, and in the same order
403							# We'll have to ask the class object for the column list the first time there's a query
404							#}
405
406	4					5	my %ds_creation_params;
407	4					9	foreach my $param ( qw( path delimiter record_separator columns header_lines
408							columns_from_header handle_class sorted_columns )
409							) {
410	32	100				62	if (exists $ds_data->{$param}) {
411	13	50	66			129	if ($creation_param_is_list{$param} and ref($ds_data->{$param}) ne 'ARRAY') {
412	0					0	$ds_creation_params{$param} = \( $ds_data->{$param} );
413							} else {
414	13					26	$ds_creation_params{$param} = $ds_data->{$param};
415							}
416							}
417							}
418
419	4					24	my $ds_id = UR::Object::Type->autogenerate_new_object_id_uuid();
420	4		50			25	my $ds_type = delete $ds_data->{'is'} \|\| __PACKAGE__;
421	4					50	my $ds = $ds_type->create( %ds_creation_params, id => $ds_id );
422	4					22	return $ds;
423							}
424
425
426
427							sub _things_in_list_are_numeric {
428	13			13		19	my $self = shift;
429
430	13					26	foreach ( @{$_[0]} ) {
	13					40
431	14	50				70	return 0 if (! Scalar::Util::looks_like_number($_));
432							}
433	13					45	return 1;
434							}
435
436							# Construct a closure to perform an operator test against the given value
437							# The closures return 0 is the test is successful, -1 if unsuccessful but
438							# the file's value was less than $value, and 1 if unsuccessful and greater.
439							# The iterator that churns through the file knows that if it's comparing an
440							# ID/sorted column, and the comparator returns 1 then we've gone past the
441							# point where we can expect to ever find another successful match and we
442							# should stop looking
443							my $ALWAYS_FALSE = sub { -1 };
444							sub _comparator_for_operator_and_property {
445	36			36		72	my($self,$property,$operator,$value) = @_;
446
447	9			9		49	no warnings 'uninitialized'; # we're handling ''/undef/null specially below where it matters
	9					12
	9					21184
448
449	36	100	66			286	if ($operator eq 'between') {
		50
		50
		50
		50
		100
450	2	50	33			27	if ($value->[0] eq '' or $value->[1] eq '') {
451	0					0	return $ALWAYS_FALSE;
452							}
453
454	2	100	66			12	if ($property->is_numeric and $self->_things_in_list_are_numeric($value)) {
455	1	50				4	if ($value->[0] > $value->[1]) {
456							# Will never be true
457	0					0	Carp::carp "'between' comparison will never be true with values ".$value->[0]," and ".$value->[1];
458	0					0	return $ALWAYS_FALSE;
459							}
460
461							# numeric 'between' comparison
462							return sub {
463	5	50		5		6	return -1 if (${$_[0]} eq '');
	5					9
464	5	100				5	if (${$_[0]} < $value->[0]) {
	5	50				11
465	1					2	return -1;
466	4					6	} elsif (${$_[0]} > $value->[1]) {
467	0					0	return 1;
468							} else {
469	4					5	return 0;
470							}
471	1					8	};
472							} else {
473	1	50				5	if ($value->[0] gt $value->[1]) {
474	0					0	Carp::carp "'between' comparison will never be true with values ".$value->[0]," and ".$value->[1];
475	0					0	return $ALWAYS_FALSE;
476							}
477
478							# A string 'between' comparison
479							return sub {
480	5	50		5		4	return -1 if (${$_[0]} eq '');
	5					10
481	5	100				4	if (${$_[0]} lt $value->[0]) {
	5	100				10
482	1					3	return -1;
483	4					7	} elsif (${$_[0]} gt $value->[1]) {
484	1					2	return 1;
485							} else {
486	3					4	return 0;
487							}
488	1					6	};
489							}
490
491							} elsif ($operator eq 'in') {
492	0	0				0	if (! @$value) {
493	0					0	return $ALWAYS_FALSE;
494							}
495
496	0	0	0			0	if ($property->is_numeric and $self->_things_in_list_are_numeric($value)) {
497							# Numeric 'in' comparison returns undef if we're within the range of the list
498							# but don't actually match any of the items in the list
499	0					0	@$value = sort { $a <=> $b } @$value; # sort the values first
	0					0
500							return sub {
501	0	0		0		0	return -1 if (${$_[0]} eq '');
	0					0
502	0	0				0	if (${$_[0]} < $value->[0]) {
	0	0				0
503	0					0	return -1;
504	0					0	} elsif (${$_[0]} > $value->[-1]) {
505	0					0	return 1;
506							} else {
507	0					0	foreach ( @$value ) {
508	0	0				0	return 0 if ${$_[0]} == $_;
	0					0
509							}
510	0					0	return -1;
511							}
512	0					0	};
513
514							} else {
515							# A string 'in' comparison
516	0					0	@$value = sort { $a cmp $b } @$value;
	0					0
517							return sub {
518	0	0		0		0	if (${$_[0]} lt $value->[0]) {
	0	0				0
519	0					0	return -1;
520	0					0	} elsif (${$_[0]} gt $value->[-1]) {
521	0					0	return 1;
522							} else {
523	0					0	foreach ( @$value ) {
524	0	0				0	return 0 if ${$_[0]} eq $_;
	0					0
525							}
526	0					0	return -1;
527							}
528	0					0	};
529
530							}
531
532							} elsif ($operator eq 'not in') {
533	0	0				0	if (! @$value) {
534	0					0	return $ALWAYS_FALSE;
535							}
536
537	0	0	0			0	if ($property->is_numeric and $self->_things_in_list_are_numeric($value)) {
538							return sub {
539	0	0		0		0	return -1 if (${$_[0]} eq '');
	0					0
540	0					0	foreach ( @$value ) {
541	0	0				0	return -1 if ${$_[0]} == $_;
	0					0
542							}
543	0					0	return 0;
544							}
545
546	0					0	} else {
547							return sub {
548	0			0		0	foreach ( @$value ) {
549	0	0				0	return -1 if ${$_[0]} eq $_;
	0					0
550							}
551	0					0	return 0;
552							}
553	0					0	}
554
555							} elsif ($operator eq 'like') {
556							# 'like' is always a string comparison. In addition, we can't know if we're ahead
557							# or behind in the file's ID columns, so the only two return values are 0 and 1
558
559	0	0				0	return $ALWAYS_FALSE if ($value eq ''); # property like NULL is always false
560
561							# Convert SQL-type wildcards to Perl-type wildcards
562							# Convert a % to a *, and _ to ., unless they're preceeded by \ to escape them.
563							# Not that this isn't precisely correct, as \\% should really mean a literal \
564							# followed by a wildcard, but we can't be correct in all cases without including
565							# a real parser. This will catch most cases.
566
567	0					0	$value =~ s/(?
568	0					0	$value =~ s/(?
569	0					0	my $regex = qr($value);
570							return sub {
571	0	0		0		0	return -1 if (${$_[0]} eq '');
	0					0
572	0	0				0	if (${$_[0]} =~ $regex) {
	0					0
573	0					0	return 0;
574							} else {
575	0					0	return 1;
576							}
577	0					0	};
578
579							} elsif ($operator eq 'not like') {
580	0	0				0	return $ALWAYS_FALSE if ($value eq ''); # property like NULL is always false
581	0					0	$value =~ s/(?
582	0					0	$value =~ s/(?
583	0					0	my $regex = qr($value);
584							return sub {
585	0	0		0		0	return -1 if (${$_[0]} eq '');
	0					0
586	0	0				0	if (${$_[0]} =~ $regex) {
	0					0
587	0					0	return 1;
588							} else {
589	0					0	return 0;
590							}
591	0					0	};
592
593
594							# FIXME - should we only be testing the numericness of the property?
595							} elsif ($property->is_numeric and $self->_things_in_list_are_numeric([$value])) {
596							# Basic numeric comparisons
597	12	100	0			41	if ($operator eq '=') {
		50
		0
		0
		0
		0
		0
		0
598							return sub {
599	49	50		49		47	return -1 if (${$_[0]} eq ''); # null always != a number
	49					101
600	49					43	return ${$_[0]} <=> $value;
	49					84
601	9					45	};
602							} elsif ($operator eq '<') {
603							return sub {
604	14	50		14		14	return -1 if (${$_[0]} eq ''); # null always != a number
	14					37
605	14	100				31	${$_[0]} < $value ? 0 : 1;
	14					39
606	3					16	};
607							} elsif ($operator eq '<=') {
608							return sub {
609	0	0		0		0	return -1 if (${$_[0]} eq ''); # null always != a number
	0					0
610	0	0				0	${$_[0]} <= $value ? 0 : 1;
	0					0
611	0					0	};
612							} elsif ($operator eq '>') {
613							return sub {
614	0	0		0		0	return -1 if (${$_[0]} eq ''); # null always != a number
	0					0
615	0	0				0	${$_[0]} > $value ? 0 : -1;
	0					0
616	0					0	};
617							} elsif ($operator eq '>=') {
618							return sub {
619	0	0		0		0	return -1 if (${$_[0]} eq ''); # null always != a number
	0					0
620	0	0				0	${$_[0]} >= $value ? 0 : -1;
	0					0
621	0					0	};
622							} elsif ($operator eq 'true') {
623							return sub {
624	0	0		0		0	${$_[0]} ? 0 : -1;
	0					0
625	0					0	};
626							} elsif ($operator eq 'false') {
627							return sub {
628	0	0		0		0	${$_[0]} ? -1 : 0;
	0					0
629	0					0	};
630							} elsif ($operator eq '!=' or $operator eq 'ne') {
631							return sub {
632	0	0		0		0	return 0 if (${$_[0]} eq ''); # null always != a number
	0					0
633	0	0				0	${$_[0]} != $value ? 0 : -1;
	0					0
634							}
635	0					0	}
636
637							} else {
638							# Basic string comparisons
639	22	100	0			65	if ($operator eq '=') {
		50
		50
		50
		0
		0
		0
		0
640							return sub {
641	93	50	25	93		76	return -1 if (${$_[0]} eq '' xor $value eq '');
	93					341
642	93					75	return ${$_[0]} cmp $value;
	93					109
643	21					98	};
644							} elsif ($operator eq '<') {
645							return sub {
646	0	0		0		0	${$_[0]} lt $value ? 0 : 1;
	0					0
647	0					0	};
648							} elsif ($operator eq '<=') {
649							return sub {
650	0	0	0	0		0	return -1 if (${$_[0]} eq '' or $value eq '');
	0					0
651	0	0				0	${$_[0]} le $value ? 0 : 1;
	0					0
652	0					0	};
653							} elsif ($operator eq '>') {
654							return sub {
655	6	100		6		4	${$_[0]} gt $value ? 0 : -1;
	6					15
656	1					5	};
657							} elsif ($operator eq '>=') {
658							return sub {
659	0	0	0	0		0	return -1 if (${$_[0]} eq '' or $value eq '');
	0					0
660	0	0				0	${$_[0]} ge $value ? 0 : -1;
	0					0
661	0					0	};
662							} elsif ($operator eq 'true') {
663							return sub {
664	0	0		0		0	${$_[0]} ? 0 : -1;
	0					0
665	0					0	};
666							} elsif ($operator eq 'false') {
667							return sub {
668	0	0		0		0	${$_[0]} ? -1 : 0;
	0					0
669	0					0	};
670							} elsif ($operator eq '!=' or $operator eq 'ne') {
671							return sub {
672	0	0		0		0	${$_[0]} ne $value ? 0 : -1;
	0					0
673							}
674	0					0	}
675							}
676							}
677
678
679
680							sub _properties_from_path_spec {
681	0			0		0	my($self) = @_;
682
683	0	0				0	unless (exists $self->{'__properties_from_path_spec'}) {
684	0					0	my $path = $self->path;
685	0	0				0	$path = $path->[0] if ref($path);
686
687	0					0	my @property_names;
688	0					0	while($path =~ m/\G\$\{?(\w+)\}?/) {
689	0					0	push @property_names, $1;
690							}
691	0					0	$self->{'__properties_from_path_spec'} = \@property_names;
692							}
693	0					0	return @{ $self->{'__properties_from_path_spec'} };
	0					0
694							}
695
696
697							sub _generate_loading_templates_arrayref {
698	33			33		50	my($self, $old_sql_cols) = @_;
699
700							# Each elt in @$column_data is a quad:
701							# [ $class_meta, $property_meta, $table_name, $object_num ]
702							# Keep only the properties with columns (mostly just to remove UR::Object::id
703	33					81	my @sql_cols = grep { $_->[1]->column_name }
	150					225
704							@$old_sql_cols;
705
706	33					186	my $template_data = $self->SUPER::_generate_loading_templates_arrayref(\@sql_cols);
707	33					100	return $template_data;
708							}
709
710
711
712							sub _resolve_column_names_from_pathname {
713	116			116		209	my($self,$pathname,$fh) = @_;
714
715	116	100				406	unless (exists($self->{'__column_names_from_pathname'}->{$pathname})) {
716	13	100				68	if (my $column_names_in_order = $self->columns) {
717	10					32	$self->{'__column_names_from_pathname'}->{$pathname} = $column_names_in_order;
718
719							} else {
720	3					12	my $record_separator = $self->record_separator();
721	3					87	my $line = $fh->getline();
722	3					138	$line =~ s/$record_separator$//; # chomp, but for any value
723							# FIXME - to support record-oriented files, we need some replacement for this...
724	3					13	my $split_regex = $self->_regex();
725	3					15	my @headers = split($split_regex, $line);
726	3					10	$self->{'__column_names_from_pathname'}->{$pathname} = \@headers;
727							}
728							}
729	116					285	return $self->{'__column_names_from_pathname'}->{$pathname};
730							}
731
732
733							sub file_is_sorted_as_requested {
734	115			115	0	160	my($self, $query_plan) = @_;
735
736	115		100			297	my $sorted_columns = $self->sorted_columns \|\| [];
737
738	115					347	my $order_by_columns = $query_plan->order_by_columns();
739	115					359	for (my $i = 0; $i < @$order_by_columns; $i++) {
740	126	100				318	next if ($order_by_columns->[$i] eq '$.'); # input line number is always sorted
741	121	100				290	next if ($order_by_columns->[$i] eq '__FILE__');
742
743	116	100				308	return 0 if $i > $#$sorted_columns;
744	79	100				220	if ($sorted_columns->[$i] ne $order_by_columns->[$i]) {
745	68					194	return 0;
746							}
747							}
748	10					26	return 1;
749							}
750
751
752							# FIXME - this is a copy of parts of _generate_class_data_for_loading from UR::DS::RDBMS
753							sub _generate_class_data_for_loading {
754	169			169		192	my ($self, $class_meta) = @_;
755
756	169					521	my $parent_class_data = $self->SUPER::_generate_class_data_for_loading($class_meta);
757
758	169					485	my @class_hierarchy = ($class_meta->class_name,$class_meta->ancestry_class_names);
759	169					208	my $order_by_columns;
760	169					140	do {
761	169					172	my @id_column_names;
762	169					275	for my $inheritance_class_name (@class_hierarchy) {
763	169					436	my $inheritance_class_object = UR::Object::Type->get($inheritance_class_name);
764	169	50				499	unless ($inheritance_class_object->table_name) {
765	0					0	next;
766							}
767							@id_column_names =
768							#map {
769							# my $t = $inheritance_class_object->table_name;
770							# ($t) = ($t =~ /(\S+)\s*$/);
771							# $t . '.' . $_
772							#}
773	182					464	grep { defined }
774							map {
775	182					588	my $p = $inheritance_class_object->property_meta_for_name($_);
776	182	50				392	die ("No property $_ found for " . $inheritance_class_object->class_name . "?") unless $p;
777	182					374	$p->column_name;
778							}
779	182					444	map { $_->property_name }
780	169					619	grep { $_->column_name }
	182					587
781							$inheritance_class_object->direct_id_property_metas;
782
783	169	50				462	last if (@id_column_names);
784							}
785	169					253	$order_by_columns = \@id_column_names;
786							};
787
788	169					227	my(@all_table_properties, @direct_table_properties, $first_table_name, $subclassify_by);
789	169					196	for my $co ( $class_meta, @{ $parent_class_data->{parent_class_objects} } ) {
	169					377
790	360					782	my $table_name = $co->table_name;
791	360	100				732	next unless $table_name;
792
793	169		33			550	$first_table_name \|\|= $co->table_name;
794							# $sub_classification_method_name \|\|= $co->sub_classification_method_name;
795							# $sub_classification_meta_class_name \|\|= $co->sub_classification_meta_class_name;
796	169		33			560	$subclassify_by \|\|= $co->subclassify_by;
797
798	169			634		527	my $sort_sub = sub ($$) { return $_[0]->property_name cmp $_[1]->property_name };
	634					1008
799							push @all_table_properties,
800	625					1105	map { [$co, $_, $table_name, 0 ] }
801							sort $sort_sub
802	169	100				543	grep { defined $_->column_name && $_->column_name ne '' }
	649					1089
803							UR::Object::Property->get( class_name => $co->class_name );
804
805	169	50				1215	@direct_table_properties = @all_table_properties if $class_meta eq $co;
806							}
807
808
809	169					1951	my $class_data = {
810							%$parent_class_data,
811
812							order_by_columns => $order_by_columns,
813							direct_table_properties => \@direct_table_properties,
814							all_table_properties => \@all_table_properties,
815							};
816	169					854	return $class_data;
817							}
818
819
820							# Needed for the QueryPlan's processing of order-by params
821							# Params are a list of the 4-tuples [class-meta, prop-meta, table-name, object-num]
822							sub _select_clause_columns_for_table_property_data {
823	16			16		21	my $self = shift;
824
825
826	16					22	return [ map { $_->[1]->column_name } @_ ];
	16					31
827							}
828
829							# Used to populate the %value_extractor_for_column_name hash
830							# It should return a sub that, when given a row of data from the source,
831							# returns the proper data from that row.
832							#
833							# It's expected to return a sub that accepts ($self, $row, $fh, $filename)
834							# and return a reference to the right data. In most cases, it'll just pluck
835							# out the $column_idx'th element from $@row, but we're using it
836							# to attach special meaning to the $. token
837							sub _create_value_extractor_for_column_name {
838	617			617		592	my($self, $rule, $column_name, $column_idx) = @_;
839
840	617	100				1139	if ($column_name eq '$.') {
		100
841							return sub {
842	30			30		39	my($self, $row, $fh, $filename) = @_;
843	30					61	my $line_no = $fh->input_line_number();
844	30					329	return \$line_no;
845	115					388	};
846							} elsif ($column_name eq '__FILE__') {
847							return sub {
848	0			0		0	my($self,$row,$fh,$filename) = @_;
849	0					0	return \$filename;
850	115					410	};
851							} else {
852							return sub {
853	3129			3129		2471	my($self, $row, $fh, $filename) = @_;
854	3129					3750	return \$row->[$column_idx];
855	387					1550	};
856							}
857							}
858
859
860							sub create_iterator_closure_for_rule {
861	103			103	1	129	my($self,$rule) = @_;
862
863	103					305	my $class_name = $rule->subject_class_name;
864	103					284	my $class_meta = $class_name->__meta__;
865	103					231	my $rule_template = $rule->template;
866
867							# We're defering to the class metadata here because we don't yet know the
868							# pathnames of the files we'll be reading from. If the columns_from_header flag
869							# is set, then there's no way of knowing what the columns are until then
870	391					607	my @column_names = grep { defined }
871	103					448	map { $class_meta->column_for_property($_) }
	395					771
872							$class_meta->all_property_names;
873
874							# FIXME - leaning on the sorted_columns property here means:
875							# 1) It's useless when used where the path spec is a directory and
876							# classes have table_names, since each file is likely to have different
877							# columns
878							# 2) If we ultimately end up reading from more than one file, all the files
879							# must be sorted in the same way. It's possible the user has sorted each
880							# file differently, though in practice it would make for a lot of trouble
881	103					169	my %column_is_sorted_descending;
882	83	100				253	my @sorted_column_names = map { if (index($_, '-') == 0) {
883	38					77	my $col = $_;
884	38					91	substr($col, 0, 1, '');
885	38					106	$column_is_sorted_descending{$col} = 1;
886	38					107	$col;
887							} else {
888	45					112	$_;
889							}
890							}
891	103	100				184	@{ $self->sorted_columns \|\| [] };
	103					349
892	103					190	my %sorted_column_names = map { $_ => 1 } @sorted_column_names;
	83					212
893	103					194	my @unsorted_column_names = grep { ! exists $sorted_column_names{$_} } @column_names;
	391					509
894
895	103					120	my @rule_column_names_in_order; # The order we should perform rule matches on - value is the name of the column in the file
896							my @comparison_for_column; # closures to call to perform the match - same order as @rule_column_names_in_order
897	0					0	my %rule_column_name_to_comparison_index;
898
899	0					0	my(%property_for_column, %operator_for_column, %value_for_column); # These are used for logging
900
901							my $resolve_comparator_for_column_name = sub {
902	391			391		347	my $column_name = shift;
903
904	391					872	my $property_name = $class_meta->property_for_column($column_name);
905	391	100				849	return unless $rule->specifies_value_for($property_name);
906
907	36		50			129	my $operator = $rule->operator_for($property_name)
908							\|\| '=';
909	36					109	my $rule_value = $rule->value_for($property_name);
910
911	36					65	$property_for_column{$column_name} = $property_name;
912	36					61	$operator_for_column{$column_name} = $operator;
913	36					49	$value_for_column{$column_name} = $rule_value;
914
915	36					218	my $comp_function = $self->_comparator_for_operator_and_property(
916							$class_meta->property($property_name),
917							$operator,
918							$rule_value);
919
920	36					66	push @rule_column_names_in_order, $column_name;
921	36					43	push @comparison_for_column, $comp_function;
922	36					64	$rule_column_name_to_comparison_index{$column_name} = $#comparison_for_column;
923	36					80	return 1;
924	103					524	};
925
926	103					125	my $sorted_columns_in_rule_count; # How many columns we can consider when trying "the shortcut" for sorted data
927							my %column_is_used_in_sorted_capacity;
928	103					158	foreach my $column_name ( @sorted_column_names ) {
929	83	100	66			179	if (! $resolve_comparator_for_column_name->($column_name)
930							and ! defined($sorted_columns_in_rule_count)
931							) {
932							# The first time we don't match a sorted column, record the index
933	76					135	$sorted_columns_in_rule_count = scalar(@rule_column_names_in_order);
934							} else {
935	7					19	$column_is_used_in_sorted_capacity{$column_name} = ' (sorted)';
936							}
937							}
938	103		100			329	$sorted_columns_in_rule_count \|\|= scalar(@rule_column_names_in_order);
939
940	103					165	foreach my $column_name ( @unsorted_column_names ) {
941	308					423	$resolve_comparator_for_column_name->($column_name);
942							}
943
944							# sort them by filename
945	103					400	my @possible_file_info_list = sort { $a->[0] cmp $b->[0] }
	18					31
946							$self->resolve_file_info_for_rule_and_path_spec($rule);
947
948	103					367	my $table_name = $class_meta->table_name;
949	103	100	66			500	if (defined($table_name) and $table_name ne '__default__') {
950							# Tack the final file name onto the end if the class has a table name
951	8					13	@possible_file_info_list = map { [ $_->[0] . "/$table_name", $_->[1] ] } @possible_file_info_list;
	8					43
952							}
953
954	103					356	my $handle_class = $self->handle_class;
955	103					194	my $use_quick_read = $handle_class eq 'IO::Handle';
956	103					330	my $split_regex = $self->_regex();
957	103					284	my $logger = $self->_logger('UR_DBI_MONITOR_SQL');
958	103					584	my $record_separator = $self->record_separator;
959
960	103					334	my $monitor_start_time = Time::HiRes::time();
961
962	9			9		48	{ no warnings 'uninitialized';
	9					16
	9					15964
	103					97
963							$logger->("\nFILE: starting query covering " . scalar(@possible_file_info_list)." files:\n\t"
964	115					623	. join("\n\t", map { $_->[0] } @possible_file_info_list )
965							. "\nFILTERS: "
966							. (scalar(@rule_column_names_in_order)
967							? join("\n\t", map {
968	103	100				360	$_ . $column_is_used_in_sorted_capacity{$_}
969							. " $operator_for_column{$_} "
970							. (ref($value_for_column{$_}) eq 'ARRAY'
971	2					15	? '[' . join(',',@{$value_for_column{$_}}) .']'
972	36	100				290	: $value_for_column{$_} )
973							}
974							@rule_column_names_in_order)
975							: 'none')
976							. "\n\n"
977							);
978							}
979
980	103					392	my $query_plan = $self->_resolve_query_plan($rule_template);
981	103	50				124	if (@{ $query_plan->{'loading_templates'} } > 1) {
	103					309
982	0					0	Carp::croak(__PACKAGE__ . " does not support joins. The rule was $rule");
983							}
984	103					191	my $loading_template = $query_plan->{loading_templates}->[0];
985	103					123	my @property_names_in_loading_template_order = @{ $loading_template->{'property_names'} };
	103					301
986	103					170	my @column_names_in_loading_template_order = map { $class_meta->column_for_property($_) }
	391					702
987							@property_names_in_loading_template_order;
988
989	103					161	my %property_name_to_resultset_index_map;
990							my %column_name_to_resultset_index_map;
991	103					314	for (my $i = 0; $i < @property_names_in_loading_template_order; $i++) {
992	391					336	my $property_name = $property_names_in_loading_template_order[$i];
993	391					411	$property_name_to_resultset_index_map{$property_name} = $i;
994	391					577	$column_name_to_resultset_index_map{$class_meta->column_for_property($property_name)} = $i;
995							}
996
997	103					136	my @iterator_for_each_file;
998	103					200	foreach ( @possible_file_info_list ) {
999	115					192	my $pathname = $_->[0];
1000	115					125	my $property_values_from_path_spec = $_->[1];
1001
1002	115					293	my @properties_from_path_spec = keys %$property_values_from_path_spec;
1003	115					202	my @values_from_path_spec = values %$property_values_from_path_spec;
1004
1005	115					286	my $pid = $$; # For tracking whether there's been a fork()
1006	115					852	my $fh = $handle_class->new($pathname);
1007	115	50				12916	unless ($fh) {
1008	0					0	$logger->("FILE: Skipping $pathname because it did not open: $!\n");
1009	0					0	next; # missing or unopenable files is not fatal
1010							}
1011
1012	115					485	my $column_names_in_order = $self->_resolve_column_names_from_pathname($pathname,$fh);
1013							# %value_for_column_name holds subs that return the value for that column. For values
1014							# determined from the path resolver, save that value here. Most other values get plucked out
1015							# of the line read from the file. The remaining values are special tokens like $. and __FILE__.
1016							# These subs are used both for testing whether values read from the data source pass the rule
1017							# and for constructing the resultset passed up to the Context
1018	115					166	my %value_for_column_name;
1019							my %column_name_to_index_map;
1020	115					193	my $ordered_column_names_count = scalar(@$column_names_in_order);
1021	115					407	for (my $i = 0; $i < $ordered_column_names_count; $i++) {
1022	388					434	my $column_name = $column_names_in_order->[$i];
1023	388	100				733	next unless (defined $column_name);
1024	387					467	$column_name_to_index_map{$column_name} = $i;
1025	387					670	$value_for_column_name{$column_name}
1026							= $self->_create_value_extractor_for_column_name($rule, $column_name, $i);
1027							}
1028	115					244	foreach ( '$.', '__FILE__' ) {
1029	230					364	$value_for_column_name{$_} = $self->_create_value_extractor_for_column_name($rule, $_, undef);
1030	230					377	$column_name_to_index_map{$_} = undef;
1031							}
1032	115					451	while (my($prop, $value) = each %$property_values_from_path_spec) {
1033	35					103	my $column = $class_meta->column_for_property($prop);
1034	35			62		111	$value_for_column_name{$column} = sub { return \$value };
	62					86
1035	35					117	$column_name_to_index_map{$column} = undef;
1036							}
1037
1038							# Convert the column_name keys here to indexes into the comparison list
1039							my %column_for_this_comparison_is_sorted_descending =
1040	1					5	map { $rule_column_name_to_comparison_index{$_} => $column_is_sorted_descending{$_} }
1041	115					334	grep { exists $rule_column_name_to_comparison_index{$_} }
	38					153
1042							keys %column_is_sorted_descending;
1043
1044							# rule properties that aren't actually columns in the file should be
1045							# satisfied by the path resolution already, so we can strip them out of the
1046							# list of columns to test
1047	48					95	my @rule_columns_in_order = map { $column_name_to_index_map{$_} }
1048	115					233	grep { exists $column_name_to_index_map{$_} }
	48					122
1049							@rule_column_names_in_order;
1050							# And also strip out any items in @comparison_for_column for non-column data
1051	48					98	my @comparison_for_column_this_file = map { $comparison_for_column[ $rule_column_name_to_comparison_index{$_} ] }
1052	115					196	grep { exists $column_name_to_index_map{$_} }
	48					74
1053							@rule_column_names_in_order;
1054
1055							# Burn through the requsite number of header lines
1056	115					471	my $lines_read = $fh->input_line_number;
1057	115					2536	my $throwaway_line_count = $self->header_lines;
1058	115					320	while($throwaway_line_count > $lines_read) {
1059	19					248	$lines_read++;
1060	19					351	scalar($fh->getline());
1061							}
1062
1063	115					343	my $lines_matched = 0;
1064
1065	115					126	my $log_first_fetch;
1066							$log_first_fetch = sub {
1067	115			115		2307	$logger->(sprintf("FILE: $pathname FIRST FETCH TIME: %.4f s\n\n", Time::HiRes::time() - $monitor_start_time));
1068	115					532	$log_first_fetch = \&UR::Util::null_sub;
1069	115					411	};
1070	115					137	my $log_first_match;
1071							$log_first_match = sub {
1072	102			102		452	$logger->("FILE: $pathname First match after reading $lines_read lines\n\n");
1073	102					409	$log_first_match = \&UR::Util::null_sub;
1074	115					378	};
1075
1076
1077	115					133	my $next_record;
1078
1079							# This sub reads the next record (line) from the file, splits the line into
1080							# columns and puts the data into @$next_record
1081	115					877	my $record_separator_re = qr($record_separator$);
1082							my $read_record_from_file = sub {
1083
1084							# Make sure some wise guy hasn't changed this out from under us
1085	983			983		2044	local $/ = $record_separator;
1086
1087	983	50				1700	if ($pid != $$) {
1088							# There's been a fork() between the original opening and now
1089							# This filehandle is no longer valid to read from, but tell()
1090							# should still report the right position
1091	0					0	my $pos = $fh->tell();
1092	0					0	$logger->("FILE: reopening file $pathname and seeking to position $pos after fork()\n");
1093	0					0	my $fh = $handle_class->new($pathname);
1094	0	0				0	unless ($fh) {
1095	0					0	$logger->("FILE: Reopening $pathname after fork() failed: $!\n");
1096	0					0	return; # behave if we're at EOF
1097							}
1098	0					0	$fh->seek($pos, 0); # fast-forward to the old position
1099	0					0	$pid = $$;
1100							}
1101
1102	983					674	my $line;
1103							READ_LINE_FROM_FILE:
1104	983					1335	while(! defined($line)) {
1105							# Hack for OSX 10.5.
1106							# At EOF, the getline below will return undef. Most builds of Perl
1107							# will also set $! to 0 at EOF so you can distinguish between the cases
1108							# of EOF (which may have actually happened a while ago because of buffering)
1109							# and an actual read error. OSX 10.5's Perl does not, and so $!
1110							# retains whatever value it had after the last failed syscall, likely
1111							# a stat() while looking for a Perl module. This should have no effect
1112							# other platforms where you can't trust $! at arbitrary points in time
1113							# anyway
1114	983					1055	$! = 0;
1115	983	50				16118	$line = $use_quick_read ? <$fh> : $fh->getline();
1116
1117	983	100	100			23858	if ($line and $line !~ $record_separator_re) {
1118							# Was a short read - probably at EOF
1119							# If the record_separator is a multi-char string, and the last
1120							# characters of $line are the first characters of the
1121							# record_separator, it's likely (though not certain) that the right
1122							# Thing to do is to remove the partial record separator.
1123	1					5	for (my $keep_chars = length($record_separator); $keep_chars > 0; $keep_chars--) {
1124	4					5	my $match_rs = substr($record_separator, 0, $keep_chars);
1125	4	100				36	if ($line =~ m/$match_rs$/) {
1126	1					3	substr($line, 0 - $keep_chars) = '';
1127	1					2	last;
1128							}
1129							}
1130							}
1131
1132	983	100				1995	unless (defined $line) {
1133	109	50	33			358	if ($! && ! $fh->eof()) {
1134	0	0	0			0	redo READ_LINE_FROM_FILE if ($! == EAGAIN or $! == EINTR);
1135	0					0	Carp::croak("read failed for file $pathname: $!");
1136							}
1137
1138							# at EOF. Close up shop and remove this fh from the list
1139							#flock($fh,LOCK_UN);
1140	109					140	$fh = undef;
1141	109					3421	$next_record = undef;
1142
1143	109					1725	$logger->("FILE: $pathname at EOF\n"
1144							. "FILE: $lines_read lines read for this request. $lines_matched matches in this file\n"
1145							. sprintf("FILE: TOTAL EXECUTE-FETCH TIME: %.4f s\n\n", Time::HiRes::time() - $monitor_start_time)
1146							);
1147	109					294	return;
1148							}
1149							}
1150	874					610	$lines_read++;
1151
1152	874					2407	$line =~ s/$record_separator$//; # chomp, but for any value
1153							# FIXME - to support record-oriented files, we need some replacement for this...
1154	874					3754	$next_record = [ split($split_regex, $line, $ordered_column_names_count) ];
1155	115					605	};
1156
1157	115					150	my $number_of_comparisons = @comparison_for_column_this_file;
1158
1159							# The file filter iterator.
1160							# This sub looks at @$next_record and applies the comparator functions in order.
1161							# If it passes all of them, it constructs a resultset row and passes it up to the
1162							# multiplexer iterator
1163							my $file_filter_iterator = sub {
1164	886			886		1188	$log_first_fetch->();
1165
1166							FOR_EACH_LINE:
1167	886					971	for(1) {
1168	983					1056	$read_record_from_file->();
1169
1170	983	100				1845	unless ($next_record) {
1171							# Done reading from this file
1172	109					314	return;
1173							}
1174
1175	874					1567	for (my $i = 0; $i < $number_of_comparisons; $i++) {
1176							my $comparison = $comparison_for_column_this_file[$i]->(
1177	172					339	$value_for_column_name{ $rule_column_names_in_order[$i] }->($self, $next_record, $fh, $pathname)
1178							);
1179
1180	172	100	100			1024	if ( ( ($column_for_this_comparison_is_sorted_descending{$i} and $comparison < 0) or $comparison > 0)
		100	100
1181							and $i < $sorted_columns_in_rule_count
1182							) {
1183							# We've gone past the last thing that could possibly match
1184	6					79	$logger->("FILE: $pathname $lines_read lines read for this request. $lines_matched matches\n"
1185							. sprintf("FILE: TOTAL EXECUTE-FETCH TIME: %.4f s\n", Time::HiRes::time() - $monitor_start_time));
1186
1187							#flock($fh,LOCK_UN);
1188	6					18	return;
1189
1190							} elsif ($comparison) {
1191							# comparison didn't match, read another line from the file
1192	97					116	redo FOR_EACH_LINE;
1193							}
1194
1195							# That comparison worked... stay in the for() loop for other comparisons
1196							}
1197							}
1198							# All the comparisons return '0', meaning they passed
1199
1200	771					1240	$log_first_match->();
1201	771					552	$lines_matched++;
1202	3049	50				4373	my @resultset = map { ref($_) ? $$_ : $_ }
1203	771					842	map { ref($value_for_column_name{$_})
1204							? $value_for_column_name{$_}->($self, $next_record, $fh, $pathname)
1205	3049	50				4800	: $value_for_column_name{$_} # constant value from path spec
1206							}
1207							@column_names_in_loading_template_order;
1208	771					1671	return \@resultset;
1209	115					612	};
1210
1211							# Higher layers in the loading logic require rows from the data source to be returned
1212							# in ID order. If the file contents is not sorted primarily by ID, then we need to do
1213							# the less efficient thing by first reading in all the matching rows in one go, sorting
1214							# them by ID, then iterating over the results
1215	115	100				328	unless ($self->file_is_sorted_as_requested($query_plan)) {
1216	153					308	my @resultset_indexes_to_sort = map { $column_name_to_resultset_index_map{$_} }
1217	105					140	@{ $query_plan->order_by_columns() };
	105					216
1218	105					374	$file_filter_iterator
1219							= $self->_create_iterator_for_custom_sorted_columns($file_filter_iterator, $query_plan, \%column_name_to_resultset_index_map);
1220							}
1221
1222	115					3291	push @iterator_for_each_file, $file_filter_iterator;
1223							}
1224
1225	103	50				603	if (! @iterator_for_each_file) {
		100
1226	0					0	return \&UR::Util::null_sub; # No matching files
1227							} elsif (@iterator_for_each_file == 1) {
1228	97					1435	return $iterator_for_each_file[0]; # If there's only 1 file, no need to multiplex
1229							}
1230
1231	6					10	my @next_record_for_each_file; # in the same order as @iterator_for_each_file
1232
1233	6					20	my %column_is_numeric = map { $_->column_name => $_->is_numeric }
1234	6					16	map { $class_meta->property_meta_for_name($_) }
1235	6					21	map { $class_meta->property_for_column($_) }
1236	6	50				26	map { index($_, '-') == 0 ? substr($_, 1) : $_ }
1237	6					8	@{ $query_plan->order_by_columns };
	6					19
1238
1239							my @resultset_index_sort_sub
1240	6					17	= map { &_resolve_sorter_for( is_numeric => $column_is_numeric{$_},
1241							is_descending => $column_is_sorted_descending{$_},
1242	0					0	column_index => $property_name_to_resultset_index_map{$_});
1243							}
1244							@sorted_column_names;
1245
1246	6					15	my %resultset_idx_is_sorted_descending = map { $column_name_to_resultset_index_map{$_} => 1 }
	0					0
1247							keys %column_is_sorted_descending;
1248							my $resultset_sorter = sub {
1249	2			2		5	my($idx_a,$idx_b) = shift;
1250
1251	2					4	foreach my $sort_sub ( @resultset_index_sort_sub ) {
1252	0					0	my $cmp = $sort_sub->($next_record_for_each_file[$idx_a], $next_record_for_each_file[$idx_b]);
1253	0	0				0	return $cmp if $cmp; # done if they're not equal
1254							}
1255	2					4	return 0;
1256	6					26	};
1257
1258							# This is the iterator returned to the Context, and knows about all the individual
1259							# file filter iterators. It compares the next resultset from each of them and
1260							# returns the next resultset to the Context
1261							my $multiplex_iterator = sub {
1262	12	50		12		27	return unless @iterator_for_each_file; # if they're all run out
1263
1264	12					10	my $lowest_slot;
1265	12					31	for(my $i = 0; $i < @iterator_for_each_file; $i++) {
1266	26	100				41	unless(defined $next_record_for_each_file[$i]) {
1267	24					43	$next_record_for_each_file[$i] = $iterator_for_each_file[$i]->();
1268	24	100				36	unless (defined $next_record_for_each_file[$i]) {
1269							# That iterator is exhausted, splice it out
1270	18					23	splice(@iterator_for_each_file, $i, 1);
1271	18					25	splice(@next_record_for_each_file, $i, 1);
1272	18	100				38	return unless (@iterator_for_each_file); # This can happen here if none of the files have matching data
1273	12					16	redo;
1274							}
1275							}
1276
1277	8	100				17	unless (defined $lowest_slot) {
1278	6					9	$lowest_slot = $i;
1279	6					10	next;
1280							}
1281
1282	2					5	my $cmp = $resultset_sorter->($lowest_slot, $i);
1283	2	50				6	if ($cmp > 0) {
1284	0					0	$lowest_slot = $i;
1285							}
1286							}
1287
1288	6					10	my $retval = $next_record_for_each_file[$lowest_slot];
1289	6					8	$next_record_for_each_file[$lowest_slot] = undef;
1290	6					11	return $retval;
1291	6					18	};
1292
1293	6					112	return $multiplex_iterator;
1294							}
1295
1296
1297							# Constructors for subs to sort appropriately
1298							sub _resolve_sorter_for {
1299	143			143		316	my %params = @_;
1300
1301	143					193	my $col_idx = $params{'column_index'};
1302
1303							my $is_descending = (exists($params{'is_descending'}) && $params{'is_descending'})
1304							\|\|
1305	143		66			700	(exists($params{'is_ascending'}) && $params{'is_ascending'});
1306							my $is_numeric = (exists($params{'is_numeric'}) && $params{'is_numeric'})
1307							\|\|
1308	143		66			537	(exists($params{'is_string'}) && $params{'is_string'});
1309	143	100				238	if ($is_descending) {
1310	31	100				76	if ($is_numeric) {
1311	15			302		113	return sub($$) { $_[1]->[$col_idx] <=> $_[0]->[$col_idx] };
	302					448
1312							} else {
1313	16			341		106	return sub($$) { $_[1]->[$col_idx] cmp $_[0]->[$col_idx] };
	341					446
1314							}
1315							} else {
1316	112	100				171	if ($is_numeric) {
1317	95			546		554	return sub($$) { $_[0]->[$col_idx] <=> $_[1]->[$col_idx] };
	546					668
1318							} else {
1319	17			355		108	return sub($$) { $_[0]->[$col_idx] cmp $_[1]->[$col_idx] };
	355					483
1320							}
1321							}
1322							}
1323
1324							# Higher layers in the loading logic require rows from the data source to be returned
1325							# in ID order. If the file contents is not sorted primarily by ID, then we need to do
1326							# the less efficient thing by first reading in all the matching rows in one go, sorting
1327							# them by ID, then iterating over the results
1328							sub _create_iterator_for_custom_sorted_columns {
1329	105			105		155	my($self, $iterator_this_file, $query_plan, $column_name_to_resultset_index_map) = @_;
1330
1331	105					103	my @matching;
1332	105					210	while (my $row = $iterator_this_file->()) {
1333	726					1072	push @matching, $row; # save matches as [id, rowref]
1334							}
1335
1336	105	100				334	unless (@matching) {
1337	12					45	return \&UR::Util::null_sub; # Easy, no matches
1338							}
1339
1340	93					484	my $class_meta = $query_plan->class_name->__meta__;
1341	141					429	my %column_is_numeric = map { $_->column_name => $_->is_numeric }
1342	141					334	map { $class_meta->property_meta_for_name($_) }
1343	141					498	map { $class_meta->property_for_column($_) }
1344	141	100				773	map { index($_, '-') == 0 ? substr($_,1) : $_ }
1345	93					254	@{ $query_plan->order_by_columns };
	93					291
1346
1347	93					195	my @sorters;
1348	9			9		49	{ no warnings 'numeric';
	9					14
	9					313
	93					121
1349	9			9		36	no warnings 'uninitialized';
	9					12
	9					10588
1350	141					483	@sorters = map { &_resolve_sorter_for(%$_) }
1351	141					162	map { my $col_name = $_;
1352	141					155	my $descending = 0;
1353	141	100				335	if (index($col_name, '-') == 0) {
1354	31					51	$descending = 1;
1355	31					69	substr($col_name, 0, 1, ''); # remove the -
1356							}
1357	141					189	my $col_idx = $column_name_to_resultset_index_map->{$col_name};
1358	141					586	{ column_index => $col_idx, is_descending => $descending, is_numeric => $column_is_numeric{$col_name} };
1359							}
1360	93					147	@{ $query_plan->order_by_columns };
	93					346
1361							}
1362
1363	93					224	my $sort_by_order_by_columns;
1364	93	100				205	if (@sorters == 1) {
1365	45					74	$sort_by_order_by_columns = $sorters[0];
1366							} else {
1367							$sort_by_order_by_columns
1368							= sub($$) {
1369	1004			1004		811	foreach (@sorters) {
1370	1084	100				939	if (my $rv = $_->(@_)) {
1371	1004					793	return $rv;
1372							}
1373							}
1374	0					0	return 0;
1375	48					167	};
1376							}
1377	93					288	@matching = sort $sort_by_order_by_columns
1378							@matching;
1379
1380							return sub {
1381	818			818		1172	return shift @matching;
1382	93					729	};
1383							}
1384
1385
1386							sub initializer_should_create_column_name_for_class_properties {
1387	66			66	0	952	1;
1388							}
1389
1390
1391							# The string used to join fields of a row together when writing
1392							#
1393							# Since the 'delimiter' property is interpreted as a regex in the reading
1394							# code, we'll try to be smart about making a real string from that.
1395							#
1396							# subclasses can override this to provide a different implementation
1397							sub column_join_string {
1398	1			1	0	1	my $self = shift;
1399
1400	1					3	my $join_pattern = $self->delimiter;
1401
1402							# make some common substitutions...
1403	1	50				4	if ($join_pattern eq '\s,\s') {
1404							# The default...
1405	0					0	return ', ';
1406							}
1407
1408	1					3	$join_pattern =~ s/\\s*//g; # Turn 0-or-more whitespaces to nothing
1409	1					1	$join_pattern =~ s/\\t/\t/; # tab
1410	1					2	$join_pattern =~ s/\\s/ /; # whitespace
1411
1412	1					2	return $join_pattern;
1413							}
1414
1415
1416							sub _sync_database {
1417	1			1		3	my $self = shift;
1418	1					2	my %params = @_;
1419
1420	1	50				3	unless (ref($self)) {
1421	0	0				0	if ($self->isa("UR::Singleton")) {
1422	0					0	$self = $self->_singleton_object;
1423							}
1424							else {
1425	0					0	Carp::croak("Cannot call _sync_database as a class method on a non-singleton class");
1426							}
1427							}
1428
1429	1					3	$DB::single=1;
1430	1					2	my $changed_objects = delete $params{'changed_objects'};
1431
1432	1					7	my $path_spec = $self->path;
1433
1434							# First, bin up the changed objects by their class' table_name
1435	1					2	my %objects_for_path;
1436	1					2	foreach my $obj ( @$changed_objects ) {
1437	7					10	my @path = $self->resolve_file_info_for_rule_and_path_spec($obj, $path_spec);
1438	7	50				16	if (!@path) {
		50
1439	0					0	$self->error_message("Couldn't resolve destination file for object "
1440							.$obj->class." ID ".$obj->id.": ".Data::Dumper::Dumper($obj));
1441	0					0	return;
1442							} elsif (@path > 1) {
1443	0					0	$self->error_message("Got multiple filenames when resolving destination file for object "
1444							. $obj->class." ID ".$obj->id.": ".join(', ', @path));
1445							}
1446	7		100			15	$objects_for_path{ $path[0]->[0] } \|\|= [];
1447	7					5	push @{ $objects_for_path{ $path[0]->[0] } }, $obj;
	7					11
1448							}
1449
1450	1					2	my %objects_for_pathname;
1451	1					3	foreach my $path ( keys %objects_for_path ) {
1452	1					2	foreach my $obj ( @{ $objects_for_path{$path} } ) {
	1					2
1453	7					12	my $class_meta = $obj->__meta__;
1454	7					12	my $table_name = $class_meta->table_name;
1455	7					6	my $pathname = $path;
1456	7	50	33			20	if (defined($table_name) and $table_name ne '__default__') {
1457	0					0	$pathname .= '/' . $table_name;
1458							}
1459	7		100			13	$objects_for_pathname{$pathname} \|\|= [];
1460	7					4	push @{ $objects_for_pathname{$pathname} }, $obj;
	7					8
1461							}
1462							}
1463
1464	1					2	my %column_is_sorted_descending;
1465	2	50				4	my @sorted_column_names = map { if (index($_, '-') == 0) {
1466	0					0	my $s = $_;
1467	0					0	substr($s, 0, 1, '');
1468	0					0	$column_is_sorted_descending{$s} = $s;
1469							} else {
1470	2					3	$_;
1471							}
1472							}
1473	1	50				1	@{ $self->sorted_columns() \|\| [] };
	1					4
1474
1475	1					7	my $handle_class = $self->handle_class;
1476	1					19	my $use_quick_read = $handle_class->isa('IO::Handle');
1477
1478	1					197	my $join_string = $self->column_join_string;
1479	1					3	my $record_separator = $self->record_separator;
1480	1					5	my $split_regex = $self->_regex();
1481	1					4	local $/; # Make sure some wise guy hasn't changed this out from under us
1482	1					2	$/ = $record_separator;
1483
1484	1					4	my $logger = $self->_logger('UR_DBI_MONITOR_SQL');
1485	1					5	my $total_save_time = Time::HiRes::time();
1486	1					9	$logger->("FILE: Saving changes to ".scalar(keys %objects_for_pathname) . " files:\n\t"
1487							. join("\n\t", keys(%objects_for_pathname)) . "\n\n");
1488
1489	1					2	foreach my $pathname ( keys %objects_for_pathname ) {
1490	1					2	my $use_quick_rename;
1491	1					48	my $containing_directory = File::Basename::dirname($pathname);
1492	1	50				38	unless (-d $containing_directory) {
1493	0					0	File::Path::mkpath($containing_directory);
1494							}
1495	1	50				7	if (-w $containing_directory) {
		0
1496	1					2	$use_quick_rename = 1;
1497							} elsif (! -w $pathname) {
1498	0					0	Carp::croak("Cannot save to file $pathname: Neither the directory nor the file are writable");
1499							}
1500
1501	1					6	my $read_fh = $handle_class->new($pathname);
1502
1503							# Objects going to the same file should all be of a common class
1504	1					94	my $class_meta = $objects_for_pathname{$pathname}->[0]->__meta__;
1505
1506	1					2	my @property_names_that_are_sorted = map { $class_meta->property_for_column($_) }
	2					13
1507							@sorted_column_names;
1508							# Returns true of the passed-in object has a change in one of the sorted columns
1509							my $object_has_changed_sorted_column = sub {
1510	2			2		3	my $obj = shift;
1511	2					3	foreach my $prop ( @property_names_that_are_sorted ) {
1512	3	100				9	if (UR::Context->_get_committed_property_value($obj, $prop) ne $obj->$prop) {
1513	1					3	return 1;
1514							}
1515							}
1516	1					3	return 0;
1517	1					5	};
1518
1519	1					9	my $column_names_in_file = $self->_resolve_column_names_from_pathname($pathname, $read_fh);
1520	1					2	my $column_names_count = @$column_names_in_file;
1521	1					1	my %column_name_to_index;
1522	1					3	for (my $i = 0; $i < @$column_names_in_file; $i++) {
1523	3					8	$column_name_to_index{$column_names_in_file->[$i]} = $i;
1524							}
1525							# This lets us take a hash slice of the object and get a row for the file
1526	1					3	my @property_names_in_column_order = map { $class_meta->property_for_column($_) }
	3					4
1527							@$column_names_in_file;
1528
1529	3					9	my %column_name_is_numeric = map { $_->column_name => $_->is_numeric }
1530	3					9	map { $class_meta->property_meta_for_name($_) }
1531	1					3	map { $class_meta->property_for_column($_) }
	3					4
1532							@$column_names_in_file;
1533
1534	1					3	my $insert = [];
1535	1					3	my $update = {};
1536	1					1	my $delete = {};
1537	1					2	foreach my $obj ( @{ $objects_for_pathname{$pathname} } ) {
	1					4
1538	7	100				22	if ($obj->isa('UR::Object::Ghost')) {
		100
1539							# This should be removed from the file
1540	2					3	my $original = $obj->{'db_committed'};
1541	2					3	my $line = join($join_string, @{$original}{@property_names_in_column_order}) . $record_separator;
	2					4
1542	2					4	$delete->{$line} = $obj;
1543
1544							} elsif ($obj->{'db_committed'}) {
1545							# this is a changed object
1546	2					3	my $original = $obj->{'db_committed'};
1547
1548	2	100				3	if ($object_has_changed_sorted_column->($obj)) {
1549							# One of hte sorted columns has changed. Model this as a delete and insert
1550	1					2	push @$insert, [ @{$obj}{@property_names_in_column_order} ];
	1					2
1551	1					2	my $line = join($join_string, @{$original}{@property_names_in_column_order}) . $record_separator;
	1					3
1552	1					3	$delete->{$line} = $obj;
1553							} else {
1554							# This object is changed since it was read in the file
1555	1					1	my $original_line = join($join_string, @{$original}{@property_names_in_column_order}) . $record_separator;
	1					4
1556	1					2	my $changed_line = join($join_string, @{$obj}{@property_names_in_column_order}) . $record_separator;
	1					3
1557	1					2	$update->{$original_line} = $changed_line;
1558							}
1559
1560							} else {
1561							# This object is new and should be added to the file
1562	3					4	push @$insert, [ @{$obj}{@property_names_in_column_order} ];
	3					6
1563							}
1564							}
1565
1566	1					2	my %column_is_sorted_descending;
1567	2	50				4	my @sorted_column_names = map { if (index($_, '-') == 0) {
1568	0					0	my $s = $_;
1569	0					0	substr($s, 0, 1, '');
1570	0					0	$column_is_sorted_descending{$s} = $s;
1571							} else {
1572	2					4	$_;
1573							}
1574							}
1575	1	50				2	@{ $self->sorted_columns() \|\| [] };
	1					3
1576
1577	1					2	my $row_sort_sub;
1578	1	50				2	if (@sorted_column_names) {
1579	1					2	my @comparison_subs = map { &_resolve_sorter_for(is_numeric => $column_name_is_numeric{$_},
1580							is_descending => $column_is_sorted_descending{$_},
1581	2					8	column_index => $column_name_to_index{$_})
1582							}
1583							@sorted_column_names;
1584
1585							$row_sort_sub = sub ($$) {
1586	15			15		15	foreach my $comparator ( @comparison_subs ) {
1587	16					20	my $cmp = $comparator->($_[0], $_[1]);
1588	16	100				27	return $cmp if $cmp;
1589							}
1590	0					0	return 0;
1591	1					3	};
1592
1593							# Put the rows-to-insert in sorted order
1594	1					4	my @insert_sorted = sort $row_sort_sub @$insert;
1595	1					2	$insert = \@insert_sorted;
1596							}
1597
1598	1	50				11	my $write_fh = $use_quick_rename
1599							? File::Temp->new(DIR => $containing_directory)
1600							: File::Temp->new();
1601	1	50				434	unless ($write_fh) {
1602	0					0	Carp::croak("Can't save changes for $pathname: Can't create temporary file for writing: $!");
1603							}
1604
1605	1					10	my $monitor_start_rime = Time::HiRes::time();
1606	1					1	my $time = time();
1607	1					62	$logger->(sprintf("\nFILE: SYNC DATABASE AT %s [%s]. Started transaction for %s to temp file %s\n",
1608							$time, scalar(localtime($time)), $pathname, $write_fh->filename));
1609
1610							# Write headers to the new file
1611	1					4	for (my $i = 0; $i < $self->header_lines; $i++) {
1612	0	0				0	my $line = $use_quick_read ? <$read_fh> : $read_fh->getline();
1613	0					0	$write_fh->print($line);
1614							}
1615
1616	1					2	my $line;
1617							READ_A_LINE:
1618	1					2	while(1) {
1619	11	100				14	unless ($line) {
1620	8	50				34	$line = $use_quick_read ? <$read_fh> : $read_fh->getline();
1621	8	100				13	last unless defined $line;
1622							}
1623
1624	10	50	50			24	if (@sorted_column_names and scalar(@$insert)) {
1625							# There are sorted things waiting to insert
1626	10					7	my $chomped = $line;
1627	10					45	$chomped =~ s/$record_separator$//; # chomp, but for any value
1628	10					26	my $row = [ split($split_regex, $chomped, $column_names_count) ];
1629	10					15	my $cmp = $row_sort_sub->($row, $insert->[0]);
1630	10	100				16	if ($cmp > 0) {
1631							# write the object's data
1632	9			9		51	no warnings 'uninitialized'; # Some of the object's data may be undef
	9					15
	9					1608
1633	3					5	my $new_row = shift @$insert;
1634	3					4	my $new_line = join($join_string, @$new_row) . $record_separator;
1635
1636	3					8	$logger->("FILE: INSERT >>$new_line<<\n");
1637
1638	3					5	$write_fh->print($new_line);
1639							# Don't undef the last line read, meaning it could still be written to the output...
1640	3					12	next READ_A_LINE;
1641							}
1642							}
1643
1644	7	100				17	if (my $obj = delete $delete->{$line}) {
		100
1645	2					5	$logger->("FILE: DELETE >>$line<<\n");
1646
1647							} elsif (my $changed = delete $update->{$line}) {
1648	1					9	$logger->("FILE: UPDFATE replace >>$line<< with >>$changed<<\n");
1649	1					5	$write_fh->print($changed);
1650
1651							} else {
1652							# This line form the file was unchanged in the app
1653	4					7	$write_fh->print($line);
1654							}
1655	7					25	$line = undef;
1656							}
1657
1658	1	50				4	if (keys %$delete) {
1659	0					0	$self->warning_message("There were " . scalar( keys %$delete)
1660							. " deleted " . $class_meta->class_name
1661							. " objects that did not match data in file $pathname");
1662							}
1663	1	50				5	if (keys %$update) {
1664	0					0	$self->warning_message("There were " . scalar( keys %$delete)
1665							. " updated " . $class_meta->class_name
1666							. " objects that did not match data in file $pathname");
1667							}
1668
1669							# finish out by writing the rest of the new data
1670	1					2	foreach my $new_row ( @$insert ) {
1671	9			9		38	no warnings 'uninitialized'; # Some of the object's data may be undef
	9					12
	9					5830
1672	1					2	my $new_line = join($join_string, @$new_row) . $record_separator;
1673	1					4	$logger->("FILE: INSERT >>$new_line<<\n");
1674	1					2	$write_fh->print($new_line);
1675							}
1676
1677	1					5	my $changed_objects = $objects_for_pathname{$pathname};
1678	1	50				9	unless ($self->_set_specified_objects_saved_uncommitted( $changed_objects )) {
1679	0					0	Carp::croak("Error setting objects to a saved state after syncing");
1680							}
1681							# These closures will keep $write_fh in scope and delay their removal until
1682							# commit() or rollback(). Call these with no args to commit, and one arg (doesn't
1683							# matter what) to roll back
1684							my $commit = $use_quick_rename
1685							? sub {
1686	1	50		1		2	if (@_) {
1687	0					0	$self->_set_specified_objects_saved_rolled_back($changed_objects);
1688							} else {
1689	1					3	my $temp_filename = $write_fh->filename;
1690	1					9	$logger->("FILE: COMMIT rename $temp_filename => $pathname\n");
1691	1	50				61	unless (rename($temp_filename, $pathname)) {
1692	0					0	$self->error_message("Can't rename $temp_filename to $pathname: $!");
1693	0					0	return;
1694							}
1695	1					9	$self->_set_specified_objects_saved_committed($changed_objects);
1696							}
1697	1					3	return 1;
1698							}
1699							:
1700							sub {
1701	0	0		0		0	if (@_) {
1702	0					0	$self->_set_specified_objects_saved_rolled_back($changed_objects);
1703							} else {
1704	0					0	my $temp_filename = $write_fh->filename;
1705	0					0	$logger->("FILE: COMMIT copy " . $temp_filename . " => $pathname\n");
1706	0					0	my $read_fh = IO::File->new($temp_filename);
1707	0	0				0	unless ($read_fh) {
1708	0					0	$self->error_message("Can't open file $temp_filename for reading: $!");
1709	0					0	return;
1710							}
1711	0					0	my $copy_fh = IO::File->new($pathname, 'w');
1712	0	0				0	unless ($copy_fh) {
1713	0					0	$self->error_message("Can't open file $pathname for writing: $!");
1714	0					0	return;
1715							}
1716
1717	0					0	while(<$read_fh>) {
1718	0					0	$copy_fh->print($_);
1719							}
1720	0					0	$copy_fh->close();
1721	0					0	$read_fh->close();
1722	0					0	$self->_set_specified_objects_saved_committed($changed_objects);
1723							}
1724	0					0	return 1;
1725	1	50				7	};
1726
1727	1					10	$write_fh->close();
1728
1729	1		50			88	$self->{'__saved_uncommitted'} \|\|= [];
1730	1					1	push @{ $self->{'__saved_uncommitted'} }, $commit;
	1					4
1731
1732	1					2	$time = time();
1733	1					4	$logger->("\nFILE: SYNC DATABASE finished ".$write_fh->filename . "\n");
1734							}
1735
1736							$logger->(sprintf("Saved changes to %d files in %.4f s\n",
1737	1					2	scalar(@{ $self->{'__saved_uncommitted'}}), Time::HiRes::time() - $total_save_time));
	1					19
1738	1					7	return 1;
1739							}
1740
1741							sub commit {
1742	1			1	1	1	my $self = shift;
1743	1	50	33			4	if (! ref($self) and $self->isa('UR::Singleton')) {
1744	0					0	$self = $self->_singleton_object;
1745							}
1746
1747	1	50				4	if ($self->{'__saved_uncommitted'}) {
1748	1					1	foreach my $commit ( @{ $self->{'__saved_uncommitted'}}) {
	1					2
1749	1					3	$commit->();
1750							}
1751							}
1752	1					10	delete $self->{'__saved_uncommitted'};
1753
1754	1					4	return 1;
1755							}
1756
1757
1758							sub rollback {
1759	0			0	1		my $self = shift;
1760	0	0	0				if (! ref($self) and $self->isa('UR::Singleton')) {
1761	0						$self = $self->_singleton_object;
1762							}
1763
1764	0	0					if ($self->{'__saved_uncommitted'}) {
1765	0						foreach my $commit ( @{ $self->{'__saved_uncommitted'}}) {
	0
1766	0						$commit->('rollback');
1767							}
1768							}
1769	0						delete $self->{'__saved_uncommitted'};
1770
1771	0						return 1;
1772							}
1773
1774
1775							1;
1776
1777							__END__