File Coverage

lib/UR/DataSource/FileMux.pm
Criterion Covered Total %
statement 256 333 76.8
branch 71 126 56.3
condition 15 36 41.6
subroutine 22 26 84.6
pod 2 8 25.0
total 366 529 69.1


line stmt bran cond sub pod time code
1             package UR::DataSource::FileMux;
2              
3             # NOTE! This module is deprecated. Use UR::DataSource::Filesystem instead.
4              
5 6     6   140 use UR;
  6         9  
  6         30  
6 6     6   22 use strict;
  6         8  
  6         104  
7 6     6   17 use warnings;
  6         8  
  6         9348  
8             our $VERSION = "0.46"; # UR $VERSION;
9              
10             class UR::DataSource::FileMux {
11             is => ['UR::DataSource'],
12             doc => 'A factory for other datasource factories that is able to pivot depending on parameters in the rule used for get()',
13             has => [
14             delimiter => { is => 'String', default_value => '\s*,\s*', doc => 'Delimiter between columns on the same line' },
15             record_separator => { is => 'String', default_value => "\n", doc => 'Delimiter between lines in the file' },
16             column_order => { is => 'ARRAY', doc => 'Names of the columns in the file, in order' },
17             cache_size => { is => 'Integer', default_value => 100 },
18             skip_first_line => { is => 'Integer', default_value => 0 },
19             handle_class => { is => 'String', default_value => 'IO::File', doc => 'Class to use for new file handles' },
20             quick_disconnect => { is => 'Boolean', default_value => 1, doc => 'Do not hold the file handle open between requests' },
21             file_resolver => { is => 'CODE', doc => 'subref that will return a pathname given a rule' },
22             constant_values => { is => 'ARRAY', default_value => undef, doc => 'Property names which are not in the data file(s), but are part of the objects loaded from the data source' },
23             ],
24             has_optional => [
25             server => { is => 'String', doc => 'pathname to the data file' },
26             file_list => { is => 'ARRAY', doc => 'list of pathnames of equivalent files' },
27             sort_order => { is => 'ARRAY', doc => 'Names of the columns by which the data file is sorted' },
28             required_for_get => { is => 'ARRAY', doc => 'Property names which must appear in any get() request using this data source. It is used to build the argument list for the file_resolver sub' },
29             delegate_file_ds => { is => 'UR:DataFile::FileMuxFile', reverse_as => 'controlling_filemux', is_many => 1 },
30             ],
31             };
32              
33             UR::Object::Type->define(
34             class_name => 'UR::DataSource::FileMuxFile',
35             is => 'UR::DataSource::File',
36             has_transient => [
37             controlling_filemux => { is => 'UR::DataSource::FileMux', id_by => 'controlling_filemux_id' },
38             ],
39             )->is_uncachable(1);
40              
41              
42             # FileMux doesn't have a 'default_handle'
43             sub create_default_handle {
44 0     0 0 0 return undef;
45             }
46              
47             sub disconnect {
48 0     0 0 0 my $self = shift;
49 0         0 my @delegates = $self->delegate_file_ds();
50 0         0 $_->disconnect_default_handle foreach @delegates;
51             }
52              
53             # The concrete data sources will be of this type
54             sub _delegate_data_source_class {
55 28     28   44 'UR::DataSource::FileMuxFile';
56             }
57              
58              
59              
60             sub sql_fh {
61 0     0 0 0 return UR::DBI->sql_fh();
62             }
63              
64 2     2 0 6 sub can_savepoint { 0;} # Doesn't support savepoints
65              
66             my %WORKING_RULES; # Avoid recursion when inferring values from rules
67             sub create_iterator_closure_for_rule {
68 21     21 1 25 my($self,$rule) = @_;
69            
70 21 100       47 if ($WORKING_RULES{$rule->id}++) {
71 3         9 my $subject_class = $rule->subject_class_name;
72 3         12 $self->error_message("Recursive entry into create_iterator_closure_for_rule() for class $subject_class rule_id ".$rule->id);
73 3         9 $WORKING_RULES{$rule->id}--;
74 3         9 return;
75             }
76              
77 18         67 my $context = UR::Context->get_current;
78 18         64 my $required_for_get = $self->required_for_get;
79              
80 18 50       72 if ($ENV{'UR_DBI_MONITOR_SQL'}) {
81 0         0 $self->sql_fh->printf("FILEMux: Resolving values for %d params (%s)\n",
82             scalar(@$required_for_get),
83             join(',',@$required_for_get));
84             }
85              
86 18         24 my @all_resolver_params;
87 18         45 for(my $i = 0; $i < @$required_for_get; $i++) {
88 25         37 my $param_name = $required_for_get->[$i];
89 25         76 my @values = $context->infer_property_value_from_rule($param_name, $rule);
90 25 100       53 unless (@values) {
91             # Hack: the above infer...rule() returned 0 objects, so $all_params_loaded made
92             # a note of it. Later on, if the user supplies more params such that it would be
93             # able to resolve a file, we'll never get here, because the Context will see that a
94             # superset of the params (this current invocation without sufficient params) was already
95             # tried and results should be entirely in the cache - ie. no objects.
96             # So... remove the evidence that we tried this in case the user is catching the die
97             # below and will continue on
98 3         10 $context->_forget_loading_was_done_with_template_and_rule($rule->template_id, $rule->id);
99 3         15 Carp::croak "Can't resolve data source: no $param_name specified in rule $rule";
100             }
101              
102 22 100 66     102 if (@values == 1 and ref($values[0]) eq 'ARRAY') {
103 2         2 @values = @{$values[0]};
  2         5  
104             }
105              
106 22 50       53 if ($ENV{'UR_DBI_MONITOR_SQL'}) {
107 0         0 $self->sql_fh->print(" FILEMux: $param_name: (",join(',',@values),")\n");
108             }
109              
110 22 50       48 unless ($rule->specifies_value_for($param_name)) {
111 0 0       0 if (scalar(@values) == 1) {
112 0         0 $rule = $rule->add_filter($param_name => $values[0]);
113             } else {
114 0         0 $rule = $rule->add_filter($param_name => \@values);
115             }
116             }
117 22         73 $all_resolver_params[$i] = \@values;
118             }
119 15         53 my @resolver_param_combinations = UR::Util::combinations_of_values(@all_resolver_params);
120              
121             # Each combination of params ends up being from a different data source. Make an
122             # iterator pulling from each of them
123 15         24 my $file_resolver = $self->{'file_resolver'};
124 15 100       37 if (ref($file_resolver) ne 'CODE') {
125             # Hack! The data source is probably a singleton class and there's a file_resolver method
126             # defined
127 9         46 $file_resolver = $self->can('file_resolver');
128             }
129              
130 15         84 my $concrete_ds_type = $self->_delegate_data_source_class;
131             #my %sub_ds_params = $self->_common_params_for_concrete_data_sources();
132 15         23 my @constant_value_properties = @{$self->constant_values};
  15         47  
133              
134 15         37 my @data_source_construction_data;
135 15         31 foreach my $resolver_params ( @resolver_param_combinations ) {
136 17         48 push @data_source_construction_data, { subject_class_name => $rule->subject_class_name,
137             file_resolver => $file_resolver,
138             file_resolver_params => $resolver_params,
139             };
140            
141             }
142 15         38 delete $WORKING_RULES{$rule->id};
143              
144 15         20 my($monitor_start_time,$monitor_printed_first_fetch);
145 15 50       36 if ($ENV{'UR_DBI_MONITOR_SQL'}) {
146 0         0 $monitor_start_time = Time::HiRes::time();
147 0         0 $monitor_printed_first_fetch = 0;
148             }
149              
150 15         47 my $base_sub_ds_name = $self->id;
151              
152             # Fill in @ds_iterators with iterators for all the underlying data sources
153             # pre-fill @ds_next_row with the next object from each data source
154             # @ds_constant_values is the constant_values for objects of those data sources
155 15         17 my(@ds_iterators, @ds_next_row, @ds_constant_values);
156 15         26 foreach my $data_source_construction_data ( @data_source_construction_data ) {
157 17         28 my $subject_class_name = $data_source_construction_data->{'subject_class_name'};
158 17         18 my $file_resolver = $data_source_construction_data->{'file_resolver'};
159 17         18 my $file_resolver_params = $data_source_construction_data->{'file_resolver_params'};
160              
161 17         17 my @sub_ds_name_parts;
162 17         39 my $this_ds_rule_params = $rule->legacy_params_hash;
163 17         48 for (my $i = 0; $i < @$required_for_get; $i++) {
164 23         30 my $param_name = $required_for_get->[$i];
165 23         23 my $param_value = $file_resolver_params->[$i];
166 23         41 push @sub_ds_name_parts, $param_name . $param_value;
167 23         48 $this_ds_rule_params->{$param_name} = $param_value;
168             }
169 17         47 my $sub_ds_id = join('::', $base_sub_ds_name, @sub_ds_name_parts);
170              
171 17         50 my $resolved_file = $file_resolver->(@$file_resolver_params);
172 17 50       148 unless ($resolved_file) {
173 0         0 Carp::croak "Can't create data source: file resolver for $sub_ds_id returned false for params "
174             . join(',',@$file_resolver_params);
175             }
176 17         63 my $this_ds_obj = $self->get_or_create_data_source($concrete_ds_type, $sub_ds_id, $resolved_file);
177 17         94 my $this_ds_rule = UR::BoolExpr->resolve($subject_class_name,%$this_ds_rule_params);
178              
179 17         34 my @constant_values = map { $this_ds_rule->value_for($_) }
  23         55  
180             @constant_value_properties;
181              
182 17         85 my $ds_iterator = $this_ds_obj->create_iterator_closure_for_rule($this_ds_rule);
183 17         33 my $initial_obj = $ds_iterator->();
184 17 100       41 next unless $initial_obj;
185              
186 14         29 push @ds_constant_values, \@constant_values;
187 14         20 push @ds_iterators, $ds_iterator;
188 14         63 push @ds_next_row, $initial_obj;
189             }
190              
191 15 50 33     76 unless (scalar(@ds_constant_values) == scalar(@ds_iterators)
192             and
193             scalar(@ds_constant_values) == scalar(@ds_next_row) )
194             {
195 0         0 Carp::croak("Internal error in UR::DataSource::FileMux: arrays for iterators, constant_values and next_row have differing sizes");
196             }
197            
198              
199             # Create a closure that can sort the next possible rows in @ds_next_row and return the index of
200             # the one that sorts earliest
201 15         23 my $sorter;
202 15 100       45 if (@ds_iterators == 0 ) {
    100          
203             # No underlying data sources, no data to return
204 2     2   17 return sub {};
205              
206             } elsif (@ds_iterators == 1 ) {
207             # Only one underlying data source.
208 12     22   48 $sorter = sub { 0 };
  22         25  
209              
210             } else {
211             # more than one underlying data source, make a real sorter
212              
213 1         3 my %column_name_to_row_index;
214 1         6 my $column_order_names = $self->column_order;
215 1         5 my $constant_values = $self->constant_values;
216 1         4 push @$column_order_names, @$constant_values;
217 1         5 for (my $i = 0; $i < @$column_order_names; $i++) {
218 4         8 $column_name_to_row_index{$column_order_names->[$i]} = $i;
219             }
220              
221 1         4 my $sort_order = $self->sort_order;
222 1 50 33     12 if (! $sort_order or ! @$sort_order ) {
223             # They didn't specify sorting, Try finding out the class' ID properties
224             # and sort by them
225              
226 0         0 my $subject_class_meta = $rule->subject_class_name->__meta__;
227 0         0 my @id_properties = $subject_class_meta->direct_id_property_names;
228              
229 0         0 $sort_order = [];
230 0         0 foreach my $property_name ( @id_properties ) {
231 0         0 my $property_meta = $subject_class_meta->property_meta_for_name($property_name);
232 0         0 my $column_name = $property_meta->column_name;
233 0 0       0 next unless $column_name;
234 0 0       0 next unless ($column_name_to_row_index{$column_name});
235 0         0 push @$sort_order, $column_name;
236             }
237             }
238 1         2 my @row_index_sort_order = map { $column_name_to_row_index{$_} } @$sort_order;
  1         3  
239              
240             $sorter = sub {
241 7     7   10 my $lowest_obj_idx = 0;
242             COMPARE_OBJECTS:
243 7         14 for(my $compare_obj_idx = 1; $compare_obj_idx < @ds_next_row; $compare_obj_idx++) {
244              
245             COMPARE_COLUMNS:
246 5         9 for (my $i = 0; $i < @row_index_sort_order; $i++) {
247 5         4 my $column_num = $row_index_sort_order[$i];
248              
249 5   33     16 my $comparison = $ds_next_row[$lowest_obj_idx]->[$column_num] <=> $ds_next_row[$compare_obj_idx]->[$column_num]
250             ||
251             $ds_next_row[$lowest_obj_idx]->[$column_num] cmp $ds_next_row[$compare_obj_idx]->[$column_num];
252              
253 5 100       9 if ($comparison == -1) {
    50          
254 4         11 next COMPARE_OBJECTS;
255             } elsif ($comparison == 1) {
256 1         3 $lowest_obj_idx = $compare_obj_idx;
257 1         3 next COMPARE_OBJECTS;
258             }
259             }
260             }
261              
262 7         8 return $lowest_obj_idx;
263 1         6 };
264             }
265              
266              
267             my $iterator = sub {
268 38 50 33 38   88 if ($monitor_start_time and ! $monitor_printed_first_fetch) {
269 0         0 $self->sql_fh->printf("FILEMux: FIRST FETCH TIME: %.4f s\n", Time::HiRes::time() - $monitor_start_time);
270 0         0 $monitor_printed_first_fetch = 1;
271             }
272            
273 38         71 while (@ds_next_row) {
274 29         50 my $next_row_idx = $sorter->();
275 29         39 my $next_row_to_return = $ds_next_row[$next_row_idx];
276              
277 29         38 push @$next_row_to_return, @{$ds_constant_values[$next_row_idx]};
  29         56  
278              
279 29         71 my $refill_row = $ds_iterators[$next_row_idx]->();
280 29 100       55 if ($refill_row) {
281 15         15 $ds_next_row[$next_row_idx] = $refill_row;
282             } else {
283             # This iterator is exhausted
284 14         26 splice(@ds_iterators, $next_row_idx, 1);
285 14         42 splice(@ds_constant_values, $next_row_idx, 1);
286 14         38 splice(@ds_next_row, $next_row_idx, 1);
287             }
288 29         105 return $next_row_to_return;
289             }
290              
291 9 50       17 if ($monitor_start_time) {
292 0         0 $self->sql_fh->printf("FILEMux: TOTAL EXECUTE-FETCH TIME: %.4f s\n",
293             Time::HiRes::time() - $monitor_start_time);
294             }
295              
296 9         18 return;
297 13         66 };
298              
299 13         92 Sub::Name::subname('UR::DataSource::FileMux::__datasource_iterator(closure)__', $iterator);
300 13         97 return $iterator;
301             }
302              
303              
304             sub get_or_create_data_source {
305 17     17 0 25 my($self, $concrete_ds_type, $sub_ds_id, $file_path) = @_;
306              
307 17         19 my $sub_ds;
308 17 100       96 unless ($sub_ds = $concrete_ds_type->get($sub_ds_id)) {
309 11 50       35 if ($ENV{'UR_DBI_MONITOR_SQL'}) {
310 0         0 $self->sql_fh->print("FILEMux: $file_path is data source $sub_ds_id\n");
311             }
312              
313 11         39 my %sub_ds_params = $self->_common_params_for_concrete_data_sources();
314 11         54 $concrete_ds_type->define(
315             id => $sub_ds_id,
316             %sub_ds_params,
317             server => $file_path,
318             controlling_filemux_id => $self->id,
319             );
320 11         18 $UR::Context::all_objects_cache_size++;
321 11         37 $sub_ds = $concrete_ds_type->get($sub_ds_id);
322            
323 11 50       27 unless ($sub_ds) {
324 0         0 Carp::croak "Can't create data source: retrieving newly defined data source $sub_ds_id returned nothing";
325             }
326              
327             # Since these $sub_ds objects have no data_source, this will indicate to
328             # UR::Context::prune_object_cache() that it's ok to go ahead and drop them
329 11         51 $sub_ds->__weaken__();
330             }
331 17         32 return $sub_ds;
332             }
333              
334              
335             sub _generate_loading_templates_arrayref {
336 12     12   16 my $self = shift;
337 12         33 my $delegate_class = $self->_delegate_data_source_class();
338 12         78 $delegate_class->class; # trigger the autoloader, if necessary
339              
340 12         36 my $sub = $delegate_class->can('_generate_loading_templates_arrayref');
341 12 50       91 unless ($sub) {
342 0         0 Carp::croak(qq(FileMux can't locate method "_generate_loading_templates_arrayref" via package $delegate_class. Is $delegate_class a File-type DataSource?));
343             }
344 12         44 $self->$sub(@_);
345             }
346              
347              
348             sub _normalize_file_resolver_details {
349 4     4   6 my($class, $class_data, $ds_data) = @_;
350              
351 4         6 my $path_resolver_coderef;
352             my @required_for_get;
353 4         5 my $class_name = $class_data->{'class_name'};
354              
355 4 100       10 if (exists $ds_data->{'required_for_get'}) {
356 1         1 @required_for_get = @{$ds_data->{'required_for_get'}};
  1         4  
357             my $user_supplied_resolver = $ds_data->{'file_resolver'} || $ds_data->{'resolve_file_with'} ||
358 1   33     7 $ds_data->{'resolve_path_with'};
359 1 50       3 if (ref($user_supplied_resolver) eq 'CODE') {
    0          
360 1         2 $path_resolver_coderef = $user_supplied_resolver;
361             } elsif (! ref($user_supplied_resolver)) {
362             # It's a function name
363 0         0 $path_resolver_coderef = $class_name->can($user_supplied_resolver);
364 0 0       0 unless ($path_resolver_coderef) {
365 0         0 die "Can't locate function $user_supplied_resolver via class $class_name during creation of inline data source";
366             }
367             } else {
368 0         0 $class->error_message("The data_source specified 'required_for_get', but the file resolver was not a coderef or function name");
369 0         0 return;
370             }
371             } else {
372             my $resolve_path_with = $ds_data->{'resolve_path_with'} || $ds_data->{'path'} ||
373 3   0     92 $ds_data->{'server'} || $ds_data->{'file_resolver'};
374 3 0 33     7 unless ($resolve_path_with or $ds_data->{'file_list'}) {
375 0         0 $class->error_message("A data_source's definition must include 'resolve_path_with', 'path', 'server', or 'file_list'");
376 0         0 return;
377             }
378              
379 3 50       22 if (! ref($resolve_path_with)) {
    50          
    50          
    50          
380             # a simple string
381 0 0 0     0 if ($class_name->can($resolve_path_with) or grep { $_ eq $resolve_path_with } @{$class_data->{'has'}}) {
  0         0  
  0         0  
382             # a method or property name
383 6     6   31 no strict 'refs';
  6         9  
  6         897  
384 0         0 $path_resolver_coderef = \&{ $class_name . "::$resolve_path_with"};
  0         0  
385             } else {
386             # a hardcoded pathname
387 0     0   0 $path_resolver_coderef = sub { $resolve_path_with };
  0         0  
388             }
389             } elsif (ref($resolve_path_with) eq 'CODE') {
390 0         0 $path_resolver_coderef = $resolve_path_with;
391              
392             } elsif (ref($resolve_path_with) ne 'ARRAY') {
393 0         0 $class->error_message("A data_source's 'resolve_path_with' must be a coderef, arrayref, pathname or method name");
394 0         0 return;
395              
396             } elsif (ref($resolve_path_with) eq 'ARRAY') {
397             # A list of things
398 3 100       9 if (ref($resolve_path_with->[0]) eq 'CODE') {
    100          
    50          
    50          
399             # A coderef, then property list
400 1         2 @required_for_get = @{$ds_data->{'resolve_path_with'}};
  1         4  
401 1         2 $path_resolver_coderef = shift @required_for_get;
402              
403 14         21 } elsif (grep { $_ eq $resolve_path_with->[0] }
404 2         7 keys(%{$class_data->{'has'}}) ) {
405             # a list of property names, join them with /s
406 1 50       8 unless ($ds_data->{'base_path'}) {
407 0         0 $class->warning_message("$class_name inline data source: 'resolve_path_with' is a list of method names, but 'base_path' is undefined'");
408             }
409 1         7 @required_for_get = @{$resolve_path_with};
  1         2  
410 1         2 my $base_path = $ds_data->{'base_path'};
411 6     6   25 $path_resolver_coderef = sub { no warnings 'uninitialized';
  6         9  
  6         652  
412 1     1   5 return join('/', $base_path, @_)
413 1         4 };
414            
415             } elsif ($class_name->can($resolve_path_with->[0])) {
416             # a method compiled into the class, but not one that's a property
417 0         0 @required_for_get = @{$resolve_path_with};
  0         0  
418 0         0 my $fcn_name = shift @required_for_get;
419 0         0 my $path_resolver_coderef = $class_name->can($fcn_name);
420 0 0       0 unless ($path_resolver_coderef) {
421 0         0 die "Can't locate function $fcn_name via class $class_name during creation of inline data source";
422             }
423              
424             } elsif (! ref($resolve_path_with->[0])) {
425             # treat the first element as a sprintf format
426 1         43 @required_for_get = @{$resolve_path_with};
  1         4  
427 1         2 my $format = shift @required_for_get;
428 6     6   20 $path_resolver_coderef = sub { no warnings 'uninitialized';
  6         10  
  6         5795  
429 1     1   5 return sprintf($format, @_);
430 1         4 };
431             } else {
432 0         0 $class->error_message("Unrecognized layout for 'resolve_path_with'");
433 0         0 return;
434             }
435             } else {
436 0         0 $class->error_message("Unrecognized layout for 'resolve_path_with'");
437 0         0 return;
438             }
439             }
440              
441 4         12 return ($path_resolver_coderef, @required_for_get);
442             }
443              
444              
445             # Properties we'll copy from $self when creating a concrete data source
446             sub _common_params_for_concrete_data_sources {
447 12     12   18 my $self = shift;
448              
449 12         17 my %params;
450 12         27 foreach my $param ( qw( delimiter skip_first_line column_order sort_order record_separator constant_values handle_class quick_disconnect ) ) {
451 96 50       260 next unless defined $self->$param;
452 96         220 my @vals = $self->$param;
453 96 50       152 if (@vals > 1) {
454 0         0 $params{$param} = \@vals;
455             } else {
456 96         136 $params{$param} = $vals[0];
457             }
458             }
459 12         76 return %params;
460             }
461            
462              
463             sub initializer_should_create_column_name_for_class_properties {
464 69     69 0 650 1;
465             }
466            
467             # Called by the class initializer
468             sub create_from_inline_class_data {
469 4     4 1 8 my($class, $class_data, $ds_data) = @_;
470              
471 4 50       13 unless ($ds_data->{'column_order'}) {
472 0         0 die "Can't create inline data source for ".$class_data->{'class_name'}.": 'column_order' is a required param";
473             }
474              
475              
476 4         13 my($file_resolver, @required_for_get) = $class->_normalize_file_resolver_details($class_data, $ds_data);
477 4 50       9 return unless $file_resolver;
478              
479 4 100 66     21 if (!exists($ds_data->{'constant_values'}) and @required_for_get) {
480             # If there are required_for_get params, but the user didn't specify any constant_values,
481             # then all the required_for_get items that are real properties become constant_values
482 3         4 $ds_data->{'constant_values'} = [];
483 3         4 my %columns_from_ds = map { $_ => 1 } @{$ds_data->{'column_order'}};
  9         17  
  3         6  
484              
485 3         5 foreach my $param_name ( @required_for_get ) {
486 6         8 my $param_data = $class_data->{'has'}->{$param_name};
487 6 50       9 next unless $param_data;
488              
489 6         6 my $param_column = $param_data->{'column_name'};
490 6 50       8 next unless $param_column;
491              
492 6 50       13 unless ($columns_from_ds{$param_column}) {
493 6         4 push @{$ds_data->{'constant_values'}}, $param_name;
  6         11  
494             }
495             }
496             }
497              
498              
499 4         5 my %ds_creation_params;
500 4         8 foreach my $param ( qw( delimiter record_separator column_order cache_size skip_first_line sort_order constant_values ) ) {
501 28 100       40 if (exists $ds_data->{$param}) {
502 16         24 $ds_creation_params{$param} = $ds_data->{$param};
503             }
504             }
505              
506 4         27 my($namespace, $class_name) = ($class_data->{'class_name'} =~ m/^(\w+?)::(.*)/);
507 4         11 my $ds_id = "${namespace}::DataSource::${class_name}";
508 4         7 my $ds_type = delete $ds_data->{'is'};
509            
510 4         26 my $ds = $ds_type->create(
511             %ds_creation_params,
512             id => $ds_id,
513             required_for_get => \@required_for_get,
514             file_resolver => $file_resolver
515             );
516              
517 4         15 return $ds;
518             }
519              
520              
521             sub _sync_database {
522 1     1   2 my $self = shift;
523 1         2 my %params = @_;
524              
525 1 50       3 unless (ref($self)) {
526 0 0       0 if ($self->isa("UR::Singleton")) {
527 0         0 $self = $self->_singleton_object;
528             }
529             else {
530 0         0 die "Called as a class-method on a non-singleton datasource!";
531             }
532             }
533              
534 1         2 my $changed_objects = delete $params{'changed_objects'};
535              
536 1         5 my $context = UR::Context->get_current;
537 1         6 my $required_for_get = $self->required_for_get;
538              
539 1         5 my $file_resolver = $self->{'file_resolver'};
540 1 50       4 if (ref($file_resolver) ne 'CODE') {
541             # Hack! The data source is probably a singleton class and there's a file_resolver method
542             # defined
543 1         8 $file_resolver = $self->can('file_resolver');
544             }
545              
546 1         8 my $monitor_start_time;
547 1 50       4 if ($ENV{'UR_DBI_MONITOR_SQL'}) {
548 0         0 $monitor_start_time = Time::HiRes::time();
549 0         0 my $time = time();
550 0         0 $self->sql_fh->printf("FILEMux: SYNC_DATABASE AT %d [%s].\n", $time, scalar(localtime($time)));
551             }
552              
553 1         4 my $concrete_ds_type = $self->_delegate_data_source_class;
554 1         5 my %sub_ds_params = $self->_common_params_for_concrete_data_sources();
555              
556 1         3 my %datasource_for_dsid;
557             my %objects_by_datasource;
558 1         2 foreach my $obj ( @$changed_objects ) {
559 9         7 my @obj_values;
560 9         17 for (my $i = 0; $i < @$required_for_get; $i++) {
561            
562 9         7 my $property = $required_for_get->[$i];
563 9         19 my $value = $obj->$property;
564 9 50       14 unless ($value) {
565 0         0 my $class = $obj->class;
566 0         0 my $id = $obj->id;
567 0         0 $self->error_message("No value for required-for-get property $property on object of class $class id $id");
568 0         0 return;
569             }
570 9 50       11 if (ref $value) {
571 0         0 my $class = $obj->class;
572 0         0 my $id = $obj->id;
573 0         0 $self->error_message("Pivoting based on a non-scalar property is not supported. $class object id $id property $property did not return a scalar value");
574 0         0 return;
575             }
576              
577 9         14 push @obj_values, $value;
578             }
579              
580 9         8 my @sub_ds_name_parts;
581 9         13 for (my $i = 0; $i < @obj_values; $i++) {
582 9         16 push @sub_ds_name_parts, $required_for_get->[$i] . $obj_values[$i];
583             }
584 9         19 my $sub_ds_id = join('::', $self->id, @sub_ds_name_parts);
585              
586 9   100     22 my $sub_ds = $datasource_for_dsid{$sub_ds_id} || $concrete_ds_type->get($sub_ds_id);
587 9 100       12 unless ($sub_ds) {
588 1         4 my $file_path = $file_resolver->(@obj_values);
589 1 50       9 unless (defined $file_path) {
590 0         0 die "Can't resolve data source: resolver for " .
591             $self->class .
592             " returned undef for params " . join(',',@obj_values);
593             }
594              
595 1 50       4 if ($ENV{'UR_DBI_MONITOR_SQL'}) {
596 0         0 $self->sql_fh->print("FILEMux: $file_path is data source $sub_ds_id\n");
597             }
598              
599             $concrete_ds_type->define(
600 1         7 id => $sub_ds_id,
601             %sub_ds_params,
602             server => $file_path,
603             controlling_filemux_id => $self->id,
604             );
605 1         2 $UR::Context::all_objects_cache_size++;
606 1         3 $sub_ds = $concrete_ds_type->get($sub_ds_id);
607              
608             # Since these $sub_ds objects have no data_source, this will indicate to
609             # UR::Context::prune_object_cache() that it's ok to go ahead and drop them
610 1         3 $sub_ds->__weaken__();
611             }
612 9 50       11 unless ($sub_ds) {
613 0         0 die "Can't get data source with ID $sub_ds_id";
614             }
615 9   66     21 $datasource_for_dsid{$sub_ds_id} ||= $sub_ds;
616              
617              
618 9 100       16 unless ($objects_by_datasource{$sub_ds_id}) {
619 3         10 $objects_by_datasource{$sub_ds_id}->{'ds_obj'} = $sub_ds;
620 3         5 $objects_by_datasource{$sub_ds_id}->{'changed_objects'} = [];
621             }
622 9         6 push(@{$objects_by_datasource{$sub_ds_id}->{'changed_objects'}}, $obj);
  9         22  
623             }
624              
625 1         3 foreach my $h ( values %objects_by_datasource ) {
626 3         5 my $sub_ds = $h->{'ds_obj'};
627 3         3 my $changed_objects = $h->{'changed_objects'};
628              
629 3         13 $sub_ds->_sync_database(changed_objects => $changed_objects);
630             }
631              
632 1 50       5 if ($ENV{'UR_DBI_MONITOR_SQL'}) {
633 0         0 $self->sql_fh->printf("FILEMux: TOTAL COMMIT TIME: %.4f s\n", Time::HiRes::time() - $monitor_start_time);
634             }
635              
636 1         9 return 1;
637             }
638              
639              
640            
641              
642             1;
643              
644             =pod
645              
646             =head1 NAME
647              
648             UR::DataSource::FileMux - Parent class for datasources which can multiplex many files together
649              
650             =head1 DEPRECATED
651              
652             This module is deprecated. Use UR::DataSource::Filesystem instead.
653              
654             =head1 SYNOPSIS
655              
656             package MyNamespace::DataSource::MyFileMux;
657             class MyNamespace::DataSource::MyFileMux {
658             is => ['UR::DataSource::FileMux', 'UR::Singleton'],
659             };
660             sub column_order { ['thing_id', 'thing_name', 'thing_color'] }
661             sub sort_order { ['thing_id'] }
662             sub delimiter { "\t" }
663             sub constant_values { ['thing_type'] }
664             sub required_for_get { ['thing_type'] }
665             sub file_resolver {
666             my $thing_type = shift;
667             return '/base/path/to/files/' . $thing_type;
668             }
669              
670             package main;
671             class MyNamespace::ThingMux {
672             id_by => ['thing_id', 'thing_type' ],
673             has => ['thing_id', 'thing_type', 'thing_name','thing_color'],
674             data_source => 'MyNamespace::DataSource::MyFileMux',
675             };
676              
677             my @objs = MyNamespace::ThingMux->get(thing_type => 'people', thing_name => 'Bob');
678              
679             =head1 DESCRIPTION
680              
681             UR::DataSource::FileMux provides a framework for file-based data sources where the
682             data files are split up between one or more parameters of the class. For example,
683             in the synopsis above, the data for the class is stored in several files in the
684             directory /base/path/to/files/. Each file may have a name such as 'people' or 'cars'.
685              
686             When a get() request is made on the class, the parameter 'thing_type' must be present
687             in the rule, and the value of that parameter is used to complete the file's pathname,
688             via the file_resolver() function. Note that even though the 'thing_type' parameter
689             is not actually stored in the file, its value for the loaded objects gets filled in
690             because that parameter exists in the constant_values() configuration list, and in
691             the get() request.
692              
693             =head2 Configuration
694              
695             These methods determine the configuration for your data source and should appear as
696             properties of the data source or as functions in the package.
697              
698             =over 4
699              
700             =item delimiter()
701              
702             =item record_separator()
703              
704             =item skip_first_line()
705              
706             =item column_order()
707              
708             =item sort_order()
709              
710             These configuration items behave the same as in a UR::DataSource::File-based data source.
711              
712             =item required_for_get()
713              
714             required_for_get() should return a listref of parameter names. Whenever a get() request is
715             made on the class, the listed parameters must appear in the rule, or be derivable via
716             UR::Context::infer_property_value_from_rule().
717              
718             =item file_resolver()
719              
720             file_resolver() is called as a function (not a method). It should accept the same number
721             of parameters as are mentioned in required_for_get(). When a get() request is made,
722             those named parameters are extracted from the rule and passed in to the file_resolver()
723             function in the same order. file_resolver() must return a string that is used as the
724             pathname to the file that contains the needed data. The function must not have any
725             other side effects.
726              
727             In the case where the data source is a regular object (not a UR::Singleton), then
728             the file_resolver parameter should return a coderef.
729              
730             =item constant_values()
731              
732             constant_values() should return a listref of parameter names. These parameter names are used by
733             the object loader system to fill in data that may not be present in the data files. If the
734             class has parameters that are not actually stored in the data files, then the parameter
735             values are extracted from the rule and stored in the loaded object instances before being
736             returned to the user.
737              
738             In the synopsis above, thing_type is not stored in the data files, even though it exists
739             as a parameter of the MyNamespace::ThingMux class.
740              
741             =back
742              
743             =head2 Theory of Operation
744              
745             As part of the data-loading infrastructure inside UR, the parameters in a get()
746             request are transformed into a UR::BoolExpr instance, also called a rule.
747             UR::DataSource::FileMux hooks into that infrastructure by implementing
748             create_iterator_closure_for_rule(). It first collects the values for all the
749             parameters mentioned in required_for_get() by passing the rule and needed
750             parameter to infer_property_value_from_rule() of the current Context. If any
751             of the needed parameters is not resolvable, an exception is raised.
752              
753             Some of the rule's parameters may have multiple values. In those cases, all the
754             combinations of values are expanded. For example, if param_a has 2 values, and
755             param_b has 3 values, then there are 6 possible combinations.
756              
757             For each combination of values, the file_resolver() function is called and
758             returns a pathname. For each pathname, a file-specific data source is created
759             (if it does not already exist), with its server() configuration parameter set
760             to return that pathname. Other parameters are copied from the values in the
761             FileMux data source, such as column_order and delimiter.
762             create_iterator_closure_for_rule() is called on each of those data sources.
763              
764             Finally, an iterator is created to wrap all of those iterators, and is returned.
765            
766             =head1 INHERITANCE
767              
768             UR::DataSource
769              
770             =head1 SEE ALSO
771              
772             UR, UR::DataSource, UR::DataSource::File
773              
774             =cut
775