File Coverage

blib/lib/Data/Hopen/G/DAG.pm
Criterion Covered Total %
statement 182 182 100.0
branch 58 60 96.6
condition 6 7 85.7
subroutine 32 32 100.0
pod 6 6 100.0
total 284 287 98.9


line stmt bran cond sub pod time code
1             # Data::Hopen::G::DAG - hopen build graph
2             package Data::Hopen::G::DAG;
3 8     8   1365 use strict;
  8         18  
  8         257  
4 8     8   45 use Data::Hopen::Base;
  8         18  
  8         62  
5              
6             our $VERSION = '0.000019';
7              
8 8     8   2095 use parent 'Data::Hopen::G::Op';
  8         19  
  8         55  
9             use Class::Tiny {
10 14         143 goals => sub { [] },
11 8         91 default_goal => undef,
12             winner => undef,
13              
14             # Private attributes with simple defaults
15             #_node_by_name => sub { +{} }, # map from node names to nodes in either
16             # # _init_graph or _graph
17              
18             # Private attributes - initialized by BUILD()
19             _graph => undef, # L instance
20             _final => undef, # The graph sink - all goals have edges to this
21              
22             #Initialization operations
23             _init_graph => undef, # L
24             # for initializations
25             _init_first => undef, # Graph node for initialization - the first
26             # init operation to be performed.
27              
28             # TODO? also support fini to run operations after _graph runs?
29 8     8   710 };
  8         22  
30              
31 8     8   8076 use Data::Hopen qw(hlog getparameters *QUIET);
  8         18  
  8         868  
32 8     8   3814 use Data::Hopen::G::Goal;
  8         24  
  8         297  
33 8     8   1412 use Data::Hopen::G::Link;
  8         19  
  8         210  
34 8     8   43 use Data::Hopen::G::Node;
  8         16  
  8         162  
35 8     8   3726 use Data::Hopen::G::CollectOp;
  8         21  
  8         295  
36 8     8   54 use Data::Hopen::Util::Data qw(forward_opts);
  8         17  
  8         348  
37 8     8   3783 use Data::Hopen::OrderedPredecessorGraph;
  8         31  
  8         316  
38 8     8   350 use Getargs::Mixed; # parameters, which doesn't permit undef
  8         28  
  8         547  
39 8     8   52 use Hash::Merge;
  8         20  
  8         312  
40 8     8   7750 use Regexp::Assemble;
  8         133767  
  8         316  
41 8     8   70 use Scalar::Util qw(refaddr);
  8         18  
  8         423  
42 8     8   51 use Storable ();
  8         18  
  8         182  
43              
44             # Class data {{{1
45              
46             use constant {
47 8         17486 LINKS => 'link_list', # Graph edge attr: array of DHG::Link instances
48 8     8   43 };
  8         17  
49              
50             # A counter used for making unique names
51             my $_id_counter = 0; # threads: make shared
52              
53             # }}}1
54             # Docs {{{1
55              
56             =head1 NAME
57              
58             Data::Hopen::G::DAG - A hopen build graph
59              
60             =head1 SYNOPSIS
61              
62             This class encapsulates the DAG for a particular set of one or more goals.
63             It is itself a L so that it can be composed into
64             other DAGs.
65              
66             =head1 ATTRIBUTES
67              
68             =head2 goals
69              
70             Arrayref of the goals for this DAG.
71              
72             =head2 default_goal
73              
74             The default goal for this DAG.
75              
76             =head2 winner
77              
78             When a node has multiple predecessors, their outputs are combined using
79             L to form the input to that node. This sets the C
80             precedence. Valid values (case-insensitive) are:
81              
82             =over
83              
84             =item C or C<'combine'>
85              
86             (the default): L. Same-name keys
87             are merged, so no data is lost.
88              
89             =item C<'first'> or C<'keep'>
90              
91             L. The first predecessor to add a value
92             under a particular key will win.
93              
94             =item C<'last'> or C<'replace'>
95              
96             L. The last predecessor to add a value
97             under a particular key will win.
98              
99             =back
100              
101             =head2 _graph
102              
103             The actual L. If you find that you have to use it, please open an
104             issue so we can see about providing a documented API for your use case!
105              
106             =head2 _final
107              
108             The node to which all goals are connected.
109              
110             =head2 _init_graph
111              
112             A separate L of operations that will run before all the operations
113             in L. This is because I don't want to add an edge to every
114             single node just to force the topological sort to work out.
115              
116             =head2 _init_first
117              
118             The first node to be run in _init_graph.
119              
120             =head1 FUNCTIONS
121              
122             =cut
123              
124             # }}}1
125              
126             =head2 _run
127              
128             Traverses the graph. The DAG is similar to a subroutine in this respect. The
129             outputs from all the goals of the DAG are aggregated and provided as the
130             outputs of the DAG. The output is a hash keyed by the name of each goal, with
131             each goal's outputs as the values under that name. Usage:
132              
133             my $hrOutputs = $dag->run([-context=>$scope][, other options])
134              
135             C<$scope> must be a L or subclass if provided.
136             Other options are as L.
137              
138             When evaluating a node, the edges from its predecessors are traversed in
139             the order those predecessors were added to the graph.
140              
141             =cut
142              
143             # The implementation of run(). $self->scope has already been linked to the context.
144             sub _run {
145 30     30   180 my ($self, %args) = getparameters('self', [qw(; visitor)], @_);
146 30         1398 my $retval = {};
147              
148             # --- Get the initialization ops ---
149              
150 30         74 my @init_order = eval { $self->_init_graph->toposort };
  30         755  
151 30 100       1446 die "Initializations contain a cycle!" if $@;
152 29 100       823 @init_order = () if $self->_init_graph->vertices == 1; # no init nodes => skip
153              
154             # --- Get the runtime ops ---
155              
156 29         934 my @order = eval { $self->_graph->toposort };
  29         557  
157             # TODO someday support multi-core-friendly topo-sort, so nodes can run
158             # in parallel until they block each other.
159 29 100       2092 die "Graph contains a cycle!" if $@;
160              
161             # Remove _final from the order for now - I don't yet know what it means
162             # to traverse _final.
163 28 50 66     909 warn "Last item in order isn't _final! This might indicate a bug in hopen, or that some graph edges are missing."
164             unless $QUIET or refaddr $order[$#order] == refaddr $self->_final;
165              
166 28         1370 @order = grep { refaddr $_ != refaddr $self->_final } @order;
  100         1831  
167              
168             # --- Check for non-connected ops, and goals with no inputs ---
169              
170 28 100       217 unless($QUIET) {
171 27         477 foreach my $node ($self->_graph->isolated_vertices) {
172 1         1322 warn "Node @{[$node->name]} is not connected to any other nodes";
  1         7  
173             }
174              
175 27         37018 foreach my $goal (@{$self->goals}) {
  27         665  
176 27 100       602 warn "Goal @{[$goal->name]} has no inputs"
  1         292  
177             if $self->_graph->is_predecessorless_vertex($goal);
178             }
179             }
180              
181             # --- Set up for the merge ---
182              
183 28         7474 state $STRATEGIES = { # regex => strategy
184             '(|combine)' => 'combine',
185             '(first|keep)' => 'keep',
186             '(last|replace)' => 'replace',
187             };
188 28         1143 state $STRATEGY_MAP = Regexp::Assemble->new->flags('i')->track(1)
189             ->anchor_string_begin->anchor_string_end
190             ->add(keys %$STRATEGIES);
191              
192 28   100     4755 my $merge_strategy_idx = $STRATEGY_MAP->match($self->winner // '');
193 28 100       5960 die "Invalid winner value @{[$self->winner]}" unless defined $merge_strategy_idx;
  5         88  
194 23         78 my $merge_strategy = $STRATEGIES->{$merge_strategy_idx};
195              
196             # --- Traverse ---
197              
198             # Note: while hacking, please make sure Goal nodes can appear
199             # anywhere in the graph.
200              
201 23     18   197 hlog { my $x = 'Traversing DAG ' . $self->name; $x, '*' x (76-length($x)) };
  18         99  
  18         101  
202              
203 23         707 my $graph = $self->_init_graph;
204 23         170 foreach my $node (@init_order, undef, @order) {
205              
206 84 100       843 if(!defined($node)) { # undef is the marker between init and run
207 23         381 $graph = $self->_graph;
208 23         123 next;
209             }
210              
211             # Inputs to this node. These are different from the DAG's inputs.
212             # The scope stack is (outer to inner) DAG's inputs, DAG's overrides,
213             # then $node_inputs, then the individual node's overrides.
214 61         293 my $node_inputs = Data::Hopen::Scope::Hash->new;
215             # TODO make this a DH::Scope::Inputs once it's implemented
216 61         2706 $node_inputs->outer($self->scope);
217             # Data specifically being provided to the current node, e.g.,
218             # on input edges, beats the scope of the DAG as a whole.
219 61         2298 $node_inputs->local(true);
220             # A CollectOp won't reach above the node's inputs by default.
221 61         1254 $node_inputs->merge_strategy($merge_strategy);
222              
223             # Iterate over each node's edges and process any Links
224 61         443 foreach my $pred ($graph->ordered_predecessors($node)) {
225 35     29   22312 hlog { ('From', $pred->name, 'to', $node->name) };
  29         181  
226              
227             # Goals do not feed outputs to other Goals. This is so you can
228             # add edges between Goals to set their order while keeping the
229             # data for each Goal separate.
230             # TODO add tests for this. Also TODO decide whether this is
231             # actually the Right Thing!
232 35 50       240 next if eval { $pred->DOES('Data::Hopen::G::Goal') };
  35         495  
233              
234 35         185 my $links = $graph->get_edge_attribute($pred, $node, LINKS);
235              
236             # Simple case (no links): predecessor's outputs become our inputs
237 35 100       35527 unless($links) {
238 27     23   161 hlog { ' -- no links' };
  23         53  
239 27         169 $node_inputs->merge(%{$pred->outputs});
  27         165  
240             # TODO specify which set these are.
241             # Use the predecessor's identity as the set.
242 27         71 next;
243             }
244              
245             # More complex case: Process all the links
246              
247             # Helper function to wrap a hashref in the right scope for a link input
248             local *make_link_inputs = sub {
249 19     19   38 my $hrIn = shift;
250 19         66 my $scLinkInputs = Data::Hopen::Scope::Hash->new->put(%$hrIn);
251             # All links get the same outer scope --- they are parallel,
252             # not in series.
253             # TODO? use the predecessor's identity as the set.
254 19         345 $scLinkInputs->outer($self->scope);
255             # The links run at the same scope level as the node.
256 19         1119 $scLinkInputs->local(true);
257 19         125 return $scLinkInputs;
258 8         51 };
259              
260             # Make the first link's input scope
261 8         36 my $hrPredOutputs = $pred->outputs;
262             # In one test, outputs was undef if not on its own line.
263 8         23 my $scLinkInputs = make_link_inputs($hrPredOutputs);
264              
265             # Run the links in series - not parallel!
266 8         53 my $hrLinkOutputs = $scLinkInputs->as_hashref(-levels=>'local');
267 8         29 foreach my $link (@$links) {
268 11     9   114 hlog { ('From', $pred->name, 'via', $link->name, 'to', $node->name) };
  9         47  
269              
270 11         129 $hrLinkOutputs = $link->run(
271             -context=>$scLinkInputs,
272             # visitor not passed to links.
273             );
274 11         175 $scLinkInputs = make_link_inputs($hrLinkOutputs);
275             } #foreach incoming link
276              
277 8         159 $node_inputs->merge(%$hrLinkOutputs);
278             # TODO specify which set these are.
279              
280             } #foreach predecessor node
281              
282 61         415 my $step_output = $node->run(-context=>$node_inputs,
283             forward_opts(\%args, {'-'=>1}, 'visitor')
284             );
285 61         1056 $node->outputs($step_output);
286              
287             # Give the visitor a chance, and stash the results if necessary.
288 61 100       115 if(eval { $node->DOES('Data::Hopen::G::Goal') }) {
  61         508  
289 23 100       103 $args{visitor}->visit_goal($node, $node_inputs) if $args{visitor};
290              
291             # Save the result if there is one. Don't save {}.
292             # use $node->outputs, not $step_output, since the visitor may
293             # alter $node->outputs.
294 23 100       70 $retval->{$node->name} = $node->outputs if keys %{$node->outputs};
  23         95  
295             } else {
296 38 100       133 $args{visitor}->visit_node($node, $node_inputs) if $args{visitor};
297             }
298              
299 47     47   115 hlog { 'Finished node', $node->name, 'with outputs',
300 61         392 Dumper $node->outputs } 10;
301              
302             } #foreach node in topo-sort order
303              
304 23         1350 return $retval;
305             } #run()
306              
307             =head1 ADDING DATA
308              
309             =head2 goal
310              
311             Creates a goal of the DAG. Goals are names for sequences of operations,
312             akin to top-level Makefile targets. Usage:
313              
314             my $goalOp = $dag->goal('name')
315              
316             Returns the L node that is the goal. By default, any
317             inputs passed into a goal are provided as outputs of that goal, and are
318             saved as outputs of the DAG under the goal's name.
319              
320             The first call to C also sets L.
321              
322             =cut
323              
324             sub goal {
325 17 100   17 1 2910 my $self = shift or croak 'Need an instance';
326 16 100       194 my $name = shift or croak 'Need a goal name';
327 15         97 my $goal = Data::Hopen::G::Goal->new(name => $name);
328 15         377 $self->_graph->add_vertex($goal);
329             #$self->_node_by_name->{$name} = $goal;
330 15         4384 $self->_graph->add_edge($goal, $self->_final);
331 15 100       385 $self->default_goal($goal) unless $self->default_goal;
332 15         370 push @{$self->goals}, $goal;
  15         257  
333 15         57 return $goal;
334             } #goal()
335              
336             =head2 connect
337              
338             =over 4
339              
340             =item - C<< DAG:connect(, , , ) >>
341              
342             B.
343             Connects output C of operation C as input C of
344             operation C. No processing is done between output and input.
345             C and C can be anything usable as a table index, provided
346             that table index appears in the corresponding operation's descriptor.
347              
348             =item - C<< DAG:connect(, ) >>
349              
350             Creates a dependency edge from C to C, indicating that C must be
351             run before C. Does not transfer any data from C to C.
352              
353             =item - C<< DAG:connect(, , ) >>
354              
355             Connects C to C via L C.
356             C may be undef, in which case this is treated as the two-parameter form.
357              
358             If there are already link(s) on the edge from C to C, the new link
359             is added after the last existing link.
360              
361             =back
362              
363             TODO return the name of the edge? The edge instance itself? Maybe a
364             fluent interface to the DAG for chaining C calls?
365              
366             TODO remove the out-edge and in-edge parameters?
367              
368             =cut
369              
370             sub connect {
371 21 100   21 1 9358 my $self = shift or croak 'Need an instance';
372 20         57 my ($op1, $out_edge, $in_edge, $op2, $link);
373              
374             # Unpack args
375             #if(@_ == 4) {
376             # ($op2, $out_edge, $in_edge, $op2) = @_;
377             #} else the following
378 20 100       90 if(@_ == 3) {
    100          
379 12         26 ($op1, $link, $op2) = @_;
380             } elsif(@_ == 2) {
381 5         12 ($op1, $op2) = @_;
382             } else {
383 3         41 die "Invalid arguments";
384             }
385              
386             #my $out_edge = false; # No outputs TODO use these?
387             #my $in_edge = false; # No inputs
388              
389 13 100   13   66 hlog { 'DAG::connect(): Edge from', $op1->name,
390             'via', $link ? $link->name : '(no link)',
391 17         134 'to', $op2->name };
392              
393             # Add it to the graph (idempotent)
394 17         521 $self->_graph->add_edge($op1, $op2);
395             # $self->_node_by_name->{$_->name} = $_ foreach ($op1, $op2);
396              
397             # Save the DHG::Link as an edge attribute (not idempotent!)
398 17 100       584 if($link) {
399 11   100     338 my $attrs = $self->_graph->get_edge_attribute($op1, $op2, LINKS) || [];
400 11         11307 push @$attrs, $link;
401 11         266 $self->_graph->set_edge_attribute($op1, $op2, LINKS, $attrs);
402             }
403              
404 17         11148 return undef; # TODO decide what to return
405             } #connect()
406              
407             =head2 add
408              
409             Add a regular node to the graph. An attempt to add the same node twice will be
410             ignored. Usage:
411              
412             my $node = Data::Hopen::G::Op->new(name=>"whatever");
413             $dag->add($node);
414              
415             Returns the node, for the sake of chaining.
416              
417             =cut
418              
419             sub add {
420 4     4 1 2688 my ($self, undef, $node) = parameters('self', ['node'], @_);
421 2 100       102 return if $self->_graph->has_vertex($node);
422 1     1   147 hlog { __PACKAGE__, $self->name, 'adding', Dumper($node) } 2;
  1         5  
423              
424 1         31 $self->_graph->add_vertex($node);
425             #$self->_node_by_name->{$node->name} = $node if $node->name;
426              
427 1         308 return $node;
428             } #add()
429              
430             =head2 init
431              
432             Add an initialization operation to the graph. Initialization operations run
433             before all other operations. An attempt to add the same initialization
434             operation twice will be ignored. Usage:
435              
436             my $op = Data::Hopen::G::Op->new(name=>"whatever");
437             $dag->init($op[, $first]);
438              
439             If C<$first> is truthy, the op will be run before anything already in the
440             graph. However, later calls to C with C<$first> set will push
441             operations even before C<$op>.
442              
443             Returns the node, for the sake of chaining.
444              
445             =cut
446              
447             sub init {
448 6 100   6 1 5036 my $self = shift or croak 'Need an instance';
449 5 100       330 my $op = shift or croak 'Need an op';
450 4         9 my $first = shift;
451 4 100       99 return if $self->_init_graph->has_vertex($op);
452              
453 3         451 $self->_init_graph->add_vertex($op);
454             #$self->_node_by_name->{$op->name} = $op;
455              
456 3 100       782 if($first) { # $op becomes the new _init_first node
457 1         23 $self->_init_graph->add_edge($op, $self->_init_first);
458 1         26 $self->_init_first($op);
459             } else { # Not first, so can happen anytime. Add it after the
460             # current first node.
461 2         43 $self->_init_graph->add_edge($self->_init_first, $op);
462             }
463              
464 3         12 return $op;
465             } #init()
466              
467             =head1 ACCESSORS
468              
469             =head2 empty
470              
471             Returns truthy if the only nodes in the graph are internal nodes.
472             Intended for use by hopen files.
473              
474             =cut
475              
476             sub empty {
477 3 100   3 1 1945 my $self = shift or croak 'Need an instance';
478 2         57 return ($self->_graph->vertices == 1);
479             # _final is the node in an empty() graph.
480             # We don't check the _init_graph since empty() is intended
481             # for use by hopen files, not toolsets.
482             } #empty()
483              
484             =head1 OTHER
485              
486             =head2 BUILD
487              
488             Initialize the instance.
489              
490             =cut
491              
492             sub BUILD {
493             #use Data::Dumper;
494             #say Dumper(\@_);
495 17 100   17 1 5931 my $self = shift or croak 'Need an instance';
496 16         51 my $hrArgs = shift;
497              
498             # DAGs always have names
499 16 100       53 $self->name('__R_DAG_' . $_id_counter++) unless $self->has_custom_name;
500              
501             # Graph of normal operations
502 16         144 my $graph = Data::Hopen::OrderedPredecessorGraph->new( directed => true,
503             refvertexed => true);
504 16         20156 my $final = Data::Hopen::G::Node->new(
505             name => '__R_DAG_ROOT' . $_id_counter++);
506 16         1307 $graph->add_vertex($final);
507 16         5702 $self->_graph($graph);
508 16         379 $self->_final($final);
509              
510             # Graph of initialization operations
511 16         153 my $init_graph = Data::Hopen::OrderedPredecessorGraph->new( directed => true,
512             refvertexed => true);
513 16         2334 my $init = Data::Hopen::G::CollectOp->new(
514             name => '__R_DAG_INIT' . $_id_counter++);
515 16         1082 $init_graph->add_vertex($init);
516              
517 16         4714 $self->_init_graph($init_graph);
518 16         364 $self->_init_first($init);
519             } #BUILD()
520              
521             1;
522             # Rest of the docs {{{1
523             __END__