File Coverage

blib/lib/Data/Hopen/G/DAG.pm
Criterion Covered Total %
statement 182 182 100.0
branch 58 60 96.6
condition 6 7 85.7
subroutine 32 32 100.0
pod 6 6 100.0
total 284 287 98.9


line stmt bran cond sub pod time code
1             # Data::Hopen::G::DAG - hopen build graph
2             package Data::Hopen::G::DAG;
3 8     8   1373 use strict;
  8         18  
  8         257  
4 8     8   48 use Data::Hopen::Base;
  8         16  
  8         59  
5              
6             our $VERSION = '0.000017';
7              
8 8     8   2068 use parent 'Data::Hopen::G::Op';
  8         24  
  8         54  
9             use Class::Tiny {
10 14         182 goals => sub { [] },
11 8         86 default_goal => undef,
12             winner => undef,
13              
14             # Private attributes with simple defaults
15             #_node_by_name => sub { +{} }, # map from node names to nodes in either
16             # # _init_graph or _graph
17              
18             # Private attributes - initialized by BUILD()
19             _graph => undef, # L instance
20             _final => undef, # The graph sink - all goals have edges to this
21              
22             #Initialization operations
23             _init_graph => undef, # L
24             # for initializations
25             _init_first => undef, # Graph node for initialization - the first
26             # init operation to be performed.
27              
28             # TODO? also support fini to run operations after _graph runs?
29 8     8   643 };
  8         18  
30              
31 8     8   7982 use Data::Hopen qw(hlog getparameters *QUIET);
  8         20  
  8         894  
32 8     8   3849 use Data::Hopen::G::Goal;
  8         65  
  8         372  
33 8     8   1402 use Data::Hopen::G::Link;
  8         23  
  8         207  
34 8     8   50 use Data::Hopen::G::Node;
  8         18  
  8         163  
35 8     8   3644 use Data::Hopen::G::CollectOp;
  8         28  
  8         305  
36 8     8   52 use Data::Hopen::Util::Data qw(forward_opts);
  8         18  
  8         364  
37 8     8   3777 use Data::Hopen::OrderedPredecessorGraph;
  8         198  
  8         310  
38 8     8   263 use Getargs::Mixed; # parameters, which doesn't permit undef
  8         25  
  8         676  
39 8     8   54 use Hash::Merge;
  8         18  
  8         321  
40 8     8   7804 use Regexp::Assemble;
  8         134976  
  8         354  
41 8     8   78 use Scalar::Util qw(refaddr);
  8         21  
  8         447  
42 8     8   62 use Storable ();
  8         19  
  8         192  
43              
44             # Class data {{{1
45              
46             use constant {
47 8         18841 LINKS => 'link_list', # Graph edge attr: array of DHG::Link instances
48 8     8   43 };
  8         16  
49              
50             # A counter used for making unique names
51             my $_id_counter = 0; # threads: make shared
52              
53             # }}}1
54             # Docs {{{1
55              
56             =head1 NAME
57              
58             Data::Hopen::G::DAG - A hopen build graph
59              
60             =head1 SYNOPSIS
61              
62             This class encapsulates the DAG for a particular set of one or more goals.
63             It is itself a L so that it can be composed into
64             other DAGs.
65              
66             =head1 ATTRIBUTES
67              
68             =head2 goals
69              
70             Arrayref of the goals for this DAG.
71              
72             =head2 default_goal
73              
74             The default goal for this DAG.
75              
76             =head2 winner
77              
78             When a node has multiple predecessors, their outputs are combined using
79             L to form the input to that node. This sets the C
80             precedence. Valid values (case-insensitive) are:
81              
82             =over
83              
84             =item C or C<'combine'>
85              
86             (the default): L. Same-name keys
87             are merged, so no data is lost.
88              
89             =item C<'first'> or C<'keep'>
90              
91             L. The first predecessor to add a value
92             under a particular key will win.
93              
94             =item C<'last'> or C<'replace'>
95              
96             L. The last predecessor to add a value
97             under a particular key will win.
98              
99             =back
100              
101             =head2 _graph
102              
103             The actual L. If you find that you have to use it, please open an
104             issue so we can see about providing a documented API for your use case!
105              
106             =head2 _final
107              
108             The node to which all goals are connected.
109              
110             =head2 _init_graph
111              
112             A separate L of operations that will run before all the operations
113             in L. This is because I don't want to add an edge to every
114             single node just to force the topological sort to work out.
115              
116             =head2 _init_first
117              
118             The first node to be run in _init_graph.
119              
120             =head1 FUNCTIONS
121              
122             =cut
123              
124             # }}}1
125              
126             =head2 _run
127              
128             Traverses the graph. The DAG is similar to a subroutine in this respect. The
129             outputs from all the goals of the DAG are aggregated and provided as the
130             outputs of the DAG. The output is a hash keyed by the name of each goal, with
131             each goal's outputs as the values under that name. Usage:
132              
133             my $hrOutputs = $dag->run([-context=>$scope][, other options])
134              
135             C<$scope> must be a L or subclass if provided.
136             Other options are as L.
137              
138             When evaluating a node, the edges from its predecessors are traversed in
139             the order those predecessors were added to the graph.
140              
141             =cut
142              
143             # The implementation of run(). $self->scope has already been linked to the context.
144             sub _run {
145 30     30   212 my ($self, %args) = getparameters('self', [qw(; phase visitor)], @_);
146 30         1370 my $retval = {};
147              
148             # --- Get the initialization ops ---
149              
150 30         72 my @init_order = eval { $self->_init_graph->toposort };
  30         817  
151 30 100       1900 die "Initializations contain a cycle!" if $@;
152 29 100       838 @init_order = () if $self->_init_graph->vertices == 1; # no init nodes => skip
153              
154             # --- Get the runtime ops ---
155              
156 29         1473 my @order = eval { $self->_graph->toposort };
  29         609  
157             # TODO someday support multi-core-friendly topo-sort, so nodes can run
158             # in parallel until they block each other.
159 29 100       2003 die "Graph contains a cycle!" if $@;
160              
161             # Remove _final from the order for now - I don't yet know what it means
162             # to traverse _final.
163 28 50 66     976 warn "Last item in order isn't _final! This might indicate a bug in hopen, or that some graph edges are missing."
164             unless $QUIET or refaddr $order[$#order] == refaddr $self->_final;
165              
166 28         322 @order = grep { refaddr $_ != refaddr $self->_final } @order;
  100         1881  
167              
168             # --- Check for non-connected ops, and goals with no inputs ---
169              
170 28 100       238 unless($QUIET) {
171 27         481 foreach my $node ($self->_graph->isolated_vertices) {
172 1         112 warn "Node @{[$node->name]} is not connected to any other nodes";
  1         6  
173             }
174              
175 27         3711 foreach my $goal (@{$self->goals}) {
  27         712  
176 27 100       657 warn "Goal @{[$goal->name]} has no inputs"
  1         133  
177             if $self->_graph->is_predecessorless_vertex($goal);
178             }
179             }
180              
181             # --- Set up for the merge ---
182              
183 28         3685 state $STRATEGIES = { # regex => strategy
184             '(|combine)' => 'combine',
185             '(first|keep)' => 'keep',
186             '(last|replace)' => 'replace',
187             };
188 28         100 state $STRATEGY_MAP = Regexp::Assemble->new->flags('i')->track(1)
189             ->anchor_string_begin->anchor_string_end
190             ->add(keys %$STRATEGIES);
191              
192 28   100     5070 my $merge_strategy_idx = $STRATEGY_MAP->match($self->winner // '');
193 28 100       6453 die "Invalid winner value @{[$self->winner]}" unless defined $merge_strategy_idx;
  5         85  
194 23         86 my $merge_strategy = $STRATEGIES->{$merge_strategy_idx};
195              
196             # --- Traverse ---
197              
198             # Note: while hacking, please make sure Goal nodes can appear
199             # anywhere in the graph.
200              
201 23     18   250 hlog { my $x = 'Traversing DAG ' . $self->name; $x, '*' x (76-length($x)) };
  18         116  
  18         126  
202              
203 23         777 my $graph = $self->_init_graph;
204 23         207 foreach my $node (@init_order, undef, @order) {
205              
206 84 100       863 if(!defined($node)) { # undef is the marker between init and run
207 23         459 $graph = $self->_graph;
208 23         192 next;
209             }
210              
211             # Inputs to this node. These are different from the DAG's inputs.
212             # The scope stack is (outer to inner) DAG's inputs, DAG's overrides,
213             # then $node_inputs, then the individual node's overrides.
214 61         267 my $node_inputs = Data::Hopen::Scope::Hash->new;
215             # TODO make this a DH::Scope::Inputs once it's implemented
216 61         2875 $node_inputs->outer($self->scope);
217             # Data specifically being provided to the current node, e.g.,
218             # on input edges, beats the scope of the DAG as a whole.
219 61         2395 $node_inputs->local(true);
220             # A CollectOp won't reach above the node's inputs by default.
221 61         1332 $node_inputs->merge_strategy($merge_strategy);
222              
223             # Iterate over each node's edges and process any Links
224 61         523 foreach my $pred ($graph->ordered_predecessors($node)) {
225 35     29   20774 hlog { ('From', $pred->name, 'to', $node->name) };
  29         163  
226              
227             # Goals do not feed outputs to other Goals. This is so you can
228             # add edges between Goals to set their order while keeping the
229             # data for each Goal separate.
230             # TODO add tests for this. Also TODO decide whether this is
231             # actually the Right Thing!
232 35 50       243 next if eval { $pred->DOES('Data::Hopen::G::Goal') };
  35         487  
233              
234 35         201 my $links = $graph->get_edge_attribute($pred, $node, LINKS);
235              
236             # Simple case (no links): predecessor's outputs become our inputs
237 35 100       33093 unless($links) {
238 27     23   200 hlog { ' -- no links' };
  23         63  
239 27         162 $node_inputs->merge(%{$pred->outputs});
  27         159  
240             # TODO specify which set these are.
241             # Use the predecessor's identity as the set.
242 27         74 next;
243             }
244              
245             # More complex case: Process all the links
246              
247             # Helper function to wrap a hashref in the right scope for a link input
248             local *make_link_inputs = sub {
249 19     19   53 my $hrIn = shift;
250 19         73 my $scLinkInputs = Data::Hopen::Scope::Hash->new->put(%$hrIn);
251             # All links get the same outer scope --- they are parallel,
252             # not in series.
253             # TODO? use the predecessor's identity as the set.
254 19         386 $scLinkInputs->outer($self->scope);
255             # The links run at the same scope level as the node.
256 19         1278 $scLinkInputs->local(true);
257 19         140 return $scLinkInputs;
258 8         73 };
259              
260             # Make the first link's input scope
261 8         45 my $hrPredOutputs = $pred->outputs;
262             # In one test, outputs was undef if not on its own line.
263 8         32 my $scLinkInputs = make_link_inputs($hrPredOutputs);
264              
265             # Run the links in series - not parallel!
266 8         42 my $hrLinkOutputs = $scLinkInputs->as_hashref(-levels=>'local');
267 8         39 foreach my $link (@$links) {
268 11     9   124 hlog { ('From', $pred->name, 'via', $link->name, 'to', $node->name) };
  9         68  
269              
270 11         137 $hrLinkOutputs = $link->run(
271             -context=>$scLinkInputs,
272             forward_opts(\%args, {'-'=>1}, 'phase')
273             # visitor not passed to links.
274             );
275 11         190 $scLinkInputs = make_link_inputs($hrLinkOutputs);
276             } #foreach incoming link
277              
278 8         150 $node_inputs->merge(%$hrLinkOutputs);
279             # TODO specify which set these are.
280              
281             } #foreach predecessor node
282              
283 61         489 my $step_output = $node->run(-context=>$node_inputs,
284             forward_opts(\%args, {'-'=>1}, 'phase', 'visitor')
285             );
286 61         1124 $node->outputs($step_output);
287              
288             # Give the visitor a chance, and stash the results if necessary.
289 61 100       114 if(eval { $node->DOES('Data::Hopen::G::Goal') }) {
  61         544  
290 23 100       92 $args{visitor}->visit_goal($node, $node_inputs) if $args{visitor};
291              
292             # Save the result if there is one. Don't save {}.
293             # use $node->outputs, not $step_output, since the visitor may
294             # alter $node->outputs.
295 23 100       73 $retval->{$node->name} = $node->outputs if keys %{$node->outputs};
  23         87  
296             } else {
297 38 100       150 $args{visitor}->visit_node($node, $node_inputs) if $args{visitor};
298             }
299              
300 47     47   137 hlog { 'Finished node', $node->name, 'with outputs',
301 61         545 Dumper $node->outputs } 10;
302              
303             } #foreach node in topo-sort order
304              
305 23         501 return $retval;
306             } #run()
307              
308             =head1 ADDING DATA
309              
310             =head2 goal
311              
312             Creates a goal of the DAG. Goals are names for sequences of operations,
313             akin to top-level Makefile targets. Usage:
314              
315             my $goalOp = $dag->goal('name')
316              
317             Returns the L node that is the goal. By default, any
318             inputs passed into a goal are provided as outputs of that goal, and are
319             saved as outputs of the DAG under the goal's name.
320              
321             The first call to C also sets L.
322              
323             =cut
324              
325             sub goal {
326 17 100   17 1 3528 my $self = shift or croak 'Need an instance';
327 16 100       207 my $name = shift or croak 'Need a goal name';
328 15         100 my $goal = Data::Hopen::G::Goal->new(name => $name);
329 15         404 $self->_graph->add_vertex($goal);
330             #$self->_node_by_name->{$name} = $goal;
331 15         2754 $self->_graph->add_edge($goal, $self->_final);
332 15 100       445 $self->default_goal($goal) unless $self->default_goal;
333 15         396 push @{$self->goals}, $goal;
  15         277  
334 15         77 return $goal;
335             } #goal()
336              
337             =head2 connect
338              
339             =over 4
340              
341             =item - C<< DAG:connect(, , , ) >>
342              
343             B.
344             Connects output C of operation C as input C of
345             operation C. No processing is done between output and input.
346             C and C can be anything usable as a table index, provided
347             that table index appears in the corresponding operation's descriptor.
348              
349             =item - C<< DAG:connect(, ) >>
350              
351             Creates a dependency edge from C to C, indicating that C must be
352             run before C. Does not transfer any data from C to C.
353              
354             =item - C<< DAG:connect(, , ) >>
355              
356             Connects C to C via L C.
357             C may be undef, in which case this is treated as the two-parameter form.
358              
359             If there are already link(s) on the edge from C to C, the new link
360             is added after the last existing link.
361              
362             =back
363              
364             TODO return the name of the edge? The edge instance itself? Maybe a
365             fluent interface to the DAG for chaining C calls?
366              
367             TODO remove the out-edge and in-edge parameters?
368              
369             =cut
370              
371             sub connect {
372 21 100   21 1 10621 my $self = shift or croak 'Need an instance';
373 20         57 my ($op1, $out_edge, $in_edge, $op2, $link);
374              
375             # Unpack args
376             #if(@_ == 4) {
377             # ($op2, $out_edge, $in_edge, $op2) = @_;
378             #} else the following
379 20 100       73 if(@_ == 3) {
    100          
380 12         31 ($op1, $link, $op2) = @_;
381             } elsif(@_ == 2) {
382 5         14 ($op1, $op2) = @_;
383             } else {
384 3         36 die "Invalid arguments";
385             }
386              
387             #my $out_edge = false; # No outputs TODO use these?
388             #my $in_edge = false; # No inputs
389              
390 13 100   13   70 hlog { 'DAG::connect(): Edge from', $op1->name,
391             'via', $link ? $link->name : '(no link)',
392 17         142 'to', $op2->name };
393              
394             # Add it to the graph (idempotent)
395 17         548 $self->_graph->add_edge($op1, $op2);
396             # $self->_node_by_name->{$_->name} = $_ foreach ($op1, $op2);
397              
398             # Save the DHG::Link as an edge attribute (not idempotent!)
399 17 100       603 if($link) {
400 11   100     306 my $attrs = $self->_graph->get_edge_attribute($op1, $op2, LINKS) || [];
401 11         9937 push @$attrs, $link;
402 11         286 $self->_graph->set_edge_attribute($op1, $op2, LINKS, $attrs);
403             }
404              
405 17         9952 return undef; # TODO decide what to return
406             } #connect()
407              
408             =head2 add
409              
410             Add a regular node to the graph. An attempt to add the same node twice will be
411             ignored. Usage:
412              
413             my $node = Data::Hopen::G::Op->new(name=>"whatever");
414             $dag->add($node);
415              
416             Returns the node, for the sake of chaining.
417              
418             =cut
419              
420             sub add {
421 4     4 1 3463 my ($self, undef, $node) = parameters('self', ['node'], @_);
422 2 100       102 return if $self->_graph->has_vertex($node);
423 1     1   138 hlog { __PACKAGE__, $self->name, 'adding', Dumper($node) } 2;
  1         6  
424              
425 1         35 $self->_graph->add_vertex($node);
426             #$self->_node_by_name->{$node->name} = $node if $node->name;
427              
428 1         176 return $node;
429             } #add()
430              
431             =head2 init
432              
433             Add an initialization operation to the graph. Initialization operations run
434             before all other operations. An attempt to add the same initialization
435             operation twice will be ignored. Usage:
436              
437             my $op = Data::Hopen::G::Op->new(name=>"whatever");
438             $dag->init($op[, $first]);
439              
440             If C<$first> is truthy, the op will be run before anything already in the
441             graph. However, later calls to C with C<$first> set will push
442             operations even before C<$op>.
443              
444             Returns the node, for the sake of chaining.
445              
446             =cut
447              
448             sub init {
449 6 100   6 1 6137 my $self = shift or croak 'Need an instance';
450 5 100       340 my $op = shift or croak 'Need an op';
451 4         10 my $first = shift;
452 4 100       98 return if $self->_init_graph->has_vertex($op);
453              
454 3         438 $self->_init_graph->add_vertex($op);
455             #$self->_node_by_name->{$op->name} = $op;
456              
457 3 100       458 if($first) { # $op becomes the new _init_first node
458 1         21 $self->_init_graph->add_edge($op, $self->_init_first);
459 1         25 $self->_init_first($op);
460             } else { # Not first, so can happen anytime. Add it after the
461             # current first node.
462 2         43 $self->_init_graph->add_edge($self->_init_first, $op);
463             }
464              
465 3         13 return $op;
466             } #init()
467              
468             =head1 ACCESSORS
469              
470             =head2 empty
471              
472             Returns truthy if the only nodes in the graph are internal nodes.
473             Intended for use by hopen files.
474              
475             =cut
476              
477             sub empty {
478 3 100   3 1 2324 my $self = shift or croak 'Need an instance';
479 2         52 return ($self->_graph->vertices == 1);
480             # _final is the node in an empty() graph.
481             # We don't check the _init_graph since empty() is intended
482             # for use by hopen files, not toolsets.
483             } #empty()
484              
485             =head1 OTHER
486              
487             =head2 BUILD
488              
489             Initialize the instance.
490              
491             =cut
492              
493             sub BUILD {
494             #use Data::Dumper;
495             #say Dumper(\@_);
496 17 100   17 1 6779 my $self = shift or croak 'Need an instance';
497 16         51 my $hrArgs = shift;
498              
499             # DAGs always have names
500 16 100       71 $self->name('__R_DAG_' . $_id_counter++) unless $self->has_custom_name;
501              
502             # Graph of normal operations
503 16         153 my $graph = Data::Hopen::OrderedPredecessorGraph->new( directed => true,
504             refvertexed => true);
505 16         4745 my $final = Data::Hopen::G::Node->new(
506             name => '__R_DAG_ROOT' . $_id_counter++);
507 16         1472 $graph->add_vertex($final);
508 16         3950 $self->_graph($graph);
509 16         395 $self->_final($final);
510              
511             # Graph of initialization operations
512 16         151 my $init_graph = Data::Hopen::OrderedPredecessorGraph->new( directed => true,
513             refvertexed => true);
514 16         3201 my $init = Data::Hopen::G::CollectOp->new(
515             name => '__R_DAG_INIT' . $_id_counter++);
516 16         704 $init_graph->add_vertex($init);
517              
518 16         2914 $self->_init_graph($init_graph);
519 16         404 $self->_init_first($init);
520             } #BUILD()
521              
522             1;
523             # Rest of the docs {{{1
524             __END__