File Coverage

blib/lib/Data/Hopen/G/DAG.pm

Criterion	Covered	Total	%
statement	182	182	100.0
branch	58	60	96.6
condition	6	7	85.7
subroutine	32	32	100.0
pod	6	6	100.0
total	284	287	98.9

line	stmt	bran	cond	sub	pod	time	code
1							# Data::Hopen::G::DAG - hopen build graph
2							package Data::Hopen::G::DAG;
3	8			8		1365	use strict;
	8					18
	8					257
4	8			8		45	use Data::Hopen::Base;
	8					18
	8					62
5
6							our $VERSION = '0.000019';
7
8	8			8		2095	use parent 'Data::Hopen::G::Op';
	8					19
	8					55
9							use Class::Tiny {
10	14					143	goals => sub { [] },
11	8					91	default_goal => undef,
12							winner => undef,
13
14							# Private attributes with simple defaults
15							#_node_by_name => sub { +{} }, # map from node names to nodes in either
16							# # _init_graph or _graph
17
18							# Private attributes - initialized by BUILD()
19							_graph => undef, # L instance
20							_final => undef, # The graph sink - all goals have edges to this
21
22							#Initialization operations
23							_init_graph => undef, # L
24							# for initializations
25							_init_first => undef, # Graph node for initialization - the first
26							# init operation to be performed.
27
28							# TODO? also support fini to run operations after _graph runs?
29	8			8		710	};
	8					22
30
31	8			8		8076	use Data::Hopen qw(hlog getparameters *QUIET);
	8					18
	8					868
32	8			8		3814	use Data::Hopen::G::Goal;
	8					24
	8					297
33	8			8		1412	use Data::Hopen::G::Link;
	8					19
	8					210
34	8			8		43	use Data::Hopen::G::Node;
	8					16
	8					162
35	8			8		3726	use Data::Hopen::G::CollectOp;
	8					21
	8					295
36	8			8		54	use Data::Hopen::Util::Data qw(forward_opts);
	8					17
	8					348
37	8			8		3783	use Data::Hopen::OrderedPredecessorGraph;
	8					31
	8					316
38	8			8		350	use Getargs::Mixed; # parameters, which doesn't permit undef
	8					28
	8					547
39	8			8		52	use Hash::Merge;
	8					20
	8					312
40	8			8		7750	use Regexp::Assemble;
	8					133767
	8					316
41	8			8		70	use Scalar::Util qw(refaddr);
	8					18
	8					423
42	8			8		51	use Storable ();
	8					18
	8					182
43
44							# Class data {{{1
45
46							use constant {
47	8					17486	LINKS => 'link_list', # Graph edge attr: array of DHG::Link instances
48	8			8		43	};
	8					17
49
50							# A counter used for making unique names
51							my $_id_counter = 0; # threads: make shared
52
53							# }}}1
54							# Docs {{{1
55
56							=head1 NAME
57
58							Data::Hopen::G::DAG - A hopen build graph
59
60							=head1 SYNOPSIS
61
62							This class encapsulates the DAG for a particular set of one or more goals.
63							It is itself a L so that it can be composed into
64							other DAGs.
65
66							=head1 ATTRIBUTES
67
68							=head2 goals
69
70							Arrayref of the goals for this DAG.
71
72							=head2 default_goal
73
74							The default goal for this DAG.
75
76							=head2 winner
77
78							When a node has multiple predecessors, their outputs are combined using
79							L to form the input to that node. This sets the C
80							precedence. Valid values (case-insensitive) are:
81
82							=over
83
84							=item C or C<'combine'>
85
86							(the default): L. Same-name keys
87							are merged, so no data is lost.
88
89							=item C<'first'> or C<'keep'>
90
91							L. The first predecessor to add a value
92							under a particular key will win.
93
94							=item C<'last'> or C<'replace'>
95
96							L. The last predecessor to add a value
97							under a particular key will win.
98
99							=back
100
101							=head2 _graph
102
103							The actual L. If you find that you have to use it, please open an
104							issue so we can see about providing a documented API for your use case!
105
106							=head2 _final
107
108							The node to which all goals are connected.
109
110							=head2 _init_graph
111
112							A separate L of operations that will run before all the operations
113							in L. This is because I don't want to add an edge to every
114							single node just to force the topological sort to work out.
115
116							=head2 _init_first
117
118							The first node to be run in _init_graph.
119
120							=head1 FUNCTIONS
121
122							=cut
123
124							# }}}1
125
126							=head2 _run
127
128							Traverses the graph. The DAG is similar to a subroutine in this respect. The
129							outputs from all the goals of the DAG are aggregated and provided as the
130							outputs of the DAG. The output is a hash keyed by the name of each goal, with
131							each goal's outputs as the values under that name. Usage:
132
133							my $hrOutputs = $dag->run([-context=>$scope][, other options])
134
135							C<$scope> must be a L or subclass if provided.
136							Other options are as L.
137
138							When evaluating a node, the edges from its predecessors are traversed in
139							the order those predecessors were added to the graph.
140
141							=cut
142
143							# The implementation of run(). $self->scope has already been linked to the context.
144							sub _run {
145	30			30		180	my ($self, %args) = getparameters('self', [qw(; visitor)], @_);
146	30					1398	my $retval = {};
147
148							# --- Get the initialization ops ---
149
150	30					74	my @init_order = eval { $self->_init_graph->toposort };
	30					755
151	30	100				1446	die "Initializations contain a cycle!" if $@;
152	29	100				823	@init_order = () if $self->_init_graph->vertices == 1; # no init nodes => skip
153
154							# --- Get the runtime ops ---
155
156	29					934	my @order = eval { $self->_graph->toposort };
	29					557
157							# TODO someday support multi-core-friendly topo-sort, so nodes can run
158							# in parallel until they block each other.
159	29	100				2092	die "Graph contains a cycle!" if $@;
160
161							# Remove _final from the order for now - I don't yet know what it means
162							# to traverse _final.
163	28	50	66			909	warn "Last item in order isn't _final! This might indicate a bug in hopen, or that some graph edges are missing."
164							unless $QUIET or refaddr $order[$#order] == refaddr $self->_final;
165
166	28					1370	@order = grep { refaddr $_ != refaddr $self->_final } @order;
	100					1831
167
168							# --- Check for non-connected ops, and goals with no inputs ---
169
170	28	100				217	unless($QUIET) {
171	27					477	foreach my $node ($self->_graph->isolated_vertices) {
172	1					1322	warn "Node @{[$node->name]} is not connected to any other nodes";
	1					7
173							}
174
175	27					37018	foreach my $goal (@{$self->goals}) {
	27					665
176	27	100				602	warn "Goal @{[$goal->name]} has no inputs"
	1					292
177							if $self->_graph->is_predecessorless_vertex($goal);
178							}
179							}
180
181							# --- Set up for the merge ---
182
183	28					7474	state $STRATEGIES = { # regex => strategy
184							'(\|combine)' => 'combine',
185							'(first\|keep)' => 'keep',
186							'(last\|replace)' => 'replace',
187							};
188	28					1143	state $STRATEGY_MAP = Regexp::Assemble->new->flags('i')->track(1)
189							->anchor_string_begin->anchor_string_end
190							->add(keys %$STRATEGIES);
191
192	28		100			4755	my $merge_strategy_idx = $STRATEGY_MAP->match($self->winner // '');
193	28	100				5960	die "Invalid winner value @{[$self->winner]}" unless defined $merge_strategy_idx;
	5					88
194	23					78	my $merge_strategy = $STRATEGIES->{$merge_strategy_idx};
195
196							# --- Traverse ---
197
198							# Note: while hacking, please make sure Goal nodes can appear
199							# anywhere in the graph.
200
201	23			18		197	hlog { my $x = 'Traversing DAG ' . $self->name; $x, '*' x (76-length($x)) };
	18					99
	18					101
202
203	23					707	my $graph = $self->_init_graph;
204	23					170	foreach my $node (@init_order, undef, @order) {
205
206	84	100				843	if(!defined($node)) { # undef is the marker between init and run
207	23					381	$graph = $self->_graph;
208	23					123	next;
209							}
210
211							# Inputs to this node. These are different from the DAG's inputs.
212							# The scope stack is (outer to inner) DAG's inputs, DAG's overrides,
213							# then $node_inputs, then the individual node's overrides.
214	61					293	my $node_inputs = Data::Hopen::Scope::Hash->new;
215							# TODO make this a DH::Scope::Inputs once it's implemented
216	61					2706	$node_inputs->outer($self->scope);
217							# Data specifically being provided to the current node, e.g.,
218							# on input edges, beats the scope of the DAG as a whole.
219	61					2298	$node_inputs->local(true);
220							# A CollectOp won't reach above the node's inputs by default.
221	61					1254	$node_inputs->merge_strategy($merge_strategy);
222
223							# Iterate over each node's edges and process any Links
224	61					443	foreach my $pred ($graph->ordered_predecessors($node)) {
225	35			29		22312	hlog { ('From', $pred->name, 'to', $node->name) };
	29					181
226
227							# Goals do not feed outputs to other Goals. This is so you can
228							# add edges between Goals to set their order while keeping the
229							# data for each Goal separate.
230							# TODO add tests for this. Also TODO decide whether this is
231							# actually the Right Thing!
232	35	50				240	next if eval { $pred->DOES('Data::Hopen::G::Goal') };
	35					495
233
234	35					185	my $links = $graph->get_edge_attribute($pred, $node, LINKS);
235
236							# Simple case (no links): predecessor's outputs become our inputs
237	35	100				35527	unless($links) {
238	27			23		161	hlog { ' -- no links' };
	23					53
239	27					169	$node_inputs->merge(%{$pred->outputs});
	27					165
240							# TODO specify which set these are.
241							# Use the predecessor's identity as the set.
242	27					71	next;
243							}
244
245							# More complex case: Process all the links
246
247							# Helper function to wrap a hashref in the right scope for a link input
248							local *make_link_inputs = sub {
249	19			19		38	my $hrIn = shift;
250	19					66	my $scLinkInputs = Data::Hopen::Scope::Hash->new->put(%$hrIn);
251							# All links get the same outer scope --- they are parallel,
252							# not in series.
253							# TODO? use the predecessor's identity as the set.
254	19					345	$scLinkInputs->outer($self->scope);
255							# The links run at the same scope level as the node.
256	19					1119	$scLinkInputs->local(true);
257	19					125	return $scLinkInputs;
258	8					51	};
259
260							# Make the first link's input scope
261	8					36	my $hrPredOutputs = $pred->outputs;
262							# In one test, outputs was undef if not on its own line.
263	8					23	my $scLinkInputs = make_link_inputs($hrPredOutputs);
264
265							# Run the links in series - not parallel!
266	8					53	my $hrLinkOutputs = $scLinkInputs->as_hashref(-levels=>'local');
267	8					29	foreach my $link (@$links) {
268	11			9		114	hlog { ('From', $pred->name, 'via', $link->name, 'to', $node->name) };
	9					47
269
270	11					129	$hrLinkOutputs = $link->run(
271							-context=>$scLinkInputs,
272							# visitor not passed to links.
273							);
274	11					175	$scLinkInputs = make_link_inputs($hrLinkOutputs);
275							} #foreach incoming link
276
277	8					159	$node_inputs->merge(%$hrLinkOutputs);
278							# TODO specify which set these are.
279
280							} #foreach predecessor node
281
282	61					415	my $step_output = $node->run(-context=>$node_inputs,
283							forward_opts(\%args, {'-'=>1}, 'visitor')
284							);
285	61					1056	$node->outputs($step_output);
286
287							# Give the visitor a chance, and stash the results if necessary.
288	61	100				115	if(eval { $node->DOES('Data::Hopen::G::Goal') }) {
	61					508
289	23	100				103	$args{visitor}->visit_goal($node, $node_inputs) if $args{visitor};
290
291							# Save the result if there is one. Don't save {}.
292							# use $node->outputs, not $step_output, since the visitor may
293							# alter $node->outputs.
294	23	100				70	$retval->{$node->name} = $node->outputs if keys %{$node->outputs};
	23					95
295							} else {
296	38	100				133	$args{visitor}->visit_node($node, $node_inputs) if $args{visitor};
297							}
298
299	47			47		115	hlog { 'Finished node', $node->name, 'with outputs',
300	61					392	Dumper $node->outputs } 10;
301
302							} #foreach node in topo-sort order
303
304	23					1350	return $retval;
305							} #run()
306
307							=head1 ADDING DATA
308
309							=head2 goal
310
311							Creates a goal of the DAG. Goals are names for sequences of operations,
312							akin to top-level Makefile targets. Usage:
313
314							my $goalOp = $dag->goal('name')
315
316							Returns the L node that is the goal. By default, any
317							inputs passed into a goal are provided as outputs of that goal, and are
318							saved as outputs of the DAG under the goal's name.
319
320							The first call to C also sets L.
321
322							=cut
323
324							sub goal {
325	17	100		17	1	2910	my $self = shift or croak 'Need an instance';
326	16	100				194	my $name = shift or croak 'Need a goal name';
327	15					97	my $goal = Data::Hopen::G::Goal->new(name => $name);
328	15					377	$self->_graph->add_vertex($goal);
329							#$self->_node_by_name->{$name} = $goal;
330	15					4384	$self->_graph->add_edge($goal, $self->_final);
331	15	100				385	$self->default_goal($goal) unless $self->default_goal;
332	15					370	push @{$self->goals}, $goal;
	15					257
333	15					57	return $goal;
334							} #goal()
335
336							=head2 connect
337
338							=over 4
339
340							=item - C<< DAG:connect(, , , ) >>
341
342							B.
343							Connects output C of operation C as input C of
344							operation C. No processing is done between output and input.
345							C and C can be anything usable as a table index, provided
346							that table index appears in the corresponding operation's descriptor.
347
348							=item - C<< DAG:connect(, ) >>
349
350							Creates a dependency edge from C to C, indicating that C must be
351							run before C. Does not transfer any data from C to C.
352
353							=item - C<< DAG:connect(, , ) >>
354
355							Connects C to C via L C.
356							C may be undef, in which case this is treated as the two-parameter form.
357
358							If there are already link(s) on the edge from C to C, the new link
359							is added after the last existing link.
360
361							=back
362
363							TODO return the name of the edge? The edge instance itself? Maybe a
364							fluent interface to the DAG for chaining C calls?
365
366							TODO remove the out-edge and in-edge parameters?
367
368							=cut
369
370							sub connect {
371	21	100		21	1	9358	my $self = shift or croak 'Need an instance';
372	20					57	my ($op1, $out_edge, $in_edge, $op2, $link);
373
374							# Unpack args
375							#if(@_ == 4) {
376							# ($op2, $out_edge, $in_edge, $op2) = @_;
377							#} else the following
378	20	100				90	if(@_ == 3) {
		100
379	12					26	($op1, $link, $op2) = @_;
380							} elsif(@_ == 2) {
381	5					12	($op1, $op2) = @_;
382							} else {
383	3					41	die "Invalid arguments";
384							}
385
386							#my $out_edge = false; # No outputs TODO use these?
387							#my $in_edge = false; # No inputs
388
389	13	100		13		66	hlog { 'DAG::connect(): Edge from', $op1->name,
390							'via', $link ? $link->name : '(no link)',
391	17					134	'to', $op2->name };
392
393							# Add it to the graph (idempotent)
394	17					521	$self->_graph->add_edge($op1, $op2);
395							# $self->_node_by_name->{$_->name} = $_ foreach ($op1, $op2);
396
397							# Save the DHG::Link as an edge attribute (not idempotent!)
398	17	100				584	if($link) {
399	11		100			338	my $attrs = $self->_graph->get_edge_attribute($op1, $op2, LINKS) \|\| [];
400	11					11307	push @$attrs, $link;
401	11					266	$self->_graph->set_edge_attribute($op1, $op2, LINKS, $attrs);
402							}
403
404	17					11148	return undef; # TODO decide what to return
405							} #connect()
406
407							=head2 add
408
409							Add a regular node to the graph. An attempt to add the same node twice will be
410							ignored. Usage:
411
412							my $node = Data::Hopen::G::Op->new(name=>"whatever");
413							$dag->add($node);
414
415							Returns the node, for the sake of chaining.
416
417							=cut
418
419							sub add {
420	4			4	1	2688	my ($self, undef, $node) = parameters('self', ['node'], @_);
421	2	100				102	return if $self->_graph->has_vertex($node);
422	1			1		147	hlog { __PACKAGE__, $self->name, 'adding', Dumper($node) } 2;
	1					5
423
424	1					31	$self->_graph->add_vertex($node);
425							#$self->_node_by_name->{$node->name} = $node if $node->name;
426
427	1					308	return $node;
428							} #add()
429
430							=head2 init
431
432							Add an initialization operation to the graph. Initialization operations run
433							before all other operations. An attempt to add the same initialization
434							operation twice will be ignored. Usage:
435
436							my $op = Data::Hopen::G::Op->new(name=>"whatever");
437							$dag->init($op[, $first]);
438
439							If C<$first> is truthy, the op will be run before anything already in the
440							graph. However, later calls to C with C<$first> set will push
441							operations even before C<$op>.
442
443							Returns the node, for the sake of chaining.
444
445							=cut
446
447							sub init {
448	6	100		6	1	5036	my $self = shift or croak 'Need an instance';
449	5	100				330	my $op = shift or croak 'Need an op';
450	4					9	my $first = shift;
451	4	100				99	return if $self->_init_graph->has_vertex($op);
452
453	3					451	$self->_init_graph->add_vertex($op);
454							#$self->_node_by_name->{$op->name} = $op;
455
456	3	100				782	if($first) { # $op becomes the new _init_first node
457	1					23	$self->_init_graph->add_edge($op, $self->_init_first);
458	1					26	$self->_init_first($op);
459							} else { # Not first, so can happen anytime. Add it after the
460							# current first node.
461	2					43	$self->_init_graph->add_edge($self->_init_first, $op);
462							}
463
464	3					12	return $op;
465							} #init()
466
467							=head1 ACCESSORS
468
469							=head2 empty
470
471							Returns truthy if the only nodes in the graph are internal nodes.
472							Intended for use by hopen files.
473
474							=cut
475
476							sub empty {
477	3	100		3	1	1945	my $self = shift or croak 'Need an instance';
478	2					57	return ($self->_graph->vertices == 1);
479							# _final is the node in an empty() graph.
480							# We don't check the _init_graph since empty() is intended
481							# for use by hopen files, not toolsets.
482							} #empty()
483
484							=head1 OTHER
485
486							=head2 BUILD
487
488							Initialize the instance.
489
490							=cut
491
492							sub BUILD {
493							#use Data::Dumper;
494							#say Dumper(\@_);
495	17	100		17	1	5931	my $self = shift or croak 'Need an instance';
496	16					51	my $hrArgs = shift;
497
498							# DAGs always have names
499	16	100				53	$self->name('__R_DAG_' . $_id_counter++) unless $self->has_custom_name;
500
501							# Graph of normal operations
502	16					144	my $graph = Data::Hopen::OrderedPredecessorGraph->new( directed => true,
503							refvertexed => true);
504	16					20156	my $final = Data::Hopen::G::Node->new(
505							name => '__R_DAG_ROOT' . $_id_counter++);
506	16					1307	$graph->add_vertex($final);
507	16					5702	$self->_graph($graph);
508	16					379	$self->_final($final);
509
510							# Graph of initialization operations
511	16					153	my $init_graph = Data::Hopen::OrderedPredecessorGraph->new( directed => true,
512							refvertexed => true);
513	16					2334	my $init = Data::Hopen::G::CollectOp->new(
514							name => '__R_DAG_INIT' . $_id_counter++);
515	16					1082	$init_graph->add_vertex($init);
516
517	16					4714	$self->_init_graph($init_graph);
518	16					364	$self->_init_first($init);
519							} #BUILD()
520
521							1;
522							# Rest of the docs {{{1
523							__END__