File Coverage

blib/lib/Data/Hopen/G/DAG.pm

Criterion	Covered	Total	%
statement	182	182	100.0
branch	58	60	96.6
condition	6	7	85.7
subroutine	32	32	100.0
pod	6	6	100.0
total	284	287	98.9

line	stmt	bran	cond	sub	pod	time	code
1							# Data::Hopen::G::DAG - hopen build graph
2							package Data::Hopen::G::DAG;
3	8			8		1373	use strict;
	8					18
	8					257
4	8			8		48	use Data::Hopen::Base;
	8					16
	8					59
5
6							our $VERSION = '0.000017';
7
8	8			8		2068	use parent 'Data::Hopen::G::Op';
	8					24
	8					54
9							use Class::Tiny {
10	14					182	goals => sub { [] },
11	8					86	default_goal => undef,
12							winner => undef,
13
14							# Private attributes with simple defaults
15							#_node_by_name => sub { +{} }, # map from node names to nodes in either
16							# # _init_graph or _graph
17
18							# Private attributes - initialized by BUILD()
19							_graph => undef, # L instance
20							_final => undef, # The graph sink - all goals have edges to this
21
22							#Initialization operations
23							_init_graph => undef, # L
24							# for initializations
25							_init_first => undef, # Graph node for initialization - the first
26							# init operation to be performed.
27
28							# TODO? also support fini to run operations after _graph runs?
29	8			8		643	};
	8					18
30
31	8			8		7982	use Data::Hopen qw(hlog getparameters *QUIET);
	8					20
	8					894
32	8			8		3849	use Data::Hopen::G::Goal;
	8					65
	8					372
33	8			8		1402	use Data::Hopen::G::Link;
	8					23
	8					207
34	8			8		50	use Data::Hopen::G::Node;
	8					18
	8					163
35	8			8		3644	use Data::Hopen::G::CollectOp;
	8					28
	8					305
36	8			8		52	use Data::Hopen::Util::Data qw(forward_opts);
	8					18
	8					364
37	8			8		3777	use Data::Hopen::OrderedPredecessorGraph;
	8					198
	8					310
38	8			8		263	use Getargs::Mixed; # parameters, which doesn't permit undef
	8					25
	8					676
39	8			8		54	use Hash::Merge;
	8					18
	8					321
40	8			8		7804	use Regexp::Assemble;
	8					134976
	8					354
41	8			8		78	use Scalar::Util qw(refaddr);
	8					21
	8					447
42	8			8		62	use Storable ();
	8					19
	8					192
43
44							# Class data {{{1
45
46							use constant {
47	8					18841	LINKS => 'link_list', # Graph edge attr: array of DHG::Link instances
48	8			8		43	};
	8					16
49
50							# A counter used for making unique names
51							my $_id_counter = 0; # threads: make shared
52
53							# }}}1
54							# Docs {{{1
55
56							=head1 NAME
57
58							Data::Hopen::G::DAG - A hopen build graph
59
60							=head1 SYNOPSIS
61
62							This class encapsulates the DAG for a particular set of one or more goals.
63							It is itself a L so that it can be composed into
64							other DAGs.
65
66							=head1 ATTRIBUTES
67
68							=head2 goals
69
70							Arrayref of the goals for this DAG.
71
72							=head2 default_goal
73
74							The default goal for this DAG.
75
76							=head2 winner
77
78							When a node has multiple predecessors, their outputs are combined using
79							L to form the input to that node. This sets the C
80							precedence. Valid values (case-insensitive) are:
81
82							=over
83
84							=item C or C<'combine'>
85
86							(the default): L. Same-name keys
87							are merged, so no data is lost.
88
89							=item C<'first'> or C<'keep'>
90
91							L. The first predecessor to add a value
92							under a particular key will win.
93
94							=item C<'last'> or C<'replace'>
95
96							L. The last predecessor to add a value
97							under a particular key will win.
98
99							=back
100
101							=head2 _graph
102
103							The actual L. If you find that you have to use it, please open an
104							issue so we can see about providing a documented API for your use case!
105
106							=head2 _final
107
108							The node to which all goals are connected.
109
110							=head2 _init_graph
111
112							A separate L of operations that will run before all the operations
113							in L. This is because I don't want to add an edge to every
114							single node just to force the topological sort to work out.
115
116							=head2 _init_first
117
118							The first node to be run in _init_graph.
119
120							=head1 FUNCTIONS
121
122							=cut
123
124							# }}}1
125
126							=head2 _run
127
128							Traverses the graph. The DAG is similar to a subroutine in this respect. The
129							outputs from all the goals of the DAG are aggregated and provided as the
130							outputs of the DAG. The output is a hash keyed by the name of each goal, with
131							each goal's outputs as the values under that name. Usage:
132
133							my $hrOutputs = $dag->run([-context=>$scope][, other options])
134
135							C<$scope> must be a L or subclass if provided.
136							Other options are as L.
137
138							When evaluating a node, the edges from its predecessors are traversed in
139							the order those predecessors were added to the graph.
140
141							=cut
142
143							# The implementation of run(). $self->scope has already been linked to the context.
144							sub _run {
145	30			30		212	my ($self, %args) = getparameters('self', [qw(; phase visitor)], @_);
146	30					1370	my $retval = {};
147
148							# --- Get the initialization ops ---
149
150	30					72	my @init_order = eval { $self->_init_graph->toposort };
	30					817
151	30	100				1900	die "Initializations contain a cycle!" if $@;
152	29	100				838	@init_order = () if $self->_init_graph->vertices == 1; # no init nodes => skip
153
154							# --- Get the runtime ops ---
155
156	29					1473	my @order = eval { $self->_graph->toposort };
	29					609
157							# TODO someday support multi-core-friendly topo-sort, so nodes can run
158							# in parallel until they block each other.
159	29	100				2003	die "Graph contains a cycle!" if $@;
160
161							# Remove _final from the order for now - I don't yet know what it means
162							# to traverse _final.
163	28	50	66			976	warn "Last item in order isn't _final! This might indicate a bug in hopen, or that some graph edges are missing."
164							unless $QUIET or refaddr $order[$#order] == refaddr $self->_final;
165
166	28					322	@order = grep { refaddr $_ != refaddr $self->_final } @order;
	100					1881
167
168							# --- Check for non-connected ops, and goals with no inputs ---
169
170	28	100				238	unless($QUIET) {
171	27					481	foreach my $node ($self->_graph->isolated_vertices) {
172	1					112	warn "Node @{[$node->name]} is not connected to any other nodes";
	1					6
173							}
174
175	27					3711	foreach my $goal (@{$self->goals}) {
	27					712
176	27	100				657	warn "Goal @{[$goal->name]} has no inputs"
	1					133
177							if $self->_graph->is_predecessorless_vertex($goal);
178							}
179							}
180
181							# --- Set up for the merge ---
182
183	28					3685	state $STRATEGIES = { # regex => strategy
184							'(\|combine)' => 'combine',
185							'(first\|keep)' => 'keep',
186							'(last\|replace)' => 'replace',
187							};
188	28					100	state $STRATEGY_MAP = Regexp::Assemble->new->flags('i')->track(1)
189							->anchor_string_begin->anchor_string_end
190							->add(keys %$STRATEGIES);
191
192	28		100			5070	my $merge_strategy_idx = $STRATEGY_MAP->match($self->winner // '');
193	28	100				6453	die "Invalid winner value @{[$self->winner]}" unless defined $merge_strategy_idx;
	5					85
194	23					86	my $merge_strategy = $STRATEGIES->{$merge_strategy_idx};
195
196							# --- Traverse ---
197
198							# Note: while hacking, please make sure Goal nodes can appear
199							# anywhere in the graph.
200
201	23			18		250	hlog { my $x = 'Traversing DAG ' . $self->name; $x, '*' x (76-length($x)) };
	18					116
	18					126
202
203	23					777	my $graph = $self->_init_graph;
204	23					207	foreach my $node (@init_order, undef, @order) {
205
206	84	100				863	if(!defined($node)) { # undef is the marker between init and run
207	23					459	$graph = $self->_graph;
208	23					192	next;
209							}
210
211							# Inputs to this node. These are different from the DAG's inputs.
212							# The scope stack is (outer to inner) DAG's inputs, DAG's overrides,
213							# then $node_inputs, then the individual node's overrides.
214	61					267	my $node_inputs = Data::Hopen::Scope::Hash->new;
215							# TODO make this a DH::Scope::Inputs once it's implemented
216	61					2875	$node_inputs->outer($self->scope);
217							# Data specifically being provided to the current node, e.g.,
218							# on input edges, beats the scope of the DAG as a whole.
219	61					2395	$node_inputs->local(true);
220							# A CollectOp won't reach above the node's inputs by default.
221	61					1332	$node_inputs->merge_strategy($merge_strategy);
222
223							# Iterate over each node's edges and process any Links
224	61					523	foreach my $pred ($graph->ordered_predecessors($node)) {
225	35			29		20774	hlog { ('From', $pred->name, 'to', $node->name) };
	29					163
226
227							# Goals do not feed outputs to other Goals. This is so you can
228							# add edges between Goals to set their order while keeping the
229							# data for each Goal separate.
230							# TODO add tests for this. Also TODO decide whether this is
231							# actually the Right Thing!
232	35	50				243	next if eval { $pred->DOES('Data::Hopen::G::Goal') };
	35					487
233
234	35					201	my $links = $graph->get_edge_attribute($pred, $node, LINKS);
235
236							# Simple case (no links): predecessor's outputs become our inputs
237	35	100				33093	unless($links) {
238	27			23		200	hlog { ' -- no links' };
	23					63
239	27					162	$node_inputs->merge(%{$pred->outputs});
	27					159
240							# TODO specify which set these are.
241							# Use the predecessor's identity as the set.
242	27					74	next;
243							}
244
245							# More complex case: Process all the links
246
247							# Helper function to wrap a hashref in the right scope for a link input
248							local *make_link_inputs = sub {
249	19			19		53	my $hrIn = shift;
250	19					73	my $scLinkInputs = Data::Hopen::Scope::Hash->new->put(%$hrIn);
251							# All links get the same outer scope --- they are parallel,
252							# not in series.
253							# TODO? use the predecessor's identity as the set.
254	19					386	$scLinkInputs->outer($self->scope);
255							# The links run at the same scope level as the node.
256	19					1278	$scLinkInputs->local(true);
257	19					140	return $scLinkInputs;
258	8					73	};
259
260							# Make the first link's input scope
261	8					45	my $hrPredOutputs = $pred->outputs;
262							# In one test, outputs was undef if not on its own line.
263	8					32	my $scLinkInputs = make_link_inputs($hrPredOutputs);
264
265							# Run the links in series - not parallel!
266	8					42	my $hrLinkOutputs = $scLinkInputs->as_hashref(-levels=>'local');
267	8					39	foreach my $link (@$links) {
268	11			9		124	hlog { ('From', $pred->name, 'via', $link->name, 'to', $node->name) };
	9					68
269
270	11					137	$hrLinkOutputs = $link->run(
271							-context=>$scLinkInputs,
272							forward_opts(\%args, {'-'=>1}, 'phase')
273							# visitor not passed to links.
274							);
275	11					190	$scLinkInputs = make_link_inputs($hrLinkOutputs);
276							} #foreach incoming link
277
278	8					150	$node_inputs->merge(%$hrLinkOutputs);
279							# TODO specify which set these are.
280
281							} #foreach predecessor node
282
283	61					489	my $step_output = $node->run(-context=>$node_inputs,
284							forward_opts(\%args, {'-'=>1}, 'phase', 'visitor')
285							);
286	61					1124	$node->outputs($step_output);
287
288							# Give the visitor a chance, and stash the results if necessary.
289	61	100				114	if(eval { $node->DOES('Data::Hopen::G::Goal') }) {
	61					544
290	23	100				92	$args{visitor}->visit_goal($node, $node_inputs) if $args{visitor};
291
292							# Save the result if there is one. Don't save {}.
293							# use $node->outputs, not $step_output, since the visitor may
294							# alter $node->outputs.
295	23	100				73	$retval->{$node->name} = $node->outputs if keys %{$node->outputs};
	23					87
296							} else {
297	38	100				150	$args{visitor}->visit_node($node, $node_inputs) if $args{visitor};
298							}
299
300	47			47		137	hlog { 'Finished node', $node->name, 'with outputs',
301	61					545	Dumper $node->outputs } 10;
302
303							} #foreach node in topo-sort order
304
305	23					501	return $retval;
306							} #run()
307
308							=head1 ADDING DATA
309
310							=head2 goal
311
312							Creates a goal of the DAG. Goals are names for sequences of operations,
313							akin to top-level Makefile targets. Usage:
314
315							my $goalOp = $dag->goal('name')
316
317							Returns the L node that is the goal. By default, any
318							inputs passed into a goal are provided as outputs of that goal, and are
319							saved as outputs of the DAG under the goal's name.
320
321							The first call to C also sets L.
322
323							=cut
324
325							sub goal {
326	17	100		17	1	3528	my $self = shift or croak 'Need an instance';
327	16	100				207	my $name = shift or croak 'Need a goal name';
328	15					100	my $goal = Data::Hopen::G::Goal->new(name => $name);
329	15					404	$self->_graph->add_vertex($goal);
330							#$self->_node_by_name->{$name} = $goal;
331	15					2754	$self->_graph->add_edge($goal, $self->_final);
332	15	100				445	$self->default_goal($goal) unless $self->default_goal;
333	15					396	push @{$self->goals}, $goal;
	15					277
334	15					77	return $goal;
335							} #goal()
336
337							=head2 connect
338
339							=over 4
340
341							=item - C<< DAG:connect(, , , ) >>
342
343							B.
344							Connects output C of operation C as input C of
345							operation C. No processing is done between output and input.
346							C and C can be anything usable as a table index, provided
347							that table index appears in the corresponding operation's descriptor.
348
349							=item - C<< DAG:connect(, ) >>
350
351							Creates a dependency edge from C to C, indicating that C must be
352							run before C. Does not transfer any data from C to C.
353
354							=item - C<< DAG:connect(, , ) >>
355
356							Connects C to C via L C.
357							C may be undef, in which case this is treated as the two-parameter form.
358
359							If there are already link(s) on the edge from C to C, the new link
360							is added after the last existing link.
361
362							=back
363
364							TODO return the name of the edge? The edge instance itself? Maybe a
365							fluent interface to the DAG for chaining C calls?
366
367							TODO remove the out-edge and in-edge parameters?
368
369							=cut
370
371							sub connect {
372	21	100		21	1	10621	my $self = shift or croak 'Need an instance';
373	20					57	my ($op1, $out_edge, $in_edge, $op2, $link);
374
375							# Unpack args
376							#if(@_ == 4) {
377							# ($op2, $out_edge, $in_edge, $op2) = @_;
378							#} else the following
379	20	100				73	if(@_ == 3) {
		100
380	12					31	($op1, $link, $op2) = @_;
381							} elsif(@_ == 2) {
382	5					14	($op1, $op2) = @_;
383							} else {
384	3					36	die "Invalid arguments";
385							}
386
387							#my $out_edge = false; # No outputs TODO use these?
388							#my $in_edge = false; # No inputs
389
390	13	100		13		70	hlog { 'DAG::connect(): Edge from', $op1->name,
391							'via', $link ? $link->name : '(no link)',
392	17					142	'to', $op2->name };
393
394							# Add it to the graph (idempotent)
395	17					548	$self->_graph->add_edge($op1, $op2);
396							# $self->_node_by_name->{$_->name} = $_ foreach ($op1, $op2);
397
398							# Save the DHG::Link as an edge attribute (not idempotent!)
399	17	100				603	if($link) {
400	11		100			306	my $attrs = $self->_graph->get_edge_attribute($op1, $op2, LINKS) \|\| [];
401	11					9937	push @$attrs, $link;
402	11					286	$self->_graph->set_edge_attribute($op1, $op2, LINKS, $attrs);
403							}
404
405	17					9952	return undef; # TODO decide what to return
406							} #connect()
407
408							=head2 add
409
410							Add a regular node to the graph. An attempt to add the same node twice will be
411							ignored. Usage:
412
413							my $node = Data::Hopen::G::Op->new(name=>"whatever");
414							$dag->add($node);
415
416							Returns the node, for the sake of chaining.
417
418							=cut
419
420							sub add {
421	4			4	1	3463	my ($self, undef, $node) = parameters('self', ['node'], @_);
422	2	100				102	return if $self->_graph->has_vertex($node);
423	1			1		138	hlog { __PACKAGE__, $self->name, 'adding', Dumper($node) } 2;
	1					6
424
425	1					35	$self->_graph->add_vertex($node);
426							#$self->_node_by_name->{$node->name} = $node if $node->name;
427
428	1					176	return $node;
429							} #add()
430
431							=head2 init
432
433							Add an initialization operation to the graph. Initialization operations run
434							before all other operations. An attempt to add the same initialization
435							operation twice will be ignored. Usage:
436
437							my $op = Data::Hopen::G::Op->new(name=>"whatever");
438							$dag->init($op[, $first]);
439
440							If C<$first> is truthy, the op will be run before anything already in the
441							graph. However, later calls to C with C<$first> set will push
442							operations even before C<$op>.
443
444							Returns the node, for the sake of chaining.
445
446							=cut
447
448							sub init {
449	6	100		6	1	6137	my $self = shift or croak 'Need an instance';
450	5	100				340	my $op = shift or croak 'Need an op';
451	4					10	my $first = shift;
452	4	100				98	return if $self->_init_graph->has_vertex($op);
453
454	3					438	$self->_init_graph->add_vertex($op);
455							#$self->_node_by_name->{$op->name} = $op;
456
457	3	100				458	if($first) { # $op becomes the new _init_first node
458	1					21	$self->_init_graph->add_edge($op, $self->_init_first);
459	1					25	$self->_init_first($op);
460							} else { # Not first, so can happen anytime. Add it after the
461							# current first node.
462	2					43	$self->_init_graph->add_edge($self->_init_first, $op);
463							}
464
465	3					13	return $op;
466							} #init()
467
468							=head1 ACCESSORS
469
470							=head2 empty
471
472							Returns truthy if the only nodes in the graph are internal nodes.
473							Intended for use by hopen files.
474
475							=cut
476
477							sub empty {
478	3	100		3	1	2324	my $self = shift or croak 'Need an instance';
479	2					52	return ($self->_graph->vertices == 1);
480							# _final is the node in an empty() graph.
481							# We don't check the _init_graph since empty() is intended
482							# for use by hopen files, not toolsets.
483							} #empty()
484
485							=head1 OTHER
486
487							=head2 BUILD
488
489							Initialize the instance.
490
491							=cut
492
493							sub BUILD {
494							#use Data::Dumper;
495							#say Dumper(\@_);
496	17	100		17	1	6779	my $self = shift or croak 'Need an instance';
497	16					51	my $hrArgs = shift;
498
499							# DAGs always have names
500	16	100				71	$self->name('__R_DAG_' . $_id_counter++) unless $self->has_custom_name;
501
502							# Graph of normal operations
503	16					153	my $graph = Data::Hopen::OrderedPredecessorGraph->new( directed => true,
504							refvertexed => true);
505	16					4745	my $final = Data::Hopen::G::Node->new(
506							name => '__R_DAG_ROOT' . $_id_counter++);
507	16					1472	$graph->add_vertex($final);
508	16					3950	$self->_graph($graph);
509	16					395	$self->_final($final);
510
511							# Graph of initialization operations
512	16					151	my $init_graph = Data::Hopen::OrderedPredecessorGraph->new( directed => true,
513							refvertexed => true);
514	16					3201	my $init = Data::Hopen::G::CollectOp->new(
515							name => '__R_DAG_INIT' . $_id_counter++);
516	16					704	$init_graph->add_vertex($init);
517
518	16					2914	$self->_init_graph($init_graph);
519	16					404	$self->_init_first($init);
520							} #BUILD()
521
522							1;
523							# Rest of the docs {{{1
524							__END__