File Coverage

blib/lib/Devel/Git/MultiBisect/BuildTransitions.pm

Criterion	Covered	Total	%
statement	20	178	11.2
branch	0	60	0.0
condition	0	9	0.0
subroutine	7	17	41.1
pod	4	5	80.0
total	31	269	11.5

line	stmt	bran	cond	sub	pod	time	code
1							package Devel::Git::MultiBisect::BuildTransitions;
2	3			3		2149	use v5.14.0;
	3					31
3	3			3		22	use warnings;
	3					6
	3					92
4	3			3		1465	use parent ( qw\| Devel::Git::MultiBisect \| );
	3					1075
	3					28
5	3					146	use Devel::Git::MultiBisect::Auxiliary qw(
6							hexdigest_one_file
7							validate_list_sequence
8	3			3		185	);
	3					7
9	3			3		18	use Carp;
	3					7
	3					132
10	3			3		17	use File::Spec;
	3					6
	3					63
11	3			3		13	use File::Temp qw( tempdir );
	3					6
	3					7317
12
13							our $VERSION = '0.19';
14							$VERSION = eval $VERSION;
15
16							=head1 NAME
17
18							Devel::Git::MultiBisect::BuildTransitions - Gather build-time output where it changes over a range of F<git> commits
19
20							=head1 SYNOPSIS
21
22							use Devel::Git::MultiBisect::BuildTransitions;
23
24							$self = Devel::Git::MultiBisect::BuildTransitions->new(\%parameters);
25
26							$commit_range = $self->get_commits_range();
27
28							$self->multisect_builds( { probe => 'error' } );
29
30							$multisected_outputs = $self->get_multisected_outputs();
31
32							$transitions = $self->inspect_transitions();
33
34
35							=head1 DESCRIPTION
36
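37							Whereas F<Devel::Git::MultiBisect::Transitions> is concerned with B<test-time>
38							failures, F<Devel::Git::MultiBisect::BuildTransitions> is concerned with
39							B<build-time> phenomena: exceptions and warnings. We can identify three such
40							cases: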
41
42							=over 4
43
44							=item * Build-time failures
45
46							While running your C-compiler over C source code via F<make>, an exception may
47							be thrown which causes the build to fail. Over a large number of commits,
48							different exceptions may be thrown at various commits. Identify those
49							commits.
50
51							=item * Build-time C-level warnings
52
53							Your C-compiler may identify sub-optimal C source code and emit warnings.
54							Over a large number of commits, different warnings may be thrown at various
55							commits. Identify the commits where the warnings changed.
56
57							=item * Build-time non-C-level warnings
58
59							At build time F<make> is not limited to running a C compiler; it may also
60							execute statements in Perl, shell or other languages. Those statements may
61							themselves generate warnings. Identify the commits where the F<STDERR> output
62							from F<make> changes.
63
64							=back
65
66							These three cases are distinguished by the arguments passed to the
67							C<multisect_builds()> method described below.
68
69							=head1 METHODS
70
71							=head2 C<new()>
72
73							=over 4
74
75							=item * Purpose
76
77							Constructor.
78
79							=item * Arguments
80
81							$self = Devel::Git::MultiBisect::BuildTransitions->new(\%params);
82
83							Reference to a hash, typically the return value of
84							C<Devel::Git::MultiBisect::Opts::process_options()>.
85
86							=item * Return Value
87
88							Object of Devel::Git::MultiBisect child class.
89
90							=back
91
92							=cut
93
# Constructor.
#
# Arguments: the invoking class and a reference to a hash of parameters.
# Returns:   the data structure produced by
#            Devel::Git::MultiBisect::Init::init(), blessed into $class, with
#            its 'targets' and 'test_command' elements removed.
sub new {
    my ($class, $params) = @_;

    my $data = Devel::Git::MultiBisect::Init::init($params);

    # Drop both elements in one hash-slice delete.
    delete @{$data}{ qw( targets test_command ) };

    return bless $data, $class;
}
104
105							=head2 C<multisect_builds()>
106
107							=over 4
108
109							=item * Purpose
110
111							With a given set of configuration options and a specified range of F<git>
112							commits, identify the points where the output of the "build command" --
113							typically, F<make> -- materially changed.
114
115							A B<material change> would be either (a) the emergence or correction of
116							C-level exceptions; (b) the emergence or correction of C-level warnings; (c)
117							the emergence or correction of F<STDERR> output emitted during F<make> by
118							Perl, shell or other non-C code.
119
120							These three cases are distinguished by the arguments passed to this method.
121
122							=item * Arguments
123
124							$self->multisect_builds(); # defaults to { probe => 'error' }
125
126							$self->multisect_builds({ probe => 'error' });
127
128							$self->multisect_builds({ probe => 'warning' });
129
130							$self->multisect_builds({ probe => 'stderr' });
131
132							Optionally takes one hash reference. At present that hashref may contain only
133							one element whose key is C<probe> and whose possible values are C<error>,
134							C<warning>, or C<stderr>. Defaults to C<error>. Select among these values
135							depending on whether you are probing for changes in errors generated by the
136							C-compiler, changes in warnings generated by the C-compiler, or all text
137							output to C<STDERR> during F<make>.
138
139							=item * Return Value
140
141							Returns true value upon success.
142
143							=item * Comment
144
145							As C<multisect_builds()> runs it does two kinds of things:
146
147							=over 4
148
149							=item *
150
151							It stores results data within the object which you can subsequently access
152							through method calls.
153
154							=item *
155
156							It captures error messages from each commit run and writes them to a file on
157							disk for later human inspection. (If you have selected C<< probe => 'stderr' >>,
158							all content directed to F<STDERR> is written to that file.)
159
160							=back
161
162							=back
163
164							=cut
165
# multisect_builds
#
# Purpose:   Run builds over selected commits in the commit range until the
#            commits at which the build output changed have been located.
# Arguments: Optional hashref.  Its only permitted key is 'probe', whose value
#            must be 'error', 'warning' or 'stderr'.  Defaults to 'error'.
#            The caller's hashref is never modified.
# Returns:   1 upon completion.
# Croaks:    if the argument is defined but not a hashref, if it holds a key
#            other than 'probe', or if it holds an unrecognized value.
# Effects:   sets $self->{probe} and $self->{timings}; $self->{all_outputs} is
#            filled in by the methods called below.
sub multisect_builds {
    my ($self, $args) = @_;

    # Methods called within multisect_builds:
    #   _prepare_for_multisection
    #     get_commits_range
    #     run_build_on_one_commit
    #       _configure_one_commit
    #       _build_one_commit
    #         _filter_build_log
    #   _run_one_commit_and_assign
    #   _bisection_decision
    #   _evaluate_status_of_build_runs

    if (defined $args) {
        croak "Argument passed to multisect_builds() must be hashref"
            unless ref($args) eq 'HASH';
        my %good_keys = map { $_ => 1 } qw( probe );
        for my $k (keys %{$args}) {
            croak "Invalid key '$k' in hashref passed to multisect_builds()"
                unless $good_keys{$k};
        }
        my %good_values = map { $_ => 1 } qw( error warning stderr );
        for my $v (values %{$args}) {
            croak "Invalid value '$v' in 'probe' element in hashref passed to multisect_builds()"
                unless $good_values{$v};
        }
    }

    # Apply the default without assigning it back into the caller's hashref.
    my $requested_probe = defined $args ? $args->{probe} : undef;
    $self->{probe} = $requested_probe // 'error';

    # Prepare the data structure in the object which holds the results of
    # build runs on a per commit basis, and "prime" it by performing build
    # runs on the first and last commits in the commit range.

    my $start_time = time();
    $self->_prepare_for_multisection();

    # At this point $self->{all_outputs} is an array ref with one element per
    # commit in the commit range.  If a commit has been visited, the element
    # is a hash ref with 4 key-value pairs like the ones below.  If the commit
    # has not yet been visited, the element is undef.
    #
    #   [
    #     {
    #       commit       => "7c9c5138c6a704d1caf5908650193f777b81ad23",
    #       commit_short => "7c9c513",
    #       file         => "/home/jkeenan/learn/perl/multisect/7c9c513.make.errors.rpt.txt",
    #       md5_hex      => "d41d8cd98f00b204e9800998ecf8427e",
    #     },
    #     undef,
    #     undef,
    #     ...
    #     undef,
    #     {
    #       commit       => "8f6628e3029399ac1e48dfcb59c3cd30e5127c3e",
    #       commit_short => "8f6628e",
    #       file         => "/home/jkeenan/learn/perl/multisect/8f6628e.make.errors.rpt.txt",
    #       md5_hex      => "fdce7ff2f07a0a8cd64005857f4060d4",
    #     },
    #   ]
    #
    # Only one output file is recorded per commit, so an array of hash refs
    # suffices.
    #
    # Each pass through the loop below runs a build on at most one more commit
    # and stores its hash ref in @{$self->{all_outputs}}.  Elements for
    # commits which never needed to be visited remain undef.
    #
    # The md5_hex of each commit's output file is used as the identifier of
    # that file's content.  A transition point is a commit whose md5_hex
    # differs from that of the immediately preceding commit.  We locate the
    # first commit whose md5_hex differs from that of the very first commit,
    # then the next commit whose md5_hex differs from that one, and so on,
    # until we reach a transition point whose md5_hex is identical to that of
    # the very last commit in the commit range.

    my ($min_idx, $max_idx) = (0, $#{$self->{commits}});
    my $this_target_status  = 0;
    my $current_start_idx   = $min_idx;
    my $current_end_idx     = $max_idx;
    my $overall_end_md5_hex = $self->{all_outputs}->[$max_idx]->{md5_hex};
    my $n = 0;

    while (! $this_target_status) {

        # What gets (or may get) updated or assigned to in the course of one
        # rep of this loop:
        #   $current_start_idx
        #   $current_end_idx
        #   $n
        #   $self->{all_outputs}

        # Midpoint of the current sub-range, truncated to an integer index.
        my $h = int( ($current_start_idx + $current_end_idx) / 2 );
        $self->_run_one_commit_and_assign($h);

        my $current_start_md5_hex = $self->{all_outputs}->[$current_start_idx]->{md5_hex};
        my $target_h_md5_hex      = $self->{all_outputs}->[$h]->{md5_hex};

        # Decision criteria:
        # If $target_h_md5_hex eq $current_start_md5_hex, then the first
        # transition is after index $h.  Hence bisection should go upwards.
        #
        # If $target_h_md5_hex ne $current_start_md5_hex, then the first
        # transition has come before index $h.  Hence bisection should go
        # downwards.  However, since the test of where the first transition is
        # is that index j-1 has the same md5_hex as $current_start_md5_hex but
        # index j has a different md5_hex, we have to do a run on j-1 as well.

        ($current_start_idx, $current_end_idx, $n) =
            $self->_bisection_decision(
                $target_h_md5_hex, $current_start_md5_hex, $h,
                $self->{all_outputs},
                $overall_end_md5_hex, $current_start_idx, $current_end_idx,
                $max_idx, $n,
            );
        $this_target_status = $self->_evaluate_status_of_build_runs();
    }

    my $end_time = time();
    my %timings = (
        elapsed => $end_time - $start_time,
        runs    => scalar( grep { defined $_ } @{$self->{all_outputs}} ),
    );
    $timings{mean} = sprintf("%.02f" => $timings{elapsed} / $timings{runs});
    if ($self->{verbose}) {
        say "Ran $timings{runs} runs; elapsed: $timings{elapsed} sec; mean: $timings{mean} sec";
    }
    $self->{timings} = \%timings;

    return 1;
}
313
# _prepare_for_multisection
#
# Purpose:   Create $self->{all_outputs} with one undef slot per element of
#            the list returned by get_commits_range(), then run a build on the
#            first and last elements and store the results in their slots.
# Arguments: none beyond the object.
# Returns:   $self->{all_outputs} (array reference).
#
# get_commits_range() is inherited from the parent class; each element of the
# array it returns is passed unchanged to run_build_on_one_commit().
sub _prepare_for_multisection {
    my $self = shift;

    my $all_commits = $self->get_commits_range();
    my $last_idx    = $#{$all_commits};
    $self->{all_outputs} = [ (undef) x scalar(@{$all_commits}) ];

    # When the range holds exactly one commit the first and last index
    # coincide; build that commit once rather than twice.
    my @endpoints = $last_idx == 0 ? (0) : (0, $last_idx);

    for my $idx (@endpoints) {
        $self->{all_outputs}->[$idx] =
            $self->run_build_on_one_commit($all_commits->[$idx]);
    }
    return $self->{all_outputs};
}
330
# run_build_on_one_commit
#
# Purpose:   Configure and build a single commit, then check out the branch
#            reported by _configure_one_commit() again.
# Arguments: $commit -- commit to build; defaults to
#            $self->{commits}->[0]->{sha} when undefined.
# Returns:   whatever _build_one_commit() returned for that commit.
# Croaks:    if the final 'git checkout' exits with non-zero status.
# Effects:   increments $self->{commit_counter}; prints progress messages when
#            $self->{verbose} is true.
#
# _configure_one_commit() is not defined in this file; its return value is
# used here as the name of the branch to return to.
sub run_build_on_one_commit {
    my ($self, $commit) = @_;
    $commit //= $self->{commits}->[0]->{sha};
    say "Building commit: $commit" if ($self->{verbose});

    my $starting_branch = $self->_configure_one_commit($commit);

    my $outputsref = $self->_build_one_commit($commit);
    say "Tested commit: $commit; returning to: $starting_branch"
        if ($self->{verbose});

    # We want to return to our basic branch (e.g., 'master', 'blead')
    # before checking out a new commit.
    # NOTE(review): the branch name is interpolated into a shell command
    # unquoted, so it must not contain shell metacharacters.

    system(qq|git checkout --quiet $starting_branch|)
        and croak "Unable to 'git checkout --quiet $starting_branch'";

    $self->{commit_counter}++;
    say "Commit counter: $self->{commit_counter}" if $self->{verbose};

    return $outputsref;
}
353
# _build_one_commit
#
# Purpose:   Run $self->{make_command} for one commit, capture its output in a
#            file under $self->{outputdir}, and pass that file to
#            _filter_build_log().
# Arguments: $commit -- full commit identifier; its first $self->{short}
#            characters are used in the log file's name.
# Returns:   hash reference with keys 'commit', 'commit_short', 'file' (the
#            path returned by _filter_build_log()) and 'md5_hex' (the result
#            of hexdigest_one_file() on that path).
sub _build_one_commit {
    my ($self, $commit) = @_;
    my $short_sha   = substr($commit, 0, $self->{short});
    my $command_raw = $self->{make_command};

    # With probe => 'stderr' only the STDERR of 'make' is captured, in a file
    # ending in 'make.stderr.txt', for commit-by-commit comparison.
    #
    # With probe => 'error' or probe => 'warning' the entire (2>&1) output of
    # 'make' is captured, in a file ending in 'make.output.txt', which
    # _filter_build_log() then filters for C-level exceptions or warnings.

    my $stderr_only = $self->{probe} eq 'stderr';
    my $log_kind    = $stderr_only ? 'stderr' : 'output';
    my $build_log   = File::Spec->catfile(
        $self->{outputdir},
        join('.' => $short_sha, 'make', $log_kind, 'txt'),
    );
    my $cmd = $stderr_only
        ? qq|$command_raw 2>$build_log|
        : qq|$command_raw >$build_log 2>&1|;

    say "Actual 'make' command: $cmd" if $self->{verbose};

    # The exit status of the build command is not examined here.
    system($cmd);

    my $filtered_probes_file = $self->_filter_build_log($build_log, $short_sha);
    say "Created $filtered_probes_file" if $self->{verbose};

    my %outputs = (
        commit       => $commit,
        commit_short => $short_sha,
        file         => $filtered_probes_file,
        md5_hex      => hexdigest_one_file($filtered_probes_file),
    );
    return \%outputs;
}
405
# _filter_build_log
#
# Purpose:   Reduce a build log to the lines relevant to the current probe and
#            write them, in normalized form, to a report file.
# Arguments: $buildlog  -- path to the file holding the output of the build;
#            $short_sha -- abbreviated commit identifier used in the name of
#                          the report file.
# Returns:   path to the report file.  With probe 'stderr' no report file is
#            written and $buildlog itself is returned.
# Croaks:    if a file cannot be opened or closed, or (probe 'error') if the
#            number of lines returned by 'ack' is not divisible by 3.
#
# "Normalized" means that the line and character numbers in each message are
# replaced with '_'.
sub _filter_build_log {
    my ($self, $buildlog, $short_sha) = @_;

    if ($self->{probe} eq 'error') {
        # The default case: probing for build-time errors.  This branch shells
        # out to the external 'ack' program, asking for each matching line
        # plus the two lines following it.
        my $ackpattern = q|-A2 '^[^:]+:\d+:\d+:\s+error:'|;
        my @raw_acklines = grep { ! m/^--\n/ } `ack $ackpattern $buildlog`;
        chomp(@raw_acklines);
        croak "Got incorrect count of lines from ack; should be divisible by 3"
            unless scalar(@raw_acklines) % 3 == 0;

        # Group the lines in threes, normalizing the first line of each group.
        my @refined_errors = ();
        for (my $i = 0; $i <= $#raw_acklines; $i += 3) {
            my $normalized =
                $raw_acklines[$i] =~ s/^([^:]+):\d+:\d+:(.*)$/$1:_:_:$2/r;
            push @refined_errors,
                [ $normalized, @raw_acklines[ $i + 1 .. $i + 2 ] ];
        }

        my $error_report_file =
            File::Spec->catfile($self->{outputdir}, "$short_sha.make.errors.rpt.txt");
        say "rpt: $error_report_file" if $self->{verbose};
        open my $OUT, '>', $error_report_file
            or croak "Unable to open $error_report_file for writing";
        if (@refined_errors) {
            # Groups are separated by a line holding '--'; no separator
            # follows the last group.
            say $OUT join "\n--\n" => map { join "\n" => @{$_} } @refined_errors;
        }
        close $OUT or croak "Unable to close $error_report_file after writing";
        return $error_report_file;
    }
    elsif ($self->{probe} eq 'warning') {
        my $ackpattern = qr/^
            ([^:]+):
            (\d+):
            (\d+):\s+warning:\s+
            (.*?)\s+\[-
            (W.*)]$
        /x;

        my @refined_warnings = ();
        open my $IN, '<', $buildlog or croak "Unable to open $buildlog for reading";
        while (my $logline = <$IN>) {
            chomp $logline;
            next unless $logline =~ m/$ackpattern/;
            # Captures 2 and 3 (line and character numbers) are deliberately
            # discarded.
            my ($source, $text, $class) = ($1, $4, $5);
            push @refined_warnings, "$source:_:_: warning: $text [$class]";
        }
        close $IN or croak "Unable to close $buildlog after reading";

        my $warning_report_file =
            File::Spec->catfile($self->{outputdir}, "$short_sha.make.warnings.rpt.txt");
        open my $OUT, '>', $warning_report_file
            or croak "Unable to open $warning_report_file for writing";
        say $OUT $_ for @refined_warnings;
        close $OUT or croak "Unable to close $warning_report_file after writing";
        return $warning_report_file;
    }
    else {
        # $self->{probe} eq 'stderr'
        # With this option, we simply record all STDERR from 'make' in the
        # build log and return it.
        return $buildlog;
    }
}
478
# _evaluate_status_of_build_runs
#
# Builds a list holding, for each element of $self->{all_outputs}, its md5_hex
# value (or undef where the element is undefined) and passes a reference to
# that list to validate_list_sequence().
#
# Returns 1 when the array returned by validate_list_sequence() has exactly
# one element and that element is true; returns 0 otherwise.
sub _evaluate_status_of_build_runs {
    my ($self) = @_;

    my @digests =
        map { defined $_ ? $_->{md5_hex} : undef } @{ $self->{all_outputs} };

    my $vls = validate_list_sequence(\@digests);

    return 1 if scalar(@{$vls}) == 1 and $vls->[0];
    return 0;
}
489
# _run_one_commit_and_assign
#
# Arguments: $idx -- index of a commit within @{$self->{commits}}.
#
# If $self->{all_outputs}->[$idx] is already defined, that commit's outputs
# have been stashed and no build run is performed.  Otherwise
# run_build_on_one_commit() is called for that commit and its return value is
# assigned to $self->{all_outputs}->[$idx] in place.
sub _run_one_commit_and_assign {
    my ($self, $idx) = @_;
    my $sha = $self->{commits}->[$idx]->{sha};

    unless (defined $self->{all_outputs}->[$idx]) {
        if ($self->{verbose}) {
            say "\nAt commit counter $self->{commit_counter}, preparing to test commit ", $idx + 1, " of ", scalar(@{$self->{commits}});
        }
        $self->{all_outputs}->[$idx] = $self->run_build_on_one_commit($sha);
    }
}
506
507							=head2 C<get_multisected_outputs()>
508
509							=over 4
510
511							=item * Purpose
512
513							Get results of C<multisect_builds()> (other than test output files
514							created) reported on a per commit basis.
515
516							=item * Arguments
517
518							my $multisected_outputs = $self->get_multisected_outputs();
519
520							None; all data needed is already present in the object.
521
522							=item * Return Value
523
524							Reference to an array with one element for each commit in the commit range.
525
526							=over 4
527
528							=item *
529
530							If a particular commit B<was not visited> in the course of
531							C<multisect_builds()>, then the array element is undefined. (The point
532							of multisection, of course, is to B<not> have to visit every commit in the
533							commit range in order to figure out the commits at which test output changed.)
534
535							=item *
536
537							If a particular commit B<was visited> in the course of
538							C<multisect_builds()>, then the array element is a hash reference whose
539							elements have the following keys:
540
541							commit
542							commit_short
543							file
544							md5_hex
545
546							=back
547
548							=back
549
550							=cut
551
# Accessor: returns $self->{all_outputs} as stored in the object.
sub get_multisected_outputs {
    my ($self) = @_;
    return $self->{all_outputs};
}
556
557							=head2 C<inspect_transitions()>
558
559							=over 4
560
561							=item * Purpose
562
563							Get a data structure which reports on the most meaningful results of
564							C<multisect_builds()>, namely, the first commit, the last commit and all
565							transitional commits.
566
567							=item * Arguments
568
569							my $transitions = $self->inspect_transitions();
570
571							None; all data needed is already present in the object.
572
573							=item * Return Value
574
575							Reference to a hash with 3 key-value pairs. Each element's value is another
576							hash reference. The elements of the top-level hash are:
577
578							=over 4
579
580							=item * C<oldest>
581
582							Value is reference to hash keyed on C<idx>, C<md5_hex> and C<file>, whose
583							values are, respectively, the index position of the very first commit in the
584							commit range, the digest of that commit's test output and the path to the file
585							holding that output.
586
587							=item * C<newest>
588
589							Value is reference to hash keyed on C<idx>, C<md5_hex> and C<file>, whose
590							values are, respectively, the index position of the very last commit in the
591							commit range, the digest of that commit's test output and the path to the file
592							holding that output.
593
594							=item * C<transitions>
595
596							Value is reference to an array with one element for each transitional commit.
597							Each such element is a reference to a hash with keys C<older> and C<newer>.
598							In this context C<older> refers to the last commit in a sub-sequence with a
599							particular digest; C<newer> refers to the next immediate commit which is the
600							first commit in a new sub-sequence with a new digest.
601
602							The values of C<older> and C<newer> are, in turn, references to hashes with
603							keys C<idx>, C<md5_hex> and C<file>. Their values are, respectively, the index
604							position of the particular commit in the commit range, the digest of that
605							commit's test output and the path to the file holding that output.
606
607							=back
608
609							Example:
610
611
612							=item * Comment
613
614							The return value of C<inspect_transitions()> should be useful to the developer
615							trying to determine the various points in a long series of commits where a
616							target's test output changed in meaningful ways. Hence, it is really the
617							whole point of F<Devel::Git::MultiBisect::BuildTransitions>.
618
619							=back
620
621							=cut
622
# inspect_transitions
#
# Purpose:   Report the first commit, the last commit and every pair of
#            adjacent visited commits whose md5_hex values differ.
# Arguments: none beyond the object.
# Returns:   reference to a hash with keys 'oldest', 'newest' and
#            'transitions'.  'oldest' and 'newest' are hash refs with keys
#            'idx', 'md5_hex' and 'file'.  'transitions' is a reference to an
#            array of hash refs, each with keys 'older' and 'newer' whose
#            values have that same three-key shape.
sub inspect_transitions {
    my ($self) = @_;
    my $outputs  = $self->get_multisected_outputs();
    my $last_idx = $#{$outputs};

    # Describe the commit at a given index as { idx, md5_hex, file }.
    my $describe = sub {
        my $idx = shift;
        return {
            idx     => $idx,
            md5_hex => $outputs->[$idx]->{md5_hex},
            file    => $outputs->[$idx]->{file},
        };
    };

    my %report = (
        transitions => [],
        oldest      => $describe->(0),
        newest      => $describe->($last_idx),
    );

    for my $newer_idx (1 .. $last_idx) {
        my $older_idx = $newer_idx - 1;

        # Only pairs in which both commits were visited can be compared.
        next if ! defined $outputs->[$older_idx]
             or ! defined $outputs->[$newer_idx];

        next if $outputs->[$older_idx]->{md5_hex}
             eq $outputs->[$newer_idx]->{md5_hex};

        push @{ $report{transitions} }, {
            older => $describe->($older_idx),
            newer => $describe->($newer_idx),
        };
    }
    return \%report;
}
658
659							1;
660
661							__END__