File Coverage

blib/lib/Devel/Git/MultiBisect/BuildTransitions.pm
Criterion Covered Total %
statement 20 178 11.2
branch 0 60 0.0
condition 0 9 0.0
subroutine 7 17 41.1
pod 4 5 80.0
total 31 269 11.5


line stmt bran cond sub pod time code
1             package Devel::Git::MultiBisect::BuildTransitions;
2 3     3   2149 use v5.14.0;
  3         31  
3 3     3   22 use warnings;
  3         6  
  3         92  
4 3     3   1465 use parent ( qw| Devel::Git::MultiBisect | );
  3         1075  
  3         28  
5 3         146 use Devel::Git::MultiBisect::Auxiliary qw(
6             hexdigest_one_file
7             validate_list_sequence
8 3     3   185 );
  3         7  
9 3     3   18 use Carp;
  3         7  
  3         132  
10 3     3   17 use File::Spec;
  3         6  
  3         63  
11 3     3   13 use File::Temp qw( tempdir );
  3         6  
  3         7317  
12              
13             our $VERSION = '0.19';
14             $VERSION = eval $VERSION;
15              
16             =head1 NAME
17              
18             Devel::Git::MultiBisect::BuildTransitions - Gather build-time output where it changes over a range of F<git> commits
19              
20             =head1 SYNOPSIS
21              
22             use Devel::Git::MultiBisect::BuildTransitions;
23              
24             $self = Devel::Git::MultiBisect::BuildTransitions->new(\%parameters);
25              
26             $commit_range = $self->get_commits_range();
27              
28             $self->multisect_builds( { probe => 'error' } );
29              
30             $multisected_outputs = $self->get_multisected_outputs();
31              
32             $transitions = $self->inspect_transitions();
33             
34              
35             =head1 DESCRIPTION
36              
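37             Whereas F<Devel::Git::MultiBisect::Transitions> is concerned with B<test-time>
38             failures, F<Devel::Git::MultiBisect::BuildTransitions> is concerned with
39             B<build-time> phenomena: exceptions and warnings. We can identify three such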
40             cases:
41              
42             =over 4
43              
44             =item * Build-time failures
45              
46             While running your C-compiler over C source code via F<make>, an exception may
47             be thrown which causes the build to fail. Over a large number of commits,
48             different exceptions may be thrown at various commits. Identify those
49             commits.
50              
51             =item * Build-time C-level warnings
52              
53             Your C-compiler may identify sub-optimal C source code and emit warnings.
54             Over a large number of commits, different warnings may be thrown at various
55             commits. Identify the commits where the warnings changed.
56              
57             =item * Build-time non-C-level warnings
58              
59             At build time F<make> is not limited to running a C compiler; it may also
60             execute statements in Perl, shell or other languages. Those statements may
61             themselves generate warnings. Identify the commits where the F<STDERR> output
62             from F<make> changes.
63              
64             =back
65              
66             These three cases are distinguished by the arguments passed to the
67             C<multisect_builds()> method described below.
68              
69             =head1 METHODS
70              
71             =head2 C<new()>
72              
73             =over 4
74              
75             =item * Purpose
76              
77             Constructor.
78              
79             =item * Arguments
80              
81             $self = Devel::Git::MultiBisect::BuildTransitions->new(\%params);
82              
83             Reference to a hash, typically the return value of
84             C<Devel::Git::MultiBisect::Opts::process_options()>.
85              
86             =item * Return Value
87              
88             Object of Devel::Git::MultiBisect child class.
89              
90             =back
91              
92             =cut
93              
94             sub new {
95 0     0 1   my ($class, $params) = @_;
96              
97 0           my $data = Devel::Git::MultiBisect::Init::init($params);
98              
99 0           delete $data->{targets};
100 0           delete $data->{test_command};
101              
102 0           return bless $data, $class;
103             }
104              
105             =head2 C<multisect_builds()>
106              
107             =over 4
108              
109             =item * Purpose
110              
111             With a given set of configuration options and a specified range of F<git>
112             commits, identify the points where the output of the "build command" --
113             typically, F<make> -- materially changed.
114              
115             A B<material change> would be either (a) the emergence or correction of
116             C-level exceptions; (b) the emergence or correction of C-level warnings; (c)
117             the emergence or correction of F<STDERR> output emitted during F<make> by
118             Perl, shell or other non-C code.
119              
120             These three cases are distinguished by the arguments passed to this method.
121              
122             =item * Arguments
123              
124             $self->multisect_builds(); # defaults to { probe => 'error' }
125              
126             $self->multisect_builds({ probe => 'error' });
127              
128             $self->multisect_builds({ probe => 'warning' });
129              
130             $self->multisect_builds({ probe => 'stderr' });
131              
132             Optionally takes one hash reference. At present that hashref may contain only
133             one element whose key is C<probe> and whose possible values are C<error>,
134             C<warning>, or C<stderr>. Defaults to C<error>. Select among these values
135             depending on whether you are probing for changes in errors generated by the
136             C-compiler, changes in warnings generated by the C-compiler, or all text
137             output to C<STDERR> during F<make>.
138              
139             =item * Return Value
140              
141             Returns true value upon success.
142              
143             =item * Comment
144              
145             As C<multisect_builds()> runs it does two kinds of things:
146              
147             =over 4
148              
149             =item *
150              
151             It stores results data within the object which you can subsequently access
152             through method calls.
153              
154             =item *
155              
156             It captures error messages from each commit run and writes them to a file on
157             disk for later human inspection. (If you have selected C<probe =E<gt> 'stderr'>,
158             all content directed to F<STDERR> is written to that file.)
159              
160             =back
161              
162             =back
163              
164             =cut
165              
166             sub multisect_builds {
167 0     0 1   my ($self, $args) = @_;
168              
169             # Methods called within multisect_builds:
170             # _prepare_for_multisection
171             # get_commits_range
172             # run_build_on_one_commit
173             # _configure_one_commit
174             # _build_one_commit
175             # _filter_build_log
176             # _run_one_commit_and_assign
177             # _bisection_decision
178             # _evaluate_status_of_build_runs
179              
180 0 0         if (defined $args) {
181 0 0         croak "Argument passed to multisect_builds() must be hashref"
182             unless ref($args) eq 'HASH';
183 0           my %good_keys = map {$_ => 1} (qw| probe |);
  0            
184 0           for my $k (keys %{$args}) {
  0            
185             croak "Invalid key '$k' in hashref passed to multisect_builds()"
186 0 0         unless $good_keys{$k};
187             }
188 0           my %good_values = map {$_ => 1} (qw| error warning stderr |);
  0            
189 0           for my $v (values %{$args}) {
  0            
190             croak "Invalid value '$v' in 'probe' element in hashref passed to multisect_builds()"
191 0 0         unless $good_values{$v};
192             }
193             }
194 0 0         $args->{probe} = 'error' unless defined $args->{probe};
195 0           $self->{probe} = $args->{probe};
196              
197             # Prepare data structures in the object to hold results of build runs on a
198             # per target, per commit basis.
199             # Also, "prime" the data structure by performing build runs for each target
200             # on the first and last commits in the commit range, storing that build
201             # output on disk as well.
202              
203 0           my $start_time = time();
204 0           my $all_outputs = $self->_prepare_for_multisection();
205              
206             # At this point, C<$all_outputs> is an array ref with one
207             # element per commit in the commit range. If a commit has been visited, the
208             # element is a hash ref with 4 key-value pairs like the ones below. If the
209             # commit has not yet been visited, the element is C<undef>.
210             #
211             # [
212             # {
213             # commit => "7c9c5138c6a704d1caf5908650193f777b81ad23",
214             # commit_short => "7c9c513",
215             # file => "/home/jkeenan/learn/perl/multisect/7c9c513.make.errors.rpt.txt",
216             # md5_hex => "d41d8cd98f00b204e9800998ecf8427e",
217             # },
218             # undef,
219             # undef,
220             # ...
221             # undef,
222             # {
223             # commit => "8f6628e3029399ac1e48dfcb59c3cd30e5127c3e",
224             # commit_short => "8f6628e",
225             # file => "/home/jkeenan/learn/perl/multisect/8f6628e.make.errors.rpt.txt",
226             # md5_hex => "fdce7ff2f07a0a8cd64005857f4060d4",
227             # },
228             # ]
229             #
230             # Unlike F<Devel::Git::MultiBisect::Transitions> -- where we could have been
231             # testing multiple test files on each commit -- here we're only concerned with
232             # recording the presence or absence of build-time errors. Hence, we only need
233             # an array of hash refs rather than an array of arrays of hash refs.
234             #
235             # The multisection process will entail running C<run_build_on_one_commit()> over
236             # each commit selected by the multisection algorithm. Each run will insert a hash
237             # ref with the 4 KVPs into C<@{$self-E<gt>{all_outputs}}>. At the end of the
238             # multisection process those elements which we did not need to visit will still be
239             # C<undef>. We will then analyze the defined elements to identify the
240             # transitional commits.
241             #
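242             # B<The objective of multisection is to identify the git commits at which the
243             # build output> -- as reflected in a file on disk holding a list of normalized
244             # errors, normalized warnings or C<STDERR> -- B<materially changed.> We are using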
245             # an md5_hex value for that error file as a presumably valid unique identifier
246             # for that file's content. A transition point is a commit at which the output
247             # file's md5_hex differs from that of the immediately preceding commit. So, to
248             # identify the first transition point, we need to locate the commit at which the
249             # md5_hex changed from that found in the very first commit in the designated
250             # commit range. Once we've identified the first transition point, we'll look
251             # for the second transition point, i.e., that where the md5_hex changed from
252             # that observed at the first transition point. We'll continue that process
253             # until we get to a transition point where the md5_hex is identical to that of
254             # the very last commit in the commit range.
255              
256 0           my ($min_idx, $max_idx) = (0, $#{$self->{commits}});
  0            
257 0           my $this_target_status = 0;
258 0           my $current_start_idx = $min_idx;
259 0           my $current_end_idx = $max_idx;
260 0           my $overall_start_md5_hex = $self->{all_outputs}->[$min_idx]->{md5_hex};
261 0           my $overall_end_md5_hex = $self->{all_outputs}->[$max_idx]->{md5_hex};
262 0           my $n = 0;
263              
264 0           while (! $this_target_status) {
265              
266             # What gets (or may get) updated or assigned to in the course of one rep of this loop:
267             # $current_start_idx
268             # $current_end_idx
269             # $n
270             # $self->{all_outputs}
271              
272 0           my $h = sprintf("%d" => (($current_start_idx + $current_end_idx) / 2));
273 0           $self->_run_one_commit_and_assign($h);
274              
275 0           my $current_start_md5_hex = $self->{all_outputs}->[$current_start_idx]->{md5_hex};
276 0           my $target_h_md5_hex = $self->{all_outputs}->[$h]->{md5_hex};
277              
278             # Decision criteria:
279             # If $target_h_md5_hex eq $current_start_md5_hex, then the first
280             # transition is *after* index $h. Hence bisection should go upwards.
281              
282             # If $target_h_md5_hex ne $current_start_md5_hex, then the first
283             # transition has come *before* index $h. Hence bisection should go
284             # downwards. However, since the test of where the first transition is
285             # is that index j-1 has the same md5_hex as $current_start_md5_hex but
286             # index j has a different md5_hex, we have to do a run on
287             # j-1 as well.
288              
289             ($current_start_idx, $current_end_idx, $n) =
290             $self->_bisection_decision(
291             $target_h_md5_hex, $current_start_md5_hex, $h,
292             $self->{all_outputs},
293 0           $overall_end_md5_hex, $current_start_idx, $current_end_idx,
294             $max_idx, $n,
295             );
296 0           $this_target_status = $self->_evaluate_status_of_build_runs();
297             }
298              
299              
300 0           my $end_time = time();
301             my %timings = (
302             elapsed => $end_time - $start_time,
303 0           runs => scalar( grep {defined $_} @{$self->{all_outputs}} ),
  0            
  0            
304             );
305 0           $timings{mean} = sprintf("%.02f" => $timings{elapsed} / $timings{runs});
306 0 0         if ($self->{verbose}) {
307 0           say "Ran $timings{runs} runs; elapsed: $timings{elapsed} sec; mean: $timings{mean} sec";
308             }
309 0           $self->{timings} = \%timings;
310              
311 0           return 1;
312             }
313              
314             sub _prepare_for_multisection {
315 0     0     my $self = shift;
316              
317             # get_commits_range is inherited from parent
318              
319 0           my $all_commits = $self->get_commits_range();
320 0           $self->{all_outputs} = [ (undef) x scalar(@{$all_commits}) ];
  0            
321              
322 0           my %multisected_outputs_table;
323 0           for my $idx (0, $#{$all_commits}) {
  0            
324              
325 0           my $outputs = $self->run_build_on_one_commit($all_commits->[$idx]);
326 0           $self->{all_outputs}->[$idx] = $outputs;
327             }
328 0           return $self->{all_outputs};
329             }
330              
331             sub run_build_on_one_commit {
332 0     0 0   my ($self, $commit) = @_;
333 0   0       $commit //= $self->{commits}->[0]->{sha};
334 0 0         say "Building commit: $commit" if ($self->{verbose});
335              
336 0           my $starting_branch = $self->_configure_one_commit($commit);
337              
338 0           my $outputsref = $self->_build_one_commit($commit);
339             say "Tested commit: $commit; returning to: $starting_branch"
340 0 0         if ($self->{verbose});
341              
342             # We want to return to our basic branch (e.g., 'master', 'blead')
343             # before checking out a new commit.
344              
345 0 0         system(qq|git checkout --quiet $starting_branch|)
346             and croak "Unable to 'git checkout --quiet $starting_branch";
347              
348 0           $self->{commit_counter}++;
349 0 0         say "Commit counter: $self->{commit_counter}" if $self->{verbose};
350              
351 0           return $outputsref;
352             }
353              
354             sub _build_one_commit {
355 0     0     my ($self, $commit) = @_;
356 0           my $short_sha = substr($commit,0,$self->{short});
357 0           my $command_raw = $self->{make_command};
358              
359             # If probe => error or probe => warning, we are capturing the entire
360             # (2>&1) output of 'make' in a file and then filtering that file (in
361             # _filter_build_log()) for either C-level exceptions or C-level warnings.
362             # Hence, that file's name should end in 'make.output.txt'.
363             #
364             # If, however, probe => stderr, we are directly filtering the output of
365             # 'make' for STDERR and saving that in a file for subsequent
366             # commit-by-commit comparison of the STDERR output. Hence, the file for
367             # each commit should end in 'make.stderr.txt'.
368              
369 0           my ($build_log, $cmd);
370 0 0         if ($self->{probe} eq 'stderr') {
371             $build_log = File::Spec->catfile(
372             $self->{outputdir},
373 0           join('.' => (
374             $short_sha,
375             'make',
376             'stderr',
377             'txt'
378             )),
379             );
380 0           $cmd = qq|$command_raw 2>$build_log|;
381             }
382             else {
383             $build_log = File::Spec->catfile(
384             $self->{outputdir},
385 0           join('.' => (
386             $short_sha,
387             'make',
388             'output',
389             'txt'
390             )),
391             );
392 0           $cmd = qq|$command_raw >$build_log 2>&1|;
393             }
394 0 0         say "Actual 'make' command: $cmd" if $self->{verbose};
395 0           my $rv = system($cmd);
396 0           my $filtered_probes_file = $self->_filter_build_log($build_log, $short_sha);
397 0 0         say "Created $filtered_probes_file" if $self->{verbose};
398             return {
399 0           commit => $commit,
400             commit_short => $short_sha,
401             file => $filtered_probes_file,
402             md5_hex => hexdigest_one_file($filtered_probes_file),
403             };
404             }
405              
406             sub _filter_build_log {
407 0     0     my ($self, $buildlog, $short_sha) = @_;
408 0           my $tdir = tempdir( CLEANUP => 1 );
409              
410 0 0         if ($self->{probe} eq 'error') {
    0          
411             # the default case: probing for build-time errors
412 0           my $ackpattern = q|-A2 '^[^:]+:\d+:\d+:\s+error:'|;
413 0           my @raw_acklines = grep { ! m/^--\n/ } `ack $ackpattern $buildlog`;
  0            
414 0           chomp(@raw_acklines);
415 0 0         croak "Got incorrect count of lines from ack; should be divisible by 3"
416             unless scalar(@raw_acklines) % 3 == 0;
417              
418 0           my @refined_errors = ();
419 0           for (my $i=0; $i <= $#raw_acklines; $i += 3) {
420 0           my $j = $i + 2;
421 0           my @this_error = ();
422 0           my ($normalized) =
423             $raw_acklines[$i] =~ s/^([^:]+):\d+:\d+:(.*)$/$1:_:_:$2/r;
424 0           push @this_error, ($normalized, @raw_acklines[$i+1 .. $j]);
425 0           push @refined_errors, \@this_error;
426             }
427              
428             my $error_report_file =
429 0           File::Spec->catfile($self->{outputdir}, "$short_sha.make.errors.rpt.txt");
430 0           say "rpt: $error_report_file";
431 0 0         open my $OUT, '>', $error_report_file
432             or croak "Unable to open $error_report_file for writing";
433 0 0         if (@refined_errors) {
434 0           for (my $i=0; $i<=($#refined_errors -1); $i++) {
435 0           say $OUT join "\n" => @{$refined_errors[$i]};
  0            
436 0           say $OUT "--";
437             }
438 0           say $OUT join "\n" => @{$refined_errors[-1]};
  0            
439             }
440 0 0         close $OUT or croak "Unable to close $error_report_file after writing";
441 0           return $error_report_file;
442             }
443             elsif ($self->{probe} eq 'warning') {
444 0           my $ackpattern = qr/^
445             ([^:]+):
446             (\d+):
447             (\d+):\s+warning:\s+
448             (.*?)\s+\[-
449             (W.*)]$
450             /x;
451              
452 0           my @refined_warnings = ();
453 0 0         open my $IN, '<', $buildlog or croak "Unable to open $buildlog for reading";
454 0           while (my $l = <$IN>) {
455 0           chomp $l;
456 0 0         next unless $l =~ m/$ackpattern/;
457 0           my ($source, $line, $character, $text, $class) = ($1, $2, $3, $4, $5);
458 0           my $rl = "$source:_:_: warning: $text [$class]";
459 0           push @refined_warnings, $rl;
460             }
461 0 0         close $IN or croak "Unable to close $buildlog after reading";
462              
463             my $warning_report_file =
464 0           File::Spec->catfile($self->{outputdir}, "$short_sha.make.warnings.rpt.txt");
465 0 0         open my $OUT, '>', $warning_report_file
466             or croak "Unable to open $warning_report_file for writing";
467 0           say $OUT $_ for @refined_warnings;
468 0 0         close $OUT or croak "Unable to close $warning_report_file after writing";
469 0           return $warning_report_file;
470             }
471             else {
472             # $self->{probe} eq 'stderr'
473             # With this option, we simply record all STDERR from 'make' in the
474             # build log and return it.
475 0           return $buildlog;
476             }
477             }
478              
479             sub _evaluate_status_of_build_runs {
480 0     0     my ($self) = @_;
481 0           my @trans = ();
482 0           for my $o (@{$self->{all_outputs}}) {
  0            
483             push @trans,
484 0 0         defined $o ? $o->{md5_hex} : undef;
485             }
486 0           my $vls = validate_list_sequence(\@trans);
487 0 0 0       return ( (scalar(@{$vls}) == 1 ) and ($vls->[0])) ? 1 : 0;
488             }
489              
490             sub _run_one_commit_and_assign {
491              
492             # If we've already stashed a particular commit's outputs in all_outputs,
493             # then we don't need to actually perform a run.
494              
495             # This internal method assigns to all_outputs in place.
496              
497 0     0     my ($self, $idx) = @_;
498 0           my $this_commit = $self->{commits}->[$idx]->{sha};
499 0 0         unless (defined $self->{all_outputs}->[$idx]) {
500 0           say "\nAt commit counter $self->{commit_counter}, preparing to test commit ", $idx + 1, " of ", scalar(@{$self->{commits}})
501 0 0         if $self->{verbose};
502 0           my $these_outputs = $self->run_build_on_one_commit($this_commit);
503 0           $self->{all_outputs}->[$idx] = $these_outputs;
504             }
505             }
506              
507             =head2 C<get_multisected_outputs()>
508              
509             =over 4
510              
511             =item * Purpose
512              
513             Get results of C<multisect_builds()> (other than test output files
514             created) reported on a per commit basis.
515              
516             =item * Arguments
517              
518             my $multisected_outputs = $self->get_multisected_outputs();
519              
520             None; all data needed is already present in the object.
521              
522             =item * Return Value
523              
524             Reference to an array with one element for each commit in the commit range.
525              
526             =over 4
527              
528             =item *
529              
530             If a particular commit B<was not visited> in the course of
531             C<multisect_builds()>, then the array element is undefined. (The point
532             of multisection, of course, is to B<not> have to visit every commit in the
533             commit range in order to figure out the commits at which test output changed.)
534              
535             =item *
536              
537             If a particular commit B<was visited> in the course of
538             C<multisect_builds()>, then the array element is a hash reference whose
539             elements have the following keys:
540              
541             commit
542             commit_short
543             file
544             md5_hex
545              
546             =back
547              
548             =back
549              
550             =cut
551              
552             sub get_multisected_outputs {
553 0     0 1   my $self = shift;
554 0           return $self->{all_outputs};
555             }
556              
557             =head2 C<inspect_transitions()>
558              
559             =over 4
560              
561             =item * Purpose
562              
563             Get a data structure which reports on the most meaningful results of
564             C<multisect_builds()>, namely, the first commit, the last commit and all
565             transitional commits.
566              
567             =item * Arguments
568              
569             my $transitions = $self->inspect_transitions();
570              
571             None; all data needed is already present in the object.
572              
573             =item * Return Value
574              
575             Reference to a hash with 3 key-value pairs. Each element's value is another
576             hash reference. The elements of the top-level hash are:
577              
578             =over 4
579              
580             =item * C<oldest>
581              
582             Value is reference to hash keyed on C<idx>, C<md5_hex> and C<file>, whose
583             values are, respectively, the index position of the very first commit in the
584             commit range, the digest of that commit's test output and the path to the file
585             holding that output.
586              
587             =item * C<newest>
588              
589             Value is reference to hash keyed on C<idx>, C<md5_hex> and C<file>, whose
590             values are, respectively, the index position of the very last commit in the
591             commit range, the digest of that commit's test output and the path to the file
592             holding that output.
593              
594             =item * C<transitions>
595              
596             Value is reference to an array with one element for each transitional commit.
597             Each such element is a reference to a hash with keys C<older> and C<newer>.
598             In this context C<older> refers to the last commit in a sub-sequence with a
599             particular digest; C<newer> refers to the next immediate commit which is the
600             first commit in a new sub-sequence with a new digest.
601              
602             The values of C<older> and C<newer> are, in turn, references to hashes with
603             keys C<idx>, C<md5_hex> and C<file>. Their values are, respectively, the index
604             position of the particular commit in the commit range, the digest of that
605             commit's test output and the path to the file holding that output.
606              
607             =back
608              
609             Example:
610              
611              
612             =item * Comment
613              
614             The return value of C<inspect_transitions()> should be useful to the developer
615             trying to determine the various points in a long series of commits where a
616             target's test output changed in meaningful ways. Hence, it is really the
617             whole point of F<Devel::Git::MultiBisect::BuildTransitions>.
618              
619             =back
620              
621             =cut
622              
623             sub inspect_transitions {
624 0     0 1   my ($self) = @_;
625 0           my $multisected_outputs = $self->get_multisected_outputs();
626 0           my %transitions;
627 0           my $max_index = $#{$multisected_outputs};
  0            
628 0           $transitions{transitions} = [];
629             $transitions{oldest} = {
630             idx => 0,
631             md5_hex => $multisected_outputs->[0]->{md5_hex},
632             file => $multisected_outputs->[0]->{file},
633 0           };
634             $transitions{newest} = {
635             idx => $max_index,
636             md5_hex => $multisected_outputs->[$max_index]->{md5_hex},
637             file => $multisected_outputs->[$max_index]->{file},
638 0           };
639 0           for (my $j = 1; $j <= $max_index; $j++) {
640 0           my $i = $j - 1;
641             next unless (
642 0 0 0       (defined $multisected_outputs->[$i]) and
643             (defined $multisected_outputs->[$j])
644             );
645 0           my $older_md5_hex = $multisected_outputs->[$i]->{md5_hex};
646 0           my $newer_md5_hex = $multisected_outputs->[$j]->{md5_hex};
647 0           my $older_file = $multisected_outputs->[$i]->{file};
648 0           my $newer_file = $multisected_outputs->[$j]->{file};
649 0 0         unless ($older_md5_hex eq $newer_md5_hex) {
650 0           push @{$transitions{transitions}}, {
  0            
651             older => { idx => $i, md5_hex => $older_md5_hex, file => $older_file },
652             newer => { idx => $j, md5_hex => $newer_md5_hex, file => $newer_file },
653             }
654             }
655             }
656 0           return \%transitions;
657             }
658              
659             1;
660              
661             __END__