File Coverage

blib/lib/Devel/Git/MultiBisect.pm
Criterion Covered Total %
statement 26 124 20.9
branch 0 50 0.0
condition 0 3 0.0
subroutine 9 18 50.0
pod 5 5 100.0
total 40 200 20.0


line stmt bran cond sub pod time code
1             package Devel::Git::MultiBisect;
2 7     7   2964 use v5.14.0;
  7         22  
3 7     7   30 use warnings;
  7         10  
  7         153  
4 7     7   2574 use Devel::Git::MultiBisect::Init;
  7         16  
  7         242  
5 7         410 use Devel::Git::MultiBisect::Auxiliary qw(
6             clean_outputfile
7             hexdigest_one_file
8             validate_list_sequence
9 7     7   2878 );
  7         16  
10 7     7   40 use Carp;
  7         11  
  7         276  
11 7     7   34 use Cwd;
  7         9  
  7         280  
12 7     7   30 use File::Spec;
  7         12  
  7         192  
13 7     7   37 use File::Temp;
  7         10  
  7         393  
14 7     7   33 use List::Util qw(sum);
  7         10  
  7         8617  
15              
16             our $VERSION = '0.20';
17             $VERSION = eval $VERSION;
18              
19             =head1 NAME
20              
21             Devel::Git::MultiBisect - Study build and test output over a range of F<git> commits
22              
23             =head1 SYNOPSIS
24              
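25             You will typically construct an object of a class which is a child of
26             F<Devel::Git::MultiBisect>, such as F<Devel::Git::MultiBisect::AllCommits>,
27             F<Devel::Git::MultiBisect::Transitions> or
28             F<Devel::Git::MultiBisect::BuildTransitions>. All methods documented in this
29             parent package may be called from any of these child classes.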
30              
31             use Devel::Git::MultiBisect::AllCommits;
32             $self = Devel::Git::MultiBisect::AllCommits->new(\%parameters);
33              
34             ... or
35              
36             use Devel::Git::MultiBisect::Transitions;
37             $self = Devel::Git::MultiBisect::Transitions->new(\%parameters);
38              
39             ... or
40              
41             use Devel::Git::MultiBisect::BuildTransitions;
42             $self = Devel::Git::MultiBisect::BuildTransitions->new(\%parameters);
43              
44             ... and then:
45              
46             $commit_range = $self->get_commits_range();
47              
48             $full_targets = $self->set_targets(\@target_args);
49              
50             $outputs = $self->run_test_files_on_one_commit($commit_range->[0]);
51              
52             ... followed by methods specific to the child class.
53              
54             ... and then perhaps also:
55              
56             $timings = $self->get_timings();
57              
58             =head1 DESCRIPTION
59              
60             Given a Perl library or application kept in F<git> for version control, it is
61             often useful to be able to compare the output collected from running one or
62             more test files over a range of F<git> commits. If that range is sufficiently
63             large, a test may fail in B<more than one way> over that range.
64              
65             If that is the case, then simply asking, I<"When did this file start to
66             fail?"> -- a question which C<git bisect> is designed to answer -- is
67             insufficient. In order to identify more than one point of failure, we may
68             need to (a) capture the test output for each commit; or, (b) capture the test
69             output only at those commits where the output changed. The output of a run of
70             a test file may change for a variety of reasons: test failures, segfaults,
71             changes in the number or content of tests, etc.
72              
73             F<Devel::Git::MultiBisect> provides methods to achieve that objective. Its
74             child classes, F<Devel::Git::MultiBisect::AllCommits> and
75             F<Devel::Git::MultiBisect::Transitions>, provide different flavors of that
76             functionality for objectives (a) and (b), respectively. Please refer to their
77             documentation for further discussion.
78              
79             Child class F<Devel::Git::MultiBisect::BuildTransitions> focuses on failures
80             during the B<build> process rather than during testing. It can handle three
81             different types of problems which arise when you run F<make> to build a Perl
82             library or to build Perl itself:
83              
84             =over 4
85              
86             =item * Exceptions detected by the C-compiler
87              
88             =item * Warnings emitted by the C-compiler
89              
90             =item * Warnings emitted by F<perl> or other languages invoked during F<make>
91              
92             =back
93              
94             See the F<Devel::Git::MultiBisect::BuildTransitions> documentation for further details.
95              
96             =head2 GLOSSARY
97              
98             =over 4
99              
100             =item * B<commit>
101              
102             A source code change set entered ("committed") to a F<git> repository. Each
103             commit is denoted by a SHA. In this library, whenever a commit is called for
104             as the argument to a function, you can also use a F<git tag>.
105              
106             =item * B<commit range>
107              
108             The range of sequential commits (determined by F<git log>) requested for analysis.
109              
110             =item * B<target>
111              
112             A test file from the test suite of the application or library under study.
113              
114             =item * B<test output>
115              
116             What is sent to STDOUT or STDERR as a result of calling a test program such as
117             F<prove> or F<t/harness> on an individual target file. Currently we assume
118             that all such test programs are written based on the
119             L<Test Anything Protocol (TAP)|https://en.wikipedia.org/wiki/Test_Anything_Protocol>.
120              
121             =item * B<transitional commit>
122              
123             A commit at which the test output for a given target changes from that of the
124             commit immediately preceding.
125              
126             =item * B<digest>
127              
128             A string holding the output of a cryptographic process run on test output
129             which uniquely identifies that output. (Currently, we use the
130             C<md5_hex> algorithm.) We assume that if the test output does
131             not change between one or more commits, then that commit is not a transitional
132             commit.
133              
134             Note: Before taking a digest on a particular test output, we exclude text
135             such as timings which are highly likely to change from one run to the next and
136             which would introduce spurious variability into the digest calculations.
137              
138             =item * B<multisection> or B<multiple bisection>
139              
140             A series of configure-build-test process sequences at those commits within the
141             commit range which are selected by a bisection algorithm.
142              
143             Normally, when we bisect (via F<git bisect>, F<Porting/bisect.pl> or
144             otherwise), we are seeking a I<single> point where a Boolean result -- yes/no,
145             true/false, pass/fail -- is returned. What the test run outputs to STDOUT or
146             STDERR is a lesser concern.
147              
148             B<In multisection we bisect repeatedly to determine I<all> points where the output
149             of the test command changes> -- regardless of whether that change is a C<PASS>,
150             C<FAIL> or whatever. We capture the output for later human or programmatic
151             examination.
152              
153             =back
154              
155             =head1 METHODS
156              
157             =head2 C<new()>
158              
159             =over 4
160              
161             =item * Purpose
162              
163             Constructor.
164              
165             =item * Arguments
166              
167             $self = Devel::Git::MultiBisect::AllCommits->new(\%params);
168              
169             or
170              
171             $self = Devel::Git::MultiBisect::Transitions->new(\%params);
172              
173             or
174              
175             $self = Devel::Git::MultiBisect::BuildTransitions->new(\%params);
176              
177             Reference to a hash, typically the return value of
178             C<Devel::Git::MultiBisect::Opts::process_options()>.
179              
180             The hashref passed as argument must contain key-value pairs for C<gitdir>
181             and C<outputdir>. C<new()> tests for the existence of each of
182             these directories.
183              
184             =item * Return Value
185              
186             Object of Devel::Git::MultiBisect child class.
187              
188             =back
189              
190             =cut
191              
192             sub new {
193 0     0 1   my ($class, $params) = @_;
194              
195 0           my $data = Devel::Git::MultiBisect::Init::init($params);
196              
197 0           return bless $data, $class;
198             }
199              
200             =head2 C<get_commits_range()>
201              
202             =over 4
203              
204             =item * Purpose
205              
206             Identify the SHAs of each F<git> commit identified by C<new()>.
207              
208             =item * Arguments
209              
210             $commit_range = $self->get_commits_range();
211              
212             None; all data needed is already in the object.
213              
214             =item * Return Value
215              
216             Array reference, each element of which is a SHA.
217              
218             =back
219              
220             =cut
221              
222             sub get_commits_range {
223 0     0 1   my $self = shift;
224 0           return [ map { $_->{sha} } @{$self->{commits}} ];
  0            
  0            
225             }
226              
227             =head2 C<set_targets()>
228              
229             =over 4
230              
231             =item * Purpose
232              
233             Identify the test files which will be run at different points in the commits
234             range. We shall assume that the test file has existed with its name unchanged
235             over the entire commit range.
236              
237             =item * Arguments
238              
239             $target_args = [
240             't/44_func_hashes_mult_unsorted.t',
241             't/45_func_hashes_alt_dual_sorted.t',
242             ];
243             $full_targets = $self->set_targets($target_args);
244              
245             Reference to an array holding the relative paths beneath the C<gitdir> to the
246             test files selected for examination.
247              
248             =item * Return Value
249              
250             Reference to an array holding hash references with these elements:
251              
252             =over 4
253              
254             =item * C<path>
255              
256             Absolute paths to the test files selected for examination. Test file is
257             tested for its existence.
258              
259             =item * C<stub>
260              
261             String composed by taking an element in the array ref passed as argument and
262             substituting underscores (C<_>) for forward slash (C</>) and dot (C<.>)
263             characters. So,
264              
265             t/44_func_hashes_mult_unsorted.t
266              
267             ... becomes:
268              
269             t_44_func_hashes_mult_unsorted_t
270              
271             =back
272              
273             =back
274              
275             =cut
276              
277             sub set_targets {
278 0     0 1   my ($self, $explicit_targets) = @_;
279              
280 0           my @raw_targets = @{$self->{targets}};
  0            
281              
282             # If set_targets() is provided with an appropriate argument
283             # ($explicit_targets), override whatever may have been stored in the
284             # object by new().
285              
286 0 0         if (defined $explicit_targets) {
287 0 0         croak "Explicit targets passed to set_targets() must be in array ref"
288             unless ref($explicit_targets) eq 'ARRAY';
289 0           @raw_targets = @{$explicit_targets};
  0            
290             }
291              
292 0           my @full_targets = ();
293 0           my @missing_files = ();
294 0           for my $rt (@raw_targets) {
295 0           my $ft = File::Spec->catfile($self->{gitdir}, $rt);
296 0 0         if (! -e $ft) { push @missing_files, $ft; next }
  0            
  0            
297 0           my $stub;
298 0           ($stub = $rt) =~ s{[./]}{_}g;
299 0           push @full_targets, {
300             path => $ft,
301             stub => $stub,
302             };
303             }
304 0 0         if (@missing_files) {
305 0           croak "Cannot find file(s) to be tested: @missing_files";
306             }
307 0           $self->{targets} = [ @full_targets ];
308 0           return \@full_targets;
309             }
310              
311             =head2 C<run_test_files_on_one_commit()>
312              
313             =over 4
314              
315             =item * Purpose
316              
317             Capture the output from running the selected test files at one specific F<git> checkout.
318              
319             =item * Arguments
320              
321             $outputs = $self->run_test_files_on_one_commit("2a2e54a");
322              
323             or
324              
325             $excluded_targets = [
326             't/45_func_hashes_alt_dual_sorted.t',
327             ];
328             $outputs = $self->run_test_files_on_one_commit("2a2e54a", $excluded_targets);
329              
330             =over 4
331              
332             =item 1
333              
334             String holding the SHA from a single commit in the repository. This string
335             would typically be one of the elements in the array reference returned by
336             C<$self->get_commits_range()>. If no argument is provided, the method will
337             default to using the first element in the array reference returned by
338             C<$self->get_commits_range()>.
339              
340             =item 2
341              
342             Reference to array of target test files to be excluded from a particular
343             invocation of this method. Optional, but will die if argument is not an array
344             reference.
345              
346             =back
347              
348             =item * Return Value
349              
350             Reference to an array, each element of which is a hash reference with the
351             following elements:
352              
353             =over 4
354              
355             =item * C<commit>
356              
357             String holding the SHA from the commit passed as argument to this method (or
358             the default described above).
359              
360             =item * C<commit_short>
361              
362             String holding the value of C<commit> (above) truncated to the number of characters
363             specified in the C<short> element passed to the constructor; defaults to 7.
364              
365             =item * C<file_stub>
366              
367             String holding a rewritten version of the relative path beneath C<gitdir> of
368             the test file being run. In this relative path forward slash (C</>) and dot
369             (C<.>) characters are changed to underscores (C<_>). So,
370              
371             t/44_func_hashes_mult_unsorted.t
372              
373             ... becomes:
374              
375             t_44_func_hashes_mult_unsorted_t
376              
377             =item * C<file>
378              
379             String holding the full path to the file holding the TAP output collected
380             while running one test file at the given commit. The following example shows
381             how that path is calculated. Given:
382              
383             output directory (outputdir) => '/tmp/DQBuT_SRAY/'
384             SHA (commit) => '2a2e54af709f17cc6186b42840549c46478b6467'
385             shortened SHA (commit_short) => '2a2e54a'
386             test file (target->[$i]) => 't/44_func_hashes_mult_unsorted.t'
387              
388             ... the file is placed in the directory specified by C<outputdir>. We then
389             join C<commit_short> (the shortened SHA), C<file_stub> (the rewritten relative
390             path) and the strings C<output> and C<txt> with a dot to yield this value for
391             the C<file> element:
392              
393             2a2e54a.t_44_func_hashes_mult_unsorted_t.output.txt
394              
395             =item * C<md5_hex>
396              
397             String holding the return value of
398             C<Devel::Git::MultiBisect::Auxiliary::hexdigest_one_file()> run with the file
399             designated by the C<file> element as an argument. (More precisely, the file
400             as modified by C<clean_outputfile()>.)
401              
402             =back
403              
404             Example:
405              
406             [
407             {
408             commit => "2a2e54af709f17cc6186b42840549c46478b6467",
409             commit_short => "2a2e54a",
410             file => "/tmp/1mVnyd59ee/2a2e54a.t_44_func_hashes_mult_unsorted_t.output.txt",
411             file_stub => "t_44_func_hashes_mult_unsorted_t",
412             md5_hex => "31b7c93474e15a16d702da31989ab565",
413             },
414             {
415             commit => "2a2e54af709f17cc6186b42840549c46478b6467",
416             commit_short => "2a2e54a",
417             file => "/tmp/1mVnyd59ee/2a2e54a.t_45_func_hashes_alt_dual_sorted_t.output.txt",
418             file_stub => "t_45_func_hashes_alt_dual_sorted_t",
419             md5_hex => "6ee767b9d2838e4bbe83be0749b841c1",
420             },
421             ]
422              
423             =item * Comment
424              
425             In this method's current implementation, we start with a C<git checkout> from
426             the repository at the specified C<commit>. We configure (I<e.g.,> C<perl
427             Makefile.PL>) and build (I<e.g.,> C<make>) the source code. We then test each
428             of the test files we have targeted (I<e.g.,> C<prove -vb
429             relative/path/to/test_file.t>). We redirect both STDOUT and STDERR to
430             C<outputfile>, clean up the outputfile to remove the line containing timings
431             (as that introduces unwanted variability in the C<md5_hex> values) and compute
432             the digest.
433              
434             This implementation is very much subject to change.
435              
436             If a true value for C<verbose> has been passed to the constructor, the method
437             prints C<Created [outputfile]> to STDOUT before returning.
438              
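439             B<Note:> While this method is publicly documented, in actual use you probably
440             will not need to call it directly. Instead, you will probably use either
441             C<Devel::Git::MultiBisect::AllCommits::run_test_files_on_all_commits()> or
442             C<Devel::Git::MultiBisect::Transitions::multisect_all_targets()>.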
443              
444             =back
445              
446             =cut
447              
448             sub run_test_files_on_one_commit {
449 0     0 1   my ($self, $commit, $excluded_targets) = @_;
450 0   0       $commit //= $self->{commits}->[0]->{sha};
451 0 0         say "Testing commit: $commit" if ($self->{verbose});
452              
453 0 0         if (defined $excluded_targets) {
454 0 0         if (ref($excluded_targets) ne 'ARRAY') {
455 0           croak "excluded_targets, if defined, must be in array reference";
456             }
457             }
458             else {
459 0           $excluded_targets = [];
460             }
461 0           my %excluded_targets;
462 0           for my $t (@{$excluded_targets}) {
  0            
463 0           my $ft = File::Spec->catfile($self->{gitdir}, $t);
464 0           $excluded_targets{$ft}++;
465             }
466              
467             my $current_targets = [
468 0           grep { ! exists $excluded_targets{$_->{path}} }
469 0           @{$self->{targets}}
  0            
470             ];
471              
472 0           my $starting_branch = $self->_configure_build_one_commit($commit);
473              
474 0           my $outputsref = $self->_test_one_commit($commit, $current_targets);
475             say "Tested commit: $commit; returning to: $starting_branch"
476 0 0         if ($self->{verbose});
477              
478             # We want to return to our basic branch (e.g., 'master', 'blead')
479             # before checking out a new commit.
480              
481 0 0         system(qq|git checkout --quiet $starting_branch|)
482             and croak "Unable to 'git checkout --quiet $starting_branch";
483              
484 0           $self->{commit_counter}++;
485 0 0         say "Commit counter: $self->{commit_counter}" if $self->{verbose};
486              
487 0           return $outputsref;
488             }
489              
490             sub _configure_one_commit {
491 0     0     my ($self, $commit) = @_;
492 0 0         chdir $self->{gitdir} or croak "Unable to change to $self->{gitdir}";
493 0 0         system(qq|git clean --quiet -dfx|) and croak "Unable to 'git clean --quiet -dfx'";
494 0           my $starting_branch = $self->{branch};
495              
496 0 0         system(qq|git checkout --quiet $commit|) and croak "Unable to 'git checkout --quiet $commit'";
497 0 0         say "Running '$self->{configure_command}'" if $self->{verbose};
498 0 0         system($self->{configure_command}) and croak "Unable to run '$self->{configure_command})'";
499 0           return $starting_branch;
500             }
501              
502             sub _configure_build_one_commit {
503 0     0     my ($self, $commit) = @_;
504              
505 0           my $starting_branch = $self->_configure_one_commit($commit);
506              
507 0 0         say "Running '$self->{make_command}'" if $self->{verbose};
508 0 0         system($self->{make_command}) and croak "Unable to run '$self->{make_command})'";
509              
510 0           return $starting_branch;
511             }
512              
513             sub _test_one_commit {
514 0     0     my ($self, $commit, $current_targets) = @_;
515 0           my $short = substr($commit,0,$self->{short});
516 0           my @outputs;
517 0           for my $target (@{$current_targets}) {
  0            
518             my $outputfile = File::Spec->catfile(
519             $self->{outputdir},
520             join('.' => (
521             $short,
522             $target->{stub},
523 0           'output',
524             'txt'
525             )),
526             );
527 0           my $command_raw = $self->{test_command};
528 0           my $cmd;
529 0 0         unless ($command_raw eq 'harness') {
530 0           $cmd = qq|$command_raw $target->{path} >$outputfile 2>&1|;
531             }
532             else {
533 0           $cmd = qq|cd t; ./perl harness -v $target->{path} >$outputfile 2>&1; cd -|;
534             }
535 0 0         say "Running '$cmd'" if $self->{verbose};
536 0 0         system($cmd) and croak "Unable to run test_command";
537 0           $outputfile = clean_outputfile($outputfile);
538             push @outputs, {
539             commit => $commit,
540             commit_short => $short,
541             file => $outputfile,
542             file_stub => $target->{stub},
543 0           md5_hex => hexdigest_one_file($outputfile),
544             };
545 0 0         say "Created $outputfile" if $self->{verbose};
546             }
547 0           return \@outputs;
548             }
549              
550             sub _bisection_decision {
551 0     0     my ($self, $target_h_md5_hex, $current_start_md5_hex, $h, $relevant_self,
552             $overall_end_md5_hex, $current_start_idx, $current_end_idx, $max_idx, $n) = @_;
553 0 0         if ($target_h_md5_hex ne $current_start_md5_hex) {
554 0           my $g = $h - 1;
555 0           $self->_run_one_commit_and_assign($g);
556 0           my $target_g_md5_hex = $relevant_self->[$g]->{md5_hex};
557 0 0         if ($target_g_md5_hex eq $current_start_md5_hex) {
558 0 0         if ($target_h_md5_hex eq $overall_end_md5_hex) {
559             }
560             else {
561 0           $current_start_idx = $h;
562 0           $current_end_idx = $max_idx;
563             }
564 0           $n++;
565             }
566             else {
567             # Bisection should continue downwards
568 0           $current_end_idx = $h;
569 0           $n++;
570             }
571             }
572             else {
573             # Bisection should continue upwards
574 0           $current_start_idx = $h;
575 0           $n++;
576             }
577 0           return ($current_start_idx, $current_end_idx, $n);
578             }
579              
580             =head2 C<get_timings()>
581              
582             =over 4
583              
584             =item * Purpose
585              
586             Get information on the time a multisection took to run.
587              
588             =item * Arguments
589              
590             None; all data needed is already in the object.
591              
592             =item * Return Value
593              
594             Hash reference. The selection of elements in this hashref will depend on
595             which subclass of F<Devel::Git::MultiBisect> you are using and may differ among
596             subclasses. Example:
597              
598             { elapsed => 4297, mean => 186.83, runs => 23 }
599              
600             In this example (taken from a run of one test file over 220 commits in Perl 5
601             blead), 23 runs were needed to achieve a result. These took 4297 seconds
602             (approximately 71 minutes) with a mean run time of approximately 3 minutes
603             each.
604              
605             Method will return undefined value if timings are not yet available within the
606             object.
607              
608             =back
609              
610             =cut
611              
612             sub get_timings {
613 0     0 1   my $self = shift;
614 0 0         return unless exists $self->{timings};
615 0           return $self->{timings};
616             }
617              
618             =head1 SUPPORT
619              
620             Please report any bugs by mail to C<bug-Devel-Git-MultiBisect@rt.cpan.org>
621             or through the web interface at L<http://rt.cpan.org>.
622              
623             =head1 AUTHOR
624              
625             James E. Keenan (jkeenan at cpan dot org). When sending correspondence, please
626             include 'Devel::Git::MultiBisect' or 'Devel-Git-MultiBisect' in your subject line.
627              
628             Creation date: October 12 2016. Last modification date: September 12 2021.
629              
630             Development repository: L<https://github.com/jkeenan/devel-git-multibisect>
631              
632             =head1 ACKNOWLEDGEMENTS
633              
634             Thanks to the following contributors and reviewers:
635              
636             =over 4
637              
638             =item * Smylers
639              
640             For naming suggestion: L
641              
642             =item * Ricardo Signes
643              
644             For feedback during initial development.
645              
646             =item * Eily and Monk::Thomas
647              
648             For diagnosis of regex problems in L.
649              
650             =item * Max Maischein
651              
652             For diagnosis of File::Temp problems in L.
653              
654             =back
655              
656             =head1 COPYRIGHT
657              
658             Copyright (c) 2016-2021 James E. Keenan. United States. All rights reserved.
659             This is free software and may be distributed under the same terms as Perl
660             itself.
661              
662             =cut
663              
664             1;
665