File Coverage

blib/lib/Code/Quality.pm

Criterion	Covered	Total	%
statement	72	124	58.0
branch	16	52	30.7
condition	1	3	33.3
subroutine	22	28	78.5
pod	4	12	33.3
total	115	219	52.5

line	stmt	bran	cond	sub	pod	time	code
1							package Code::Quality;
2
3	3			3		58829	use 5.020000;
	3					20
4	3			3		13	use strict;
	3					5
	3					56
5	3			3		13	use warnings;
	3					5
	3					65
6	3			3		1420	use utf8;
	3					33
	3					12
7	3			3		82	use re '/s';
	3					5
	3					236
8	3			3		986	use parent qw/Exporter/;
	3					752
	3					12
9
10							=encoding utf-8
11
12							=head1 NAME
13
14							Code::Quality - use static analysis to compute a "code quality" metric for a program
15
16							=head1 SYNOPSIS
17
18							use v5.20;
19							use Code::Quality;
20							# code to test (required)
21							my $code = ...;
22							# reference code to compare against (optional)
23							my $reference = ...;
24
25							my $warnings =
26							analyse_code
27							code => $code,
28							reference => $reference,
29							language => 'C';
30							if (defined $warnings) {
31							my $stars = star_rating_of_warnings $warnings;
32							say "Program is rated $stars stars"; # 3 is best, 1 is worst
33							my @errors = grep { $_->[0] eq 'error' } @$warnings;
34							if (@errors > 0) {
35							say 'Found ', scalar @errors, ' errors';
36							say "First error: $errors[0][1]";
37							}
38							} else {
39							say 'Failed to analyse code';
40							}
41
42							=head1 DESCRIPTION
43
44							Code::Quality runs a series of tests on a piece of source code to
45							compute a code quality metric. Each test returns a possibly empty list
46							of warnings, that is potential issues present in the source code. This
47							list of warnings can then be turned into a star rating: 3 stars for
48							good code, 2 stars for acceptable code, and 1 stars for dubious code.
49
50							=head2 Warnings
51
52							A warning is an arrayref C<< [type, message, row, column] >>, where
53							the first two entries are mandatory and the last two can be either
54							both present or both absent.
55							The type is one of C<< qw/error warning info/ >>.
56
57							Four-element warnings correspond to ACE code editor annotations.
58							Two-element warnings apply to the entire document, not a specific
59							place in the code.
60
61							=head2 Tests
62
63							A test is a function that takes key-value arguments:
64
65							B(code => I<$code>, language => I<$language>, [reference => I<$reference>, formatted_code => I<$formatted>])
66
67							Here I<$code> is the code to be tested, I<$language> is the
68							programming language, I<$reference> is an optional reference source
69							code to compare I<$code> against, and I<$formatted_code> is the
70							optional result of running I<$code> through a source code formatter.
71
72							Each test returns undef if the test failed (for example, if the test
73							cannot be applied to this programming language), and an arrayref of
74							warnings otherwise.
75
76							Most tests have several configurable parameters, which come from
77							global variables. The documentation of each test mentions the global
78							variables that affect its operations. C can be used to run a
79							test with special configuration once, without affecting other code:
80
81							{
82							local $Code::Quality::bla_threshold = 5;
83							test_bla code => $code, language => 'C';
84							}
85
86							=cut
87
88							our $VERSION = '0.002';
89							our @EXPORT = qw/analyse_code star_rating_of_warnings/;
90							our @EXPORT_OK = (@EXPORT, qw/test_lines test_clang_tidy test_lizard/);
91							our %EXPORT_TAGS = (default => \@EXPORT, all => \@EXPORT_OK);
92
93							# set this to a "Test::More::diag"-like function to get debug output
94							our $DEBUG;
95
96	3			3		353	use Carp qw/carp croak/;
	3					6
	3					179
97	3			3		2322	use Cpanel::JSON::XS qw/encode_json/;
	3					13594
	3					148
98	3			3		1688	use File::Temp qw//;
	3					46465
	3					83
99	3			3		1057	use File::Which qw/which/;
	3					2366
	3					147
100	3			3		17	use List::Util qw/reduce any/;
	3					5
	3					238
101	3			3		1708	use Text::CSV qw/csv/;
	3					32109
	3					4667
102
103	8	50		8	0	27	sub diag { $DEBUG->(@_) if defined $DEBUG }
104
105							sub remove_empty_lines {
106	16			16	0	23	my ($code) = @_;
107	16					234	$code =~ s/\n\s*/\n/g; # remove empty lines
108	16					48	$code =~ s/^\s*//g; # remove leading whitespace
109	16					31	return $code;
110							}
111
112							our $warn_code_is_long = [warning => 'A shorter solution is possible'];
113							our $warn_code_is_very_long = [error => 'A significantly shorter solution is possible'];
114
115							# a criterion is a pair [abs, rel]. a program matches a criterion if
116							# the absolute loc difference is at most abs AND the relative loc
117							# difference is at most rel. These criteria are used to categorise
118							# code as "short", "long", or "very long".
119
120							# code is considered short if one of these criteria match
121							our @short_code_criteria = (
122							[1e9, 0.3],
123							[20, 0.5],
124							[10, 1],
125							);
126
127							# code is considered long if one of these criteria match, and none of
128							# the above do
129							our @long_code_criteria = (
130							[1e9, 0.5],
131							[20, 1],
132							[10, 2],
133							);
134
135							# code is considered very long if none of the criteria above match
136
137							=head3 test_lines
138
139							This test counts non-empty lines in both the formatted code and the reference.
140							If no formatted code is available, the original code is used.
141							If the code is significantly longer than the reference, it returns a warning.
142							If the code is much longer, it returns an error.
143							Otherwise it returns an empty arrayref.
144
145							The thresholds for raising a warning/error are available in the source
146							code, see global variables C<@short_code_criteria> and
147							C<@long_code_criteria>.
148
149							This test fails if no reference is provided, but is language-agnostic
150
151							=cut
152
153							sub test_lines {
154	8			8	1	2204	my %args = @_;
155	8		33			32	my $user_solution = $args{formatted_code} // $args{code};
156	8					11	my $official_solution = $args{reference};
157	8	50				15	return unless defined $official_solution;
158
159	8					22	$user_solution = remove_empty_lines($user_solution . "\n");
160	8					44	$official_solution = remove_empty_lines($official_solution . "\n");
161
162							# Count number of lines of code from both solutions.
163	8					75	my $loc_user_solution = () = $user_solution =~ /\n/g;
164	8					43	my $loc_official_solution = () = $official_solution =~ /\n/g;
165	8	50				24	return if $loc_official_solution == 0;
166
167	8					10	my $loc_absolute_diff = $loc_user_solution - $loc_official_solution;
168	8					16	my $loc_relative_diff = $loc_absolute_diff / $loc_official_solution;
169	8					42	diag "abs diff: $loc_absolute_diff, rel diff: $loc_relative_diff";
170							my $predicate = sub {
171	29	100		29		105	$loc_absolute_diff <= $_->[0] && $loc_relative_diff <= $_->[1]
172	8					2107	};
173
174	8	100				26	return [] if any \&$predicate, @short_code_criteria;
175	4	100				9	return [$warn_code_is_long] if any \&$predicate, @long_code_criteria;
176	1					5	return [$warn_code_is_very_long]
177							}
178
179							=head3 test_clang_tidy
180
181							This test runs the
182							L static analyser
183							on the code and returns all warnings found.
184
185							The clang-tidy checks in use are determined by two global variables,
186							each of which is a list of globs such as C. The checks in
187							C<@clang_tidy_warnings> produce warnings, while the checks in
188							C<@clang_tidy_errors> produce errors. There is also a hash
189							C<%clang_tidy_check_options> which contains configuration for the
190							checks. Finally, the path to the clang-tidy executable is
191							C<$clang_tidy_path>, which is initialized by looking in the PATH using
192							L. Set this variable to undef to disable this test.
193
194							This test does not require a reference, but is limited to languages
195							that clang-tidy understands. This is controlled by the global variable
196							C<%clang_tidy_extension_of_language>, which contains file extensions
197							for the supported languages.
198
199							=cut
200
201							our $clang_tidy_path = which 'clang-tidy';
202
203							our %clang_tidy_extension_of_language = (
204							'C' => '.c',
205							'C++' => '.cpp',
206							);
207
208							our @clang_tidy_warnings =
209							qw/clang-analyzer-deadcode.DeadStores
210							clang-analyzer-unix.*
211							clang-analyzer-valist.*
212							misc-*
213							modernize-*
214							performance-*
215							readability-*
216							-readability-braces-around-statements/;
217
218							our @clang_tidy_errors =
219							qw/bugprone-*
220							clang-analyzer-core.*
221							clang-analyzer-cplusplus.*
222							clang-diagnostic-*/;
223
224							our %clang_tidy_check_options = (
225							'readability-implicit-bool-conversion.AllowIntegerConditions' => 1,
226							);
227
228							sub write_code_to_file {
229	0			0	0	0	my ($code, $extension) = @_;
230	0					0	my $fh = File::Temp->new(
231							TEMPLATE => 'code-qualityXXXXX',
232							TMPDIR => 1,
233							SUFFIX => $extension,
234							);
235	0	0				0	print $fh $code or croak 'Failed to write code to temporary file';
236	0	0				0	close $fh or croak 'Failed to close temporary file';
237	0					0	$fh
238							}
239
240							sub test_clang_tidy {
241	3			3	1	9	my %args = @_;
242	3	50				9	return unless $clang_tidy_path;
243	0					0	my $extension = $clang_tidy_extension_of_language{uc $args{language}};
244	0	0				0	return unless defined $extension;
245
246	0					0	my $fh = write_code_to_file $args{code}, $extension;
247
248	0					0	my $checks = join ',', '-*', @clang_tidy_warnings, @clang_tidy_errors;
249	0					0	my $errors = join ',', '-*', @clang_tidy_errors;
250	0					0	my @check_options;
251	0					0	while (my ($key, $value) = each %clang_tidy_check_options) {
252	0					0	push @check_options, { key => $key, value => $value }
253							}
254	0					0	my $config = encode_json +{
255							CheckOptions => \@check_options,
256							Checks => $checks,
257							WarningsAsErrors => $errors,
258							};
259
260	0					0	my @output = qx,$clang_tidy_path -config='$config' $fh 2>/dev/null,;
261	0					0	my $signal = $? & 127;
262	0	0				0	if ($signal) {
263	0					0	carp "Failed to run $clang_tidy_path, \$? is $?";
264							return
265	0					0	}
266
267	0					0	my @warnings;
268	0					0	for my $line (@output) {
269	0	0				0	my ($row, $col, $type, $msg) =
270							$line =~ /$fh:(\d+):(\d+): (\w+): (.*)$/
271							or next;
272	0					0	chomp $msg;
273	0					0	$msg =~ s/,-warnings-as-errors//;
274	0	0				0	$type = 'info' if $type eq 'note';
275	0					0	push @warnings, [$type, $msg, $row, $col]
276							}
277							\@warnings
278	0					0	}
279
280							=head3 test_lizard
281
282							This test runs the L
283							code complexity analyser on the code, and reports a warning for every
284							function that has high cyclomatic complexity, or that is too long.
285
286							The thresholds that determine whether a warning or an error are raised
287							are determined by four global variables, C<$lizard_warning_loc>,
288							C<$lizard_error_loc>, C<$lizard_warning_ccn>, C<$lizard_error_ccn>.
289							Finally, the path to the lizard executable is C<$lizard_path>, which
290							is initialized by looking in the PATH using L. Set this
291							variable to undef to disable this test.
292
293							This test does not require a reference, but is limited to languages
294							that lizard understands. This is controlled by the global variable
295							C<%lizard_extension_of_language>, which contains file extensions for
296							the supported languages.
297
298							=cut
299
300							our $lizard_path = which 'lizard.py' // which 'lizard';
301
302							our %lizard_extension_of_language = (
303							'C' => '.c',
304							'C++' => '.cpp',
305							'JAVA' => '.java',
306							);
307
308							our $lizard_warning_loc = 70;
309							our $lizard_error_loc = 140;
310							our $lizard_warning_ccn = 15;
311							our $lizard_error_ccn = 25;
312
313							# call each of these functions with line number as argument
314	0			0	0	0	sub warn_function_is_long { [warning => "This function is over $lizard_warning_loc lines long [long-function]", shift, 0] }
315	0			0	0	0	sub warn_function_is_very_long { [error => "This function is over $lizard_error_loc lines long [very-long-function]", shift, 0] }
316	0			0	0	0	sub warn_function_is_complex { [warning => "Cyclomatic complexity of this function is over $lizard_warning_ccn \[high-complexity]", shift, 0] }
317	0			0	0	0	sub warn_function_is_very_complex { [error => "Cyclomatic complexity of this function is over $lizard_error_ccn \[very-high-complexity]", shift, 0] }
318
319							sub test_lizard {
320	3			3	1	7	my %args = @_;
321	3	50				10	return unless $lizard_path;
322	0					0	my $extension = $lizard_extension_of_language{uc $args{language}};
323	0	0				0	return unless defined $extension;
324
325	0					0	my $fh = write_code_to_file $args{code}, $extension;
326
327	0					0	my $output = qx,$lizard_path --csv -i -1 -m -V $fh 2>/dev/null,;
328	0	0				0	if ($?) { # we expect lizard to always exit 0, unless there is a problem
329	0					0	carp "Failed to run $lizard_path, \$? is $?";
330							return
331	0					0	}
332
333	0					0	my $csv = csv in => \$output, headers => 'auto';
334	0	0				0	unless ($csv) {
335	0					0	carp 'Failed to parse output of lizard.py as CSV';
336							return
337	0					0	}
338
339	0					0	my @warnings;
340	0					0	for my $line (@$csv) {
341	0					0	my %line = %$line;
342	0					0	diag "$line{function} has $line{NLOC} loc and $line{CCN} ccn";
343							push @warnings,
344							$line{NLOC} > $lizard_error_loc ? warn_function_is_very_long $line{start} :
345	0	0				0	$line{NLOC} > $lizard_warning_loc ? warn_function_is_long $line{start} : ();
		0
346							push @warnings,
347							$line{CCN} > $lizard_error_ccn ? warn_function_is_very_complex $line{start} :
348	0	0				0	$line{CCN} > $lizard_warning_ccn ? warn_function_is_complex $line{start} : ();
		0
349							}
350
351							\@warnings
352	0					0	}
353
354							=head3 analyse_code
355
356							B runs every test above on the code, producing a
357							combined list of warnings. It fails (returns undef) if all tests fail.
358							The tests run by B are those in the global variable
359							C<@all_tests>, which is a list of coderefs.
360
361							=cut
362
363							our @all_tests = (
364							\&test_lines,
365							\&test_clang_tidy,
366							\&test_lizard,
367							);
368
369							sub analyse_code {
370							# arguments/return value are identical to those of individual tests
371	3			3	1	1034	my @test_args = @_;
372	3					6	my @test_results = map { $_->(@test_args) } @all_tests;
	9					18
373							reduce {
374							# $a accumulates warnings so far, $b are warnings from current test
375	0	0		0		0	return $b unless defined $a;
376	0	0				0	push @$a, @$b if defined $b;
377	0					0	$a
378	3					20	} @test_results;
379							}
380
381							=head2 Star rating
382
383							B(I<$warnings>) is a subroutine that takes
384							the output of a test and computes the star rating as an integer. The
385							rating is undef if the test failed, 1 if the test returned at least
386							one error, 2 if the test returned at least one warning but no errors,
387							and 3 otherwise. So a program gets 3 stars if it only raises
388							informational messages, or no messages at all.
389
390							=cut
391
392							sub star_rating_of_warnings {
393	8			8	0	24	my ($warnings) = @_;
394	8	50				15	return unless defined $warnings;
395	8	100		4		24	return 1 if any { $_->[0] eq 'error' } @$warnings;
	4					10
396	7	100		3		25	return 2 if any { $_->[0] eq 'warning' } @$warnings;
	3					8
397	4					11	return 3;
398							}
399
400							1;
401							__END__