File Coverage

blib/lib/Text/FindIndent.pm
Criterion Covered Total %
statement 185 224 82.5
branch 113 162 69.7
condition 37 61 60.6
subroutine 12 14 85.7
pod 2 2 100.0
total 349 463 75.3


line stmt bran cond sub pod time code
1             package Text::FindIndent;
2             # -*- mode: Perl -*-
3             # Emacs mode is necessary for https://github.com/github/linguist/issues/2458
4              
5 2     2   24927 use 5.00503;
  2         7  
6 2     2   12 use strict;
  2         3  
  2         53  
7              
8 2     2   18 use vars qw{$VERSION};
  2         4  
  2         109  
9             BEGIN {
10 2     2   40 $VERSION = '0.11';
11             }
12              
13 2     2   9 use constant MAX_LINES => 500;
  2         3  
  2         7059  
14              
15             sub parse {
16 21     21 1 23595 my $class = shift;
17 21         34 my $text = shift;
18              
19 21         62 my %opts = @_;
20 21 50       62 my $textref = ref($text) ? $text : \$text; # accept references, too
21              
22 21         37 my $skip_pod = $opts{skip_pod};
23 21 50       52 my $first_level_indent_only = $opts{first_level_indent_only}?1:0;
24              
25 21         32 my %modeline_settings;
26              
27             my %indentdiffs;
28 21         28 my $lines = 0;
29 21         30 my $prev_indent = undef;
30 21         26 my $skip = 0;
31 21         27 my $in_pod = 0;
32              
33             # Do we have emacs smart comments?
34 21         68 $class->_check_emacs_local_variables_at_file_end($textref, \%modeline_settings);
35 21 50 33     160 if (exists $modeline_settings{softtabstop} and exists $modeline_settings{usetabs}) {
    50 66        
    100 66        
36             $modeline_settings{mixedmode} = $modeline_settings{usetabs}
37 0 0       0 if not defined $modeline_settings{mixedmode};
38             return(
39             ($modeline_settings{mixedmode} ? "m" : "s")
40             . $modeline_settings{softtabstop}
41 0 0       0 );
42             }
43             elsif (exists $modeline_settings{tabstop} and $modeline_settings{usetabs}) {
44 0 0       0 return( ($modeline_settings{mixedmode} ? "m" : "t") . $modeline_settings{tabstop} );
45             }
46             elsif (exists $modeline_settings{tabstop} and exists $modeline_settings{usetabs}) {
47 1         6 return( "s" . $modeline_settings{tabstop} );
48             }
49              
50 20         25 my $next_line_braces_pos_plus_1;
51 20         23 my $prev_indent_type = undef;
52 20         91 while ($$textref =~ /\G([ \t]*)([^\r\n]*)[\r\n]+/cgs) {
53 404         728 my $ws = $1;
54 404         623 my $rest = $2;
55 404         603 my $fullline = "$ws$rest";
56 404         444 $lines++;
57            
58             # check emacs start line stuff with some slack (shebang)
59 404         434 my $changed_modelines;
60 404 100       811 if ($lines < 3) {
61 37         102 $changed_modelines = $class->_check_emacs_local_variables_first_line($fullline, \%modeline_settings);
62             }
63              
64             # Do we have emacs smart comments?
65             # ==> Done once at start
66             #$class->_check_emacs_local_variables($fullline, \%modeline_settings);
67              
68             # Do we have vim smart comments?
69 404 100 100     865 if ($class->_check_vim_modeline($fullline, \%modeline_settings) || $changed_modelines) {
70 10 100 66     79 if (exists $modeline_settings{softtabstop} and exists $modeline_settings{usetabs}) {
    100 66        
    100 66        
71             $modeline_settings{mixedmode} = $modeline_settings{usetabs}
72 2 50       8 if not defined $modeline_settings{mixedmode};
73             return(
74             ($modeline_settings{mixedmode} ? "m" : "s")
75             . $modeline_settings{softtabstop}
76 2 100       19 );
77             }
78             elsif (exists $modeline_settings{tabstop} and $modeline_settings{usetabs}) {
79 2 100       17 return( ($modeline_settings{mixedmode} ? "m" : "t") . $modeline_settings{tabstop} );
80             }
81             elsif (exists $modeline_settings{tabstop} and exists $modeline_settings{usetabs}) {
82 3         19 return( "s" . $modeline_settings{tabstop} );
83             }
84             }
85              
86 397 50       775 if ($lines > MAX_LINES) {
87 0         0 next;
88             }
89              
90 397 50       734 if ($skip) {
91 0         0 $skip--;
92 0         0 next;
93             }
94              
95 397 100 100     1025 if ($skip_pod and $ws eq '' and substr($rest, 0, 1) eq '=') {
      100        
96 4 100 66     30 if (not $in_pod and $rest =~ /^=(?:head\d|over|item|back|pod|begin|for|end)/ ) {
    50 33        
97 2         3 $in_pod = 1;
98             }
99             elsif ($in_pod and $rest =~ /^=cut/) {
100 2         4 $in_pod = 0;
101             }
102              
103             }
104 397 100 100     1554 next if $in_pod or $rest eq '';
105              
106 352 100       639 if ($ws eq '') {
107 99         109 $prev_indent = $ws;
108 99         364 next;
109             }
110              
111             # skip next line if the last char is a backslash.
112             # Doesn't matter for Perl, but let's be generous!
113 253 50       502 $skip = 1 if $rest =~ /\\$/;
114            
115             # skip single-line comments
116 253 50       623 next if $rest =~ /^(?:#|\/\/|\/\*)/; # TODO: parse /* ... */!
117              
118 253 100       408 if ($next_line_braces_pos_plus_1) {
119 10 100       20 if ($next_line_braces_pos_plus_1==_length_with_tabs_converted($ws)) {
120 8         32 next;
121             }
122 2         3 $next_line_braces_pos_plus_1=0;
123             } else {
124 243 100       521 if ($rest =~ /=> \{$/) { #handle case where hash keys and values are indented by braces pos + 1
125 2         6 $next_line_braces_pos_plus_1=_length_with_tabs_converted($ws)+length($rest);
126             }
127             }
128              
129 245 50 33     532 if ($first_level_indent_only and $prev_indent ne '') {
130 0         0 next;
131             }
132              
133 245 100       500 if ($prev_indent eq $ws) {
134 70 50       127 if ($prev_indent_type) {
135 70         117 $indentdiffs{$prev_indent_type}+=0.01;
136             #coefficient is not based on data, so change if you think it should be different
137             }
138 70         282 next;
139             }
140              
141             # prefix-matching higher indentation level
142 175 100       2095 if ($ws =~ /^\Q$prev_indent\E(.+)$/) {
143 75         128 my $diff = $1;
144 75         115 my $indent_type=_analyse_indent_diff($diff);
145 75         156 $indentdiffs{$indent_type}++;
146 75         90 $prev_indent_type=$indent_type;
147 75         87 $prev_indent = $ws;
148 75         324 next;
149             }
150              
151             # prefix-matching lower indentation level
152 100 100       929 if ($prev_indent =~ /^\Q$ws\E(.+)$/) {
153 42         73 my $diff = $1;
154             #_grok_indent_diff($diff, \%indentdiffs);
155 42         72 my $indent_type=_analyse_indent_diff($diff);
156 42         87 $indentdiffs{$indent_type}++;
157 42         50 $prev_indent_type=$indent_type;
158 42         57 $prev_indent = $ws;
159 42         184 next;
160             }
161              
162             # at this point, we're desperate!
163 58         91 my $prev_spaces = $prev_indent;
164 58         215 $prev_spaces =~ s/[ ]{0,7}\t/ /g;
165 58         86 my $spaces = $ws;
166 58         196 $spaces =~ s/[ ]{0,7}\t/ /g;
167 58         86 my $len_diff = length($spaces) - length($prev_spaces);
168 58 50       126 if ($len_diff) {
169 58         68 $len_diff = abs($len_diff);
170 58         117 $indentdiffs{"m$len_diff"}++;
171             }
172 58         256 $prev_indent = $ws;
173            
174             } # end while lines
175              
176             # nothing found
177 13 100       43 return 'u' if not keys %indentdiffs;
178              
179 12         16 my $max = 0;
180 12         14 my $maxkey = undef;
181 12         41 while (my ($key, $value) = each %indentdiffs) {
182 27 100       116 $maxkey = $key, $max = $value if $value > $max;
183             }
184              
185 12 100       36 if ($maxkey =~ /^s(\d+)$/) {
186 8         20 my $mixedkey = "m" . $1;
187 8         12 my $mixed = $indentdiffs{$mixedkey};
188 8 100 66     31 if (defined($mixed) and $mixed >= $max * 0.2) {
189 3         6 $maxkey = $mixedkey;
190             }
191             }
192              
193             # fallback to emacs styles which are guesses only
194 12         20 foreach my $key (qw(softtabstop tabstop usetabs)) {
195 36 100 100     169 if (not exists $modeline_settings{$key}
196             and exists $modeline_settings{"style_$key"}) {
197 6         16 $modeline_settings{$key} = $modeline_settings{"style_$key"};
198             }
199             }
200              
201 12 100       37 if (exists $modeline_settings{softtabstop}) {
    100          
202 3         19 $maxkey =~ s/\d+/$modeline_settings{softtabstop}/;
203             }
204             elsif (exists $modeline_settings{tabstop}) {
205 1         6 $maxkey =~ s/\d+/$modeline_settings{tabstop}/;
206             }
207 12 100       31 if (exists $modeline_settings{usetabs}) {
208 2 50       7 if ($modeline_settings{usetabs}) {
209 2 100       8 $maxkey =~ s/^(.)(\d+)$/$1 eq 'u' ? "t8" : ($2 == 8 ? "t8" : "m$2")/e;
  2 50       13  
210             }
211             else {
212 0         0 $maxkey =~ s/^./m/;
213             }
214             }
215              
216             # mixedmode explicitly asked for
217 12 50       24 if ($modeline_settings{mixedmode}) {
218 0         0 $maxkey =~ s/^./m/;
219             }
220              
221 12         64 return $maxkey;
222             }
223              
224             sub _length_with_tabs_converted {
225 12     12   16 my $str=shift;
226 12   50     39 my $tablen=shift || 8;
227 12         40 $str =~ s/( +)$//;
228 12   50     38 my $trailing_spaces = $1||'';
229 12         14 $str =~ s/ +//g; # assume the spaces are all contained in tabs!
230 12         38 return length($str)*$tablen+length($trailing_spaces);
231             }
232              
233             sub _grok_indent_diff {
234 0     0   0 my $diff = shift;
235 0         0 my $indentdiffs = shift;
236              
237 0 0       0 if ($diff =~ /^ +$/) {
    0          
238 0         0 $indentdiffs->{"s" . length($diff)}++;
239             }
240             elsif ($diff =~ /^\t+$/) {
241 0         0 $indentdiffs->{"t8"}++; # we can't infer what a tab means. Or rather, we need smarter code to do it
242             }
243             else { # mixed!
244 0         0 $indentdiffs->{"m" . _length_with_tabs_converted($diff)}++;
245             }
246             }
247              
248             sub _analyse_indent_diff {
249 117     117   170 my $diff = shift;
250              
251 117 100       339 if ($diff =~ /^ +$/) {
    50          
252 102         263 return "s" . length($diff);
253             }
254             elsif ($diff =~ /^\t+$/) {
255 15         29 return "t8"; # we can't infer what a tab means. Or rather, we need smarter code to do it
256             }
257             else { # mixed!
258 0         0 return "m" . _length_with_tabs_converted($diff);
259             }
260             }
261              
262             {
263             # the vim modeline regexes
264             my $VimTag = qr/(?:ex|vim?(?:[<=>]\d+)?):/;
265             my $OptionArg = qr/[^\s\\]*(?:\\[\s\\][^\s\\]*)*/;
266             my $VimOption = qr/
267             \w+(?:=)?$OptionArg
268             /xo;
269              
270             my $VimModeLineStart = qr/(?:^|\s+)$VimTag/o;
271              
272             # while technically, we match against $VimModeLineStart before,
273             # IF there is a vim modeline, we don't need to optimize
274             my $VimModelineTypeOne = qr/
275             $VimModeLineStart
276             \s*
277             ($VimOption
278             (?:
279             (?:\s*:\s*|\s+)
280             $VimOption
281             )*
282             )
283             \s*$
284             /xo;
285            
286             my $VimModelineTypeTwo = qr/
287             $VimModeLineStart
288             \s*
289             set?\s+
290             ($VimOption
291             (?:\s+$VimOption)*
292             )
293             \s*
294             :
295             /xo;
296              
297             sub _check_vim_modeline {
298 404     404   557 my $class = shift;
299 404         564 my $line = shift;
300 404         491 my $settings = shift;
301              
302             # Quoting the vim docs:
303             # There are two forms of modelines. The first form:
304             # [text]{white}{vi:|vim:|ex:}[white]{options}
305             #
306             #[text] any text or empty
307             #{white} at least one blank character ( or )
308             #{vi:|vim:|ex:} the string "vi:", "vim:" or "ex:"
309             #[white] optional white space
310             #{options} a list of option settings, separated with white space or ':',
311             # where each part between ':' is the argument for a ":set"
312             # command (can be empty)
313             #
314             #Example:
315             # vi:noai:sw=3 ts=6 ~
316             # The second form (this is compatible with some versions of Vi):
317             #
318             # [text]{white}{vi:|vim:|ex:}[white]se[t] {options}:[text]
319             #
320             #[text] any text or empty
321             #{white} at least one blank character ( or )
322             #{vi:|vim:|ex:} the string "vi:", "vim:" or "ex:"
323             #[white] optional white space
324             #se[t] the string "set " or "se " (note the space)
325             #{options} a list of options, separated with white space, which is the
326             # argument for a ":set" command
327             #: a colon
328             #[text] any text or empty
329             #
330             #Example:
331             # /* vim: set ai tw=75: */ ~
332             #
333            
334 404         415 my @options;
335 404 100       3126 return if $line !~ $VimModeLineStart;
336              
337 6 100       600 if ($line =~ $VimModelineTypeOne) {
    50          
338 2         12 push @options, split /(?!<\\)[:\s]+/, $1;
339             }
340             elsif ($line =~ $VimModelineTypeTwo) {
341 4         22 push @options, split /(?!<\\)\s+/, $1;
342             }
343             else {
344 0         0 return;
345             }
346              
347 6 50       18 return if not @options;
348              
349 6         8 my $changed = 0;
350 6         26 foreach (@options) {
351 14 100       48 /s(?:ts|ofttabstop)=(\d+)/i and $settings->{softtabstop} = $1, $changed = 1, next;
352 11 100       45 /t(?:s|abstop)=(\d+)/i and $settings->{tabstop} = $1, $changed = 1, next;
353 7 100 33     56 /((?:no)?)(?:expandtab|et)/i and $settings->{usetabs} = (defined $1 and $1 =~ /no/i ? 1 : 0), $changed = 1, next;
354             }
355 6         24 return $changed;
356             }
357             }
358              
359              
360              
361              
362             {
363             # lookup for emacs tab modes
364             my %tabmodelookup = (
365             t => sub {
366             $_[0]->{mixedmode} = 1;
367             $_[0]->{usetabs} = 1;
368             },
369             nil => sub {
370             delete $_[0]->{mixedmode};
371             $_[0]->{usetabs} = 0;
372             },
373             );
374              
375             # lookup for emacs styles
376             my %stylelookup = (
377             kr => sub {
378             $_[0]->{style_softtabstop} = 4;
379             $_[0]->{style_tabstop} = 8;
380             $_[0]->{style_usetabs} = 1;
381             },
382             linux => sub {
383             $_[0]->{style_softtabstop} = 8;
384             $_[0]->{style_tabstop} = 8;
385             $_[0]->{style_usetabs} = 1;
386             },
387             'gnu' => sub {
388             $_[0]->{style_softtabstop} = 2;
389             $_[0]->{style_tabstop} = 8;
390             $_[0]->{style_usetabs} = 1;
391             },
392             'bsd' => sub {
393             $_[0]->{style_softtabstop} = 4;
394             $_[0]->{style_tabstop} = 8;
395             $_[0]->{style_usetabs} = 1;
396             },
397             'ellemtel' => sub {
398             $_[0]->{style_softtabstop} = 3;
399             $_[0]->{style_tabstop} = 3;
400             $_[0]->{style_usetabs} = 0;
401             },
402             'java' => sub {
403             $_[0]->{style_softtabstop} = 4;
404             $_[0]->{style_tabstop} = 8;
405             },
406             );
407             $stylelookup{'k&r'} = $stylelookup{kr};
408             $stylelookup{'bsd'} = $stylelookup{kr};
409             $stylelookup{'whitesmith'} = $stylelookup{kr};
410             $stylelookup{'stroustrup'} = $stylelookup{kr};
411              
412             my $FirstLineVar = qr/[^\s:]+/;
413             my $FirstLineValue = qr/[^;]+/; # dumb
414             my $FirstLinePair = qr/\s*$FirstLineVar\s*:\s*$FirstLineValue;/o;
415             my $FirstLineRegexp = qr/-\*-\s*mode:\s*[^\s;]+;\s*($FirstLinePair+)\s*-\*-/o;
416            
417            
418             sub _check_emacs_local_variables_first_line {
419 37     37   52 my $class = shift;
420 37         54 my $line = shift;
421 37         47 my $settings = shift;
422              
423             # on first line (second if there is a shebang):
424             # -*- mode: $MODENAME; $VARNAME: $VALUE; ... -*-
425             # ($FOO is not a literal)
426             # Example with a Lisp comment:
427             # ;; -*- mode: Lisp; fill-column: 75; comment-column: 50; -*-
428              
429              
430 37         44 my $changed = 0;
431 37 100       180 if ($line =~ $FirstLineRegexp) {
432 4         28 my @pairs = split /\s*;\s*/, $1;
433 4         8 foreach my $pair (@pairs) {
434 7         34 my ($key, $value) = split /\s*:\s*/, $pair, 2;
435 7 100       33 if ($key eq 'tab-width') {
    100          
    100          
    50          
436 1         3 $settings->{tabstop} = $value;# FIXME: check var
437 1         3 $changed = 1;
438             }
439             elsif ($key eq 'indent-tabs-mode') {
440 3 50       15 $tabmodelookup{$value}->($settings), $changed = 1 if defined $tabmodelookup{$value};
441             }
442             elsif ($key eq 'c-basic-offset') {
443 2   33     11 $settings->{tabstop} ||= $value; # tab-width takes precedence!?
444 2         5 $changed = 1;
445             }
446             elsif ($key eq 'style') { # this is quite questionable practice...
447 1 50       8 $stylelookup{$value}->($settings), $changed = 1 if defined $stylelookup{$value};
448             }
449             }
450             }
451              
452             # do this only as a LAST resort!
453             #$settings->{tabstop} = $settings->{style_tabstop} if not exists $settings->{tabstop};
454             #$settings->{softtabstop} = $settings->{style_softtabstop} if not exists $settings->{softtabstop};
455             #$settings->{usetabs} = $settings->{style_usetabs} if not exists $settings->{usetabs};
456              
457 37         75 return $changed;
458             }
459              
460             sub _check_emacs_local_variables {
461 513     513   792 my $class = shift;
462 513         906 my $line = shift;
463 513         635 my $settings = shift;
464              
465             # A local variables list goes near the end of the file, in the last page.[...]
466             # The local variables list starts with a line containing the string `Local Variables:',
467             # and ends with a line containing the string `End:'. In between come the variable names
468             # and values, one set per line, as `variable: value'. The values are not evaluated;
469             # they are used literally. If a file has both a local variables list and a `-*-'
470             # line, Emacs processes everything in the `-*-' line first, and everything in the
471             # local variables list afterward.
472             #
473             # Here is an example of a local variables list:
474             #
475             # ;; Local Variables: **
476             # ;; mode:lisp **
477             # ;; comment-column:0 **
478             # ;; comment-start: ";; " **
479             # ;; comment-end:"**" **
480             # ;; End: **
481             #
482             # Each line starts with the prefix `;; ' and each line ends with the suffix ` **'.
483             # Emacs recognizes these as the prefix and suffix based on the first line of the
484             # list, by finding them surrounding the magic string `Local Variables:'; then it
485             # automatically discards them from the other lines of the list.
486             #
487             # The usual reason for using a prefix and/or suffix is to embed the local variables
488             # list in a comment, so it won't confuse other programs that the file is intended as
489             # input for. The example above is for a language where comment lines start with `;; '
490             # and end with `**'; the local values for comment-start and comment-end customize the
491             # rest of Emacs for this unusual syntax. Don't use a prefix (or a suffix) if you don't need one.
492             #
493             #
494             # Can it be any more annoying to parse? --Steffen
495              
496 513 100       3024 if ($settings->{in_local_variables_section}) {
    100          
497 5         9 my $prefix = $settings->{local_variable_prefix};
498 5 50       13 $prefix = '' if not defined $prefix;
499 5         8 $prefix = quotemeta($prefix);
500 5         8 my $suffix = $settings->{local_variable_suffix};
501 5 50       11 $suffix = '' if not defined $suffix;
502 5         6 $suffix = quotemeta($suffix);
503              
504 5 100       67 if ($line =~ /^\s*$prefix\s*([^\s:]+):\s*(.+)$suffix\s*$/) {
505 3         6 my $key = $1;
506 3         6 my $value = $2;
507 3         7 $value =~ s/\s+$//;
508 3 50       14 if ($key eq 'tab-width') {
    100          
    100          
    50          
509 0         0 $settings->{tabstop} = $value;
510             }
511             elsif ($key eq 'indent-tabs-mode') {
512 1 50       7 $tabmodelookup{$value}->($settings) if defined $tabmodelookup{$value};
513             }
514             elsif ($key eq 'c-basic-offset') {
515 1   33     71 $settings->{tabstop} ||= $value; # tab-width takes precedence!?
516             }
517             elsif ($key eq 'style') { # this is quite questionable practice...
518 1 50       7 $stylelookup{$value}->($settings) if defined $stylelookup{$value};
519             }
520             } # end if variable line
521             else {
522 2         4 delete $settings->{in_local_variables_section};
523 2         3 delete $settings->{local_variable_prefix};
524 2         9 delete $settings->{local_variable_suffix};
525             }
526             }
527             elsif ($line =~ /^\s*(\S*)\s*Local Variables:\s*(\S*)\s*$/) {
528 2         8 $settings->{local_variable_prefix} = $1;
529 2         4 $settings->{local_variable_suffix} = $2;
530 2         11 $settings->{in_local_variables_section} = 1;
531             }
532             }
533              
534             sub _check_emacs_local_variables_at_file_end {
535 21     21   35 my $class = shift;
536 21         32 my $textref = shift;
537 21         33 my $settings = shift;
538 21         26 my $len = length($$textref);
539 21         34 my $start = $len-3000;
540 21 50       55 $start = 0 if $start < 0;
541 21         64 my $text = substr($$textref, $start);
542              
543 21         129 while ($text =~ /\G[ \t]*([^\r\n]*)[\r\n]+/cgs) {
544 513         1056 $class->_check_emacs_local_variables($1, $settings);
545             }
546 21         51 return;
547             }
548             } # end lexical block for emacs lookups
549              
550              
551             sub to_vim_commands {
552 0     0 1   my $indent = shift;
553 0 0         $indent = shift if $indent eq __PACKAGE__;
554 0 0 0       $indent = __PACKAGE__->parse($indent) if ref($indent) or length($indent) > 5;
555              
556 0           my @cmd;
557 0 0         if ( $indent =~ /^t(\d+)/ ) {
    0          
    0          
558 0           my $chars = $1;
559 0           push @cmd, ":set shiftwidth=$chars";
560 0           push @cmd, ":set tabstop=$chars";
561 0           push @cmd, ":set softtabstop=0";
562 0           push @cmd, ":set noexpandtab";
563             } elsif ( $indent =~ /^s(\d+)/ ) {
564 0           my $spaces = $1;
565 0           push @cmd, ":set shiftwidth=$spaces";
566 0           push @cmd, ":set tabstop=8";
567 0           push @cmd, ":set softtabstop=$spaces";
568 0           push @cmd, ':set expandtab';
569             } elsif ( $indent =~ /^m(\d+)/ ) {
570 0           my $spaces = $1;
571 0           push @cmd, ":set shiftwidth=$spaces";
572 0           push @cmd, ":set tabstop=8";
573 0           push @cmd, ":set softtabstop=$spaces";
574 0           push @cmd, ':set noexpandtab';
575             }
576 0           return @cmd;
577             }
578              
579             1;
580              
581             __END__