File Coverage

blib/lib/C/Scan.pm
Criterion Covered Total %
statement 319 452 70.5
branch 82 184 44.5
condition 20 43 46.5
subroutine 32 35 91.4
pod 0 24 0.0
total 453 738 61.3


line stmt bran cond sub pod time code
1             package C::Scan;
2              
3             require Exporter;
4 1     1   1010 use Config '%Config';
  1         2  
  1         51  
5 1     1   6 use File::Basename;
  1         2  
  1         133  
6 1     1   1061 use Data::Flow qw(0.05);
  1         3330  
  1         131  
7 1     1   6 use strict; # Earlier it catches ISA and EXPORT.
  1         1  
  1         8264  
8              
9             @C::Scan::ISA = qw(Exporter Data::Flow);
10              
11             # Items to export into callers namespace by default. Note: do not export
12             # names by default without a very good reason. Use EXPORT_OK instead.
13             # Do not simply export all your public functions/methods/constants.
14              
15             @C::Scan::EXPORT = qw(
16             );
17             @C::Scan::EXPORT_OK = qw(
18             );
19             # this flag tells cpp to only output macros
20             $C::Scan::MACROS_ONLY = '-dM';
21              
22             $C::Scan::VERSION = '0.74';
23              
24             my (%keywords,%style_keywords);
25             for (qw(asm auto break case char continue default do double else enum
26             extern float for fortran goto if int long register return short
27             sizeof static struct switch typedef union unsigned signed while void)) {
28             $keywords{$_}++;
29             }
30             for (qw(bool class const delete friend inline new operator overload private
31             protected public virtual)) {
32             $style_keywords{'C++'}{$_}++;
33             }
34             for (qw(__func__ _Complex _Imaginary _Bool inline restrict)) {
35             $style_keywords{'C9X'}{$_}++;
36             }
37             for (qw(inline const asm noreturn format section
38             constructor destructor unused weak)) {
39             $style_keywords{'GNU'}{$_}++;
40             $style_keywords{'GNU'}{"__$ {_}__"}++;
41             }
42             $style_keywords{'GNU'}{__attribute__}++;
43             $style_keywords{'GNU'}{__extension__}++;
44             $style_keywords{'GNU'}{__consts}++;
45             $style_keywords{'GNU'}{__const}++;
46              
47             my $recipes
48             = { Defines => { default => '' },
49             cppstdin => { default => $Config{cppstdin} },
50             cppflags => { default => $Config{cppflags} },
51             cppminus => { default => $Config{cppminus} },
52             c_styles => { default => [qw(C++ GNU C9X)] },
53             add_cppflags => { default => '' },
54             keywords => { prerequisites => ['c_styles'],
55             output => sub {
56             my %kw = %keywords;
57             my %add;
58             for ( @{ shift->{c_styles} } ) {
59             %add = %{ $style_keywords{$_} };
60             %kw = (%kw, %add);
61             }
62             \%kw;
63             }, },
64             'undef' => { default => undef },
65             filename_filter => { default => undef },
66             full_text => { class_filter => [ 'text', 'C::Preprocessed',
67             qw(undef filename Defines includeDirs Cpp)] },
68             text => { class_filter => [ 'text', 'C::Preprocessed',
69             qw(filename_filter filename Defines includeDirs Cpp)] },
70             text_only_from => { class_filter => [ 'text_only_from', 'C::Preprocessed',
71             qw(filename_filter filename Defines includeDirs Cpp)] },
72             includes => { filter => [ \&includes,
73             qw(filename Defines includeDirs Cpp) ], },
74             includeDirs => { prerequisites => ['filedir'],
75             output => sub {
76             my $data = shift;
77             [ $data->{filedir}, '/usr/local/include', '.'];
78             } },
79             Cpp => { prerequisites => [qw(cppminus add_cppflags cppflags cppstdin)],
80             output => sub {
81             my $data = shift;
82             return { cppstdin => $data->{cppstdin},
83             cppflags => "$data->{cppflags} $data->{add_cppflags}",
84             cppminus => $data->{cppminus} };
85             } },
86             filedir => { output => sub { dirname ( shift->{filename} || '.' ) } },
87             sanitized => { filter => [ \&sanitize, 'text'], },
88             toplevel => { filter => [ \&top_level, 'sanitized'], },
89             full_sanitized => { filter => [ \&sanitize, 'full_text'], },
90             full_toplevel => { filter => [ \&top_level, 'full_sanitized'], },
91             no_type_decl => { filter => [ \&remove_type_decl, 'toplevel'], },
92             typedef_chunks => { filter => [ \&typedef_chunks, 'full_toplevel'], },
93             typedefs_maybe => { filter => [ sub {[keys %{+shift}]}, 'typedef_hash'], },
94             typedefs_whited => { filter => [ \&typedefs_whited,
95             'full_sanitized', 'typedef_chunks',
96             'keywords_rex'], },
97             typedef_texts => { filter => [ \&typedef_texts,
98             'full_text', 'typedef_chunks'], },
99             typedef_hash => { filter => [ \&typedef_hash,
100             'typedef_texts', 'typedefs_whited'], },
101             typedef_structs => { filter => [ \&typedef_structs,
102             'typedef_hash'], },
103             defines_maybe => { filter => [ \&defines_maybe, 'filename'], },
104             defines_no_args => { prerequisites => ['defines_maybe'],
105             output => sub { shift->{defines_maybe}->[0] }, },
106             defines_args => { prerequisites => ['defines_maybe'],
107             output => sub { shift->{defines_maybe}->[1] }, },
108              
109             defines_full => { filter => [ \&defines_full,
110             qw(filename Defines includeDirs Cpp) ], },
111             defines_no_args_full => { prerequisites => ['defines_full'],
112             output => sub { shift->{defines_full}->[0] }, },
113             defines_args_full => { prerequisites => ['defines_full'],
114             output => sub { shift->{defines_full}->[1] }, },
115              
116             decl_inlines => { filter => [ \&functions_in, 'no_type_decl'], },
117             inline_chunks => { filter => [ sub { shift->[0] }, 'decl_inlines'], },
118             inlines => { filter => [ \&from_chunks, 'inline_chunks', 'text'], },
119             decl_chunks => { filter => [ sub { shift->[1] }, 'decl_inlines'], },
120             decls => { filter => [ \&from_chunks, 'decl_chunks', 'text'], },
121             fdecl_chunks => { filter => [ sub { shift->[4] }, 'decl_inlines'], },
122             fdecls => { filter => [ \&from_chunks, 'fdecl_chunks', 'text'], },
123             mdecl_chunks => { filter => [ sub { shift->[2] }, 'decl_inlines'], },
124             mdecls => { filter => [ \&from_chunks, 'mdecl_chunks', 'text'], },
125             vdecl_chunks => { filter => [ sub { shift->[3] }, 'decl_inlines'], },
126             vdecls => { filter => [ \&from_chunks, 'vdecl_chunks', 'text'], },
127             vdecl_hash => { filter => [ \&vdecl_hash, 'vdecls', 'mdecls' ], },
128             parsed_fdecls => { filter => [ \&do_declarations, 'fdecls',
129             'typedef_hash', 'keywords'], },
130             keywords_rex => { filter => [ sub { my @k = keys %{ shift() };
131             local $" = '|';
132             my $r = "(?:@k)";
133             eval 'qr/$r/' or $r # Older Perls
134             }, 'keywords'], },
135             };
136              
137             sub from_chunks {
138 4     4 0 136 my $chunks = shift;
139 4         8 my $txt = shift;
140 4         8 my @out;
141 4         13 my $i = 0;
142 4         28 while ($i < @$chunks) {
143 1         6 push @out, substr $txt, $chunks->[$i], $chunks->[ $i + 1 ] - $chunks->[$i];
144 1         5 $i += 2;
145             }
146 4         43 \@out;
147             }
148              
149             #sub process { request($recipes, @_) }
150             # Preloaded methods go here.
151              
152             sub includes {
153 2     2 0 29 my %seen;
154 2 50       18 my $stream = new C::Preprocessed (@_)
155             or die "Cannot open pipe from cppstdin: $!\n";
156              
157 2         46067 while (<$stream>) {
158 536 100       2548 next unless m(^\s*\#\s* # Leading hash
159             (line\s*)? # 1: Optional line
160             ([0-9]+)\s* # 2: Line number
161             (.*) # 3: The rest
162             )x;
163 110         310 my $include = $3;
164 110 50       746 $include = $1 if $include =~ /"(.*)"/; # Filename may be in quotes
165 110 50       715 $include =~ s,\\\\,/,g if $^O eq 'os2';
166 110 50       1817 $seen{$include}++ if $include ne "";
167             }
168 2         221 [keys %seen];
169             }
170              
171             sub defines_maybe {
172 1     1 0 225 my $file = shift;
173 1         5 my ($mline,$line,%macros,%macrosargs,$sym,$args);
174 1 50       96 open(C, $file) or die "Cannot open file $file: $!\n";
175 1   66     49 while (not eof(C) and $line = ) {
176             next unless
177 730 50       2981 ( $line =~ s[
178             ^ \s* \# \s* # Start of directive
179             define \s+
180             (\w+) # 1: symbol
181             (?:
182             \( (.*?) \s* \) # 2: Minimal match for arguments
183             # in parenths (without trailing
184             # spaces)
185             )? # optional, no grouping
186             \s* # rest is the definition
187             ([\s\S]*) # 3: the rest
188             ][]x );
189 0         0 ($sym, $args, $mline) = ($1, $2, $3);
190 0   0     0 $mline .= while not eof(C) and $mline =~ s/\\\n/\n/;
191 0         0 chomp $mline;
192             #print "sym: `$sym', args: `$args', mline: `$mline'\n";
193 0 0       0 if (defined $args) {
194 0         0 $macrosargs{$sym} = [ [split /\s*,\s*/, $args], $mline];
195             } else {
196 0         0 $macros{$sym} = $mline;
197             }
198             }
199 1 50       23 close(C) or die "Cannot close file $file: $!\n";
200 1         16 [\%macros, \%macrosargs];
201             }
202              
203             sub defines_full {
204 1     1 0 17 my $Cpp = $_[3];
205 1         2 my ($mline,$line,%macros,%macrosargs,$sym,$args);
206              
207             # save the old cppflags and add the flag for only ouputting macro definitions
208 1         3 my $old_cppstdin = $Cpp->{'cppstdin'};
209 1         5 $Cpp->{'cppstdin'} = $old_cppstdin . " " . $C::Scan::MACROS_ONLY;
210              
211 1 50       8 my $stream = new C::Preprocessed (@_)
212             or die "Cannot open pipe from cppstdin: $!\n";
213              
214 1         22660 while (defined ($line = <$stream>)) {
215             next unless
216 239 50       1310 ( $line =~ s[
217             ^ \s* \# \s* # Start of directive
218             define \s+
219             (\w+) # 1: symbol
220             (?:
221             \( (.*?) \s* \) # 2: Minimal match for arguments
222             # in parenths (without trailing
223             # spaces)
224             )? # optional, no grouping
225             \s* # rest is the definition
226             ([\s\S]*) # 3: the rest
227             ][]x );
228 239         3762 ($sym, $args, $mline) = ($1, $2, $3);
229 239         609 $mline .= <$stream> while ($mline =~ s/\\\n/\n/);
230 239         264 chomp $mline;
231             #print STDERR "sym: `$sym', args: `$args', mline: `$mline'\n";
232 239 100       354 if (defined $args) {
233 10         233 $macrosargs{$sym} = [ [split /\s*,\s*/, $args], $mline];
234             } else {
235 229         1229 $macros{$sym} = $mline;
236             }
237             }
238             # restore the original cppflags
239 1         7 $Cpp->{'cppstdin'} = $old_cppstdin;
240 1         32 [\%macros, \%macrosargs];
241             }
242              
243             # sub nexttypedef {
244             # return unless $_[0] =~ /(\G|^|;)\s*typedef\b/g;
245             # my $start = pos($_[0]) - 7;
246             # nextsemi($_[0]);
247             # my $end = pos $_[0];
248             # # warn "Found `", substr($_[0], $start, $end - $start), "'\n" if $debug;
249             # return $start, $end;
250             # }
251              
252             # sub nextsemi {
253             # my $n = 0;
254             # while ($_[0] =~ /([\(\{\[])|([\]\)\}])|(\;)/g) {
255             # $n++ if defined $1;
256             # $n-- if defined $2;
257             # return if defined $3 and $n == 0;
258             # }
259             # die "No semicolon on the outer level";
260             # }
261              
262             sub typedef_texts {
263 1     1 0 38 my ($txt, $chunks) = (shift, shift);
264 1         7 my ($b, $e, $in, @out);
265 1         34 my @in = @$chunks;
266 1         15 while (($b, $e) = splice @in, 0, 2) {
267 81         171 $in = substr($txt, $b, $e - $b);
268             # remove any remaining directives
269 81         274 $in =~ s/^ ( \s* \# .* ( \\ $ \n .* )* ) / ' ' x length($1)/xgem;
  0         0  
270 81         309 push @out, $in;
271             }
272 1         17 \@out;
273             }
274              
275             sub typedef_hash_old {
276 0     0 0 0 +{ map {($_,1)} map /(\w+)/, @{$_[0]} };
  0         0  
  0         0  
277             }
278              
279             sub typedef_hash {
280 1     1 0 14 my ($typedefs, $whited) = (shift,shift);
281 1         2 my %out;
282              
283             loop:
284 1         5 for my $o (0..$#$typedefs) {
285 81         96 my $wh = $whited->[$o];
286 81         78 my $td = $typedefs->[$o];
287 81 50 33     380 if ($wh =~ /,/ or not $wh =~ /\w/) { # Hard case, guessimates ...
288             # Determine whether the new thingies are inside parens
289 0         0 $wh =~ /,/g;
290 0         0 my $p = pos $wh;
291 0         0 my ($s, $e);
292 0 0       0 if (matchingbrace($wh)) { # Inside. Easy part: just split on /,/...
293 0         0 $e = pos($wh) - 1;
294 0         0 $s = $e;
295 0         0 my $d = 0;
296             # Skip back
297 0         0 while (--$s >= 0) {
298 0         0 my $c = substr $wh, $s, 1;
299 0 0       0 if ($c =~ /[\(\{\[]/) {
    0          
300 0         0 $d--;
301             } elsif ($c =~ /[\)\]\}]/) {
302 0         0 $d++;
303             }
304 0 0       0 last if $d < 0;
305             }
306 0 0       0 if ($s < 0) { # Should not happen
307 0         0 warn("panic: could not match braces in\n\t$td\nwhited as\n\t$wh\n");
308 0         0 next loop;
309             }
310 0         0 $s++;
311             } else { # We are at toplevel
312             # We need to skip back all the modifiers attached to the first thingy
313             # Guesstimates: everything after the first '*' (inclusive)
314 0         0 pos $wh = 0;
315 0         0 $wh = /(?=\w)/g;
316 0         0 my $ws = pos $wh;
317 0         0 my $pre = substr $wh, 0, $ws;
318 0         0 $s = $ws;
319 0 0       0 $s = pos $pre if $pre =~ /(?=\*)/g;
320 0         0 $e = length $wh;
321             }
322             # Now: need to split $td based on commas in $wh!
323             # And need to split each chunk of $td based on word in the chunk of $wh!
324 0         0 my $td_decls = substr($td, $s, $e - $s);
325 0         0 my ($pre, $post) = (substr($td, 0, $s), substr($td, $e));
326 0         0 my $wh_decls = substr($wh, $s, $e - $s);
327 0         0 my @wh_decls = split /,/, $wh_decls;
328 0         0 my $td_s = 0;
329 0         0 my (@td_decl, @td_pre, @td_post, @td_word);
330 0         0 for my $wh_d (@wh_decls) {
331 0         0 my $td_d = substr $td, $td_s, length $wh_d;
332 0         0 push @td_decl, $td_d;
333 0         0 $wh_d =~ /(\w+)/g;
334 0         0 push @td_word, $1;
335 0         0 push @td_post, substr $td_d, pos($wh_d);
336 0         0 push @td_pre, substr $td_d, pos($wh_d) - length $1, length $1;
337 0         0 $td_s += 1 + length $wh_d; # Skip over ','
338             }
339 0         0 for my $i (0..$#wh_decls) {
340 0         0 my $p = "$td_post[$i]$post";
341 0 0       0 $p = '' unless $p =~ /\S/;
342 0         0 $out{$td_word[$i]} = ["$pre$td_pre[$i]", $p];
343             }
344             } else { # Only one thing defined...
345 81         169 $wh =~ /(\w+)/g;
346 81         86 my $e = pos $wh;
347 81         106 my $s = $e - length $1;
348 81         93 my $type = $1;
349 81         96 my $pre = substr $td, 0, $s;
350 81         134 my $post = substr $td, $e, length($td) - $e;
351 81 100       172 $post = '' unless $post =~ /\S/;
352 81         296 $out{$type} = [$pre, $post];
353             }
354             }
355 1         6 \%out;
356             }
357              
358             sub typedef_chunks { # Input is toplevel, output: starts and ends
359 1     1 0 16 my $txt = shift;
360 1         17 pos $txt = 0;
361 1         4 my ($b, $e, @out);
362 1         26 while ($txt =~ /\btypedef\b/g) {
363 81         291 push @out, pos $txt;
364 81         568 $txt =~ /(?=;)|\Z/g;
365 81         557 push @out, pos $txt;
366             }
367 1         18 \@out;
368             }
369              
370             sub typedef_structs {
371 1     1 0 397 my $typehash = shift;
372 1         3 my %structs;
373 1         20 while (my($key, $text) = each %$typehash) {
374 81         245 my $name = parse_struct($text->[0], \%structs);
375 81 100       648 $structs{$key} = defined($name) ? $structs{$name} : undef;
376             }
377 1         14 \%structs;
378             }
379              
380             sub parse_struct {
381 81     81 0 592 my($in, $structs) = @_;
382 81         121 my($b, $e, $chunk, $vars, $struct, $structname);
383 81 100       608 ($structname, $in) = $in =~ /
384             ^ \s* ( (?: struct | union ) (?: \s+ \w+ )? ) \s* { \s* (.*?) \s* } \s* $
385             /gisx or return;
386 3 50       21 $structname .= " _ANON" unless $structname =~ /\s/;
387 3 100       13 $structname .= " 0" if exists $structs->{$structname};
388 3         22 $structname =~ s/(\d+$)/$1 + 1/e while exists $structs->{$structname};
  1         12  
389 3         6 $b = 0;
390 3         49 while ($in =~ /(\{|;|$)/g) {
391 6 50       25 matchingbrace($in), next if $1 eq '{';
392 6         11 $e = pos($in);
393 6 100       26 next if $b == $e;
394 3         13 $chunk = substr($in, $b, $e - $b);
395 3         5 $b = $e;
396 3 50       17 if ($chunk =~ /\G\s*(struct|union).*\}/gs) {
397 0         0 my $term = pos $chunk;
398 0         0 my $name = parse_struct(substr($chunk, 0, $term), $structs);
399 0         0 $vars = parse_vars(join ' ', $name, substr $chunk, $term);
400             } else {
401 3         12 $vars = parse_vars($chunk);
402             }
403 3         18 push @$struct, @$vars;
404             }
405 3         10 $structs->{$structname} = $struct;
406 3         13 $structname;
407             }
408              
409             sub parse_vars {
410 3     3 0 6 my $in = shift;
411 3         7 my($vars, $type, $word, $id, $post);
412 3         27 while ($in =~ /\G\s*([\[;,]|\S+?\b|$)\s*/g) {
413 17         40 $word = $1;
414 17 100 100     131 if ($word eq ';' || $word eq '') {
    50          
    100          
415 6 100       30 next unless defined $id;
416 3 50       8 $type = 'int' unless defined $type; # or is this an error?
417 3         24 push @$vars, [ $type, $post, $id ];
418 3         20 ($type, $post, $id) = (undef, undef, undef);
419             } elsif ($word eq ',') {
420 0 0       0 warn "panic: expecting name before comma in '$in'\n" unless defined $id;
421 0 0       0 $type = 'int' unless defined $type; # or is this an error?
422 0         0 push @$vars, [ $type, $post, $id ];
423 0         0 $type =~ s/[ *]*$//;
424 0         0 $id = undef;
425             } elsif ($word eq '[') {
426 3 50       11 warn "panic: expecting name before '[' in '$in'\n" unless defined $id;
427 3 50       9 $type = 'int' unless defined $type; # or is this an error?
428 3         7 my $b = pos $in;
429 3         10 matchingbrace($in);
430 3         361 $post .= $word . substr $in, $b, pos($in) - $b;
431             } else {
432 8 50       20 if (defined $post) {
433 0         0 warn "panic: not expecting '$word' after array bounds in '$in'\n";
434             } else {
435 8 100       197 $type = join ' ', grep defined, $type, $id if defined $id;
436 8         148 $id = $word;
437             }
438             }
439             }
440 3         12 $vars;
441             }
442              
443             sub vdecl_hash {
444 1     1 0 238 my($vdecls, $mdecls) = @_;
445 1         3 my %vdecl_hash;
446 1         5 for (@$vdecls, @$mdecls) {
447 0 0       0 next if /[()]/; # ignore functions, and function pointers
448 0         0 my $copy = $_;
449 0 0       0 next unless $copy =~ s/^\s*extern\s*//;
450 0         0 my $vars = parse_vars($copy);
451 0         0 $vdecl_hash{$_->[2]} = [ @$_[0, 1] ] for @$vars;
452             }
453 1         13 \%vdecl_hash;
454             }
455              
456             # The output is the list of list of inline chunks and list of
457             # declaration chunks.
458              
459             sub functions_in { # The arg is text without type declarations.
460 1     1 0 38 my $in = shift; # remove_type_decl(top_level(sanitize($txt)));
461             # What remains now consists of variable and function declarations,
462             # and inline functions.
463 1         47 $in =~ /(?=\S)/g;
464 1         8 my ($b, $e, $b1, $e1, @inlines, @decls, @mdecls, @fdecls, @vdecls);
465 1         7 $b = pos $in;
466 1         2 my $chunk;
467 1         25 while ($b != length $in) {
468 1 50       15 $in =~ /;/g or pos $in = $b, $in =~ /.*\S|\Z/g ; # Or last non-space
469 1         4 $e = pos $in;
470 1         6 $chunk = substr $in, $b, $e - $b;
471             # Now subdivide the chunk.
472             #
473             # What we got is one chunk, probably finished by `;'. Whoever, it
474             # may start with several inline functions.
475             #
476             # Note that inline functions contain ( ) { } in the stripped version.
477 1         3 $b1 = 0;
478 1         34 while ($chunk =~ /\(\s*\)\s*\{\s*\}/g) {
479 0         0 $e1 = pos $chunk;
480 0         0 push @inlines, $b + $b1, $b + $e1;
481 0         0 $chunk =~ /(?=\S)/g;
482 0         0 $b1 = pos $chunk;
483 0 0       0 $b1 = length $chunk, last unless defined $b1;
484             }
485 1 50       10 if ($e - $b - $b1 > 0) {
486 1         8 push @decls, $b + $b1, $e;
487 1         12 substr ($chunk, 0, $b1) = '';
488 1 50       9 if ($chunk =~ /,/) { # Contains multiple declarations.
489 0         0 push @mdecls, $b + $b1, $e;
490             } else { # Non-multiple.
491 1         4 my $isvar = 1;
492             # Since leading \s* is not optimized, this is quadratic!
493 1         92 $chunk =~ s{
494             ( ( const
495             | __attribute__ \s* \( \s* \)
496             ) \s* )* ( ; \s* )? \Z # Strip from the end
497             }()x;
498 1         52 $chunk =~ s/\s*\Z//;
499 1 50       12 if ($chunk =~ /\)\Z/) { # Function declaration ends on ")"!
500 1 50 33     24 if ($chunk !~ m{
501             \( .* \( # Multiple parenths
502             }x
503             and $chunk =~ / \w \s* \( /x) { # Most probably pointer to a function?
504 1         3 $isvar = 0;
505             }
506             }
507 1 50       16 if ($isvar) { # Heuristically variable
508 0         0 push @vdecls, $b + $b1, $e;
509             } else {
510 1         5 push @fdecls, $b + $b1, $e;
511             }
512             }
513             }
514 1         9 $in =~ /\G\s*/g ;
515 1         6 $b = pos $in;
516             }
517 1         13 [\@inlines, \@decls, \@mdecls, \@vdecls, \@fdecls];
518             }
519              
520             sub typedefs_whited { # Input is sanitized text, and list of beg/end.
521 1     1 0 20 my @lst = @{$_[1]};
  1         20  
522 1         3 my @out;
523 1         2 my ($b, $e);
524 1         6 while ($b = shift @lst) {
525 81         154 $e = shift @lst;
526 81         272 push @out, whited_decl($_[2], substr $_[0], $b, $e - $b);
527             }
528 1         8 \@out;
529             }
530              
531             # XXXX This is heuristical in many respects...
532             # Recipe: remove all struct-ish chunks. Remove all array specifiers.
533             # Remove GCC attribute specifiers.
534             # What remains may contain function's arguments, old types, and newly
535             # defined types.
536             # Remove function arguments using heuristics methods.
537             # Now out of several words in a row the last one is a newly defined type.
538              
539             sub whited_decl { # Input is sanitized.
540 81     81 0 127 my $keywords_rex = shift;
541 81         169 my $in = shift; # Text of a declaration
542 81         110 my $rest = $in;
543 81         101 my $out = $in; # Whited out $in
544              
545             # Remove all the structs
546 81         257 while ($out =~ /(\b(struct|union|class|enum)(\s+\w+)?\s*\{)/g) {
547 3         8 my $pos_start = pos($out) - length $1;
548              
549 3         7 matchingbrace($out);
550 3         5 my $pos_end = pos $out;
551 3         14 substr($out, $pos_start, $pos_end - $pos_start) =
552             ' ' x ($pos_end - $pos_start);
553 3         16 pos $out = $pos_end;
554             }
555              
556             # Deal with glibc's wierd ass __attribute__ tag. Just dump it.
557             # Maaaybe this should check to see if you're using GCC, but I don't
558             # think so since glibc is nice enough to do that for you. [MGS]
559 81         235 while ( $out =~ m/(\b(__attribute__|attribute)\s*\((?=\s*\())/g ) {
560 9         21 my $att_pos_start = pos($out) - length($1);
561              
562             # Need to figure out where ((..)) ends.
563 9         18 matchingbrace($out);
564 9         11 my $att_pos_end = pos $out;
565              
566             # Remove the __attribute__ tag.
567 9         25 substr($out, $att_pos_start, $att_pos_end - $att_pos_start) =
568             ' ' x ($att_pos_end - $att_pos_start);
569 9         34 pos $out = $att_pos_end;
570             }
571              
572             # Remove arguments of functions (heuristics only).
573             # These things (start) arglist of a declared function:
574             # paren word comma
575             # paren word space non-paren
576             # paren keyword paren
577             # start a list of arguments. (May be "cdecl *myfunc"?) XXXXX ?????
578 81         822 while ( $out =~ /(\(\s*(\w+(,|\s+[^\)\s])|$keywords_rex\s*\)))/g ) {
579 0         0 my $pos_start = pos($out) - length($1);
580 0         0 pos $out = $pos_start + 1;
581 0         0 matchingbrace($out);
582 0         0 substr ($out, $pos_start + 1, pos($out) - 2 - $pos_start)
583             = ' ' x (pos($out) - 2 - $pos_start);
584             }
585             # Remove array specifiers
586 81         148 $out =~ s/(\[[\w\s\+]*\])/ ' ' x length $1 /ge;
  0         0  
587 81         123 my $tout = $out;
588             # Several words in a row cannot be new typedefs, but the last one.
589 81         548 $out =~ s/((\w+\s+)+(?=[^\s,;\[\{\)]))/ ' ' x length $1 /ge;
  78         574  
590 81 50       307 unless ($out =~ /\w/) {
591             # Probably a function-type declaration: typedef int f(int);
592             # Redo scan leaving the last word of the first group of words:
593 0         0 $tout =~ /(\w+\s+)*(\w+)/g;
594 0         0 $out = ' ' x (pos($tout) - length $2)
595             . $2 . ' ' x (length($tout) - pos($tout));
596             # warn "function typedef\n\t'$in'\nwhited-out as\n\t'$out'\n";
597             }
598 81 50       198 warn "panic: length mismatch\n\t'$in'\nwhited-out as\n\t'$out'\n"
599             if length($in) != length $out;
600             # Sanity check
601 81 50       307 warn "panic: multiple types without intervening comma in\n\t$in\nwhited-out as\n\t$out\n"
602             if $out =~ /\w[^\w,]+\w/;
603 81 50       264 warn "panic: no types found in\n\t$in\nwhited-out as\n\t$out\n"
604             unless $out =~ /\w/;
605 81         400 $out
606             }
607              
608             sub matchingbrace {
609             # pos($_[0]) is after the opening brace now
610 44     44 0 74 my $n = 0;
611 44         517 while ($_[0] =~ /([\{\[\(])|([\]\)\}])/g) {
612 210 100       490 $1 ? $n++ : $n-- ;
613 210 100       1907 return 1 if $n < 0;
614             }
615             # pos($_[0]) is after the closing brace now
616 0         0 return; # false
617             }
618              
619             sub remove_Comments_no_Strings { # We expect that no strings are around
620 0     0 0 0 my $in = shift;
621 0         0 $in =~ s,/(/.*|\*[\s\S]*?\*/),,g ; # C and C++
622 0 0       0 die "Unfinished comment" if $in =~ m,/\*, ;
623 0         0 $in;
624             }
625              
626             sub sanitize { # We expect that no strings are around
627 2     2 0 286 my $in = shift;
628             # C and C++, strings and characters
629 2         68 $in =~ s{ / (
630             / .* # C++ style
631             |
632             \* [\s\S]*? \*/ # C style
633             ) # (1)
634             | '((?:[^\\\']|\\.)+)' # (2) Character constants
635             | "((?:[^\\\"]|\\.)*)" # (3) Strings
636             | ( ^ \s* \# .* # (4) Preprocessor
637             ( \\ $ \n .* )* ) # and continuation lines
638             } {
639             # We want to preserve the length, so that one may go back
640 110 0       2342 defined $1 ? ' ' x (1 + length $1) :
    0          
    50          
    50          
641             defined $4 ? ' ' x length $4 :
642             defined $2 ? "'" . ' ' x length($2) . "'" :
643             defined $3 ? '"' . ' ' x length($3) . '"' : '???'
644             }xgem ;
645 2 50       53 die "Unfinished comment" if $in =~ m{ /\* }x;
646 2         54 $in;
647             }
648              
649             sub top_level { # We expect argument is sanitized
650             # Note that this may remove the variable in declaration: int (*func)();
651 3     3 0 53 my $in = shift;
652 3         11 my $start;
653 3         19 my $out = $in;
654 3         40 while ($in =~ /[\[\{\(]/g ) {
655 28         51 $start = pos $in;
656 28         91 matchingbrace($in);
657 28         247 substr($out, $start, pos($in) - 1 - $start)
658             = ' ' x (pos($in) - 1 - $start);
659             }
660 3         47 $out;
661             }
662              
663             sub remove_type_decl { # We suppose that the arg is top-level only.
664 1     1 0 312 my $in = shift;
665 1         26 $in =~ s/(\b__extension__)(\s+typedef\b)/(' ' x length $1) . $2/gse;
  2         50  
666 1         15 $in =~ s/(\btypedef\b.*?;)/' ' x length $1/gse;
  81         618  
667             # The following form may appear only in the declaration of the type itself:
668 1         109 $in =~
669 1         9 s/(\b(enum|struct|union|class)\b[\s\w]*\{\s*\}\s*;)/' ' x length $1/gse;
670             # Pre-declarations:
671 1         115 $in =~
672 1         13 s/(\b(enum|struct|union|class)\b[\s\w]*;)/' ' x length $1/gse;
673 1         24 $in;
674             }
675              
676             sub new {
677 3     3 0 35628 my $class = shift;
678 3         68 my $out = SUPER::new $class $recipes;
679 3         62 $out->set(@_);
680 3         64 $out;
681             }
682              
683             sub do_declarations {
684 1     1 0 141 my @d = map do_declaration($_, $_[1], $_[2]), @{ $_[0] };
  1         17  
685 1         13 \@d;
686             }
687              
688             # Forth argument: if defined, there maybe no identifier. Generate one
689             # basing on this argument.
690              
691             sub do_declaration {
692 1     1 0 5 my ($decl, $typedefs, $keywords, $argnum) = @_;
693 1         51 $decl =~ s/;?\s*$//;
694 1         3 my ($type, $typepre, $typepost, $ident, $args, $w, $pos, $repeater);
695 1         19 $decl =~ s/^\s*extern\b\s*//;
696 1         3 $pos = 0;
697 1   66     49 while ($decl =~ /(\w+)/g and ($typedefs->{$1} or $keywords->{$1})) {
      33        
698 1         4 $w = $1;
699 1 50       18 if ($w =~ /^(struct|class|enum|union)$/) {
700 0 0       0 $decl =~ /\G\s+\w+/g or die "`$w' is not followed by word in `$decl'";
701             }
702 1         16 $pos = pos $decl;
703             }
704 1         9 pos $decl = $pos;
705 1 50       15 $decl =~ /\G[\s*]*\*/g or pos $decl = $pos;
706 1         5 $type = substr $decl, 0, pos $decl;
707 1 50       7 $decl =~ /\G\s*/g or pos $decl = length $type; # ????
708 1         3 $pos = pos $decl;
709 1 50       9 if (defined $argnum) {
710 0 0       0 if ($decl =~ /\G(\w+)((\s*\[[^][]*\])*)/g) { # The best we can do with [2]
711 0         0 $ident = $1;
712 0         0 $repeater = $2;
713 0         0 $pos = pos $decl;
714             } else {
715 0         0 pos $decl = $pos = length $decl;
716 0         0 $type = $decl;
717 0         0 $ident = "arg$argnum";
718             }
719             } else {
720 1 50       11 die "Cannot process declaration `$decl' without an identifier"
721             unless $decl =~ /\G(\w+)/g;
722 1         3 $ident = $1;
723 1         3 $pos = pos $decl;
724             }
725 1 50       8 $decl =~ /\G\s*/g or pos $decl = $pos;
726 1         4 $pos = pos $decl;
727 1 50       8 if (pos $decl != length $decl) {
728 1         5 pos $decl = $pos;
729 1 50       12 die "Expecting parenth after identifier in `$decl'\nafter `",
730             substr($decl, 0, $pos), "'"
731             unless $decl =~ /\G\(/g;
732 1         5 my $argstring = substr($decl, pos($decl) - length $decl);
733 1 50       15 matchingbrace($argstring) or die "Cannot find matching parenth in `$decl'";
734 1         5 $argstring = substr($argstring, 0, pos($argstring) - 1);
735 1         6 $argstring =~ s/ ^ ( \s* void )? \s* $ //x;
736 1         3 $args = [];
737 1         3 my @args;
738 1 50       7 if ($argstring ne '') {
739 1         4 my $top = top_level $argstring;
740 1         3 my $p = 0;
741 1         2 my $arg;
742 1         286 while ($top =~ /,/g) {
743 4         14 $arg = substr($argstring, $p, pos($top) - 1 - $p);
744 4         41 $arg =~ s/^\s+|\s+$//gs;
745 4         11 push @args, $arg;
746 4         16 $p = pos $top;
747             }
748 1         11 $arg = substr $argstring, $p;
749 1         17 $arg =~ s/^\s+|\s+$//gs;
750 1         6 push @args, $arg;
751             }
752 1         3 my $i = 0;
753 1         4 for (@args) {
754 5         22 push @$args, do_declaration1($_, $typedefs, $keywords, $i++);
755             }
756             }
757 1         8 [$type, $ident, $args, $decl, $repeater];
758             }
759              
760             sub do_declaration1 {
761 5     5 0 14 my ($decl, $typedefs, $keywords, $argnum) = @_;
762 5         59 $decl =~ s/;?\s*$//;
763 5         16 my ($type, $typepre, $typepost, $ident, $args, $w, $pos, $repeater);
764 5         9 $pos = 0;
765 5   100     77 while ($decl =~ /(\w+)/g and ($typedefs->{$1} or $keywords->{$1})) {
      33        
766 5         13 $w = $1;
767 5 100       23 if ($w =~ /^(struct|class|enum|union)$/) {
768 1 50       9 $decl =~ /\G\s+\w+/g or die "`$w' is not followed by word in `$decl'";
769             }
770 5         66 $pos = pos $decl;
771             }
772 5         17 pos $decl = $pos;
773 5 100       34 $decl =~ /\G[\s*]*\*/g or pos $decl = $pos;
774 5         14 $type = substr $decl, 0, pos $decl;
775 5 50       32 $decl =~ /\G\s*/g or pos $decl = length $type; # ????
776 5         9 $pos = pos $decl;
777 5 50       14 if (defined $argnum) {
778 5 50       29 if ($decl =~ /\G(\w+)((\s*\[[^][]*\])*)/g) { # The best we can do with [2]
779 5         19 $ident = $1;
780 5         14 $repeater = $2;
781 5         11 $pos = pos $decl;
782             } else {
783 0         0 pos $decl = $pos = length $decl;
784 0         0 $type = $decl;
785 0         0 $ident = "arg$argnum";
786             }
787             } else {
788 0 0       0 die "Cannot process declaration `$decl' without an identifier"
789             unless $decl =~ /\G(\w+)/g;
790 0         0 $ident = $1;
791 0         0 $pos = pos $decl;
792             }
793 5 50       22 $decl =~ /\G\s*/g or pos $decl = $pos;
794 5         10 $pos = pos $decl;
795 5 50       18 if (pos $decl != length $decl) {
796 0         0 pos $decl = $pos;
797 0 0       0 die "Expecting parenth after identifier in `$decl'\nafter `",
798             substr($decl, 0, $pos), "'"
799             unless $decl =~ /\G\(/g;
800 0         0 my $argstring = substr($decl, pos($decl) - length $decl);
801 0 0       0 matchingbrace($argstring) or die "Cannot find matching parenth in `$decl'";
802 0         0 $argstring = substr($argstring, 0, pos($argstring) - 1);
803 0         0 $argstring =~ s/ ^ ( \s* void )? \s* $ //x;
804 0         0 $args = [];
805 0         0 my @args;
806 0 0       0 if ($argstring ne '') {
807 0         0 my $top = top_level $argstring;
808 0         0 my $p = 0;
809 0         0 my $arg;
810 0         0 while ($top =~ /,/g) {
811 0         0 $arg = substr($argstring, $p, pos($top) - 1 - $p);
812 0         0 $arg =~ s/^\s+|\s+$//gs;
813 0         0 push @args, $arg;
814 0         0 $p = pos $top;
815             }
816 0         0 $arg = substr $argstring, $p;
817 0         0 $arg =~ s/^\s+|\s+$//gs;
818 0         0 push @args, $arg;
819             }
820 0         0 my $i = 0;
821 0         0 for (@args) {
822 0         0 push @$args, do_declaration2($_, $typedefs, $keywords, $i++);
823             }
824             }
825 5         37 [$type, $ident, $args, $decl, $repeater];
826             }
827              
828             ############################################################
829              
830             package C::Preprocessed;
831 1     1   1007 use Symbol;
  1         1128  
  1         105  
832 1     1   8 use File::Basename;
  1         2  
  1         64  
833 1     1   7 use Config;
  1         1  
  1         634  
834              
835             sub new {
836 5 50 33 5   56 die "usage: C::Preprocessed->new(filename[, defines[, includes[, cpp]]])"
837             if @_ < 2 or @_ > 5;
838 5         18 my ($class, $filename, $Defines, $Includes, $Cpp)
839             = (shift, shift, shift, shift, shift);
840 5   50     14 $Cpp ||= \%Config::Config;
841 5   50     272 my $filedir = dirname $filename || '.';
842 5   50     13 $Includes ||= [$filedir, '/usr/local/include', '.'];
843 5         16 my $addincludes = "";
844 5 50 33     56 $addincludes = "-I" . join(" -I", @$Includes)
845             if defined $Includes and @$Includes;
846 5         24 my($sym) = gensym;
847 5         179 my $cmd = "echo '\#include \"$filename\"' | $Cpp->{cppstdin} $Defines $addincludes $Cpp->{cppflags} $Cpp->{cppminus} |";
848             #my $cmd = "$Cpp->{cppstdin} $Defines $addincludes $Cpp->{cppflags} $Cpp->{cppminus} < $filename |";
849             #my $cmd = "echo '\#include <$filename>' | $Cpp->{cppstdin} $Defines $addincludes $Cpp->{cppflags} $Cpp->{cppminus} |";
850              
851 5 50 50     29803 (open($sym, $cmd) or die "Cannot open pipe from `$cmd': $!")
852             and bless $sym => $class;
853             }
854              
855             sub text {
856 2     2   482 my $class = shift;
857 2         3 my $filter = shift;
858 2 50       6 if (defined $filter) {
859 0         0 return text_only_from($class, $filter, @_);
860             }
861 2         16 my $stream = $class->new(@_);
862 2         57 my $oh = select $stream;
863 2         33 $/ = undef;
864 2         20 select $oh;
865 2         61066 <$stream>;
866             }
867              
868             sub text_only_from {
869 0     0   0 my $class = shift;
870 0   0     0 my $from = shift || die "Expecting argument in `text_only_from'";
871 0         0 my $stream = $class->new(@_);
872 0         0 my $on = $from eq $_[0];
873 0 0       0 my $eqregexp = $on ? '\"\"|' : '';
874 0         0 my @out;
875 0         0 while (<$stream>) {
876             #print;
877              
878 0 0       0 $on = /$eqregexp[\"\/]\Q$from\"/ if /^\#/;
879 0 0       0 push @out, $_ if $on;
880             }
881 0         0 join '', @out;
882             }
883              
884             sub DESTROY {
885 5 50   5   710 close($_[0])
886             or die "Cannot close pipe from `$Config::Config{cppstdin}': err $?, $!\n";
887             }
888              
889             # Autoload methods go after __END__, and are processed by the autosplit program.
890             # Return to the principal package.
891             package C::Scan;
892              
893             1;
894             __END__