File Coverage

blib/lib/File/OSS/Scan.pm
Criterion Covered Total %
statement 51 327 15.6
branch 0 162 0.0
condition 0 81 0.0
subroutine 17 29 58.6
pod 7 12 58.3
total 75 611 12.2


line stmt bran cond sub pod time code
1             =head1 NAME
2              
3             File::OSS::Scan - Scan the repository of project and detect any OSS ( Open Source Software ) files
4              
5             =head1 VERSION
6              
7             version 0.04
8              
9             =head1 SYNOPSIS
10              
11             use File::OSS::Scan qw(:scan);
12              
13             scan_init( 'verbose' => 0, 'inflate' => 1 );
14              
15             scan_execute($proj_dir);
16             my $ret = scan_result();
17              
18             =head1 DESCRIPTION
19              
20             This module allows you to scan your project directory based on a set of pre-defined
21             but also customizable rules, to detect all the used source files that originate from
22             OSS ( or commercial software ). Unlike some of those commercial solutions for the OSS
23             management, here we don't have to maintain a OSS code database, it means that we will
24             not conduct code snippet match, and completely rely on the pattern match ( looking for
25             a particular type of file, eg COPYING, LICENSE, or the existence of specific strings in
26             file content, eg Copyright, LGPL License etc ).
27              
28             =head1 ATTRIBUTES
29              
30             C takes a set of options. These options will be printed out to the C
31             if it runs in the I mode ( C<'verbose' =E 2> ).
32              
33             =over 4
34              
35             =item C
36              
37             used to specify the path of your own config file for L, where
38             you can write up your own rules for OSS detection. If not specified, then it will try to
39             check the value of C<$ENV{OSSSCAN_CONFIG}> and C<./.ossscan.rc>, if still can not find a
40             valid configuration file in all of the above places, then it will default to use the
41             embedded rules contained in the C<__DATA__> section of L.
42              
43             =item C
44              
45             C<[0|1|2]>. set your I level, 0 is silent and 2 is verbose, 1 is well. It
46             defaults to 1 if not specified, and only ouput messages about detected matches.
47              
48             =item C
49              
50             C<[0|1|2]>. set your I mode, 0 is no cache, 1 is to use cache, 2 is to refresh cache.
51             It defaults to 0 if not specified, and will not enable the cache feature. if set this option
52             to 1, it checks every file against the records in the cache to see if the file has been
53             changed recently, if there is no change since the last run of scanning, then this file will
54             be skipped. if set to 2, it will not check the change on files and hence process each one of
55             them, also forces the refresh of cache records for every files.
56              
57             =item C
58              
59             C<[0|1]>. This option is used to indicate whether we want to inflate a compressed or archived
60             files and scan those extracted content. It defaults to 0 if not specified. Supported file
61             types include: I<.jar>, I<.tar>, I<.gz>, I<.zip>, I<.Z>.
62              
63             =item C
64              
65             used to specify the working directory for file inflating. if not specified, it defaults to
66             use ( create one if not existed ) the dir named I<.working> under the current directory where
67             the program is running. B, C will empty this dir everytime it is called
68             by using a C command. so one should be very cautious to any value assigned to this
69             option, make sure that it doesn't clash with any existing dirs where you have important data
70             stored.
71              
72             =item C
73              
74             path of the cmd I. If not specified, it defaults to C. If can not find
75             an executable I command, then it will skip any binary files encountered.
76              
77             =item C
78              
79             path of the cmd I. If not specified, it defaults to C. If can not find an
80             executable I command, then it will skip any I<.jar> files encountered.
81              
82             =item C
83              
84             path of the cmd I. If not specified, it defaults to C. If can not find an
85             executable I command, then it will skip any I<.tar> files encountered.
86              
87             =item C
88              
89             path of the cmd I. If not specified, it defaults to C. If can not find an
90             executable I command, then it will skip any I<.gz> files encountered.
91              
92             =item C
93              
94             path of the cmd I. If not specified, it defaults to C. If can not find an
95             executable I command, then it will skip any I<.zip> files encountered.
96              
97              
98             =item C
99              
100             path of the cmd I. If not specified, it defaults to C. If can not
101             find an executable I command, then it will skip any I<.Z> files encountered.
102              
103             =back
104              
105             =head1 METHODS
106              
107             =head2 C
108              
109             use File::OSS::Scan qw( :scan );
110              
111             scan_init(
112             'verbose' => 2, # chatty output
113             'inflate' => 1, # inflate archived files
114             'cache' => 1 # enable cache
115             );
116              
117             Do the necessary initialization works required prior to running the scan, including availability
118             checks on needed commands, initialize the working directory and initiate a L
119             and a L instance. Accepted parameters are described in details in
120             L section.
121              
122             =head2 C
123              
124             use File::OSS::Scan qw( :scan );
125              
126             scan_init(); # we are fine with defaults
127             scan_execute($proj_dir);
128              
129             Do the actual scanning on the given project directory and any detected OSS files will be recorded
130             in the instance of L and can be fetched via method C later.
131             The only parameter required here is the C<$proj_dir>, which is used to tell the module which project
132             directory you want to scan.
133              
134             =head2 C
135              
136             use File::OSS::Scan qw( :scan );
137              
138             scan_init(); # we are fine with defaults
139             scan_execute($proj_dir);
140              
141             my $ret_hash = scan_result();
142             my $ret_text = scan_result('txt');
143             my $ret_html = scan_result('html');
144             my $ret_json = scan_result('json');
145              
146             Get all the detected matches on files within the project directory. Parameter $format can be one of
147             the I - plain text, I - formatted HTML tables or I - JSON string. If not specified,
148             then it will return the raw data hash.
149              
150             =head2 C
151              
152             use File::OSS::Scan qw( :all );
153              
154             clear_cache();
155              
156             Clean all cached results from file system.
157              
158             =head1 SCAN RULES
159              
160             Scan rules can be configured in the config file specified via param C, or in the file
161             I<.ossscan.rc> under the current directory where the program is running. If neither of them exists,
162             then as a last resort, it will read the C<__DATA__> section of the module L.
163             Currently it supports the following types of rules, If you are not sure about how to compose it, then
164             the best approach is to refer to the C<__DATA__> section of the module L.
165              
166             =head2 C<[SECTION]>
167              
168             # section for file check
169             [FILE]
170             ...
171              
172             # section for line check
173             [LINE]
174             ...
175              
176             This is used to declare section of rules that all following rules are belong to. Valid sections contain
177             C, C, C and C.
178              
179             =head2 C
180              
181             100% filename_match COPYING\.\w+
182             50% filename_match AUTHOR[S]?
183              
184             Detect OSS file based on the filename check. The first element is the I, ranging from 0(%)
185             to 100 (%). The second element is the function name which will be called to process this rule. The
186             rest part is a pattern(regex) used for searching.
187              
188             =head2 C
189              
190             100% content_match MIT\W*Licen[cs]e
191             100% content_match Artistic\W*Licen[cs]e
192              
193             Detect OSS file by checking if the file's content matches some of the license strings. The first
194             element is the I, ranging from 0(%) to 100 (%). The second element is the function
195             name which will be called to process this rule. The rest part is a pattern(regex) used for searching.
196              
197             =head2 C
198              
199             50% copyright_match MY_COMPANY MyCompany
200              
201             Detect OSS file by checking if there is a copryright declaration statement in the file. The first
202             element is the I, ranging from 0(%) to 100 (%). The second element is the function
203             name which will be called to process this rule. The rest part is a list of names to be excluded, usually
204             we specify our own company's name here, so when we found a copyright statement like:
205              
206             Copyright (C) 1998 - 2012, MyCompany,
207              
208             we will know that these are proprietary codes and should be excluded from the detected matches.
209              
210             =head2 C
211              
212             exclude_dir: data
213              
214             This is a global setting, so should be defined under the section C or in the very begining of the
215             configuration file. It accepts a list of directory names and these directories will be skipped during the
216             scanning.
217              
218             =head2 C
219              
220             exclude_file: Makefile Build\.PL
221              
222             This is a global setting, so should be defined under the section C or in the very begining of the
223             configuration file. It accepts a list of file names ( or pattern ) and these files will be skipped during
224             the scanning.
225              
226             =head2 C
227              
228             exclude_extension: png jpg gif pdf doc docx html htm xml json xls
229              
230             This is a global setting, so should be defined under the section C or in the very begining of the
231             configuration file. It accepts a list of file extension names and files with the listed extensions will be
232             skipped during the scanning.
233              
234             =head1 SEE ALSO
235              
236             =over 4
237              
238             =item * L
239              
240             =item * L
241              
242             =item * L
243              
244             =item * L
245              
246             =back
247              
248             =head1 AUTHOR
249              
250             Harry Wang
251              
252             =head1 COPYRIGHT AND LICENSE
253              
254             This software is Copyright (c) 2014 by Harry Wang.
255              
256             This is free software, licensed under:
257              
258             Artistic License 1.0
259              
260             =cut
261              
262             package File::OSS::Scan;
263              
264 1     1   1004 use strict;
  1         2  
  1         40  
265 1     1   5 use warnings FATAL => 'all';
  1         2  
  1         50  
266              
267 1     1   5353 use Fatal qw( open close );
  1         43493  
  1         7  
268 1     1   2262 use Carp;
  1         2  
  1         279  
269 1     1   1714 use English qw( -no_match_vars );
  1         4911  
  1         11  
270 1     1   9238 use Data::Dumper; # for debug
  1         14470  
  1         84  
271 1     1   10 use Cwd;
  1         2  
  1         66  
272 1     1   1133 use File::Copy;
  1         2338  
  1         52  
273 1     1   4 use File::Basename;
  1         2  
  1         60  
274              
275 1     1   587 use File::OSS::Scan::Constant qw(:all);
  1         4  
  1         514  
276 1     1   708 use File::OSS::Scan::Ruleset;
  1         4  
  1         38  
277 1     1   1000 use File::OSS::Scan::Matches;
  1         4  
  1         38  
278 1     1   11879 use File::OSS::Scan::Cache;
  1         4  
  1         583  
279              
280             our $VERSION = '0.04';
281              
282             our @ISA = qw(Exporter);
283             our @EXPORT_OK = qw(scan_init scan_execute scan_result clear_cache);
284              
285             our %EXPORT_TAGS = (
286             all => \@EXPORT_OK,
287             scan => [ @EXPORT_OK[0..2] ],
288             );
289              
290             my $curr_dir;
291             my $recur_depth;
292             my $scan_base = '';
293             my $anchor_file;
294              
295             our $cmd_strings;
296             our $cmd_jar;
297             our $cmd_tar;
298             our $cmd_gunzip;
299             our $cmd_unzip;
300             our $cmd_uncompress;
301              
302             my $ruleset = undef;
303             my $setting = undef;
304             my $result = undef;
305              
306             # list all valid options with their default values
307             my %valid_options = (
308             ruleset_config => undef,
309             verbose => VERBOSE_NORMAL,
310             cache => CACHE_NONE,
311             strings => '/bin/strings',
312             jar => '/bin/jar',
313             tar => '/bin/tar',
314             gunzip => '/bin/gunzip',
315             unzip => '/bin/unzip',
316             uncompress => '/bin/uncompress',
317             working_dir => getcwd() . "\/\.working",
318             inflate => UNI_FALSE,
319             );
320              
321             my $options = undef;
322             my $user = getlogin() || ( getpwuid $< )[0];
323              
324             sub scan_init {
325 0 0   0 1   my %params = ( scalar(@_) != 1 ) ? @_ : ( 'ruleset_config' => $_[0] );
326              
327             # convert hash keys to lower case
328 0           %params = map { lc $_ => $params{$_} } keys %params;
  0            
329              
330             # clear previously set options
331 0           undef $options;
332              
333             # set options
334 0           foreach my $opt ( keys %valid_options ) {
335 0 0         $options->{$opt} = defined $params{$opt} ?
336             $params{$opt} : $valid_options{$opt};
337             }
338              
339 0 0 0       croak "invalid option verbose: $options->{'verbose'}"
      0        
340             if ( ( $options->{'verbose'} !~ /^\d$/ ) ||
341             ( $options->{'verbose'} < VERBOSE_SILIENT ) ||
342             ( $options->{'verbose'} > VERBOSE_CHATTY ) );
343              
344 0 0 0       croak "invalid option cache: $options->{'cache'}"
      0        
345             if ( ( $options->{'cache'} !~ /^\d$/ ) ||
346             ( $options->{'cache'} < CACHE_NONE ) ||
347             ( $options->{'cache'} > CACHE_REFRESH ) );
348              
349 0 0 0       croak "working directory $options->{'working_dir'} doesn't exist or not writable"
      0        
350             if ( ! ( ( -d $options->{'working_dir'} and
351             -w $options->{'working_dir'} ) ||
352             mkdir( $options->{'working_dir'}, 0755 ) ) );
353              
354             # empty the working directory,
355             # should be very very cautious with the param working_dir ...
356 0           system("rm -rf $options->{'working_dir'}/*");
357              
358             # make sure the tools are available
359 0           foreach ( qw/strings jar tar gunzip unzip uncompress/ ) {
360 1     1   57 no strict 'refs';
  1         4  
  1         2522  
361              
362 0           my $cmd_var = __PACKAGE__ . '::cmd_' . $_;
363 0           $$cmd_var = $options->{$_};
364              
365 0 0         if ( ! -x $$cmd_var ) {
366 0           carp "unable to execute the $_ binary $$cmd_var";
367 0           undef $$cmd_var;
368             }
369             }
370              
371 0           my $config_file = $options->{'ruleset_config'};
372              
373             # clear previously set rulesets
374 0           undef $ruleset;
375              
376             # clear previous settings
377 0           undef $setting;
378              
379             # initiate an Ruleset object with the rules
380             # fetched from the config file.
381 0           File::OSS::Scan::Ruleset->init($config_file);
382              
383             # set rulesets
384 0           $ruleset = File::OSS::Scan::Ruleset->get_ruleset();
385              
386 0           $setting = $ruleset->{'GLOBAL'};
387              
388             # initialize Matches object to store the result.
389 0           File::OSS::Scan::Matches->init();
390              
391 0           return SUCCESS;
392             }
393              
394             sub scan_execute {
395 0   0 0 1   my $base_dir = shift || return SUCCESS;
396              
397             # if we have fetched the ruleset config ?
398 0 0         croak __PACKAGE__ . " is not properly initialized."
399             if ( ! defined $ruleset );
400              
401             # reset scan result
402 0           undef $result;
403              
404             # reset recursion depth
405 0           $recur_depth = 0;
406              
407             # store the top level directory where the scan begins
408 0           $scan_base = $base_dir;
409              
410             # get current directory
411 0           $curr_dir = getcwd();
412              
413             # be nice to your users
414 0 0         greeting()
415             if ( $options->{'verbose'} != VERBOSE_SILIENT );
416              
417             # initiate a cache object
418 0 0         File::OSS::Scan::Cache->init($base_dir)
419             if ( $options->{'cache'} != CACHE_NONE );
420              
421 0 0         File::OSS::Scan::Cache->clear()
422             if ( $options->{'cache'} == CACHE_REFRESH );
423              
424             # call scan_dir to recursivly scan for all files and
425             # directories under the given base dir.
426 0           scan_dir($base_dir);
427              
428 0           return SUCCESS;
429             }
430              
431             sub scan_result {
432             return
433 0     0 1   File::OSS::Scan::Matches->get_matches($_[0]);
434             }
435              
436             sub clear_cache {
437             return
438 0     0 1   File::OSS::Scan::Cache->clear_all();
439             }
440              
441             sub scan_dir {
442 0   0 0 0   my $base_dir = shift || return SUCCESS;
443              
444 0           my $exclude_dirs = $setting->{'exclude_dir'};
445              
446 0           foreach my $exclude_dir ( @$exclude_dirs ) {
447 0 0         if ( $base_dir =~ /$exclude_dir/ ) {
448 0 0         printing("exclude directory pattern $exclude_dir, skipping directory $base_dir")
449             if ( $options->{'verbose'} == VERBOSE_CHATTY );
450              
451 0           $recur_depth--;
452 0           return SKIP;
453             }
454             }
455              
456 0           my $leading_fmt = "--> " x $recur_depth;
457              
458 0 0         if ( $options->{'verbose'} == VERBOSE_CHATTY ) {
459 0           $base_dir =~ /^$scan_base\/(.*)$/;
460 0   0       my $short_base_dir = $1 || '';
461              
462 0 0         if ( not $short_base_dir ) {
463 0           $base_dir =~ /^$options->{'working_dir'}\/(.*)$/;
464 0   0       $short_base_dir = $1 || '';
465             }
466 0           printing("entering directory: ./$short_base_dir");
467             }
468              
469 0           local *DIR;
470 0 0         opendir(DIR, $base_dir) ||
471             croak "could not open directory $base_dir, $!";
472              
473 0           my @items = sort grep { $_ !~ m{^\.} } readdir DIR;
  0            
474 0           closedir(DIR);
475              
476 0           my ( @subfiles, @subdirs );
477 0 0         map {
478 0           ( -f "$base_dir/$_" ) ?
479             ( push @subfiles, $_ ) :
480             ( push @subdirs, $_ )
481             } @items;
482              
483             # check for each files
484 0           foreach my $file ( @subfiles ) {
485 0           my $file_path = $base_dir . "/" . $file;
486              
487 0           my ( $size, $mtime ) = (stat($file_path))[7,9];
488 0           my $mtime_stamp = localtime($mtime);
489              
490 0 0         printf( " " x 10 . $leading_fmt . "%-" . WIDTH_FILENAME . "s " .
491             "%-" . WIDTH_SIZE . "s " .
492             "%-" . WIDTH_MTIME . "s\n",
493             $file, $size, $mtime_stamp)
494             if ( $options->{'verbose'} == VERBOSE_CHATTY );
495              
496 0           my $h_file = {
497             'name' => $file,
498             'path' => $file_path,
499             'size' => $size,
500             'mtime' => $mtime,
501             };
502              
503 0           my $skip_flag = UNI_FALSE;
504 0 0         if ( $options->{'cache'} == CACHE_USE ) {
505 0           my $cached_file = File::OSS::Scan::Cache->get($file_path);
506              
507 0 0         if ( defined $cached_file ) {
508              
509 0           my $c_size = $cached_file->{'size'};
510 0           my $c_mtime = $cached_file->{'mtime'};
511              
512 0 0 0       if ( ( $size eq $c_size ) &&
513             ( $mtime eq $c_mtime ) ) {
514              
515 0 0         printing("file $file_path has not been changed since the last scan, skipping ...")
516             if ( $options->{'verbose'} == VERBOSE_CHATTY );
517              
518 0           $skip_flag = UNI_TRUE;
519             }
520             }
521              
522             }
523              
524 0 0 0       File::OSS::Scan::Cache->set( $file_path => $h_file )
525             if ( ! $skip_flag and
526             ( $options->{'verbose'} != CACHE_NONE ) );
527              
528 0 0         check_file($h_file)
529             if not $skip_flag;
530             }
531              
532             # check for each subdirs
533 0           foreach my $dir ( @subdirs ) {
534 0           my $new_dir = $base_dir . "/" . $dir;
535 0           $recur_depth++;
536 0           scan_dir($new_dir);
537             }
538              
539 0           $recur_depth--;
540              
541 0           return SUCCESS;
542              
543             }
544              
545             sub greeting {
546 0     0 0   my $bar_fmt = "#" x WIDTH_BAR . "\n";
547 0           print $bar_fmt . "#\n";
548              
549 0           my $winfo_leading_fmt = "#" . " " x 4;
550              
551 0           print $winfo_leading_fmt . __PACKAGE__ . " v$VERSION\n";
552 0           print $winfo_leading_fmt . "\n";
553              
554 0           printf( $winfo_leading_fmt . "%-" . WIDTH_INFO_KEY . "s " .
555             "%-" . WIDTH_INFO_VAL . "s\n", "[User]:", $user );
556 0           printf( $winfo_leading_fmt . "%-" . WIDTH_INFO_KEY . "s " .
557             "%-" . WIDTH_INFO_VAL . "s\n", "[Current Directory]:", $curr_dir );
558 0           printf( $winfo_leading_fmt . "%-" . WIDTH_INFO_KEY . "s " .
559             "%-" . WIDTH_INFO_VAL . "s\n", "[Scanning Directory]:", $scan_base );
560              
561 0           print $winfo_leading_fmt . "\n";
562 0           print $winfo_leading_fmt . "-" x WIDTH_INFO_KEY . "\n";
563 0           print $winfo_leading_fmt . "\n";
564              
565 0           print $winfo_leading_fmt . "[Options]:\n";
566 0           print $winfo_leading_fmt . "\n";
567              
568 0           foreach my $opt ( keys %$options ) {
569 0 0         printf( $winfo_leading_fmt . "%-" . WIDTH_INFO_KEY . "s " .
570             "%-" . WIDTH_INFO_VAL . "s\n", "$opt",
571             defined $options->{$opt} ? $options->{$opt} : 'UNDEF' );
572             }
573              
574 0           print "#\n" . $bar_fmt;
575             }
576              
577             sub printing {
578 0   0 0 0   my $msg = shift || return SUCCESS;
579              
580 0           my ( $sec, $min, $hr, $day, $mon, $yr ) = localtime();
581 0           my $timestamp = sprintf "%04d/%02d/%02d %02d:%02d:%02d", $yr + 1900,
582             $mon + 1, $day, $hr, $min, $sec;
583              
584 0           print "$timestamp $msg\n";
585 0           return SUCCESS;
586             }
587              
588             sub check_file {
589 0   0 0 0   my $h_file = shift || return SUCCESS;
590              
591 0   0       my $rules = $ruleset->{'FILE'}
592             || return SUCCESS;
593              
594 0           my $exclude = $setting->{'exclude_extension'};
595 0 0         my $file_ext = ( $h_file->{'name'} =~ /\.([^.]+)$/ ) ? $1 : '';
596 0           my $binary = UNI_FALSE;
597              
598 0 0         if ( $file_ext ne '') {
599              
600 0 0         if ( grep(/^\Q$file_ext\E$/i, @$exclude) ) {
601 0 0         printing("exclude extension $file_ext, skipping file $h_file->{'path'}")
602             if ( $options->{'verbose'} == VERBOSE_CHATTY );
603              
604 0           return SKIP;
605             }
606             }
607              
608 0           my $exclude_files = $setting->{'exclude_file'};
609 0           foreach my $exclude_file ( @$exclude_files ) {
610 0 0         if ( $h_file->{'name'} =~ /\Q$exclude_file\E/ ) {
611 0 0         printing("exclude file pattern $exclude_file, skipping file $h_file->{'path'}")
612             if ( $options->{'verbose'} == VERBOSE_CHATTY );
613              
614 0           return SKIP;
615             }
616             }
617              
618 0 0 0       if ( -f $h_file->{'path'} and ! -s $h_file->{'path'} ) {
619 0 0         printing("encountered an empty file $h_file->{'path'}, skipping ...")
620             if ( $options->{'verbose'} == VERBOSE_CHATTY );
621              
622 0           return SKIP;
623             }
624              
625 0 0         if ( -B $h_file->{'path'} ) {
626 0 0         printing("encountered a binary file $h_file->{'path'}")
627             if ( $options->{'verbose'} == VERBOSE_CHATTY );
628              
629 0           $binary = UNI_TRUE;
630             }
631              
632 0           foreach my $rule ( @$rules ) {
633 0           my ( $func, $cert, $args )
634             = @$rule{qw/func cert args/};
635              
636             {
637 1     1   11 no strict 'refs';
  1         2  
  1         446  
  0            
638 0           my $msg = $func->( $h_file, $cert, $args );
639              
640 0 0         if ( $msg ) {
641 0 0 0       if ( defined $anchor_file and
642             $anchor_file =~ /^(.*)\.[^.]*$/ ) {
643              
644 0           my $anchor = $1;
645              
646 0 0         my $path = ( $h_file->{'path'} =~
647             /^$options->{'working_dir'}\/(.*)$/ ) ?
648             $1 : '';
649              
650 0           $path = $anchor . "/inflated_dir/" . $path;
651 0           File::OSS::Scan::Matches->add(
652             {
653             'name' => $h_file->{'name'},
654             'path' => $path,
655             'size' => $h_file->{'size'},
656             'mtime' => $h_file->{'mtime'},
657             },
658             $func, $cert, join(' ', @$args), $msg
659             );
660             }
661             else {
662 0           File::OSS::Scan::Matches
663             ->add( $h_file, $func, $cert, join(' ', @$args), $msg);
664             }
665             }
666             }
667             }
668              
669 0 0 0       if ( $binary and
      0        
670             ( $file_ext eq 'jar' or
671             $file_ext eq 'gz' or
672             $file_ext eq 'zip' or
673             $file_ext eq 'Z' or
674             $file_ext eq 'tar'
675             )
676             ) {
677              
678 0 0         if ( not $options->{'inflate'} ) {
679 0 0         printing("inflate option is not set, skipping file $h_file->{'path'}")
680             if ( $options->{'verbose'} == VERBOSE_CHATTY );
681              
682 0           return SKIP;
683             }
684              
685 0           my ( $cmd, $cmd_str );
686 0           my $ext_cmd_map = {
687             'jar' => [ 'jar', 'xvf' ],
688             'gz' => [ 'gunzip', undef ],
689             'zip' => [ 'unzip', undef ],
690             'Z' => [ 'uncompress', undef ],
691             'tar' => [ 'tar', 'xvf' ],
692             };
693              
694             {
695 1     1   8 no strict 'refs';
  1         2  
  1         1278  
  0            
696 0           $cmd = ${__PACKAGE__ . '::cmd_' .
  0            
697             $ext_cmd_map->{$file_ext}->[0]};
698             }
699              
700 0 0         if ( not defined $cmd ) {
701 0 0         printing("can't find the executable to process file $h_file->{'path'}, skipping ...")
702             if ( $options->{'verbose'} == VERBOSE_CHATTY );
703              
704 0           return SKIP;
705             }
706              
707 0 0         $cmd_str = "$cmd" .
708             ( defined $ext_cmd_map->{$file_ext}->[1] ?
709             " \-$ext_cmd_map->{$file_ext}->[1] " : ' ' );
710              
711 0 0         printing("try using $cmd to process file $h_file->{'path'}")
712             if ( $options->{'verbose'} == VERBOSE_CHATTY );
713              
714 0           my $inflate_file = undef;
715 0           my $inflate_dir = undef;
716 0           my $curr_inflate_dir = undef;
717              
718 0 0         if ( $h_file->{'path'} =~ /^$scan_base/ ) {
719 0           $inflate_file = $options->{'working_dir'} .
720             "\/$h_file->{'name'}";
721              
722 0 0         copy($h_file->{'path'}, $inflate_file)
723             or croak "Can't copy file to $inflate_file : $!";
724              
725 0           chdir($options->{'working_dir'});
726 0           $anchor_file = $h_file->{'path'};
727             }
728             else {
729 0           $inflate_file = $h_file->{'path'};
730 0           my ( $file_name, $dir_name ) =
731             (
732             fileparse($inflate_file,
733             qr/\.[^.]*/)
734             )[0,1];
735              
736 0           $inflate_dir = $dir_name . "inflating_$file_name";
737 0           $curr_inflate_dir = $dir_name;
738              
739 0           mkdir( $inflate_dir, 0755 );
740              
741 0           move($h_file->{'path'},
742             $inflate_dir . "/$h_file->{'name'}");
743              
744 0           $inflate_file = $inflate_dir . "/$h_file->{'name'}";
745              
746 0           chdir($inflate_dir);
747             }
748              
749 0 0 0       if ( -f $inflate_file and -r $inflate_file ) {
750              
751 0 0         printing("execute command ${cmd_str}${inflate_file}")
752             if ( $options->{'verbose'} != VERBOSE_SILIENT );
753              
754 0           foreach (`${cmd_str}${inflate_file}`) {
755 0           chomp;
756 0 0         print ' ' x 10 . "$_\n"
757             if ( $options->{'verbose'} == VERBOSE_CHATTY );
758             }
759              
760 0   0       unlink $inflate_file
761             || carp "can't unlink file $inflate_file : $!";
762              
763             # reset the recursion depth counter and restore it
764             # after finishing the scan for files in working direcotry.
765 0 0         if ( $h_file->{'path'} =~ /^$scan_base/ ) {
766              
767 0 0         printing("changing to directory: $options->{'working_dir'}")
768             if ( $options->{'verbose'} != VERBOSE_SILIENT );
769              
770 0           my $curr_recur_depth = $recur_depth;
771 0           $recur_depth = 0;
772 0           scan_dir($options->{'working_dir'});
773 0           $recur_depth = $curr_recur_depth;
774              
775 0           chdir($curr_dir);
776 0           system("rm -rf $options->{'working_dir'}/*");
777              
778 0 0         printing("changing to directory: $curr_dir")
779             if ( $options->{'verbose'} != VERBOSE_SILIENT );
780              
781 0           undef $anchor_file;
782             }
783             else {
784 0           $recur_depth++;
785 0           scan_dir($inflate_dir);
786              
787 0           chdir($curr_inflate_dir);
788 0           system("rm -rf $inflate_dir/*");
789             }
790              
791             }
792              
793 0           return SUCCESS;
794             }
795              
796 0 0         if ( $ruleset->{'LINE'} ) {
797 0           check_line($h_file, $binary);
798             }
799              
800 0           return SUCCESS;
801             }
802              
803             sub check_line {
804 0   0 0 0   my $h_file = shift || return SUCCESS;
805 0           my $binary = shift;
806              
807 0   0       my $rules = $ruleset->{'LINE'}
808             || return SUCCESS;
809              
810 0           my $fname = $h_file->{'name'};
811 0           my $fpath = $h_file->{'path'};
812              
813 0           local *FILE;
814              
815 0 0         if ( not $binary ) {
816 0   0       open FILE, $fpath ||
817             croak "could not open file $fpath, $!";
818             }
819             else {
820 0   0       open FILE, "-|", "$cmd_strings $fpath" ||
821             croak "could not get content from $fpath by using $cmd_strings, $!";
822              
823 0 0         printing("try using $cmd_strings on the file $h_file->{'path'}")
824             if ( $options->{'verbose'} == VERBOSE_CHATTY );
825             }
826              
827 0           my $line_no = 0;
828 0           LINE: while() {
829 0           chomp;
830 0           my $line = $_;
831 0           $line_no++;
832              
833 0           RULE: foreach my $rule ( @$rules ) {
834 0           my ( $func, $cert, $args )
835             = @$rule{qw/func cert args/};
836              
837             {
838 1     1   8 no strict 'refs';
  1         2  
  1         1491  
  0            
839 0           my $msg = $func->( $h_file, $cert, $args, $line, $line_no );
840              
841 0 0         if ($msg) {
842              
843 0 0 0       if ( defined $anchor_file and
844             $anchor_file =~ /^(.*)\.[^.]*$/ ) {
845              
846 0           my $anchor = $1;
847              
848 0 0         my $path = ( $h_file->{'path'} =~
849             /^$options->{'working_dir'}\/(.*)$/ ) ?
850             $1 : '';
851              
852 0           $path = $anchor . "/inflated_dir/" . $path;
853 0           File::OSS::Scan::Matches->add(
854             {
855             'name' => $h_file->{'name'},
856             'path' => $path,
857             'size' => $h_file->{'size'},
858             'mtime' => $h_file->{'mtime'},
859             },
860             $func, $cert, join(' ', @$args), $msg
861             );
862             }
863             else {
864 0           File::OSS::Scan::Matches
865             ->add( $h_file, $func, $cert, join(' ', @$args), $msg);
866             }
867              
868             # ignore other rules on the same line
869 0           next LINE;
870             }
871             }
872             }
873             }
874              
875 0           close(FILE);
876              
877 0           return SUCCESS;
878             }
879              
880             sub filename_match {
881 0     0 1   my ( $h_file, $cert, $args ) = @_;
882 0           my $msg = '';
883              
884 0           my $fname = $h_file->{'name'};
885 0           my $fpath = $h_file->{'path'};
886              
887 0           my $sname = $args->[0];
888              
889 0 0         if ( $fname =~ /^\Q$sname\E$/i ) {
890              
891 0           (caller(0))[3] =~ /\:{2}(\w+)$/;
892              
893 0 0         printing("${cert}% matched: $1 " .
894             join(' ', @$args) )
895             if ( $options->{'verbose'} != VERBOSE_SILIENT );
896              
897 0           $msg = "found file $fname ($fpath)";
898 0 0         printing(' ' x 4 . $msg)
899             if ( $options->{'verbose'} != VERBOSE_SILIENT );
900             }
901              
902 0           return $msg;
903             }
904              
905             sub content_match {
906 0     0 1   my ( $h_file, $cert, $args, $line, $line_no ) = @_;
907 0           my $msg = '';
908              
909 0           my $fname = $h_file->{'name'};
910 0           my $fpath = $h_file->{'path'};
911              
912 0           (caller(0))[3] =~ /\:{2}(\w+)$/;
913 0           my $func = $1;
914              
915 0           my $pattern = $args->[0];
916              
917 0 0         $pattern = '\b' . $pattern . '\b'
918             if ( $pattern =~ /^\w+$/ );
919              
920 0           my $case_sensitive = UNI_FALSE;
921 0 0         $case_sensitive = $args->[1] ? UNI_TRUE : UNI_FALSE
    0          
922             if ( defined $args->[1] );
923              
924 0 0         if ( $case_sensitive ) {
925 0 0         if ( $line =~ /$pattern/ ) {
926 0 0         printing("${cert}% matched: $func " .
927             join(' ', @$args) )
928             if ( $options->{'verbose'} != VERBOSE_SILIENT );
929              
930 0           $msg = "found matched content in $fname ($fpath)";
931 0 0         printing(' ' x 4 . $msg)
932             if ( $options->{'verbose'} != VERBOSE_SILIENT );
933 0           $msg = "[line:$line_no]$line";
934 0 0         printing(' ' x 4 . $msg)
935             if ( $options->{'verbose'} != VERBOSE_SILIENT );
936              
937             }
938             }
939             else {
940 0 0         if ( $line =~ /$pattern/i ) {
941 0 0         printing("${cert}% matched: $func " .
942             join(' ', @$args) )
943             if ( $options->{'verbose'} != VERBOSE_SILIENT );
944              
945 0           $msg = "found matched content in $fname ($fpath)";
946 0 0         printing(' ' x 4 . $msg)
947             if ( $options->{'verbose'} != VERBOSE_SILIENT );
948 0           $msg = "[line:$line_no]$line";
949 0 0         printing(' ' x 4 . $msg)
950             if ( $options->{'verbose'} != VERBOSE_SILIENT );
951              
952             }
953             }
954              
955 0           return $msg;
956             }
957              
958             sub copyright_match {
959 0     0 1   my ( $h_file, $cert, $args, $line, $line_no ) = @_;
960 0           my $msg = '';
961              
962 0           my $fname = $h_file->{'name'};
963 0           my $fpath = $h_file->{'path'};
964              
965 0           (caller(0))[3] =~ /\:{2}(\w+)$/;
966 0           my $func = $1;
967              
968 0           my @exclude_list = @$args;
969 0           my $matched = UNI_FALSE;
970              
971 0 0 0       $matched = UNI_TRUE
      0        
      0        
      0        
972             if (
973             $line =~ /Copyright\s*\(C\)\s*\w+/i or
974             $line =~ /Copyright\s*[\d\-]+\s*\w+/i or
975             $line =~ /Copyright.*Software/i or
976             $line =~ /Copyright.*All\srights\sreserved/i or
977             $line =~ /\@Copyright/i
978             );
979              
980 0 0         if ( $matched ) {
981 0           foreach my $exclude_ext ( @exclude_list ) {
982              
983 0 0         if ( $line =~ /\b$exclude_ext\b/i ) {
984 0 0         if ( $options->{'verbose'} == VERBOSE_CHATTY ) {
985 0           printing("excluded pattern found $exclude_ext, so not a match");
986 0           printing(' ' x 4 . "[line:$line_no]$line");
987             }
988              
989             # return no match
990 0           return undef;
991             }
992             }
993             }
994              
995 0 0         if ( $matched ) {
996 0 0         printing("${cert}% matched: $func " .
997             join(' ', @$args) )
998             if ( $options->{'verbose'} != VERBOSE_SILIENT );
999              
1000 0           $msg = "found matched copyright in $fname ($fpath)";
1001 0 0         printing(' ' x 4 . $msg)
1002             if ( $options->{'verbose'} != VERBOSE_SILIENT );
1003 0           $msg = "[line:$line_no]$line";
1004 0 0         printing(' ' x 4 . $msg)
1005             if ( $options->{'verbose'} != VERBOSE_SILIENT );
1006              
1007             }
1008              
1009 0           return $msg;
1010             }
1011              
1012              
1013              
1014              
1015              
1016              
1017              
1018             1;
1019