File Coverage

blib/lib/Module/MetaInfo/AutoGuess.pm
Criterion Covered Total %
statement 131 161 81.3
branch 51 88 57.9
condition 1 3 33.3
subroutine 14 17 82.3
pod 4 4 100.0
total 201 273 73.6


line stmt bran cond sub pod time code
1             package Module::MetaInfo::AutoGuess;
2             $VERSION = "0.01";
3 2     2   801 use warnings;
  2         3  
  2         60  
4 2     2   10 use strict;
  2         5  
  2         57  
5 2     2   12 use Carp;
  2         6  
  2         147  
6 2     2   10 use Cwd;
  2         4  
  2         146  
7 2     2   872 use Symbol;
  2         996  
  2         113  
8 2     2   11 use File::Find;
  2         4  
  2         111  
9 2     2   1125 use Module::MetaInfo::_Extractor;
  2         5  
  2         71  
10              
11 2     2   14 use vars qw(@ISA);
  2         2  
  2         6340  
12              
13             @ISA= qw(Module::MetaInfo::_Extractor);
14              
15             =head1 NAME
16              
17             Module::MetaInfo::AutoGuess - Guess meta information from perl modules
18              
19             =head1 USAGE
20              
21             use Module::MetaInfo::AutoGuess;
22             $mod=new Module::MetaInfo::AutoGuess(perl-module-file.tar.gz);
23             $desc=$mod->description();
24              
25             =head1 DESCRIPTION
26              
27             This module provides functions for guessing meta information from old
28             perl modules which have no explicit meta information storage. The aim
29             is to provide a transition mechnism through which meta information can
30             be supported for the majority of perl modules without any extra work
31             from the module maintainers.
32              
33             =head1 FUNCTIONS
34              
35             The meta information which should be generated can be worked out from
36             the needs of packaging systems such as RPM (RedHat Package Manager:
37             for RedHat Linux and related Linux distributions), DPKG (Debian
38             Packager - for Debian GNU/Linux).
39              
40             =head2 description
41              
42             This function tries to get a description for the module. It does this
43             by searching for files which might have description information then
44             looking in each one in order (from the most likely to the least -
45             heuristic guessing) until it finds something which seems to be a
46             reasonable description.
47              
48             The description returned should be treated as plain text. In the
49             current version however, it may contain unconverted POD directives.
50             In future these will probably be converted to text. Possibly some
51             options should be given about the kind of text to be produced?
52              
53             =head2 docs
54              
55             This function returns an array (or reference to an array in a scalar
56             context) which contains all of the files in the perl module which are
57             thought to be documentation.
58              
59             =head1 UNIMPLEMENTED FUNCTIONS
60              
61             Currently there are no dependency related functions (requires /
62             provides / suggests). The first two of these can be taken from
63             programs included in RPM > 3.0.4 if they are needed. Please indicate
64             that you need this to the author. There isn't a function to return a
65             module summary. This would be a one line summary of the function.
66             Probably best would be to take this from the CPAN modules.txt file.
67              
68             =cut
69              
70             # sub ProcessFileNames
71             # looks through a list of candidate files names and orders them
72             # according to desirability then cuts off those that look likely
73             # to do more harm than good.
74              
75             # N.B. function call to here is done a bit wierdly...
76              
77             sub _process_file_names {
78 4     4   17 my ($self, $doclist) = @_;
79 4 50 33     90 die "function miscall" unless (ref $self && (ref $doclist eq "ARRAY"));
80              
81 4         55 print STDERR "Sorting different perl file possibilities\n"
82 4 100       11 if ${$self->{_verbose}};
83              
84 4         355 local $::simplename=$self->{package_name};
85 4         56 local ($::A, $::B);
86 4         58 $::simplename =~ s,[-/ ],_,g;
87 4         15 $::simplename =~ tr/[A-Z]/[a-z]/;
88              
89             #Ordering Heuristic
90             #
91             #best: the description in the module named the same as the package
92             #
93             #next: documentation files
94             #
95             #next: files named as package
96             #finally: prefer .pod to .pm to .pl
97             #
98             #N.B. sort high to low not low to high
99              
100 6         25 my @sort_list = sort {
101 4         200 local $::res=0;
102 6         13 $::A = $a;
103 6         10 $::B = $b;
104 6         80 $::A =~ s,[-/ ],_,g;
105 6         16 $::A =~ tr/[A-Z]/[a-z]/;
106 6         68 $::B =~ s,[-/ ],_,g;
107 6         14 $::B =~ tr/[A-Z]/[a-z]/;
108              
109             #bundles seem a bad place to look from our limited experience
110             #this might be better as an exception on the next rule??
111 6 50       72 return $::res
112             if ( $::res = - (($::B =~ m/(^|_)bundle_/ )
113             <=> ($::A =~ m/(^|_)bundle_/ )) ) ;
114 6 50       410 return $::res
115             if ( $::res = (($::B =~ m/$::simplename.(pm|pod|pod)/ )
116             <=> ($::A =~ m/$::simplename.(pm|pod|pod)/ )) ) ;
117 6 50       38 return $::res
118             if ( $::res = (($::B =~ m/^readme/ )
119             <=> ($::A =~ m/^readme/ )) ) ;
120 6 50       24 return $::res
121             if ( $::res = (($::B =~ m/.pod$/ )
122             <=> ($::A =~ m/.pod$/ )) ) ;
123 6 100       51 return $::res
124             if ( $::res = (($::B =~ m/.pm$/ )
125             <=> ($::A =~ m/.pm$/ )) ) ;
126 4 50       33 return $::res
127             if ( $::res = (($::B =~ m/.pl$/ )
128             <=> ($::A =~ m/.pl$/ )) ) ;
129 4 50       209 return $::res
130             if ( $::res = (($::B =~ m/$::simplename/ )
131             <=> ($::A =~ m/$::simplename/ )) ) ;
132 4         27 return length $::B <=> length $::A;
133             } @$doclist;
134              
135 4         27 print STDERR "Checking which fies could really be used\n"
136 4 100       14 if ${$self->{_verbose}};
137 4         28 my $useful=0; #assume first always good
138             CASE: {
139 4 100       10 $#sort_list == 1 && do {
  4         27  
140 2         15 $useful=1;
141 2         15 last CASE;
142             };
143 2         3 while (1) {
144 6 100       19 $useful==$#sort_list and last CASE;
145             #non perl files in the list must be there for some reason
146 4 50       15 ($sort_list[$useful+1] =~ m/\.p(od|m|l)$/) or do {$useful++; next};
  4         8  
  4         6  
147 0         0 my $cmp_name=$sort_list[$useful+1];
148 0         0 $cmp_name =~ s,[-/ ],_,g;
149 0         0 $cmp_name =~ tr/[A-Z]/[a-z]/;
150             #perl files should look something like the package name???
151 0 0       0 ($cmp_name =~ m/$::simplename/) && do {$useful++; next};
  0         0  
  0         0  
152 0         0 last CASE;
153             }
154             }
155 4         18 $#sort_list = $useful;
156              
157 4         29 print STDERR "Description file list is as follows:\n " ,
158 4 100       9 join ("\n ", @sort_list), "\n" if ${$self->{_verbose}};
159              
160             #FIXME: ref return would be more efficient
161 4         21 return \@sort_list;
162             }
163              
164              
165             # sub _check_perl_prog_for_desc
166              
167             # given a documentation file, see if we can extract a description from it
168              
169             sub _check_doc_file_for_desc {
170 0     0   0 my $self=shift;
171 0         0 my $filename=shift;
172 0         0 my $fh = Symbol::gensym();
173 0         0 print STDERR "Try to use $filename as description\n"
174 0 0       0 if ${$self->{_verbose}};
175 0 0       0 open($fh, "<$filename") || die "Failed to open $filename: $!";
176 0         0 my $desc;
177 0         0 my $linecount=1;
178 0         0 LINE: while ( my $line=<$fh> ) {
179 0         0 $desc .= $line;
180 0         0 $linecount++;
181 0 0       0 $linecount > 30 && last LINE;
182             }
183 0 0       0 close($fh) or die "Failed to close $filename $!";
184             #FIXME: quality check
185 0 0       0 $linecount > 2 or return undef;
186 0 0       0 return $desc if ( $desc );
187             }
188              
189              
190             # sub _check_perl_prog_for_desc
191              
192             # given a valid perl program see if there is a valid description in it.
193              
194             sub _check_perl_prog_for_desc {
195 2     2   8 my $self=shift;
196 2         5 my $filename=shift;
197 2         4 my $desc="";
198 2         46 my $fh = Symbol::gensym();
199 2         19 print STDERR "Try to use $filename as description\n"
200 2 100       73 if ${$self->{_verbose}};
201 2 50       298 open($fh, $filename) || die "Failed to open $filename: $!";;
202              
203 2         6 my $linecount=1;
204 2         112 LINE: while (my $line=<$fh>){
205 54 100       179 ($line =~ m/^=head1[\t ]+DESCRIPTION/) and do {
206 2         13 while ( $line=<$fh> ) {
207 41 100       366 ($line =~ m/^=(head1)|(cut)/) and last LINE;
208 40         50 $desc .= $line;
209 40         35 $linecount++;
210 40 100       105 $linecount > 30 && last LINE;
211             }
212             };
213             #tests to see if the descripiton is good enough
214             #FIXME: mentions package name?
215             }
216 2 50       38 close($fh) or die "Failed to close $filename $!";
217 2 50       29 ( $desc =~ m/(....\n.*){3}/m ) and do {
218             #Often descriptions don't say the name of the module and
219             #furthermore they always assume that we know they are a perl
220             #module so put in a little header.
221 2         14 $desc =~ s/^\s*\n//;
222 2         11 $desc="This package contains the perl module " .
223             $self->{package_name} . ".\n\n" . $desc;
224 2 100       5 print STDERR "Found description in $filename\n" if ${$self->{_verbose}};
  2         38  
225 2         14 return $desc;
226             };
227 0 0       0 print STDERR "No description found in $filename\n" if ${$self->{_verbose}};
  0         0  
228 0         0 return undef;
229             }
230              
231              
232              
233             #=head1 $self->_check_files_for_desc()
234             #
235             #this function looks at a files in a list and for each in order identifies
236             #if it has content that could be used as a module description.
237             #
238             #=cut
239              
240             sub _check_files_for_desc {
241              
242 4     4   33 my $doc_list=&_process_file_names;
243              
244 4         8 my $self = shift;
245 4         6 my $desc;
246              
247 4         23 FILE: foreach my $filename ( @$doc_list ){
248 4         33 -e $filename or
249 6 50       366 do {print STDERR "no $filename file\n" if ${$self->{'_verbose'}};
  4 100       23  
250 4         23 next FILE};
251 2 50       23 $filename =~ m/\.p(od|m|l)$/ && do {
252 2         38 $desc=$self->_check_perl_prog_for_desc($filename);
253 2 50       10 $desc && last FILE;
254 0         0 next FILE;
255             };
256 0         0 $desc=$self->_check_doc_file_for_desc($filename);
257 0 0       0 last FILE if $desc;
258             }
259 4         23 return $desc;
260             }
261              
262             =head2 description
263              
264             This function finds and returns a description of the perl module.
265              
266             In the current implementation we use a set of wierd heuristics to
267             guess what is the best description available.
268              
269             When creating an rpm, for example, it's a good idea to proceed the
270             description with something to the effect of:
271              
272             this rpm contains the perl module XXX
273              
274             where XXX is the name you are using for the perl module.
275              
276             =cut
277              
278             sub description {
279 4     4 1 24 my $self=shift;
280 4 50       19 croak "$self must be a reference" unless ref $self;
281 4 50       80 $self->setup() unless $self->{setup};
282 4         300 my $desc = "";
283 4 100       15 print STDERR "Hunting for files in distribution\n" if ${$self->{'_verbose'}};
  4         57  
284              
285             #Files for use for a description. Names are relative to package
286             #base. Are there more names which work good? BLURB? INTRO?
287              
288 4         28 my (@doc_list) = ( $self->{expand_dir} ."/". "README" ,
289             $self->{expand_dir} ."/". "DESCRIPTION" );
290              
291             #we just use absolute paths
292             # my $dirpref = $self->{expand_dir};
293              
294             my $handler=sub {
295 39 100   39   2927 m/\.p(od|m|l)$/ or return;
296 2         7 my $name=$File::Find::name;
297             # $name =~ s/^$dirpref//;
298 2         91 push @doc_list, $name;
299 4         96 };
300 4         1062 &File::Find::find($handler, $self->{expand_dir});
301              
302 4         39 $desc=$self->_check_files_for_desc(\@doc_list);
303              
304 4 100       17 unless ( $desc ) {
305 2         280 warn "Failed to generate any description for "
306             . $self->{package_name} . ".\n";
307 2         34 return undef;
308             }
309              
310             #FIXME: what's the best way to clean up whitespace? Is it needed at all?
311             #bear in mind that both perl descriptions and rpm special case
312             #indentation with white space to mean something like \verbatim
313              
314 2         49 $desc=~s/^[\t ]*//mg; #space at the start of lines
315 2         368 $desc=~s/[\t ]*$//mg; #space at the end of lines
316 2         11 $desc=~s/^[_\W]*//s ; #blank punctuation lines at the start
317 2         253 $desc=~s/\s*$//; #blank lines at the end.
318              
319 2         25 return $desc;
320             }
321              
322             =head2 doc_files
323              
324             We give a list of files or directories which it is good to treat as
325             documentation and include within any binary distribution.
326              
327             We like to include usage documentation, copyrights and release
328             information. Probably we don't care too much about implementation
329             documentation. Right now we just doo fairly simple file name guessing
330             in the top level directory of the distribution.
331              
332             doc_files returns a list of files to a list context and a reference to
333             an array of files to a scalar context.
334              
335             =cut
336              
337             my $docre='(?x) (^README)
338             |(^C((?i)OPY((ING)|(RIGHT))))|(LICENSE$)
339             |(^doc(s|u.*)?)
340             |(^FAQ)
341             |(^(?i)notes$)
342             |(^(?i)todo$)
343             |(^(Changes)|(NEWS)$)
344             |((?i)examples?)';
345              
346             sub doc_files() {
347 2     2 1 36 my $self = shift;
348 2 50       10 $self->setup() unless $self->{setup};
349 2         8369 my $old_dir = Cwd::cwd();
350 2         35 my @docs = ();
351 2         20 my $return="";
352 2 50       136 opendir (BASEDIR , $self->{expand_dir})
353             || die "can't open package main directory $self->{expand_dir} $!";
354 2         73 my @files=readdir (BASEDIR);
355 2         27 @docs= grep {m/$docre/i} @files;
  29         457  
356 2         68 print STDERR "Found the following documentation files\n" ,
357 2 100       7 join (" " , @docs ), "\n" if ${$self->{'_verbose'}};
358 2 50       66 return wantarray ? @docs : \@docs;
359             }
360              
361             =head2 requires / provides
362              
363             These functions would try to guess which perl modules are needed to
364             run this one / which perl libraries this module provides.. They
365             aren't implmeneted yet.
366              
367             Currently we haven't distinguished pre-requisite modules needed to run
368             the module from ones needed merely to install it.
369              
370             There is code in perl.prov and perl.req in the lib directory of RPM
371             (the RedHat Package Manager) which can determine this information,
372             however, it requires the module to have been build correctly and
373             installed in a temporary directory hierarchy.
374              
375             =cut
376              
377             sub requires {
378 0     0 1   die "Module::Metainfo::requires() isn't yet implemented";
379             }
380              
381             sub provides {
382 0     0 1   die "Module::Metainfo::provides() isn't yet implemented";
383             }
384              
385             =head1 FUTURE FUNCTIONS
386              
387             There are a number of other things which should be implemented. These
388             can be guessed from looking at the possible meta-information which can
389             be stored in the RPM or DPG formats, for example. Examples include
390              
391             =over 4
392              
393             =item *
394              
395             copyright - GPL / as perl / redistributable / etc.
396              
397             =item *
398              
399             application area - Database / Internet / WWW / HTTP etc.
400              
401             =item *
402              
403             suggests - related applications
404              
405             =back
406              
407             In many cases this data is generated currently by package building
408             tools simply by using a fixed string. The function should do better
409             than that in almost all cases or else it is't worth having...
410              
411             =head1 FUTURE DEVELOPMENT
412              
413             Incorporate a mechanism for deliberately storing meta information
414             inside perl modules, e.g. by adding a directory structure inside. I
415             already have a prototype for this included into makerpm.
416              
417             =head1 COPYRIGHT
418              
419             You may distribute under the terms of either the GNU General
420             Public License or the Artistic License, as specified in the
421             Perl README.
422              
423             =head1 AUTHOR
424              
425             Michael De La Rue.
426              
427             =head1 SEE ALSO
428              
429             L
430              
431             =cut
432              
433             42;