File Coverage

blib/lib/Sman/Man/Convert.pm
Criterion Covered Total %
statement 24 162 14.8
branch 0 78 0.0
condition 0 87 0.0
subroutine 8 12 66.6
pod 0 4 0.0
total 32 343 9.3


line stmt bran cond sub pod time code
1             package Sman::Man::Convert;
2             #$Id: Convert.pm,v 1.37 2008/05/25 02:40:59 joshr Exp $
3              
4 2     2   6562 use strict;
  2         5  
  2         67  
5 2     2   10 use warnings;
  2         3  
  2         60  
6 2     2   11 use Cwd;
  2         4  
  2         155  
7 2     2   24535 use fields qw( config cache options );
  2         3293  
  2         11  
8 2     2   2209 use FreezeThaw qw( freeze thaw );
  2         15691  
  2         171  
9 2     2   42638 use Compress::Zlib qw ( compress uncompress );
  2         431488  
  2         234  
10 2     2   21 use Digest::MD5 qw( md5_hex );
  2         5  
  2         110  
11 2     2   9222 use File::Temp;
  2         64723  
  2         5412  
12              
13             # call like my $converter = new Sman::Man::Convert($config);
14             # or my $converter = new Sman::Man::Convert($config, { nocache=>1 } );
15             sub new {
16 0     0 0   my $proto = shift;
17 0   0       my $class = ref($proto) || $proto;
18 0           my $self = {};
19 0           bless ($self, $class);
20 0           $self->{config} = shift;
21 0   0       $self->{options} = shift || {};
22 0           my $cachepath = $self->{config}->GetConfigData("CACHEPATH");
23              
24 0 0         unless($self->{options}->{nocache}) {
25 0           eval {
26 0           require Sman::Man::Cache::FileCache;
27 0           $self->{cache} = new Sman::Man::Cache::FileCache ( $cachepath );
28             };
29 0 0         if ($@) {
30 0           warn "Couldn't create cachepath $cachepath, continuing: $@";
31 0           delete($self->{cache});
32             }
33             }
34            
35 0           return $self;
36             }
37             sub ClearCache {
38 0     0 0   my $self = shift;
39 0           my $cache = $self->{cache};
40 0           $cache->Clear();
41             }
42              
43             #returns a list of (ParserToUse, ContentRef)
44             sub ConvertManfile {
45 0     0 0   my ($self, $file) = @_;
46 0           my $filemtime = (stat($file)) [9];
47 0           my $hascache = defined($self->{cache});
48 0           my $rawdata;
49 0           my $config = $self->{config};
50 0           my $cachename = "[$Sman::SMAN_DATA_VERSION] " . $config->GetConfigData("MANCMD") . " '$file'";
51 0 0 0       if ($hascache && defined($rawdata = $self->{cache}->get($cachename))) {
52 0           my ($mtime, $xml) = thaw( $rawdata );
53 0           $rawdata = ""; # return memory
54 0 0         if ($mtime) {
55 0 0         if ($filemtime < $mtime) {
56             # cached file is newer than source manfile! fetched from our cache.
57 0           $xml = uncompress($xml);
58 0 0         print "** Found data in cache for $file\n" if $self->{config}->GetConfigData("VERBOSE");
59 0           return("XML*", \$xml);
60             } else {
61 0 0         warn "** Data too old found in cache for $file" if $self->{config}->GetConfigData("DEBUG");
62             }
63             } else {
64 0 0         warn "** Data not found in cache for $file" if $self->{config}->GetConfigData("DEBUG");
65             }
66             }
67              
68 0           my $origdir;
69 0           my $hadwarning = 0;
70 0           my ($out, $err) = ("", "");
71 0 0         if ($file =~ /^(.*\/man\/)/) {
72 0           $origdir = Cwd::getcwd();
73 0           my $dir = $1;
74             #warn "** $0: chdir-ing to $dir\n";
75 0 0 0       chdir($dir) || ( (warn "** Couldn't cd to $dir!\n") && ($origdir = "") );
76             } else {
77 0 0         warn "** Couldn't find /man/... dir to cd into for $file" if $config->GetConfigData("VERBOSE");
78             }
79             ## DOCLIFTER HACKED IN FOR TESTING:
80             #if (-x "/usr/local/bin/doclifter" ) {
81             # my ($stdout, $stderr, $dollarquestionmark) =
82             # Sman::Util::RunCommand( "/bin/zcat -f '$file' | /usr/local/bin/doclifter" );
83             # $out = $stdout;
84             # if ($stderr) {
85             # warn "Error from doclifter: $stderr\n";
86             # }
87             #}
88             # DOCLIFTER HACKED IN FOR TESTING:
89 0 0         unless($out) {
90 0           my $hashref = $self->ConvertManfileManually($file);
91 0           $out = Sman::Util::MakeXML($hashref);
92             }
93              
94 0 0 0       if ($out && $hascache) { # only store the XML if we got a man page.
95 0           $self->{cache}->set($cachename, freeze( time(), compress($out) ));
96 0 0         warn "** Cached (mtime=$filemtime, bytes = " . length($out) . ") for $file"
97             if $self->{config}->GetConfigData("DEBUG");
98             }
99              
100 0 0         unless ($out) {
101 0           warn "** Couldn't get any data for $file!\n";
102 0           my %hhh = ();
103 0           $out = Sman::Util::MakeXML( \%hhh );
104             }
105 0 0         if ($origdir) { chdir($origdir) || warn "** Couldn't cd back to $origdir: $!"; }
  0 0          
106 0           return ("XML*", \$out);
107             }
108              
109              
110             sub ConvertManfileManually { # do it manually, if we can
111 0     0 0   my ($self, $file) = @_;
112 0           my ($manpage, $cur_content) = ('', '');
113 0           my ($cur_section,%h) = qw(NOSECTION);
114              
115 0           my $config = $self->{config};
116 0   0       my $man = $config->GetConfigData("MANCMD") || die "Couldn't get a man cmd: need MANCMD set.";
117 0   0       my $col = $config->GetConfigData("COLCMD") || "col -b";
118 0           my $warn = $config->GetConfigData("WARN");
119 0           my $debug = $config->GetConfigData("DEBUG");
120 0           my $autoconfiguring = $config->GetConfigData("AUTOCONFIGURING"); # internal flag
121              
122 0   0       my $tmpdir = $config->GetConfigData("TMPDIR") || "/tmp";
123              
124 0           my $testfile = $file;
125              
126            
127 0 0         print "** testfile starts out $testfile\n" if $debug;
128 0           $testfile =~ s/\.(gz|bz2)$//; # remove compression ending
129 0 0         print "** testfile is now $testfile\n" if $debug;
130 0           $testfile =~ s/\.((\d|\w)[^.]{0,3})$//; #remove .3-like ending
131 0 0         print "** testfile is now $testfile\n" if $debug;
132              
133 0           $testfile =~ m!man/man([^/]+) / (.+)? !x;
134             # above works for manpages like /usr/man/man1/ls.1.gz or
135             # (italian) /usr/share/man/it/man1/ls.1.gz
136             # changed to also work with /usr/X11R6/man/man7/X.Org.7
137              
138 0   0       my $cmd = $2 || $file;
139 0   0       my $sec = $1 || $3 || "";
140 0 0 0       warn "** Couldn't figure out cmd for $file" if ($warn && $cmd eq $file);
141 0 0 0       warn "** Couldn't figure out section for $file" if ($warn && $sec eq "");
142             #if ($sec =~ /^n$/i) { $sec = ""; }
143             # section 'n' doesn't work on some versions of osx (pre-10.4) and linux, but tk
144             # installs in places like /sw/share/man/mann/wm.n. So we ignore section 'n'.
145             # hm, now, in 10.4, section 'n' works (ala 'man n wm'). Apparently we should
146             # autoprobe the features of the local man command... (sigh... added to TODO list)
147 0           $man =~ s/%F/'$file'/;
148 0           $man =~ s/%C/'$cmd'/;
149 0           $man =~ s/%S/'$sec'/;
150 0           my $mancmd = "$man";
151 0 0         print qq{** Running "$mancmd"...\n} if ($config->GetConfigData("VERBOSE"));
152 0           my ($out, $err) = Sman::Util::RunCommand($mancmd, $tmpdir);
153 0 0 0       if (!$autoconfiguring && $config->GetConfigData("WARN") && $err && (!$out || $warn)) {
      0        
      0        
      0        
154 0           warn "** Errors from '$mancmd'\n";
155 0           my @errlines = split(/\n/, $err);
156 0           for(@errlines) { warn "** MAN: $_\n"; }
  0            
157             }
158 0 0         if (!$out) {
159 0           return \%h; # no vals
160             }
161             #my $tmpname = "$tmpdir/sman-man-$$.tmp";
162 0           my ($tempfh, $tmpname) = File::Temp::tempfile( "sman-mantxt.XXXXX", DIR => $tmpdir);
163 0 0         Sman::Util::WriteFile($tmpname, \$out) ||
164             die "Couldn't write file $tmpname: $!";
165 0 0         if ($debug) {
166 0           print "DEBUG: $tmpname is\n" . Sman::Util::ReadFile($tmpname) . "\n";
167             }
168 0           my $colcmd = "cat $tmpname | $col ";
169              
170 0           my ($out2, $err2) = Sman::Util::RunCommand($colcmd, $tmpdir);
171 0 0         unlink($tmpname) || warn "Couldn't unlink $tmpname: $!";
172 0 0 0       if (!$autoconfiguring && $config->GetConfigData("WARN") && $err2 && (!$out2 || $warn)) {
      0        
      0        
      0        
173 0           warn "** Errors from '$colcmd'\n";
174 0           my @errlines = split(/\n/, $err2);
175 0           for(@errlines) { warn "** COL: $_\n"; }
  0            
176             }
177              
178 0           my @lines = split(/\n/, $out2);
179 0   0       my ($line1, $lineM) = (shift(@lines) || "", "");
180              
181             # parse manpage into sections
182 0           for my $l (@lines) {
183 0           $l =~ s/\s+$//; # remove trailing ws
184 0           $l =~ s/\s+/ /; # replace multiple ws
185 0           $l .= "\n";
186 0 0 0       next if (!defined($l) || $l =~ /^\s*$/); # skip ws
187 0 0         $line1 = $l if $line1 =~ /^\s*$/;
188 0           $manpage .= $lineM = $l;
189 0 0 0       if ($l =~ s/^(\w(\s|\w)+)// || $l =~ s/^\s*(NAME)//i){
190 0           chomp( my $sectitle = $1 ); # section title
191 0           $h{$cur_section} .= $cur_content;
192 0           $cur_content = "";
193 0           $cur_section = $sectitle; # new section name
194             }
195 0 0         $cur_content .= $l unless $l =~ /^\s*$/;
196             }
197 0           $h{$cur_section} .= $cur_content;
198              
199             # examine NAME, HEADer, FOOTer, (and
200             # maybe the filename too).
201              
202 0           @h{qw(A_AHEAD A_BFOOT)} = ($line1, $lineM);
203 0           my ($mn, $ms, $md) = ($cmd, $sec, "");
204             # NAME mn, SECTION ms, & DESCRIPTION md
205            
206 0           for(sort keys(%h)) { # A_AHEAD & A_BFOOT first
207 0           my ($k, $v) = ($_, $h{$_}); # copy key&val
208 0 0         if (/^A_(AHEAD|BFOOT)$/) { #get sec or cmd
    0          
209             # look for the 'section' in ()'s
210 0 0 0       if ($v =~ /\(([^)]+)\)\s*$/) {$ms||= $1;}
  0            
211             } elsif($k =~ s/^\s*(NOSECTION|NAME)\s*//) {
212 0   0       my $namestr = $v || $k; # 'cmd - a desc'
213 0 0         if ($namestr =~ /(\S.*)\s+--?\s*(.*)/) {
214 0   0       $mn ||= $1 || "";
      0        
215 0   0       $md ||= $2 || "";
      0        
216             } else { # that regex could fail. oh well.
217 0   0       $md ||= $namestr || $v;
      0        
218             }
219             }
220             }
221 0 0 0       if (!$ms && $file =~ m!/man/man([^/]*)/!) {
222 0           $ms = $1; # get sec from path if not found
223             }
224 0 0         ($mn = $file) =~ s!(^.*/)|(\.gz$)!! unless $mn;
225 0           $mn =~ s/\s+/ /g;
226 0           $ms =~ s/\s+/ /g;
227 0           $md =~ s/\s+/ /g;
228              
229 0           my %metas;
230 0           @metas{qw(swishtitle sec desc swishdefault manpage digest)} =
231             ($mn, $ms, $md, $manpage, $manpage, md5_hex($manpage));
232             #yes, manpage is twice.
233             # Once for swishdefault, and once for the manpage property
234             # Q: can one make swishdefault a Property?
235 0           return ( \%metas ); # return ref to hash.
236             }
237              
238             1;
239              
240             =head1 NAME
241              
242             Sman::Man::Convert - Convert manpages to XML for sman-update and sman
243              
244             =head1 SYNOPSIS
245              
246             # this module is intended for internal use by sman-update
247             my $smanconfig = new Sman::Config();
248             $smanconfig->ReadDefaultConfigFile();
249             my $converter = new Sman::Man::Convert($smanconfig);
250             #$converter->ClearCache(); # if you wish
251             my ($type, $outputref) =
252             $converter->ConvertManfile($manfile);
253            
254             =head1 DESCRIPTION
255              
256             Use MANCMD and COLCMD (see 'perldoc sman.conf') to convert
257             the man pages from ASCII into XML.
258              
259             =head1 AUTHOR
260              
261             Josh Rabinowitz
262              
263             =head1 SEE ALSO
264              
265             L, L, L
266              
267             =cut
268