File Coverage

blib/lib/Sman/Man/Convert.pm
Criterion Covered Total %
statement 24 173 13.8
branch 0 82 0.0
condition 0 89 0.0
subroutine 8 13 61.5
pod 0 4 0.0
total 32 361 8.8


line stmt bran cond sub pod time code
1             package Sman::Man::Convert;
2             #$Id$
3              
4 2     2   465 use strict;
  2         2  
  2         45  
5 2     2   6 use warnings;
  2         1  
  2         35  
6 2     2   6 use Cwd;
  2         2  
  2         103  
7 2     2   731 use fields qw( config cache options );
  2         2070  
  2         8  
8 2     2   1084 use FreezeThaw qw( freeze thaw );
  2         6607  
  2         111  
9 2     2   1029 use Compress::Zlib qw ( compress uncompress );
  2         87590  
  2         146  
10 2     2   12 use Digest::MD5 qw( md5_hex );
  2         2  
  2         72  
11 2     2   1302 use File::Temp;
  2         12964  
  2         3152  
12              
13             # call like my $converter = new Sman::Man::Convert($config);
14             # or my $converter = new Sman::Man::Convert($config, { nocache=>1 } );
15             sub new {
16 0     0 0   my $proto = shift;
17 0   0       my $class = ref($proto) || $proto;
18 0           my $self = {};
19 0           bless ($self, $class);
20 0           $self->{config} = shift;
21 0   0       $self->{options} = shift || {};
22 0           my $cachepath = $self->{config}->GetConfigData("CACHEPATH");
23              
24 0 0         unless($self->{options}->{nocache}) {
25 0           eval {
26 0           require Sman::Man::Cache::FileCache;
27 0           $self->{cache} = new Sman::Man::Cache::FileCache ( $cachepath );
28             };
29 0 0         if ($@) {
30 0           warn "Couldn't create cachepath $cachepath, continuing: $@";
31 0           delete($self->{cache});
32             }
33             }
34            
35 0           return $self;
36             }
37             sub ClearCache {
38 0     0 0   my $self = shift;
39 0           my $cache = $self->{cache};
40 0           $cache->Clear();
41             }
42              
43             #returns a list of (ParserToUse, ContentRef)
44             sub ConvertManfile {
45 0     0 0   my ($self, $file) = @_;
46 0           my $filemtime = (stat($file)) [9];
47 0           my $hascache = defined($self->{cache});
48 0           my $rawdata;
49 0           my $config = $self->{config};
50 0           my $cachename = "[$Sman::Util::SMAN_DATA_VERSION] " . $config->GetConfigData("MANCMD") . " '$file'";
51 0 0 0       if ($hascache && defined($rawdata = $self->{cache}->get($cachename))) {
52 0           my ($mtime, $xml) = thaw( $rawdata );
53 0           $rawdata = ""; # return memory
54 0 0         if ($mtime) {
55 0 0         if ($filemtime < $mtime) {
56             # cached file is newer than source manfile! fetched from our cache.
57 0           $xml = uncompress($xml);
58 0 0         print "** Found data in cache for $file\n" if $self->{config}->GetConfigData("VERBOSE");
59 0           return("XML*", \$xml);
60             } else {
61 0 0         warn "** Data too old found in cache for $file" if $self->{config}->GetConfigData("DEBUG");
62             }
63             } else {
64 0 0         warn "** Data not found in cache for $file" if $self->{config}->GetConfigData("DEBUG");
65             }
66             }
67              
68 0           my $origdir;
69 0           my $hadwarning = 0;
70 0           my ($out, $err) = ("", "");
71 0 0         if ($file =~ /^(.*\/man\/)/) {
72 0           $origdir = Cwd::getcwd();
73 0           my $dir = $1;
74             #warn "** $0: chdir-ing to $dir\n";
75 0 0 0       chdir($dir) || ( (warn "** Couldn't cd to $dir!\n") && ($origdir = "") );
76             } else {
77 0 0         warn "** Couldn't find /man/... dir to cd into for $file" if $config->GetConfigData("VERBOSE");
78             }
79             ## DOCLIFTER HACKED IN FOR TESTING:
80             #if (-x "/usr/local/bin/doclifter" ) {
81             # my ($stdout, $stderr, $dollarquestionmark) =
82             # Sman::Util::RunCommand( "/bin/zcat -f '$file' | /usr/local/bin/doclifter" );
83             # $out = $stdout;
84             # if ($stderr) {
85             # warn "Error from doclifter: $stderr\n";
86             # }
87             #}
88             # DOCLIFTER HACKED IN FOR TESTING:
89 0 0         unless($out) {
90 0           my $hashref = $self->ConvertManfileManually($file);
91 0           $out = Sman::Util::MakeXML($hashref);
92             }
93              
94 0 0 0       if ($out && $hascache) { # only store the XML if we got a man page.
95 0           $self->{cache}->set($cachename, freeze( time(), compress($out) ));
96             warn "** Cached (mtime=$filemtime, bytes = " . length($out) . ") for $file"
97 0 0         if $self->{config}->GetConfigData("DEBUG");
98             }
99              
100 0 0         unless ($out) {
101 0           warn "** Couldn't get any data for $file!\n";
102 0           my %hhh = ();
103 0           $out = Sman::Util::MakeXML( \%hhh );
104             }
105 0 0         if ($origdir) { chdir($origdir) || warn "** Couldn't cd back to $origdir: $!"; }
  0 0          
106 0           return ("XML*", \$out);
107             }
108              
109              
110             sub ConvertManfileManually { # do it manually, if we can
111 0     0 0   my ($self, $file) = @_;
112 0           my ($manpage, $cur_content) = ('', '');
113 0           my ($cur_section,%h) = qw(NOSECTION);
114              
115 0           my $config = $self->{config};
116 0   0       my $man = $config->GetConfigData("MANCMD") || die "Couldn't get a man cmd: need MANCMD set.";
117 0   0       my $col = $config->GetConfigData("COLCMD") || "col -b";
118 0           my $warn = $config->GetConfigData("WARN");
119 0           my $debug = $config->GetConfigData("DEBUG");
120 0           my $autoconfiguring = $config->GetConfigData("AUTOCONFIGURING"); # internal flag
121              
122 0   0       my $tmpdir = $config->GetConfigData("TMPDIR") || "/tmp";
123              
124 0           my $testfile = $file;
125              
126            
127 0 0         print "** testfile starts out $testfile\n" if $debug;
128 0           $testfile =~ s/\.(gz|bz2)$//; # remove compression ending
129 0 0         print "** testfile is now $testfile\n" if $debug;
130 0           $testfile =~ s/\.((\d|\w)[^.]{0,3})$//; #remove .3-like ending
131 0 0         print "** testfile is now $testfile\n" if $debug;
132              
133 0           $testfile =~ m!man/man([^/]+) / (.+)? !x;
134             # above works for manpages like /usr/man/man1/ls.1.gz or
135             # (italian) /usr/share/man/it/man1/ls.1.gz
136             # changed to also work with /usr/X11R6/man/man7/X.Org.7
137              
138 0   0       my $cmd = $2 || $file;
139 0   0       my $sec = $1 || $3 || "";
140 0 0 0       warn "** Couldn't figure out cmd for $file" if ($warn && $cmd eq $file);
141 0 0 0       warn "** Couldn't figure out section for $file" if ($warn && $sec eq "");
142             #if ($sec =~ /^n$/i) { $sec = ""; }
143             # section 'n' doesn't work on some versions of osx (pre-10.4) and linux, but tk
144             # installs in places like /sw/share/man/mann/wm.n. So we ignore section 'n'.
145             # hm, now, in 10.4, section 'n' works (ala 'man n wm'). Apparently we should
146             # autoprobe the features of the local man command... (sigh... added to TODO list)
147 0           my $mancmd = $man;
148 0           $mancmd =~ s/%F/'$file'/;
149 0           $mancmd =~ s/%C/'$cmd'/;
150 0           $mancmd =~ s/%S/'$sec'/;
151 0 0         print qq{** Running "$mancmd"...\n} if ($config->GetConfigData("VERBOSE"));
152 0   0       my $timeout = $config->GetConfigData("CONVERSION_TIMEOUT") || 60;
153 0           my ($out, $err);
154 0           eval {
155 0     0     local $SIG{ALRM} = sub { die "ALARM\n"; };
  0            
156 0           alarm( $timeout );
157 0           ($out, $err) = Sman::Util::RunCommand($mancmd, $tmpdir);
158 0           alarm( 0 );
159             };
160 0 0         if ($@) {
161 0 0         if ($@ eq "ALARM\n") {
162 0           $err .= "\n(Conversion with $mancmd timed out after $timeout seconds\n";
163             } else {
164 0           die "$0: Error converting $file: $@\n";
165             }
166             } else {
167             }
168            
169            
170 0 0 0       if (!$autoconfiguring && $config->GetConfigData("WARN") && $err && (!$out || $warn)) {
      0        
      0        
      0        
171 0           warn "** Errors from '$mancmd'\n";
172 0           my @errlines = split(/\n/, $err);
173 0           for(@errlines) { warn "** MAN: $_\n"; }
  0            
174             }
175 0 0         if (!$out) {
176 0           return \%h; # no vals
177             }
178             #my $tmpname = "$tmpdir/sman-man-$$.tmp";
179 0           my ($tempfh, $tmpname) = File::Temp::tempfile( "sman-mantxt.XXXXX", DIR => $tmpdir);
180 0 0         Sman::Util::WriteFile($tmpname, \$out) ||
181             die "Couldn't write file $tmpname: $!";
182 0 0         if ($debug) {
183 0           print "DEBUG: $tmpname is\n" . Sman::Util::ReadFile($tmpname) . "\n";
184             }
185 0           my $colcmd = "cat $tmpname | $col ";
186              
187 0           my ($out2, $err2) = Sman::Util::RunCommand($colcmd, $tmpdir);
188 0 0         unlink($tmpname) || warn "Couldn't unlink $tmpname: $!";
189 0 0 0       if (!$autoconfiguring && $config->GetConfigData("WARN") && $err2 && (!$out2 || $warn)) {
      0        
      0        
      0        
190 0           warn "** Errors from '$colcmd'\n";
191 0           my @errlines = split(/\n/, $err2);
192 0           for(@errlines) { warn "** COL: $_\n"; }
  0            
193             }
194              
195 0           my @lines = split(/\n/, $out2);
196 0   0       my ($line1, $lineM) = (shift(@lines) || "", "");
197              
198             # parse manpage into sections
199 0           for my $l (@lines) {
200 0           $l =~ s/\s+$//; # remove trailing ws
201 0           $l =~ s/\s+/ /; # replace multiple ws
202 0           $l .= "\n";
203 0 0 0       next if (!defined($l) || $l =~ /^\s*$/); # skip ws
204 0 0         $line1 = $l if $line1 =~ /^\s*$/;
205 0           $manpage .= $lineM = $l;
206 0 0 0       if ($l =~ s/^(\w(\s|\w)+)// || $l =~ s/^\s*(NAME)//i){
207 0           chomp( my $sectitle = $1 ); # section title
208 0           $h{$cur_section} .= $cur_content;
209 0           $cur_content = "";
210 0           $cur_section = $sectitle; # new section name
211             }
212 0 0         $cur_content .= $l unless $l =~ /^\s*$/;
213             }
214 0           $h{$cur_section} .= $cur_content;
215              
216             # examine NAME, HEADer, FOOTer, (and
217             # maybe the filename too).
218              
219 0           @h{qw(A_AHEAD A_BFOOT)} = ($line1, $lineM);
220 0           my ($mn, $ms, $md) = ($cmd, $sec, "");
221             # NAME mn, SECTION ms, & DESCRIPTION md
222            
223 0           for(sort keys(%h)) { # A_AHEAD & A_BFOOT first
224 0           my ($k, $v) = ($_, $h{$_}); # copy key&val
225 0 0         if (/^A_(AHEAD|BFOOT)$/) { #get sec or cmd
    0          
226             # look for the 'section' in ()'s
227 0 0 0       if ($v =~ /\(([^)]+)\)\s*$/) {$ms||= $1;}
  0            
228             } elsif($k =~ s/^\s*(NOSECTION|NAME)\s*//) {
229 0   0       my $namestr = $v || $k; # 'cmd - a desc'
230 0 0         if ($namestr =~ /(\S.*)\s+--?\s*(.*)/) {
231 0   0       $mn ||= $1 || "";
      0        
232 0   0       $md ||= $2 || "";
      0        
233             } else { # that regex could fail. oh well.
234 0   0       $md ||= $namestr || $v;
      0        
235             }
236             }
237             }
238 0 0 0       if (!$ms && $file =~ m!/man/man([^/]*)/!) {
239 0           $ms = $1; # get sec from path if not found
240             }
241 0 0         ($mn = $file) =~ s!(^.*/)|(\.gz$)!! unless $mn;
242 0           $mn =~ s/\s+/ /g;
243 0           $ms =~ s/\s+/ /g;
244 0           $md =~ s/\s+/ /g;
245              
246 0           my %metas;
247 0           @metas{qw(swishtitle sec desc swishdefault manpage digest)} =
248             ($mn, $ms, $md, $manpage, $manpage, md5_hex($manpage));
249             #yes, manpage is twice.
250             # Once for swishdefault, and once for the manpage property
251             # Q: can one make swishdefault a Property?
252 0           return ( \%metas ); # return ref to hash.
253             }
254              
255             1;
256              
257             =head1 NAME
258              
259             Sman::Man::Convert - Convert manpages to XML for sman-update and sman
260              
261             =head1 SYNOPSIS
262              
263             # this module is intended for internal use by sman-update
264             my $smanconfig = new Sman::Config();
265             $smanconfig->ReadDefaultConfigFile();
266             my $converter = new Sman::Man::Convert($smanconfig);
267             #$converter->ClearCache(); # if you wish
268             my ($type, $outputref) =
269             $converter->ConvertManfile($manfile);
270            
271             =head1 DESCRIPTION
272              
273             Use MANCMD and COLCMD (see 'perldoc sman.conf') to convert
274             the man pages from ASCII into XML.
275              
276             =head1 AUTHOR
277              
278             Josh Rabinowitz
279              
280             =head1 SEE ALSO
281              
282             L, L, L
283              
284             =cut
285