File Coverage

blib/lib/Plucene/SearchEngine/Index/PDF.pm
Criterion Covered Total %
statement 6 13 46.1
branch 0 2 0.0
condition n/a
subroutine 2 3 66.6
pod 0 1 0.0
total 8 19 42.1


line stmt bran cond sub pod time code
1             package Plucene::SearchEngine::Index::PDF;
2 1     1   1081 use base 'Plucene::SearchEngine::Index::Base';
  1         3  
  1         81  
3             __PACKAGE__->register_handler("application/pdf", ".pdf");
4 1     1   6 use File::Temp qw/tmpnam/;
  1         2  
  1         215  
5              
6             =head1 NAME
7              
8             Plucene::SearchEngine::Index::PDF - Backend for parsing PDF
9              
10             =head1 DESCRIPTION
11              
12             This backend analyzes a PDF file for its textual content (using C<pdftotext>)
13             and turns any metadata found in the PDF into Plucene fields.
14              
15             =cut
16              
17             sub gather_data_from_file {
18 0     0 0   my ($self, $filename) = @_;
19 0           my $html = tmpnam();
20 0           system("pdftotext", "-htmlmeta", $filename, $html);
21 0 0         return unless -e $html;
22 0           $self->Plucene::SearchEngine::Index::HTML::gather_data_from_file($html);
23 0           unlink $html;
24 0           return $self;
25             }
26              
27             1;