| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
# $Id: /mirror/perl/File-Extract/trunk/lib/File/Extract/PDF.pm 4210 2007-10-27T13:43:07.499967Z daisuke $ |
|
2
|
|
|
|
|
|
|
# |
|
3
|
|
|
|
|
|
|
# Copyright (c) 2005 Daisuke Maki |
|
4
|
|
|
|
|
|
|
# All rights reserved. |
|
5
|
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
package File::Extract::PDF; |
|
7
|
2
|
|
|
2
|
|
13
|
use strict; |
|
|
2
|
|
|
|
|
4
|
|
|
|
2
|
|
|
|
|
99
|
|
|
8
|
2
|
|
|
2
|
|
13
|
use base qw(File::Extract::Base); |
|
|
2
|
|
|
|
|
4
|
|
|
|
2
|
|
|
|
|
200
|
|
|
9
|
2
|
|
|
2
|
|
3830
|
use CAM::PDF; |
|
|
2
|
|
|
|
|
118249
|
|
|
|
2
|
|
|
|
|
89
|
|
|
10
|
2
|
|
|
2
|
|
30
|
use File::Extract::Result; |
|
|
2
|
|
|
|
|
25
|
|
|
|
2
|
|
|
|
|
312
|
|
|
11
|
|
|
|
|
|
|
|
|
12
|
2
|
|
|
2
|
1
|
9
|
# MIME type handled by this extractor; also used by extract() to label
# the result it builds.
sub mime_type
{
    return 'application/pdf';
}
|
13
|
|
|
|
|
|
|
# Extract the text of every page of a PDF and wrap it in a result object.
#
# Arguments:
#   $file - handed unchanged to CAM::PDF->new and stored as the result's
#           filename (presumably a path to a PDF file; confirm against
#           callers).
#
# Returns a File::Extract::Result constructed with:
#   text      - the page texts concatenated in page order, passed through
#               $self->recode when that succeeds and yields a true value,
#               otherwise the text as extracted
#   filename  - $file
#   mime_type - $self->mime_type
#
# Dies with a message naming the file if CAM::PDF cannot load it.
sub extract
{
    my ($self, $file) = @_;

    # CAM::PDF->new returns undef (and sets $CAM::PDF::errstr) when the
    # file cannot be opened or parsed. Fail here with a useful message
    # instead of an opaque "Can't call method on an undefined value".
    my $doc = CAM::PDF->new($file);
    if (!$doc) {
        my $reason = defined $CAM::PDF::errstr && length $CAM::PDF::errstr
                   ? $CAM::PDF::errstr
                   : 'unknown error';
        my $name = defined $file ? $file : '(undef)';
        die "Failed to load PDF '$name': $reason";
    }

    my $text = '';
    foreach my $p (1 .. $doc->numPages()) {
        my $page_text = $doc->getPageText($p);

        # Skip pages for which no text came back rather than appending
        # an undefined value.
        next unless defined $page_text;
        $text .= $page_text;
    }

    # Recoding is best-effort: if recode() throws or returns a false
    # value, fall back to the text exactly as extracted.
    my $recoded = eval { $self->recode($text) };

    return File::Extract::Result->new(
        text      => $recoded || $text,
        filename  => $file,
        mime_type => $self->mime_type
    );
}
|
31
|
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
1; |
|
33
|
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
__END__ |