File Coverage

blib/lib/Image/Density/TIFF.pm
Criterion Covered Total %
statement 7 9 77.7
branch n/a
condition n/a
subroutine 3 3 100.0
pod n/a
total 10 12 83.3


line stmt bran cond sub pod time code
1             #
2             # Image::Density::TIFF
3             #
4             # Calculate the density of a TIFF image in a way that helps estimate scanned
5             # image quality.
6             #
7             # Copyright (C) 2003-2012 Gregor N. Purdy, Sr. All rights reserved.
8             # This program is free software. It is subject to the same license as Perl.
9             #
10             # $Id$
11             #
12              
13             =head1 NAME
14              
15             Image::Density::TIFF
16              
17             =head1 SYNOPSIS
18              
19             use Image::Density::TIFF;
20             print "Density: %f\n", tiff_density("foo.tif"); # single-page
21             print "Densities: ", join(", ", tiff_densities("bar.tif")), "\n"; # multi-page
22              
23             =head1 DESCRIPTION
24              
25             A trivial density calculation would count the number of black pixels and
26             divide by the total number of pixels. However, it would produce misleading
27             results in the case where the image contains one or more target areas with
28             scanned content and large blank areas in between (imagine a photocopy of a
29             driver's license in the middle of a page).
30              
31             The metric implemented here estimates the density of data where there I
32             data, and has a
33             reasonable correlation with goodness as judged by humans. That is, if you
34             let a human look at a set of images and judge quality, the density values for
35             those images as calculated here tend to correlate well with the human
36             judgement (densities that are too high or too low represent "bad" images).
37              
38             This algorithm is intended for use on bitonal TIFF images, such as those from
39             scanning paper documents.
40              
41             =head2 The calculation
42              
43             We omit the margins because there is likely to be noise there, such as black
44             strips due to page skew. This does admit the possibility that we are skipping
45             over something important, but the margin skipping here worked well on the
46             test images.
47              
48             Leading and trailing white on a row are omitted from counting, as are runs of
49             white at least as long as the margin width. This helps out when we have images
50             with large blank areas, but decent density within the areas filled in, which
51             is what we really care about.
52              
53             =head1 AUTHOR
54              
55             Gregor N. Purdy, Sr.
56              
57             =head1 COPYRIGHT
58              
59             Copyright (C) 2003-2012 Gregor N. Purdy, Sr. All rights reserved.
60              
61             =head1 LICENSE
62              
63             This program is free software. Its use is subject to the same license as Perl.
64              
65             =cut
66              
67 1     1   632 use strict;
  1         2  
  1         34  
68 1     1   6 use warnings 'all';
  1         1  
  1         56  
69              
70             package Image::Density::TIFF;
71              
72 1     1   2780 use MAS::TIFF::File;
  0            
  0            
73              
74             our $VERSION = '0.3';
75              
76             BEGIN {
77             use Exporter;
78             use vars qw(@ISA @EXPORT);
79             @ISA = qw(Exporter);
80             @EXPORT = qw(&tiff_density &tiff_densities);
81             }
82              
83             my $MARGIN_FACTOR = 20;
84              
85             sub tiff_directory_density {
86             use integer;
87            
88             my $t = shift;
89            
90             die "Could not open file for reading" unless defined $t;
91            
92             my $bps = $t->bits_per_sample;
93            
94             die "Could not determine TIFF bits per sample file for reading" unless defined $bps;
95            
96             die "Cannot process TIFF files with more than on ebit per sample!" unless $bps == 1;
97            
98             my $spp = $t->samples_per_pixel;
99            
100             my $w = $t->image_width;
101             my $h = $t->image_length;
102            
103             my $w_margin = $w / $MARGIN_FACTOR;
104             my $h_margin = $h / $MARGIN_FACTOR;
105            
106             my $black = 0;
107             my $white = 0;
108            
109             #
110             # We omit the top and bottom margins because there is likely to be noise there,
111             # such as black strips due to page skew.
112             #
113             # We have to read the first h_margin rows, rather than skip them, because the
114             # TIFF file's compression algorithm might not support random access.
115             #
116              
117             my $scan_line_reader = $t->scan_line_reader;
118            
119             for (my $i = $h_margin; $i < ($h - $h_margin); $i++) {
120             #
121             # We omit the left and right margins because there is likely to be noise there,
122             # such as black strips due to page skew.
123             #
124             # The setup of last_sample and run_length simulates a leading white run long
125             # enough that any actual leading white, no matter how much, will be omitted.
126             #
127              
128             my $row_black = 0;
129             my $row_white = 0;
130             my $last_sample = 0;
131             my $run_length = $w_margin;
132            
133             my $scan_line = &$scan_line_reader($i);
134            
135             for (my $j = $w_margin; $j < ($w - $w_margin); $j++) {
136             my $byte_index = $j / 8;
137             my $byte = vec($scan_line, $byte_index, 8);
138             my $bit_index = 7 - ($j % 8);
139             my $bit = ($byte >> $bit_index) & 0x01;
140             my $sample = !$bit;
141            
142             #
143             # We don't count row white until we see black. This omits leading and trailing
144             # white on the row, which helps out when we have images with large blank areas,
145             # but decent density within the areas filled in, which is what we really care
146             # about.
147             #
148             # We also don't count row_white when it is greater than the margin, since that
149             # amounts to a "large" empty space, and we really want the density of *data*,
150             # where there *is* data.
151             #
152            
153             if ($sample == $last_sample) {
154             $run_length++;
155             }
156             else {
157             if ($run_length < $w_margin) {
158             if ($last_sample) {
159             $row_black += $run_length;
160             }
161             else {
162             $row_white += $run_length;
163             }
164             }
165              
166             $last_sample = $sample;
167             $run_length = 1;
168             }
169             }
170            
171             if ($run_length < $w_margin) {
172             if ($last_sample) {
173             $row_black += $run_length;
174             }
175              
176             # We don't add trailing white runs to the row's total
177             }
178            
179             $white += $row_white;
180             $black += $row_black;
181             }
182            
183             my $density;
184            
185             if ($black + $white > 0) {
186             no integer;
187             $density = $black / ($black + $white);
188             }
189             else {
190             $density = -1.0;
191             }
192              
193             return $density;
194             }
195              
196             sub tiff_density {
197             my $file_name = shift;
198            
199             my $tiff = MAS::TIFF::File->new($file_name);
200              
201             my ($first_ifd, ) = $tiff->ifds;
202            
203             my $density = tiff_directory_density($first_ifd);
204            
205             $tiff->close;
206            
207             undef $tiff;
208            
209             return $density;
210             }
211              
212             sub tiff_densities {
213             my $file_name = shift;
214            
215             my $tiff = MAS::TIFF::File->new($file_name);
216              
217             my @densities = map { $_ = tiff_directory_density($_) } $tiff->ifds;
218            
219             $tiff->close;
220            
221             undef $tiff;
222            
223             return @densities;
224             }
225              
226             1;
227