File Coverage

blib/lib/OCR/Naive.pm
Criterion Covered Total %
statement 7 9 77.7
branch n/a
condition n/a
subroutine 3 3 100.0
pod n/a
total 10 12 83.3


line stmt bran cond sub pod time code
1             # $Id: Naive.pm,v 1.13 2009/02/10 08:04:55 dk Exp $
2             package OCR::Naive;
3            
4 1     1   2194 use strict;
  1         3  
  1         35  
5 1     1   6 use warnings;
  1         2  
  1         28  
6 1     1   402 use Prima;
  0            
  0            
7             require Exporter;
8            
9             our $VERSION = '0.07';
10             use base qw(Exporter);
11            
12             our @EXPORT_OK = qw(
13             load_dictionary save_dictionary find_images
14             image2db_key suggest_glyph_order enhance_image
15             recognize
16             );
17             our %EXPORT_TAGS = ( all => \@EXPORT_OK);
18            
19             sub load_dictionary
20             {
21             my ( $file) = @_;
22            
23             return unless open DB, '<', $file;
24            
25             my %db;
26            
27             while () {
28             chomp;
29             s/^\s*\#.*//;
30             next unless length;
31             my %k = m/(\w+)='((?:\\[\\']|[^\\'])*)'\s*/g;
32             unless ( 4 == grep { exists $k{$_}} qw(w h t d)) {
33             warn ("malformed line in $file, line $.\n");
34             next;
35             }
36             s/\\(.)/$1/g for values %k;
37             if ( $k{w} <= 0 or $k{h} <= 0) {
38             warn ("malformed line in $file, line $.\n");
39             next;
40             }
41            
42             $k{d} =~ s/(..)/chr(hex($1))/ge;
43             my $i = Prima::Image-> create(
44             width => $k{w},
45             height => $k{h},
46             data => $k{d},
47             type => im::BW,
48             );
49             $db{$k{d}} = {
50             width => $k{w},
51             height => $k{h},
52             text => $k{t},
53             image => $i,
54             };
55             }
56            
57             close DB;
58             return \%db;
59             }
60            
61             sub save_dictionary
62             {
63             my ( $file, $db) = @_;
64            
65             return unless open DB, ">", $file;
66            
67             while ( my ( $k, $v) = each %$db) {
68             next unless defined $v-> {text};
69             my $t = $v->{text};
70             $k =~ s/(.)/sprintf("%02x",ord($1))/ges;
71             $t =~ s/(['\\])/\\$1/ge;
72             print DB "t='$t' w='$v->{width}' h='$v->{height}' d='$k'\n";
73             }
74             close DB;
75             return 1;
76             }
77            
78             sub find_images
79             {
80             my ( $image, $subimage, $multiple) = @_;
81            
82             my $G = $image-> data;
83             my $W = $image-> width;
84             my $w = $subimage-> width;
85             my $h = $subimage-> height;
86             my $bpp = ($image-> type & im::BPP) / 8;
87             die "won't do images with less than 256 colors"
88             if $bpp < 0;
89             if ( $subimage-> type != $image-> type) {
90             $subimage = $subimage-> dup;
91             $subimage-> type( $image-> type);
92             }
93             my $I = $subimage-> data;
94             my $gw = int(( $W * ( $image-> type & im::BPP) + 31) / 32) * 4;
95             my $iw = int(( $w * ( $subimage-> type & im::BPP) + 31) / 32) * 4;
96             my $ibw = $w * $bpp;
97             my $dw = $gw - $ibw;
98            
99             my $rx = join( ".{$dw}", map { quotemeta substr( $I, $_ * $iw, $ibw) }
100             (0 .. $subimage-> height - 1));
101             my ( $x, $y);
102             my @ret;
103             my $blanker = ("\0" x ( $bpp * $w));
104            
105             while ( 1) {
106             pos($G) = 0;
107             study $G;
108             my @loc_ret;
109             while ( 1) {
110             unless ( $G =~ m/\G.*?$rx/gcs) {
111             return unless $multiple;
112             last;
113             }
114             my $p = pos($G);
115             $x = ($p - $w * $bpp) % $gw / $bpp;
116             $y = int(($p - ( $x + $w) * $bpp) / $gw) + 1;
117             next if $x + $w > $W; # scanline wrap
118             push @loc_ret, [ $x, $y - $h ];
119             return @{ $loc_ret[0] } unless $multiple;
120             }
121             # blank zeros over the found stuff to avoid overlapping matches
122             for ( @loc_ret) {
123             my ( $x, $y) = @$_;
124             my $pos = $y * $gw + $x;
125             for ( my $i = 0; $i < $h; $i++, $pos += $gw) {
126             substr( $G, $pos, $w * $bpp) = $blanker;
127             }
128             }
129             push @ret, @loc_ret;
130             return @ret unless @loc_ret;
131             @loc_ret = ();
132             }
133             }
134            
135             sub image2db_key { $_[0]-> data }
136            
137             # suggest OCR order so that glyphs covering larger area come first (so f.ex.)
138             # (i) is recognized before (.) and (dotlessi).
139             sub suggest_glyph_order
140             {
141             my $db = $_[0];
142             return map {
143             $$_[0]
144             } sort {
145             $$b[1] <=> $$a[1]
146             } map {
147             [ $_, $db->{$_}->{width} * $db->{$_}->{height} ]
148             } keys %$db;
149             }
150            
151             sub enhance_image
152             {
153             my ( $i, %options) = @_;
154            
155             require IPA;
156             require IPA::Misc;
157             require IPA::Point;
158            
159             my $min_contrast = $options{min_contrast} || 128;
160            
161             # convert to grayscale
162             $i-> type(im::Byte);
163            
164             # get histogram and peaks
165             my @h = (0, IPA::Misc::histogram( $i), 0);
166             my @peaks =
167             map { $_ - 1 }
168             sort { $h[$b] <=> $h[$a] }
169             grep { $h[$_] > $h[$_-1] and $h[$_] > $h[$_+1] }
170             1..256;
171             @h = @h[1..256];
172            
173             die "Image's not clear enough"
174             if @peaks < 2;
175            
176             warn "peaks: @peaks / @h[@peaks]\n"
177             if $options{verbose};
178            
179             # make BW
180             my $peak = 1;
181             my ( $bg, $fg) = @peaks[0,1];
182             while ( abs( $bg - $fg) < $min_contrast) {
183             $bg = $fg if $bg < $fg;
184             $fg = $peaks[ ++$peak ];
185             die "Image's not clear enough (min_contrast required more than $min_contrast)"
186             unless defined $fg;
187             }
188             my $threshold = int(($bg + $fg) / 2);
189             warn "fg=$fg bg=$bg threshold=$threshold\n"
190             if $options{verbose};
191             $i = IPA::Point::threshold( $i, minvalue => $threshold);
192            
193             # invert if any; we need white glyphs on black background
194             if ( $bg > $fg) {
195             warn "invert\n"
196             if $options{verbose};
197             $i-> put_image( 0, 0, $i, rop::NotPut);
198             ( $bg, $fg) = ( $fg, $bg);
199             }
200            
201             return $i;
202             }
203            
204             sub recognize
205             {
206             my ( $i, $db, %options) = @_;
207            
208             unless ( scalar keys %$db) {
209             warn "empty dictionary"
210             if $options{verbose};
211             }
212            
213             my @sorted_glyphs = suggest_glyph_order( $db);
214            
215             # OCR and get glyph positions
216             my $num = 0;
217             my $max_line_height = 1;
218             my @vmap = ( 0 x ( $i-> height));
219             my @unsorted = map {
220             my $v = $_;
221             my @positions = find_images( $i, $v-> {image}, 1);
222            
223             my $h = $v-> {image}-> height - 1;
224             for my $p ( @positions) {
225             # erase glyphs
226             $i-> put_image( @$p, $v-> {image}, rop::Blackness);
227             # put on vmap
228             $vmap[ $$p[1] + $_ ]++ for 0 .. $h;
229             }
230             $max_line_height = $h + 1 if $max_line_height <= $h;
231             $num++;
232            
233             warn "$num/", scalar(@sorted_glyphs), ", '$v->{text}' found ", scalar(@positions), " times\n"
234             if $options{verbose};
235            
236             map { [ $v, @$_ ] } @positions;
237             } @$db { @sorted_glyphs };
238             $max_line_height *= 2;
239             warn "max line height $max_line_height\n"
240             if $options{verbose};
241            
242             # vmap-> rle vmap
243             {
244             my @chunks = ([]);
245             for ( my $j = 0; $j < @vmap; $j++) {
246             if ( $vmap[$j]) {
247             push @{ $chunks[-1] }, $j unless @{ $chunks[-1] };
248             push @{ $chunks[-1] }, $vmap[$j];
249             } else {
250             push @chunks, [] if @{ $chunks[-1] };
251             }
252             }
253             @vmap = @chunks;
254             }
255            
256             # vmap-> occupied ranges; detect number of lines
257             my ( @ready_vmap);
258             while ( @vmap) {
259             my @new;
260             for my $v ( @vmap) {
261             if ( $#$v > $max_line_height) {
262             # split further -- subtract the minimum
263             my $min = $v->[1];
264             for ( @$v) {
265             $min = $_ if $min > $_;
266             }
267             my @new_chunks = [];
268             for ( my $i = 1; $i < @$v; $i++) {
269             my $reduced = $v->[$i] - $min;
270             if ( $reduced > 0) {
271             push @{ $new_chunks[-1]}, $v->[0] + $i - 1
272             unless @{ $new_chunks[-1] };
273             push @{ $new_chunks[-1]}, $reduced;
274             } else {
275             push @new_chunks, [ $v-> [0] + $i - 1, 1], [];
276             }
277             }
278             @new_chunks = grep { @$_ } @new_chunks;
279             push @new, @new_chunks;
280             warn "too wide vline $v->[0]:$#$v split into ",
281             scalar( @new_chunks), " chunks\n"
282             if $options{verbose};
283             # warn "@$_\n" for @new_chunks;
284             } else {
285             warn "new vline $v->[0]:$#$v\n"
286             if $options{verbose};
287             push @ready_vmap, $v;
288             }
289             }
290             @vmap = @new;
291             }
292            
293             # assign Y-> textline map
294             my ( @vlines, %ranges);
295             for my $v ( sort { $a->[0] <=> $b->[0] } @ready_vmap) {
296             push @vlines, [];
297             for ( my $i = 0; $i < $#$v; $i++) {
298             $ranges{ $v->[0] + $i } = $#vlines;
299             }
300             }
301             undef @ready_vmap;
302             warn "glyphs grouped in " ,scalar(@vlines), " lines of text\n"
303             if $options{verbose};
304            
305             # put glyphs into lines sorted by X
306             for ( @unsorted) {
307             my ( $v, $x, $y) = @$_;
308             push @{ $vlines[ $ranges{$y} ] }, $_;
309             }
310            
311             # sort vlines
312             for ( @vlines) {
313             @$_ = sort { $$a[1] <=> $$b[1] } @$_;
314             }
315            
316             my $minspace;
317             unless ( defined $options{minspace}) {
318             # Calculate min space.
319             # - get average glyph width:
320             my $ave_width = 0;
321             $ave_width += $_-> {width} for values %$db;
322             $ave_width /= scalar keys %$db;
323             # - one line of text occupies up to $i-> width, right?
324             my $max_chars_in_line = 0;
325             for ( @vlines) {
326             $max_chars_in_line = @$_ if $max_chars_in_line < @$_;
327             }
328             $minspace = int($ave_width + .5);
329             warn "minspace: $minspace \n"
330             if $options{verbose};
331             } else {
332             $minspace = $options{minspace};
333             }
334            
335             my @text;
336             for my $l ( reverse @vlines) {
337             my $last = $#$l;
338             my $text = '';
339             if ( $last >= 0) {
340             my $first = $l->[0]->[1] / $minspace;
341             $text .= (' ' x $first) if $first > 0;
342             for ( my $i = 0; $i < $last; $i++) {
343             my $v = $l-> [$i];
344             my $dist = ($l-> [$i+1]-> [1] - $v->[1] - $v->[0]->{width}) / $minspace;
345             $text .= $v-> [0]-> {text};
346             $text .= (' ' x $dist) if $dist > 0;
347             }
348             $text .= $l-> [-1]-> [0]-> {text};
349             }
350             push @text, $text;
351             }
352            
353             return @text;
354             }
355            
356             1;
357            
358             =pod
359            
360             =head1 NAME
361            
362             OCR::Naive - convert images into text in an extremely naive fashion
363            
364             =head1 DESCRIPTION
365            
366             The module implements a very simple and unsophisticated OCR by finding all
367             known images in a larger image. The known images are mapped to text using the
368             preexisting dictionary, and the text lines are returned.
369            
370             The interesting stuff here is the image finding itself - it is done by a
371             regexp! For all practical reasons, images can be easily treated as byte
372             strings, and regexps are not exception. For example, one needs to locate an
373             image 2x2 in larger 7x7 image. The regexp constructed should be the first
374             scanline of smaller image, 2 bytes, verbatim, then 7 - 2 = 5 of any character,
375             and finally the second scanline, 2 bytes again. Of course there are some quirks,
376             but these explained in API section.
377            
378             Dictionaries for different fonts can be created interactively by
379             C; the non-interactive recognition is performed by C
380             which is a mere wrapper to this module.
381            
382             =head1 SYNOPSIS
383            
384             use Prima::noX11; # Prima imaging required
385             use OCR::Naive;
386            
387             # load a dictionary created by bin/makedict
388             $db = load_dictionary( 'my.dict');
389            
390             # load image to recognize
391             my $i = Prima::Image-> load( 'screenshot.png' );
392             $i = enhance_image( $i );
393            
394             # ocr!
395             print "$_\n" for recognize( $i, $db);
396            
397             =head1 API
398            
399             =over
400            
401             =item load_dictionary $FILE
402            
403             Loads a glyph dictionary from $FILE, returns a dictionary hash table. If not loaded,
404             returns C and C<$!> contains the error.
405            
406             =item save_dictionary $FILE, $DB
407            
408             Saves a glyph dictionary from $DB into $FILE, returns success flag. If failed,
409             C<$!> contains the error.
410            
411             =item image2db_key $IMAGE
412            
413             The dictionary is intended to be a simple hash, where the key is the image pixel data,
414             and value is a hash of image attributes - width, height, text, and possible something
415             more for the future. The key currently is image data verbatim, and C
416             returns the data of $IMAGE.
417            
418             =item find_images $IMAGE, $SUBIMAGE, $MULTIPLE
419            
420             Locates a $SUBIMAGE in $IMAGE, returns one or many matches, depending on $MULTIPLE.
421             If single match is requested, stops on the first match, and returns a pair of (X,Y)
422             coordinates. If $MULTIPLE is 1, returns array of (X,Y) pairs. In both modes, returns
423             empty list if nothing was found.
424            
425             =item suggest_glyph_order $DB
426            
427             When more than one subimage is to be found on a larger image, it is important that
428             parts of larger glyphs are not eventually attributed to smaller ones. For example,
429             letter C<('i')> might be detected as a combination of C<('dot')> and C<('dotlessi')>.
430             To avoid this C sorts all dictionary entries by their occupied
431             area, larger first, and returns sorted set of keys.
432            
433             =item enhance_image $IMAGE, %OPTIONS
434            
435             Glyphs in dictionary are black-and-white images, and the ideal detection should
436             also happed on 2-color images. C tries to enhance the contrast of
437             the image, find histogram peaks, and detect what is foreground and what is background,
438             and finally converts the image into a black-and-white.
439            
440             This procedure is of course nowhere near any decent pre-OCR image processing, so
441             don't expect much. OTOH it might be serve a good-enough quick hack for screen dumps.
442            
443             If C<$OPTIONS{verbose}> is set, prints details is it goes.
444            
445             =item recognize $IMAGE, $DB, %OPTIONS
446            
447             Given a dictionary $DB, recognizes all text it can find on $IMAGE. Returns
448             array of text lines.
449            
450             The spaces are a problem with approach, and even though C tries to
451             deduce a minimal width in pixels that should not be treated a
452             character, it will inevitably fail. Set C<$OPTION{minspace}> to the space
453             width if you happen to know what font you're detecting.
454            
455             If C<$OPTIONS{verbose}> is set, prints details is it goes.
456            
457             =back
458            
459             =head1 PREREQUISITES
460            
461             L, L
462            
463             =head1 SEE ALSO
464            
465             L, L
466            
467             =head1 LICENSE AND COPYRIGHT
468            
469             Copyright (c) 2007 capmon ApS. All rights reserved.
470            
471             This library is free software; you can redistribute it and/or modify it
472             under the same terms as Perl itself.
473            
474             =head1 AUTHOR
475            
476             Dmitry Karasik, Edmitry@karasik.eu.orgE.
477            
478             =cut