File Coverage

blib/lib/OCR/Naive.pm

Criterion	Covered	Total	%
statement	7	9	77.7
branch			n/a
condition			n/a
subroutine	3	3	100.0
pod			n/a
total	10	12	83.3

line	stmt	sub	time	code
1				# $Id: Naive.pm,v 1.13 2009/02/10 08:04:55 dk Exp $
2				package OCR::Naive;
3
4	1	1	2194	use strict;
	1		3
	1		35
5	1	1	6	use warnings;
	1		2
	1		28
6	1	1	402	use Prima;
	0
	0
7				require Exporter;
8
9				our $VERSION = '0.07';
10				use base qw(Exporter);
11
12				our @EXPORT_OK = qw(
13				load_dictionary save_dictionary find_images
14				image2db_key suggest_glyph_order enhance_image
15				recognize
16				);
17				our %EXPORT_TAGS = ( all => \@EXPORT_OK);
18
19				sub load_dictionary
20				{
21				my ( $file) = @_;
22
23				return unless open DB, '<', $file;
24
25				my %db;
26
27				while () {
28				chomp;
29				s/^\s\#.//;
30				next unless length;
31				my %k = m/(\w+)='((?:\\[\\']\|[^\\']))'\s/g;
32				unless ( 4 == grep { exists $k{$_}} qw(w h t d)) {
33				warn ("malformed line in $file, line $.\n");
34				next;
35				}
36				s/\\(.)/$1/g for values %k;
37				if ( $k{w} <= 0 or $k{h} <= 0) {
38				warn ("malformed line in $file, line $.\n");
39				next;
40				}
41
42				$k{d} =~ s/(..)/chr(hex($1))/ge;
43				my $i = Prima::Image-> create(
44				width => $k{w},
45				height => $k{h},
46				data => $k{d},
47				type => im::BW,
48				);
49				$db{$k{d}} = {
50				width => $k{w},
51				height => $k{h},
52				text => $k{t},
53				image => $i,
54				};
55				}
56
57				close DB;
58				return \%db;
59				}
60
61				sub save_dictionary
62				{
63				my ( $file, $db) = @_;
64
65				return unless open DB, ">", $file;
66
67				while ( my ( $k, $v) = each %$db) {
68				next unless defined $v-> {text};
69				my $t = $v->{text};
70				$k =~ s/(.)/sprintf("%02x",ord($1))/ges;
71				$t =~ s/(['\\])/\\$1/ge;
72				print DB "t='$t' w='$v->{width}' h='$v->{height}' d='$k'\n";
73				}
74				close DB;
75				return 1;
76				}
77
78				sub find_images
79				{
80				my ( $image, $subimage, $multiple) = @_;
81
82				my $G = $image-> data;
83				my $W = $image-> width;
84				my $w = $subimage-> width;
85				my $h = $subimage-> height;
86				my $bpp = ($image-> type & im::BPP) / 8;
87				die "won't do images with less than 256 colors"
88				if $bpp < 0;
89				if ( $subimage-> type != $image-> type) {
90				$subimage = $subimage-> dup;
91				$subimage-> type( $image-> type);
92				}
93				my $I = $subimage-> data;
94				my $gw = int(( $W * ( $image-> type & im::BPP) + 31) / 32) * 4;
95				my $iw = int(( $w * ( $subimage-> type & im::BPP) + 31) / 32) * 4;
96				my $ibw = $w * $bpp;
97				my $dw = $gw - $ibw;
98
99				my $rx = join( ".{$dw}", map { quotemeta substr( $I, $_ * $iw, $ibw) }
100				(0 .. $subimage-> height - 1));
101				my ( $x, $y);
102				my @ret;
103				my $blanker = ("\0" x ( $bpp * $w));
104
105				while ( 1) {
106				pos($G) = 0;
107				study $G;
108				my @loc_ret;
109				while ( 1) {
110				unless ( $G =~ m/\G.*?$rx/gcs) {
111				return unless $multiple;
112				last;
113				}
114				my $p = pos($G);
115				$x = ($p - $w * $bpp) % $gw / $bpp;
116				$y = int(($p - ( $x + $w) * $bpp) / $gw) + 1;
117				next if $x + $w > $W; # scanline wrap
118				push @loc_ret, [ $x, $y - $h ];
119				return @{ $loc_ret[0] } unless $multiple;
120				}
121				# blank zeros over the found stuff to avoid overlapping matches
122				for ( @loc_ret) {
123				my ( $x, $y) = @$_;
124				my $pos = $y * $gw + $x;
125				for ( my $i = 0; $i < $h; $i++, $pos += $gw) {
126				substr( $G, $pos, $w * $bpp) = $blanker;
127				}
128				}
129				push @ret, @loc_ret;
130				return @ret unless @loc_ret;
131				@loc_ret = ();
132				}
133				}
134
135				sub image2db_key { $_[0]-> data }
136
137				# suggest OCR order so that glyphs covering larger area come first (so f.ex.)
138				# (i) is recognized before (.) and (dotlessi).
139				sub suggest_glyph_order
140				{
141				my $db = $_[0];
142				return map {
143				$$_[0]
144				} sort {
145				$$b[1] <=> $$a[1]
146				} map {
147				[ $_, $db->{$_}->{width} * $db->{$_}->{height} ]
148				} keys %$db;
149				}
150
151				sub enhance_image
152				{
153				my ( $i, %options) = @_;
154
155				require IPA;
156				require IPA::Misc;
157				require IPA::Point;
158
159				my $min_contrast = $options{min_contrast} \|\| 128;
160
161				# convert to grayscale
162				$i-> type(im::Byte);
163
164				# get histogram and peaks
165				my @h = (0, IPA::Misc::histogram( $i), 0);
166				my @peaks =
167				map { $_ - 1 }
168				sort { $h[$b] <=> $h[$a] }
169				grep { $h[$_] > $h[$_-1] and $h[$_] > $h[$_+1] }
170				1..256;
171				@h = @h[1..256];
172
173				die "Image's not clear enough"
174				if @peaks < 2;
175
176				warn "peaks: @peaks / @h[@peaks]\n"
177				if $options{verbose};
178
179				# make BW
180				my $peak = 1;
181				my ( $bg, $fg) = @peaks[0,1];
182				while ( abs( $bg - $fg) < $min_contrast) {
183				$bg = $fg if $bg < $fg;
184				$fg = $peaks[ ++$peak ];
185				die "Image's not clear enough (min_contrast required more than $min_contrast)"
186				unless defined $fg;
187				}
188				my $threshold = int(($bg + $fg) / 2);
189				warn "fg=$fg bg=$bg threshold=$threshold\n"
190				if $options{verbose};
191				$i = IPA::Point::threshold( $i, minvalue => $threshold);
192
193				# invert if any; we need white glyphs on black background
194				if ( $bg > $fg) {
195				warn "invert\n"
196				if $options{verbose};
197				$i-> put_image( 0, 0, $i, rop::NotPut);
198				( $bg, $fg) = ( $fg, $bg);
199				}
200
201				return $i;
202				}
203
204				sub recognize
205				{
206				my ( $i, $db, %options) = @_;
207
208				unless ( scalar keys %$db) {
209				warn "empty dictionary"
210				if $options{verbose};
211				}
212
213				my @sorted_glyphs = suggest_glyph_order( $db);
214
215				# OCR and get glyph positions
216				my $num = 0;
217				my $max_line_height = 1;
218				my @vmap = ( 0 x ( $i-> height));
219				my @unsorted = map {
220				my $v = $_;
221				my @positions = find_images( $i, $v-> {image}, 1);
222
223				my $h = $v-> {image}-> height - 1;
224				for my $p ( @positions) {
225				# erase glyphs
226				$i-> put_image( @$p, $v-> {image}, rop::Blackness);
227				# put on vmap
228				$vmap[ $$p[1] + $_ ]++ for 0 .. $h;
229				}
230				$max_line_height = $h + 1 if $max_line_height <= $h;
231				$num++;
232
233				warn "$num/", scalar(@sorted_glyphs), ", '$v->{text}' found ", scalar(@positions), " times\n"
234				if $options{verbose};
235
236				map { [ $v, @$_ ] } @positions;
237				} @$db { @sorted_glyphs };
238				$max_line_height *= 2;
239				warn "max line height $max_line_height\n"
240				if $options{verbose};
241
242				# vmap-> rle vmap
243				{
244				my @chunks = ([]);
245				for ( my $j = 0; $j < @vmap; $j++) {
246				if ( $vmap[$j]) {
247				push @{ $chunks[-1] }, $j unless @{ $chunks[-1] };
248				push @{ $chunks[-1] }, $vmap[$j];
249				} else {
250				push @chunks, [] if @{ $chunks[-1] };
251				}
252				}
253				@vmap = @chunks;
254				}
255
256				# vmap-> occupied ranges; detect number of lines
257				my ( @ready_vmap);
258				while ( @vmap) {
259				my @new;
260				for my $v ( @vmap) {
261				if ( $#$v > $max_line_height) {
262				# split further -- subtract the minimum
263				my $min = $v->[1];
264				for ( @$v) {
265				$min = $_ if $min > $_;
266				}
267				my @new_chunks = [];
268				for ( my $i = 1; $i < @$v; $i++) {
269				my $reduced = $v->[$i] - $min;
270				if ( $reduced > 0) {
271				push @{ $new_chunks[-1]}, $v->[0] + $i - 1
272				unless @{ $new_chunks[-1] };
273				push @{ $new_chunks[-1]}, $reduced;
274				} else {
275				push @new_chunks, [ $v-> [0] + $i - 1, 1], [];
276				}
277				}
278				@new_chunks = grep { @$_ } @new_chunks;
279				push @new, @new_chunks;
280				warn "too wide vline $v->[0]:$#$v split into ",
281				scalar( @new_chunks), " chunks\n"
282				if $options{verbose};
283				# warn "@$_\n" for @new_chunks;
284				} else {
285				warn "new vline $v->[0]:$#$v\n"
286				if $options{verbose};
287				push @ready_vmap, $v;
288				}
289				}
290				@vmap = @new;
291				}
292
293				# assign Y-> textline map
294				my ( @vlines, %ranges);
295				for my $v ( sort { $a->[0] <=> $b->[0] } @ready_vmap) {
296				push @vlines, [];
297				for ( my $i = 0; $i < $#$v; $i++) {
298				$ranges{ $v->[0] + $i } = $#vlines;
299				}
300				}
301				undef @ready_vmap;
302				warn "glyphs grouped in " ,scalar(@vlines), " lines of text\n"
303				if $options{verbose};
304
305				# put glyphs into lines sorted by X
306				for ( @unsorted) {
307				my ( $v, $x, $y) = @$_;
308				push @{ $vlines[ $ranges{$y} ] }, $_;
309				}
310
311				# sort vlines
312				for ( @vlines) {
313				@$_ = sort { $$a[1] <=> $$b[1] } @$_;
314				}
315
316				my $minspace;
317				unless ( defined $options{minspace}) {
318				# Calculate min space.
319				# - get average glyph width:
320				my $ave_width = 0;
321				$ave_width += $_-> {width} for values %$db;
322				$ave_width /= scalar keys %$db;
323				# - one line of text occupies up to $i-> width, right?
324				my $max_chars_in_line = 0;
325				for ( @vlines) {
326				$max_chars_in_line = @$_ if $max_chars_in_line < @$_;
327				}
328				$minspace = int($ave_width + .5);
329				warn "minspace: $minspace \n"
330				if $options{verbose};
331				} else {
332				$minspace = $options{minspace};
333				}
334
335				my @text;
336				for my $l ( reverse @vlines) {
337				my $last = $#$l;
338				my $text = '';
339				if ( $last >= 0) {
340				my $first = $l->[0]->[1] / $minspace;
341				$text .= (' ' x $first) if $first > 0;
342				for ( my $i = 0; $i < $last; $i++) {
343				my $v = $l-> [$i];
344				my $dist = ($l-> [$i+1]-> [1] - $v->[1] - $v->[0]->{width}) / $minspace;
345				$text .= $v-> [0]-> {text};
346				$text .= (' ' x $dist) if $dist > 0;
347				}
348				$text .= $l-> [-1]-> [0]-> {text};
349				}
350				push @text, $text;
351				}
352
353				return @text;
354				}
355
356				1;
357
358				=pod
359
360				=head1 NAME
361
362				OCR::Naive - convert images into text in an extremely naive fashion
363
364				=head1 DESCRIPTION
365
366				The module implements a very simple and unsophisticated OCR by finding all
367				known images in a larger image. The known images are mapped to text using the
368				preexisting dictionary, and the text lines are returned.
369
370				The interesting stuff here is the image finding itself - it is done by a
371				regexp! For all practical reasons, images can be easily treated as byte
372				strings, and regexps are not exception. For example, one needs to locate an
373				image 2x2 in larger 7x7 image. The regexp constructed should be the first
374				scanline of smaller image, 2 bytes, verbatim, then 7 - 2 = 5 of any character,
375				and finally the second scanline, 2 bytes again. Of course there are some quirks,
376				but these explained in API section.
377
378				Dictionaries for different fonts can be created interactively by
379				C; the non-interactive recognition is performed by C
380				which is a mere wrapper to this module.
381
382				=head1 SYNOPSIS
383
384				use Prima::noX11; # Prima imaging required
385				use OCR::Naive;
386
387				# load a dictionary created by bin/makedict
388				$db = load_dictionary( 'my.dict');
389
390				# load image to recognize
391				my $i = Prima::Image-> load( 'screenshot.png' );
392				$i = enhance_image( $i );
393
394				# ocr!
395				print "$_\n" for recognize( $i, $db);
396
397				=head1 API
398
399				=over
400
401				=item load_dictionary $FILE
402
403				Loads a glyph dictionary from $FILE, returns a dictionary hash table. If not loaded,
404				returns C and C<$!> contains the error.
405
406				=item save_dictionary $FILE, $DB
407
408				Saves a glyph dictionary from $DB into $FILE, returns success flag. If failed,
409				C<$!> contains the error.
410
411				=item image2db_key $IMAGE
412
413				The dictionary is intended to be a simple hash, where the key is the image pixel data,
414				and value is a hash of image attributes - width, height, text, and possible something
415				more for the future. The key currently is image data verbatim, and C
416				returns the data of $IMAGE.
417
418				=item find_images $IMAGE, $SUBIMAGE, $MULTIPLE
419
420				Locates a $SUBIMAGE in $IMAGE, returns one or many matches, depending on $MULTIPLE.
421				If single match is requested, stops on the first match, and returns a pair of (X,Y)
422				coordinates. If $MULTIPLE is 1, returns array of (X,Y) pairs. In both modes, returns
423				empty list if nothing was found.
424
425				=item suggest_glyph_order $DB
426
427				When more than one subimage is to be found on a larger image, it is important that
428				parts of larger glyphs are not eventually attributed to smaller ones. For example,
429				letter C<('i')> might be detected as a combination of C<('dot')> and C<('dotlessi')>.
430				To avoid this C sorts all dictionary entries by their occupied
431				area, larger first, and returns sorted set of keys.
432
433				=item enhance_image $IMAGE, %OPTIONS
434
435				Glyphs in dictionary are black-and-white images, and the ideal detection should
436				also happed on 2-color images. C tries to enhance the contrast of
437				the image, find histogram peaks, and detect what is foreground and what is background,
438				and finally converts the image into a black-and-white.
439
440				This procedure is of course nowhere near any decent pre-OCR image processing, so
441				don't expect much. OTOH it might be serve a good-enough quick hack for screen dumps.
442
443				If C<$OPTIONS{verbose}> is set, prints details is it goes.
444
445				=item recognize $IMAGE, $DB, %OPTIONS
446
447				Given a dictionary $DB, recognizes all text it can find on $IMAGE. Returns
448				array of text lines.
449
450				The spaces are a problem with approach, and even though C tries to
451				deduce a minimal width in pixels that should not be treated a
452				character, it will inevitably fail. Set C<$OPTION{minspace}> to the space
453				width if you happen to know what font you're detecting.
454
455				If C<$OPTIONS{verbose}> is set, prints details is it goes.
456
457				=back
458
459				=head1 PREREQUISITES
460
461				L, L
462
463				=head1 SEE ALSO
464
465				L, L
466
467				=head1 LICENSE AND COPYRIGHT
468
469				Copyright (c) 2007 capmon ApS. All rights reserved.
470
471				This library is free software; you can redistribute it and/or modify it
472				under the same terms as Perl itself.
473
474				=head1 AUTHOR
475
476				Dmitry Karasik, Edmitry@karasik.eu.orgE.
477
478				=cut