File Coverage

blib/lib/WebService/KoreanSpeller.pm
Criterion Covered Total %
statement 45 45 100.0
branch 2 4 50.0
condition n/a
subroutine 8 8 100.0
pod 1 1 100.0
total 56 58 96.5


line stmt bran cond sub pod time code
1             package WebService::KoreanSpeller;
2              
3             our $VERSION = '0.013';
4             $VERSION = eval $VERSION;
5              
6 2     2   14955 use Moose;
  2         779890  
  2         17  
7 2     2   13247 use Moose::Util::TypeConstraints;
  2         5  
  2         19  
8 2     2   5194 use namespace::autoclean;
  2         12577  
  2         7  
9 2     2   963 use HTTP::Request::Common qw/POST/;
  2         34377  
  2         125  
10 2     2   1092 use LWP::UserAgent;
  2         32580  
  2         58  
11 2     2   915 use utf8;
  2         25  
  2         12  
12 2     2   965 use Encode qw/encode decode/;
  2         15187  
  2         838  
13              
14             subtype 'UTF8FlagOnString'
15             => as 'Str'
16             => where { utf8::is_utf8($_) };
17              
18             has 'text' => ( is => 'ro', isa => 'UTF8FlagOnString', required => 1 );
19              
20             sub spellcheck {
21 1     1 1 8 my ($self) = @_;
22 1         8 my $ua = LWP::UserAgent->new;
23 1         2168 my $text = $self->text;
24 1         5 my $text1 = encode('utf8', $text);
25 1         55 my $req = POST 'http://speller.cs.pusan.ac.kr/PnuWebSpeller/lib/check.asp', [ text1 => $text1 ];
26 1         6038 my $res = $ua->request($req);
27              
28 1 50       1917238 die unless $res->is_success;
29 1         34 my $content = decode('utf8', $res->as_string);
30             #print "$content"; exit;
31              
32 1         609 my @items;
33 1         31 my @tables = $content =~ m{<table id='tableErr_\d+'.*?>(.*?)</TABLE>}sg;
34 1 50       4 return @items unless @tables; # No error
35 1         3 foreach my $table (@tables) {
36 1         3 my %item;
37             @item{qw/incorrect correct comment/} =
38             (
39             map {
40 1         18 $_ =~s/<.*?br\/>/\n/g;
  3         14  
41 3         10 $_ =~s/^\s+//s;
42 3         14 $_ =~s/\s+$//s;
43 3         8 $_ =~s/&nbsp;/ /gs;
44 3         10 $_
45             } $table =~ m{<TD id='td(?:ErrorWord|ReplaceWord|Help)_\d+'.*?>(.*?)</TD>}sg
46             )[0,1,2];
47 1         28 $text =~ m/\Q$item{incorrect}\E/g;
48 1         314 print "pos: ", pos($text), " length: ", length($item{incorrect}),"\n";
49 1         8 $item{position} = pos($text) - length($item{incorrect});
50 1         4 push @items, \%item;
51             }
52 1         44 return @items;
53             }
54              
55             __PACKAGE__->meta->make_immutable;
56              
57             1;
58              
59             __END__
60              
61             =pod
62              
63             =encoding utf8
64              
65             =head1 NAME
66              
67             WebService::KoreanSpeller - Korean spellchecker
68              
69             =head1 SYNOPSIS
70              
71             use WebService::KoreanSpeller;
72             use utf8;
73              
74             my $checker = WebService::KoreanSpeller->new( text=> '안뇽하세요? 방갑습니다.' );
75             my @results = $checker->spellcheck; # returns array of hashes
76             binmode STDOUT, ':encoding(UTF-8)';
77             foreach my $item (@results) {
78             print $item->{position}, "\n"; # index in the original text (starting from 0)
79             print $item->{incorrect}, " -> "; # incorrect spelling
80             print $item->{correct}, "\n"; # correct spelling
81             print $item->{comment}, "\n"; # comment about spelling
82             print "------------------------------\n";
83             }
84              
85              
86             OUTPUT:
87              
88             0
89             안뇽하세요 -> 안녕하세요
90             표준 발음·표준어 오류
91             어린이들의 발음을 흉내내어 '안뇽'이라고 말하는 사람들이 종종 있습니다. 특히, 글을 쓸 때에는 이러한 단어를 쓰지 않도록 합시다.
92             ------------------------------
93             7
94             방갑습니다 -> 반갑습니다
95             약어 사용 오류
96             오늘날 통신에서 자주 쓰는 은어입니다.
97             ------------------------------
98              
99             =head1 DESCRIPTION
100              
101             This module provides a Perl interface to the Web-based korean speller service( 온라인 한국어 맞춤법/문법 검사기 - http://speller.cs.pusan.ac.kr ).
102              
103             =head1 METHODS
104              
105             =head2 new( text => 'text for spell check' )
106              
107             Returns an obejct instance of this module. text should be "Unicode string"(a.k.a. perl's internal format - utf8 encoding/utf8 flag on)
108              
109             =head2 spellcheck
110              
111             Returns results as array of hashes(if there is no error in the text, this method will return empty list), See SYNOPSIS. you can easily convert AoH to JSON or XML.
112              
113             =head1 CAUTION
114              
115             I'm afraid we don't have a good open source korean spell checker. but there is a decent proprietary service that runs on the online website( 온라인 한국어 맞춤법/문법 검사기 - http://speller.cs.pusan.ac.kr ). So I made this module with web-scrapping approach, this is easy to mess up if they change layout of the website and has same limitation(checking only 300 synatic words at once). Let me know if this does not work. *This module follows the same terms of the original service agreement.*
116              
117             =head1 AUTHOR
118              
119             C.H. Kang <chahkang@gmail.com>
120              
121             =head1 COPYRIGHT AND LICENSE
122              
123             This software is copyright (c) 2017 by C.H. Kang.
124              
125             This is free software; you can redistribute it and/or modify it under
126             the same terms as the Perl 5 programming language system itself.
127              
128             =cut