File Coverage

blib/lib/WebService/KoreanSpeller.pm
Criterion Covered Total %
statement 44 44 100.0
branch 3 6 50.0
condition n/a
subroutine 8 8 100.0
pod 1 1 100.0
total 56 59 94.9


line stmt bran cond sub pod time code
1             package WebService::KoreanSpeller;
2              
3             our $VERSION = '0.015';
4             $VERSION = eval $VERSION;
5              
6 2     2   71552 use Moose;
  2         941752  
  2         17  
7 2     2   16055 use Moose::Util::TypeConstraints;
  2         5  
  2         21  
8 2     2   5951 use namespace::autoclean;
  2         16677  
  2         9  
9 2     2   1195 use HTTP::Request::Common qw/POST/;
  2         48815  
  2         148  
10 2     2   1462 use LWP::UserAgent;
  2         46126  
  2         76  
11 2     2   1209 use utf8;
  2         32  
  2         21  
12 2     2   1189 use Encode qw/encode decode/;
  2         20495  
  2         1014  
13              
14             subtype 'UTF8FlagOnString'
15             => as 'Str'
16             => where { utf8::is_utf8($_) };
17              
18             has 'text' => ( is => 'ro', isa => 'UTF8FlagOnString', required => 1 );
19              
20             sub spellcheck {
21 1     1 1 8 my ($self) = @_;
22 1         17 my $ua = LWP::UserAgent->new;
23 1         3098 my $text = $self->text;
24 1         7 my $text1 = encode('utf8', $text);
25 1         65 my $req = POST 'http://speller.cs.pusan.ac.kr/results', [ text1 => $text1 ];
26 1         8315 my $res = $ua->request($req);
27              
28 1 50       1266122 die unless $res->is_success;
29 1         23 my $content = decode('utf8', $res->as_string);
30             #print "$content"; exit;
31              
32 1         528 my @items;
33 1         25 my ($res_json) = ( $content =~ m/\bdata = \[\{"str":.*?"errInfo":(\[.*?\])/ );
34 1 50       6 return @items unless defined $res_json; # No error
35              
36 1         12 my @tables = $res_json =~ m/(\{"help":.*?\})/g;
37 1 50       5 return @items unless @tables; # No error
38              
39 1         5 foreach my $table (@tables) {
40 1         2 my %item;
41             @item{qw/comment position incorrect correct/} =
42             (
43             map {
44 1         14 $_ =~s/
/\n/g;
  4         12  
45 4         9 $_ =~s/^\s+//s;
46 4         15 $_ =~s/\s+$//s;
47 4         13 $_ =~s/'//gs;
48 4         13 $_
49             } $table =~ m{"help":"(.*?)".*?,"start":(\d+),.*?,"orgStr":"(.*?)","candWord":"(.*?)"}sg
50             );
51 1         7 push @items, \%item;
52             }
53              
54 1         42 return @items;
55             }
56              
57             __PACKAGE__->meta->make_immutable;
58              
59             1;
60              
61             __END__
62              
63             =pod
64              
65             =encoding utf8
66              
67             =head1 NAME
68              
69             WebService::KoreanSpeller - Korean spellchecker
70              
71             =head1 SYNOPSIS
72              
73             use WebService::KoreanSpeller;
74             use utf8;
75              
76             my $checker = WebService::KoreanSpeller->new( text=> '안뇽하세요? 방갑습니다.' );
77             my @results = $checker->spellcheck; # returns array of hashes
78             binmode STDOUT, ':encoding(UTF-8)';
79             foreach my $item (@results) {
80             print $item->{position}, "\n"; # index in the original text (starting from 0)
81             print $item->{incorrect}, " -> "; # incorrect spelling
82             print $item->{correct}, "\n"; # correct spelling
83             print $item->{comment}, "\n"; # comment about spelling
84             print "------------------------------\n";
85             }
86              
87              
88             OUTPUT:
89              
90             0
91             안뇽하세요 -> 안녕하세요
92             표준 발음·표준어 오류
93             어린이들의 발음을 흉내내어 '안뇽'이라고 말하는 사람들이 종종 있습니다. 특히, 글을 쓸 때에는 이러한 단어를 쓰지 않도록 합시다.
94             ------------------------------
95             7
96             방갑습니다 -> 반갑습니다
97             약어 사용 오류
98             오늘날 통신에서 자주 쓰는 은어입니다.
99             ------------------------------
100              
101             =head1 DESCRIPTION
102              
103             This module provides a Perl interface to the Web-based korean speller service( 온라인 한국어 맞춤법/문법 검사기 - http://speller.cs.pusan.ac.kr ).
104              
105             =head1 METHODS
106              
107             =head2 new( text => 'text for spell check' )
108              
109             Returns an obejct instance of this module. text should be "Unicode string"(a.k.a. perl's internal format - utf8 encoding/utf8 flag on)
110              
111             =head2 spellcheck
112              
113             Returns results as array of hashes(if there is no error in the text, this method will return empty list), See SYNOPSIS. you can easily convert AoH to JSON or XML.
114              
115             =head1 CAUTION
116              
117             I'm afraid we don't have a good open source korean spell checker. but there is a decent proprietary service that runs on the online website( 온라인 한국어 맞춤법/문법 검사기 - http://speller.cs.pusan.ac.kr ). So I made this module with web-scrapping approach, this is easy to mess up if they change layout of the website and has same limitation(checking only 300 synatic words at once). Let me know if this does not work. *This module follows the same terms of the original service agreement.*
118              
119             =head1 AUTHOR
120              
121             C.H. Kang <chahkang@gmail.com>
122              
123             =head1 COPYRIGHT AND LICENSE
124              
125             This software is copyright (c) 2017 by C.H. Kang.
126              
127             This is free software; you can redistribute it and/or modify it under
128             the same terms as the Perl 5 programming language system itself.
129              
130             =cut