File Coverage

blib/lib/WebService/KoreanSpeller.pm
Criterion Covered Total %
statement 44 44 100.0
branch 2 4 50.0
condition n/a
subroutine 8 8 100.0
pod 1 1 100.0
total 55 57 96.4


line stmt bran cond sub pod time code
1             package WebService::KoreanSpeller;
2              
3             our $VERSION = '0.014';
4             $VERSION = eval $VERSION;
5              
6 2     2   14710 use Moose;
  2         801394  
  2         20  
7 2     2   13536 use Moose::Util::TypeConstraints;
  2         5  
  2         19  
8 2     2   5283 use namespace::autoclean;
  2         13006  
  2         7  
9 2     2   911 use HTTP::Request::Common qw/POST/;
  2         33101  
  2         110  
10 2     2   1030 use LWP::UserAgent;
  2         31999  
  2         62  
11 2     2   1068 use utf8;
  2         28  
  2         11  
12 2     2   895 use Encode qw/encode decode/;
  2         15272  
  2         762  
13              
14             subtype 'UTF8FlagOnString'
15             => as 'Str'
16             => where { utf8::is_utf8($_) };
17              
18             has 'text' => ( is => 'ro', isa => 'UTF8FlagOnString', required => 1 );
19              
20             sub spellcheck {
21 1     1 1 7 my ($self) = @_;
22 1         11 my $ua = LWP::UserAgent->new;
23 1         2258 my $text = $self->text;
24 1         5 my $text1 = encode('utf8', $text);
25 1         56 my $req = POST 'http://speller.cs.pusan.ac.kr/PnuWebSpeller/lib/check.asp', [ text1 => $text1 ];
26 1         5958 my $res = $ua->request($req);
27              
28 1 50       1974016 die unless $res->is_success;
29 1         35 my $content = decode('utf8', $res->as_string);
30             #print "$content"; exit;
31              
32 1         639 my @items;
33 1         32 my @tables = $content =~ m{<table id='tableErr_\d+'.*?>(.*?)</TABLE>}sg;
34 1 50       5 return @items unless @tables; # No error
35 1         4 foreach my $table (@tables) {
36 1         3 my %item;
37             @item{qw/incorrect correct comment/} =
38             (
39             map {
40 1         29 $_ =~s/<.*?br\/>/\n/g;
  3         16  
41 3         10 $_ =~s/^\s+//s;
42 3         15 $_ =~s/\s+$//s;
43 3         8 $_ =~s/&nbsp;/ /gs;
44 3         9 $_
45             } $table =~ m{<TD id='td(?:ErrorWord|ReplaceWord|Help)_\d+'.*?>(.*?)</TD>}sg
46             )[0,1,2];
47 1         28 $text =~ m/\Q$item{incorrect}\E/g;
48 1         6 $item{position} = pos($text) - length($item{incorrect});
49 1         4 push @items, \%item;
50             }
51 1         43 return @items;
52             }
53              
54             __PACKAGE__->meta->make_immutable;
55              
56             1;
57              
58             __END__
59              
60             =pod
61              
62             =encoding utf8
63              
64             =head1 NAME
65              
66             WebService::KoreanSpeller - Korean spellchecker
67              
68             =head1 SYNOPSIS
69              
70             use WebService::KoreanSpeller;
71             use utf8;
72              
73             my $checker = WebService::KoreanSpeller->new( text=> '안뇽하세요? 방갑습니다.' );
74             my @results = $checker->spellcheck; # returns array of hashes
75             binmode STDOUT, ':encoding(UTF-8)';
76             foreach my $item (@results) {
77             print $item->{position}, "\n"; # index in the original text (starting from 0)
78             print $item->{incorrect}, " -> "; # incorrect spelling
79             print $item->{correct}, "\n"; # correct spelling
80             print $item->{comment}, "\n"; # comment about spelling
81             print "------------------------------\n";
82             }
83              
84              
85             OUTPUT:
86              
87             0
88             안뇽하세요 -> 안녕하세요
89             표준 발음·표준어 오류
90             어린이들의 발음을 흉내내어 '안뇽'이라고 말하는 사람들이 종종 있습니다. 특히, 글을 쓸 때에는 이러한 단어를 쓰지 않도록 합시다.
91             ------------------------------
92             7
93             방갑습니다 -> 반갑습니다
94             약어 사용 오류
95             오늘날 통신에서 자주 쓰는 은어입니다.
96             ------------------------------
97              
98             =head1 DESCRIPTION
99              
100             This module provides a Perl interface to the Web-based korean speller service( 온라인 한국어 맞춤법/문법 검사기 - http://speller.cs.pusan.ac.kr ).
101              
102             =head1 METHODS
103              
104             =head2 new( text => 'text for spell check' )
105              
106             Returns an obejct instance of this module. text should be "Unicode string"(a.k.a. perl's internal format - utf8 encoding/utf8 flag on)
107              
108             =head2 spellcheck
109              
110             Returns results as array of hashes(if there is no error in the text, this method will return empty list), See SYNOPSIS. you can easily convert AoH to JSON or XML.
111              
112             =head1 CAUTION
113              
114             I'm afraid we don't have a good open source korean spell checker. but there is a decent proprietary service that runs on the online website( 온라인 한국어 맞춤법/문법 검사기 - http://speller.cs.pusan.ac.kr ). So I made this module with web-scrapping approach, this is easy to mess up if they change layout of the website and has same limitation(checking only 300 synatic words at once). Let me know if this does not work. *This module follows the same terms of the original service agreement.*
115              
116             =head1 AUTHOR
117              
118             C.H. Kang <chahkang@gmail.com>
119              
120             =head1 COPYRIGHT AND LICENSE
121              
122             This software is copyright (c) 2017 by C.H. Kang.
123              
124             This is free software; you can redistribute it and/or modify it under
125             the same terms as the Perl 5 programming language system itself.
126              
127             =cut