File Coverage

blib/lib/Text/Shinobi.pm
Criterion Covered Total %
statement 50 50 100.0
branch 2 2 100.0
condition 5 8 62.5
subroutine 13 13 100.0
pod 3 4 75.0
total 73 77 94.8


line stmt bran cond sub pod time code
1             package Text::Shinobi;
2 6     6   76383 use 5.010001;
  6         15  
3 6     6   572 use utf8;
  6         14  
  6         26  
4 6     6   122 use strict;
  6         9  
  6         129  
5 6     6   19 use warnings;
  6         5  
  6         292  
6             our $VERSION = "0.01";
7              
8 6     6   22 use Exporter 'import';
  6         6  
  6         200  
9 6     6   3291 use Unicode::Normalize qw/NFD NFC/;
  6         974011  
  6         882  
10 6     6   3716 use Lingua::JA::Regular::Unicode;
  6         62899  
  6         677  
11              
12             our @EXPORT_OK = qw/shinobi/;
13              
14             use constant {
15 6         7126 DUO => 1 << 0,
16             MONO => 1 << 1,
17             JIS => 1 << 2, # JIS X 0208 + JIS X 0212 OR JIS X 0213
18             UTF8MB3 => 1 << 3, # as utf-8 encoding
19             Y2016 => 1 << 10, # almost viewable in 2016 (Mac10.11, Win10, iOS9, Andoid5
20 6     6   39 };
  6         7  
21              
22             our $ENCODE = Y2016; # this version's default mask
23              
24             our $map = [
25             { char => 'い', code => "\x{682C}", flag => MONO | UTF8MB3 | JIS | Y2016 },
26             # ろ
27             # は
28             { char => 'に', code => "\x{92AB}", flag => MONO | UTF8MB3 | JIS | Y2016 },
29             { char => 'ほ', code => "\x{23D0A}", flag => MONO },
30             { char => 'へ', code => "\x{2021C}", flag => MONO },
31             { char => 'と', code => "\x{28246}", flag => MONO },
32             { char => 'ち', code => "\x{68C8}", flag => MONO | UTF8MB3 | JIS | Y2016 },
33             # り
34             { char => 'ぬ', code => "\x{57E5}", flag => MONO | UTF8MB3 | Y2016 },
35             { char => 'る', code => "\x{9306}", flag => MONO | UTF8MB3 | JIS | Y2016 },
36             { char => 'を', code => "\x{6E05}", flag => MONO | UTF8MB3 | JIS | Y2016 },
37             { char => 'わ', code => "\x{5029}", flag => MONO | UTF8MB3 | JIS | Y2016 },
38             # か
39             { char => 'よ', code => "\x{6A2A}", flag => MONO | UTF8MB3 | JIS | Y2016 },
40             { char => 'た', code => "\x{71BF}", flag => MONO | UTF8MB3 | JIS | Y2016 },
41             { char => 'れ', code => "\x{58B4}", flag => MONO | UTF8MB3 | Y2016 },
42             { char => 'そ', code => "\x{9404}", flag => MONO | UTF8MB3 | JIS | Y2016 },
43             { char => 'つ', code => "\x{6F62}", flag => MONO | UTF8MB3 | JIS | Y2016 },
44             { char => 'ね', code => "\x{50D9}", flag => MONO | UTF8MB3 | JIS | Y2016 },
45             { char => 'な', code => "\x{28287}", flag => MONO },
46             # ら
47             { char => 'む', code => "\x{7103}", flag => MONO | UTF8MB3 | JIS | Y2016 },
48             { char => 'う', code => "\x{212FD}", flag => MONO | JIS | Y2016 },
49             { char => 'ゐ', code => "\x{4932}", flag => MONO | UTF8MB3 | Y2016 },
50             { char => 'の', code => "\x{6D7E}", flag => MONO | UTF8MB3 | Y2016 },
51             # お
52             # く
53             { char => 'や', code => "\x{67CF}", flag => MONO | UTF8MB3 | JIS | Y2016 },
54             { char => 'ま', code => "\x{241E2}", flag => MONO | Y2016 },
55             { char => 'け', code => "\x{2129A}", flag => MONO },
56             { char => 'ふ', code => "\x{9251}", flag => MONO | UTF8MB3 | JIS | Y2016 },
57             { char => 'こ', code => "\x{6CCA}", flag => MONO | UTF8MB3 | JIS | Y2016 },
58             { char => 'え', code => "\x{4F2F}", flag => MONO | UTF8MB3 | JIS | Y2016 },
59             # て
60             { char => 'あ', code => "\x{23638}", flag => MONO | JIS | Y2016 },
61             { char => 'さ', code => "\x{3DF5}", flag => MONO | UTF8MB3 | Y2016 },
62             # き
63             { char => 'ゆ', code => "\x{28B46}", flag => MONO | Y2016 },
64             { char => 'め', code => "\x{6F76}", flag => MONO | UTF8MB3 | Y2016 },
65             { char => 'み', code => "\x{20381}", flag => MONO | JIS | Y2016 },
66             { char => 'し', code => "\x{28282}", flag => MONO | JIS | Y2016 },
67             { char => 'ゑ', code => "\x{6A74}", flag => MONO | UTF8MB3 | Y2016 },
68             # ひ
69             # も
70             # せ
71             # す
72             # ん
73            
74             { char => 'い', code => "\x{2F4A}\x{2F8A}", flag => DUO | UTF8MB3 | JIS | Y2016 },
75             { char => 'ろ', code => "\x{2F55}\x{2F8A}", flag => DUO | UTF8MB3 | JIS | Y2016 },
76             { char => 'は', code => "\x{2F1F}\x{2F8A}", flag => DUO | UTF8MB3 | JIS | Y2016 },
77             { char => 'に', code => "\x{2FA6}\x{2F8A}", flag => DUO | UTF8MB3 | JIS | Y2016 },
78             { char => 'ほ', code => "\x{6C35}\x{2F8A}", flag => DUO | UTF8MB3 | JIS | Y2016 },
79             { char => 'へ', code => "\x{4EBB}\x{2F8A}", flag => DUO | UTF8MB3 | JIS | Y2016 },
80             { char => 'と', code => "\x{2F9D}\x{2F8A}", flag => DUO | UTF8MB3 | JIS | Y2016 },
81             { char => 'ち', code => "\x{2F4A}\x{2ED8}", flag => DUO | UTF8MB3 | JIS | Y2016 },
82             { char => 'り', code => "\x{2F55}\x{2ED8}", flag => DUO | UTF8MB3 | JIS | Y2016 },
83             { char => 'ぬ', code => "\x{2F1F}\x{2ED8}", flag => DUO | UTF8MB3 | JIS | Y2016 },
84             { char => 'る', code => "\x{2FA6}\x{2ED8}", flag => DUO | UTF8MB3 | JIS | Y2016 },
85             { char => 'を', code => "\x{6C35}\x{2ED8}", flag => DUO | UTF8MB3 | JIS | Y2016 },
86             { char => 'わ', code => "\x{4EBB}\x{2ED8}", flag => DUO | UTF8MB3 | JIS | Y2016 },
87             { char => 'か', code => "\x{2F9D}\x{2ED8}", flag => DUO | UTF8MB3 | JIS | Y2016 },
88             { char => 'よ', code => "\x{2F4A}\x{2EE9}", flag => DUO | UTF8MB3 | JIS | Y2016 },
89             { char => 'た', code => "\x{2F55}\x{2EE9}", flag => DUO | UTF8MB3 | JIS | Y2016 },
90             { char => 'れ', code => "\x{2F1F}\x{2EE9}", flag => DUO | UTF8MB3 | JIS | Y2016 },
91             { char => 'そ', code => "\x{2FA6}\x{2EE9}", flag => DUO | UTF8MB3 | JIS | Y2016 },
92             { char => 'つ', code => "\x{6C35}\x{2EE9}", flag => DUO | UTF8MB3 | JIS | Y2016 },
93             { char => 'ね', code => "\x{4EBB}\x{2EE9}", flag => DUO | UTF8MB3 | JIS | Y2016 },
94             { char => 'な', code => "\x{2F9D}\x{2EE9}", flag => DUO | UTF8MB3 | JIS | Y2016 },
95             { char => 'ら', code => "\x{2F4A}\x{2F9A}", flag => DUO | UTF8MB3 | JIS | Y2016 },
96             { char => 'む', code => "\x{2F55}\x{2F9A}", flag => DUO | UTF8MB3 | JIS | Y2016 },
97             { char => 'う', code => "\x{2F1F}\x{2F9A}", flag => DUO | UTF8MB3 | JIS | Y2016 },
98             { char => 'ゐ', code => "\x{2FA6}\x{2F9A}", flag => DUO | UTF8MB3 | JIS | Y2016 },
99             { char => 'の', code => "\x{6C35}\x{2F9A}", flag => DUO | UTF8MB3 | JIS | Y2016 },
100             { char => 'お', code => "\x{4EBB}\x{2F9A}", flag => DUO | UTF8MB3 | JIS | Y2016 },
101             { char => 'く', code => "\x{2F9D}\x{2F9A}", flag => DUO | UTF8MB3 | JIS | Y2016 },
102             { char => 'や', code => "\x{2F4A}\x{2F69}", flag => DUO | UTF8MB3 | JIS | Y2016 },
103             { char => 'ま', code => "\x{2F55}\x{2F69}", flag => DUO | UTF8MB3 | JIS | Y2016 },
104             { char => 'け', code => "\x{2F1F}\x{2F69}", flag => DUO | UTF8MB3 | JIS | Y2016 },
105             { char => 'ふ', code => "\x{2FA6}\x{2F69}", flag => DUO | UTF8MB3 | JIS | Y2016 },
106             { char => 'こ', code => "\x{6C35}\x{2F69}", flag => DUO | UTF8MB3 | JIS | Y2016 },
107             { char => 'え', code => "\x{4EBB}\x{2F69}", flag => DUO | UTF8MB3 | JIS | Y2016 },
108             { char => 'て', code => "\x{2F9D}\x{2F69}", flag => DUO | UTF8MB3 | JIS | Y2016 },
109             { char => 'あ', code => "\x{2F4A}\x{9ED2}", flag => DUO | UTF8MB3 | JIS | Y2016 },
110             { char => 'さ', code => "\x{2F55}\x{9ED2}", flag => DUO | UTF8MB3 | JIS | Y2016 },
111             { char => 'き', code => "\x{2F1F}\x{9ED2}", flag => DUO | UTF8MB3 | JIS | Y2016 },
112             { char => 'ゆ', code => "\x{2FA6}\x{9ED2}", flag => DUO | UTF8MB3 | JIS | Y2016 },
113             { char => 'め', code => "\x{6C35}\x{9ED2}", flag => DUO | UTF8MB3 | JIS | Y2016 },
114             { char => 'み', code => "\x{4EBB}\x{9ED2}", flag => DUO | UTF8MB3 | JIS | Y2016 },
115             { char => 'し', code => "\x{2F9D}\x{9ED2}", flag => DUO | UTF8MB3 | JIS | Y2016 },
116             { char => 'ゑ', code => "\x{2F4A}\x{7D2B}", flag => DUO | UTF8MB3 | JIS | Y2016 },
117             { char => 'ひ', code => "\x{2F55}\x{7D2B}", flag => DUO | UTF8MB3 | JIS | Y2016 },
118             { char => 'も', code => "\x{2F1F}\x{7D2B}", flag => DUO | UTF8MB3 | JIS | Y2016 },
119             { char => 'せ', code => "\x{2FA6}\x{7D2B}", flag => DUO | UTF8MB3 | JIS | Y2016 },
120             { char => 'す', code => "\x{6C35}\x{7D2B}", flag => DUO | UTF8MB3 | JIS | Y2016 },
121             { char => 'ん', code => "\x{4EBB}\x{7D2B}", flag => DUO | UTF8MB3 | JIS | Y2016 },
122             ];
123              
124             my $encode = {};
125             my $decode = {};
126             my $decode_re = join '|', map { $_->{code} } reverse @$map;
127             $decode_re = qr/($decode_re)/;
128              
129             for my $v (@$map) {
130             my $list = $encode->{ $v->{char} } ||= [];
131             push @$list, $v;
132            
133             $decode->{$v->{code}} = $v->{char};
134             }
135              
136             sub _encode {
137 314     314   268 my $char = shift;
138 314   100     582 my $list = $encode->{$char} // [];
139 314         280 for my $v (@$list) {
140 225 100       309 if ($v->{flag} & $ENCODE) {
141 168         129 $char = $v->{code};
142 168         118 last;
143             }
144             }
145              
146 314         428 $char;
147             }
148              
149             sub normalize {
150 12   50 12 0 29 my $text = shift // "";
151            
152             # decomposition for 濁点s
153 12         76 $text =~ s/(\p{InHiragana}|\p{InKatakana})/NFD($1)/ge;
  185         2564  
154            
155             # katakana to hiragana
156 12         251 $text = katakana2hiragana(katakana_h2z($text));
157            
158             # upper ぁぃぅぇぉっゃゅょゎゕゖㇾㇷㇶㇸㇲㇹㇱㇼㇳㇰㇿㇻㇺㇵㇽㇴ
159 12         522 $text =~ tr[\x{3041}\x{3043}\x{3045}\x{3047}\x{3049}\x{3063}\x{3083}\x{3085}\x{3087}\x{308E}\x{3095}\x{3096}\x{31FE}\x{31F7}\x{31F6}\x{31F8}\x{31F2}\x{31F9}\x{31F1}\x{31FC}\x{31F3}\x{31F0}\x{31FF}\x{31FB}\x{31FA}\x{31F5}\x{31FD}\x{31F4}]
160             [\x{3042}\x{3044}\x{3046}\x{3048}\x{304A}\x{3064}\x{3084}\x{3086}\x{3088}\x{308F}\x{304B}\x{3051}\x{30EC}\x{30D5}\x{30D2}\x{30D8}\x{30B9}\x{30DB}\x{30B7}\x{30EA}\x{30C8}\x{30AF}\x{30ED}\x{30E9}\x{30E0}\x{30CF}\x{30EB}\x{30CC}];
161            
162 12         23 $text;
163             }
164              
165             sub encode {
166 10     10 1 3867 my $class = shift;
167 10   50     24 my $text = shift // "";
168            
169 10         20 $text = normalize($text);
170 10         29 $text =~ s{(.)}{_encode($1)}ge;
  314         270  
171 10         52 $text;
172             }
173              
174             sub decode {
175 4     4 1 19 my $class = shift;
176 4   50     10 my $text = shift // "";
177            
178 4         87 $text =~ s/$decode_re/$decode->{$1}/ge;
  147         325  
179 4         20 $text =~ s/(\p{InHiragana}+)/NFC($1)/ge;
  145         2436  
180 4         160 $text;
181             }
182              
183             sub shinobi {
184 4     4 1 22 Text::Shinobi->encode(@_);
185             }
186              
187             1;
188             __END__