File Coverage

blib/lib/Lingua/ZH/Romanize/Pinyin.pm
Criterion Covered Total %
statement 58 59 98.3
branch 20 30 66.6
condition 1 3 33.3
subroutine 11 11 100.0
pod 4 4 100.0
total 94 107 87.8


line stmt bran cond sub pod time code
1             =head1 NAME
2              
3             Lingua::ZH::Romanize::Pinyin - Romanization of Standard Chinese language
4              
5             =head1 SYNOPSIS
6              
7             use Lingua::ZH::Romanize::Pinyin;
8              
9             my $conv = Lingua::ZH::Romanize::Pinyin->new();
10             my $roman = $conv->char( $hanji );
11             printf( "%s%s", $hanji, $roman );
12              
13             my @array = $conv->string( $string );
14             foreach my $pair ( @array ) {
15             my( $raw, $ruby ) = @$pair;
16             if ( defined $ruby ) {
17             printf( "%s%s", $raw, $ruby );
18             } else {
19             print $raw;
20             }
21             }
22              
23             =head1 DESCRIPTION
24              
25             Pinyin is a phonemic notation for Chinese characters.
26              
27             =head2 $conv = Lingua::ZH::Romanize::Pinyin->new();
28              
29             This constructer methods returns a new object with its dictionary cached.
30              
31             =head2 $roman = $conv->char( $hanji );
32              
33             This method returns romanized letters of a Hanji character.
34             It returns undef when $hanji is not a valid Hanji character.
35             The argument's encoding must be UTF-8.
36             Both of Simplified Chinese and Traditional Chinese are allowed.
37              
38             =head2 $roman = $conv->chars( $string );
39              
40             This method returns romanized letters of Hanji characters.
41              
42             =head2 @array = $conv->string( $string );
43              
44             This method returns a array of referenced arrays
45             which are pairs of a Hanji chacater and its romanized letters.
46              
47             $array[0] # first Chinese character's pair (array)
48             $array[1][0] # secound Chinese character itself
49             $array[1][1] # its romanized letters
50              
51             =head1 DICTIONARY
52              
53             This module internally uses a mapping table from Hanji to roman
54             which is based on C which is distributed with C.
55              
56             =head1 MODULE DEPENDENCY
57              
58             L module is required.
59              
60             =head1 UTF-8 FLAG
61              
62             This treats utf8 flag transparently.
63              
64             =head1 SEE ALSO
65              
66             L for romanization of Cantonese
67              
68             L for romanization of Japanese
69              
70             L for romanization of Korean
71              
72             http://www.kawa.net/works/perl/romanize/romanize-e.html
73              
74             http://linuga-romanize.googlecode.com/svn/trunk/Lingua-ZH-Romanize-Pinyin/
75              
76             =head1 COPYRIGHT
77              
78             Copyright (c) 2003-2008 Yusuke Kawasaki. All rights reserved.
79              
80             =head1 LICENSE
81              
82             Any commercial use of the Software requires a license directly from
83             the author(s). Please contact the author(s) to negotiate an
84             appropriate license. Commercial use includes integration of all or
85             part of the binary or source code covered by this permission
86             notices into a product for sale or license to third parties on your
87             behalf, or distribution of the binary or source code to third
88             parties that need it to utilize a product sold or licensed on your
89             behalf.
90              
91             =cut
92              
93             package Lingua::ZH::Romanize::Pinyin;
94 4     4   3576 use strict;
  4         11  
  4         148  
95 4     4   23 use Carp;
  4         8  
  4         485  
96 4     4   5247 use Storable;
  4         17781  
  4         344  
97 4     4   40 use vars qw( $VERSION );
  4         10  
  4         4298  
98             $VERSION = "0.23";
99             my $PERL581 = 1 if ( $] >= 5.008001 );
100              
101             sub new {
102 4     4 1 3807 my $package = shift;
103 4   33     30 my $store = shift || &_detect_store($package);
104 4 50       242 Carp::croak "$! - $store\n" unless ( -r $store );
105 4 50       18 my $self = Storable::retrieve($store) or Carp::croak "$! - $store\n";
106 4         64157 bless $self, $package;
107 4         25 $self;
108             }
109              
110             sub char {
111 12     12 1 8152 my $self = shift;
112 12 50       40 return $self->_char(@_) unless $PERL581;
113 12         21 my $char = shift;
114 12         30 my $utf8 = utf8::is_utf8( $char );
115 12 100       36 utf8::encode( $char ) if $utf8;
116 12         42 $char = $self->_char( $char );
117 12 100       35 utf8::decode( $char ) if $utf8;
118 12         30 $char;
119             }
120              
121             sub _char {
122 12     12   25 my $self = shift;
123 12         20 my $char = shift;
124 12 100       67 return unless exists $self->{$char};
125 8         25 $self->{$char};
126             }
127              
128             sub chars {
129 8     8 1 4047 my $self = shift;
130 8         37 my @array = $self->string(shift);
131 8 50       18 join( " ", map { $#$_ > 0 ? $_->[1] : $_->[0] } @array );
  16         94  
132             }
133              
134             sub string {
135 20     20 1 18841 my $self = shift;
136 20 50       56 return $self->_string(@_) unless $PERL581;
137 20         27 my $char = shift;
138 20         51 my $flag = utf8::is_utf8( $char );
139 20 100       60 utf8::encode( $char ) if $flag;
140 20         54 my @array = $self->_string( $char );
141 20 100       49 if ( $flag ) {
142 10         19 foreach my $pair ( @array ) {
143 44 50       121 utf8::decode( $pair->[0] ) if defined $pair->[0];
144 44 50       388 utf8::decode( $pair->[1] ) if defined $pair->[1];
145             }
146             }
147 20         105 @array;
148             }
149              
150             sub _string {
151 20     20   31 my $self = shift;
152 20         24 my $src = shift;
153 20         72 my $array = [];
154 20         105 while ( $src =~ /([\300-\377][\200-\277]+)|([\000-\177]+)/sg ) {
155 88 50       180 if ( defined $1 ) { # Chinese
156 88         195 my $pair = [$1];
157 88 50       385 $pair->[1] = $self->{$1} if exists $self->{$1};
158 88         436 push( @$array, $pair );
159             }
160             else {
161 0         0 push( @$array, [$2] ); # ASCII
162             }
163             }
164 20         84 @$array;
165             }
166              
167             # Pinyin.pm -> Pinyin.store
168             # Cantonese.pm -> Cantonese.store
169              
170             sub _detect_store {
171 4     4   9 my $package = shift;
172 4         28 my $store = $INC{ join( "/", split( "::", "$package.pm" ) ) };
173 4 50       48 $store =~ s#\.pm$#.store# or Carp::croak "Invalid module name: $package\n";
174 4         21 $store;
175             }
176              
177             1;