File Coverage

blib/lib/HTML/Encoder.pm
Criterion Covered Total %
statement 11 43 25.5
branch 0 16 0.0
condition 0 15 0.0
subroutine 4 6 66.6
pod 2 2 100.0
total 17 82 20.7


line stmt bran cond sub pod time code
1             # HTML Encoder
2             #
3             # Encode the special characters contained in a data structure as HTML entities.
4             #
5             # Copyright 2003 Fabiano Reese Righetti
6             # All rights reserved.
7             #
8             # This program is free software; you can redistribute it and/or
9             # modify it under the terms of the GNU General Public License as
10             # published by the Free Software Foundation; either version 2 of the
11             # License, or (at your option) any later version.
12             # This program is distributed in the hope that it will be useful,
13             # but WITHOUT ANY WARRANTY; without even the implied warranty of
14             # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15             # General Public License for more details.
16              
17             package HTML::Encoder;
18             require 5.005;
19              
20             =head1 NAME
21              
22             HTML::Encoder - Encode special characters as HTML entities.
23              
24             =head1 SYNOPSIS
25              
26             use HTML::Encoder;
27              
28             my $HE = new HTML::Encoder(
29             Extended_A => 1,
30             Extended_B => 1,
31             Latin_1 => 1,
32             );
33              
34             $HE->encode($ref);
35              
36             =head1 DESCRIPTION
37              
38             This module implements an algorithm for encoding the special characters
39             contained in a data structure as HTML entities.
40              
41             =head1 METHODS
42              
43             =over 4
44              
45             =cut
46              
47 1     1   59542 use vars qw($VERSION %Entities);
  1         2  
  1         68  
48              
49 1     1   5 use strict;
  1         2  
  1         35  
50 1     1   4 use warnings;
  1         7  
  1         1488  
51              
52             BEGIN
53             {
54 1     1   8 our $VERSION = '0.00_04';
55 1         704 our %Entities = (
56             Latin_1 => {
57             chr(0x00a0) => 'nbsp', # NO-BREAK SPACE
58             chr(0x00a1) => 'iexcl', # INVERTED EXCLAMATION MARK
59             chr(0x00a2) => 'cent', # CENT SIGN
60             chr(0x00a3) => 'pound', # POUND SIGN
61             chr(0x00a4) => 'curren', # CURRENCY SIGN
62             chr(0x00a5) => 'yen', # YEN SIGN
63             chr(0x00a6) => 'brvbar', # BROKEN BAR
64             chr(0x00a7) => 'sect', # SECTION SIGN
65             chr(0x00a8) => 'uml', # DIAERESIS
66             chr(0x00a9) => 'copy', # COPYRIGHT SIGN
67             chr(0x00aa) => 'ordf', # FEMININE ORDINAL INDICATOR
68             chr(0x00ab) => 'laquo', # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
69             chr(0x00ac) => 'not', # NOT SIGN
70             chr(0x00ad) => 'shy', # SOFT HYPHEN
71             chr(0x00ae) => 'reg', # REGISTERED SIGN
72             chr(0x00af) => 'macr', # MACRON
73             chr(0x00b0) => 'deg', # DEGREE SIGN
74             chr(0x00b1) => 'plusmn', # PLUS-MINUS SIGN
75             chr(0x00b2) => 'sup2', # SUPERSCRIPT TWO
76             chr(0x00b3) => 'sup3', # SUPERSCRIPT THREE
77             chr(0x00b4) => 'acute', # ACUTE ACCENT
78             chr(0x00b5) => 'micro', # MICRO SIGN
79             chr(0x00b6) => 'para', # PILCROW SIGN
80             chr(0x00b7) => 'middot', # MIDDLE DOT
81             chr(0x00b8) => 'cedil', # CEDILLA
82             chr(0x00b9) => 'sup1', # SUPERSCRIPT ONE
83             chr(0x00ba) => 'ordm', # MASCULINE ORDINAL INDICATOR
84             chr(0x00bb) => 'raquo', # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
85             chr(0x00bc) => 'frac14', # VULGAR FRACTION ONE QUARTER
86             chr(0x00bd) => 'frac12', # VULGAR FRACTION ONE HALF
87             chr(0x00be) => 'frac34', # VULGAR FRACTION THREE QUARTERS
88             chr(0x00bf) => 'iquest', # INVERTED QUESTION MARK
89             chr(0x00c0) => 'Agrave', # LATIN CAPITAL LETTER A WITH GRAVE
90             chr(0x00c1) => 'Aacute', # LATIN CAPITAL LETTER A WITH ACUTE
91             chr(0x00c2) => 'Acirc', # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
92             chr(0x00c3) => 'Atilde', # LATIN CAPITAL LETTER A WITH TILDE
93             chr(0x00c4) => 'Auml', # LATIN CAPITAL LETTER A WITH DIAERESIS
94             chr(0x00c5) => 'Aring', # LATIN CAPITAL LETTER A WITH RING ABOVE
95             chr(0x00c6) => 'AElig', # LATIN CAPITAL LETTER AE
96             chr(0x00c7) => 'Ccedil', # LATIN CAPITAL LETTER C WITH CEDILLA
97             chr(0x00c8) => 'Egrave', # LATIN CAPITAL LETTER E WITH GRAVE
98             chr(0x00c9) => 'Eacute', # LATIN CAPITAL LETTER E WITH ACUTE
99             chr(0x00ca) => 'Ecirc', # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
100             chr(0x00cb) => 'Euml', # LATIN CAPITAL LETTER E WITH DIAERESIS
101             chr(0x00cc) => 'Igrave', # LATIN CAPITAL LETTER I WITH GRAVE
102             chr(0x00cd) => 'Iacute', # LATIN CAPITAL LETTER I WITH ACUTE
103             chr(0x00ce) => 'Icirc', # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
104             chr(0x00cf) => 'Iuml', # LATIN CAPITAL LETTER I WITH DIAERESIS
105             chr(0x00d0) => 'ETH', # LATIN CAPITAL LETTER ETH (Icelandic)
106             chr(0x00d1) => 'Ntilde', # LATIN CAPITAL LETTER N WITH TILDE
107             chr(0x00d2) => 'Ograve', # LATIN CAPITAL LETTER O WITH GRAVE
108             chr(0x00d3) => 'Oacute', # LATIN CAPITAL LETTER O WITH ACUTE
109             chr(0x00d4) => 'Ocirc', # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
110             chr(0x00d5) => 'Otilde', # LATIN CAPITAL LETTER O WITH TILDE
111             chr(0x00d6) => 'Ouml', # LATIN CAPITAL LETTER O WITH DIAERESIS
112             chr(0x00d7) => 'times', # MULTIPLICATION SIGN
113             chr(0x00d8) => 'Oslash', # LATIN CAPITAL LETTER O WITH STROKE
114             chr(0x00d9) => 'Ugrave', # LATIN CAPITAL LETTER U WITH GRAVE
115             chr(0x00da) => 'Uacute', # LATIN CAPITAL LETTER U WITH ACUTE
116             chr(0x00db) => 'Ucirc', # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
117             chr(0x00dc) => 'Uuml', # LATIN CAPITAL LETTER U WITH DIAERESIS
118             chr(0x00dd) => 'Yacute', # LATIN CAPITAL LETTER Y WITH ACUTE
119             chr(0x00de) => 'THORN', # LATIN CAPITAL LETTER THORN (Icelandic)
120             chr(0x00df) => 'szlig', # LATIN SMALL LETTER SHARP S (German)
121             chr(0x00e0) => 'agrave', # LATIN SMALL LETTER A WITH GRAVE
122             chr(0x00e1) => 'aacute', # LATIN SMALL LETTER A WITH ACUTE
123             chr(0x00e2) => 'acirc', # LATIN SMALL LETTER A WITH CIRCUMFLEX
124             chr(0x00e3) => 'atilde', # LATIN SMALL LETTER A WITH TILDE
125             chr(0x00e4) => 'auml', # LATIN SMALL LETTER A WITH DIAERESIS
126             chr(0x00e5) => 'aring', # LATIN SMALL LETTER A WITH RING ABOVE
127             chr(0x00e6) => 'aelig', # LATIN SMALL LETTER AE
128             chr(0x00e7) => 'ccedil', # LATIN SMALL LETTER C WITH CEDILLA
129             chr(0x00e8) => 'egrave', # LATIN SMALL LETTER E WITH GRAVE
130             chr(0x00e9) => 'eacute', # LATIN SMALL LETTER E WITH ACUTE
131             chr(0x00ea) => 'ecirc', # LATIN SMALL LETTER E WITH CIRCUMFLEX
132             chr(0x00eb) => 'euml', # LATIN SMALL LETTER E WITH DIAERESIS
133             chr(0x00ec) => 'igrave', # LATIN SMALL LETTER I WITH GRAVE
134             chr(0x00ed) => 'iacute', # LATIN SMALL LETTER I WITH ACUTE
135             chr(0x00ee) => 'icirc', # LATIN SMALL LETTER I WITH CIRCUMFLEX
136             chr(0x00ef) => 'iuml', # LATIN SMALL LETTER I WITH DIAERESIS
137             chr(0x00f0) => 'eth', # LATIN SMALL LETTER ETH (Icelandic)
138             chr(0x00f1) => 'ntilde', # LATIN SMALL LETTER N WITH TILDE
139             chr(0x00f2) => 'ograve', # LATIN SMALL LETTER O WITH GRAVE
140             chr(0x00f3) => 'oacute', # LATIN SMALL LETTER O WITH ACUTE
141             chr(0x00f4) => 'ocirc', # LATIN SMALL LETTER O WITH CIRCUMFLEX
142             chr(0x00f5) => 'otilde', # LATIN SMALL LETTER O WITH TILDE
143             chr(0x00f6) => 'ouml', # LATIN SMALL LETTER O WITH DIAERESIS
144             chr(0x00f7) => 'divide', # DIVISION SIGN
145             chr(0x00f8) => 'oslash', # LATIN SMALL LETTER O WITH STROKE
146             chr(0x00f9) => 'ugrave', # LATIN SMALL LETTER U WITH GRAVE
147             chr(0x00fa) => 'uacute', # LATIN SMALL LETTER U WITH ACUTE
148             chr(0x00fb) => 'ucirc', # LATIN SMALL LETTER U WITH CIRCUMFLEX
149             chr(0x00fc) => 'uuml', # LATIN SMALL LETTER U WITH DIAERESIS
150             chr(0x00fd) => 'yacute', # LATIN SMALL LETTER Y WITH ACUTE
151             chr(0x00fe) => 'thorn', # LATIN SMALL LETTER THORN (Icelandic)
152             chr(0x00ff) => 'yuml', # LATIN SMALL LETTER Y WITH DIAERESIS
153             },
154            
155             Extended_B => {
156             chr(0x0391) => 'Alpha', # GREEK CAPITAL LETTER ALPHA
157             chr(0x0392) => 'Beta', # GREEK CAPITAL LETTER BETA
158             chr(0x0393) => 'Gamma', # GREEK CAPITAL LETTER GAMMA
159             chr(0x0394) => 'Delta', # GREEK CAPITAL LETTER DELTA
160             chr(0x0395) => 'Epsilon', # GREEK CAPITAL LETTER EPSILON
161             chr(0x0396) => 'Zeta', # GREEK CAPITAL LETTER ZETA
162             chr(0x0397) => 'Eta', # GREEK CAPITAL LETTER ETA
163             chr(0x0398) => 'Theta', # GREEK CAPITAL LETTER THETA
164             chr(0x0399) => 'Iota', # GREEK CAPITAL LETTER IOTA
165             chr(0x039a) => 'Kappa', # GREEK CAPITAL LETTER KAPPA
166             chr(0x039b) => 'Lambda', # GREEK CAPITAL LETTER LAMBDA
167             chr(0x039c) => 'Mu', # GREEK CAPITAL LETTER MU
168             chr(0x039d) => 'Nu', # GREEK CAPITAL LETTER NU
169             chr(0x039e) => 'Xi', # GREEK CAPITAL LETTER XI
170             chr(0x039f) => 'Omicron', # GREEK CAPITAL LETTER OMICRON
171             chr(0x03a0) => 'Pi', # GREEK CAPITAL LETTER PI
172             chr(0x03a1) => 'Rho', # GREEK CAPITAL LETTER RHO
173             chr(0x03a3) => 'Sigma', # GREEK CAPITAL LETTER SIGMA
174             chr(0x03a4) => 'Tau', # GREEK CAPITAL LETTER TAU
175             chr(0x03a5) => 'Upsilon', # GREEK CAPITAL LETTER UPSILON
176             chr(0x03a6) => 'Phi', # GREEK CAPITAL LETTER PHI
177             chr(0x03a7) => 'Chi', # GREEK CAPITAL LETTER CHI
178             chr(0x03a8) => 'Psi', # GREEK CAPITAL LETTER PSI
179             chr(0x03a9) => 'Omega', # GREEK CAPITAL LETTER OMEGA
180             chr(0x03b1) => 'alpha', # GREEK SMALL LETTER ALPHA
181             chr(0x03b2) => 'beta', # GREEK SMALL LETTER BETA
182             chr(0x03b3) => 'gamma', # GREEK SMALL LETTER GAMMA
183             chr(0x03b4) => 'delta', # GREEK SMALL LETTER DELTA
184             chr(0x03b5) => 'epsilon', # GREEK SMALL LETTER EPSILON
185             chr(0x03b6) => 'zeta', # GREEK SMALL LETTER ZETA
186             chr(0x03b7) => 'eta', # GREEK SMALL LETTER ETA
187             chr(0x03b8) => 'theta', # GREEK SMALL LETTER THETA
188             chr(0x03b9) => 'iota', # GREEK SMALL LETTER IOTA
189             chr(0x03ba) => 'kappa', # GREEK SMALL LETTER KAPPA
190             chr(0x03bb) => 'lambda', # GREEK SMALL LETTER LAMBDA
191             chr(0x03bc) => 'mu', # GREEK SMALL LETTER MU
192             chr(0x03bd) => 'nu', # GREEK SMALL LETTER NU
193             chr(0x03be) => 'xi', # GREEK SMALL LETTER XI
194             chr(0x03bf) => 'omicron', # GREEK SMALL LETTER OMICRON
195             chr(0x03c0) => 'pi', # GREEK SMALL LETTER PI
196             chr(0x03c1) => 'rho', # GREEK SMALL LETTER RHO
197             chr(0x03c2) => 'sigmaf', # GREEK SMALL LETTER FINAL SIGMA
198             chr(0x03c3) => 'sigma', # GREEK SMALL LETTER SIGMA
199             chr(0x03c4) => 'tau', # GREEK SMALL LETTER TAU
200             chr(0x03c5) => 'upsilon', # GREEK SMALL LETTER UPSILON
201             chr(0x03c6) => 'phi', # GREEK SMALL LETTER PHI
202             chr(0x03c7) => 'chi', # GREEK SMALL LETTER CHI
203             chr(0x03c8) => 'psi', # GREEK SMALL LETTER PSI
204             chr(0x03c9) => 'omega', # GREEK SMALL LETTER OMEGA
205             chr(0x03d1) => 'thetasym', # GREEK SMALL LETTER THETA SYMBOL
206             chr(0x03d2) => 'upsih', # GREEK UPSILON WITH HOOK SYMBOL
207             chr(0x03d6) => 'piv', # GREEK PI SYMBOL
208             },
209             Punctuation => {
210             chr(0x2022) => 'bull', # BULLET = BLACK SMALL CIRCLE
211             chr(0x2026) => 'hellip', # HORIZONTAL ELLIPSIS = THREE DOT LEADER
212             chr(0x2032) => 'prime', # PRIME = MINUTES = FEET
213             chr(0x2033) => 'Prime', # DOUBLE PRIME = SECONDS = INCHES
214             chr(0x203e) => 'oline', # OVERLINE = SPACING OVERSCORE
215             chr(0x2044) => 'frasl', # FRACTION SLASH
216             },
217             Letterlike => {
218             chr(0x2111) => 'image', # BLACKLETTER CAPITAL I = IMAGINARY PART
219             chr(0x211c) => 'real', # BLACKLETTER CAPITAL R = REAL PART SYMBOL
220             chr(0x2122) => 'trade', # TRADE MARK SIGN
221             chr(0x2135) => 'alefsym', # ALEF SYMBOL = FIRST TRANSFINITE CARDINAL
222             },
223             Arrows => {
224             chr(0x2190) => 'larr', # LEFTWARDS ARROW
225             chr(0x2191) => 'uarr', # UPWARDS ARROW
226             chr(0x2192) => 'rarr', # RIGHTWARDS ARROW
227             chr(0x2193) => 'darr', # DOWNWARDS ARROW
228             chr(0x2194) => 'harr', # LEFT RIGHT ARROW
229             chr(0x21d0) => 'lArr', # LEFTWARDS DOUBLE ARROW
230             chr(0x21d1) => 'uArr', # UPWARDS DOUBLE ARROW
231             chr(0x21d2) => 'rArr', # RIGHTWARDS DOUBLE ARROW
232             chr(0x21d3) => 'dArr', # DOWNWARDS DOUBLE ARROW
233             chr(0x21d4) => 'hArr', # LEFT RIGHT DOUBLE ARROW
234             },
235             Mathematical => {
236             chr(0x2200) => 'forall', # FOR ALL
237             chr(0x2202) => 'part', # PARTIAL DIFFERENTIAL
238             chr(0x2203) => 'exist', # THERE EXISTS
239             chr(0x2205) => 'empty', # EMPTY SET = NULL SET = DIAMETER
240             chr(0x2207) => 'nabla', # NABLA = BACKWARD DIFFERENCE
241             chr(0x2208) => 'isin', # ELEMENT OF
242             chr(0x2209) => 'notin', # NOT AN ELEMENT OF
243             chr(0x220b) => 'ni', # CONTAINS AS MEMBER
244             chr(0x220f) => 'prod', # N-ARY PRODUCT = PRODUCT SIGN
245             chr(0x2211) => 'sum', # N-ARY SUMMATION
246             chr(0x2212) => 'minus', # MINUS SIGN
247             chr(0x2217) => 'lowast', # ASTERISK OPERATOR
248             chr(0x221a) => 'radic', # SQUARE ROOT = RADICAL SIGN
249             chr(0x221d) => 'prop', # PROPORTIONAL TO
250             chr(0x221e) => 'infin', # INFINITY
251             chr(0x2220) => 'ang', # ANGLE
252             chr(0x2227) => 'and', # LOGICAL AND = WEDGE
253             chr(0x2228) => 'or', # LOGICAL OR = VEE
254             chr(0x2229) => 'cap', # INTERSECTION = CAP
255             chr(0x222a) => 'cup', # UNION = CUP
256             chr(0x222b) => 'int', # INTEGRAL
257             chr(0x2234) => 'there4', # THEREFORE
258             chr(0x223c) => 'sim', # TILDE OPERATOR = VARIES WITH = SIMILAR TO
259             chr(0x2245) => 'cong', # APPROXIMATELY EQUAL TO
260             chr(0x2248) => 'asymp', # ALMOST EQUAL TO = ASYMPTOTIC TO
261             chr(0x2260) => 'ne', # NOT EQUAL TO
262             chr(0x2261) => 'equiv', # IDENTICAL TO
263             chr(0x2264) => 'le', # LESS-THAN OR EQUAL TO
264             chr(0x2265) => 'ge', # GREATER-THAN OR EQUAL TO
265             chr(0x2282) => 'sub', # SUBSET OF
266             chr(0x2283) => 'sup', # SUPERSET OF
267             chr(0x2284) => 'nsub', # NOT A SUBSET OF
268             chr(0x2286) => 'sube', # SUBSET OF OR EQUAL TO
269             chr(0x2287) => 'supe', # SUPERSET OF OR EQUAL TO
270             chr(0x2295) => 'oplus', # CIRCLED PLUS = DIRECT SUM
271             chr(0x2297) => 'otimes', # CIRCLED TIMES = VECTOR PRODUCT
272             chr(0x22a5) => 'perp', # UP TACK = ORTHOGONAL TO = PERPENDICULAR
273             chr(0x22c5) => 'sdot', # DOT OPERATOR
274             },
275             Technical => {
276             chr(0x2308) => 'lceil', # LEFT CEILING = APL UPSTILE
277             chr(0x2309) => 'rceil', # RIGHT CEILING
278             chr(0x230a) => 'lfloor', # LEFT FLOOR = APL DOWNSTILE
279             chr(0x230b) => 'rfloor', # RIGHT FLOOR
280             chr(0x2329) => 'lang', # LEFT-POINTING ANGLE BRACKET = BRA
281             chr(0x232a) => 'rang', # RIGHT-POINTING ANGLE BRACKET = KET
282             },
283             Geometric => {
284             chr(0x25ca) => 'loz', # LOZENGE
285             },
286             Miscellaneous => {
287             chr(0x2660) => 'spades', # BLACK SPADE SUIT
288             chr(0x2663) => 'clubs', # BLACK CLUB SUIT = SHAMROCK
289             chr(0x2665) => 'hearts', # BLACK HEART SUIT = VALENTINE
290             chr(0x2666) => 'diams', # BLACK DIAMOND SUIT
291             },
292             Controls => {
293             chr(0x0022) => 'quot', # QUOTATION MARK = APL QUOTE
294             chr(0x0026) => 'amp', # AMPERSAND
295             chr(0x003c) => 'lt', # LESS-THAN SIGN
296             chr(0x003e) => 'gt', # GREATER-THAN SIGN
297             },
298             Extended_A => {
299             chr(0x0152) => 'OElig', # LATIN CAPITAL LIGATURE OE
300             chr(0x0153) => 'oelig', # LATIN SMALL LIGATURE OE
301             chr(0x0160) => 'Scaron', # LATIN CAPITAL LETTER S WITH CARON
302             chr(0x0161) => 'scaron', # LATIN SMALL LETTER S WITH CARON
303             chr(0x0178) => 'Yuml', # LATIN CAPITAL LETTER Y WITH DIAERESIS
304             },
305             Modifier => {
306             chr(0x02c6) => 'circ', # MODIFIER LETTER CIRCUMFLEX ACCENT
307             chr(0x02dc) => 'tilde', # SMALL TILDE
308             },
309             Punctuation => {
310             chr(0x2002) => 'ensp', # EN SPACE
311             chr(0x2003) => 'emsp', # EM SPACE
312             chr(0x2009) => 'thinsp', # THIN SPACE
313             chr(0x200c) => 'zwnj', # ZERO WIDTH NON-JOINER
314             chr(0x200d) => 'zwj', # ZERO WIDTH JOINER
315             chr(0x200e) => 'lrm', # LEFT-TO-RIGHT MARK
316             chr(0x200f) => 'rlm', # RIGHT-TO-LEFT MARK
317             chr(0x2013) => 'ndash', # EN DASH
318             chr(0x2014) => 'mdash', # EM DASH
319             chr(0x2018) => 'lsquo', # LEFT SINGLE QUOTATION MARK
320             chr(0x2019) => 'rsquo', # RIGHT SINGLE QUOTATION MARK
321             chr(0x201a) => 'sbquo', # SINGLE LOW-9 QUOTATION MARK
322             chr(0x201c) => 'ldquo', # LEFT DOUBLE QUOTATION MARK
323             chr(0x201d) => 'rdquo', # RIGHT DOUBLE QUOTATION MARK
324             chr(0x201e) => 'bdquo', # DOUBLE LOW-9 QUOTATION MARK
325             chr(0x2020) => 'dagger', # DAGGER
326             chr(0x2021) => 'Dagger', # DOUBLE DAGGER
327             chr(0x2030) => 'permil', # PER MILLE SIGN
328             chr(0x2039) => 'lsaquo', # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
329             chr(0x203a) => 'rsaquo', # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
330             chr(0x20ac) => 'euro', # EURO SIGN
331             },
332             );
333             }
334              
335             =item B<new>
336              
337             The constructor method.
338              
339             my $HE = new HTML::Encoder(
340             Arrows => 0,
341             Controls => 0,
342             Extended_A => 1,
343             Extended_B => 1,
344             Geometric => 0,
345             Latin_1 => 1,
346             Letterlike => 0,
347             Mathematical => 0,
348             Miscellaneous => 0,
349             Modifier => 0,
350             Punctuation => 0,
351             Technical => 0,
352             );
353              
354             or
355              
356             my $HE = new HTML::Encoder(); # With no options, Latin_1 entities are encoded by default.
357              
358             =cut
359              
360             sub new
361             {
362 0     0 1   my $type = shift;
363 0   0       my $class = ref $type || $type;
364            
365 0           my $self = {
366             Arrows => 0,
367             Controls => 0,
368             Extended_A => 0,
369             Extended_B => 0,
370             Geometric => 0,
371             Latin_1 => 0,
372             Letterlike => 0,
373             Mathematical => 0,
374             Miscellaneous => 0,
375             Modifier => 0,
376             Punctuation => 0,
377             Technical => 0,
378             @_,
379             };
380              
381 0           my $ok = 0;
382 0           for my $i (keys %Entities) {
383 0 0         if ($self->{$i}) {
384 0           $ok = 1;
385 0           $self->{'Heads_'.$i} = join '|', keys %{$Entities{$i}};
  0            
386             }
387             }
388              
389             # Set default HTML codes.
390 0 0         if (!$ok) {
391 0           $self->{Latin_1} = 1;
392 0           $self->{Heads_Latin_1} = join '|', keys %{$Entities{Latin_1}};
  0            
393             }
394              
395 0           bless $self, $class;
396 0           return $self;
397             }
398              
399             =item B<encode>
400              
401             Recursively walks the referenced data structure and replaces special
402             characters in place with their HTML entities.
403              
404             $HE->encode($ref);
405              
406             =cut
407              
408             sub encode
409             {
410 0     0 1   my $self = shift;
411 0           my $ref = shift;
412              
413 0 0         if (ref $ref eq 'ARRAY') {
    0          
    0          
414 0           for my $i (0 .. $#{$ref}) {
  0            
415 0 0 0       if ((ref $ref->[$i] ne 'ARRAY') and
      0        
416             (ref $ref->[$i] ne 'HASH') and
417             (ref $ref->[$i] ne 'SCALAR')) {
418 0           &encode($self, \$ref->[$i]);
419             } else {
420 0           &encode($self, $ref->[$i]);
421             }
422             }
423             } elsif (ref $ref eq 'HASH') {
424 0           for my $i (keys %{$ref}) {
  0            
425 0 0 0       if ((ref $ref->{$i} ne 'ARRAY') and
      0        
426             (ref $ref->{$i} ne 'HASH') and
427             (ref $ref->{$i} ne 'SCALAR')) {
428 0           &encode($self, \$ref->{$i});
429             } else {
430 0           &encode($self, $ref->{$i});
431             }
432             }
433             } elsif (ref $ref eq 'SCALAR') {
434 0           for my $i (keys %Entities) {
435 0 0         if ($self->{$i}) {
436 0           ${$ref} =~ s/($self->{'Heads_'.$i})/&$Entities{$i}{$1};/g;
  0            
437             }
438             }
439             }
440             }
441              
442             1;
443              
444             __END__