File Coverage

blib/lib/TeX/Encode/charmap.pm
Criterion Covered Total %
statement 15 15 100.0
branch n/a
condition n/a
subroutine 5 5 100.0
pod n/a
total 20 20 100.0


line stmt bran cond sub pod time code
1             package TeX::Encode::charmap;
2             our $VERSION = '2.008'; # VERSION
3              
4             =head1 NAME
5              
6             TeX::Encode::charmap - Character mappings between TeX and Unicode
7              
8             =head1 DESCRIPTION
9              
10             Most of the mapping was built from Tralics, see http://www-sop.inria.fr/apics/tralics/
11              
12             A part was built from Clark Grubb's L<latex-input|https://github.com/clarkgrubb/latex-input>.
13              
14             =begin comment
15              
16             latex-input is available under the following terms:
17              
18             Copyright (C) 2014 Clark Grubb
19              
20              
21             Permission is hereby granted, free of charge, to any person obtaining
22             a copy of this software and associated documentation files (the
23             "Software"), to deal in the Software without restriction, including
24             without limitation the rights to use, copy, modify, merge, publish,
25             distribute, sublicense, and/or sell copies of the Software, and to
26             permit persons to whom the Software is furnished to do so, subject to
27             the following conditions:
28              
29             The above copyright notice and this permission notice shall be
30             included in all copies or substantial portions of the Software.
31              
32             =end comment
33              
34             =cut
35              
36 3     3   19 use vars qw( %RESERVED %BIBTEX_RESERVED %CHARS %ACCENTED_CHARS %LATEX_MACROS %GREEK %TEX_GREEK %MATH %MATH_CHARS %ASTRONOMY %GAMES %KEYS %IPA );
  3         5  
  3         15493  
37              
38             # reserved latex characters: each special character => LaTeX source that typesets it literally
39             %RESERVED = (
40             '#' => '\\#',
41             '$' => '\\$',
42             '%' => '\\%',
43             '&' => '\\&',
44             '_' => '\\_',
45             '{' => '\\{',
46             '}' => '\\}',
47             '\\' => '\\texttt{\\char92}', # selected by character code (92 = backslash)
48             '^' => '\^{ }', # circumflex accent over a blank; alternative form: '\\texttt{\\char94}',
49             '~' => '\\texttt{\\char126}', # selected by character code (126 = tilde)
50             );
51              
52             %BIBTEX_RESERVED = ( # same keys as %RESERVED; only the last three values differ
53             '#' => '\\#',
54             '$' => '\\$',
55             '%' => '\\%',
56             '&' => '\\&',
57             '_' => '\\_',
58             '{' => '\\{',
59             '}' => '\\}',
60             '\\' => '{$\\backslash$}', # math-mode \backslash instead of \char92, wrapped in braces
61             '^' => '{\^{ }}', # same accent form as %RESERVED, wrapped in braces
62             '~' => '{\\texttt{\\char126}}', # same \char126 form as %RESERVED, wrapped in braces
63             );
64              
65             # single, non-ligature characters: Unicode character => LaTeX source
66             %CHARS = (
67              
68             # ASCII characters
69             '<' => "\\ensuremath{<}",
70             '>' => "\\ensuremath{>}",
71             '|' => "\\ensuremath{|}",
72             '[' => '{[}', # opening argument(s)
73             ']' => '{]}', # closing argument(s)
74             chr(0x2014) => "--", # emdash; %LATEX_MACROS maps "--" back to U+2014. NOTE(review): LaTeX usually writes an em dash as "---" -- confirm intended
75              
76             # non-accented
77             chr(0x00a3) => '\\pounds', # £
78             chr(0x00a7) => '\\S', # §
79             chr(0x00a9) => '\\copyright',
80             chr(0x00b6) => '\\P', # ¶
81             chr(0x00c6) => '\\AE', # Æ
82             chr(0x00d0) => '\\DH', # Ð
83             chr(0x00d8) => '\\O', # Ø
84             chr(0x00de) => '\\TH', # Þ
85             chr(0x00df) => '\\ss', # ß
86             chr(0x00e6) => '\\ae', # æ
87             chr(0x00f0) => '\\dh', # ð
88             chr(0x00f8) => '\\o', # ø
89             chr(0x00fe) => '\\th', # þ
90             chr(0x0110) => '\\DJ', # Đ
91             chr(0x0111) => '\\dj', # đ
92             chr(0x0131) => '\\i', # ı
93             chr(0x0132) => '\\IJ', # IJ
94             chr(0x0133) => '\\ij', # ij
95             chr(0x0141) => '\\L', # Ł
96             chr(0x0142) => '\\l', # ł
97             chr(0x014a) => '\\NG', # Ŋ
98             chr(0x014b) => '\\ng', # ŋ
99             chr(0x0152) => '\\OE', # Œ
100             chr(0x0153) => '\\oe', # œ
101              
102             # superscript/subscript (maths); each value carries its own $...$ delimiters
103             chr(0x2070) => '$^0$',
104             chr(0x2071) => '$^i$',
105             chr(0x2074) => '$^4$',
106             chr(0x2075) => '$^5$',
107             chr(0x2076) => '$^6$',
108             chr(0x2077) => '$^7$',
109             chr(0x2078) => '$^8$',
110             chr(0x2079) => '$^9$',
111             chr(0x207A) => '$^+$',
112             chr(0x207B) => '$^-$',
113             chr(0x207C) => '$^=$',
114             chr(0x207D) => '$^($',
115             chr(0x207E) => '$^)$',
116             chr(0x207F) => '$^n$',
117             chr(0x2080) => '$_0$',
118             chr(0x2081) => '$_1$',
119             chr(0x2082) => '$_2$',
120             chr(0x2083) => '$_3$',
121             chr(0x2084) => '$_4$',
122             chr(0x2085) => '$_5$',
123             chr(0x2086) => '$_6$',
124             chr(0x2087) => '$_7$',
125             chr(0x2088) => '$_8$',
126             chr(0x2089) => '$_9$',
127             chr(0x208A) => '$_+$',
128             chr(0x208B) => '$_-$',
129             chr(0x208C) => '$_=$',
130             chr(0x208D) => '$_($',
131             chr(0x208E) => '$_)$',
132              
133             chr(0x1D43) => '^a', # ᵃ  NOTE(review): from here on the values have no $...$ wrapper, unlike the group above -- confirm intended
134             chr(0x2090) => '_a', # ₐ
135             chr(0x1D47) => '^b', # ᵇ
136             chr(0x1D9C) => '^c', # ᶜ
137             chr(0x1D2C) => '^A', # ᴬ
138             chr(0x1D2E) => '^B', # ᴮ
139             chr(0x1D45) => '^\alpha', # ᵅ
140             chr(0x1D5D) => '^\beta', # ᵝ
141             chr(0x1D66) => '_\beta', # ᵦ
142             chr(0x1D5E) => '^\gamma', # ᵞ
143             chr(0x1D67) => '_\gamma', # ᵧ
144              
145             );
146              
147             # accented characters
148             %ACCENTED_CHARS = (
149              
150             ### Æ
151              
152             chr(0x01fc) => "\\\'{\\AE}", # Ǽ
153             chr(0x01e2) => "\\\={\\AE}", # Ǣ
154              
155             ### æ
156              
157             chr(0x01fd) => "\\\'{\\ae}", # ǽ
158             chr(0x01e3) => "\\\={\\ae}", # ǣ
159              
160             ### Å
161              
162             chr(0x01fa) => "\\\'{\\AA}", # Ǻ
163              
164             ### å
165              
166             chr(0x01fb) => "\\\'{\\aa}", # ǻ
167              
168             ### Ø
169              
170             chr(0x01fe) => "\\\'{\\O}", # Ǿ
171              
172             ### ø
173              
174             chr(0x01ff) => "\\\'{\\o}", # ǿ
175              
176             ### 
177              
178              
179             ### 
180              
181              
182             ### A
183              
184             chr(0x00c1) => "\\\'A", # Á
185             chr(0x00c0) => "\\\`A", # À
186             chr(0x00c2) => "\\\^A", # Â
187             chr(0x00c4) => "\\\"A", # Ä
188             chr(0x00c3) => "\\\~A", # Ã
189             chr(0x0104) => "\\kA", # Ą
190             chr(0x01cd) => "\\vA", # Ǎ
191             chr(0x0102) => "\\uA", # Ă
192             chr(0x0226) => "\\\.A", # Ȧ
193             chr(0x0100) => "\\\=A", # Ā
194             chr(0x00c5) => "\\AA", # Å
195             chr(0x1ea0) => "\\dA", # Ạ
196             chr(0x0200) => "\\CA", # Ȁ
197             chr(0x0202) => "\\fA", # Ȃ
198             chr(0x1e00) => "\\DA", # Ḁ
199             chr(0x1ea2) => "\\hA", # Ả
200              
201             ### B
202              
203             chr(0x1e02) => "\\\.B", # Ḃ
204             chr(0x1e06) => "\\bB", # Ḇ
205             chr(0x1e04) => "\\dB", # Ḅ
206              
207             ### C
208              
209             chr(0x0106) => "\\\'C", # Ć
210             chr(0x0108) => "\\\^C", # Ĉ
211             chr(0x010c) => "\\vC", # Č
212             chr(0x00c7) => "\\cC", # Ç
213             chr(0x010a) => "\\\.C", # Ċ
214              
215             ### D
216              
217             chr(0x010e) => "\\vD", # Ď
218             chr(0x1e10) => "\\cD", # Ḑ
219             chr(0x1e0a) => "\\\.D", # Ḋ
220             chr(0x1e0e) => "\\bD", # Ḏ
221             chr(0x1e0c) => "\\dD", # Ḍ
222             chr(0x1e12) => "\\VD", # Ḓ
223              
224             ### E
225              
226             chr(0x00c9) => "\\\'E", # É
227             chr(0x00c8) => "\\\`E", # È
228             chr(0x00ca) => "\\\^E", # Ê
229             chr(0x00cb) => "\\\"E", # Ë
230             chr(0x1ebc) => "\\\~E", # Ẽ
231             chr(0x0118) => "\\kE", # Ę
232             chr(0x011a) => "\\vE", # Ě
233             chr(0x0114) => "\\uE", # Ĕ
234             chr(0x0228) => "\\cE", # Ȩ
235             chr(0x0116) => "\\\.E", # Ė
236             chr(0x0112) => "\\\=E", # Ē
237             chr(0x1eb8) => "\\dE", # Ẹ
238             chr(0x0204) => "\\CE", # Ȅ
239             chr(0x0206) => "\\fE", # Ȇ
240             chr(0x1e1a) => "\\TE", # Ḛ
241             chr(0x1e18) => "\\VE", # Ḙ
242             chr(0x1eba) => "\\hE", # Ẻ
243              
244             ### F
245              
246             chr(0x1e1e) => "\\\.F", # Ḟ
247              
248             ### G
249              
250             chr(0x01f4) => "\\\'G", # Ǵ
251             chr(0x011c) => "\\\^G", # Ĝ
252             chr(0x01e6) => "\\vG", # Ǧ
253             chr(0x011e) => "\\uG", # Ğ
254             chr(0x0122) => "\\cG", # Ģ
255             chr(0x0120) => "\\\.G", # Ġ
256             chr(0x1e20) => "\\\=G", # Ḡ
257              
258             ### H
259              
260             chr(0x0124) => "\\\^H", # Ĥ
261             chr(0x1e26) => "\\\"H", # Ḧ
262             chr(0x021e) => "\\vH", # Ȟ
263             chr(0x1e28) => "\\cH", # Ḩ
264             chr(0x1e22) => "\\\.H", # Ḣ
265             chr(0x0126) => "\\\=H", # Ħ
266             chr(0x1e24) => "\\dH", # Ḥ
267              
268             ### I
269              
270             chr(0x00cd) => "\\\'I", # Í
271             chr(0x00cc) => "\\\`I", # Ì
272             chr(0x00ce) => "\\\^I", # Î
273             chr(0x00cf) => "\\\"I", # Ï
274             chr(0x0128) => "\\\~I", # Ĩ
275             chr(0x012e) => "\\kI", # Į
276             chr(0x01cf) => "\\vI", # Ǐ
277             chr(0x012c) => "\\uI", # Ĭ
278             chr(0x0130) => "\\\.I", # İ
279             chr(0x012a) => "\\\=I", # Ī
280             chr(0x1eca) => "\\dI", # Ị
281             chr(0x0208) => "\\CI", # Ȉ
282             chr(0x020a) => "\\fI", # Ȋ
283             chr(0x1e2c) => "\\TI", # Ḭ
284             chr(0x1ec8) => "\\hI", # Ỉ
285              
286             ### J
287              
288             chr(0x0134) => "\\\^J", # Ĵ
289              
290             ### K
291              
292             chr(0x1e30) => "\\\'K", # Ḱ
293             chr(0x01e8) => "\\vK", # Ǩ
294             chr(0x0136) => "\\cK", # Ķ
295             chr(0x1e34) => "\\bK", # Ḵ
296             chr(0x1e32) => "\\dK", # Ḳ
297              
298             ### L
299              
300             chr(0x0139) => "\\\'L", # Ĺ
301             chr(0x013d) => "\\vL", # Ľ
302             chr(0x013b) => "\\cL", # Ļ
303             chr(0x013f) => "\\\.L", # Ŀ
304             chr(0x1e3a) => "\\bL", # Ḻ
305             chr(0x1e36) => "\\dL", # Ḷ
306             chr(0x1e3c) => "\\VL", # Ḽ
307              
308             ### M
309              
310             chr(0x1e3e) => "\\\'M", # Ḿ
311             chr(0x1e40) => "\\\.M", # Ṁ
312             chr(0x1e42) => "\\dM", # Ṃ
313              
314             ### N
315              
316             chr(0x0143) => "\\\'N", # Ń
317             chr(0x01f8) => "\\\`N", # Ǹ
318             chr(0x00d1) => "\\\~N", # Ñ
319             chr(0x0147) => "\\vN", # Ň
320             chr(0x0145) => "\\cN", # Ņ
321             chr(0x1e44) => "\\\.N", # Ṅ
322             chr(0x1e48) => "\\bN", # Ṉ
323             chr(0x1e46) => "\\dN", # Ṇ
324             chr(0x1e4a) => "\\VN", # Ṋ
325              
326             ### O
327              
328             chr(0x00d3) => "\\\'O", # Ó
329             chr(0x00d2) => "\\\`O", # Ò
330             chr(0x00d4) => "\\\^O", # Ô
331             chr(0x00d6) => "\\\"O", # Ö
332             chr(0x00d5) => "\\\~O", # Õ
333             chr(0x01ea) => "\\kO", # Ǫ
334             chr(0x0150) => "\\HO", # Ő
335             chr(0x01d1) => "\\vO", # Ǒ
336             chr(0x014e) => "\\uO", # Ŏ
337             chr(0x022e) => "\\\.O", # Ȯ
338             chr(0x014c) => "\\\=O", # Ō
339             chr(0x1ecc) => "\\dO", # Ọ
340             chr(0x020c) => "\\CO", # Ȍ
341             chr(0x020e) => "\\fO", # Ȏ
342             chr(0x1ece) => "\\hO", # Ỏ
343              
344             ### P
345              
346             chr(0x1e54) => "\\\'P", # Ṕ
347             chr(0x1e56) => "\\\.P", # Ṗ
348              
349             ### Q
350              
351              
352             ### R
353              
354             chr(0x0154) => "\\\'R", # Ŕ
355             chr(0x0158) => "\\vR", # Ř
356             chr(0x0156) => "\\cR", # Ŗ
357             chr(0x1e58) => "\\\.R", # Ṙ
358             chr(0x1e5e) => "\\bR", # Ṟ
359             chr(0x1e5a) => "\\dR", # Ṛ
360             chr(0x0210) => "\\CR", # Ȑ
361             chr(0x0212) => "\\fR", # Ȓ
362              
363             ### S
364              
365             chr(0x015a) => "\\\'S", # Ś
366             chr(0x015c) => "\\\^S", # Ŝ
367             chr(0x0160) => "\\vS", # Š
368             chr(0x015e) => "\\cS", # Ş
369             chr(0x1e60) => "\\\.S", # Ṡ
370             chr(0x1e62) => "\\dS", # Ṣ
371              
372             ### T
373              
374             chr(0x0164) => "\\vT", # Ť
375             chr(0x0162) => "\\cT", # Ţ
376             chr(0x1e6a) => "\\\.T", # Ṫ
377             chr(0x0166) => "\\\=T", # Ŧ
378             chr(0x1e6e) => "\\bT", # Ṯ
379             chr(0x1e6c) => "\\dT", # Ṭ
380             chr(0x1e70) => "\\VT", # Ṱ
381              
382             ### U
383              
384             chr(0x00da) => "\\\'U", # Ú
385             chr(0x00d9) => "\\\`U", # Ù
386             chr(0x00db) => "\\\^U", # Û
387             chr(0x00dc) => "\\\"U", # Ü
388             chr(0x0168) => "\\\~U", # Ũ
389             chr(0x0172) => "\\kU", # Ų
390             chr(0x0170) => "\\HU", # Ű
391             chr(0x01d3) => "\\vU", # Ǔ
392             chr(0x016c) => "\\uU", # Ŭ
393             chr(0x016a) => "\\\=U", # Ū
394             chr(0x016e) => "\\rU", # Ů
395             chr(0x1ee4) => "\\dU", # Ụ
396             chr(0x0214) => "\\CU", # Ȕ
397             chr(0x0216) => "\\fU", # Ȗ
398             chr(0x1e74) => "\\TU", # Ṵ
399             chr(0x1e76) => "\\VU", # Ṷ
400             chr(0x1ee6) => "\\hU", # Ủ
401              
402             ### V
403              
404             chr(0x1e7c) => "\\\~V", # Ṽ
405             chr(0x1e7e) => "\\dV", # Ṿ
406              
407             ### W
408              
409             chr(0x1e82) => "\\\'W", # Ẃ
410             chr(0x1e80) => "\\\`W", # Ẁ
411             chr(0x0174) => "\\\^W", # Ŵ
412             chr(0x1e84) => "\\\"W", # Ẅ
413             chr(0x1e86) => "\\\.W", # Ẇ
414             chr(0x1e88) => "\\dW", # Ẉ
415              
416             ### X
417              
418             chr(0x1e8c) => "\\\"X", # Ẍ
419             chr(0x1e8a) => "\\\.X", # Ẋ
420              
421             ### Y
422              
423             chr(0x00dd) => "\\\'Y", # Ý
424             chr(0x1ef2) => "\\\`Y", # Ỳ
425             chr(0x0176) => "\\\^Y", # Ŷ
426             chr(0x0178) => "\\\"Y", # Ÿ
427             chr(0x1ef8) => "\\\~Y", # Ỹ
428             chr(0x1e8e) => "\\\.Y", # Ẏ
429             chr(0x0232) => "\\\=Y", # Ȳ
430             chr(0x1ef4) => "\\dY", # Ỵ
431             chr(0x1ef6) => "\\hY", # Ỷ
432              
433             ### Z
434              
435             chr(0x0179) => "\\\'Z", # Ź
436             chr(0x1e90) => "\\\^Z", # Ẑ
437             chr(0x017d) => "\\vZ", # Ž
438             chr(0x017b) => "\\\.Z", # Ż
439             chr(0x1e94) => "\\bZ", # Ẕ
440             chr(0x1e92) => "\\dZ", # Ẓ
441              
442             ### [
443              
444              
445             ### \
446              
447              
448             ### ]
449              
450              
451             ### ^
452              
453              
454             ### _
455              
456              
457             ### `
458              
459              
460             ### a
461              
462             chr(0x00e1) => "\\\'a", # á
463             chr(0x00e0) => "\\\`a", # à
464             chr(0x00e2) => "\\\^a", # â
465             chr(0x00e4) => "\\\"a", # ä
466             chr(0x00e3) => "\\\~a", # ã
467             chr(0x0105) => "\\ka", # ą
468             chr(0x01ce) => "\\va", # ǎ
469             chr(0x0103) => "\\ua", # ă
470             chr(0x0227) => "\\\.a", # ȧ
471             chr(0x0101) => "\\\=a", # ā
472             chr(0x00e5) => "\\aa", # å
473             chr(0x1ea1) => "\\da", # ạ
474             chr(0x0201) => "\\Ca", # ȁ
475             chr(0x0203) => "\\fa", # ȃ
476             chr(0x1e01) => "\\Da", # ḁ
477             chr(0x1ea3) => "\\ha", # ả
478              
479             ### b
480              
481             chr(0x1e03) => "\\\.b", # ḃ
482             chr(0x1e07) => "\\bb", # ḇ
483             chr(0x1e05) => "\\db", # ḅ
484              
485             ### c
486              
487             chr(0x0107) => "\\\'c", # ć
488             chr(0x0109) => "\\\^c", # ĉ
489             chr(0x010d) => "\\vc", # č
490             chr(0x00e7) => "\\cc", # ç
491             chr(0x010b) => "\\\.c", # ċ
492              
493             ### d
494              
495             chr(0x010f) => "\\vd", # ď
496             chr(0x1e11) => "\\cd", # ḑ
497             chr(0x1e0b) => "\\\.d", # ḋ
498             chr(0x1e0f) => "\\bd", # ḏ
499             chr(0x1e0d) => "\\dd", # ḍ
500             chr(0x1e13) => "\\Vd", # ḓ
501              
502             ### e
503              
504             chr(0x00e9) => "\\\'e", # é
505             chr(0x00e8) => "\\\`e", # è
506             chr(0x00ea) => "\\\^e", # ê
507             chr(0x00eb) => "\\\"e", # ë
508             chr(0x1ebd) => "\\\~e", # ẽ
509             chr(0x0119) => "\\ke", # ę
510             chr(0x011b) => "\\ve", # ě
511             chr(0x0115) => "\\ue", # ĕ
512             chr(0x0229) => "\\ce", # ȩ
513             chr(0x0117) => "\\\.e", # ė
514             chr(0x0113) => "\\\=e", # ē
515             chr(0x1eb9) => "\\de", # ẹ
516             chr(0x0205) => "\\Ce", # ȅ
517             chr(0x0207) => "\\fe", # ȇ
518             chr(0x1e1b) => "\\Te", # ḛ
519             chr(0x1e19) => "\\Ve", # ḙ
520             chr(0x1ebb) => "\\he", # ẻ
521              
522             ### f
523              
524             chr(0x1e1f) => "\\\.f", # ḟ
525              
526             ### g
527              
528             chr(0x01f5) => "\\\'g", # ǵ
529             chr(0x011d) => "\\\^g", # ĝ
530             chr(0x01e7) => "\\vg", # ǧ
531             chr(0x011f) => "\\ug", # ğ
532             chr(0x0123) => "\\cg", # ģ
533             chr(0x0121) => "\\\.g", # ġ
534             chr(0x1e21) => "\\\=g", # ḡ
535              
536             ### h
537              
538             chr(0x0125) => "\\\^h", # ĥ
539             chr(0x1e27) => "\\\"h", # ḧ
540             chr(0x021f) => "\\vh", # ȟ
541             chr(0x1e29) => "\\ch", # ḩ
542             chr(0x1e23) => "\\\.h", # ḣ
543             chr(0x0127) => "\\\=h", # ħ
544             chr(0x1e96) => "\\bh", # ẖ
545              
546             ### i
547              
548             chr(0x00ed) => "\\\'i", # í
549             chr(0x00ec) => "\\\`i", # ì
550             chr(0x00ee) => "\\\^i", # î
551             chr(0x00ef) => "\\\"i", # ï
552             chr(0x0129) => "\\\~i", # ĩ
553             chr(0x012f) => "\\ki", # į
554             chr(0x01d0) => "\\vi", # ǐ
555             chr(0x012d) => "\\ui", # ĭ
556             chr(0x012b) => "\\\=i", # ī
557             chr(0x1ecb) => "\\di", # ị
558             chr(0x0209) => "\\Ci", # ȉ
559             chr(0x020b) => "\\fi", # ȋ
560             chr(0x1e2d) => "\\Ti", # ḭ
561             chr(0x1ec9) => "\\hi", # ỉ
562              
563             ### j
564              
565             chr(0x0135) => "\\\^j", # ĵ
566             chr(0x01f0) => "\\vj", # ǰ
567              
568             ### k
569              
570             chr(0x1e31) => "\\\'k", # ḱ
571             chr(0x01e9) => "\\vk", # ǩ
572             chr(0x0137) => "\\ck", # ķ
573             chr(0x1e35) => "\\bk", # ḵ
574             chr(0x1e33) => "\\dk", # ḳ
575              
576             ### l
577              
578             chr(0x013a) => "\\\'l", # ĺ
579             chr(0x013e) => "\\vl", # ľ
580             chr(0x013c) => "\\cl", # ļ
581             chr(0x0140) => "\\\.l", # ŀ
582             chr(0x1e3b) => "\\bl", # ḻ
583             chr(0x1e37) => "\\dl", # ḷ
584             chr(0x1e3d) => "\\Vl", # ḽ
585              
586             ### m
587              
588             chr(0x1e3f) => "\\\'m", # ḿ
589             chr(0x1e41) => "\\\.m", # ṁ
590             chr(0x1e43) => "\\dm", # ṃ
591              
592             ### n
593              
594             chr(0x0144) => "\\\'n", # ń
595             chr(0x01f9) => "\\\`n", # ǹ
596             chr(0x00f1) => "\\\~n", # ñ
597             chr(0x0148) => "\\vn", # ň
598             chr(0x0146) => "\\cn", # ņ
599             chr(0x1e45) => "\\\.n", # ṅ
600             chr(0x1e49) => "\\bn", # ṉ
601             chr(0x1e47) => "\\dn", # ṇ
602             chr(0x1e4b) => "\\Vn", # ṋ
603              
604             ### o
605              
606             chr(0x00f3) => "\\\'o", # ó
607             chr(0x00f2) => "\\\`o", # ò
608             chr(0x00f4) => "\\\^o", # ô
609             chr(0x00f6) => "\\\"o", # ö
610             chr(0x00f5) => "\\\~o", # õ
611             chr(0x01eb) => "\\ko", # ǫ
612             chr(0x0151) => "\\Ho", # ő
613             chr(0x01d2) => "\\vo", # ǒ
614             chr(0x014f) => "\\uo", # ŏ
615             chr(0x022f) => "\\\.o", # ȯ
616             chr(0x014d) => "\\\=o", # ō
617             chr(0x1ecd) => "\\do", # ọ
618             chr(0x020d) => "\\Co", # ȍ
619             chr(0x020f) => "\\fo", # ȏ
620             chr(0x1ecf) => "\\ho", # ỏ
621              
622             ### p
623              
624             chr(0x1e55) => "\\\'p", # ṕ
625             chr(0x1e57) => "\\\.p", # ṗ
626              
627             ### q
628              
629              
630             ### r
631              
632             chr(0x0155) => "\\\'r", # ŕ
633             chr(0x0159) => "\\vr", # ř
634             chr(0x0157) => "\\cr", # ŗ
635             chr(0x1e59) => "\\\.r", # ṙ
636             chr(0x1e5f) => "\\br", # ṟ
637             chr(0x1e5b) => "\\dr", # ṛ
638             chr(0x0211) => "\\Cr", # ȑ
639             chr(0x0213) => "\\fr", # ȓ
640              
641             ### s
642              
643             chr(0x015b) => "\\\'s", # ś
644             chr(0x015d) => "\\\^s", # ŝ
645             chr(0x0161) => "\\vs", # š
646             chr(0x015f) => "\\cs", # ş
647             chr(0x1e61) => "\\\.s", # ṡ
648             chr(0x1e63) => "\\ds", # ṣ
649              
650             ### t
651              
652             chr(0x1e97) => "\\\"t", # ẗ
653             chr(0x0165) => "\\vt", # ť
654             chr(0x0163) => "\\ct", # ţ
655             chr(0x1e6b) => "\\\.t", # ṫ
656             chr(0x0167) => "\\\=t", # ŧ
657             chr(0x1e6f) => "\\bt", # ṯ
658             chr(0x1e6d) => "\\dt", # ṭ
659             chr(0x1e71) => "\\Vt", # ṱ
660              
661             ### u
662              
663             chr(0x00fa) => "\\\'u", # ú
664             chr(0x00f9) => "\\\`u", # ù
665             chr(0x00fb) => "\\\^u", # û
666             chr(0x00fc) => "\\\"u", # ü
667             chr(0x0169) => "\\\~u", # ũ
668             chr(0x0173) => "\\ku", # ų
669             chr(0x0171) => "\\Hu", # ű
670             chr(0x01d4) => "\\vu", # ǔ
671             chr(0x016d) => "\\uu", # ŭ
672             chr(0x016b) => "\\\=u", # ū
673             chr(0x016f) => "\\ru", # ů
674             chr(0x1ee5) => "\\du", # ụ
675             chr(0x0215) => "\\Cu", # ȕ
676             chr(0x0217) => "\\fu", # ȗ
677             chr(0x1e75) => "\\Tu", # ṵ
678             chr(0x1e77) => "\\Vu", # ṷ
679             chr(0x1ee7) => "\\hu", # ủ
680              
681             ### v
682              
683             chr(0x1e7d) => "\\\~v", # ṽ
684             chr(0x1e7f) => "\\dv", # ṿ
685              
686             ### w
687              
688             chr(0x1e83) => "\\\'w", # ẃ
689             chr(0x1e81) => "\\\`w", # ẁ
690             chr(0x0175) => "\\\^w", # ŵ
691             chr(0x1e85) => "\\\"w", # ẅ
692             chr(0x1e87) => "\\\.w", # ẇ
693             chr(0x1e98) => "\\rw", # ẘ
694             chr(0x1e89) => "\\dw", # ẉ
695              
696             ### x
697              
698             chr(0x1e8d) => "\\\"x", # ẍ
699             chr(0x1e8b) => "\\\.x", # ẋ
700              
701             ### y
702              
703             chr(0x00fd) => "\\\'y", # ý
704             chr(0x1ef3) => "\\\`y", # ỳ
705             chr(0x0177) => "\\\^y", # ŷ
706             chr(0x00ff) => "\\\"y", # ÿ
707             chr(0x1ef9) => "\\\~y", # ỹ
708             chr(0x1e8f) => "\\\.y", # ẏ
709             chr(0x0233) => "\\\=y", # ȳ
710             chr(0x1e99) => "\\ry", # ẙ
711             chr(0x1ef5) => "\\dy", # ỵ
712             chr(0x1ef7) => "\\hy", # ỷ
713              
714             ### z
715              
716             chr(0x017a) => "\\\'z", # ź
717             chr(0x1e91) => "\\\^z", # ẑ
718             chr(0x017e) => "\\vz", # ž
719             chr(0x017c) => "\\\.z", # ż
720             chr(0x1e95) => "\\bz", # ẕ
721             chr(0x1e93) => "\\dz", # ẓ
722              
723             );
724              
725             # latex character references
726             %LATEX_MACROS = (
727              
728             "\\\\" => "\n",
729              
730             "\\char92" => '\\',
731             "\\char94" => '^',
732             "\\char126" => '~',
733              
734             "--" => chr(0x2014), # --
735              
736             "\\acute{e}" => chr(0x00e9), # é
737             "\\textunderscore" => chr(0x005f), # _
738             "\\textbraceleft" => chr(0x007b), # {
739             "\\textbraceright" => chr(0x007d), # }
740             "\\textasciitilde" => chr(0x007e), # ~
741             "\\textexclamdown" => chr(0x00a1), # ¡
742             "\\textcent" => chr(0x00a2), # ¢
743             "\\textsterling" => chr(0x00a3), # £
744             "\\textcurrency" => chr(0x00a4), # ¤
745             "\\textyen" => chr(0x00a5), # ¥
746             "\\textbrokenbar" => chr(0x00a6), # ¦
747             "\\textsection" => chr(0x00a7), # §
748             "\\textasciidieresis" => chr(0x00a8), # ¨
749             "\\copyright" => chr(0x00a9), # ©
750             "\\textcopyright" => chr(0x00a9), # ©
751             "\\textordfeminine" => chr(0x00aa), # ª
752             "\\guillemotleft" => chr(0x00ab), # «
753             "\\textlnot" => chr(0x00ac), # ¬
754             "\\textsofthyphen" => chr(0x00ad), # ­
755             "\\textregistered" => chr(0x00ae), # ®
756             "\\textasciimacron" => chr(0x00af), # ¯
757             "\\textdegree" => chr(0x00b0), # °
758             "\\textpm" => chr(0x00b1), # ±
759             "\\texttwosuperior" => chr(0x00b2), # ²
760             "\\textthreesuperior" => chr(0x00b3), # ³
761             "\\apostrophe" => chr(0x00b4), # ´
762             "\\textasciiacute" => chr(0x00b4), # ´
763             "\\textmu" => chr(0x00b5), # µ
764             "\\textpilcrow" => chr(0x00b6), # ¶
765             "\\textparagraph" => chr(0x00b6), # ¶
766             "\\textperiodcentered" => chr(0x00b7), # ·
767             "\\textasciicedilla" => chr(0x00b8), # ¸
768             "\\textonesuperior" => chr(0x00b9), # ¹
769             "\\textordmasculine" => chr(0x00ba), # º
770             "\\guillemotright" => chr(0x00bb), # »
771             "\\textonequarter" => chr(0x00bc), # ¼
772             "\\textonehalf" => chr(0x00bd), # ½
773             "\\textthreequarters" => chr(0x00be), # ¾
774             "\\textquestiondown" => chr(0x00bf), # ¿
775             "\\texttimes" => chr(0x00d7), # ×
776             "\\textdiv" => chr(0x00f7), # ÷
777             "\\textflorin" => chr(0x0192), # ƒ
778             "\\textasciibreve" => chr(0x0306), # ̆
779             "\\textasciicaron" => chr(0x030c), # ̌
780             "\\textbaht" => chr(0x0e3f), # ฿
781             "\\textnospace" => chr(0x200b), # ​
782             "\\textendash" => chr(0x2013), # –
783             "\\textemdash" => chr(0x2014), # —
784             "\\textbardbl" => chr(0x2016), # ‖
785             "\\textquoteleft" => chr(0x2018), # ‘
786             "\\textquoteright" => chr(0x2019), # ’
787             "\\textquotedblleft" => chr(0x201c), # “
788             "\\textquotedblright" => chr(0x201d), # ”
789             "\\textdagger" => chr(0x2020), # †
790             "\\textdaggerdbl" => chr(0x2021), # ‡
791             "\\textbullet" => chr(0x2022), # •
792             "\\textellipsis" => chr(0x2026), # …
793             "\\textperthousand" => chr(0x2030), # ‰
794             "\\textpertenthousand" => chr(0x2031), # ‱
795             "\\textacutedbl" => chr(0x2033), # ″
796             "\\textasciigrave" => chr(0x2035), # ‵
797             "\\textgravedbl" => chr(0x2036), # ‶
798             "\\textreferencemark" => chr(0x203b), # ※
799             "\\textinterrobang" => chr(0x203d), # ‽
800             "\\textfractionsolidus" => chr(0x2044), # ⁄
801             "\\textlquill" => chr(0x2045), # ⁅
802             "\\textrquill" => chr(0x2046), # ⁆
803             "\\textasteriskcentered" => chr(0x204e), # ⁎
804             "\\textcolonmonetary" => chr(0x20a1), # ₡
805             "\\textfrenchfranc" => chr(0x20a3), # ₣
806             "\\textlira" => chr(0x20a4), # ₤
807             "\\textnaira" => chr(0x20a6), # ₦
808             "\\textwon" => chr(0x20a9), # ₩
809             "\\textdong" => chr(0x20ab), # ₫
810             "\\texteuro" => chr(0x20ac), # €
811             "\\textpeso" => chr(0x20b1), # ₱
812             "\\textcelsius" => chr(0x2103), # ℃
813             "\\textnumero" => chr(0x2116), # №
814             "\\textcircledP" => chr(0x2117), # ℗
815             "\\textrecipe" => chr(0x211e), # ℞
816             "\\textservicemark" => chr(0x2120), # ℠
817             "\\texttrademark" => chr(0x2122), # ™
818             "\\textohm" => chr(0x2126), # Ω
819             "\\textmho" => chr(0x2127), # ℧
820             "\\textestimated" => chr(0x212e), # ℮
821             "\\textleftarrow" => chr(0x2190), # ←
822             "\\textuparrow" => chr(0x2191), # ↑
823             "\\textrightarrow" => chr(0x2192), # →
824             "\\textdownarrow" => chr(0x2193), # ↓
825             "\\textsurd" => chr(0x221a), # √
826             "\\textasciicircum" => chr(0x2303), # ⌃
827             "\\textvisiblespace" => chr(0x2423), # ␣
828             "\\textopenbullet" => chr(0x25e6), # ◦
829             "\\textbigcircle" => chr(0x25ef), # ◯
830             "\\textmusicalnote" => chr(0x266a), # ♪
831             "\\textlangle" => chr(0x3008), # 〈
832             "\\textrangle" => chr(0x3009), # 〉
833              
834             );
835              
# Greek letters, generated rather than listed by hand.
#   %TEX_GREEK : TeX macro (e.g. "\alpha")  => Unicode character
#   %GREEK     : Unicode character          => "\ensuremath{<macro>}"
%GREEK = %TEX_GREEK = ();
{
    # The word list follows Unicode code point order starting at U+03B1
    # (lowercase) and U+0391 (uppercase); "varsigma" holds the slot between
    # rho and sigma so the offsets stay aligned.
    my $i = 0;
    for (qw( alpha beta gamma delta epsilon zeta eta theta iota kappa lambda mu nu xi omicron pi rho varsigma sigma tau upsilon phi chi psi omega )) {
        # lowercase
        $GREEK{ $TEX_GREEK{"\\$_"} = chr(0x3b1 + $i) } = "\\ensuremath{\\$_}";
        # uppercase ("\u" upper-cases the first letter of the macro name)
        $GREEK{ $TEX_GREEK{"\\\u$_"} = chr(0x391 + $i) } = "\\ensuremath{\\\u$_}";
        $i++;
    }

    # lamda/lambda: accept the common misspelling as an alias.
    # The aliases must be read from %TEX_GREEK, which was filled just above;
    # the previous code read them from %LATEX_Escapes_inv, a hash that is not
    # populated here, so both aliases were stored as undef.
    $TEX_GREEK{"\\lamda"} = $TEX_GREEK{"\\lambda"};
    $TEX_GREEK{"\\Lamda"} = $TEX_GREEK{"\\Lambda"};

    # Remove Greek letters that aren't available in TeX
    # http://www.artofproblemsolving.com/Wiki/index.php/LaTeX:Symbols
    # The inner delete returns the Unicode character, which is then used as
    # the key to drop from %GREEK.
    # NOTE(review): "Omega" is in this list although \Omega is a standard
    # LaTeX macro -- confirm whether removing it is intended.
    for (qw( omicron Alpha Beta Epsilon Zeta Eta Iota Kappa Mu Nu Omicron Rho Varsigma Tau Chi Omega )) {
        delete $GREEK{ delete $TEX_GREEK{"\\$_"} };
    }
}
856              
857             %MATH_CHARS = ( # plain letter => Unicode character; encoded as the letter wrapped in $...$
858             # Sets, http://www.unicode.org/charts/PDF/Unicode-4.1/U41-2100.pdf
859             'N' => chr(0x2115), # ℕ
860             'R' => chr(0x211d), # ℝ
861             'Z' => chr(0x2124), # ℤ
862              
863             );
864              
865             %MATH = ( # Unicode character => LaTeX math macro; for a repeated key the last entry wins
866             # 'sin' => 'sin', # sin (should be romanised), other trigonometric functions???
867             chr(0x2192) => '\\to', # --> (shadowed: the later '\\rightarrow' entry reuses this key)
868             chr(0x2190) => '\\leftarrow', # <--
869             chr(0x2192) => '\\rightarrow', # --> (same key as '\\to' above; this entry is the one kept)
870             chr(0x2248) => '\\approx', # ≈ Approximately equal to
871             chr(0x2272) => '\\lesssim', # May not exist!
872             chr(0x2273) => '\\gtrsim', # May not exist!
873             chr(0x2243) => '\\simeq',
874             chr(0x2264) => '\\leq',
875             chr(0x00b1) => '\\pm', # ± Plus-minus
876             chr(0x00d7) => '\\times', # × Times
877             chr(0x2299) => '\\odot', # odot
878             chr(0x222b) => '\\int', # integral
879             chr(0x221a) => '\\sqrt', # square root
880             chr(0x223c) => '\\sim', # tilde/mathematical similar
881             chr(0x22c5) => '\\cdot', # dot
882              
883             # Relations and Operators (single-quoted, so the lone backslash is kept literally)
884             chr(0x2265) => '\ge', # ≥
885             chr(0x2213) => '\mp', # ∓
886             chr(0x2260) => '\neq', # ≠
887             chr(0x2249) => '\not\approx', # ≉
888             chr(0x2218) => '\circ', # ∘
889             chr(0x2245) => '\cong', # ≅ (repeated with the same value under Geometry)
890             chr(0x2261) => '\equiv', # ≡
891             chr(0x2262) => '\not\equiv', # ≢
892             chr(0x226E) => '\not<', # ≮
893             chr(0x226F) => '\not>', # ≯
894             chr(0x2270) => '\not\le', # ≰
895             chr(0x2271) => '\not\ge', # ≱
896              
897             # Sets and Logic
898             chr(0x2205) => '\emptyset', # ∅
899             chr(0x2135) => '\aleph', # ℵ
900             chr(0x2208) => '\in', # ∈
901             chr(0x2136) => '\beth', # ℶ
902             chr(0x2209) => '\notin', # ∉
903             chr(0x220B) => '\ni', # ∋
904             chr(0x2227) => '\wedge', # ∧
905             chr(0x220C) => '\not\ni', # ∌
906             chr(0x2228) => '\vee', # ∨
907             chr(0x2282) => '\subset', # ⊂
908             chr(0x22BB) => '\veebar', # ⊻
909             chr(0x2286) => '\subseteq', # ⊆
910             chr(0x2200) => '\forall', # ∀
911             chr(0x2284) => '\not\subset', # ⊄
912             chr(0x2203) => '\exists', # ∃
913             chr(0x2288) => '\not\subseteq', # ⊈
914             chr(0x22A4) => '\top', # ⊤
915             chr(0x228A) => '\subsetneq', # ⊊
916             chr(0x22A5) => '\bot', # ⊥ (shadowed: '\perp' under Geometry reuses this key)
917             chr(0x228B) => '\supsetneq', # ⊋
918             chr(0x2234) => '\therefore', # ∴
919             chr(0x2283) => '\supset', # ⊃
920             chr(0x22A2) => '\vdash', # ⊢
921             chr(0x2287) => '\supseteq', # ⊇
922             chr(0x22A8) => '\models', # ⊨
923             chr(0x222A) => '\cup', # ∪
924             chr(0x25A1) => '\Box', # □
925             chr(0x2229) => '\cap', # ∩
926             chr(0x22C3) => '\bigcup', # ⋃
927             chr(0x22C2) => '\bigcap', # ⋂
928             chr(0x2216) => '\setminus', # ∖
929              
930             # Geometry
931             chr(0x2220) => '\angle', # ∠
932             chr(0x25B3) => '\triangle', # △
933             chr(0x22A5) => '\perp', # ⊥ (same key as '\bot' above; this entry is the one kept)
934             chr(0x2225) => '\parallel', # ∥
935             chr(0x2245) => '\cong', # ≅ (duplicate of the entry under Relations and Operators)
936              
937             # Analysis
938             chr(0x221E) => '\infty', # ∞
939             chr(0x230A) => '\lfloor', # ⌊
940             chr(0x0394) => '\Delta', # Δ
941             chr(0x230B) => '\rfloor', # ⌋
942             chr(0x2207) => '\nabla', # ∇
943             chr(0x2308) => '\lceil', # ⌈
944             chr(0x2202) => '\partial', # ∂
945             chr(0x2309) => '\rceil', # ⌉
946             chr(0x2211) => '\sum', # ∑
947             #chr(0x2225) => '\|', # ∥
948             chr(0x220F) => '\prod', # ∏
949             chr(0x27E8) => '\langle', # ⟨
950             chr(0x27E9) => '\rangle', # ⟩
951             chr(0x222C) => '\iint', # ∬
952             #chr(0x2032) => q"'", # ′
953             chr(0x222D) => '\iiint', # ∭
954             chr(0x2A0C) => '\iiiint', # ⨌
955             #chr(0x2034) => q"'''", # ‴
956             chr(0x222E) => '\oint', # ∮
957             chr(0x211C) => '\Re', # ℜ
958             chr(0x2111) => '\Im', # ℑ
959             chr(0x2118) => '\wp', # ℘
960              
961             # Algebra
962             chr(0x2295) => '\oplus', # ⊕
963             chr(0x2A01) => '\bigoplus', # ⨁
964             chr(0x2297) => '\otimes', # ⊗
965             chr(0x2A02) => '\bigotimes', # ⨂
966             chr(0x25C3) => '\triangleleft', # ◃
967             chr(0x22B4) => '\unlhd', # ⊴
968             chr(0x22CA) => '\rtimes', # ⋊
969             chr(0x2240) => '\wr', # ≀
970              
971             # Arrows
972             chr(0x21D2) => '\Rightarrow', # ⇒
973             chr(0x21D0) => '\Leftarrow', # ⇐
974             chr(0x21D1) => '\Uparrow', # ⇑
975             chr(0x21D3) => '\Downarrow', # ⇓
976             chr(0x2196) => '\nwarrow', # ↖
977             chr(0x2197) => '\nearrow', # ↗
978             chr(0x2198) => '\searrow', # ↘
979             chr(0x2199) => '\swarrow', # ↙
980             chr(0x21A6) => '\mapsto', # ↦
981             chr(0x2194) => '\leftrightarrow', # ↔
982             chr(0x21D4) => '\Leftrightarrow', # ⇔
983             chr(0x21A3) => '\rightarrowtail', # ↣
984             chr(0x21A0) => '\twoheadrightarrow', # ↠
985             chr(0x21AA) => '\hookrightarrow', # ↪
986              
987             # Dots
988             chr(0x22EF) => '\cdots', # ⋯
989             chr(0x22F1) => '\ddots', # ⋱
990             chr(0x22EE) => '\vdots', # ⋮
991              
992             chr(0x1d538) => '\mathbb{A}', # 𝔸
993             chr(0x1d552) => '\mathbb{a}', # 𝕒
994             chr(0x1d539) => '\mathbb{B}', # 𝔹
995             chr(0x1d553) => '\mathbb{b}', # 𝕓
996             chr(0x2102) => '\mathbb{C}', # ℂ
997             chr(0x1d554) => '\mathbb{c}', # 𝕔
998             chr(0x1d7d8) => '\mathbb{0}', # 𝟘
999             chr(0x1d7d9) => '\mathbb{1}', # 𝟙
1000             chr(0x1d7da) => '\mathbb{2}', # 𝟚
1001              
1002             chr(0x1d504) => '\mathfrak{A}', # 𝔄
1003             chr(0x1d51e) => '\mathfrak{a}', # 𝔞
1004             chr(0x1d505) => '\mathfrak{B}', # 𝔅
1005             chr(0x1d51f) => '\mathfrak{b}', # 𝔟
1006             chr(0x212d) => '\mathfrak{C}', # ℭ
1007             chr(0x1d520) => '\mathfrak{c}', # 𝔠
1008              
1009             chr(0x1d49c) => '\mathcal{A}', # 𝒜
1010             chr(0x1d4b6) => '\mathcal{a}', # 𝒶
1011             chr(0x212c) => '\mathcal{B}', # ℬ
1012             chr(0x1d4b7) => '\mathcal{b}', # 𝒷
1013             chr(0x1d49e) => '\mathcal{C}', # 𝒞
1014             chr(0x1d4b8) => '\mathcal{c}', # 𝒸
1015              
1016             # var greek characters
1017             chr(0x03B5) => '\varepsilon', # ε
1018             chr(0x03F0) => '\varkappa', # ϰ
1019             chr(0x03C6) => '\varphi', # φ
1020             chr(0x03D6) => '\varpi', # ϖ
1021             chr(0x03F1) => '\varrho', # ϱ
1022             chr(0x03C2) => '\varsigma', # ς
1023             chr(0x03D1) => '\vartheta', # ϑ
1024             );
1025              
1026             %ASTRONOMY = ( # Unicode astronomical/zodiac symbol => LaTeX macro; only used reversed, in %MACROS
1027             chr(0x263F) => '\mercury', # ☿
1028             chr(0x2648) => '\aries', # ♈
1029             chr(0x2640) => '\venus', # ♀
1030             chr(0x2649) => '\taurus', # ♉
1031             chr(0x2295) => '\earth', # ⊕ (same code point as '\oplus' in %MATH)
1032             chr(0x264A) => '\gemini', # ♊
1033             chr(0x2642) => '\mars', # ♂
1034             chr(0x264B) => '\cancer', # ♋
1035             chr(0x2643) => '\jupiter', # ♃
1036             chr(0x264C) => '\leo', # ♌
1037             chr(0x2644) => '\saturn', # ♄
1038             chr(0x264D) => '\virgo', # ♍
1039             chr(0x26E2) => '\uranus', # ⛢
1040             chr(0x264E) => '\libra', # ♎
1041             chr(0x2646) => '\neptune', # ♆
1042             chr(0x264F) => '\scorpio', # ♏
1043             chr(0x2647) => '\pluto', # ♇
1044             chr(0x2650) => '\sagittarius', # ♐
1045             chr(0x2609) => '\astrosun', # ☉
1046             chr(0x2651) => '\capricornus', # ♑
1047             chr(0x263D) => '\rightmoon', # ☽
1048             chr(0x2652) => '\aquarius', # ♒
1049             chr(0x263E) => '\leftmoon', # ☾
1050             chr(0x2653) => '\pisces', # ♓
1051             chr(0x260A) => '\ascnode', # ☊
1052             chr(0x260B) => '\descnode', # ☋
1053             chr(0x260C) => '\conjunction', # ☌
1054             chr(0x260D) => '\opposition', # ☍
1055              
1056             );
1057              
1058             %GAMES = ( # Unicode chess/dice/card-suit symbol => LaTeX macro; only used reversed, in %MACROS
1059             chr(0x265D) => '\blackbishop', # ♝
1060             chr(0x2680) => '\epsdice{1}', # ⚀
1061             chr(0x265A) => '\blackking', # ♚
1062             chr(0x2681) => '\epsdice{2}', # ⚁
1063             chr(0x265E) => '\blackknight', # ♞
1064             chr(0x2682) => '\epsdice{3}', # ⚂
1065             chr(0x265F) => '\blackpawn', # ♟
1066             chr(0x2683) => '\epsdice{4}', # ⚃
1067             chr(0x265B) => '\blackqueen', # ♛
1068             chr(0x2684) => '\epsdice{5}', # ⚄
1069             chr(0x265C) => '\blackrook', # ♜
1070             chr(0x2685) => '\epsdice{6}', # ⚅
1071             chr(0x2657) => '\whitebishop', # ♗
1072             chr(0x2663) => '\clubsuit', # ♣
1073             chr(0x2654) => '\whiteking', # ♔
1074             chr(0x2661) => '\heartsuit', # ♡
1075             chr(0x2658) => '\whiteknight', # ♘
1076             chr(0x2660) => '\spadesuit', # ♠
1077             chr(0x2659) => '\whitepawn', # ♙
1078             chr(0x2662) => '\diamondsuit', # ♢
1079             chr(0x2655) => '\whitequeen', # ♕
1080             chr(0x2656) => '\whiterook', # ♖
1081             );
1082              
1083             %KEYS = ( # Unicode keyboard-key symbol => LaTeX macro; only used reversed, in %MACROS
1084             chr(0x2318) => '\cmdkey', # ⌘
1085             chr(0x21E5) => '\tabkey', # ⇥
1086             chr(0x2325) => '\optkey', # ⌥
1087             chr(0x21E4) => '\revtabkey', # ⇤
1088             chr(0x21E7) => '\shiftkey', # ⇧
1089             chr(0x238B) => '\esckey', # ⎋
1090             chr(0x232B) => '\delkey', # ⌫
1091             chr(0x23CE) => '\returnkey', # ⏎
1092             chr(0x21EA) => '\capslockkey', # ⇪
1093             chr(0x2324) => '\enterkey', # ⌤
1094             chr(0x23CF) => '\ejectkey', # ⏏
1095             chr(0x2326) => '\rightdelkey', # ⌦
1096             );
1097              
1098             # International Phonetic Alphabet (macro names look like the tipa package's — TODO confirm)
1099             %IPA = ( # IPA character => LaTeX macro; ASCII letters map to themselves; only used reversed, in %MACROS
1100             # Plosives
1101             chr(0x0062) => 'b', # b voiced bilabial plosive
1102             chr(0x0063) => 'c', # c voiceless palatal plosive (e.g. Hungarian ty)
1103             chr(0x0064) => 'd', # d voiced dental/alveolar plosive
1104             chr(0x0256) => '\textrtaild', # ɖ voiced retroflex plosive
1105             chr(0x0067) => 'g', # g voiced velar plosive
1106             chr(0x0262) => '\textscg', # ɢ voiced uvular plosive
1107             chr(0x006B) => 'k', # k voiceless velar plosive
1108             chr(0x0070) => 'p', # p voiceless bilabial plosive
1109             chr(0x0071) => 'q', # q voiceless uvular plosive
1110             chr(0x0074) => 't', # t voiceless dental/alveolar plosive
1111             chr(0x0288) => '\textrtailt', # ʈ voiceless retroflex plosive
1112             chr(0x0294) => '\textglotstop', # ʔ glottal plosive
1113             chr(0x02A1) => '\textbarglotstop', # ʡ epiglottal plosive
1114              
1115             # Nasals
1116             chr(0x006D) => 'm', # m voiced bilabial nasal
1117             chr(0x0271) => '\textltailm', # ɱ voiced labiodental nasal
1118             chr(0x006E) => 'n', # n voiced dental/alveolar nasal
1119             chr(0x0273) => '\textrtailn', # ɳ voiced retroflex nasal
1120             chr(0x0272) => '\textltailn', # ɲ voiced palatal nasal
1121             chr(0x0274) => '\textscn', # ɴ voiced uvular nasal
1122              
1123             # Fricatives & Approximants
1124             chr(0x03B2) => '\textbeta', # β voiced bilabial fricative
1125             chr(0x0255) => '\textctc', # ɕ voiceless alveolo-palatal median laminal fricative
1126             chr(0x0066) => 'f', # f voiceless labiodental fricative
1127             chr(0x0263) => '\textgamma', # ɣ voiced velar fricative
1128             chr(0x0068) => 'h', # h voiceless glottal fricative/approximant
1129             chr(0x0265) => '\textturnh', # ɥ voiced rounded palatal median approximant (i.e. rounded [j])
1130             chr(0x029C) => '\textsch', # ʜ voiceless epiglottal fricative
1131             chr(0x0266) => '\texthth', # ɦ voiced glottal fricative
1132             chr(0x0267) => '\texththeng', # ɧ combination of [x] and [ʃ] (e.g. Swedish tj, kj)
1133             chr(0x006A) => 'j', # j voiced palatal median approximant
1134             chr(0x029D) => '\textctj', # ʝ voiced palatal median fricative
1135             chr(0x006C) => 'l', # l voiced alveolar lateral approximant
1136             chr(0x026D) => '\textrtaill', # ɭ voiced retroflex lateral approximant
1137             chr(0x026C) => '\textbeltl', # ɬ voiceless alveolar lateral fricative
1138             chr(0x026B) => '\textltilde', # ɫ velarized voiced alveolar lateral approximant
1139             chr(0x026E) => '\textlyoghlig', # ɮ voiced alveolar lateral fricative
1140             chr(0x029F) => '\textscl', # ʟ voiced velar lateral approximant
1141             chr(0x0270) => '\textturnmrleg', # ɰ voiced velar median approximant
1142             chr(0x03B8) => '\texttheta', # θ voiceless interdental median fricative
1143             chr(0x0278) => '\textphi', # ɸ voiceless bilabial fricative
1144             chr(0x0072) => 'r', # r voiced apico-alveolar trill
1145             chr(0x0279) => '\textturnr', # ɹ voiced alveolar/postalveolar approximant
1146             chr(0x027A) => '\textturnlonglegr', # ɺ voiced alveolar lateral flap
1147             chr(0x027E) => '\textfishhookr', # ɾ voiced alveolar flap
1148             chr(0x027B) => '\textturnrrtail', # ɻ voiced retroflex approximant
1149             chr(0x0280) => '\textscr', # ʀ voiced uvular trill or flap
1150             chr(0x0281) => '\textinvscr', # ʁ voiced uvular fricative or approximant (e.g. French r)
1151             chr(0x027D) => '\textrtailr', # ɽ voiced retroflex flap
1152             chr(0x0073) => 's', # s voiceless alveolar median fricative
1153             chr(0x0282) => '\textrtails', # ʂ voiceless retroflex median fricative
1154             chr(0x0283) => '\textesh', # ʃ voiceless palato-alveolar median laminal fricative
1155             chr(0x0076) => 'v', # v voiced labiodental fricative
1156             chr(0x028B) => '\textscriptv', # ʋ voiced labiodental approximant
1157             chr(0x0077) => 'w', # w voiced rounded labial-velar approximant
1158             chr(0x028D) => '\textturnw', # ʍ voiceless rounded labial-velar approximant/fricative (i.e. voiceless [w])
1159             chr(0x0078) => 'x', # x voiceless velar median fricative
1160             chr(0x03C7) => '\textchi', # χ voiceless uvular median fricative
1161             chr(0x028E) => '\textturny', # ʎ voiced palatal lateral approximant (e.g. Italian gl)
1162             chr(0x007A) => 'z', # z voiced alveolar/dental median fricative
1163             chr(0x0290) => '\textrtailz', # ʐ voiced retroflex median fricative
1164             chr(0x0291) => '\textctz', # ʑ voiced alveolo-palatal median laminal fricative
1165             chr(0x0292) => '\textyogh', # ʒ voiced palato-alveolar median laminal fricative
1166             chr(0x0295) => '\textrevglotstop', # ʕ voiced pharyngeal fricative
1167             chr(0x02A2) => '\textbarrevglotstop', # ʢ voiced epiglottal fricative
1168              
1169             # Vowels
1170             chr(0x0061) => 'a', # a unrounded front low vowel (cardinal vowel no. 4)
1171             chr(0x0250) => '\textturna', # ɐ unrounded central low vowel
1172             chr(0x0251) => '\textscripta', # ɑ unrounded back low vowel (cardinal vowel no. 5)
1173             chr(0x0252) => '\textturnscripta', # ɒ rounded back low vowel (cardinal vowel no. 13)
1174             chr(0x0065) => 'e', # e unrounded front high-mid vowel (cardinal vowel no. 2)
1175             chr(0x0259) => '\textschwa', # ə unrounded central mid vowel
1176             chr(0x0258) => '\textreve', # ɘ unrounded central high-mid vowel
1177             chr(0x025A) => '\textrighthookschwa', # ɚ rhotacized [ə]
1178             chr(0x025B) => '\textepsilon', # ɛ unrounded front low-mid vowel (cardinal vowel no. 3)
1179             chr(0x025C) => '\textrevepsilon', # ɜ unrounded central low-mid vowel
1180             chr(0x025D) => '\textrhookrevepsilon', # ɝ rhotacized [ɜ]
1181             chr(0x025E) => '\textcloserevepsilon', # ɞ rounded central low-mid vowel
1182             chr(0x0264) => '\textbabygamma', # ɤ unrounded back high-mid vowel (cardinal vowel no. 15)
1183             chr(0x026F) => '\textturnm', # ɯ unrounded back high vowel (cardinal vowel no. 16)
1184             chr(0x0069) => 'i', # i unrounded front high vowel (cardinal vowel no. 1)
1185             chr(0x03B9) => '\textiota', # ι unrounded front semi-high vowel
1186             chr(0x026A) => '\textsci', # ɪ synonym for [ι]
1187             chr(0x0268) => '\textbari', # ɨ unrounded central high vowel (cardinal vowel no. 17)
1188             chr(0x006F) => 'o', # o rounded back high-mid vowel (cardinal vowel no. 7)
1189             chr(0x0275) => '\textbaro', # ɵ rounded central high-mid vowel
1190             chr(0x0276) => '\textscoelig', # ɶ rounded front low vowel (cardinal vowel no. 12)
1191             chr(0x0254) => '\textopeno', # ɔ rounded back low-mid vowel (cardinal vowel no. 6)
1192             chr(0x0075) => 'u', # u rounded back high vowel (cardinal vowel no. 8)
1193             chr(0x0289) => '\textbaru', # ʉ rounded central high vowel (cardinal vowel no. 18)
1194             chr(0x028A) => '\textupsilon', # ʊ rounded back semi-high vowel
1195             chr(0x028C) => '\textturnv', # ʌ unrounded back low-mid vowel (cardinal vowel no. 14)
1196             chr(0x0079) => 'y', # y rounded front high vowel (cardinal vowel no. 9)
1197             chr(0x028F) => '\textscy', # ʏ rounded front semi-high vowel
1198              
1199             # Implosives & Clicks
1200             chr(0x0253) => '\texthtb', # ɓ voiced glottalic ingressive bilabial stop
1201             chr(0x0257) => '\texthtd', # ɗ voiced glottalic ingressive dental/postalveolar stop
1202             chr(0x0260) => '\texthtg', # ɠ voiced glottalic ingressive velar stop
1203             chr(0x029B) => '\texthtscg', # ʛ voiced glottalic ingressive uvular stop
1204             chr(0x0298) => '\textbullseye', # ʘ bilabial click
1205             chr(0x01C0) => '\textpipe', # ǀ dental click
1206             chr(0x01C1) => '\textdoublepipe', # ǁ lateral click
1207             chr(0x0021) => '!', # ! alveolar/postalveolar click
1208             );
1209              
1210             # derived mappings: lookup tables and regex source strings built from the hashes above
1211 3     3   25 use vars qw( %CHAR_MAP $CHAR_MAP_RE );
  3         6  
  3         497  
1212              
1213             %CHAR_MAP = (%CHARS, %ACCENTED_CHARS, %GREEK); # character => macro; on duplicate keys the later hash wins
1214             for(keys %MATH)
1215             {
1216             $CHAR_MAP{$_} ||= '$' . $MATH{$_} . '$'; # keep any mapping already present; otherwise wrap the math macro in $...$
1217             }
1218             for(keys %MATH_CHARS)
1219             {
1220             $CHAR_MAP{$MATH_CHARS{$_}} ||= '$' . $_ . '$'; # keyed by the Unicode character; value is the plain letter in $...$
1221             }
1222              
1223             $CHAR_MAP_RE = '[' . join('', map { quotemeta($_) } sort { length($b) <=> length($a) } keys %CHAR_MAP) . ']'; # character-class source string listing every %CHAR_MAP key
1224              
1225 3     3   18 use vars qw( $RESERVED_RE $BIBTEX_RESERVED_RE );
  3         6  
  3         531  
1226              
1227             $RESERVED_RE = '[' . join('', map { quotemeta($_) } sort { length($b) <=> length($a) } keys %RESERVED) . ']'; # character class of the %RESERVED keys
1228             $BIBTEX_RESERVED_RE = '[' . join('', map { quotemeta($_) } sort { length($b) <=> length($a) } keys %BIBTEX_RESERVED) . ']'; # character class of the %BIBTEX_RESERVED keys
1229              
1230 3     3   21 use vars qw( %MACROS $MACROS_RE );
  3         5  
  3         460  
1231              
1232             %MACROS = ( # macro => character; later lists override earlier ones on duplicate macro names
1233             reverse(%RESERVED), # reverse() flips each key/value list, turning character => macro into macro => character
1234             reverse(%CHARS),
1235             reverse(%ACCENTED_CHARS),
1236             reverse(%MATH),
1237             reverse(%ASTRONOMY),
1238             reverse(%GAMES),
1239             reverse(%KEYS),
1240             reverse(%IPA),
1241             %TEX_GREEK, # already keyed by macro, so not reversed
1242             %LATEX_MACROS # not reversed either; presumably also macro => character — verify against its definition
1243             );
1244              
1245             $MACROS_RE = join('|', map { "(?:$_)" } map { quotemeta($_) } sort { length($b) <=> length($a) } keys %MACROS); # alternation source string of all macro names, longest first
1246              
1247 3     3   19 use vars qw( $MATH_CHARS_RE );
  3         6  
  3         598  
1248              
1249             $MATH_CHARS_RE = '[' . join('', map { quotemeta($_) } sort { length($b) <=> length($a) } keys %MATH_CHARS) . ']'; # character class of the %MATH_CHARS letters
1250              
1251             1;