File Coverage

blib/lib/UI/KeyboardLayout.pm
Criterion Covered Total %
statement 50 4182 1.2
branch 2 2900 0.0
condition 1 1943 0.0
subroutine 13 223 5.8
pod 0 186 0.0
total 66 9434 0.7


line stmt bran cond sub pod time code
1             package UI::KeyboardLayout;
2            
3             $VERSION = $VERSION = "0.72";
4            
5             binmode $DB::OUT, ':utf8' if $DB::OUT; # (older) Perls had "Wide char in Print" in debugger otherwise
6             binmode $DB::LINEINFO, ':utf8' if $DB::LINEINFO; # (older) Perls had "Wide char in Print" in debugger otherwise
7            
8 1     1   54896 use strict;
  1         1  
  1         28  
9 1     1   379 use utf8;
  1         12  
  1         5  
10 1   50 1   147 BEGIN { my $n = ($ENV{UI_KEYBOARDLAYOUT_DEBUG} || 0);
11 1 50       7 if ($n =~ /^0x/i) {
12 0         0 $n = hex $n;
13             } else {
14 1         1 $n += 0;
15             }
16 1         43 eval "sub debug() { $n }";
17             # 1 2 4 8 0x10 0x20
18 1         5 my @dbg = (qw( debug_face_layout_recipes debug_GUESS_MASSAGE debug_OPERATOR debug_import debug_stacking debug_noid ),
19             # 0x40 0x80 0x100 0x200 0x400 0x800 0x1000
20             qw(warnSORTEDLISTS printSORTEDLISTS warnSORTCOMPOSE warnDO_COMPOSE warnCACHECOMP dontCOMPOSE_CACHE warnUNRES),
21             # 0x2000 0x4000
22             qw(debug_STACKING printSkippedComposeKey),
23             '_debug_PERL_dollar1_scoping');
24 1         2 my $c = 0; # printSORTEDLISTS: Dumpvalue to STDOUT (implementation detail!)
25 1         8 my @dbg_b = map $n & (1<<$_), 0..31;
26 1         3 for (@dbg) {
27 16         709 eval "sub $_ () {$dbg_b[$c++]}";
28             }
29             }
30             sub debug_PERL_dollar1_scoping () { debug & 0x1000000 }
31            
32             my $ctrl_after = 1; # In "pairs of nonShift/Shift-columns" (1 simplifies output of BACK/ESCAPE/RETURN/CANCEL)
33             my $create_alpha_ctrl = 2;
34             my %start_SEC = (FKEYS => [96, 24, sub { my($self,$u,$v)=@_; 'F' . (1+$u-$v->[0]) }],
35             ARROWS => [128, 16,
36             sub { my($self,$u,$v)=@_;
37             (qw(HOME UP PRIOR DIVIDE LEFT CLEAR RIGHT MULTIPLY END DOWN NEXT SUBTRACT INSERT DELETE RETURN ADD))[$u-$v->[0]]}],
38             NUMPAD => [144, 16,
39             sub { my($self,$u,$v)=@_;
40             ((map { ($_ > 10 ? 'F' : "NUMPAD") . $_} 7..9,14,4..6,15,1..3,16,0), 'DECIMAL')[$u-$v->[0]]}]);
41             my $maxEntityLen = 111; # Avoid overflow of prefix char above 0fff in kbdutool (but now can channel them to smaller values)
42             my $avoid_overlong_synonims_Entity = 20; # These two are currently disabled
43            
44 133     133 0 612 sub toU($) { substr+(qq(\x{fff}).shift),1 } # Some bullshit one must do to make perl's Unicode 8-bit-aware (!)
45            
46             #use subs qw(chr lc);
47 1     1   304 use subs qw(chr lc uc ucfirst);
  1         20  
  1         5  
48            
49             #BEGIN { *CORE::GLOGAL::chr = sub ($) { toU CORE::chr shift };
50             # *CORE::GLOGAL::lc = sub ($) { CORE::lc toU shift };
51             #}
52             ### Remove ß ẞ :
53             ## my %fix = qw( ԥ Ԥ ԧ Ԧ ӏ Ӏ ɀ Ɀ ꙡ Ꙡ ꞑ Ꞑ ꞧ Ꞧ ɋ Ɋ ꞩ Ꞩ ȿ Ȿ ꞓ Ꞓ ꞥ Ꞥ ); # Perl 5.8.8 uc is wrong with palochka, 5.10 with z with swash tail
54             my %fix = qw( ԥ Ԥ ԧ Ԧ ӏ Ӏ ɀ Ɀ ꙡ Ꙡ ꞑ Ꞑ ꞧ Ꞧ ɋ Ɋ ß ẞ ꞩ Ꞩ ȿ Ȿ ꞓ Ꞓ ꞥ Ꞥ ℊ Ɡ ϳ Ϳ ); # Perl 5.8.8 uc is wrong with palochka, 5.10 with z with swash tail
55             my %unfix = reverse %fix;
56            
57 88     88   178 sub chr($) { local $^W = 0; toU CORE::chr shift } # Avoid illegal character 0xfffe etc warnings...
  88         152  
58 0 0   0   0 sub lc($) { my $in = shift; $unfix{$in} || CORE::lc toU $in }
  0         0  
59 45 50   45   60 sub uc($) { my $in = shift; $fix{$in} || CORE::uc toU $in }
  45         88  
60 0 0   0   0 sub ucfirst($) { my $in = shift; $fix{$in} || CORE::ucfirst toU $in }
  0         0  
61            
62             # We use this for printing, not for reading (so we can use //o AFTER the UCD is read)
63 1     1   194 my $rxCombining = qr/\p{NonspacingMark}/; # The initial version matches what Perl knows
  1         1  
  1         10  
64             my $rxZW = qr/\p{Line_Break: ZW}|[\xAD\x{200b}-\x{200f}\x{2060}-\x{2064}\x{fe00}-\x{fe0f}]/;
65            
66 0     0 0 0 sub rxCombining { $rxCombining }
67            
68             =pod
69            
70             =encoding UTF-8
71            
72             =head1 NAME
73            
74             UI::KeyboardLayout - Module for designing keyboard layouts
75            
76             =head1 SYNOPSIS
77            
78             #!/usr/bin/perl -wC31
79             use UI::KeyboardLayout;
80             use strict;
81            
82             # Download from http://www.unicode.org/Public/UNIDATA/
83             UI::KeyboardLayout::->set_NamesList("$ENV{HOME}/Downloads/NamesList.txt");
84            
85             UI::KeyboardLayout::->set__value('ComposeFiles', # CygWin too
86             ['/usr/share/X11/locale/en_US.UTF-8/Compose']);
87             # http://cgit.freedesktop.org/xorg/proto/xproto/plain/keysymdef.h
88             UI::KeyboardLayout::->set__value('KeySyms',
89             ['/usr/share/X11/include/keysymdef.h']);
90             UI::KeyboardLayout::->set__value('EntityFiles',
91             ["$ENV{HOME}/Downloads/bycodes.html"]);
92             UI::KeyboardLayout::->set__value('rfc1345Files',
93             ["$ENV{HOME}/Downloads/rfc1345.html"]);
94            
95             my $i = do {local $/; open my $in, '<', 'MultiUni.kbdd' or die; <$in>};
96             # Init from in-memory copy of the configfile
97             # Combines new()->parse_add_configfile()->massage_full():
98             my $k = UI::KeyboardLayout:: -> new_from_configfile_string($i)
99             -> fill_win_template( 1, [qw(faces CyrillicPhonetic)] );
100             print $k;
101            
102             open my $f, '<', "$ENV{HOME}/Downloads/NamesList.txt" or die;
103             my $k = UI::KeyboardLayout::->new();
104             my ($d,$c,$names,$blocks,$extraComb,$uniVersion) = $k->parse_NameList($f);
105             close $f or die;
106             $k->print_decompositions($d);
107             $k->print_compositions ($c);
108            
109             UI::KeyboardLayout::->set_NamesList("$ENV{HOME}/Downloads/NamesList.txt",
110             "$ENV{HOME}/Downloads/DerivedAge.txt");
111             my $l = UI::KeyboardLayout::->new();
112             $l->print_compositions;
113             $l->print_decompositions;
114            
115             UI::KeyboardLayout::->set_NamesList("$ENV{HOME}/Downloads/NamesList-6.1.0d8.txt",
116             "$ENV{HOME}/Downloads/DerivedAge-6.1.0d13.txt");
117             # Combines new()->parse_add_configfile()->massage_full():
118             my $l = UI::KeyboardLayout::->new_from_configfile('examples/EurKey++.kbdd');
119            
120             for my $F (qw(US CyrillicPhonetic)) {
121             # Open file, select()
122             print $l->fill_win_template(1,[qw(faces US)]);
123             $l->print_coverage(q(US));
124             print $l->fill_osx_template([qw(faces US)]);
125             }
126            
127             perl -wC31 UI-KeyboardLayout\examples\grep_nameslist.pl "\b(ALPHA|BETA|GAMMA|DELTA|EPSILON|ZETA|ETA|THETA|IOTA|KAPPA|LAMDA|MU|NU|XI|OMICRON|PI|RHO|SIGMA|TAU|UPSILON|PHI|CHI|PSI|OMEGA)\b" ~/Downloads/NamesList.txt >out-greek
128            
129             =head1 AUTHORS
130            
131             Ilya Zakharevich, ilyaz@cpan.org
132            
133             =head1 DESCRIPTION
134            
135             In this section, a "keyboard" has a certain "character repertoire" (which characters may be
136             entered using this keyboard), and a mapping associating a character in the repertoire
137             to a keypress or to several (sequential or simultaneous) keypresses. A small enough keyboard
138             may have a pretty arbitrary mapping and remain useful (witness QWERTY
139             vs Dvorak vs Colemak). However, if a keyboard has a sufficiently large repertoire,
140             there must be a strong logic ("orthogonality") in this association - otherwise
141             most of the repertoire will not be useful (except for people who have an
142             extraordinary memory - and are ready to invest part of it into the keyboard).
143            
144             "Character repertoire" needs of different people vary enormously; observing
145             the people around me, I get a very narrow point of view. But it is the best
146             I can do; what I observe is that many of them would use 1000-2000 characters
147             if they had a simple way to enter them; and the needs of different people do
148             not match a lot. So to be helpful to different people, a keyboard should have
149             at least 2000-3000 different characters in the repertoire. (Some ballpark
150             comparisons: the MES-3B character set
151             has about 2800 characters; the Adobe Glyph List corresponds
152             to about 3600 Unicode characters.)
153            
154             To access these characters, how much structure does one need to carry in memory? One can
155             make a (trivial) estimate from below: on Windows, the standard US keyboard allows
156             entering 100 - or 104 - characters (94 ASCII keys, SPACE, ENTER, TAB - moreover, C-ENTER,
157             BACKSPACE and C-BACKSPACE also produce characters; so do C-[, C-] and C-\
158             C-Break in most layouts!). If one needs about 30 times more, one could do
159             with 5 different ways to "mogrify" a character; if these mogrifications
160             are "orthogonal", then there are 2^5 = 32 ways of combining them, and
161             one could access 32*104 = 3328 characters.
162            
163             Of course, the characters in a "reasonable repertoire" form a very amorphous
164             mass; there is no way to introduce a structure like that which is "natural"
165             (so there is a hope for "ordinary people" to keep it in memory). So the
166             complexity of these mogrifications is not in their number, but in their
167             "nature". One may try to decrease this complexity by having very easy to
168             understand mogrifications - but then there is no hope in having 5 of them
169             - or 10, or 15, or 20.
170            
171             However, we B<know> that many people I<are> able to memorise the layout of
172             70 symbols on a keyboard. So would they be able to handle, for example, 30
173             different "natural" mogrifications? And how large a repertoire of characters
174             one would be able to access using these mogrifications?
175            
176             This module does not answer these questions directly, but it provides tools
177             for investigating them, and tools to construct the actually working keyboard
178             layouts based on these ideas. It consists of the following principal
179             components:
180            
181             =over 4
182            
183             =item Unicode table examiner
184            
185             distills relations between different Unicode characters from the Unicode tables,
186             and combines the results with user-specified "manual mogrification" rules.
187             From these automatic/manual mogrifications, it constructs orthogonal scaffolding
188             supporting Unicode characters (we call it I<composition/decomposition>, but it
189             is a major generalization of the corresponding Unicode consortium's terms).
190            
191             =item Layout constructor
192            
193             allows building keyboard layouts based on the above mogrification rules, and
194             on other visual and/or logical directives. It combines the bulk-handling
195             ability of automatic rule-based approach with a flexibility provided by
196             a system of manual overrides. (The rules are read from a F<.kbdd> L<I<Keyboard
197             Description> file|/"Keyboard description files">.)
198            
199             =item System-specific software layouts
200            
201             may be created based on the "theoretical layout" made by the layout
202             constructor — currently only on Windows (only via the F<kbdutool> route) and OS X.
203            
204             =item Report/Debugging framework
205            
206             creates human-readable descriptions of the layout, and/or debugging reports on
207             how the layout creation logic proceeded.
208            
209             =back
210            
211             The last (and, probably, the most important) component of the distribution is
212             L created using this toolset.
213            
214             =head1 Keyboard description files
215            
216             =head2 Syntax
217            
218             I could not find an appropriate existing configuration file format, so was
219             forced to invent yet-another-config-file-format. Sorry...
220            
221             Config file is for initialization of a tree implementing a hash of hashes of
222             hashes etc whose leaves are either strings or arrays of strings, and keys are
223             words. The file consists of I<"sections">; each section fills a certain hash
224             in the tree.
225            
226             Sections are separated by "section names" which are sequences of word
227             characters and C</> (possibly empty) enclosed in square brackets.
228             C<[]> is a root hash, then C<[word]> is a hash referenced by key C<word> in the
229             root hash, then C<[word/another]> is a hash referenced by element of the hash
230             referenced by C<[word]> etc. Additionally, a section separator may look like
231             C<< [visual -> wordsAndSlashes] >>.
232            
233             Sections are of two types: normal and visual. A normal section
234             consists of comments (starting with C<#>) and assignments. An assignment is
235             in one of 4 forms:
236            
237             word=value
238             +word=value
239             @word=value,value,value,value
240             /word=value/value/value/value
241            
242             The first assigns a string C<value> to the key C<word> in the hash of the
243             current section. The second adds a value to an array referenced by the key
244             C<word>; the other two add several values. Trailing whitespace is stripped.
245            
246             Any string value without end-of-line characters and trailing whitespace
247             can be added this way (and values without commas or without slash can
248             be added in bulk to arrays). In particular, there may be no whitespace before
249             C<=> sign, and the whitespace after C<=> is a part of the value.
250            
251             Visual sections consist of comments, assignments, and C, which
252             is I of the section. Comments
253             after the last assignment become parts of the content. The content is
254             preserved as a whole, and assigned to the key C; trailing
255             whitespace is stripped. (This is the way to insert a value containing
256             end-of-line-characters.)
257            
258             In the context of this distribution, the intent of visual sections is to be
259             parsed by a postprocessor. So the only purpose of explicit assignments in a
260             visual section is to configure how I is parsed; after the parsing
261             is done (and the result is copied elsewhere in the tree) these values should
262             better be not used.
263            
264             =head2 Semantic of visual sections
265            
266             Two types of visual sections are supported: C and C. A content of
267             C section is just an embedded (part of) F<.klc> file. We can read deadkey
268             mappings and deadkey names from such sections. The name of the section becomes the
269             name of the mapping functions which may be used inside the C rule
270             (or in a recipe for a computed layer).
271            
272             A content of C section consists of C<#>-comment lines and "the mapping
273             lines"; every "mapping line" encodes one row in a keyboard (in one or several
274             layouts). (But the make up of rows of this keyboard may be purely imaginary;
275             it is normal to have a "keyboard" with one row of numbers 0...9.)
276             Configuration settings specify how many lines are per row, and how many layers
277             are encoded by every line, and what are the names of these layers:
278            
279             visual_rowcount # how many config lines per row of keyboard
280             visual_per_row_counts # Array of length visual_rowcount
281             visual_prefixes # Array of chars; <= visual_rowcount (miss=SPACE)
282             prefix_repeat # How many times prefix char is repeated (n/a to SPACE)
283             in_key_separator # If several layers per row, splits a key-descr
284             layer_names # Where to put the resulting keys array
285             in_key_separator2 # If one of entries is longer than 1 char, join by this
286             # (optional)
287            
288             Each line consists of a prefix (which is ignored except for sanity checking), and
289             whitespace-separated list of key descriptions. (Whitespace followed by a
290             combining character is not separating.) Each key description is split using
291             C into slots, one slot per layout. (The leading
292             C is not separating.) Each key/layout
293             description consists of one or two entries. An entry is either two dashes
294             C<--> (standing for empty), or a hex number of length >=4, or a string.
295             (A hex number must be separated by C<.> from neighbor word
296             characters.) A loner character which has a different uppercase is
297             auto-replicated in uppercase (more precisely, titlecase) form. Missing or empty key/layout description
298             gives two empty entries (note that the leading key/layout description cannot
299             be empty; same for "the whole key description" - use the leading C<-->).
300            
301             If one of the entries in a slot is a string of length ≥ 2, one must separate
302             the entries by C. Likewise, if a slot has only one entry,
303             and it is longer than 1 char, it must be started or terminated by C.
304            
305             To simplify BiDi keyboards, a line may optionally be prefixed with the L|http://en.wikipedia.org/wiki/Unicode_character_property#Bidirectional_writing>
306             character; if so, it may optionally be ended by spaces and the L|http://en.wikipedia.org/wiki/Unicode_character_property#Bidirectional_writing> character.
307             For compatibility with other components, layer names should not contain characters C<+()[]>.
308            
309             =head2 Inclusion of F<.klc> files
310            
311             Instead of including a F<.klc> file (or its part) verbatim in a visual
312             section, one can make a section C with
313             a key C. Filename will be included and parsed as a C
314             visual section (with name C???). (Currently only UTF-16
315             files are supported.)
316            
317             =head2 Metadata
318            
319             A metadata entry is either a string, or an array. A string behaves as
320             if it were an array with the string repeated sufficiently many times. Each
321             personality defines C which chooses the element of the arrays.
322             The entries
323            
324             COMPANYNAME LAYOUTNAME COPYR_YEARS LOCALE_NAME LOCALE_ID
325             DLLNAME SORT_ORDER_ID_ LANGUAGE_NAME
326            
327             should be defined in the personality section, or above this section in the
328             configuration tree. (Used when outputting Windows F<.klc> files and OS X
329             F<.keylayout> files.)
330            
331             OSX_ADD_VERSION OSX_LAYOUTNAME
332            
333             The first one is the ordinal of the word after which to insert the version
334             into C (OS X allows layout names longer than the limit of 64 UTF-16
335             codepoints of Windows); the second one allows a completely different name.
336            
337             Optional metadata currently consists only of C key (the protocol
338             version; hardwired now as C<1.0>) and keys C defining
339             what goes into the C section of F<.klc> file (the latter may also
340             be specified in a face's section, or its parents).
341            
342             =head2 Layer/Face/Prefix-key Recipes
343            
344             The sections C and C contain instructions how
345             to build Layers and Faces out of simpler elements. Similar recipes appear
346             as values of C entries in a face. Such a "recipe" is
347             executed with I: a base face name, a layer number, and a prefix
348             character (the latter is undefined when the recipe is a layer recipe or
349             face recipe). (The recipe is free to ignore the parameters; for example, most
350             recipes ignore the prefix character even when they are "prefix key" recipes.)
351            
352             The recipes and the visual sections are the most important components of the description
353             of a keyboard group.
354            
355             To construct layers of a face, a face recipe is executed several times with different
356             "layer number" parameter. In contrast, in simplest cases a layer recipe is executed
357             once. However, when the layer is a part of a compound ("parent") recipe, it inherits
358             the "parameters" from the parent. In particular, it may be executed several times with
359             different face name (if used in different faces), or with different layer number (if used
360             - explicitly or implicitly - in different layer slots; for example, C
361             in a face/prefix-key recipe will execute the C recipe separately for all the
362             layer numbers; or one can use C together with
363             C). Depending on the recipe, these calls may result in the same layout
364             of the resulting layers, or in different layouts.
365            
366             A recipe may be of three kinds: it is either a "first comer wins" which is a space-separated collection of
367             simpler recipes, or C, or a "mutator": C or just C.
368             All recipes must be C<()>-balanced
369             and C<[]>-balanced; so must be the C; in turn, the C is either a
370             layer name, or another recipe. A layer name must be defined either in a visual C section,
371             or be a key in the C section (so it should not have C<+()[]> characters),
372             or be the literal C.
373             When C is processed, first, the resulting layer(s) of the C recipe
374             are calculated; then the layer(s) are processed by the C (one key at a time).
375            
376             The most important C keywords are C (with argument a face name, defined either
377             via a C section, or via C) and C (with argument
378             of the form C, with layer names defined as above). Both
379             select the layer (out of a face, or out of a list) with number equal to the "layer number parameter" in the context
380             of the recipe. The C builder is similar to C, but chooses the "other"
381             layer ("cyclically the next" layer if more than 2 are present).
382            
383             The other selectors are C, C and C; they
384             operate on the base face or face associated to the base face.
385            
386             The simplest forms of C are C (note that
387             C/C/C return C when case-conversion results in no
388             change; use C/C/C if one wants them to behave
389             as Perl operators). Recall that a layer
390             is nothing more than a structure associating a pair "unshifted/shifted character" to the key number, and that
391             these characters may be undefined. These simplest mutators modify these characters
392             independently of their key numbers and shift state (with C making all of
393             them undefined). Similar user-defined simple mutators are C;
394             here C consists of pairs "FROM TO" of characters (with optional spaces between pairs);
395             characters not appearing as FROM become undefined by C.
396             (As usual, characters may be replaced by hex numbers with 4 or more hex digits;
397             separate the number from a neighboring word character by C<.> [dot].)
398            
399             All mutators must have a form C or C, with C
400             C<(),[]>-balanced. Other simple mutators are C (converts
401             control-char [those between 0x00 and 0x1f] to the corresponding [uppercase] character),
402             C (adds a constant to the [numerical code of the] input character
403             so that C becomes C), C (keeps input characters
404             which match, converts everything else to C), C
405             (similar to C, but pairs all characters in the layers based on their position),
406             C (all defined characters are converted to C).
407            
408             The mutator C is similar to , but takes the F<.klc>-style
409             visual C section as the description of the mutation. C may
410             be followed by a character as in C; if not, C is the prefix key from
411             the recipe's execution parameters.
412            
413             The simple mutator C has flavors: one can append C or C
414             to the name, and the resulting characters become prefix keys (the “C-inverted”
415             prefix followed by C behaves as non-inverted prefix followed by C).
416            
417             Some mutators pay attention not only to what the character is, but how it is
418             accessible on the given key: such are C, C,
419             C. Some other mutators also take into
420             account how the key is positioned with respect to the other keys.
421            
422             C assigns a character
423             to a particular column of the keyboard. Which keys are in which columns is
424             governed by how the corresponding
425             visual layer is formatted (shifted to the right by C array of the
426             visual layer). This visual layer is one associated to the face by the
427             C key (and the face is the parameter face of the
428             mutator). C is a comma-separated list;
429             empty positions map to the undefined character.
430            
431             C chooses a mutator based on the row of the keyboard. On the top row,
432             it is the first mutator which is chosen, etc. The list C is separated by C
433             surrounded by whitespace.
434            
435             The mutator C converts some non-prefix characters to prefix
436             characters; the conversion happens if the argument of the mutator coincides with
437             what is at the corresponding position in C, and this position contains
438             a prefix character. (Nowadays this mutator is not very handy — most of its uses
439             may be accomplished by having I prefix characters in appropriate faces.)
440            
441             The mutators C, C process their
442             argument in a special way: the characters in C which duplicate the characters
443             present (on the same key, and possibly with the same modifiers) in C are
444             ignored. The remaining characters are combined “as usual” with “the first comer wins”.
445            
446             The most important mutator is C (and its flavors). (See L mutator>.)
447            
448             Note that C is similar to a selector;
449             it is the only way to insert a
450             layer without a selector, since a bareword is interpreted as a C; C is a synonym
451             of C (repeated as many times as there are layers
452             in the parameter "base face").
453            
454            
455             The recipes in a space-separated list of recipes ("first comer wins") are
456             interpreted independently to give a collection of layers to combine; then,
457             for every key number and both shift states, one takes the leftmost recipe
458             which produces a defined character for this position, and the result is put
459             into the resulting layer.
460            
461             Keep in mind that to understand what a recipe does, one should trace
462             its description in right-to-left order: for example, C creates
463             a layout where C<:> is at position of C<.>, but on the second [=other] layer (essentially,
464             if the base layout is the standard one, it binds the character C<:> to the keypress C).
465            
466             To simplify formatting of F<.kbdd> files, a recipe may be an array reference.
467             The string may be split on spaces, or split after comma or C<|>.
468            
469             =head2 The C mutator
470            
471             The essence of C is to have several mutation rules and choose I
472             of the results of application of these rules. Grouping the rules allows
473             one a flexible way to control what I actually means. The rules may
474             be separated by comma, by C<|>, or by C<|||> (interchangeable with C<||||>).
475            
476             In the simplest case of grouping, C form a C<|>-separated list, and
477             each group consists of one rule. Then I result is one coming from
478             an earlier rule. The groups are separated by C<|>, and the rules inside the
479             group are separated by comma; if more than one rule appears in a group, a
480             different kind of competition appears (inside the group).
481            
482             The I of the generated characters is a list C
483             UNICODE_BLOCK, IN_CASE_PAIR, FROM_NON_ALTGR_POSITION>
484             with lexicographical order (the earlier element is stronger than the ones after it).
485             Here C describes whether a character is generated by
486             Unicode compositing (versus “compatibility compositing” or other
487             “artificially generated” mogrifiers); the older age wins, as well as
488             honest compositing, earlier Unicode blocks, as well as case pairs and
489             characters from non-C-positions. (Experience shows that these rules
490             have a pretty good correlation with being “more suitable for human consumption”.)
491            
492             Moreover, quality in case-pairs is equalized by assigning the strongest
493             I of two. Such pairs are always considered “tied together” when
494             they compete with other characters. (In particular, if a single character
495             with higher quality occupies one of C positions, a
496             case pair with lower quality is completely ignored; so the “other” position
497             may be taken by a single character with yet lower quality.)
498            
499             In addition, the characters which lost the competition for
500             non-C-positions are considered I on C-positions. (With
501             boosted priority compared to mutated C-characters; see above.)
502            
503             This mutator comes in several flavors: one can append to its name
504             C/C/C/C<32OK> (in this
505             order). Unless C is specified, it will not modify characters on a key
506             which produces C when used without modifiers. Unless C<32OK> is specified, it
507             will not produce Unicode characters after C<0xFFFF> (the default is to follow
508             the brain-damaged semantic of prefix keys on Windows). Unless C is
509             specified, the result is optimized by removing duplicates (per key) generated
510             by application of C. With the C modifier, the generated characters
511             are not counted as “obtained by logical rules” when statistics for the generated
512             keyboard layout are calculated.
513            
514             =head2 Linked prefixes
515            
516             On top of what is explained above, there is a way to arrange “linking” of two prefix keys;
517             this linking allows characters which cannot be fit on one (prefixed) key to
518             “migrate” to unassigned positions on the otherwise-prefixed key. (This is
519             similar to migration from non-C-position to C-position.)
520             This is achieved by using mutator rules of the following form:
521            
522             primary = +PRE-GROUPS1|||SHARED||||POST-GROUPS1
523             secondary = PRE-GROUPS2||||PRE-GROUPS1|||SHARED||||POST-GROUPS2
524            
525             Groups with digits are not shared (specific to a particular prefix); C is
526             (effectively) reverted when accessed from the secondary prefix; for the
527             secondary key, the recipes from C which were used in the primary
528             key are removed from C, and are appended to the end of C;
529             the C are skipped when finding assignments for the secondary
530             prefix.
531            
532             In the primary recipe, C<|||> and C<||||> are interchangeable with C<|>.
533             Moreover, if C is empty, the secondary recipe should be written as
534            
535             secondary = PRE-GROUPS2|||PRE-GROUPS1|||SHARED
536            
537             if C is empty, this should be written as one of
538            
539             secondary = PRE-GROUPS2|||SHARED
540             secondary = PRE-GROUPS2||||SHARED
541             secondary = PRE-GROUPS2||||SHARED||||POST-GROUPS2
542            
543             These rules are to allow macro-ization of the common parts of the primary
544             and secondary recipe. Put the common parts as a value of the key
545             C (here C<***> denotes a word), and replace them by
546             the macro C<< >> in the recipes.
547            
548             B: the primary key recipe starts with the C<+> character; it
549             forces interpretation of C<|||> and C<||||> as of ordinary C<|>.
550            
551             If not I, the top-level groups are formed by C<||||> (if present), otherwise by C<|||>.
552             The number of top-level groups should be at most 3. The second of C<||||>-groups
553             may have at most 2 C<|||>-groups; there should be no other subdivision. This way,
554             there may be up to 4 groups with different roles.
555            
556             The second of 3 toplevel C<|||>-groups, or the first of two sublevel C<|||>-groups
557             is the “skip” group. The last of two or three toplevel C<|||>-groups (or of
558             sublevel C<|||>-groups, or the 2nd toplevel C<||||>-group without subdivisions) is the
559             inverted group; the 3rd of toplevel C<||||>-groups is the “extra” group.
560            
561             “Penalize/prohibit” lists start anew in every top-level group.
562            
563             =head2 Atomic mutators rules
564            
565             As explained above, the individual RULES in C may be
566             separated by C<,> or C<|>, or C<|||> or C<||||>. Such an individual
567             rule is a combination of I combined by C<+> operators,
568             and/or preceded by C<-> prefix (with understanding that C<+-> must
569             be replaced by C<-->). The prefix C<-> means I of the
570             rule; the operator C<+> is the composition of the rules.
571            
572             B the atomic rule C<< >> converts its input character into
573             its superscript forms (if such forms exist; for example, C may
574             be converted to C<ᵃ> or C<ª>). The atomic rules C, C, C
575             behave the same as the corresponding MUTATORs. The atomic rule C
576             converts a control-character to the corresponding “uppercase” character:
577             C<^A> is converted to C, and C<^\> is converted to C<\>. (The last
578             4 rules cannot be inverted by C<->.)
579            
580             The composition is performed (as usual) from right to left. B the
581             individual rule C<< +lc+dectrl >> converts C<^A> to C<ᵃ> or C<ª>.
582            
583             In addition to rules listed above, the atomic rules may be of the
584             following types:
585            
586             =over
587            
588             =item *
589            
590             A hex number with ≥4 digits, or a character: implements the composition
591             inverting (compatibility or not) Unicode decompositions into two characters;
592             the character in the rule must be the first character of the decomposition.
593             Here “Unicode decompositions” are either deduced from Unicode decomposition
594             rules (with compatibility decompositions having lower priority), or deduced
595             basing on splitting the name of the character into parts.
596            
597             =item *
598            
599             C<< >> is an inversion of a Unicode decomposition which goes from
600             1 character to 1 character.
601            
602             =item *
603            
604             Flavors of characters C<< >> from Unicode tables come from Unicode
605             1-character to 1-character decompositions
606             marked with C<< >>. B C<< >> for a subscript form;
607             or C<< >>.
608            
609             =item *
610            
611             C<< >> rules TBC ..........................................
612            
613             =item *
614            
615             Calculated rules C<< >> are extracted by a
616             heuristic algorithm which tries to parse the Unicode name of the character.
617            
618             For the best understanding of what these rules produce, inspect
619             results of print_compositions(), print_decompositions() methods documented
620             in L<"SYNOPSIS">. The following “keywords” are processed by the algorithm:
621            
622             WITH, OVER, ABOVE, PRECEDED BY, BELOW (only with LONG DASH)
623            
624             are separators;
625            
626             COMBINING CYRILLIC LETTER, BARRED, SLANTED, APPROXIMATELY, ASYMPTOTICALLY,
627             SMALL (not near LETTER), ALMOST, SQUARED, BIG, N-ARY, LARGE, LUNATE,
628             SIDEWAYS DIAERESIZED, SIDEWAYS OPEN, INVERTED, ARCHAIC, EPIGRAPHIC,
629             SCRIPT, LONG, MATHEMATICAL, AFRICAN, INSULAR, VISIGOTHIC, MIDDLE-WELSH,
630             BROKEN, TURNED, INSULAR, SANS-SERIF, REVERSED, OPEN, CLOSED, DOTLESS, TAILLESS, FINAL
631             BAR, SYMBOL, OPERATOR, SIGN, ROTUNDA, LONGA, IN TRIANGLE, SMALL CAPITAL (as smallcaps)
632            
633             are modifiers. For an C, one scans for
634            
635             QUAD, UNDERBAR, TILDE, DIAERESIS, VANE, STILE, JOT, OVERBAR, BAR
636            
637             TBC ..........................................
638            
639             =item *
640            
641             Additionally, C are considered C variants of
642             their middle letter, as well as C of C<0>.
643            
644             =item *
645            
646             C<< >> rules are obtained by scanning the name for
647            
648             WHITE, BLACK, CIRCLED, BUT NOT
649            
650             as well as for C (as C), paleo-Latin digraphs and C
651             (as C), doubled-letters
652             (as C), C doubled-letters
653             (as C), C (possibly with C
654             or C; as C).
655            
656             =item *
657            
658             Manual prearranged rules TBC ..........................................
659            
660             =item *
661            
662             C<< >> Explicit named substitution rules TBC ..........................................
663            
664             =item *
665            
666             C<< >> Prohibits handling non-substituted input TBC ..........................................
667            
668             =item *
669            
670             C<< >> rules TBC ..........................................
671            
672             =back
673            
674             =head2 Input substitution in atomic rules
675            
676             TBC ..........................................
677            
678             =head2 The C mutator
679            
680             TBC ..............................
681            
682             =head2 Pseudo-mutators for generation of documentation
683            
684             A few mutators do not introduce any characters (in other words, they behave as
685             C) but are used for their side effects: in prefix-key recipes,
686             C introduces documentation of what the prefix key is intended
687             for. Likewise, C allows adding CSS classes to highlight
688             parts of HTML output generated by this module, the parts corresponding to selected
689             characters in a face.
690            
691             C is a comma-separated list, every triple in the
692             list being C. C is one of C/C (which
693             add formatting to the key containing one of the C) or C/C
694             (which add formatting to an individual character displayed on the key),
695             one can add a digit to C to limit to a particular layer in the face
696             (useful when a character appears several times in a face).
697             The lower-case variants select characters basing on the I of a key.
698             One can also append C<=CONTEXT> to C, then the class is added only if
699             C appears as one of the options for the HTML output generator.
700            
701             The CSS rules generated by this module support several classes directly; the
702             rest should be supported by the user-supplied rules. The classes with existing
703             support are: on keys
704            
705             to_w from_w # generate arrows between keys
706             from_nw from_ne to_nw to_ne # generate arrows between keys; will yellow-outline
707             pure # unless combined with this
708             red-bg green-bg blue-bg # tint the key as the whole (as background)
709            
710             On characters
711            
712             very-special need-learn may-guess # provide green/brown/yellow-outlines
713             special # provide blue outline (thick unless combined with
714             thinspecial # <-- this)
715            
716             =head2 Extra CSS classes for documentation
717            
718             In addition, several CSS classes are auto-generated basing on Unicode
719             properties of the character. TBC ........................
720            
721             =head2 Debugging mutators
722            
723             If the bit 0x40 of the environment variable C
724             (decimal or C<0xHEX>) is set, debugging output for mutators is enabled:
725            
726             r ║ ║ ┆ ║ ṙ ṛ ┆ ║ ║ ║ ║ ⓡ ┆
727             ║ ║ ┆ ║ Ṙ Ṛ ┆ ║ ║ ║ ║ Ⓡ ┆
728             ║ ║ ặ ┆ ║ ┆ ║ ║ ║ ║ ┆
729             ║ ║ Ặ ┆ ║ ┆ ║ ║ ║ ║ ┆
730             Extracted [ …list… ] deadKey=00b0
731            
732             The output contains a line per character assigned to the keyboard key (if
733             there are 2 layers, each with lc/uc variants, there are 4 lines); empty lines are
734             omitted. The first column indicates the base character (lc of the 1st layer) of
735             the key; the separator C<║> indicates C<|>-groups in the mutator. Above, the first
736             group produces no mutations, the second group mutates only the characters in
737             the second layer, and the third group produces two mutations per a character in
738             the first layer. The 7th group is also producing mogrifications on the 1st layer.
739            
740             The next example clarifies C<┆>-separator: to the left of it are mogrifications which
741             come in case pairs, to the right are mogrifications where mogrified-lc is not
742             a case pair of mogrified-uc:
743            
744             t ║ ║ ᵵ ║ ꞇ ┆ ʇ ║ ┆ ║
745             ║ ║ ║ Ꞇ ┆ ᴛ ║ ┆ ║
746             ║ ║ ║ ┆ ║ ꝧ ┆ ║
747             ║ ║ ║ ┆ ║ Ꝧ ┆ ║
748             Extracted [ …list… ] deadKey=02dc
749            
750             In this one, C<│> separates mogrifications with different priorities (based on
751             Unicode ages, whether the atomic mutator was compatibility/synthetic one, and the
752             Unicode block).
753            
754             / ║ ║ ║ ║ ║ │ ∴ ║ ║
755             ║ ║ ║ ║ ║ │ ≘ ≗ ║ ║
756             ║ ║ ║ ║ ║ / │ ⊘ ║ ║
757             Extracted [ …list… ] deadKey=00b0
758            
759             For secondary mogrifiers, where the distinction between C<|||> and C<|>
760             matters, some of the C<║>-separators are replaced by C<┃>. Additionally,
761             there are two rounds of extraction: first the characters corresponding
762             to the primary mogrifier are TMP-extracted (from the groups PRE-GROUPS1,
763             COMMON); then what is extracted from COMMON is put back at the
764             effective end (at the end of POST-GROUPS2, or, if no such, at
765             the beginning of COMMON):
766            
767             t ║ ║ ᵵ ┃ ┃ ʇ │ │ ꞇ ┆ ║
768             ║ ║ ┃ ┃ │ ᴛ │ Ꞇ ┆ ║
769             ║ ║ ┃ ┃ │ │ ꝧ ┆ ║
770             ║ ║ ┃ ┃ │ │ Ꝧ ┆ ║
771             TMP Extracted: <…list…> from layers 0 0 | 0 0
772             t ║ ║ ᵵ ┃ ꞇ ┆ ʇ ┋ ┃ ┆ │ ┆ │ ┆ ║
773             ║ ║ ┃ Ꞇ ┆ ᴛ ┋ ┃ ┆ │ ┆ │ ┆ ║
774             ║ ║ ┃ ┆ ┋ ┃ ┆ │ ┆ │ ꝧ ┆ ║
775             ║ ║ ┃ ┆ ┋ ┃ ┆ │ ┆ │ Ꝧ ┆ ║
776             Extracted [ …list… ] deadKey=02dc
777            
778             In the second part of the debugging output, the part of common which is put
779             back is separated by C<┋>.
780            
781             When bit 0x80 is set, much more lower-level debugging info is printed. The
782             arrays at separate depth mean: group number, priority, not-cased-pair, layer
783             number, subgroup, is-uc. When bit 0x100 is set, the debugging output for
784             combining atomic mutators is enabled.
785            
786             =head2 Personalities
787            
788             A personality C is defined in the section C. (C may
789             include slashes - untested???)
790            
791             An array C gives the list of layers forming the face. (As of version
792             0.03, only 2 layers are supported.) The string C is a “fallback”
793             face: if a keypress is not defined by C, it would be taken from
794             C; additionally, it affects the C key bindings: for example,
795             if C has C where C has C<γ>, and there is a binding for
796             C, the same binding applies for C.
797             .........
798            
799             =head2 Substitutions
800            
801             In section C one defines composition rules which may be
802             used on par with composition rules extracted from I.
803             An array C is converted to a hash accessible as C<< >> from
804             a C filter of satellite face processor. An element of the array
805             must consist of two characters (the first is mapped to the second one). If
806             both characters have upper-case variants, the translation between these variants
807             is also included.
808            
809             =head2 Classification of diacritics
810            
811             The section C contains arrays each describing a class of
812             diacritic marks. Each array may contain up to 7 elements, each
813             consisting of diacritic marks in the order of similarity to the
814             "principal" mark of the array. Combining characters may be
815             preceded by horizontal space. Seven elements should contain:
816            
817             Surrogate chars; 8bit chars; Modifiers
818             Modifiers below (or above if the base char is below)
819             Vertical (or Comma-like or Doubled or Dotlike or Rotated or letter-like) Modifiers
820             Prime-like or Centered modifiers
821             Combining
822             Combining below (or above if base char is below)
823             Vertical combining and dotlike Combining
824            
825             These lists determine what a C filter of satellite face processor
826             will produce when followed by whitespace characters
827             (possibly with modifiers) C. (So, if F<.kbdd> file
828             uses C) this determines what diacritic prefix keys produce.
829            
830             =head2 Compose Key
831            
832             The scalar configuration variable C controls the ID of the prefix
833             key to access F<.Compose> composition rules. The rules are read from files
834             in the class/object variable; set this variable with
835            
836             $self->set__value('ComposeFiles', [@Files]); # Class name (instead of $self) is OK here
837            
838             The format of the files is the same as for X11’s F<.Compose> (but C are
839             not supported); only compositions starting with C<< >>, having no
840             deadkeys, and (on Windows) expanding to 1 UTF-16 codepoint are processed. (See
841             L<“systematic” parts of rules in the standard
842             F<.XCompose>|"“Systematic” parts of rules in a few .XCompose"> — see lines with postfix C.)
843            
844             Repeating this prefix twice accesses characters via their HTML/MathML entity names. The files
845             are as above (the variable name is C); the format is the same as in
846             F.
847            
848             Repeating this prefix 3 times accesses characters via their C codes;
849             the variable C contains files in the format of F.
850             It is recommended to download these files (or the later flavors)
851            
852             http://www.x.org/releases/X11R7.6/doc/libX11/Compose/en_US.UTF-8.html
853             http://www.w3.org/TR/xml-entity-names/bycodes.html
854             http://tools.ietf.org/html/rfc1345
855            
856             See L<"SYNOPSIS"> for an example. Note that this mechanism does not assign this
857             prefix key to any particular position on the keyboard layout; this should be
858             done elsewhere. Implementation detail: if some of these 3 maps cannot be created,
859             they are skipped (so less than 3 chained maps are created).
860            
861             For more control, one can make this configuration variable into an array. The
862             value C is equivalent to the array with elements
863            
864             ComposeFiles,dotcompose,warn,KEY
865             EntityFiles,entity,warn,,KEY
866             rfc1345Files,rfc1345,warn,,KEY
867            
868             Five comma-separated fields are: the variable controlling the filelist,
869             the type of files in the filelist (only the 3 listed types are supported now),
870             whether to warn when a particular flavor
871             of composition table could not be loaded, the global access prefix, the prefix
872             for access from the previous element (chained access).
873            
874             If C (etc.) has more than 1 file, bindings from earlier files
875             take precedence over bindings from the later ones. If the same sequence is
876             bound several times inside a file, a later binding takes precedence.
877            
878             =head2 Names of prefix keys
879            
880             Section C defines naming of prefix keys. If not named there (or in
881             processed F<.klc> files), the C property will be used; if none,
882             Unicode name of the character will be used.
883            
884             =head2 More than 2 layers and/or exotic modifier keys
885            
886             This is controlled by C, C, and C
887             configuration arrays. TBC..................................
888            
889             =head2 CAVEATS for German/French/BÉPO/Neo keyboards
890            
891             Non-US keycaps: the character "a" is on C<(VK_)A>, but its scancode is now different.
892             E.g., French's A is on 0x10, which is US's Q. Our table of scancodes is
893             currently hardwired. Some pictures and tables are available on
894            
895             http://bepo.fr/wiki/Pilote_Windows
896            
897             With this module, the scancode and the C-code for a position in a layout
898             are calculated via the C configuration variable; the first recognized
899             character at the given position of this layer is translated to
900             the C-code (using a hardwired table). The mapping of C-codes
901             to scancodes is currently hardwired.
902            
903             For “unusual” keys, one can use the C subsection of the face to describe
904             its scancode (the first entry in the array) and the bindings. If the scancode
905             is empty, the name of the key is translated to a scancode using the hardwired
906             tables.
907            
908             =head1 Keyboards: on ease of access (What makes an easy-to-use keyboard layout)
909            
910             The content of this section has no I relationship to the functionality
911             of this module. However, we feel that it is better that the user of this
912             module understands these concerns. Moreover, it is these concerns which
913             lead to the principles underlying the functionality of this module.
914            
915             =head2 On the needs of keyboard layout users
916            
917             Let's start with trivialities: different people have different needs
918             with respect to keyboard layouts. For a moment, ignore the question
919             of the repertoire of characters available via keyboard; then the most
920             crucial distinction corresponds to a certain scale. In absence of
921             a better word, we use a provisional name "the required typing speed".
922            
923             One example of people on the "quick" (or "rabid"?) pole of this scale are
924             people who type a lot of text which is either "already prepared", or for
925             which the "quality of prose" is not crucial. Quite often, these people may
926             type in excess of 100 words per minute. For them, the most important
927             questions are of physical exhaustion from typing. The position
928             of most frequent letters relative to the "rest" finger position, whether
929             frequently typed together letters are on different hands (or at least
930             not on the same/adjacent fingers), the distance fingers must travel
931             when typing common words, how many keypresses are needed to reach
932             a letter/symbol which is not "on the face of the keyboard" - their
933             primary concerns are of this kind.
934            
935             On the other, "deliberate", pole these concerns cease to be crucial.
936             On this pole are people who type while they "create" the text, and
937             what takes most of their focus is this "creation" process. They may
938             "polish their prose", or the text they write may be overburdened by
939             special symbols - anyway, what they concentrate on is not the typing itself.
940            
941             For them, the details of the keyboard layout are important mostly in
942             the relation to how much they I the writer from the other
943             things the writer is focused on. The primary question is now not
944             "how easy it is to type this", but "how easy it is to I how
945             to type this". The focus transfers from the mechanics of finger movements
946             to the psycho/neuro/science of memory.
947            
948             These questions are again multifaceted: there are symbols one encounters
949             every minute; after you recall once how to access them, most probably
950             you won't need to recall them again - until you have a long interval when
951             you do not type. The situation is quite different with symbols you need
952             once per week - most probably, each time you will need to recall them again
953             and again. If such rarely used symbols/letters are frequent (since I
954             of them appear), it is important to have an easy way to find how to type them;
955             on the other hand, probably there is very little need for this way to
956             be easily memorizable. And for symbols which you need once per day, one needs
957             both an easy way to find how to type them, I the way to type them should
958             better be easily memorizable.
959            
960             Now add to this the fact that for different people (so: different usage
961             scenarios) this division into "all the time/every minute/every day/every week"
962             categories is going to be different. And one should not forget important
963             scenario of going to vacation: when you return, you need to "reboot" your
964             typing skills from the dormant state.
965            
966             =head2 On “mixing” several “allied” layouts
967            
968             On the other hand, note that the questions discussed above are more or less
969             orthogonal: if the logic of recollection requires ω to be related in some
970             way to the W-key,
971             then it does not matter where the W-key is on the keyboard - the same logic
972             is applicable to the QWERTY base layout, or BÉPO one, or Colemak, or Dvorak.
973             This module concerns itself I with the questions of "consistency" and
974             the related question of "the ease of recall"; we care only about which symbols
975             relate to which "base keys", and do not care about where the base keys sit on
976             the physical keyboard.
977            
978             B The “main island” of the keyboard contains a 4×10 rectangle
979             of keys. So if a certain collection of special keys may be easily memorized
980             as a rectangular table, it is nice to be able to map this table to the
981             physical keyboard layout. This module contains tools making this task easy.
982            
983             Now consider the question of the character repertoire: a person may need ways
984             to type "continuously" in several languages; quite often one must type
985             a “standalone” foreign word in a sentence; in addition to this, there may
986             be a need to I type "standalone" characters or symbols outside
987             the repertoire of these languages. Moreover, these languages may use different
988             scripts (such as Polish/Bulgarian/Greek/Arabic/Japanese), or may share a
989             "bulk" of their characters, and differ only in some "exceptional letters".
990             To add insult to injury, these "exceptional letters" may be rare in the language
991             (such as ÿ in French or à in Swedish) or may have a significant letter frequency
992             (such as é in French) or be somewhere in between (such as ñ in Spanish).
993            
994             And the non-language symbols do not need to be the I symbols (although
995             often they are). An English-language discussion of etymology at the coffee table
996             may lead to a need to write down a word in polytonic greek, or old norse;
997             next moment one would need to write a phonetic transcription in IPA/APA
998             symbols. A discussion of keyboard layout may involve writing down symbols
999             for non-character keys of the keyboard. A typography freak would optimize
1000             a document by fine-tuned whitespaces. Almost everybody needs arrow symbols,
1001             and many people would use box drawing characters if they had a simple access
1002             to them.
1003            
1004             Essentially, this means that as far as it does not impact other accessibility
1005             goals, it makes sense to have unified memorizable access to as many
1006             symbols/characters as possible. (An example of impacting other aspects:
1007             Microsoft's (and IBM's) "US International" keyboards steal characters C<`~'^">:
1008             typing them produces "unexpected results" - they are deadkeys. This
1009             significantly simplifies entering characters with accents, but makes it
1010             harder to enter non-accented characters.)
1011            
1012             =head2 The simplest rules of design of “large” keyboard layouts
1013            
1014             One of the most known principles of design of human-machine interaction
1015             is that "simple common tasks should be simple to perform, and complicated
1016             tasks should be possible to perform". I strongly disagree with this
1017             principle - IMO, it lacks a very important component: "a gradual increase
1018             in complexity". When a certain way of doing things is easy to perform, and another
1019             similar way is still "possible to perform", but on a very elevated level
1020             of complexity, this leads to a significant psychological barrier erected
1021             between these two ways. Even when switching from the first way to the other one
1022             has significant benefits, this barrier leads to self-censorship. Essentially,
1023             people will
1024             ignore the benefits even if they exceed the penalty of "the elevated level of
1025             complexity" mentioned above. And IMO self-censorship is the worst type of
1026             censorship. (There is a certain similarity between this situation and that
1027             of "self-fulfilling prophecies". "People won't want to do this, so I would not
1028             make it simpler to do" - and now people do not want to do this...)
1029            
1030             So I would add another clause to the law above: "and moderately complicated
1031             tasks should remain moderately hard to perform". What does it tell us in
1032             the situation of keyboard layout? One can separate several levels of
1033             complexity.
1034            
1035             =over 10
1036            
1037             =item Basic:
1038            
1039             There should be some "base keyboards": keyboard layouts used for continuous
1040             typing in a certain language or script. Access from one base keyboard to
1041             letters of another should be as simple as possible.
1042            
1043             =item By parts:
1044            
1045             If a symbol can be thought of as a combination of certain symbols accessible
1046             on the base keyboard, one should be able to "compose" the symbol: enter it
1047             by typing a certain "composition prefix" key then the combination (as far
1048             as the combination is unambiguously associated to one symbol).
1049            
1050             The "thoughts" above should be either obvious (as in "combining a and e should
1051             give æ") or governed by simple mnemonic rules; the rules should cover as
1052             wide a range as possible (as in "Greek/Coptic/Hebrew/Russian letters are
1053             combined as G/C/H/R and the corresponding Latin letter; the correspondence is
1054             phonetic, or, in presence of conflicts, visual").
1055            
1056             =item Quick access:
1057            
1058             As many non-basic letters as possible (of those expected to appear often)
1059             should be available via shortcuts. Same should be applicable to starting
1060             sequences of composition rules (such as "instead of typing C
1061             and C<'> one can type C).
1062            
1063             =item Smart access
1064            
1065             Certain non-basic characters may be accessible by shortcuts which are not
1066             based on composition rules. However, these shortcuts should be deducible
1067             by using simple mnemonic rules (such as "to get a vowel with `-accent,
1068             type C-key with the physical keyboard's key sitting below the vowel key").
1069            
1070             =item Superdeath:
1071            
1072             If everything else fails, the user should be able to enter a character by
1073             its Unicode number (preferably in the most frequently referenced format:
1074             hexadecimal).
1075            
1076             =back
1077            
1078             =over
1079            
1080             B This does not seem to be easily achievable, but it looks like a very nifty
1081             UI: a certain HotKey is reserved (e.g., C);
1082             when it is tapped, and a character-key is pressed (for example, B) a
1083             menu-driven interface pops up where user may navigate to different variants
1084             of B, Beta, etc - each of variants with a hotkey to reach I, and with
1085             instructions how to reach it later from the keyboard without this UI.
1086            
1087             Also: if a certain timeout passes after pressing the initial HotKey, an instruction
1088             what to do next should appear.
1089            
1090             =back
1091            
1092             =head2 The finer rules of design of “large” keyboard layouts
1093            
1094             Here are the finer points elaborating on the levels of complexity discussed above:
1095            
1096             =over 4
1097            
1098             =item 1
1099            
1100             It looks reasonable to allow "fuzzy mnemonic rules": the rules which specify
1101             several possible variants where to look for the shortcut (up to 3-4 variants).
1102             If/when one forgets the keying of the shortcut, but remembers such a rule,
1103             a short experiment with these positions allows one to reconstruct the lost
1104             memory.
1105            
1106             =item
1107            
1108             The "base keyboards" (those used for continuous typing in a certain language
1109             or script) should be identical to some "standard" widely used keyboards.
1110             These keyboards should differ from each other in position of keys used by the
1111             scripts only; the "punctuation keys" should be in the same position. If a
1112             script B has more letters than a script A, then a lot of
1113             "punctuation" on the layout A will be replaced by letters in the layout B.
1114             This missing punctuation should be made available by pressing a modifier
1115             (C? compare with L's top row).
1116            
1117             =item
1118            
1119             If more than one base keyboard is used, there must be a quick access:
1120             if one needs to enter one letter from layout B when the active layout is A, one
1121             should not be forced to switch to B, type the letter, then switch back
1122             to A. It should better be available I on a prefixed combination "C".
1123            
1124             =item
1125            
1126             One should consider what the C does when the layouts A
1127             and B are identical on a particular key (e.g., punctuation). One can go with the "Occam's
1128             razor" approach and make the C prefix into the do-nothing identity map.
1129             The alternative is to make it access some symbols useful both for
1130             script A and script B. It is a judgement call.
1131            
1132             Note that there is a gray area when layouts A and B are not identical,
1133             but a key C produces punctuation in layout A, and a letter in layout
1134             B. Then when in layout B, this punctuation is available on C,
1135             so, in principle, C would duplicate the functionality
1136             of C. Compare with "there is more than one way to do it" below;
1137             remember that OS (or misbehaving applications) may make some keypresses
1138             "unavailable". I feel that in these situations, “having duplication” is
1139             a significant advantage over “having some extra symbols available”.
1140            
1141             =item
1142            
1143             The considerations in the two preceding parts are applicable also in the
1144             case when there are more “allied” layouts than A and B. Ways to make it possible
1145             are numerous: one can have several alternative C’s, B one
1146             can use a I prefix key C. With a large enough
1147             collection of layouts, a combination of both approaches may be visualized
1148             as a chain of layouts
1149            
1150             S< >… C B C
1151            
1152             here we have two quick access prefix keys, the left one C, and the right one
1153             C. Superscripts C<² ³ …> mean “pressing the prefix key several times”;
1154             the prefix keys move one left/right along the chain of layouts.
1155            
1156             =item
1157            
1158             The three preceding parts were concerned with entering one character from
1159             an “allied” layout. To address another frequent need, entering one word
1160             from an “allied” layout, yet another approach may be needed. The solution may
1161             be to use a certain combination of modifier keys. (How to choose useful
1162             combinations? See: L<"A convenient assignment of KBD* bitmaps to modifier keys">.)
1163            
1164             (Using “exotic” modifier keys may be impossible in some badly coded applications.
1165             This should not stop one from implementing this feature: sometimes one has a choice
1166             from several applications performing the same task. Moreover, since this feature
1167             is a “frill”, there is no pressing need to have it I available.)
1168            
1169             =item
1170            
1171             Paired symbols (such as ≤≥, «», ‹›, “”, ‘’) should be put on paired
1172             keyboard's keys: <> or [] or ().
1173            
1174             =item
1175            
1176             "Directional symbols" (such as arrows) should be put either on numeric keypad
1177             or on a 3×3 subgrid on the letter-part of the keyboard (such as QWE/ASD/ZXC).
1178             (Compare with [broken?] implementation in L.)
1179            
1180             =item
1181            
1182             For symbols that are naturally thought of as sitting in a table, one can
1183             create intuitive mapping of quite large tables to the keyboard. Split each
1184             key in halves by a horizontal line, think of C as sitting in the
1185             top half. Then ignoring C<`~> key and most of punctuation on the right
1186             hand side, keyboard becomes an 8×10 grid. Taking into account C
1187             modifier (either as an extra bit, or as splitting a key by a horizontal line),
1188             one can map up to 8×10×2 (or 8×20) table to a keyboard.
1189            
1190             B Think of L.
1191            
1192             =item
1193            
1194             Cheatsheets are useful. And there are people who are ready to dedicate a
1195             piece of their memory to where on a layout is a particularly useful to them
1196             symbol. So even if there is no logical position for a certain symbol, but
1197             there is an empty slot on layout, one should not hesitate in using this slot.
1198            
1199             However, this is distracting to people who do not want to dedicate
1200             their memory to "special cases". So it makes sense to have three kinds of
1201             cheatsheets for layouts: one with special cases ignored (useful for most
1202             people), one with all general cases ignored (useful for checks "is this
1203             symbol available in some place I do not know about" and for memorization),
1204             and one with all the bells and whistles.
1205            
1206             (Currently this module allows emitting HTML keyboard layouts with such
1207             information indicated by classes in markup. The details may be treated
1208             by the CSS rules.)
1209            
1210             =item
1211            
1212             "There is more than one way to do it" is not a defect, it is an asset.
1213             If it is a reasonable expectation to find a symbol X on keypress K', and
1214             the same holds for keypress K'' I they both do not conflict with other
1215             "being intuitive" goals, go with both variants. Same for 3 variants, 4
1216             - now you get my point.
1217            
1218             B The standard Russian phonetic layout has Ё on the C<^>-key; on the
1219             other hand, Ё is a variant of Е; so it makes sense to have Ё available on
1220             C as well. Same for Ъ and Ь.
1221            
1222             =item
1223            
1224             Dead keys which are "abstract" (as opposed to being related to letters
1225             engraved on physical keyboard) should better be put on modified state
1226             of "zombie" keys of the keyboard (C, C, C, C).
1227            
1228             B Making C a prefix key may lead to usability issues
1229             for people used to typing CAPITALIZED PHRASES by keeping C pressed
1230             all the time. As a minimum, the symbols accessed via C
1231             should be strikingly different from those produced by C so that
1232             such problems are noted ASAP. Example: on the first sight, producing
1233             C on C or C
1234             looks like a good idea. Do not do this: the visually indistinguishable
1235             C would lead to significantly hard-to-debug problems if
1236             it was unintentional.
1237            
1238             =back
1239            
1240            
1241             =head2 Explanation of keyboard layout terms used in the docs
1242            
1243             The aim of this module is to make keyboard layout design as simple as
1244             possible. It turns out that even very elaborate designs can be made
1245             quickly and the process is not very error-prone. It looks like certain
1246             avenues not tried before are now made possible; at least I'm not aware of
1247             other attempts in this direction. One can make layouts which can be
1248             "explained" very concisely, while they contain thousand(s) of accessible
1249             letters.
1250            
1251             Unfortunately, being on uncharted territory, in my explanations I'm
1252             forced to use home-grown terms. So be patient with me... The terms are
1253             I, I, I and I. (One may want to compare them
1254             with what ISO 9995 does: L…. On
1255             the other hand, most parts of ISO 9995 look as remote from being ergonomic
1256             [in the sense discussed in these sections] as one may imagine!)
1257            
1258             In what follows,
1259             the words I and I are used interchangeably. A I
1260             means a physical key on a keyboard tapped (possibly together with
1261             one of modifiers C, C - or, rarely, L<[right] C|http://www.microsoft.com/resources/msdn/goglobal/keyboards/kbdcan.html>;
1262             more advanced layouts may use “extra” modifiers). The key C
1263             is often marked as such on the keycap, otherwise it is just the "right" C key; at least
1264             on Windows, for many simple layouts it can be replaced by C. What is a I?
1265             Tapping such a key does not produce any letter, but modifies what the next
1266             keypress would do (sometimes it is called a I; in C terms,
1267             it is probably a I. Sometimes, prefix keys may be “chained”; then
1268             insertion of a character happens not on the second keypress, but on the third one [or fourth/etc]).
1269            
1270             To describe which character (or a prefix) is produced by a keypress one must describe
1271             I: which prefix keys were already tapped, and which modifier keys are
1272             currently pressed. It is natural to consider the C modifier specially: let’s
1273             remove it from the context; now given a context, a keypress may produce two characters:
1274             one with C, one without. A I<layer> describes such a pair of characters (or
1275             prefixes) for every key of the keyboard.
1276            
1277             So, the plain I is the part of keyboard layout accessible by using only
1278             non-prefix keys (possibly in combination with C). Many keyboard layouts
1279             have up to 2 additional layers accessible without prefix keys: the C-layer and C-layer.
1280            
1281             On the simplest layouts, such as "US" or "Russian", there are no prefix keys or “extra”
1282             modifier keys -
1283             but this is only feasible for languages which use very few characters with
1284             diacritic marks. However, note that most layouts do not use
1285             C-layer - sometimes it is claimed that this causes problems with
1286             system/application interaction.
1287            
1288             A I consists of the layers of the layout accessible with a particular
1289             combination of prefix keys. The I consists of the plain layer
1290             and “additional prefix-less layers” of the layout;
1291             it is the part of layout accessible without switching "sticky state" and
1292             without using prefix keys. There may be up to 3 layers (Plain, C, C)
1293             per face on the standard Windows keyboard layouts. A I is a face exposed after pressing
1294             a prefix key (or a chain of prefix keys).
1295            
1296             A I is a collection of faces: the primary face, plus one face per
1297             a defined prefix-key (or a prefix chain). Finally, a I is a collection of personalities
1298             (switchable by sticky keys [like C] and/or in other system-specific ways)
1299             designed to work smoothly together. For example, in multi-script settings, there may be:
1300            
1301             =over 4
1302            
1303             =item *
1304            
1305             one personality per script (e.g., Latin/Greek/Cyrillic/Arabic);
1306            
1307             =item *
1308            
1309             every personality may have several script-specific additional (“satellite”) faces (one per a particular diacritic for Latin
1310             personality, one for regional/historic “flavors” for Cyrillic personality, one per aspiration type for Greek personality, etc);
1311            
1312             =item *
1313            
1314             every personality may also have “liaison” faces accessing the base faces of other personalities;
1315            
1316             =item *
1317            
1318             with chained prefixes, it is easy to design intuitive ways to access satellite faces of other personalities;
1319             then every personality will also contain the satellite faces of I personalities (on different prefix chains!).
1320            
1321             =item *
1322            
1323             For access to “technical symbols” (currencies/math/IPA etc), the personalities may share a certain collection
1324             of faces assigned to the same prefix keys.
1325            
1326             =back
1327            
1328             =head2 Example of keyboard layout groups
1329            
1330             Start with a I elaborate example (it is more or less a simplified variant
1331             of the L layout|http://k.ilyaz.org>. A keyboard layout group may consist of
1332             phonetically matched Latin and Cyrillic personalities, and visually matched Greek
1333             and Math personalities. Several prefix-keys may be shared by all 4 of these
1334             personalities; in addition, there would be 4 prefix-keys allowing access to primary
1335             faces of these 4 personalities from other personalities of the group. Also, there
1336             may be specialised prefix keys tuned for particular need of entering Latin script,
1337             Cyrillic script, Greek script, and Math.
1338            
1339             Suppose that there are 8 specialized-for-Latin prefix-keys (for example, name them
1340            
1341             grave/tilde/hat/breve/ring_above/macron/acute/diaeresis
1342            
1343             although in practice each one of them may do more than the name suggests).
1344             Then the Latin personality will have the following 13 faces:
1345            
1346             Primary/Latin-Primary/Cyrillic-Primary/Greek-Primary/Math-Primary
1347             grave/tilde/hat/breve/ring_above/macron/acute/diaeresis
1348            
1349             B Here Latin-Primary is the face one gets when one presses
1350             the Access-Latin prefix-key when in Latin mode; it may be convenient to define
1351             it to be the same as Primary - or maybe not. For example, if one defines it
1352             to be Greek-Primary, then this prefix-key has a convenient semantic of flipping
1353             between Latin and Greek modes for the next typed character: when in
1354             Latin, C would enter α, when in Greek, the same keypresses
1355             [now meaning "Latin-PREFIX-KEY α"] would enter "a".
1356            
1357             Assume that the only “extra” modifier used by the layout is C. Then each of
1358             these faces would consist of two layers: the plain one, and the C-
1359             one. For example, pressing C with a key on Greek face could add
1360             diaeresis to a vowel, or use a modified ("final" or "symbol") "glyph" for
1361             a consonant (as in σ/ς θ/ϑ). Or, on Latin face, C may produce æ. Or, on a
1362             Cyrillic personality, AltGr-я (ya) may produce ѣ (yat').
1363            
1364             Likewise, the Greek personality may define special prefix-keys to access polytonic
1365             greek vowels. “Chaining” these prefix keys after the C prefix
1366             key would make it possible to enter polytonic Greek letters from non-Greek
1367             personalities without switching to the Greek personality.
1368            
1369             With such a keyboard layout group, to type one Greek word in a Cyrillic text one
1370             would switch to the Greek personality, then back to Cyrillic; but when all one
1371             needs to type now is only one Greek letter, it may be easier to use the
1372             "Greek-PREFIX-KEY letter" combination, and save switching back to the
1373             Cyrillic personality. (Of course, for this to work the letter should be
1374             on the primary face of the Greek personality.)
1375            
1376             How to make it possible to easily enter a short Greek word when in Cyrillic mode?
1377             If one uses one more “extra” modifier key (say, C), one could
1378             reserve combinations of modifiers with this key to “use” other personality. Say,
1379             C would enter Greek β, C would enter
1380             Cyrillic б, etc.
1381            
1382             =head2 “Onion rings” approach to keyboard layout groups
1383            
1384             Looks too complicated? Try to think about it in a different way: there
1385             are many faces in a keyboard layout group; break them into 3 "onion rings":
1386            
1387             =over 4
1388            
1389             =item I faces
1390            
1391             one can "switch to such a face" and type continuously using
1392             this face without pressing prefix keys. In other words, these faces
1393             can be made "active" (in an OS-dependent way).
1394            
1395             When one CORE face is active, the letters in another CORE face are still
1396             accessible by pressing one particular prefix key before each of these
1397             letters. This prefix key does not depend on which core face is
1398             currently "active".
1399            
1400             =item I faces
1401            
1402             one cannot "switch to them", however, letters
1403             in these faces are accessible by pressing one particular prefix key
1404             before this letter. This prefix key does not depend on which
1405             core face is currently "active".
1406            
1407             =item I faces
1408            
1409             one cannot "switch to them", and letters in these faces
1410             are accessible from one particular core face only. One must press a
1411             prefix key before every letter in such faces.
1412            
1413             (In presence of “chained prefixes”, the description is less direct:
1414             these faces are much easier to access from one particular CORE face.
1415             From another CORE face, one must precede this prefix key by the
1416             access-that-CORE-face prefix.)
1417            
1418             =back
1419            
1420             For example, when entering a mix of Latin/Cyrillic scripts and math,
1421             it makes sense to make the base-Latin and base-Cyrillic faces into
1422             the core; it is convenient when (several) Math faces and a Greek face
1423             can be made universally accessible. On the other hand, faces containing
1424             diacritized Latin letters and diacritized Cyrillic letters should better
1425             be made satellite; this avoids a proliferation of prefix keys which would
1426             make typing slower.
1427            
1428             Comparing to the terms of the preceding section, the CORE faces correspond
1429             to personalities. A personality I the base face from other personalities;
1430             it may also import satellite faces from other personalities.
1431            
1432             In a personality, one should make access to satellite faces, the imported
1433             CORE faces, and the universally accessible faces as simple as possible.
1434             If “other” satellite faces are imported, the access to them may be more
1435             cumbersome.
1436            
1437             =head2 Large Latin layouts: on access to diacritic marks
1438            
1439             Every prefix key has a numeric I. On Windows, there are situations
1440             when this numeric ID may be visible to the user. (This module makes every
1441             effort to make this happen as rarely as possible. However, this effort
1442             blows up the size of the layout DLL, and at some moment one may hit the
1443             L.
1444             To reduce the size of the DLL, the module makes a triage, and won’t protect the ID from leaking in some rare cases.)
1445             When such a leak happens, what the user sees is the character with this codepoint.
1446             So it makes sense to choose the ID to be the codepoint of a character “related
1447             to what the prefix key ‘does’”.
1448            
1449             The logic: if the prefix keys add some diacritic, the ID should be the
1450             I related to this diacritic: either
1451             C’s 8-bit characters with high bit set, or
1452             if none with the needed glyph, suitable non-Latin-1 "spacing modifier letters" or
1453             "spacing clones of diacritics".
1454            
1455             If followed by “special keys”, one should be able to access other related
1456             modifier letters and combining characters (see L<"Classification of diacritics">
1457             and the section C in L
1458             layout|http://search.cpan.org/~ilyaz/UI-KeyboardLayout/examples/izKeys.kbdd>);
1459             one possible convenient choice is:
1460            
1461             =over 4
1462            
1463             =item The second press of the prefix key
1464            
1465             The principal combining mark;
1466            
1467             =item SPACE
1468            
1469             The primary non-ASCII spacing modifier letter;
1470            
1471             =item SPACE-related (NBSP, or C, or C)
1472            
1473             The secondary/ternary/etc modifier letter;
1474            
1475             =item digits (possibly with C and/or C)
1476            
1477             related combining marks (with C and/or C, other categories
1478             from L<"Classification of diacritics">).
1479            
1480             =item C<'> or C<"> (possibly with C)
1481            
1482             secondary/ternary/etc combining marks (or, if these are on
1483             digits, replace by prime-shape modifier chars).
1484            
1485             =back
1486            
1487             =head2 The choice of prefix keys
1488            
1489             Some stats on prefix keys: C uses 41 prefix keys for diacritics (but 15 are fake, see below!);
1490             L uses 24|http://www.macfreek.nl/memory/Mac_Keyboard_Layout> (not counting prefix №, action=specials
1491             on L:
1492            
1493             "'@2#3%5^67*8AaCcEeGghHjJ KkMmNnQqRrsUuvwWYyZz‘’“ default=terminator
1494             №ʺʹƧƨƐɛƼƽƄƅ⁊ȢȣƏəƆɔƎǝƔɣƕǶƞȠ K’ĸƜɯŊŋƢƣƦʀſƱʊʌƿǷȜȝƷʒʻʼʽ №
1495            
1496             ); bépo uses 20, while EurKey uses 8, and L uses 5|http://www.macfreek.nl/memory/Mac_Keyboard_Layout>.
1497             On the other end of spectrum, there are 10 US keyboard keys with "calculatable" relation to Latin diacritics:
1498            
1499             `~^-'",./? --- grave/tilde/hat/macron/acute/diaeresis/cedilla/dot/stroke/hook-above
1500            
1501             To this list one may add a "calculatable" key C<$> as I;
1502             on the other hand, one should probably remove C since C should better
1503             be "set in stone" to denote C<¿>. If one adds Greek, then the calculatable positions
1504             for aspiration are on C<[ ]> (or on C<( )>). Of widely used Latin diacritics, this
1505             leaves out I (and doubled I);
1506             these diacritics should be either “mixed in” with similar "calculatable" diacritics
1507             (for example, may either create a character with cedilla, or with
1508             ogonek — depending on the character), or should be assigned on less intuitive positions.
1509            
1510             Extra prefix keys of L|http://www.pentzlin.com/info2-9995-3-V3.pdf>:
1511             I.
1512             Additionally, the following diacritics produce only 4 precomposed characters: ṲṳḀḁ, so their use as prefix characters is questionable:
1513             I
1514             (Here ↓ is a shortcut for C, same with ↑ for C, and ↗ for C; ↺ means C, and ² means C.
1515             Combined arrows expand to multiple diacritics.)
1516            
1517             (Keep in mind that this list is just a conjecture; the standard does not distinguish combining characters
1518             and prefix keys, so it is not clear which keypresses produce combining characters, and which are prefix keys.)
1519            
1520             =head2 What follows is partially deprecated
1521            
1522             Parts of the following subsections are better explained in
1523             L;
1524             some other parts duplicate
1525            
1526             =head2 On principles of intuitive design of Latin keyboard
1527            
1528             Using tricks described below, it is easy to create a convenient map of vowels
1529             with 3 diacritics `¨´ to the QWERTY keyboard. However, some common
1530             (meaning: from Latin-1–10 of ISO 8859) letters from Latin alphabet
1531             cannot be composed this way; they are B<ÆÐÞÇIJØŒß>
1532             (one may need to add B<ªº>, as well as B<¡¿> for non-alphabetical symbols). It is crucial
1533             that these letters may be entered by an intuitively clear key of the keyboard.
1534             There is an obvious ASCII letter associated to each of these (e.g., B associated to the thorn
1535             B<Þ>), and in the best world just pressing this letter with C-modifier
1536             would produce the desired symbol.
1537            
1538             Note that ª may be associated to @; then º may be mapped to the nearby 2.
1539            
1540             There is only one conflict: both B<Ø>,B<Œ> "want" to be entered as C;
1541             this is the ONLY piece of arbitrariness in the design so far. After
1542             resolving this conflict, C-keys B are assigned their meanings,
1543             and cannot carry other letters (call them the “stuck in stone keys”).
1544            
1545             (Other keys "stuck in stone" are dead keys: it is important to have the
1546             glyph etched on these keyboard's keys similar to the task they perform.)
1547            
1548             Then there are several non-alphabetical symbols accessible through ISO 8859
1549             encodings. Assigning them C- access is another important task to perform.
1550             Some of these symbols come in pairs, such as ≤≥, «», ‹›, “”, ‘’; it makes
1551             sense to assign them to paired keyboard's keys: <> or [] or ().
1552            
1553             However, this task is in conflict of interests with yet another (!) task, so
1554             let us explain the needs answered by that task first.
1555            
1556             One can always enter accented letters using dead keys; but many people desire a
1557             quicker way to access them, by just pressing AltGr-key (possibly with
1558             shift). The most primitive keyboard designs (such as IBM International
1559             or Apple’s US (Extended)
1560            
1561             http://www.borgendale.com/uls.htm
1562             http://www.macfreek.nl/memory/Mac_Keyboard_Layout
1563            
1564             ) omit this step and assign only the NECESSARY letters for AltGr- access.
1565             (Others, like MicroSoft International, assign only a very small set.)
1566            
1567             This problem breaks into two tasks: choosing a repertoire of letters which
1568             will be typable this way, and mapping them to the keys of the keyboard.
1569             For example, EurKey chooses to use ´¨`-accented characters B (except
1570             for B<Ỳ>), plus B<ÅÑ>; MicroSoft International does C<ÄÅÉÚÍÓÖÁÑß> only (and IBM
1571             International does
1572             none); Bepo does only B<ÉÈÀÙŸ> (but also has the Azeri B<Ə> available - which is
1573             not in ISO 8859 - and has B<Ê> on the 105th key "C<2nd \|>"),
1574             L has none
1575             (at least if one does not count uc characters without lc counterparts), same for L
1576            
1577             http://bepo.fr/wiki/Manuel
1578             http://bepo.fr/wiki/Utilisateur:Masaru # old version of .klc
1579             http://www.jlg-utilities.com/download/us_jlg.klc
1580             http://tlt.its.psu.edu/suggestions/international/accents/codemacext.html
1581             or look for "a graphic of the special characters" on
1582             http://web.archive.org/web/20080717203026/http://homepage.mac.com/thgewecke/mlingos9.html
1583            
1584             =head2 Our solution
1585            
1586             First, the answer (the alternative, illustrated description is on
1587             L):
1588            
1589             =over 10
1590            
1591             =item Rule 0:
1592            
1593             non-ASCII letters which are not accented by B<` ´ ¨ ˜ ˆ ˇ ° ¯ ⁄> are entered by
1594             C-keys "obviously associated" to them. Supported: B<ÆÐÞÇIJŒß>.
1595            
1596             =item Rule 0a:
1597            
1598             Same is applicable to B<Ê> and B<Ñ>.
1599            
1600             =item Rule 1:
1601            
1602             Vowels B accented by B<¨´`> are assigned the so called I<"natural position">:
1603             3 “alphabetic” rows of keyboard are allocated to accents (B<¨> is the top, B<´> is the middle, B<`> is
1604             the bottom row of 3 alphabetic-rows on keyboard - so B<À> is on B-row),
1605             and are on the same diagonal as the base letter. For left-hand
1606             vowels (B,B) the diagonal is in the direction of \, for right hand
1607             vowels (B,B,B,B) - in the direction of /.
1608            
1609             =item Rule 1a:
1610            
1611             If the "natural position" is occupied, the neighbor key in the
1612             direction of "the other diagonal" is chosen. (So for B,B it is
1613             the /-diagonal, and for right-hand vowels B it is the \-diag.)
1614            
1615             =item Rule 1b:
1616            
1617             This neighbor key is below unless the key is on bottom row - then it is above.
1618            
1619             Supported by rules "1": all but B<ÏËỲ>.
1620            
1621             =item Rule 2:
1622            
1623             Additionally, B<Å>,B<Ø>,B<Ì> are available on keys B,B<P>,B.
1624             B<ª> is on B<@>, and B<º> is on the nearby B<2>.
1625            
1626             =back
1627            
1628             =head2 Clarification:
1629            
1630             B<0.> If you remember only Rule 0, you still can enter all Latin-1 letters using
1631             Rule 0; all you need to remember is that most of the dead keys are at “obvious”
1632             positions: for L|http://k.ilyaz.org> it is B<`';"~^.,-/> for B<`´¨¨˜ˆ°¸¯ ̸>
1633             (B<¨> is repeated on B<;">!) and B<6> for B<ˇ> (memorizable as “opposite” of B<^> for B<ˆ>).
1634            
1635             (What the rule 0 actually says is: "You do not need to memorize me". ;-)
1636            
1637             (If you need a diacritic which is only I to one of the listed diacritics,
1638             there is a good chance that the dead key above L.)
1639            
1640             B<1.> If all you remember are rules 1,1a, you can calculate the position of the
1641             AltGr-key for AEYUIO accented by `´¨ up to a choice of 3 keys (the "natural
1642             key" and its 2 neighbors) - which are quick to try all if you forgot the
1643             precise position. If you remember rules 1,1ab, then this choice is down to
1644             2 possible candidates.
1645            
1646             Essentially, all you must remember in details is that the "natural positions"
1647             form a B — \ on left, / on right, and in case of bad luck you
1648             should move in the direction of other diagonal one step. Then a letter is
1649             either in its "obvious position", or in one of 3 modifications of the
1650             “natural position”.
1651            
1652             Note that these rules cover I the Latin letters appearing in
1653             Latin-1..Latin-10, I we resolve the B<Œ/Ø>-conflict by putting B<Œ> to the key B (since
1654             B<Ø> may be entered using CB)!
1655            
1656             =head2 Motivations:
1657            
1658             It is important to have a logical way to quickly understand whether a letter
1659             is quickly accessible from a keyboard, and on which key. (Or, maybe, to find
1660             a small set of keys on which a letter may be present — then, if one forgets,
1661             it is possible to quickly un-forget by trying a small number of keys).
1662            
1663             In fact, the problem of choosing “the optimal” assignment (by minimizing the
1664             rules to remember) has almost unique solution. Understanding this solution
1665             (to a problem which is essentially combinatorial optimization) may be a great help
1666             in memorizing the rules.
1667            
1668             The idea: we assign alphabetical Latin characters only to alphabetical keys
1669             on the keyboard; this frees the way to use (paired) symbol keys to enter (paired)
1670             Unicode symbols. Now observe the diagonals on the alphabetic part of the
1671             keyboard: \-diagonals (like B) and /-diagonals (like B). Each diagonal
1672             contains 3 (or less) alphabetic keys; what we want is to assign ¨-accent to the top
1673             one, ´-accent to the middle one, and `-accent to the bottom one.
1674            
1675             On the left-hand part of the keyboard, use \-diagonals, on the right-hand
1676             part use /-diagonals; now each diagonal contains EXACTLY 3 alphabetic keys.
1677             Moreover, the diagonals which contain vowels B do not intersect!
1678            
1679             If we have not decided to have keys set in stone, this would be all - we
1680             would get "completely predictable" access to B<´¨`>-accented characters B.
1681             For example, B<Ÿ> would be accessible on CB, B<Ý> on CB, B<Ỳ> on CB.
1682             Unfortunately, the diagonals contain keys C set in stone. So we need
1683             a way to "move away" from these keys. The rule is very simple: we move
1684             one step away in the direction of "other" diagonal (/-diagonal on the left
1685             half, and \-diagonal on the right half) one step down (unless we start
1686             on keys B, B where "down" is impossible and we move up to B or B).
1687            
1688             Examples: B<Ä> is on B, B<Á> "wants to be" on B (used for C<Æ>), so it is moved to
1689             C; B<Ö> wants to be on B (already used for B<Ø> or B<Œ>), and is moved away to B;
1690             B<È> wants to be on B (occupied by B<Ç>), but is moved away to B.
1691            
1692             There is no way to enter B<Ï> using this layout (unless we agree to move it
1693             to the "8*" key, which may conflict with convenience of entering typographic
1694             quotation marks). Fortunately, this letter is rare (even compared to B<Ë>
1695             which is quite frequent in Dutch). So there is no big deal that it is not
1696             available for "handy" input - remember that one can always use deadkeys.
1697            
1698             http://en.wikipedia.org/wiki/Letter_frequency#Relative_frequencies_of_letters_in_other_languages
1699            
1700             Note that the keys B<P> and B are not engaged by this layout; since B<P>
1701             is a neighbor of B, it is natural to use it to resolve the conflict
1702             between B<Ø> or B<Œ> (which both want to be set in stone on B). This leaves
1703             only the key B unengaged; but what we do not cover are two keys B<Å> and B<Ñ>
1704             which are relatively frequent in Latin-derived European languages.
1705            
1706             Note that B<Ì> is moderately frequent in Italian, but B<Ñ> is much more frequent
1707             in Spanish. Since B<Ì> and B<Ñ> want to be on the same key (which on many keyboards is taken by
1708             B<Ñ>), it makes sense to prefer B<Ñ>… Likewise, B<Ê> is much more frequent
1709             than B<Ë>; switch them.
1710            
1711             This leaves only the key B unassigned, I a very rare B<Ỳ> on B. In
1712             L|http://k.ilyaz.org>, one puts B<Å> and B<Ì> there. This completes
1713             the explanation of the rule 2.
1714            
1715             =head2 On possibilities of merging 2 diacritics on one prefix key
1716            
1717             With many diacritics, and the limited mnemonically-viable positions on
1718             the keyboard, it makes sense to merge several diacritics on the same prefix key.
1719             Possible candidates are cedilla/ogonek/comma-below (on C),
1720             dot-above/ring-above/dot-below (on C), caron/breve, circumflex/inverted-breve (on C
1721             In some cases, only one of the diacritics would be applicable to a particular character.
1722             Otherwise, one must decide which of several choices to prefer. The notes below may be
1723             useful when designing such preferences. (This module can take most of such choices
1724             automatically due to knowledge of L
1725             of characters; this age correlates well with expected frequency of use.)
1726            
1727             Another trick discussed below is implementing a rare diacritic X by applying the diacritic Y to a character
1728             with pre-composed diacritic Z.
1729            
1730             U-caron: ǔ, Ǔ which is used to indicate u in the third tone of Chinese language pinyin.
1731             But U-breve ŭ/Ŭ is used in Latin encodings.
1732             Ǧ/ǧ (G with caron) is used, but only in "exotic" or old languages (has no
1733             combined form - while G-breve ğ/Ğ is in Latin encodings.
1734             A-breve Ă: A-caron Ǎ is not in Latin-N; apparently, is used only in pinyin,
1735             Zarma, Hokkien, Vietnamese, IPA, transliteration of Old Latin, Bible and Cyrillic's big yus.
1736            
1737             In EurKey: only a takes breve, the rest take caron (including G but not U)
1738            
1739             Merging ° and dot-accent ˙ in Latin-N: only A and U take °, and they
1740             do not take dot-accent. In EurKey: also small w,y take ring accent; same in
1741             Bepo - but they do not take dot accent in Latin-N.
1742            
1743             Double-´ and cornu (both on a,u only) can be taken by ¨ or ˙ on letters with
1744             ¨ precombined (in Unicode ¨ is not precombined with diaeresis or dots).
1745             But one must special-case Ë and Ï and Ø (have Ê and IJ instead; IJ takes no accents,
1746             but Ê takes acute, grave, tilde and dot below...)! Æ takes acute and macron; Ø takes acute.
1747            
1748             Actually, cornu=horn is only on o,u, so using dot/ring on ö and ü is very viable...
1749            
1750             So for using AltGr-letter after deadkeys: diaeresis can take dot above, hat and wedge, diaeresis.
1751             Likewise, ` and ´ are not precombined together (but there is a combined
1752             combining mark). So one can do something else on vowels (ogonek?).
1753            
1754             Applying ´ to `-accented forms: we do not have ỳ (on AltGr-keys), so must use "the natural position"
1755             which is mixed with Ñ (takes no accents) and Ç (takes acute!!!).
1756            
1757             s, t do not precombine with `; so can use for the "alternative cedilla".
1758            
1759             Only a/u/w/y take ring, and they do not take cedilla. Can merge.
1760            
1761             Bepo's hook above; ảɓƈɗẻểƒɠɦỉƙɱỏƥʠʂɚƭủʋⱳƴỷȥ ẢƁƇƊẺỂƑƓỈƘⱮỎƤƬỦƲⱲƳỶȤ
1762            
1763             perl -wlnae "next unless /HOOK/; push @F, shift @F; print qq(@F)" NamesList.txt | sort | less
1764            
1765             Of capital letters only T and Y take different kinds of hooks... (And for T both are in Latin-Extended-B...)
1766            
1767            
1768             =head1 Useful tidbits from Unicode mailing list
1769            
1770             =for html
1771            
1772            
1773             =head2 On keyboards
1774            
1775             On MS keyboard (absolutely wrong!)
1776            
1777             http://unicode.org/mail-arch/unicode-ml/y2012-m05/0268.html
1778            
1779             Symbols for Keyboard keys:
1780            
1781             http://unicode.org/mail-arch/unicode-ml/Archives-Old/UML009/0204.html
1782             “Menu key” variations:
1783             http://unicode.org/mail-arch/unicode-ml/Archives-Old/UML009/0239.html
1784             Role of ISO/IEC 9995, switchable keycaps
1785             http://unicode.org/mail-arch/unicode-ml/Archives-Old/UML009/0576.html
1786            
1787             On the other hand, having access to text only math symbols makes it possible to implement it in computer languages, making source code easier to read.
1788            
1789             Right now, I feel there is a lack of keyboard maps. You can develop them on your own, but that is very time consuming.
1790            
1791             http://unicode.org/mail-arch/unicode-ml/y2011-m04/0117.html
1792            
1793             Fallback in “smart keyboards” interacting with Text-Service unaware applications
1794            
1795             http://unicode.org/mail-arch/unicode-ml/y2014-m03/0165.html
1796            
1797             Keyboards - agreement (5 scripts at end)
1798            
1799             ftp://ftp.cen.eu/CEN/Sectors/List/ICT/CWAs/CWA-16108-2010-MEEK.pdf
1800            
1801             Need for a keyboard, keyman examples; why "standard" keyboards are doomed
1802            
1803             http://unicode.org/mail-arch/unicode-ml/y2010-m01/0015.html
1804             http://unicode.org/mail-arch/unicode-ml/y2010-m01/0022.html
1805             http://unicode.org/mail-arch/unicode-ml/y2010-m01/0036.html
1806             http://unicode.org/mail-arch/unicode-ml/y2010-m01/0053.html
1807            
1808             =head2 History of Unicode
1809            
1810             Unicode in 1889
1811            
1812             http://www.archive.org/stream/unicodeuniversa00unkngoog#page/n3/mode/2up
1813            
1814             Structure of development of Unicode
1815            
1816             http://unicode.org/mail-arch/unicode-ml/y2006-m07/0056.html
1817             http://unicode.org/mail-arch/unicode-ml/y2005-m07/0099.html
1818             I don't have a problem with Unicode. It is what it is; it cannot
1819             possibly be all things to all people:
1820             http://unicode.org/mail-arch/unicode-ml/y2005-m07/0101.html
1821            
1822             Control characters’ names
1823            
1824             http://unicode.org/mail-arch/unicode-ml/y2014-m03/0036.html
1825            
1826             Compromises vs reality
1827            
1828             http://unicode.org/mail-arch/unicode-ml/y2010-m02/0106.html
1829             http://unicode.org/mail-arch/unicode-ml/y2010-m02/0117.html
1830            
1831             Stability of normalization
1832            
1833             http://unicode.org/mail-arch/unicode-ml/y2005-m07/0055.html
1834            
1835             Universality vs affordability
1836            
1837             http://unicode.org/mail-arch/unicode-ml/y2007-m07/0157.html
1838            
1839             Drachma
1840            
1841             http://unicode.org/mail-arch/unicode-ml/y2012-m05/0167.html
1842             http://std.dkuug.dk/jtc1/sc2/wg2/docs/n3866.pdf
1843            
1844             w-ring is a stowaway
1845            
1846             http://unicode.org/mail-arch/unicode-ml/y2012-m04/0043.html
1847            
1848             History of squared pH (and about what fits into ideographic square)
1849            
1850             http://unicode.org/mail-arch/unicode-ml/y2012-m02/0123.html
1851             http://unicode.org/mail-arch/unicode-ml/y2013-m09/0111.html
1852            
1853             Silly quotation marks: 201b, 201f
1854            
1855             http://en.wikipedia.org/wiki/Quotation_mark_glyphs
1856             http://unicode.org/mail-arch/unicode-ml/y2006-m06/0300.html
1857             http://unicode.org/mail-arch/unicode-ml/y2006-m06/0317.html
1858             http://en.wikipedia.org/wiki/Comma
1859             http://en.wikipedia.org/wiki/%CA%BBOkina
1860             http://en.wikipedia.org/wiki/Saltillo_%28linguistics%29
1861             http://unicode.org/mail-arch/unicode-ml/y2006-m06/0367.html
1862             http://unicode.org/unicode/reports/tr8/
1863             under "4.6 Apostrophe Semantics Errata"
1864            
1865             OHM: In modern usage, for new documents, this character should not be used
1866            
1867             http://unicode.org/mail-arch/unicode-ml/y2011-m08/0060.html
1868            
1869             Uppercase eszett ß ẞ
1870            
1871             http://unicode.org/mail-arch/unicode-ml/y2007-m05/0007.html
1872             http://unicode.org/mail-arch/unicode-ml/y2007-m05/0008.html
1873             http://unicode.org/mail-arch/unicode-ml/y2007-m05/0142.html
1874             http://unicode.org/mail-arch/unicode-ml/y2007-m05/0045.html
1875             http://unicode.org/mail-arch/unicode-ml/y2007-m05/0147.html
1876             http://unicode.org/mail-arch/unicode-ml/y2007-m05/0170.html
1877             http://unicode.org/mail-arch/unicode-ml/y2007-m05/0196.html
1878            
1879             Should not use (roman numerals)
1880            
1881             http://unicode.org/mail-arch/unicode-ml/y2007-m11/0253.html
1882            
1883             Colors in Unicode names
1884            
1885             http://unicode.org/mail-arch/unicode-ml/y2011-m03/0100.html
1886            
1887             Xerox and interrobang
1888            
1889             http://unicode.org/mail-arch/unicode-ml/y2005-m04/0035.html
1890            
1891             Tibetan (history of encoding, relative difficulty of handling comparing to cousins)
1892            
1893             http://unicode.org/mail-arch/unicode-ml/y2013-m04/0036.html
1894             http://unicode.org/mail-arch/unicode-ml/y2013-m04/0040.html
1895            
1896             Translation of 8859 to 10646 for Latvian was MECHANICAL
1897            
1898             http://unicode.org/mail-arch/unicode-ml/y2013-m06/0057.html
1899            
1900             Hyphens:
1901            
1902             http://unicode.org/mail-arch/unicode-ml/y2009-m10/0038.html
1903            
1904             NOT and BROKEN BAR
1905            
1906             http://unicode.org/mail-arch/unicode-ml/y2007-m12/0207.html
1907             http://www.cs.tut.fi/~jkorpela/latin1/ascii-hist.html#5C
1908            
1909             Combining power of generative features - implementor's view
1910            
1911             http://unicode.org/mail-arch/unicode-ml/y2004-m09/0145.html
1912            
1913             =head2 Greek and about
1914            
1915             OXIA vs TONOS
1916            
1917             http://www.tlg.uci.edu/~opoudjis/unicode/unicode_gkbkgd.html#oxia
1918            
1919             Greek letters for non-Greek
1920            
1921             http://stephanus.tlg.uci.edu/~opoudjis/unicode/unicode_interloping.html#ipa
1922            
1923             Macron and breve in Greek dictionaries
1924            
1925             http://www.unicode.org/mail-arch/unicode-ml/y2013-m08/0011.html
1926            
1927             LAMBDA vs LAMDA
1928            
1929             http://unicode.org/mail-arch/unicode-ml/y2010-m06/0063.html
1930            
1931             COMBINING GREEK YPOGEGRAMMENI equilibristic (depends on a vowel?)
1932            
1933             http://unicode.org/mail-arch/unicode-ml/y2006-m06/0299.html
1934             http://unicode.org/mail-arch/unicode-ml/y2006-m06/0308.html
1935             http://www.tlg.uci.edu/~opoudjis/unicode/unicode_adscript.html
1936             http://unicode.org/mail-arch/unicode-ml/y2008-m05/0046.html
1937            
1938             =head2 Latin, Cyrillic, Hebrew, etc
1939            
1940             Book Spine reading direction
1941            
1942             http://www.artlebedev.com/mandership/122/
1943            
1944             What is a "Latin" char
1945            
1946             http://unicode.org/forum/viewtopic.php?f=23&t=102
1947            
1948             Federal vs regional aspects of Latinization (a lot of flak; cp1251)
1949            
1950             http://peoples.org.ru/stenogramma.html
1951            
1952             Yiddish digraphs
1953            
1954             http://unicode.org/mail-arch/unicode-ml/y2011-m10/0121.html
1955            
1956             Cyrillic Script, Unicode status (+combining)
1957            
1958             http://scriptsource.org/cms/scripts/page.php?item_id=entry_detail&uid=ngc339csy8
1959             http://scriptsource.org/cms/scripts/page.php?item_id=entry_detail&uid=ktxptbccph
1960            
1961             The IBM 1401 Hebrew Letter Key
1962            
1963             http://www.qsm.co.il/Hebrew/HebKey.htm
1964            
1965             GOST 10859
1966            
1967             http://unicode.org/mail-arch/unicode-ml/y2009-m09/0082.html
1968             http://www.mailcom.com/besm6/ACPU-128.jpg
1969            
1970             Hebrew char input
1971            
1972             http://rishida.net/scripts/pickers/hebrew/
1973             http://rishida.net/scripts/uniview/#title
1974            
1975             Cyrillic soup
1976            
1977             http://czyborra.com/charsets/cyrillic.html
1978            
1979             How to encode Latin-in-fraktur
1980            
1981             http://unicode.org/mail-arch/unicode-ml/y2007-m01/0279.html
1982             http://unicode.org/mail-arch/unicode-ml/y2007-m01/0263.html
1983            
1984             The presentation of the existing COMBINING CEDILLA which has three major forms [ȘșȚț and Latvian Ģģ]
1985            
1986             http://unicode.org/mail-arch/unicode-ml/y2013-m06/0045.html
1987             http://unicode.org/mail-arch/unicode-ml/y2013-m06/0066.html
1988            
1989             =head2 Math and technical texts
1990            
1991             Missing: .... skew-orthogonal complement
1992            
1993             Math Almost-Text encoding
1994            
1995             http://unicode.org/notes/tn28/UTN28-PlainTextMath-v3.pdf
1996             http://unicode.org/mail-arch/unicode-ml/y2011-m10/0018.html
1997             For me 1/2/3/4 means unambiguously ((1/2)/3)/4, i.e. 1/(2*3*4)
1998            
1999             Unicode mostly encodes characters that are in use or have been
2000             encoded in other standards. While not semantically agnostic, it is
2001             much less oriented towards semantic clarifications and
2002             distinctions than many people might hope for (and this includes
2003             me, some of the time at least).
2004            
2005             Horizontal/vertical line/arrow extensions
2006            
2007             http://unicode.org/charts/PDF/U2300.pdf
2008             http://unicode.org/mail-arch/unicode-ml/y2003-m07/0513.html
2009             http://std.dkuug.dk/JTC1/SC2/WG2/docs/n2508.htm
2010            
2011             Pretty-printing text math
2012            
2013             http://code.google.com/p/sympy/wiki/PrettyPrinting
2014            
2015             Sub/Super on a terminal
2016            
2017             http://unicode.org/mail-arch/unicode-ml/y2008-m07/0028.html
2018            
2019             CR symbols
2020            
2021             http://unicode.org/mail-arch/unicode-ml/y2006-m07/0163.html
2022            
2023             Math layout
2024            
2025             http://unicode.org/mail-arch/unicode-ml/y2007-m01/0303.html
2026            
2027             Attempts of classification
2028            
2029             http://std.dkuug.dk/jtc1/sc2/wg2/docs/n4384.pdf
2030             http://std.dkuug.dk/JTC1/SC2/WG2/
2031            
2032             Buttons Target Also=not-in-series-of-n4384
2033             square 1🞌 2⬝ 3🞍 4▪ 5◾ 6◼ 7■ s⬛ (solid=s⬛)
2034             box 1□ 2🞎 3🞏 4🞐 5🞑 6🞒 7🞓 o⬜ 1🞔 2▣ 3🞕 🞖 =white square (open=o⬜) also: ▫◽◻⌑⧈⬚⸋⊡
2035             black circle 1⋅ 2∙ 3🞄 4⦁ 5⦁ 6⚫ 7● also: ·
2036             ring 1○ 2⭘ 3🞆 4🞆 5🞇 6🞈 7🞉 1⊙ 2🞊 3⦿ 🞋 =white circle also: ⊚⌾◌⚪⚬⨀◦⦾
2037             black diamond 1🞗 2🞘 3⬩ 4🞙 5⬥ 6◆
2038             white diamond ◇ 1🞚 2◈ 3🞛 🞜 also: ⋄
2039             black lozenge 1🞝 2🞞 3⬪ 4🞟 5⬧ 6⧫
2040             white lozenge ◊ 🞠
2041             cross 1🞡 2🞢 3🞣 4🞤 5🞥 6🞦 7🞧
2042             saltire 1🞨 2🞩 3🞪 4🞫 5🞬 6🞭 7🞮 ≈ times (rotated cross)
2043             5-asterisk 1🞯 2🞰 3🞱 4🞲 5🞳 6🞴
2044             6-asterisk 1🞵 2🞶 3🞷 4🞸 5🞹 6🞺
2045             8-asterisk 1🞻 2🞼 3🞽 4🞾 5🞿
2046             centered n-gon 3⯅ 4⯀ 5⬟ 6⬣ 8⯃
2047             cent on-corner 3⯆ 4⯁ 5⯂ 6⬢ 8⯄ (also ⯇ ⯈)
2048             light star 3🟀 4🟄 5🟉 6✶ 8🟎 12🟒
2049             medium star 3🟁 4🟅 5★ 6🟋 8🟏 12🟓
2050             (heavy) star 3🟂 4🟆 5🟊 6🟌 8🟐 12✹
2051             pinwheel 3🟃 4🟇 5✯ 6🟍 8🟑 12🟔 lighter: ✵
2052            
2053             =head2 Unicode and linguists
2054            
2055             Linguists mailing lists
2056            
2057             http://unicode.org/mail-arch/unicode-ml/y2009-m06/0066.html
2058            
2059             Obsolete IPA
2060            
2061             http://unicode.org/mail-arch/unicode-ml/y2009-m01/0487.html
2062             http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[%3Asubhead%3D%2F%28%3Fi%29archaic%2F%3A]+&g=
2063            
2064             Teuthonista (vowel guide p11, kbd p13)
2065            
2066             http://www.sprachatlas.phil.uni-erlangen.de/materialien/Teuthonista_Handbuch.pdf
2067            
2068             Glottals
2069            
2070             http://unicode.org/mail-arch/unicode-ml/y2008-m05/0151.html
2071             http://unicode.org/mail-arch/unicode-ml/y2008-m05/0163.html
2072             http://unicode.org/mail-arch/unicode-ml/y2008-m05/0202.html
2073             http://unicode.org/mail-arch/unicode-ml/y2008-m05/0205.html
2074            
2075             =head2 Spaces, invisible characters, VS
2076            
2077             Substitute blank
2078            
2079             http://unicode.org/mail-arch/unicode-ml/y2011-m07/0101.html
2080            
2081             Representing invisible characters
2082            
2083             http://unicode.org/mail-arch/unicode-ml/y2011-m07/0094.html
2084            
2085             Ignorable glyphs
2086            
2087             http://unicode.org/mail-arch/unicode-ml/y2007-m08/0132.html
2088             http://unicode.org/mail-arch/unicode-ml/y2007-m08/0138.html
2089             http://unicode.org/mail-arch/unicode-ml/y2007-m08/0120.html
2090            
2091             HOWTO: (non)dummy VS in fonts
2092            
2093             http://unicode.org/mail-arch/unicode-ml/y2007-m08/0118.html
2094            
2095             ZWSP ZWNJ WJ SHY NON-BREAKING HYPHEN
2096            
2097             http://unicode.org/mail-arch/unicode-ml/y2007-m08/0123.html
2098             http://unicode.org/mail-arch/unicode-ml/y2007-m07/0188.html
2099             http://unicode.org/mail-arch/unicode-ml/y2007-m07/0199.html
2100             http://unicode.org/mail-arch/unicode-ml/y2007-m07/0201.html
2101             http://unicode.org/mail-arch/unicode-ml/y2007-m06/0122.html
2102             http://unicode.org/mail-arch/unicode-ml/y2007-m01/0297.html
2103            
2104             On which base to draw a "standalone" diacritics
2105            
2106             http://unicode.org/mail-arch/unicode-ml/y2007-m07/0075.html
2107            
2108             Variation sequences
2109            
2110             http://unicode.org/mail-arch/unicode-ml/y2004-m07/0246.html
2111            
2112             =head2 Typesetting
2113            
2114             Upside-down text in CSS (remove position?)
2115            
2116             http://unicode.org/mail-arch/unicode-ml/y2012-m01/0037.html
2117            
2118             Unicode to PostScript
2119            
2120             http://unicode.org/mail-arch/unicode-ml/y2009-m06/0056.html
2121             http://www.linuxfromscratch.org/blfs/view/svn/pst/enscript.html
2122             http://unicode.org/mail-arch/unicode-ml/y2009-m06/0062.html
2123            
2124             Spacing: English and French
2125            
2126             http://unicode.org/mail-arch/unicode-ml/y2006-m09/0167.html
2127             http://unicode.org/mail-arch/unicode-ml/y2008-m05/0103.html
2128             http://unicode.org/mail-arch/unicode-ml/y2007-m08/0138.html
2129            
2130             Chicago Manual of Style
2131            
2132             http://unicode.org/mail-arch/unicode-ml/y2006-m01/0127.html
2133            
2134             Coloring parts of ligatures
2135             Implementations:
2136            
2137             http://unicode.org/mail-arch/unicode-ml/y2005-m06/0195.html
2138             http://unicode.org/mail-arch/unicode-ml/y2005-m06/0233.html
2139             http://unicode.org/mail-arch/unicode-ml/y2005-m06/0208.html
2140             GPOS
2141             http://unicode.org/mail-arch/unicode-ml/y2005-m06/0167.html
2142            
2143             Chinese typesetting
2144            
2145             http://idsgn.org/posts/the-end-of-movable-type-in-china/
2146            
2147             @fonts and non-URL URIs
2148            
2149             http://unicode.org/mail-arch/unicode-ml/y2010-m01/0156.html
2150            
2151             =head2 Looking at the future
2152            
2153             Why and how to introduce innovative characters
2154            
2155             http://unicode.org/mail-arch/unicode-ml/y2012-m01/0045.html
2156            
2157             Unicode knows the concept of a provisional property
2158            
2159             http://unicode.org/mail-arch/unicode-ml/y2011-m11/0142.html
2160             http://unicode.org/reports/tr23/
2161             http://unicode.org/mail-arch/unicode-ml/y2011-m11/0161.html
2162             If you want to make analogies, however, the ISO ballots constitute
2163             the *provisional* publication for character code points and names.
2164             that needs to be available from day one for a character to be
2165             implementable at all (such as decomp mappings, bidi class,
2166             code point, name, etc.).
2167            
2168             ZERO-WIDTH UNDEFINED DECOMPOSITION MARK
2169             - to define decomposition, prepend it
2170            
2171             Exciting new letter forms for English
2172            
2173             http://www.theonion.com/articles/alphabet-updated-with-15-exciting-new-replacement,2869/
2174            
2175             Proposing new stuff, finding new stuff proposed
2176            
2177             http://unicode.org/mail-arch/unicode-ml/y2008-m01/0238.html
2178             http://www.unicode.org/mail-arch/unicode-ml/y2013-m09/0056.html
2179            
2180             A useful set of criteria for encoding symbols is found in
2181             Annex H of this document:
2182            
2183             http://std.dkuug.dk/jtc1/sc2/wg2/docs/n3002.pdf
2184            
2185             =head2 Unsorted
2186            
2187             Summary views into CLDR
2188            
2189             http://www.unicode.org/cldr/charts//by_type/patterns.characters.html
2190             http://www.unicode.org/cldr/charts//by_type/misc.exemplarCharacters.html
2191            
2192             Pound
2193            
2194             http://unicode.org/mail-arch/unicode-ml/y2012-m05/0242.html
2195            
2196             Classification of Dings (bats etc)
2197            
2198             std.dkuug.dk/jtc1/sc2/wg2/docs/n4115.pdf
2199            
2200             Escape: 2be9 2b9b
2201             ARROW SHAFT - various
2202            
2203             Locales
2204            
2205             http://blog.kyero.com/2011/11/14/what-is-the-common-locale-data-repository/
2206             http://blog.kyero.com/2010/12/02/lost-in-translation-locales-not-languages/
2207             http://unicode.org/mail-arch/unicode-ml/y2006-m06/0203.html
2208            
2209             General
2210            
2211             http://ebixio.com/online_docs/UnicodeDemystified.pdf
2212            
2213             Diacritics in fonts
2214            
2215             http://unicode.org/mail-arch/unicode-ml/y2011-m05/0047.html
2216             http://www.user.uni-hannover.de/nhtcapri/combining-marks.html#greek
2217            
2218             Licences (GPL etc) in TV sets
2219            
2220             http://unicode.org/mail-arch/unicode-ml/y2009-m12/0092.html
2221            
2222             Similar glyphs:
2223            
2224             http://unicode.org/reports/tr39/data/confusables.txt
2225            
2226             GeoLocation by IP
2227            
2228             http://unicode.org/mail-arch/unicode-ml/y2009-m04/0197.html
2229            
2230             Per language character repertoire:
2231            
2232             http://unicode.org/mail-arch/unicode-ml/y2009-m04/0253.html
2233             http://unicode.org/mail-arch/unicode-ml/y2009-m04/0255.html
2234            
2235             Dates/numbers in Unicode
2236            
2237             http://unicode.org/mail-arch/unicode-ml/y2010-m02/0122.html
2238            
2239             Normalization FAQ
2240            
2241             http://www.macchiato.com/unicode/nfc-faq
2242            
2243             Apostrophe
2244            
2245             http://unicode.org/mail-arch/unicode-ml/y2008-m05/0060.html
2246             http://unicode.org/mail-arch/unicode-ml/y2008-m05/0063.html
2247             http://unicode.org/mail-arch/unicode-ml/y2008-m05/0066.html
2248             http://unicode.org/mail-arch/unicode-ml/y2007-m07/0251.html
2249             http://unicode.org/mail-arch/unicode-ml/y2007-m05/0309.html
2250            
2251             Apostrophe as soft sign
2252            
2253             http://unicode.org/mail-arch/unicode-ml/y2010-m08/0123.html
2254            
2255             Questionnaire at start of Unicode proposal
2256            
2257             http://unicode.org/mail-arch/unicode-ml/y2007-m05/0087.html
2258            
2259             Rubi
2260            
2261             http://en.wikipedia.org/wiki/Ruby_character#Unicode
2262            
2263             Tamil/ISCII
2264            
2265             http://unicode.org/faq/indic.html
2266             http://unicode.org/versions/Unicode6.1.0/ch09.pdf
2267             http://www.brainsphere.co.in/keyboard/tm.pdf
2268            
2269             CGI and OpenType
2270            
2271             http://unicode.org/mail-arch/unicode-ml/y2008-m02/0097.html
2272            
2273             Numbers in scripts ;-)
2274            
2275             http://unicode.org/mail-arch/unicode-ml/y2008-m02/0120.html
2276            
2277             Indicating coverage of the font
2278            
2279             http://unicode.org/mail-arch/unicode-ml/y2008-m02/0152.html
2280             http://unicode.org/mail-arch/unicode-ml/y2008-m02/0167.html
2281            
2282             Accessing ligatures
2283            
2284             http://unicode.org/mail-arch/unicode-ml/y2007-m11/0210.html
2285            
2286             Folding characters
2287            
2288             http://unicode.org/reports/tr30/tr30-4.html
2289            
2290             Writing systems vs written languages
2291            
2292             http://unicode.org/mail-arch/unicode-ml/y2005-m07/0198.html
2293             http://unicode.org/mail-arch/unicode-ml/y2005-m07/0241.html
2294            
2295             MS Visual OpenType tables
2296            
2297             http://www.microsoft.com/typography/VOLT.mspx
2298             http://www.microsoft.com/typography
2299            
2300             "Same" character Oacute used for different "functions" in the same text
2301            
2302             http://unicode.org/mail-arch/unicode-ml/y2004-m08/0019.html
2303             etc:
2304             http://unicode.org/mail-arch/unicode-ml/y2004-m07/0227.html
2305            
2306             Diacritics
2307            
2308             http://www.sil.org/~gaultney/ProbsOfDiacDesignLowRes.pdf
2309             http://en.wikipedia.org/wiki/Sylfaen_%28typeface%29
2310             http://tiro.com/Articles/sylfaen_article.pdf
2311            
2312             Sign writing
2313            
2314             http://std.dkuug.dk/jtc1/sc2/wg2/docs/n4342.pdf
2315            
2316             Writing digits in non-decimal
2317            
2318             http://unicode.org/mail-arch/unicode-ml/y2011-m03/0050.html
2319             Which separator is less ambiguous? Breve ˘ ? ␣ ? Inverted ␣ ?
2320            
2321             Use to identify a letter:
2322            
2323             http://unicode.org/charts/collation/
2324            
2325             Perl has problems with unpaired surrogates (whole thread)
2326            
2327             http://unicode.org/mail-arch/unicode-ml/y2010-m11/0034.html
2328            
2329             Complex fonts (e.g., Indic)
2330            
2331             http://unicode.org/mail-arch/unicode-ml/y2010-m10/0049.html
2332            
2333             Complex glyphs in Symbola (pre-6.01) font may crash older versions of Windows
2334            
2335             http://unicode.org/mail-arch/unicode-ml/y2010-m10/0082.html
2336             http://unicode.org/mail-arch/unicode-ml/y2010-m10/0084.html
2337            
2338             Windows 7 SP1 improvements
2339            
2340             http://babelstone.blogspot.de/2010/05/prototyping-tangut-imes-or-why-windows.html
2341            
2342             Middle dot is ambiguous
2343            
2344             http://unicode.org/mail-arch/unicode-ml/y2010-m09/0023.html
2345             http://unicode.org/mail-arch/unicode-ml/y2013-m03/0151.html
2346            
2347             Superscript == modifiers
2348            
2349             http://unicode.org/mail-arch/unicode-ml/y2010-m03/0133.html
2350            
2351             Translation of Unicode names
2352            
2353             http://unicode.org/mail-arch/unicode-ml/y2012-m12/0066.html
2354             http://unicode.org/mail-arch/unicode-ml/y2012-m12/0076.html
2355            
2356             Transliteration on passports (see p.IV-48), UniDEcode
2357            
2358             http://www.icao.int/publications/Documents/9303_p1_v1_cons_en.pdf
2359             http://unicode.org/mail-arch/unicode-ml/y2013-m11/0025.html
2360            
2361             =head1 Keyboard input on Windows: interaction of applications and the kernel
2362            
2363             =head2 Keyboard input on Windows, Part I: what is the kernel doing?
2364            
2365             This is not documented. We try to provide a description which is
2366             both as simple as possible, and as complete as possible. (We ignore
2367             many important parts: the handling of hot keys [or C]), IME,
2368             handling of focus switch [C etc], the synchronization of keystate
2369             between different queues, waking up the system, the keyboard filters,
2370             widening of virtual keycodes, and LED lights.)
2371            
2372             We omit Step 0, when the hardware keyboard drivers (PS/2 or USB) deliver keydown/up(/repeat???) event for scan
2373             codes of corresponding keys. (This is a complicated topic, but well-documented.)
2374            
2375             =over
2376            
2377             =item 1
2378            
2379             The scan codes are massaged (see “Low level scancode mapping” in L<"SEE ALSO">).
2380            
2381             =item 2
2382            
2383             The keyboard layout tables map the translated scancode to a virtual keycode.
2384             (This may also depend on the “modification column”; see L<"Far Eastern keyboards on Windows">.)
2385             The “internal” key state table is updated.
2386            
2387             =item 3
2388            
2389             Mythology: the modification keys (C, C, C etc) are taken into account.
2390            
2391             What actually happens: any key may act as a modification key. The keyboard layout tables
2392             map keycodes to 8-bit masks. (The customary names for lower bits of the mask are C,
2393             C, C, C; two more bits are named C and C — after
2394             OYAYUBI 親指, meaning THUMB; two more
2395             bits are unnamed.) The keycodes of the currently pressed keys (from the “internal” table) are translated to masks, and
2396             these masks are ORed together. (For the purpose of translation to C/etc [done
2397             in ToUnicode()/ToUnicodeEx()], the bit C may be set
2398             also when key C was pressed an odd number of times; this is
2399             controlled by C flag in a virtual key descriptor [of the key being currently processed]
2400             of the keyboard layout tables.)
2401            
2402             The keyboard layout tables translate the ORed mask to a number called “modification column”.
2403             (These two numbers are completely hidden from applications. The only glint the
2404             applications get is in the [useless, since there is no way to map it to anything “real”] result of
2405             L.])
2406            
2407             =item 4
2408            
2409             Depending on the current “modification column”, the virtual keycode of the current key event
2410             may be massaged further. (See L<"Far Eastern keyboards on Windows">.) Numpad keycodes
2411             depend also on the state of C — provided the keyboard layout table marks them with
2412             C flag. A few other scancodes may also produce different virtual keycodes in
2413             different situations (e.g., C).
2414            
2415             When C flag is present, fake presses/releases of left C are generated
2416             on presses(repeats)/releases of right C (exception: the press is not generated if any
2417             Ctrl key is down; likewise for when left C up when right C is released). With
2418             keypad presses/releases in presence of C and C, fake releases/presses of C
2419             are generated.
2420            
2421             =item 5
2422            
2423             If needed, asynchronous key state for the current key's non-left-non-right flavor is updated.
2424             (The rest is dropped if the key is consumed by a C hook.)
2425            
2426             Asynchronous key state for the current key is updated. Numpad-by-number flags are updated.
2427             (The rest is dropped if the key is a hotkey.)
2428            
2429             The message C is posted to the application. If C [usually
2430             called the C key] is
2431             down, but C is not, the event is of C flavor (this info is duplicated in
2432             lParam. Additionally, for C tapping, the UP event is also made C — although
2433             at this moment C is not down!).
2434             (The C flag [of the scancode] is also delivered to the application.)
2435            
2436             (When a C message is posted, the key state is updated. This key state
2437             may be used by TranslateMessage() as an argument to ToUnicode(), and is returned by GetKeyState() etc.)
2438            
2439             B
2440             with TranslateMessage()/DispatchMessage() or uses some equivalent code.>
2441            
2442             =item 6
2443            
2444             Before the application dispatches C to the message handler,
2445             TranslateMessage() calls L with C (unless a popup menu
2446             is active; then C — which disables character-by-number input via
2447             numeric KeyPad) and the buffer of 16 UTF-16 code units.
2448            
2449             =item 7
2450            
2451             The UTF-16 code units obtained from ToUnicode() are posted via PostMessage(). All the code units but
2452             the last one are marked by C flag in C. If the initial message
2453             was C, the C flavor is posted; if ToUnicode() returns a
2454             deadkey, the C flavor is posted.
2455            
2456             (The bit C is set/used only for the console handler.)
2457            
2458             =back
2459            
2460             =head2 Keyboard input on Windows, Part II: The semantic of ToUnicode()
2461            
2462             L,
2463             the semantic is not. Here we fix this.
2464            
2465             =over 4
2466            
2467             =item 1
2468            
2469             If the bit 0x01 in C is not set, the key event is checked for contributing to
2470             character-by-number input via numeric KeyPad (and numpad-by-number flags are updated).
2471             If so, the character is
2472             delivered only when C is released. (This is the only case when KEYUP
2473             delivers a character.) Unless the bit 0x02 in C is set, the KEYUP
2474             events are not processed any more.
2475            
2476             =item 2
2477            
2478             The flag C is acted upon, and C is processed.
2479            
2480             =item 3
2481            
2482             The keys which are currently down are mapped to the ORed bitmap (see above).
2483            
2484             =item 4
2485            
2486             If the key event does not contribute to input-by-number via numeric keypad,
2487             and C is set, and no other bits except C, C are set:
2488             then the bit C is removed from the ORed mask.
2489            
2490             =item 5
2491            
2492             If C is active, C state is flipped in the following cases: either at most
2493             C is set in the bitmap, and C is set in the descriptor,
2494             or both C and C are set in the bitmap, and C is set in the
2495             descriptor.
2496            
2497             Now the ORed bitmap is converted to the modification column (see above).
2498            
2499             =item 6
2500            
2501             The key descriptor for the current virtual keycode is consulted (the “row” of the table).
2502             If C flag is on, C is active, and no other bits but C are set in the bitmap,
2503             the row is replaced by the next row.
2504            
2505             =item 7
2506            
2507             The entry at
2508             the row/column is extracted; if defined, it is either a string (zero or more UTF-16 code units), or a
2509             dead key ID (one UTF-16 unit). (I: the ID is taken from the next row of the table.)
2510            
2511             (If the ORed mask corresponds to a valid modification column, but the row does not
2512             define the behaviour at this column, and the bit C is set, and no other bits but C, C
2513             are set, then an autogenerated character in the range 0x00..0x1f is emitted for virtual keycodes
2514             'A'..'Z' and widened virtual keycodes 0xFF61..0xFF91 [for latter, based on the low bits of translation-to-scancode]).
2515            
2516             =item 8
2517            
2518             The resulting units are fed to the finite automaton. When the automaton is in
2519             0-state, a fed character unit is passed through, and a fed deadkey ID sets the state
2520             of the automaton to this number. In non-0 state, the IDs behave the
2521             same as numerically equal character units; the behaviour is described by the keyboard layout
2522             tables. The automaton changes the state according to the input; it may also emit a character
2523             (= 1 code unit; then it is always reset to 0 state). When “unrecognized input” arrives, the automaton
2524             emits the ID I the input, and resets to 0 state.
2525            
2526             (On KEYUP event, the changes to the state of the finite-automaton are ignored. This is only
2527             relevant if C has bit 0x02 set.)
2528            
2529             =item 9
2530            
2531             After UTF-16 units are passed through the automaton, its output is returned by ToUnicode().
2532             If the automaton is in non-0 state, the state ID becomes the output.
2533            
2534             =back
2535            
2536             B MSKLC restricts the length of the string associated to the row/column cell to
2537             be at most 4 UTF-16 code units. There are 2 restrictions for keyboard layouts created with other tools:
2538             first, the maximal number of UTF-16 codepoints in all these strings is stored in a byte, hence there
2539             may be at most 255 UTF-16 codepoints. Second, the actual slot C where the string is allocated
2540             contains two shorts, then the UTF-16 data; its length is also stored in a byte. This results in
2541             the maximal string length of 125 code units — if it is stored in one slot.
2542            
2543             However, with creative allocations, one can use more than one slot for a string storage
2544             (theoretically, one may imagine specially crafted layout where this would break the
2545             layout; in practice, such situations should not arise — even if one stores long strings in
2546             I slots good for 4-chars strings).
2547            
2548             B If the application uses the standard message pump
2549             with TranslateMessage()/DispatchMessage(), the caller of ToUnicode() is TranslateMessage().
2550             In this case, ToUnicode() is called with an output buffer consisting of 16 UTF-16 code units. For
2551             such applications, the strings associated to keypresses are truncated after 16 code units.
2552            
2553             B If the string is “long” (i.e., defined via LIGATURES), when it is fed through the
2554             finite automaton, the transitions to non-0 state do not generate deadkey IDs in the output
2555             string. (The LIGATURES may contain strings of one code unit! This may lead to non-obvious
2556             behaviour! If pressing such a key after a deadkey generates a chained deadkey, this
2557             would happen without delivering C message.)
2558            
2559             B How kernel recognizes which key sequences contribute to
2560             character-by-number input via numeric KeyPad? First, the starter keydown must happen
2561             when the ORed mask contains C, and no other bits except C
2562             and C. (E.g., one can press C, then tap C, release C
2563             [with 1,2,3 on the numeric keypad].
2564             This would deliver C, then C<1> would start character-by-number input
2565             provided C and C together have ORed mask “in between” of C
2566             and C.)
2567            
2568             After the starter keydown (NumPad: 0..9, DOT, PLUS) is recognized as such, all the keydowns
2569             should be followed by the corresponding keyup (keydowns-due-to-repeat are ignored);
2570             more precisely, between two KEYDOWN events, the KEYUP for the first of them must be present.
2571             (In other words, KEYDOWN/KEYUP events must come in the expected order, maybe with some intermixed “extra” KEYUP events.)
2572             In the decimal mode (numeric starter) only the keys with scancodes of NumPad 0..9 are allowed.
2573             In the hex mode (starter is NumPad's DOT or PLUS) also the keys with virtual codes
2574             '0'..'9' and 'A'..'F' are allowed. The sequence is terminated by releasing C
2575             (=C) key.
2576            
2577             B In most cases, the resulting number is reduced mod 256. The exceptions are: the starter key is C,
2578             or the translate-to codepage is multibyte (then a number above 255 is interpreted as big-endian combination
2579             of bytes). In multibyte codepages, numbers 0x80..0xFF
2580             are considered in C codepage (unless the translate-to codepage is Japanese, and the number’s codepoint is Katakana).
2581            
2582             B If the starter key is C or C, the number is a codepoint in the default codepage of the keyboard layout;
2583             if it is another digit, it is in the OEM codepage.
2584             Enabling hex modes (C or C) requires extra tinkering; see L<"Hex input of unicode is not enabled">.
2585            
2586             B since keyboard layouts normally map C to the mask C, and do not define
2587             a modification column for the ORed mask C<=KBDALT>, and C is B stripped for
2588             key events in input-by-number, these key events usually do not generate spurious Cs.
2589            
2590             B if the bit 0x01 of C is intended to be set, then there is a way to query
2591             the kernel “what would happen if a particular key with a particular combination of modifiers
2592             were pressed now”. (Recall that a “usual” ToUnicode() call is “destructive”: it modifies the
2593             I of the keyboard stored in the kernel. The information about whether one is in the
2594             middle of entering-by-number and/or whether one is in the middle of a deadkey sequence is
2595             erased or modified by such calls.) In general, there is no way to preserve the state of
2596             entering-by-number; however, in presence of bit 0x01, this is of no concern, so a solution
2597             exists.
2598            
2599             Using C, and setting the high bit of C gives the same result as
2600             ToUnicode() with C and no high bit in C. Moreover, this preserves the state of
2601             the deadkey-finite-automaton. This way, one gets a “I” flavor of ToUnicode().
2602            
2603             =head2 Keyboard input on Windows, Part III: Customary “special” keybindings of typical keyboards
2604            
2605             Typically, keyboards define a few keypresses which deliver “control” characters
2606             (for benefits of console applications). As shown above, even if the keyboard does not
2607             define C combinations (but does define modification column for C
2608             which is associated to C — with maybe C, C intermixed), C
2609             with C<^letter> I be delivered to the application. Same will happen for combinations
2610             with modifiers which produce only C, C, C.
2611            
2612             Additionally, the typical keyboards also define the following bindings:
2613            
2614             Ctrl-Space ——→ 0x20
2615             Esc, Ctrl-[ ——→ 0x1b
2616             Ctrl-] ——→ 0x1d
2617             Ctrl-\ ——→ 0x1c
2618             BackSpace ——→ ^H
2619             Ctrl-BackSpace ——→ 0x7f
2620             Ctrl-Break ——→ ^C
2621             Tab ——→ ^I
2622             Enter ——→ ^M
2623             Ctrl-Enter ——→ ^J
2624            
2625             In addition to this, the standard US keyboard (and keyboards built by this Perl module) define
2626             the following bindings with C modifiers:
2627            
2628             @ ——→ 0x00
2629             ^ ——→ 0x1e
2630             _ ——→ 0x1f
2631            
2632             =head2 Can an application on Windows accept keyboard events? Part I: insert only
2633            
2634             The logic described above makes the kernel deliver more or less “correct” C messages
2635             to the application. The only bindings which may be defined in the keyboard layout, but will not be
2636             seen as C are those in modification columns which involve C, and do not
2637             involve any bits except C and C. (Due to the stripping of C described
2638             above, these modification columns are never accessed — I.)
2639            
2640             Try to design an application with an entry field; the application should insert B the
2641             characters ”delivered for insertion” by the keyboard layout and the kernel. The application
2642             should not do anything else for all the other keyboard events. First, ignore
2643             the C stripping.
2644            
2645             Then the only C which are NOT supposed to insert the contents to the editable UI fields are the
2646             L described above. They are easy to recognize and ignore: just
2647             ignore all the C carrying characters in the range C<0x00..0x1f>, C<0x7f>, and ignore C<0x20>
2648             delivered when one of C keys is down. So the application which inserts all the I
2649             Cs will follow I of the keyboard as close as possible.
2650            
2651             Now return to consideration of C stripping. If the application follows the policy above,
2652             pressing C would enter C — provided C is mapped to C, as done
2653             on standard keyboards. So the application should recognize which C carrying C
2654             are actually due to stripping of C, and should not insert the delivered characters.
2655            
2656             Here comes the major flaw of the Windows’ keyboard subsystem: the kernel translates
2657             SCANCODE —→ VK_CODE —→ ORED_MASK —→ MODIFICATION_COLUMN, then operates in terms of
2658             ORed masks and modification columns. The application can access only the first two levels
2659             of this translation; one cannot query the kernel for any information about the last
2660             two numbers. (Except for the API L,
2661             but it is unclear how this API may help: it translates “in wrong direction” and covers only BMP.)
2662             Therefore, there is no bullet-proof way to recognize when C arrived
2663             due to C stripping.
2664            
2665             B of course, if only C keys are associated to non-0 ORed mask bitmaps,
2666             and they are associated to the “expected” C bits, then the
2667             application would easily recognize this situation by checking whether C is down,
2668             but C is not. (Also observe that this is exactly the situation distinguishing
2669             C from C — no surprises here!)
2670            
2671             Assuming that the application uses this method, it would correctly recognize stripped
2672             events on the “primitive” keyboards. However, on a keyboard with an extra modifier
2673             key (call it C; assume its mask involves a non-SHIFT/ALT/CTRL/KANA bit),
2674             the C combination will not be stripped by the kernel, but the application
2675             would think that it was, and would not insert the character in C message. A bug!
2676            
2677             Moreover, if “supporting only the naive mapping” were a feasible
2678             restriction, there would be no reason for the kernel to go through the extra step of “the ORed mask”.
2679             Actually, to have a keyboard which is simultaneously backward compatible, easy for users, and
2680             covering a sufficiently wide range of possible characters, one B use more or
2681             less convoluted implementations (as in L bitmaps to modifier keys>).
2682            
2683             B the fact that the kernel and the applications speak different
2684             incompatible languages makes even the primitive task discussed here impossible
2685             to code in a bullet-proof way. A heuristic workaround exists, but it will not
2686             work with all keyboards and all combinations of modifiers.
2687            
2688             B some applications (e.g., Emacs) manage to distinguish
2689             C combination of modifier keys from the combination C produced by
2690             a typical C; these applications are able to use C-modified
2691             keys as bindable accelerator keys. We address this question in the L.
2692            
2693             =head2 Can an application on Windows accept keyboard events? Part II: special key events
2694            
2695             In the preceding section, we considered the most primitive application accepting
2696             the user inserting of characters, and nothing more. “Real applications” must
2697             support also keyboard actions different from “insertion”; so those KEYDOWN events
2698             which are not related to insertion may trigger some “special actions”. To model a full-featured
2699             keyboard input, consider the following specification:
2700            
2701             As above, the application has an entry field, and should insert B the
2702             characters ”delivered for insertion” by the keyboard layout and the kernel.
2703             For all the keyboard events I, the application
2704             should write to the log file which of C modifiers were down,
2705             and the virtual keycode of the KEYDOWN event. Again, at first, we ignore
2706             the C stripping.
2707            
2708             At first, the problem looks simple: with the standard message pump, when C
2709             message is processed, the corresponding C messages are already
2710             sent to the message queue. One can PeekMessage() for these messages; if present,
2711             and not “special”, they correspond to “insertion”, so nothing should be written to the log.
2712             Otherwise, one reports this C to the log.
2713            
2714             Unfortunately, this solution is wrong. Inspect again what the kernel is delivering
2715             during the input-by-number via numeric keyboard: the KEYDOWN for decimal/hex digits
2716             B a part of the “insertion”, but it does not generate any C.
2717             Essentially, the application may see C pressed during the processing of
2718             C, but even if C is supposed to format the paragraph,
2719             this action should not be triggered (but C should be eventually inserted).
2720            
2721             B Input-by-number is getting in the way of using the standard message
2722             pump. C: one should write a clone of TranslateMessage() which delivers
2723             suitable C messages for KEYDOWN/KEYUP involved in Input-by-number. Doing
2724             this, one can also remove silliness from the Windows’ handling of Input-by-number
2725             (such as taking C for numbers above 255).
2726            
2727             B: MyTranslateMessage() should:
2728            
2729             =over 4
2730            
2731             =item *
2732            
2733             when not handling input-by-number, call ToUnicode(), but use C, so that ToUnicode() does not handle input-by-number.
2734            
2735             =item *
2736            
2737             Recognize input-by-number starters by the scancode/virtual-keycode, the presence of C down, and
2738             the fact that ToUnicode() produces nothing or C<'0'..'9','.',',','+'>.
2739            
2740             =item *
2741            
2742             After the starter, allow continuation by checking the scancode/virtual-keycode and the presence of C down.
2743             Do not call ToUnicode() for continuation keydown/up events.
2744            
2745             =item *
2746            
2747             After a chain of continuations followed by KEYUP for C, one should PostMessage() for C with
2748             accumulated input.
2749            
2750             =back
2751            
2752             Combining this with the heuristic recognition of stripped C, one gets an architecture
2753             with a naive approximation to handling of C (but still miles ahead of all the applications
2754             I saw!), and bullet-proof handling of other combinations of modifiers.
2755            
2756             B this implementation of MyTranslateMessage() loses one “feature” of the original one:
2757             that input-by-number is disabled in the presence of (popup) menu. However, since I never saw
2758             this “feature” in action (and never have heard of it described anywhere), this must be of
2759             negligible price.
2760            
2761             B I the applications I checked do this logic wrong. Most of them check B for
2762             “whether the key event looks like those which should trigger special actions”, then perform
2763             these special actions (and ignore the character payload).
2764            
2765             As shown above, the reasonable way is to do this in the opposite order, and check for
2766             special actions only I it is known that the key event does not carry a character payload.
2767             The impossibility of reversing the order of these checks is due to the same reason as one discussed
2768             above: the
2769             kernel and application speaking different languages.
2770            
2771             Indeed, since the application knows nothing
2772             about ORed masks, it has no way to distinguish that, for example, C may be I to be
2773             distinct from C and C, and while the last two do not carry the character
2774             payload, the first one does. Checking I for the absence of C
2775             delegates such a discrimination to the kernel, which has enough information about the
2776             intent of the keyboard layout. (Likewise, the keyboard may define the pair of C
2777             and C to insert ᵃ. Then C alone will not carry any character payload,
2778             its combination with a deadkey may.)
2779            
2780             Why are the applications trying to grab the potential special-key messages as early
2781             as possible? I suspect that the developers are afraid that otherwise, a keyboard layout may
2782             “steal” important accelerators from the application. While this is technically possible,
2783             nowadays keyboard accelerators are rarely the I way to access features of the applications;
2784             and among hundreds of keyboard layouts I saw, all but 2 or 3 would not “steal” I from applications.
2785             (Or maybe the developers just have no clue that the correct solution is so simple?)
2786            
2787             B Among the applications I checked, the worst offender is Firefox. It follows L
2788             unfortunate advice by Mike Kaplan|http://blogs.msdn.com/b/michkap/archive/2005/01/19/355870.aspx>
2789             and tries to reconstruct the mentioned above row/columns table of the keyboard layout, then
2790             uses this (heuristically reconstructed) table as a substitute for the real thing. And
2791             due to the mismatch of languages spoken by kernel and applications, working via such an
2792             attempted reconstruction turns out to have very little relationship to the actually intended
2793             behaviour of the keyboard (the behaviour observed in less baroque applications). In particular, if
2794             a keyboard uses different modification columns for C and C=C
2795             modifiers, pressing C inputs wrong characters in Firefox.
2796            
2797             B Among notable applications which fail spectacularly is Emacs. The developers
2798             forget that for a generation, it is already XXI century; so they L
2799             ToUnicode()|http://fossies.org/linux/misc/emacs-24.3.tar.gz:a/emacs-24.3/src/w32fns.c>!
2800             (Even if ToUnicode() is available, its result is converted to the result of the
2801             corresponding ToAscii() code.)
2802            
2803             In addition to 8-bitness, Emacs also suffers from check-for-specials-first syndrome…
2804            
2805             =head2 Can an application on Windows accept keyboard events? Part III: better detection of C stripping
2806            
2807             We explained above that L
2808             handling the case when C might have been stripped by the kernel|"Can an application on Windows accept keyboard events? Part I: insert only">. The
2809             very naive heuristic algorithm described there will recognize the simplest
2810             cases, but will also have many false positives: for many combinations it will decide
2811             that C was stripped while it was not. The result will be that
2812             when the kernel reports that the character C is delivered, the
2813             application would interpret it as C, so C would not be inserted.
2814             It will not handle, for example,
2815             the C modifier combinations with L
2816             from that section|"A convenient assignment of C bitmaps to modifier keys">.
2817            
2818             Indeed, with this assignment, the only combination of modifiers for which the kernel will strip C
2819             is C (and C if one does not assign any bits to C).
2820             So C is not stripped, hence the
2821             correct C is delivered by the kernel. However, since this combination is
2822             still visible to the application as having C, and not having C,
2823             it is delivered as the C flavor.
2824            
2825             So the net result is: one designed a nice assignment of masks to the modifier
2826             keys. This assignment makes keypresses successfully navigate around the quirks
2827             of I’s calculations of the character to deliver. However, the naive
2828             algorithm used by I will force the application to ignore this
2829             correctly delivered character to insert.
2830            
2831             A very robust workaround for this problem is introduced in the
2832             L.
2833             What we discuss here is a simple heuristic to recognize the combinations involving
2834             C and an “unexpected modifier”, so that these combinations become
2835             exceptions to the rule “C flavor means ‘do not insert’”.
2836            
2837             B when C message arrives, inspect the virtual keycodes
2838             which are reported as pressed. Ignore the keycode for the current message.
2839             Ignore the keycodes for “usual modifiers” (C) which are
2840             expected to keep stripping. Ignore the keycode for the keys which may be
2841             kept “stuck down” by the keyboards (see L<"Far Eastern keyboards on Windows">).
2842             If some keycode remains, then consider it as an “extra” modifier, and ignore
2843             the fact that the message was of C flavor.
2844            
2845             So all one must do is to define one user message (for input-by-number-in-progress),
2846             code two very simple routines, MyTranslateMessage() and HasExtraModifiersHeuristical(), and perform two
2847             PeekMessage() on KEYDOWN event, and one gets a powerful almost-robust
2848             algorithm for keyboard input on Windows. (Recall that all the applications
2849             I saw provide close-to-abysmal support of keyboard input on Windows.)
2850            
2851             =head2 Can an application on Windows accept keyboard events? Part IV: application-specific modifiers
2852            
2853             Some applications handle certain keys as “extra modifiers for the purpose of
2854             application-specific accelerator keypresses”. For example, Emacs may treat
2855             the C in this way (as a C modifier for its bindable-keys
2856             framework). Usually, C does not
2857             contribute anything into the ORed mask; hence, C
2858             combination will deliver the same character as just C alone. When
2859             the application treats C as an accelerator, it must
2860             ignore the character delivered by this combination.
2861            
2862             Additionally, many keyboard layouts
2863             use the C flag (it makes the kernel fake pressing/releasing the
2864             left C key when the right C is pressed/released) with “standard”
2865             assignments of the ORed masks. On such keyboards, pressing right C (i.e.,
2866             C) delivers the same characters as pressing any C together with
2867             any C. On the other hand, an application may distinguish left-C combined
2868             with left-C from C pressed
2869             on such keyboards by inspecting which (virtual) keys are currently down. So the application
2870             may consider left-C combined with left-C
2871             as “intended to be an accelerator”; then the application would ignore the characters delivered by
2872             such a keypress.
2873            
2874             One can immediately see that such applications would inevitably enter into conflict
2875             with keyboards which B these key combinations. For example, on a keyboard
2876             which defines an ORed mask for C, pressing C
2877             I deliver a different character than pressing C. However, the
2878             application does not know this, and just ignores the character delivered by
2879             C.
2880            
2881             A similar situation arises when the keyboard defines C to
2882             deliver a different character than C. Again, the character will be ignored
2883             by the application. Since the fact that such an “unusual” keyboard is active
2884             implies user's intent, such behaviour is a bug of the application.
2885            
2886             B an application must interpret a keypress as “intended to be an accelerator”
2887             only if this keypress produces no character, or produces B character as
2888             the key without the “extra” modifier. (Correspondingly, if replacing C by
2889             C does not change the delivered character.)
2890            
2891             B to do this, the application must be able to query “what would happen
2892             if the user pressed different key combinations?”; such a query requires “non-destructive”
2893             calls of ToUnicode(). (These calls must be done I the “actual”, destructive,
2894             call of ToUnicode() corresponding to the currently pressed down modifiers.)
2895            
2896             Fortunately, with the framework described in the
2897             L stripping">,
2898             the call of ToUnicode() is performed with C being 0x01. As explained near the end of the section
2899             L<"Keyboard input on Windows, Part II: The semantic of ToUnicode()">, this call has a “non-destructive”
2900             flavor! Hence, for applications with such “enhanced” modifier keys, the logic of the
2901             L stripping">
2902             should be enhanced in the following ways:
2903            
2904             =over 4
2905            
2906             =item *
2907            
2908             Make a non-destructive call of ToUnicode(). Store the result. If no insertable character
2909             (or deadkey) is delivered, ignore the rest.
2910            
2911             =item *
2912            
2913             If both left C and left C are down (AND right C AND right C are up!)
2914             replace left C by the right C, and
2915             make another non-destructive call of ToUnicode(). If the result is identical to the first one,
2916             mark C as “special modifiers present for accelerators”.
2917            
2918             Remove left C and left C from the collection of keys which are down (argument to ToUnicode()),
2919             and continue with the previous step.
2920             (This may be generalized to other combinations of left/right C/C.)
2921            
2922             =item *
2923            
2924             For every other “special modifier” virtual key which is down,
2925             make another non-destructive call of ToUnicode() with this virtual key up.
2926             If the result is identical to the first one, mark this “special modifier” as “present for accelerators”.
2927            
2928             =item *
2929            
2930             Finally, if nothing suitable for accelerators is found, make a “usual” call of ToUnicode()
2931             (so that on future keypresses the deadkey finite automaton behaves as expected). Generate the
2932             corresponding messages.
2933            
2934             =back
2935            
2936             If no insertable character is delivered, or suitable “extra” accelerators are found, the
2937             process-the-accelerator logic should be triggered.
2938            
2939             For example, if the character Ω is delivered, and a special modifier C is down
2940             and marked as suitable as accelerator, then Ω will be ignored. The accelerator for C
2941             should be triggered. (Processing this as C may be also done. This may require an
2942             extra non-destructive call.)
2943            
2944             An alternative logic is possible: if this Ω was generated by modifiers C
2945             with the virtual key C, then the application may query what C generates standalone (for example,
2946             cyrillic ц), and trigger the accelerator for C. (This assumes that
2947             C with C generates the same Ω!)
2948            
2949             If no character is delivered, then this is a “trivial” situation, and the framework of accelerator keys
2950             should be called as if the complication considered here did not exist.
2951            
2952             B this logic handles the intended behaviour of C key as well! So, with this implementation,
2953             the application would
2954            
2955             =over 5
2956            
2957             =item *
2958            
2959             Handle C-NUMPAD input-by-number in an intuitive mostly compatible with Windows way
2960             (but not bug-for-bug compatible with the Windows' way);
2961            
2962             =item *
2963            
2964             Would recognize C modifier which does not change the delivered character as such. (So it may be processed
2965             as the menu accessor.)
2966            
2967             =item *
2968            
2969             Would recognize B the key combinations defined by the keyboard layout (and deliverable via ToUnicode());
2970            
2971             =item *
2972            
2973             Would recognize all the application-specific extra modifier keys which do not interfere with the
2974             key combinations defined by the keyboard layout.
2975            
2976             =back
2977            
2978             =head2 Far Eastern keyboards on Windows
2979            
2980             The syntax of defining these keyboards is documented in F of the toolkit.
2981             The semantic of the NLS table is undocumented. Here we fix this.
2982            
2983             The function returning the NLS table should be exported with ordinal 2.
2984             The offsets of both tables in the module should be below 0x10000.
2985             The keyboard layout should define a function with ordinal 3 or 5 returning 0, or
2986             be loaded through such a function returning non-0; the signature is
2987            
2988             BOOL ordinal5(HKL hkl, LPWSTR __OUT__ dllname , PCLIENTKEYBOARDTYPE type_if_remote_session, LPVOID dummy);
2989             BOOL ordinal3(LPWSTR __OUT__ dllname);
2990            
2991             if return is non-0, keyboard is reloaded from C.
2992            
2993             In short, these layouts have an extra table which may define the following enhancements:
2994            
2995             One 3-state (or 2-state) radio-button:
2996             on keys with VK codes DBE_ALPHANUMERIC/DBE_HIRAGANA/DBE_KATAKANA
2997             (the third state can be also toggled independently of the others).
2998             Three Toggling (like CAPSLOCK) button (pairs):
2999             toggling radio-button-like VK codes DBE_SBCSCHAR/DBE_DBCSCHAR, DBE_ROMAN/DBE_NOROMAN, DBE_CODEINPUT/DBE_NOCODEINPUT
3000             Make key produce different VK codes with different modifiers.
3001             Make a “reverse NUMPAD” translation.
3002             Manipulate a couple of bits of IME state.
3003             A few random hacks for key-deficient hardware layouts.
3004            
3005             (Via assigning ORed masks to radio-buttons, the radio-buttons and toggle-buttons above may affect the layout.
3006             Using this, it is easy to convert each toggling button to 2-state radiobuttons.
3007             The limitation is that the number of modification columns compatible with the
3008             extra table is at most 8 — counting one for C.)
3009            
3010             Every C may be associated to two tables of functions, the “normal” one, and the “alternative” one. For
3011             every modification column, each table
3012             assigns a filter id, and a parameter for the filter. (Recall that columns are associated
3013             to the ORed masks by the table in the C structure. One B define all the entries
3014             in the table — or at least the entries reachable by the
3015             modifier keys. B the limit on the number of states in the tables is 8; it is not clear what happens with the 
3016             states above this; some versions of Windows may buffer-overflow.)
3017            
3018             The input/output for the filters consists of: the C, C/C flag, the flags associated to the scancode in C<< KBDTABLES->ausVK >>
3019             (may be added to upstream), the
3020             parameter given in C structure (and an unused C read/write parameter). A filter may change these parameters,
3021             then pass the event forward, or it may ignore an event. Filters by ID:
3022            
3023             KBDNLS_NULL Ignore key (should not be called; only for unreachable slots in the tables).
3024             KBDNLS_NOEVENT Ignore key.
3025             KBDNLS_SEND_BASE_VK Pass through VK unchanged.
3026             KBDNLS_SEND_PARAM_VK Replace VK by the number specified as the parameter.
3027             KBDNLS_KANAMODE Ignore UP; on DOWN, toggle (=generate UP-or-DOWN for) DBE_KATAKANA
3028            
3029             These 3 generate UP for “other” key, then DOWN for the target (as needed!):
3030             KBDNLS_ALPHANUM Ignore UP; DBE_ALPHANUMERIC,DBE_HIRAGANA,DBE_KATAKANA → DBE_ALPHANUMERIC
3031             KBDNLS_HIRAGANA Ignore UP; DBE_ALPHANUMERIC,DBE_HIRAGANA,DBE_KATAKANA → DBE_HIRAGANA
3032             KBDNLS_KATAKANA Ignore UP; DBE_ALPHANUMERIC,DBE_HIRAGANA,DBE_KATAKANA → DBE_KATAKANA
3033            
3034             KBDNLS_SBCSDBCS Ignore UP; Toggle DBE_SBCSCHAR / DBE_DBCSCHAR
3035             KBDNLS_ROMAN Ignore UP; Toggle DBE_ROMAN / DBE_NOROMAN
3036             KBDNLS_CODEINPUT Ignore UP; Toggle DBE_CODEINPUT / DBE_NOCODEINPUT
3037             KBDNLS_HELP_OR_END Pass-through if NUMPAD flag ON (in ausVK); send-or-toggle HELP/END (see below)
3038             KBDNLS_HOME_OR_CLEAR Pass-through if NUMPAD flag ON (in ausVK); send HOME/CLEAR (see below)
3039             KBDNLS_NUMPAD If !NUMLOCK | SHIFT, replace NUMPADn/DECIMAL by no-numpad flavors
3040             KBDNLS_KANAEVENT Replace VK by the number specified as the parameter. On DOWN, see below
3041             KBDNLS_CONV_OR_NONCONV See below
3042            
3043             The startup values are C, C, C, C.
3044            
3045             Typical usages:
3046            
3047             KBDNLS_KANAMODE (VK_KANA (Special case))
3048             KBDNLS_ALPHANUM (VK_DBE_ALPHANUMERIC)
3049             KBDNLS_HIRAGANA (VK_DBE_HIRAGANA)
3050             KBDNLS_KATAKANA (VK_DBE_KATAKANA)
3051             KBDNLS_SBCSDBCS (VK_DBE_SBCSCHAR/VK_DBE_DBCSCHAR)
3052             KBDNLS_ROMAN (VK_DBE_ROMAN/VK_DBE_NOROMAN)
3053             KBDNLS_CODEINPUT (VK_DBE_CODEINPUT/VK_DBE_NOCODEINPUT)
3054             KBDNLS_HELP_OR_END (VK_HELP or VK_END) [NEC PC-9800 Only]
3055             KBDNLS_HOME_OR_CLEAR (VK_HOME or VK_CLEAR) [NEC PC-9800 Only]
3056             KBDNLS_NUMPAD (VK_xxx for Numpad) [NEC PC-9800 Only]
3057             KBDNLS_KANAEVENT (VK_KANA) [Fujitsu FMV oyayubi Only]
3058             KBDNLS_CONV_OR_NONCONV (VK_CONVERT and VK_NONCONVERT) [Fujitsu FMV oyayubi Only]
3059            
3060             Toggle (= 2-state) and 3-state radio-keys are switched by sending KEYUP for the currently
3061             “active” key, then KEYDOWN for the newly activated key. When switching 3-state, additional
3062             action happens depending on the new state:
3063            
3064             DBE_ALPHANUMERIC If IME is off, and KANA toggle is on, switch IME on in the KATAKANA mode
3065             DBE_HIRAGANA If IME is off, and KANA toggle is off, switch IME off in the ALPHANUMERIC mode
3066             DBE_KATAKANA SAME AS HIRAGANA
3067            
3068             Additionally, C of C switches IME to
3069            
3070             KANA toggle on: switch IME off in the ALPHANUMERIC mode
3071             KANA toggle off: switch IME on in the KATAKANA mode
3072            
3073             and C (on C and C) passes through, and does
3074            
3075             KANA toggle on, IME off: switch IME off in the ALPHANUMERIC mode
3076             otherwise: Do nothing
3077            
3078             (The semantic of IME being-in/switching-to OFF/ON mode is not clear (probably IME-specific).
3079             The switching happens by
3080             calling C for devices with a C
3081             and C, while putting the request into global memory — unless
3082             C flag is set on the foreground keyboard.)
3083            
3084             For C, the registry is checked at startup. For C, the registry is checked at startup, and:
3085            
3086             KANA_AWARE: flips END/HELP if KANA toggle is ON (on input, “HELP” means not-an-END)
3087             otherwise: sends END/HELP depending on what registry says.
3088            
3089             The checked values are C, C, C in the hive C.
3090            
3091             Which of two tables is chosen is controlled by the type (C/C/C) of the key's tables, and the (per key) history bit.
3092             The initial state of the bit is in C
3093             (L!).
3094             The tables of type C are ignored (the key descriptor passes all events
3095             through), the C key uses only the first table. The C key uses the first table on KEYDOWN, and
3096             uses the first or the second table on KEYUP. The choice depends on modifiers present in the preceding KEYDOWN;
3097             the bitmap C is indexed by the modification column of KEYDOWN event; the second table is
3098             used on the following KEYUP if the indexed bit is set. (The KEYREPEAT events are handled the same way as KEYUP.)
3099            
3100             The typical usage of C keys is to make the KEYUP event match B no matter what
3101             is the order of releasing the modifier keys and the main key.
3102             Having the history bit up “propagates” to KEYUP the information about which modifiers were active on KEYDOWN. This helps in ensuring
3103             consistency of some actions between the KEYDOWN event and the corresponding KEYUP event: remember that the state of modifiers
3104             on KEYUP is often different than the state on KEYDOWN: people can release modifiers in different orders:
3105            
3106             press-Shift, press-Enter, release-Shift, release-Enter ---> Shift-Enter pressed, Enter released
3107             press-Shift, press-Enter, release-Enter, release-Shift ---> Shift-Enter pressed and released
3108            
3109             If pressing C acts as if it were the C key (and only so with C!), to ensure consistency, one would need
3110             to make releasing C B also releasing C to act as if it were the C key. So one can make pressing
3111             C special (via the first table), sets the history bit on C, and make I map C
3112             and C to be special too (send C) I.
3113            
3114             B the standard key processing has its own filters too. C processing adds fake C up/down events
3115             (provided the flag C is set);
3116             C processing ignores/fakes the C/C for C (=C)
3117             (provided the flag C is set); C becomes
3118             C (same for C); C become C/C; C may become C.
3119             OEM translations (NumPad→Cursor, except C; C<00> to double-press of C<0>) come first, then locale-specific (C,
3120             C), then those defined in the tables above.
3121            
3122             B As opposed to these translations, C and C is actually handled inside the
3123             event loop, by ToUnicode().
3124            
3125             B L (and references inside!)
3126             explains fine points of using Japanese keyboards. See also: L.
3127            
3128             =head2 A convenient assignment of C bitmaps to modifier keys
3129            
3130             In this section, we omit discussion of C modifier; so every
3131             bitmap may be further combined with C to produce two different bindings.
3132             Assign ORed masks to the modifier keys as follows:X
3133            
3134             lCtrl Win lAlt rAlt Menu rCtrl
3135             CTRL|LOYA CTRL|X1 ALT|KANA CTRL|ALT|LOYA|X1 CTRL|ALT|X2 CTRL|ALT|ROYA
3136            
3137             with suitable backward-compatible mapping of ORed masks to modification columns.
3138             This assignment allows using C flag (faking presses of C when
3139             C is pressed — this greatly increases compatibility of C with brain-damaged
3140             applications), all the combinations involving at most one of C, C or
3141             C give distinct ORed masks, it
3142             avoids stripping of C on C combined with other modifiers,
3143             makes C work with all relevant combinations, while completely preserving all
3144             application-visible properties of keyboard events [except those with C
3145             modifiers; this combination is equivalent to C].
3146            
3147             Note that ignoring the C and C bits, all combinations of
3148             C are possible, which gives at least 32 C-pairs.
3149             In fact, the only combination of C which may appear with
3150             different C bits is C; hence there are 33 possible combinations
3151             of C. Indeed, C is determined by C.
3152             If one of C is present, then C is set; so assume C are not present.
3153             But then, if C B set, then both C B be present; which gives the
3154             only duplication.
3155            
3156             Leaving out 5 combinations of C, C, C [8, minus the empty one, and
3157             C, which is avoided by most applications due to its similarity to C,
3158             and C which is undistinguishable by the mask from C]
3159             to have bindable keypresses in applications, and having C as equivalent to
3160             C, this gives 27 C-pairs which may produce characters.
3161            
3162             B C being undistinguishable by the mask from C
3163             is not a big deal, since there are no standard keyboard shortcuts involving C.
3164            
3165             B Combinations of C with C L combination: multiple problems">;
3166             likewise for L with C |"C combination: many keys are not delivered to applications">.
3167            
3168             B Removing the binding for C key, only 21 useful C-pairs remain.
3169             (This is what C of L is
3170             using; out of 24 distinct combinations, C, C and C should be
3171             excluded.) B While this may look as a complete overkill, recall that characters
3172             outside BMP can be inserted on Windows I via one keypress, possibly with many
3173             modifiers. (This restriction relates only to the “classical” flavor of Windows keyboard layouts).
3174             Unicode L
3175             discourse|http://en.wikipedia.org/wiki/Mathematical_Alphanumeric_Symbols>. If a keyboard
3176             layout would want to support these letters, this would quickly exhaust the possible combinations
3177             of modifiers. (For 2-script layout, one could live with Latin/AltGr-Latin/Greek + 18 mathematical
3178             alphabets. But for layouts supporting more scripts, it looks like using C key is not
3179             avoidable.)
3180            
3181             B Applications may call ToUnicode() with I of modifiers:
3182             for example, they may "put" C down, but do not specify whether it is C or
3183             C. Likewise for C.
3184            
3185             To support that, one would need to define a mask for standalone C and C
3186             (i.e., C and C). Since these modifiers are present when the real “left-right-handed”
3187             keys are down, the masks should be “contained” in the masks of handed keys. B one
3188             can make the pseudo-key C to generate bit C, and the pseudo-key C to generate
3189             the bit C. Then for any combination of modifiers with unhanded C and/or C,
3190             either the corresponding combination of bits is supported by the layout (and then the
3191             application will access the corresponding modification column — which is probably not
3192             the “expected” column corresponding to some handed flavor), or the combination is not
3193             yet defined. In the latter case, one may actually decide I to resolve this: one can
3194             map this combination of modifiers to an arbitrary modification column!
3195            
3196             In particular, one can map such combination of modifiers to a certain choice of handedness
3197             of C and C. (An example of such a problematic application is L;
3198             look for “I”.)
3199            
3200             B Some applications may do a "reverse lookup" using
3201             L|https://msdn.microsoft.com/en-us/library/windows/desktop/ms646329%28v=vs.85%29.aspx>
3202             (this is B API which exposes the modifier masks). Most of these calls would not
3203             know anything about "higher bits", only S/C/A would be covered. In particular,
3204             it makes sense to add "fake" entries mapping combinations of bits 0x1/0x2/0x4 to the
3205             "corresponding" modification columns.
3206            
3207             For example, C above would produce modifier mask C;
3208             this mask would access a certain column in the table of bindings; make the
3209             mask C access the same column. Then an application making a lookup
3210             for a certain character via VkKeyScanW() would see C. Since this is
3211             the mask which is I produced by pressing C, the application
3212             would think (correctly! — but only thanks to this fake entry) that this character
3213             may be produced with C modifier.
3214            
3215             B The maximal number of “modification columns” supported by Windows is 126. A
3216             larger number would make the size of C overflow the maximal number
3217             storable in the field C of type C = C.
3218            
3219             Given that the column 15 is ignored, this reduces the number of strings associated to
3220             a keypress (with different “modifiers”) to 125.
3221            
3222             =head1 WINDOWS GOTCHAS
3223            
3224             First of all, keyboard layouts on Windows are controlled by DLLs; the only function
3225             of these DLLs is to export a table of "actions" to perform. This table is passed
3226             to the kernel, and that's it - whatever is not supported by the format of this table
3227             cannot be implemented by native layouts. (The DLL performs no "actions" when
3228             actual keyboard events arrive.)
3229            
3230             Essentially, the logic is like that: there are primary "keypresses", and
3231             chained "keypresses" ("prefix keys" [= deadkeys] and keys pressed after them).
3232             Primary keypresses are distinguished by which physical key on keyboard is
3233             pressed, and which of "modifier keys" are also pressed at this moment (as well
3234             as the state of "latched keys" - usually C only, but may be also C). This combination
3235             determines which Unicode character is generated by the keypress, and whether
3236             this character starts a "chained sequence".
3237            
3238             On the other hand, the behaviour of chained keys is governed I by Unicode
3239             characters they generate: if there are several physical keypresses generating
3240             the same Unicode characters, these keypresses are completely interchangeable
3241             inside a chained sequence. (The only restriction is that the first keypress
3242             should be marked as "prefix key"; for example, there may be two keys producing
3243             B<-> so that one is producing a "real dash sign", and another is producing a
3244             "prefix" B<->.)
3245            
3246             The table allows: to map Cs to Cs; to associate a C to several
3247             (numbered) choices of characters to output, and mark some of these choices as prefixes
3248             (deadkeys). (These "base" choices may contain up to 4 16-bit characters (with 32-bit
3249             characters mapped to 2 16-bit surrogates); but only those with 1 16-bit character may
3250             be marked as deadkeys.) For each prefix character (not a prefix key!) one can
3251             associate a table mapping input 16-bit "base characters" to output 16-bit characters,
3252             and mark some of the output choices as prefix characters.
3253            
3254             The numbered choices above are determined by the state of "modifier keys" (such as
3255             C, C, C), but not directly. First of all, C may be
3256             associated to a certain combination of 6 "modifier bits" (called "logical" C,
3257             C, C, C, C and C, but the logical bits are not
3258             required to coincide with names of modifier keys). (Example: one can bind C
3259             to activate C and C bits.) The 64 possible combinations of modifier bits
3260             are mapped to the numbered choices above.
3261            
3262             Additionally, one can define two "separate
3263             numbered choices" in presence of CapsLock (but the only allowed modifier bit is C).
3264             Another way to determine what C is doing: one can mark that it
3265             flips the "logical C" bit (separately on no-modifiers state, C-only state,
3266             and C-only state [?!] - here "only" allows for the C bit to be C).
3267            
3268             C key is considered equivalent to C combination (if those
3269             are present, or always???), and one cannot bind C and C combinations.
3270             Additionally, binding bare C modifier on alphabetical keys (and
3271             C, C<[>, C<]>, C<\>) may confuse some applications.
3272            
3273             B there is some additional stuff allowed to be done (but only in presence
3274             of Far_East_Support installed???). FE-keyboards can define some sticky state (so
3275             may define some other "latching" keys in addition to C). However,
3276             I did not find a clear documentation yet (C in the DDK toolkit???).
3277            
3278             There is a tool to create/compile the required DLL: F of I
3279             Keyboard Layout Creator> (with a graphic frontend F). The tool does
3280             not support customization of modifier bits, and has numerous bugs concerning binding keys which
3281             usually do not generate characters. The graphic frontend does not support
3282             chained prefix keys, adds another batch of bugs, and has arbitrary limitations:
3283             refuses to work if the compiled version of keyboard is already installed;
3284             refuses to work if C is redefined in useful ways.
3285            
3286             B uninstall the keyboard, comment the definition of C,
3287             load in F and create an install package. Then uncomment the
3288             definition of C, and compile 4 architecture versions using F,
3289             moving the DLLs into suitable directories of the install package. Install
3290             the keyboard.
3291            
3292             For development cycle, one does not need to rebuild the install package
3293             while recompiling.
3294            
3295             The following sections classify GOTCHAS into 3 categories:
3296            
3297             L<"WINDOWS GOTCHAS for keyboard users">
3298            
3299             L<"WINDOWS GOTCHAS for keyboard developers using MSKLC">
3300            
3301             L<"WINDOWS GOTCHAS for keyboard developers (problems in kernel)">
3302            
3303             =head1 WINDOWS GOTCHAS for keyboard users
3304            
3305             =head2 MSKLC keyboards not working on Windows 8 without reboot
3306            
3307             The layout is shown as active, but "preview" is grayed out,
3308             and is not shown on the Win-Space list. See also:
3309            
3310             http://www.errordetails.com/125726/activate-custom-keyboard-layout-created-with-msklc-windows
3311            
3312             The workaround is to reboot. Compare with
3313            
3314             http://blogs.msdn.com/b/michkap/archive/2012/03/12/10281199.aspx
3315            
3316             =head2 Default keyboard of an application
3317            
3318             Apparently, there is no way to choose a default keyboard for a certain
3319             language. The configuration UI allows moving keyboards up and down in
3320             the list, but, apparently, this order is not related to which keyboard
3321             is selected when an application starts. (This may be fixed on Windows 8?)
3322            
3323             =head2 Hex input of unicode is not enabled
3324            
3325             One needs to explicitly tinker with the registry (see F)
3326             and then I to enable this.
3327            
3328             =head2 Standard fonts have some chars exchanged
3329            
3330             At least in Consolas and Lucida Sans Unicode φ and ϕ are exchanged.
3331             Compare with Courier and Times. (This may be due to the L
3332             Unicode's pre-v3.0 choice of representative glyphs|http://en.wikipedia.org/wiki/Phi#Computing>,
3333             or the L
3334             between French/English Apla=Didot/Porson's approaches|http://www.greekfontsociety.gr/pages/en_typefaces19th.html>.)
3335            
3336             =head2 The console font configuration
3337            
3338             According to L, it is controlled by Registry hive
3339            
3340             HKLM\SOFTWARE\Microsoft\Windows NT\CurrentVersion\Console\TrueTypeFont
3341            
3342             The key C<0> usually gives C, and the key C<00>
3343             gives C. Adding random numbers does not work; however,
3344             if one adds one more zero (at least when adding to a sequence of zeros),
3345             one can add more fonts.
3346             You need to export this hive (e.g., use
3347            
3348             reg export "HKLM\SOFTWARE\Microsoft\Windows NT\CurrentVersion\Console\TrueTypeFont" console-ttf.reg
3349            
3350             ), save a copy (so you can always restore if the love goes sour)
3351             then edit the resulting file.
3352            
3353             So if the maximal key with 0s is C<00>, add one extra row with an extra 0
3354             at end, and the family name of your font. The "family name" is what the Font
3355             list in C shows for I (a "stacked" icon is shown);
3356             for individual fonts the weight (Regular, Book, Bold etc) is appended. So I add a line
3357            
3358             "000"="DejaVu Sans Mono"
3359            
3360             the result is (omitting Far Eastern fonts)
3361            
3362             Windows Registry Editor Version 5.00
3363            
3364             [HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Windows NT\CurrentVersion\Console\TrueTypeFont]
3365             "949"="..."
3366             "0"="Lucida Console"
3367             "950"="..."
3368             "932"="..."
3369             "936"="..."
3370             "00"="Consolas"
3371             "000"="DejaVu Sans Mono"
3372            
3373             The full file is in F. After importing this
3374             file via F (or give it as parameter to F; both require administrative privileges)
3375             the font is immediately available in menu. (However, it does not work in "existing"
3376             console windows, only in newly created windows.)
3377            
3378             B<(Do not use the example file directly. First inspect the hive exported on your system,
3379             and find the number of 0s to use. Then add a new line with correct number of
3380             zeros - as a value, one can use the string above. This will I the defaults
3381             of your setup.)> Keep in mind that
3382             selection-by-fontfamily is buggy: if you have more than one version of the font
3383             in different weight, it is a Russian roulette which one of them will be taken
3384             (at least for DejaVu, which uses C as the default weight). First install
3385             the "normal" flavor of the font, then do as above (so the system has no way of picking
3386             the wrong flavor!), and only after this install the remaining
3387             flavors.
3388            
3389             B keep in mind that I distribute a good-for-console L<“merge” of two
3390             fonts|http://ilyaz.org/software/fonts/>: C; C brings
3391             in nicely shaped nicely-scalable
3392             glyphs, and C brings a scalable font with complete coverage of BMP (as of 2015, of Unicode C).
3393             (We omit Han/Hangul since it does not fit in a narrow box of a console font.)
3394             (As of 2015, it does not include U+30fb since apparently, this breaks display of
3395             "undefined" character in PUA in Windows' console.)
3396            
3397             B the string to put into C is the I of the font.
3398             The family name is what is shown in the C list of the C — but only
3399             for families with more than one font; otherwise the “metric name” of the font is appended.
3400            
3401             On Windows, it is tricky to find the family name using the default Windows' tools, without
3402             inspecting the font in a font editor. One workaround is to select the font in C
3403             application, then inspect C via:
3404            
3405             reg export HKCU\Software\Microsoft\CharMap character-map-font.reg
3406            
3407             Note: the Microsoft KB article mentioned above lists the wrong way to find the family name.
3408             What is visible in the C dialogue of the font, and in C is the
3409             I. Fortunately, quite often the full name and the family name coincide —
3410             this is what happened with C. To find the "Full name" of the font, one can look into the hive
3411            
3412             HKLM\SOFTWARE\Microsoft\Windows NT\CurrentVersion\Fonts
3413             reg export "HKLM\SOFTWARE\Microsoft\Windows NT\CurrentVersion\Fonts" fonts.reg
3414            
3415             For example, after installing C, I see
3416             C as a key in this hive.
3417            
3418             B for desktop icons coming from the “Public” user (“shared”
3419             icons) which start a console application, the default font is not directly editable.
3420             To reset it, one must:
3421            
3422             =over
3423            
3424             =item *
3425            
3426             copy the F<.lnk> icon file to “your” desktop directory;
3427            
3428             =item *
3429            
3430             start the application using the “new” icon;
3431            
3432             =item *
3433            
3434             change the font via “Properties” of the window's menu;
3435            
3436             =item *
3437            
3438             as administrator, copy the F<.lnk> file back to the F
3439             directory (usually in something like F). Manually refresh
3440             the desktop. Verify that the “old” icon works as expected.
3441             (Now you can remove the “new” icon created on the first step.)
3442            
3443             =back
3444            
3445             =head2 There is no way to show Unicode contents on Windows
3446            
3447             Until Firefox C, one could use FireFox to show arbitrary
3448             Unicode text (limited only by which fonts are installed on your
3449             system). If you upgraded to a newer version, there is no (AFAIK)
3450             Windows program (for general public consumption) which would visualize
3451             Unicode text. The applications are limited either (in the worst case) by
3452             the characters supported by the currently selected font, or (in the best
3453             case) they can show additionally characters, but only those considered by the
3454             system as "important enough" (coming from a few of default fonts?).
3455            
3456             There is a workaround for this major problem in FireFox (present at least
3457             up to C). The problem is caused
3458             by L
3459             which blatantly saves a few seconds of load time for a tiny minority of
3460             users, the price being an inability to show Unicode I
3461             (compare with comments L<33|https://bugzilla.mozilla.org/show_bug.cgi?id=705594#c33>
3462             and L<75|https://bugzilla.mozilla.org/show_bug.cgi?id=705594#c75> on the bug report above).
3463            
3464             It is not documented, but this action is controlled by C
3465             setting C. To enable Unicode,
3466             make this setting into C (if you have it in the list as C, double-clicking it would
3467             do this — do search to determine this; otherwise you need to create a new
3468             C entry).
3469            
3470             There is an alternative/additional way to enable extra fonts; it makes
3471             sense if you know a few character-rich fonts present on your system. The (undocumented)
3472             settings C (apparently) control fallback fonts for situations
3473             when a suitable font cannot be found via more specific settings. For example, when
3474             you installed (free) L,
3475             L, L fonts on your system, you may set (these
3476             variables are not present by default; you need to create new C variables):
3477            
3478             font.name-list.sans-serif.x-unicode DejaVu Sans,Symbola,DejaVu Serif,DejaVu Sans Mono,Junicode,Unifont Smooth
3479             font.name-list.serif.x-unicode DejaVu Serif,Symbola,Junicode,DejaVu Sans,Symbola,DejaVu Sans Mono,Unifont Smooth
3480             font.name-list.cursive.x-unicode Junicode,Symbola,DejaVu Sans,DejaVu Serif,DejaVu Sans Mono,Unifont Smooth
3481             font.name-list.monospace.x-unicode DejaVu Sans Mono,DejaVu Sans,Symbola,DejaVu Serif,Junicode,Unifont Smooth
3482            
3483             And maybe also L
3484            
3485             font.name-list.fantasy.x-unicode Symbola,DejaVu Serif,Junicode,DejaVu Sans Mono,DejaVu Sans,Unifont Smooth
3486            
3487             (Above, we use the L||http://ilyaz.org/software/fonts/>
3488             as the font of last resort. Although the glyphs are very coarse, in this role
3489             it is very useful since it contains all the Unicode C characters in BMP.)
3490            
3491             B L of C
3492             contains “fake” glyphs for characters not supported by the font. Such a design error is inexcusable for a TrueType font; this gets
3493             in the way when an application tries to find the best way to show a character. Using
3494             (non-C variant of) my “C” re-build not only fixes this (and some other) problems,
3495             but also makes the font nicely scalable — the original works well only in the size 16px.
3496            
3497             If you set both: the C variables with rich enough fonts,
3498             B C,
3499             then you may have the best of both worlds: the situation when a character cannot
3500             be shown via C settings will be extremely rare, so the possibility of delay
3501             due to C is irrelevant.
3502            
3503             =head2 Firefox misinterprets keypresses
3504            
3505             =over 4
3506            
3507             =item *
3508            
3509             Multiple prefix keys are not supported.
3510            
3511             =item *
3512            
3513             C and C are recognized as a character-generating
3514             keypress (good!), but the character they produce bears little relationship
3515             to what keyboard produces. (In our examples, the character may be available
3516             only via multiple prefix keys!)
3517            
3518             =item *
3519            
3520             After a prefix key, C is not recognized as a
3521             character-generating key.
3522            
3523             =item *
3524            
3525             C is not recognized as a character-generating key.
3526            
3527             =item *
3528            
3529             C is not recognized as a character-generating key sequence (recall
3530             that C should be pressed all the time, and other keys C<+ HEXDIGITS> should be
3531             pressed+released sequentially).
3532            
3533             =item *
3534            
3535             When keyboard has an “extra” modifier key in addition to C (an
3536             analogue of C key), combining it with C or with C is interpreted
3537             by Firefox as if only C or C were pressed.
3538            
3539             =item *
3540            
3541             When keyboard generates different characters on C than on C
3542             (possible with assigning extra modifier bits to C), FireFox interprets any
3543             C as if it were C.
3544            
3545             C when C produces a character, this character is understood
3546             correctly by FF. Same for C (but again, while this works on numeric
3547             keypad, it is still buggy if C is on, or if the key is C.)
3548            
3549             =item *
3550            
3551             The keyboard may have C which produces the same characters as C, but
3552             which behaves differently when combined with other keys. FireFox ignores these
3553             differences.
3554            
3555             This is combinable with other remarks above: e.g., C is interpreted
3556             by FireFox as C.
3557            
3558             =item *
3559            
3560             In addition to this, Firefox replaces C and C modifiers by
3561             an I: Firefox pretends that I C is down. (Here
3562             C is a fake key C which Windows pretends is down when either one
3563             of C or C is down.) Since the situation when C
3564             is down, but neither C nor C are down is not possible, this
3565             may access parts of the keyboard layout not visible to other applications.
3566             (Same for C and C.)
3567            
3568             The net effect is that key combinations involving C or C keys
3569             may behave wrong in Firefox. For example, with version C<0.63> of
3570             L, C and C
3571             are ignored on character-producing keys.
3572            
3573             =item *
3574            
3575             If C produces C< — > (this is C), and
3576             C produces the “cedilla deadkey”, then pressing C
3577             acts as both: first C are inserted, then C<ç>.
3578            
3579             =item *
3580            
3581             A subtle variation of the previous failure mode: If C produces
3582             deadkey X, and C produces the deadkey Y, then combining C
3583             with C gives the expected Y*a combination. However, if combining with
3584             something more complicated (C or C), with which
3585             deadkey Y is not combinable, B the bugs strike:
3586            
3587             =over 4
3588            
3589             =item 1
3590            
3591             in the first case the deadkey behaves as X: it produces a pair of characters
3592             C; here C produces C<α>. (Keep in mind that inserting two
3593             characters is the expected behaviour outside of Firefox, but Firefox usually
3594             “eats” an undefined deadkey combination; and note that it is X, not the
3595             expected Y!).
3596            
3597             =item 2
3598            
3599             in the second case it produces only the character C<ф> generated by C. Here
3600             the behaviour is neither as outside Firefox (where it would produce C) nor as
3601             usual in Firefox (where it would eat the undefined sequence).
3602            
3603             =back
3604            
3605             =back
3606            
3607             Of these problems, C has only C one, but a very cursory inspection shows
3608             other problems: C are not recognized as character-generating keys. (And IE9 just
3609             crashes in most of these situations…)
3610            
3611             =head2 C-keypresses triggering some actions
3612            
3613             For example, newer versions of Windows have graphics driver reacting on Cs by
3614             rotating the screen. Usually, when you know which application is stealing your keypresses, one
3615             can find a way to disable or reconfigure this action.
3616            
3617             For screen rotation: Right-Click on desktop, “Graphics Options”, “Hot Keys”, disable. The way to
3618             reconfigure this is to use “Graphics Properties” instead of “Graphics Options” (but this may depend
3619             on your graphics subsystem).
3620            
3621             =head2 C-keypresses going nowhere
3622            
3623             Some C-keypresses do not result in the corresponding letter on
3624             keyboard being inserted. It looks like they are stolen by some system-wide
3625             hotkeys. See:
3626            
3627             http://www.kbdedit.com/manual/ex13_replacing_altgr_with_kana.html
3628            
3629             If these keypresses would perform some action, one might be able to deduce
3630             how to disable the hotkeys. So the real problem comes when the keypress
3631             is silently dropped.
3632            
3633             I found out one scenario how this might happen, and how to fix this particular
3634             situation. (Unfortunately, it did not fix what I see, when C [but not
3635             C] is stolen.) Installing a shortcut, one can associate a hotkey to
3636             the shortcut. Unfortunately, the UI allows (and encourages!) hotkeys of the
3637             form (which are equivalent to C) - instead
3638             of safe combinations like C or
3639             C (which — by convention — are ignored by keyboard drivers, and do not generate
3640             characters). If/when an application linked to by this shortcut is
3641             gone, the hotkey remains, but now it does nothing (no warning or dialogue comes).
3642            
3643             If the shortcut is installed in one of "standard places", one can find it.
3644             Save this to F (replace F by the suitable drive letter
3645             here and below)
3646            
3647             on error resume next
3648             set WshShell = WScript.CreateObject("WScript.Shell")
3649             Dim A
3650             Dim Ag
3651             Set Ag=Wscript.Arguments
3652             If Ag.Count > 0 then
3653             For x = 0 to Ag.Count -1
3654             A = A & Ag(x)
3655             Next
3656             End If
3657             Set FSO = CreateObject("Scripting.FileSystemObject")
3658             f=FSO.GetFile(A)
3659             set lnk = WshShell.CreateShortcut(A)
3660             If lnk.hotkey <> "" then
3661             msgbox A & vbcrlf & lnk.hotkey
3662             End If
3663            
3664             Save this to F
3665            
3666             set findhotkey=k:\findhotkey
3667             for /r %%A in (*.lnk) do %findhotkey%.vbs "%%A"
3668             for /r %%A in (*.pif) do %findhotkey%.vbs "%%A"
3669             for /r %%A in (*.url) do %findhotkey%.vbs "%%A"
3670             cd /d %UserProfile%\desktop
3671             for /r %%A in (*.lnk) do %findhotkey%.vbs "%%A"
3672             for /r %%A in (*.pif) do %findhotkey%.vbs "%%A"
3673             for /r %%A in (*.url) do %findhotkey%.vbs "%%A"
3674             cd /d %AllUsersProfile%\desktop
3675             for /r %%A in (*.lnk) do %findhotkey%.vbs "%%A"
3676             for /r %%A in (*.pif) do %findhotkey%.vbs "%%A"
3677             for /r %%A in (*.url) do %findhotkey%.vbs "%%A"
3678             cd /d %UserProfile%\Start Menu
3679             for /r %%A in (*.lnk) do %findhotkey%.vbs "%%A"
3680             for /r %%A in (*.pif) do %findhotkey%.vbs "%%A"
3681             for /r %%A in (*.url) do %findhotkey%.vbs "%%A"
3682             cd /d %AllUsersProfile%\Start Menu
3683             for /r %%A in (*.lnk) do %findhotkey%.vbs "%%A"
3684             for /r %%A in (*.pif) do %findhotkey%.vbs "%%A"
3685             for /r %%A in (*.url) do %findhotkey%.vbs "%%A"
3686             cd /d %APPDATA%
3687             for /r %%A in (*.lnk) do %findhotkey%.vbs "%%A"
3688             for /r %%A in (*.pif) do %findhotkey%.vbs "%%A"
3689             for /r %%A in (*.url) do %findhotkey%.vbs "%%A"
3690             cd /d %HOMEDRIVE%%HOMEPATH%
3691             for /r %%A in (*.lnk) do %findhotkey%.vbs "%%A"
3692             for /r %%A in (*.pif) do %findhotkey%.vbs "%%A"
3693             for /r %%A in (*.url) do %findhotkey%.vbs "%%A"
3694            
3695             (In most situations, only the section after the last C is important;
3696             in my configuration all the "interesting" stuff is in C<%APPDATA%>. Running
3697             this should find all shortcuts which define hot keys.)
3698            
3699             Run the cmd file. Repeat in the "All users"/"Public" directory. It should
3700             show a dialogue for every shortcut with a hotkey it finds. (But, as I said,
3701             it did not fix I problem: C works in F test window,
3702             and nowhere else I tried...)
3703            
3704             =head2 C-keypresses starting bloatware applications
3705            
3706             (Seen on IdeaPad.) Some pre-installed programs may steal C-keypresses;
3707             it may be hard to understand what is the name of the application even when
3708             the stealing results in user-visible changes.
3709            
3710             One way to deal with it is to start C in C (or
3711             C) panel, and click on CPU column until one gets decreasing-order
3712             of CPU percentage. Then one can try to detect which process is becoming
3713             active by watching top rows when the action happens (or when one manages to
3714             get back to the desktop from the full-screen bloatware); one may need to
3715             repeat triggering this action several times in a row. After you know
3716             the name of executable, you can google to find out how to disable it, and/or
3717             whether it is safe to kill this process.
3718            
3719             B On IdeaPad, it was F (safe to kill). It was stealing
3720             C and C.
3721            
3722             B On MSI, a similar stealer was F (some claim it is used to show on-screen
3723             animation when special laptop keys are pressed; if you do not need them, it is safe
3724             to kill). It was stealing C. (But to find I one, I needed to
3725             kill all suspicious apps one by one…)
3726            
3727             =back
3728            
3729             =head1 WINDOWS GOTCHAS for keyboard developers using MSKLC
3730            
3731             =head2 Several similar F created keyboards may confuse the system
3732            
3733             Apparently, the system may get majorly confused when the C
3734             of the project gets changed without changing the DLL (=project) name.
3735            
3736             (Tested only with Win7 and the name in the DESCRIPTIONS section
3737             coinciding with the name on the KBD line - both in F<*.klc> file.)
3738            
3739             The symptoms: I know how one can get 5 different lists of keyboards:
3740            
3741             =over 4
3742            
3743             =item 1
3744            
3745             Click on the keyboard icon in the C - usually shown
3746             on the toolbar; positioned to the right of the language code EN/RU
3747             etc (keyboard icon is not shown if only one keyboard is associated
3748             to the current language).
3749            
3750             =item
3751            
3752             Go to the C settings (e.g., right-click on the
3753             Language bar, Settings, General).
3754            
3755             =item
3756            
3757             on this C page, press C button, go to the language
3758             in question.
3759            
3760             =item
3761            
3762             Check the F<.klc> files for recently installed Input Languages.
3763            
3764             =item
3765            
3766             In MS Keyboard Layout Creator, go to C
3767             list.
3768            
3769             =back
3770            
3771             It looks like the first 4 get in sync if one deletes all related keyboards,
3772             then installs the necessary subset. I do not know how to fix 5 - MSKLC
3773             continues to show the old name for this project.
3774            
3775             Another symptom: Current language indicator (like C) on the language
3776             bar disappears. (Reboot time?)
3777            
3778             Is it related to C<***\Local Settings\MuiCache\***> hive???
3779            
3780             Possible workaround: manually remove the entry in C
3781             (the last 4 digits match the codepage in the F<.klc> file).
3782            
3783             =head2 Too long description (or funny characters in description?)
3784            
3785             If the name in the C section is too long, the name shown in
3786             the list C<2> above may be empty.
3787            
3788             (Checked only on Win7 and when the name in the DESCRIPTIONS section
3789             coincides with the name on the C line - both in F<*.klc> file.
3790             Length=63 works fine, Length=64 triggers the bug.)
3791            
3792             (Fixed by shortening the name [but see
3793             L<"Several similar F created keyboards may confuse the system">
3794             above!], so maybe it was
3795             not the length but some particular character (C<+>?) which was confusing
3796             the system. (I saw a report on F bug when description had apostrophe
3797             character C<'>.))
3798            
3799             =head2 F ruins names of dead key when reading a F<.klc>
3800            
3801             When reading a F<.klc> file, MS Keyboard Layout Creator may ruin the names
3802             of dead keys. Symptom: open the dialogue for a dead key mapping
3803             (click the key, check that C has checkmark, click on the
3804             C<...> button near the C checkbox); then the name (the first
3805             entry field) contains some junk. (Looks like a long ASCII string
3806            
3807             U+0030 U+0030 U+0061 U+0039
3808            
3809             .)
3810            
3811             B if all one needs is to compile a F<.klc>, one can run
3812             F directly.
3813            
3814             B correct ALL these names manually in MSKLC. If the names are
3815             the Unicode name for the dead character, just click the C button
3816             near the entry field. Do this for ALL the dead keys in all the registers
3817             (including C!). If C is not made "semantically meaningful",
3818             there are 6 views of the keyboard (C
3819             AltGr, AltGr+Shift>) - check them all for grayed out keys (=deadkeys).
3820            
3821             Check for success: C, use a temporary name.
3822             Inspect near the end of the generated F<.klc> file. If OK, you can
3823             go to the Project/Build menu. (Likewise, this way lets you find which
3824             deadkey's names need to be fixed.)
3825            
3826             !!! This is time-consuming !!! Make sure that I things are OK
3827             before you do this (by C, C).
3828            
3829             BTW: It might be that this is cosmetic only. I do not know any bad
3830             effect - but I did not try to use any tool with visual feedback on
3831             the currently active sub-layout of keyboard.
3832            
3833             =head2 Double bug in F with dead characters above 0x0fff
3834            
3835             This line in F<.klc> file is treated correctly by F's builtin keyboard tester:
3836            
3837             39 SPACE 0 0020 00a0@ 0020 2009@ 200a@ // ,  , ,  ,   // SPACE, NO-BREAK SPACE, SPACE, THIN SPACE, HAIR SPACE
3838            
3839             However, via F it produces the following two bugs:
3840            
3841             static ALLOC_SECTION_LDATA MODIFIERS CharModifiers = {
3842             &aVkToBits[0],
3843             7,
3844             {
3845             // Modification# // Keys Pressed
3846             // ============= // =============
3847             0, //
3848             1, // Shift
3849             2, // Control
3850             SHFT_INVALID, // Shift + Control
3851             SHFT_INVALID, // Menu
3852             SHFT_INVALID, // Shift + Menu
3853             3, // Control + Menu
3854             4 // Shift + Control + Menu
3855             }
3856             };
3857             .....................................
3858             {VK_SPACE ,0 ,' ' ,WCH_DEAD ,' ' ,WCH_LGTR ,WCH_LGTR },
3859             {0xff ,0 ,WCH_NONE ,0x00a0 ,WCH_NONE ,WCH_NONE ,WCH_NONE },
3860             .....................................
3861             static ALLOC_SECTION_LDATA LIGATURE2 aLigature[] = {
3862             {VK_SPACE ,6 ,0x2009 ,0x2009 },
3863             {VK_SPACE ,7 ,0x200a ,0x200a },
3864            
3865             Essentially, C<2009@ 200a@> produce C (= multiple 16-bit chars)
3866             instead of deadkeys. Moreover, these ligatures are put on non-existing
3867             "modifications" 6, 7 (the maximal modification defined is 4; so the code uses
3868             the C flags instead of "modification number" in
3869             the ligatures table).
3870            
3871             =head2 F keyboards handle C, C , C and C differently than US keyboard
3872            
3873             The US keyboard produces (as the
3874             “string value”) the corresponding Control-letter when
3875             C is pressed. (In console applications,
3876             C<\x00> is not visible.) F does not reproduce this
3877             behaviour. This may break an application if
3878             it was not specifically tested with “complicated” keyboards.
3879            
3880             The only way to fix this from the “naive” keyboard
3881             layout DLL (i.e., the kind that F generates) which I found is to
3882             explicitly include C as a handled combination, and return
3883             C on such keypresses. (This is enabled in the
3884             keyboards generated by this module - not customizable in v0.12.)
3885            
3886             =head2 "There was a problem loading the file" from F
3887            
3888             Make line endings in F<.klc> DOSish.
3889            
3890             =head2 C do not work
3891            
3892             Make line endings in F<.klc> DOSish (when given as input to F -
3893             it gives no error messages, and deadkeys work [?!]).
3894            
3895             =head2 Error 2011 (ooo-us, line 33): There are not enough columns in the layout list.
3896            
3897             The maximal line end of F is exceeded (a line or two ahead). Try removing
3898             inline comments. If this helps, change the workflow to cut off long lines (250 bytes is OK).
3899            
3900             =head2 C
3901            
3902            
3903            
3904             from F. This means that the internal table of virtual keys
3905             mapped to non-C (sic!) scancodes is overloaded.
3906            
3907             Time to switch to direct generation of F<.c> file? Or you need to
3908             triage the “added” virtual keys, and decide which are less important
3909             so you can delete them from the F<.klc> file.
3910            
3911             =head2 Only the first 8 with-modifiers columns are processed by F
3912            
3913             Time to switch to direct generation of F<.c> file?
3914            
3915             =head2 Only the first digit of the which-modifier-column is output by F in C
3916            
3917             Time to switch to direct generation of F<.c> file?
3918            
3919             =head2 F produces C section with meaningless entries for prefix keys C<0x08>, C<0x0A>, C<0x0D>
3920            
3921             These entries do not stop keyboard from working. They look like C...
3922            
3923             Time to switch to direct generation of F<.c> file?
3924            
3925             =head2 It is not clear how to compile F<.C> files emitted by F
3926            
3927             This distribution includes a script F which can do this. It is
3928             inspired by
3929            
3930             http://stackoverflow.com/questions/3360746/how-can-i-compile-programmer-dvorak
3931             http://levicki.net/articles/tips/2006/09/29/HOWTO_Build_keyboard_layouts_for_Windows_x64.php
3932            
3933             It allows us to build using the cycle
3934            
3935             =over 4
3936            
3937             =item *
3938            
3939             Build skeleton F<.klc> file.
3940            
3941             =item *
3942            
3943             Convert to B using F.
3944            
3945             =item *
3946            
3947             Patch against bugs in F.
3948            
3949             =item *
3950            
3951             Patch in features not supported by F.
3952            
3953             =item *
3954            
3955             Compile and link DLLs.
3956            
3957             =back
3958            
3959             (This assumes that the installer was already built by F using a
3960             “simplified-to-nothing” F<.klc> file which does not trigger the F bugs).
3961            
3962             (See also L.)
3963            
3964             =head2 F cannot ignore column=15 of the keybinding definition table
3965            
3966             (Compare with L<"Windows ignores column=15 of the keybinding definition table">.)
3967            
3968             F requires that all the columns are associated to a modifier-bitmap.
3969             But column=15 should not be associated to any.
3970            
3971             The workaround is to associate it to the bitmap which should not be bound to any
3972             column (like C<4=KBDALT>). In the output C<.C> file, one would have 15 instead
3973             of C for the bitmap 4, but C is defined to be 15 anyway…
3974            
3975             =head2 F ignores bits above 0x20 in the modification columns descriptor
3976            
3977             Time to switch to direct generation of F<.C> files?
3978            
3979             =head2 F cannot assign more than one bitmask to a modification column
3980            
3981             Time to switch to direct generation of F<.C> files?
3982            
3983             (Quite often, one combination of modifiers should produce the same characters as
3984             another one. The format of keyboard layout tables allows them to share a
3985             modification column. The format of F<.klc> files does not allow sharing.)
3986            
3987             =head2 F forgets to emit C/6/8
3988            
3989             If the F<.klc> file has many modification columns, the emitted aVkToWcharTable
3990             contains only C/2.
3991            
3992             =head2 F confuses LIGATURES on unusual keys
3993            
3994             For example, C may be replaced by C in the LIGATURES table.
3995            
3996             Time to switch to direct generation of F<.C> files?
3997            
3998             =head2 F places C at end of the generated F<.c> file
3999            
4000             The offset of this structure should be no more than 0x10000. Thus keyboards
4001             with large tables of prefixed keys may fail to load. This may be related to
4002             the bug L<"If data in C takes too much space, keyboard is mis-installed, and “Language Bar” goes crazy">.
4003            
4004             Time to switch to direct generation of F<.C> files?
4005            
4006             =head2 Error "the required resource DATABASE is missing" from F
4007            
4008             The localized C in F<.klc> file contains a character outside of
4009             the repertoire of the codepage in question. Removing offending characters, or
4010             removing the C altogether should fix this. (But either way, the name of
4011             layout in the C of the Language Bar may become empty.) Having a
4012             different localized description has a side effect that the name of the layout
4013             shown in the Language Bar popups is localized.
4014            
4015             (The localized description is what is put into the C of the
4016             DLL file; it is this resource which is mentioned in the registry. (There
4017             will be no such resource when the localized C is missing.))
4018            
4019             (The failure of F is not reproducible after a reboot!)
4020            
4021             Apparently, this has nothing to do with the length, so the (older) conjectures
4022             below are wrong (although the F<.RC> file generated by MSKLC has the [non-localized] name
4023             truncated after 40 chars in the field C — but not in other fields):
4024            
4025             It looks like there is a buffer overflow in MSKLC, and sometimes the generated
4026             F in the install package would just exit with this error. The
4027             apparent reason is the length of the C-like fields.
4028            
4029             Workaround: it looks like the C field is not used in F.
4030             So generate an “extra dummied” F<.klc> file I (with shortened descriptions),
4031             make an install package from it, and mix the F from the “extra
4032             dummied” variant with the rest of the install package from a
4033             “less dummied” F<.klc> file.
4034            
4035             The alternative is to get rid of F completely, and ask users
4036             to run the appropriate F<.msi> file from the install package by hand
4037             (choosing based on 32-bit vs 64-bit architecture).
4038            
4039             =head2 Summary of the productive workflow with F<.klc>:
4040            
4041             If direct generation of F<.C> files is out of question, the following workflow
4042             may be used (some of these steps may be omitted depending on how complicated
4043             your F<.klc> layout is; for practical implementation, see
4044             L creation|http://cpansearch.perl.org/src/ILYAZ/UI-KeyboardLayout/examples/build-iz.pl>
4045             and L to F<.dll>
4046             processing|http://cpansearch.perl.org/src/ILYAZ/UI-KeyboardLayout/examples/build_here.cmd>):
4047            
4048             =over 4
4049            
4050             =item
4051            
4052             Make an “extra dummied” F<.klc> (short descriptions, short dummy C,
4053             C, C, C sections, no C section). Run
4054             it through GUI MSKLC (C, then C, C).
4055             Store the generated F, rename the directory.
4056            
4057             =item
4058            
4059             Make a “less dummied” F<.klc> file (as above, but with the correct description).
4060             Do as above, and mix in the F from the previous step.
4061            
4062             =item
4063            
4064             Run the “real” F<.klc> file through the F CLI. Fix errors in the
4065             generated F<.C> and F<.H> files (using scripts and patches if needed).
4066            
4067             (One may need to remove a few lines in the C section to avoid buffer overflows too.)
4068            
4069             =item
4070            
4071             Compile the fixed F<.C> files. (One may need to split them in two to
4072             decrease the offset of the static table in the DLL to the level
4073             Windows can handle: less than 64K.) Mix the generated F<.dll> files
4074             with the install package made above.
4075            
4076             =back
4077            
4078             =head1 WINDOWS GOTCHAS for application developers (problems in kernel)
4079            
4080             =head2 Many applications need to know the state of hidden flag C
4081            
4082             To decide what to do with a keypress, an application may need to know
4083             whether C is enabled in the keyboard (in other words, if
4084             C is faked when C is pressed). For example, when
4085             the kernel processes accelerators, it would not trigger C
4086             if C was pressed with C in the presence of this flag — even
4087             though C I visible as being pressed (one needs to press
4088             C).
4089            
4090             An application with configurable bindings may need to emulate this action
4091             of TranslateMessage(). One of the ways to do this may be to do (when
4092             C and C are down)
4093            
4094             =over 4
4095            
4096             =item *
4097            
4098             Set a global flag disabling processing of C in the application;
4099            
4100             =item *
4101            
4102             Call TranslateAccelerator() with an improbable virtual key (C or
4103             some such) and appropriate ad hoc translation table;
4104            
4105             =item *
4106            
4107             Check whether accelerator was recognized (if so, C is not enabled).
4108            
4109             =back
4110            
4111             Possible problems with this approach: the “improbable key” should better not
4112             trigger some system accelerator (this is why one should not use “ordinary”
4113             keys). Additionally, some system accelerators react on Windows key as a
4114             modifier; so acceleration table may specify this as a certain flag. This
4115             would imply that the algorithm above may not work when C key is
4116             down. (Not tested.)
4117            
4118             (Or maybe these C bindings are not accelerators, and are
4119             processed in a different part of keyboard input events. — Then there is
4120             little to worry about.)
4121            
4122             =head1 WINDOWS GOTCHAS for keyboard developers (problems in kernel)
4123            
4124             =head2 It is hard to understand what a keyboard really does
4125            
4126             To inspect the output of the keyboard in the console mode (may be 8-bit,
4127             depending on how Perl is compiled), one can run
4128            
4129             perl -MWin32::Console -wle 0 || cpan install Win32::Console
4130             perl -we "sub mode2s($){my $in = shift; my @o; $in & (1<<$_) and push @o, (qw(rAlt lAlt rCtrl lCtrl Shft NumL ScrL CapL Enh ? ??))[$_] for 0..10; qq(@o)} use Win32::Console; my $c = Win32::Console->new( STD_INPUT_HANDLE); my @k = qw(T down rep vkey vscan ch ctrl); for (1..20) {my @in = $c->Input; print qq($k[$_]=), ($in[$_] < 0 ? $in[$_] + 256 : $in[$_]), q(; ) for 0..$#in; print(@in ? mode2s $in[-1] : q(empty)); print qq(\n)}"
4131            
4132             This installs Win32::Console module (if needed; included with ActiveState Perl)
4133             then reports 20 following console events (press and keep C key
4134             to exit by generating a “harmless” chain of events). B the reported
4135             input character is not processed (via ToUnicode(); hence chained keys and
4136             multiple chars per key are reported only as low-level), and is reported as
4137             a signed 8-bit integer (so the report for above-8bit characters is
4138             completely meaningless).
4139            
4140             T=1; down=1; rep=1; vkey=65; vscan=30; ch=240; ctrl=9; rAlt lCtrl
4141             T=1; down=0; rep=1; vkey=65; vscan=30; ch=240; ctrl=9; rAlt lCtrl
4142            
4143             This reports single (T=1) events for keypress/keyrelease (down=1/0) of
4144             C. One can see that C generates C modifiers
4145             (this is just a transcription of C),
4146             that C is on virtual key 65 (this is C) with virtual scancode
4147             30, and that the generated character (it was C<æ>) is C<240>.
4148            
4149             The character is approximated to the current codepage. For example, this is
4150             C entering C<β = U+03b2> in codepage C:
4151            
4152             T=1; down=1; rep=1; vkey=66; vscan=48; ch=223; ctrl=0;
4153             T=1; down=0; rep=1; vkey=66; vscan=48; ch=223; ctrl=0;
4154            
4155             Note that C<223 = 0xDF>, and C. So I is substituted by
4156             I.
4157            
4158             There is also a script F in this distribution
4159             which does a little
4160             bit more than this. One can also give this script the argument C (or C,
4161             where C is the 0-based number among the listed keyboard layouts) to report
4162             ToUnicode() results, or argument C to report what is produced by reading raw
4163             characters (as opposed to events) from the console.
4164            
4165             =head2 It is not documented how to make a with-prefix-key(s) combination produce 0-length string
4166            
4167             Use C<0000@> (in F<.klc>), or DEADKEY 0 in a F<.c> file. Explanation: what a prefix key
4168             is doing is making the kernel remember a word (the state of the finite automaton), and not
4169             producing any output character. Having no prefix key corresponds to the state being 0.
4170            
4171             Hence making prefix_key=0 is the same as switching the finite automaton to the initial
4172             state, and not producing any character — and this is exactly what is requested in the question.
4173            
4174             =head2 If data in C takes too much space, keyboard is mis-installed, and “Language Bar” goes crazy
4175            
4176             Installation reports success, the keyboard appears in the list in the Language Bar's "Settings".
4177             But the keyboard is not listed in the menu of the Language Bar itself. (This is not fixed
4178             by a reboot.)
4179            
4180             Deinstalling (by F's installer) in such a case removes one (apparently, the last) of the listed keyboards for the language;
4181             at least it is removed from the menu of the Language Bar itself. However, the list in the “Settings”
4182             does not change! One can't restore the (wrongly) removed (unrelated!) layout by manipulating the latter list.
4183             (I did not try to check what will happen if only one keyboard for the language is available — is it removed
4184             for good?) I condition is fixed by a reboot: the “missing” “unrelated” layout jumps to existence.
4185            
4186             I did not find a way to restore the deleted keyboard layout (without a reboot). Experimenting with these is kinda painful:
4187             with each failure,
4188             I add one extra keyboard to the list in the “Settings”; - so the list is growing and growing! [Better
4189             add useless-to-you keyboards, since until the reboot you will never be able to install them again.]
4190            
4191             B this condition reappeared in update from v0.61 to v0.63 of B layouts. Between
4192             these versions, there was
4193             a very small increment of the size: one modification column was added, and two deadkeys were added.
4194             Removing a bunch of (useless?) dead keys descriptions fixed this again; but now I have my doubts on
4195             whether it was due to I increasing the size of C… Maybe it is due to the total
4196             size of certain segments in the DLL.
4197            
4198             (This may be related to the bug L<"F places C at end of the generated F<.c> file">.)
4199            
4200             =head2 Windows ignores column=15 of the keybinding definition table
4201            
4202             Note that 15 is C; this column number is used to indicate that
4203             this particular combination of modifiers does not produce keys. In particular,
4204             the generator must avoid this column number.
4205            
4206             Workaround: put junk into this column, and use different columns for useful modifier
4207             combinations. The mapping from modifiers to columns should not be necessarily 1-to-1.
4208             (But see L<"F cannot ignore column=15 of the keybinding definition table">.)
4209            
4210             =head2 Windows combines modifier bitmaps for C, C and C on C
4211            
4212             (At least when C is special in the keyboard,) the modifier bitmap bound to this
4213             key is actually bit-or of bitmaps above. Essentially, this prohibits assigning
4214             interesting flag combinations to C.
4215            
4216             The (very limited) workaround is to ensure that the flags one puts on C contain
4217             all the flags assigned to the above VK codes. (This does not change anything, but
4218             at least makes the assignments less confusing for human inspection.)
4219            
4220             =head2 Windows ignores C if its modifier bitmaps is not standard
4221            
4222             Adding C to C disables console sending non-modified char on keydown.
4223             Together with the previous problem, this looks like essentially prohibiting
4224             putting interesting bitmaps on the left modifier keys.
4225            
4226             Workaround: one can add C on C. It looks like the combination
4227             C is compatible with Windows' handling of C (both in console,
4228             and for accessing/highlighting the menu entries). (However, since only C
4229             is going to be stripped for handling of C, the modification column for
4230             C should duplicate the modification column for no-C-flags. Same with
4231             C added.)
4232            
4233             =head2 When C produces C, problems in Notepad
4234            
4235             Going to the Save As dialogue in Notepad loses "speciality of AltGr" (it highlights Menu);
4236             one needs to switch layouts via LAlt+LShift to restore.
4237            
4238             I do not know any workaround.
4239            
4240             =head2 Console applications cannot detect when a keypress may be interpreted as a “command”
4241            
4242             The typical logic of an (advanced) application is that it interprets certain keypresses
4243             (combinations of keys with modifiers) as “commands”. To do this in presence of user-switchable
4244             keyboards, when it is not known in compile time which key sequences generate characters,
4245             the application must be able to find at runtime which keypresses are characters-generating,
4246             and which are not. The latter keypresses are candidates to be checked whether they should trigger commands
4247             of the application.
4248            
4249             For final keypresses of a character-generating key-sequence, an application gets a notification
4250             from the ReadConsoleEvent() API call that this keypress generates a character. However, for the
4251             keypresses of the sequence which are not the last one (“dead” keys), there is no such notification.
4252            
4253             Therefore, there is no way to avoid dead keys triggering actions in an application. What is the
4254             difference with non-console applications? First of all, they get such a notification (with the
4255             standard TranslateMessage()/DispatchMessage() sequence of API calls, on WM_KEYDOWN, one can
4256             PeekMessage() for WM_SYSDEADCHAR/WM_DEADCHAR and/or WM_SYSCHAR/WM_CHAR). Second, the windowed
4257             application may call ToUnicode(Ex)() to calculate this information itself.
4258            
4259             Well, why a console application cannot use the second method? First, the active keyboard layout
4260             of a console application is the default one. When user switches the keyboard layout of the console,
4261             the application gets no notification of this, and its keyboard layout does not change. This makes
4262             ToUnicode() useless. Moreover, due to
4263             security architecture, the console application cannot query the ID of the thread serving the message
4264             loop of the console, so cannot query GetKeyboardLayout() of this thread. Hence ToUnicodeEx() is
4265             useless too.
4266            
4267             (There may be a lousy workaround: run ToUnicodeEx() on B the installed keyboard layouts, and
4268             check which of them are excluded by comparing with results of ReadConsoleEvent(). Interpret
4269             contradictions as user changing the keyboard layout. Of course, on several keypresses following
4270             a change of keyboard layout one may get unexpected results. And if two similar
4271             keyboards are installed, one may also never get definite answer on which of them is currently active.)
4272            
4273             (To handle this workaround, one must have a way to call ToUnicode() in a way which does not change
4274             the internal state of the keyboard driver. Observe:
4275            
4276             =over 4
4277            
4278             =item *
4279            
4280             Such a way is not documented.
4281            
4282             =item *
4283            
4284             Watch the character reported by ReadConsoleEvent() on the C event for deadkeys. This is
4285             the character which a deadkey would produce if it is pressed twice (and is 0 if pressing it twice
4286             results in a deadkey again). The only explanation for this I can fathom is that the console's
4287             message queue thread calls such a non-disturbing-state version of ToUnicode().
4288            
4289             Why it should be “non-disturbing”? Otherwise it would reset the state “this deadkey was pressed”,
4290             and the following keypress would be interpreted as not preceded by a deadkey. And this is not
4291             what happens. (If one does it with usual ToUnicode() call, DOWN reports a deadkey, but UP reports
4292             “ignored”; to see this, run F with arguments C
4293             with a keyboard which produces ç on C. Here C is the number of the keyboard in the list
4294             of available keyboards reported by C).
4295            
4296             Well, when one I that some API calls are possible, it is just a SMP to find it out
4297             (see F). It turns out that given argument C achieves
4298             the behaviour of a console during KeyUp event. (As a side benefit, it also avoids another
4299             glitch in Windows' keyboard processing: it reports the character value in presence of C
4300             modifier — recall that ToUnicodeEx() ignores C unless C is present too. Well, I
4301             checked this so far only on KeyUp event, where console produces mysterious results.)
4302            
4303             =item *
4304            
4305             However, even without using undocumented flags, it is not hard to construct such a non-disturbing version of ToUnicode(). The only
4306             ingredient needed is a way to reset the state to “no deadkeys pressed” one. Then just store
4307             keypresses/releases from the time the last such state was found, call ToUnicode(), reset state,
4308             and call ToUnicode() again for all the stored keypresses/releases; then update the stored state
4309             appropriately.
4310            
4311             =item *
4312            
4313             But I strongly doubt that console's message loop does anything so advanced. My bet would be that
4314             it uses a non-documented call or non-documented flags. (Especially since the approach above does
4315             not handle C the same way as the console does.)
4316            
4317             =back
4318            
4319             =head2 In console, which combinations of keypresses may deliver characters?
4320            
4321             In addition to the problem outlined in the preceding section, a console application should
4322             better support input of character-by-numeric-code, and of copy-and-pasted strings. Actually,
4323             the second situation, although undocumented, is well-engineered, so let us document these two
4324             here. (These two should better be documented together, since pasting may fake input by
4325             repeated character-by-numeric-code.)
4326            
4327             Pasting happens character-by-character (more precisely, by UTF-16 codepoints), but C
4328             would group them together:
4329            
4330             =over 4
4331            
4332             =item *
4333            
4334             When pasting a character present in a keyboard layout with at most C modifier,
4335             a fully correct emulation of a sequence C
4336             is produced (without C if it is not needed). The character (as usual) is delivered
4337             on both C events.
4338            
4339             =item *
4340            
4341             When pasting a character present in a keyboard layout, but needing I modifiers (not
4342             only C), a partial emulation of a certain key tap is produced:
4343             C. The character (as usual) is delivered
4344             on both C events.
4345            
4346             Quirks: first, if C is needed, its press/release are not emulated, but the flags on
4347             the C events indicate presence of a C. Second (by this, the
4348             pasting may be distinguished from “real” keypress), C press/release are not emulated,
4349             but it is indicated as "present" in flags of all 4 events.
4350            
4351             =item *
4352            
4353             When pasting control-characters (available via the C-maps of the layout),
4354             the press/release of C is not emulated (but the flags indicate C downs); however,
4355             if C is needed, its press/release is emulated (and flags for I events do not
4356             have C is down).
4357            
4358             Pasting C delivers only U+000D (CR) — the typical maps have it on C and C<^M>,
4359             and C is delivered.
4360            
4361             =item *
4362            
4363             Otherwise, an emulation of C is sent, with the C delivering a character:
4364             C. The C
4365             are very unusual combinations of scancode/vkey for C<6> and C<3> on the numeric keyboard:
4366             they are delivered as if C (or C) is down, but the flags indicate that
4367             these modifiers are "not present".
4368            
4369             The “honest” C delivers U+003f, which is "C" (as above, it is delivered on release
4370             of C).
4371            
4372             =item *
4373            
4374             In general, entering characters-by-numeric-code (entering the decimal — or “KP+” then hex — while
4375             C is down) produces the resulting character when C is released. Processing this may create
4376             a significant problem for applications which interpret C as “commands” (e.g., if
4377             they interpret C as “word-left”).
4378            
4379             There may be several work-arounds. First, usually hex input is much more important than decimal,
4380             and usually, C is not bound to commands. Then the application may ignore characters
4381             delivered on C B the C was immediately followed by the press/release
4382             of C; additionally, it should disable the interpret-as-commands logic while C is down,
4383             and its press was followed by press/release of C.
4384            
4385             Second, it is not crucial to deliver Unicode characters numbered in single-digits. So one may
4386             require that commands are triggered by C only when pressed one-by-one (releasing
4387             C between them), and consider multi-digit presses as input-by-number only.
4388            
4389             Finally, Windows aborts entering character-by-numeric-code if any unexpected key press interferes.
4390             For example, C is “C”, but pressing-releasing C after pressing down C
4391             would not deliver anything. If an application follows the same logic (in reverse!) when recognizing
4392             keypressing resulting in “commands”, the users would have at least a “technical ability” to enter
4393             both commands, I enter characters-by-numeric-code.
4394            
4395             =back
4396            
4397             This is tested I in the situation when a layout has C present, and all the
4398             "with-extra-modifiers" characters are on bitmap entries with C bit marked. This is
4399             a situation discussed in the section L<"A convenient assignment of C bitmaps to modifier keys">.
4400            
4401             It is plausible that only C, C and C bits in a bitmap returned by C are
4402             acted upon (with C flag added based on C). Some popular keyboard layouts
4403             use C bit on the C key; under this assumption, the characters available via C key
4404             would be delivered with at most C modifier.
4405            
4406             All the emulated events do not have C indicated as "present" in their flags.
4407            
4408             =head2 Behaviour of C vs C
4409            
4410             When both combinations produce characters (say, X and Y), it is not clear
4411             how an application should decide whether it got C event (for menu
4412             entry starting with Y), or an C event.
4413            
4414             A partial workaround (if the semantic of the layout fits into the limited number
4415             of bits in the ORed mask): make all the keys which may be combined with
4416             C to have the C bit in the mask set; add some extra bit
4417             to C keys to be able to distinguish them. Then at least the
4418             kernel will produce the correct character on the ToUnicode() call (hence
4419             in TranslateMessage()). [A potential that an application may be confused
4420             is still large.]
4421            
4422             =head2 Customization of what C is doing is very limited
4423            
4424             (See the description of the semantic of C in L<"Keyboard input on Windows, Part II: The semantic of ToUnicode()">.)
4425            
4426             A partial workaround (if the semantic of the layout fits into the limited number
4427             of bits in the ORed mask): make all the modifier combinations (except for the
4428             base layer) to have C and C bits set; add some extra bits to
4429             C keys and C keys (apparently, only C will work with C)
4430             to be able to distinguish them. Then the C flag will affect all
4431             these combinations too.
4432            
4433             =head2 C combination: multiple problems
4434            
4435             First of all, sometimes C is ignored when used with this combination.
4436             (Fixed by reboot. When this happens, C does not work also with combinations
4437             with C and/or C). On the
4438             other hand, C works as intended. (I even got an impression that
4439             sometimes C works when C is active; cannot reproduce this,
4440             though.)
4441            
4442             I suspect this is related to the binding (usually not active) of C to switch between
4443             keyboards of a language. It may have suddenly jumped into existence (without my interaction).
4444             Simultaneously, this option disappeared from the UI to change keyboard options
4445             (L in Windows 7). It might be that
4446             press/release of C is filtered out in presence of C? (Looks
4447             like this for C now...)
4448            
4449             (I also saw what looks like C key being stuck in some rare situations — fixed
4450             by pressing it again. Do not know how to reproduce this. It is interesting to
4451             note that one of the bits in the mask of the C key is 0x80, and there is
4452             a define for this bit in F named C — but it is undocumented,
4453             and, judging by names, one might think that C would work in pair with the flag
4454             C of CAttributes>.)
4455            
4456             B Apparently, key up/down for many combinations of C are
4457             not delivered to applications.
4458             Key up/down for C<`/5/6/-/=/Z/X/C/V/M/,/./Enter/rShift> are not delivered here when used with C modifiers
4459             (at least in a console). Adding C does not change this. Same for C
4460             and C (but not for keypad ones!).
4461            
4462             Moreover, when used with C or C, this behaves as if both these
4463             keys were pressed. Same with the pair C and C (is it hardware-dependent???).
4464            
4465             (From time to time C do not work — neither with nor without C.)
4466            
4467             No workarounds are known. Although I could reproduce this on 3 physically different
4468             keyboards, this is, most probably, a design defect of hardware keyboards. Compare with
4469             L and
4470             L.
4471             Another related tidbit: apparently, L
4472             after pressing some modifier keys|http://ccm.net/forum/affich-24692-keyboard-mess-up-after-shift-key-held-too-lon?page=2>
4473            
4474             =head2 C combination: many keys are not delivered to applications
4475            
4476             Apparently, key up/down for many combinations of C are
4477             not delivered to applications.
4478             For example, C and C — neither with nor without C; same
4479             for C (at least in a console). Adding C
4480             does not change this. Same for C.
4481            
4482             No workarounds are known (except that C and C (without C)
4483             may be replaced by C and C).
4484            
4485             B in the bottom row of the keyboard, all the keys (except C) are
4486             either in the list above, or in the list for C modifiers. See also the
4487             references in the discussion of the previous problem (with C).
4488            
4489             =head2 Too long C of the layout is not shown in Language Bar Settings
4490            
4491             (the description is shown in the Language Bar itself). The examples are (behave the same)
4492            
4493             Greek-QWERTY (Pltn) Grn=⇑␣=^ˡⒶˡ-=Lat; Ripe=Ⓐʳ␣=Mnu-=Rus(Phon); Ripe²=Mnu-^ʳ-=Hbr; k.ilyaz.org
4494             US-Intl Grn=⇑␣=^ˡⒶˡ-=Grk; Ripe=Ⓐʳ␣=Mnu-=Rus(Phon); Ripe²=Mnu-^ʳ-=Hbr; k.ilyaz.org
4495            
4496             (Or maybe it is the semicolons in the names???). If this happens, one can still assign
4497             distinctive icons to the layout, and distinguish them via going to C.
4498            
4499             =head1 UNICODE TABLE GOTCHAS
4500            
4501             The position of Unicode consortium is, apparently, that the “name” of
4502             a Unicode character is “just an identifier”. In other words, its
4503             (primary) function is to identify a character uniquely: different
4504             characters should have different names, and that's it. Any other function
4505             is secondary, and “if it works, fine”; if it does not work, tough luck.
4506             If the name does not match how people use the character (and with the
4507             giant pool of defined characters, this has happened a few times), this is not
4508             a reason to abandon the name.
4509            
4510             This position makes the practice of maintaining backward compatibility
4511             easy. There is L.
4512            
4513             However, this module tries to extract a certain amount of I
4514             from the giant heap of characters defined in Unicode; the principal concept
4515             is “a mutator”. Most mutators are defined by programmatic inspection of names
4516             of characters and relations between names of different characters. (In other
4517             words, we base such mutators on names, not glyphs.) Here we
4518             sketch the irregularities uncovered during this process.
4519            
4520             APL symbols with C and C look reverted w.r.t. other
4521             C and C symbols.
4522            
4523             C, C, C, C C, C
4524             are defined with C or C at end, but (may) drop it when combined
4525             with modifiers via C. Likewise for C, C,
4526             C, C, C, C.
4527            
4528             Sometimes the opposite happens, and C appears out of the blue; compare:
4529            
4530             2A18 INTEGRAL WITH TIMES SIGN
4531             2A19 INTEGRAL WITH INTERSECTION
4532            
4533             C I a combination of C with C, but it is not marked as such
4534             in its name.
4535            
4536             Sometimes a name of diacritic (after C) acquires an C at end
4537             (see C).
4538            
4539             Oftentimes the part to the left of C is not resolvable: sometimes it
4540             is underspecified (e.g., just C), sometimes it is overspecified
4541             (e.g., in C), sometimes it should be understood
4542             as a glyph-of-written-word (e.g., in C). Sometimes it just
4543             does not exist (e.g., C -
4544             there is C, but not the reversed variant).
4545             Sometimes it is a defined synonym (C).
4546            
4547             Sometimes it has something appended (C).
4548            
4549             Sometimes C is just a clarification (C).
4550            
4551             1 AND
4552             1 ANTENNA
4553             1 ARABIC MATHEMATICAL OPERATOR HAH
4554             1 ARABIC MATHEMATICAL OPERATOR MEEM
4555             1 ARABIC ROUNDED HIGH STOP
4556             1 ARABIC SMALL HIGH LIGATURE ALEF
4557             1 ARABIC SMALL HIGH LIGATURE QAF
4558             1 ARABIC SMALL HIGH LIGATURE SAD
4559             1 BACK
4560             1 BLACK SUN
4561             1 BRIDE
4562             1 BROKEN CIRCLE
4563             1 CIRCLED HORIZONTAL BAR
4564             1 CIRCLED MULTIPLICATION SIGN
4565             1 CLOSED INTERSECTION
4566             1 CLOSED LOCK
4567             1 COMBINING LEFTWARDS HARPOON
4568             1 COMBINING RIGHTWARDS HARPOON
4569             1 CONGRUENT
4570             1 COUPLE
4571             1 DIAMOND SHAPE
4572             1 END
4573             1 EQUIVALENT
4574             1 FISH CAKE
4575             1 FROWNING FACE
4576             1 GLOBE
4577             1 GRINNING CAT FACE
4578             1 HEAVY OVAL
4579             1 HELMET
4580             1 HORIZONTAL MALE
4581             1 IDENTICAL
4582             1 INFINITY NEGATED
4583             1 INTEGRAL AVERAGE
4584             1 INTERSECTION BESIDE AND JOINED
4585             1 KISSING CAT FACE
4586             1 LATIN CAPITAL LETTER REVERSED C
4587             1 LATIN CAPITAL LETTER SMALL Q
4588             1 LATIN LETTER REVERSED GLOTTAL STOP
4589             1 LATIN LETTER TWO
4590             1 LATIN SMALL CAPITAL LETTER I
4591             1 LATIN SMALL CAPITAL LETTER U
4592             1 LATIN SMALL LETTER LAMBDA
4593             1 LATIN SMALL LETTER REVERSED R
4594             1 LATIN SMALL LETTER TC DIGRAPH
4595             1 LATIN SMALL LETTER TH
4596             1 LEFT VERTICAL BAR
4597             1 LOWER RIGHT CORNER
4598             1 MEASURED RIGHT ANGLE
4599             1 MONEY
4600             1 MUSICAL SYMBOL
4601             1 NIGHT
4602             1 NOTCHED LEFT SEMICIRCLE
4603             1 ON
4604             1 OR
4605             1 PAGE
4606             1 RIGHT ANGLE VARIANT
4607             1 RIGHT DOUBLE ARROW
4608             1 RIGHT VERTICAL BAR
4609             1 RUNNING SHIRT
4610             1 SEMIDIRECT PRODUCT
4611             1 SIX POINTED STAR
4612             1 SMALL VEE
4613             1 SOON
4614             1 SQUARED UP
4615             1 SUMMATION
4616             1 SUPERSET BESIDE AND JOINED BY DASH
4617             1 TOP
4618             1 TOP ARC CLOCKWISE ARROW
4619             1 TRIPLE VERTICAL BAR
4620             1 UNION BESIDE AND JOINED
4621             1 UPPER LEFT CORNER
4622             1 VERTICAL BAR
4623             1 VERTICAL MALE
4624             1 WHITE SUN
4625             2 CLOSED MAILBOX
4626             2 CLOSED UNION
4627             2 DENTISTRY SYMBOL LIGHT VERTICAL
4628             2 DOWN-POINTING TRIANGLE
4629             2 HEART
4630             2 LEFT ARROW
4631             2 LINE INTEGRATION
4632             2 N-ARY UNION OPERATOR
4633             2 OPEN MAILBOX
4634             2 PARALLEL
4635             2 RIGHT ARROW
4636             2 SMALL CONTAINS
4637             2 SMILING CAT FACE
4638             2 TIMES
4639             2 TRIPLE HORIZONTAL BAR
4640             2 UP-POINTING TRIANGLE
4641             2 VERTICAL KANA REPEAT
4642             3 CHART
4643             3 CONTAINS
4644             3 TRIANGLE
4645             4 BANKNOTE
4646             4 DIAMOND
4647             4 PERSON
4648             5 LEFTWARDS TWO-HEADED ARROW
4649             5 RIGHTWARDS TWO-HEADED ARROW
4650             8 DOWNWARDS HARPOON
4651             8 UPWARDS HARPOON
4652             9 SMILING FACE
4653             11 CIRCLE
4654             11 FACE
4655             11 LEFTWARDS HARPOON
4656             11 RIGHTWARDS HARPOON
4657             15 SQUARE
4658            
4659             perl -wlane "next unless /^Unresolved: <(.*?)>/; $s{$1}++; END{print qq($s{$_}\t$_) for keys %s}" oxx-us2 | sort -n > oxx-us2-sorted-kw
4660            
4661             C specify fill - not combining. C is not combining, same for Cs.
4662            
4663             Only C is combining. Triangle is combining only with underbar and dot above.
4664            
4665             C means C. C - C (so do many others.)
4666             C means C; but C means C - go figure!
4667             C is not a decomposition (it is "something circled").
4668            
4669             Another way of compositing is C (but not C!) and C. See also C, C
4670             - but only C. Avoid C after these.
4671            
4672             C should replace C. C means C, same for C.
4673             C means C - actually just a bug - http://www.reddit.com/r/programming/comments/fv8ao/unicode_600_standard_published/?
4674             C means C. C means C.
4675             C means C. C means C.
4676            
4677             C means C. C looks genuinely missing...
4678            
4679             C means one of two, left or right???
4680            
4681             This better be convertible by rounding/sharpening mutators, but see
4682             C
4683            
4684             2268 LESS-THAN BUT NOT EQUAL TO; 1.1
4685             2269 GREATER-THAN BUT NOT EQUAL TO; 1.1
4686             228A SUBSET OF WITH NOT EQUAL TO; 1.1
4687             228B SUPERSET OF WITH NOT EQUAL TO; 1.1
4688             @ Relations
4689             22E4 SQUARE IMAGE OF OR NOT EQUAL TO; 1.1
4690             22E5 SQUARE ORIGINAL OF OR NOT EQUAL TO; 1.1
4691             @@ 2A00 Supplemental Mathematical Operators 2AFF
4692             @ Relational operators
4693             2A87 LESS-THAN AND SINGLE-LINE NOT EQUAL TO; 3.2
4694             x (less-than but not equal to - 2268)
4695             2A88 GREATER-THAN AND SINGLE-LINE NOT EQUAL TO; 3.2
4696             x (greater-than but not equal to - 2269)
4697             2AB1 PRECEDES ABOVE SINGLE-LINE NOT EQUAL TO; 3.2
4698             2AB2 SUCCEEDS ABOVE SINGLE-LINE NOT EQUAL TO; 3.2
4699             2AB5 PRECEDES ABOVE NOT EQUAL TO; 3.2
4700             2AB6 SUCCEEDS ABOVE NOT EQUAL TO; 3.2
4701             @ Subset and superset relations
4702             2ACB SUBSET OF ABOVE NOT EQUAL TO; 3.2
4703             2ACC SUPERSET OF ABOVE NOT EQUAL TO; 3.2
4704            
4705             Looking into v6.1 reference PDFs, 2268,2269,2ab5,2ab6,2acb,2acc have two horizontal bars,
4706             228A,228B,22e4,22e5,2a87,2a88,2ab1,2ab2 have one horizontal bar. Hence C and C
4707             are equivalent; so are C, C, C
4708             and C. (Square variants come only with one horizontal line?)
4709            
4710            
4711             Set C<$ENV{UI_KEYBOARDLAYOUT_UNRESOLVED}> to enable warnings. Then do
4712            
4713             perl -wlane "next unless /^Unresolved: <(.*?)>/; $s{$1}++; END{print qq($s{$_}\t$_) for keys %s}" oxx | sort -n > oxx-sorted-kw
4714            
4715             =head1 SEE ALSO
4716            
4717             The keyboard(s) generated with this module: L, L
4718            
4719             On diacritics:
4720            
4721             http://www.phon.ucl.ac.uk/home/wells/dia/diacritics-revised.htm#two
4722             http://en.wikipedia.org/wiki/Tonos#Unicode
4723             http://en.wikipedia.org/wiki/Early_Cyrillic_alphabet#Numerals.2C_diacritics_and_punctuation
4724             http://en.wikipedia.org/wiki/Vietnamese_alphabet#Tone_marks
4725             http://diacritics.typo.cz/
4726            
4727             http://en.wikipedia.org/wiki/User:TEB728/temp (Chars of languages)
4728             http://www.evertype.com/alphabets/index.html
4729            
4730             Accents in different Languages:
4731             http://fonty.pl/porady,12,inne_diakrytyki.htm#07
4732             http://en.wikipedia.org/wiki/Latin-derived_alphabet
4733            
4734             On typography marks
4735            
4736             http://wiki.neo-layout.org/wiki/Striche
4737             http://www.matthias-kammerer.de/SonsTypo3.htm
4738             http://en.wikipedia.org/wiki/Soft_hyphen
4739             http://en.wikipedia.org/wiki/Dash
4740             http://en.wikipedia.org/wiki/Ditto_mark
4741            
4742             On keyboard layouts:
4743            
4744             http://en.wikipedia.org/wiki/Keyboard_layout
4745             http://en.wikipedia.org/wiki/Keyboard_layout#US-International
4746             http://en.wikipedia.org/wiki/ISO/IEC_9995
4747             http://www.pentzlin.com/info2-9995-3-V3.pdf (used almost nowhere - only half of keys in Canadian multilanguage match)
4748             http://en.wikipedia.org/wiki/QWERTY#Canadian_Multilingual_Standard
4749             http://en.wikipedia.org/wiki/Unicode_input
4750             Discussion of layout changes and position of €:
4751             https://www.libreoffice.org/bugzilla/show_bug.cgi?id=5981
4752            
4753             History of QWERTY
4754             http://kanji.zinbun.kyoto-u.ac.jp/~yasuoka/publications/PreQWERTY.html
4755             http://kanji.zinbun.kyoto-u.ac.jp/db-machine/~yasuoka/QWERTY/
4756            
4757             http://msdn.microsoft.com/en-us/goglobal/bb964651
4758             http://eurkey.steffen.bruentjen.eu/layout.html
4759             http://ru.wikipedia.org/wiki/%D0%A4%D0%B0%D0%B9%D0%BB:Birman%27s_keyboard_layout.svg
4760             http://bepo.fr/wiki/Accueil
4761             http://www.unibuc.ro/e/prof/paliga_v_s/soft-reso/ (Academic for Mac)
4762             http://cgit.freedesktop.org/xkeyboard-config/tree/symbols/ru
4763             http://cgit.freedesktop.org/xkeyboard-config/tree/symbols/keypad
4764             http://www.evertype.com/celtscript/type-keys.html (Old Irish mechanical typewriters)
4765             http://eklhad.net/linux/app/halfqwerty.xkb (One-handed layout)
4766             http://www.doink.ch/an-x11-keyboard-layout-for-scholars-of-old-germanic/ (and references there)
4767             http://www.neo-layout.org/
4768             https://commons.wikimedia.org/wiki/File:Neo2_keyboard_layout.svg
4769             Images in (download of)
4770             http://www.mzuther.de/en/contents/osd-neo2
4771             Neo2 sources:
4772             http://wiki.neo-layout.org/browser/windows/kbdneo2/Quelldateien
4773             Shift keys at center, nice graphic:
4774             http://www.tinkerwithabandon.com/twa/keyboarding.html
4775             Physical keyboard:
4776             http://www.konyin.com/?page=product.Multilingual%20Keyboard%20for%20UNITED%20STATES
4777             Polytonic Greek
4778             http://www.polytoniko.org/keyb.php?newlang=en
4779             Portable keyboard layout
4780             http://www.autohotkey.com/forum/viewtopic.php?t=28447
4781             One-handed
4782             http://www.autohotkey.com/forum/topic1326.html
4783             Typing on numeric keypad
4784             http://goron.de/~johns/one-hand/#documentation
4785             On screen keyboard indicator
4786             http://www.autohotkey.com/docs/scripts/KeyboardOnScreen.htm
4787             Keyboards of ЕС-1840/1/5
4788             http://aic-crimea.narod.ru/Study/Shen/PC/1/5-4-1.htm
4789             (http://www.aic-crimea.narod.ru/Study/Shen/PC/main.htm) Руководство пользователя ПЭВМ
4790             http://fdd5-25.net/fddforum/index.php?PHPSESSID=201bd45ab972f1ab4b440dcb6c7ca18f&topic=489.30
4791             Phonetic Hebrew layout(s) (1st has many duplicates, 2nd overweighted)
4792             http://bc.tech.coop/Hebrew-ZC.html
4793             http://help.keymanweb.com/keyboards/keyboard_galaxiehebrewkm6.php
4794             Greek (Galaxy) with a convenient mapping (except for Ψ) and BibleScript
4795             http://www.tavultesoft.com/keyboarddownloads/%7B4D179548-1215-4167-8EF7-7F42B9B0C2A6%7D/manual.pdf
4796             With 2-letter input of Unicode names:
4797             http://www.jlg-utilities.com
4798             Medievalist's
4799             http://www.personal.leeds.ac.uk/~ecl6tam/
4800             Yandex visual keyboards
4801             http://habrahabr.ru/company/yandex/blog/108255/
4802             Implementation in Firefox
4803             http://mxr.mozilla.org/mozilla-central/source/widget/windows/KeyboardLayout.cpp#1085
4804             Implementation in Emacs 24.3 (ToUnicode() in fns)
4805             http://fossies.org/linux/misc/emacs-24.3.tar.gz:a/emacs-24.3/src/w32inevt.c
4806             http://fossies.org/linux/misc/emacs-24.3.tar.gz:a/emacs-24.3/src/w32fns.c
4807             http://fossies.org/linux/misc/emacs-24.3.tar.gz:a/emacs-24.3/src/w32term.c
4808             Naive implementations:
4809             http://social.msdn.microsoft.com/forums/en-US/windowssdk/thread/07afec87-68c1-4a56-bf46-a38a9c2232e9/
4810             Quality of a keyboard
4811             http://www.tavultesoft.com/keymandev/quality/whitepaper1.1.pdf
4812            
4813             Manipulating keyboards on Windows and X11
4814            
4815             http://symbolcodes.tlt.psu.edu/keyboards/winkeyvista.html (using links there: up to Win7)
4816             http://windows.microsoft.com/en-us/windows-8/change-keyboard-layout
4817             http://www.howtoforge.com/changing-language-and-keyboard-layout-on-various-linux-distributions
4818            
4819             MSKLC parser
4820            
4821             http://pastebin.com/UXc1ub4V
4822            
4823             By author of MSKLC Michael S. Kaplan (do not forget to follow links)
4824            
4825             Input on Windows:
4826             http://seit.unsw.adfa.edu.au/staff/sites/hrp/personal/Sanskrit-External/Unicode-KbdsonWindows.pdf
4827            
4828             http://blogs.msdn.com/b/michkap/archive/2006/03/26/560595.aspx
4829             http://blogs.msdn.com/b/michkap/archive/2006/04/22/581107.aspx
4830             Chaining dead keys:
4831             http://blogs.msdn.com/b/michkap/archive/2011/04/16/10154700.aspx
4832             Mapping VK to VSC etc:
4833             http://blogs.msdn.com/b/michkap/archive/2006/08/29/729476.aspx
4834             [Link] Remapping CapsLock to mean Backspace in a keyboard layout
4835             (if repeat, every second Press counts ;-)
4836             http://colemak.com/forum/viewtopic.php?id=870
4837             Scancodes from kbd.h get in the way
4838             http://blogs.msdn.com/b/michkap/archive/2006/08/30/726087.aspx
4839             What happens if you start with .klc with other VK_ mappings:
4840             http://blogs.msdn.com/b/michkap/archive/2010/11/03/10085336.aspx
4841             Keyboards with Ctrl-Shift states:
4842             http://blogs.msdn.com/b/michkap/archive/2010/10/08/10073124.aspx
4843             On assigning Ctrl-values
4844             http://blogs.msdn.com/b/michkap/archive/2008/11/04/9037027.aspx
4845             On hotkeys for switching layouts:
4846             http://blogs.msdn.com/b/michkap/archive/2008/07/16/8736898.aspx
4847             Text services
4848             http://blogs.msdn.com/b/michkap/archive/2008/06/30/8669123.aspx
4849             Low-level access in MSKLC
4850             http://levicki.net/articles/tips/2006/09/29/HOWTO_Build_keyboard_layouts_for_Windows_x64.php
4851             http://blogs.msdn.com/b/michkap/archive/2011/04/09/10151666.aspx
4852             On font linking
4853             http://blogs.msdn.com/b/michkap/archive/2006/01/22/515864.aspx
4854             Unicode in console
4855             http://blogs.msdn.com/michkap/archive/2005/12/15/504092.aspx
4856             Adding formerly "invisible" keys to the keyboard
4857             http://blogs.msdn.com/b/michkap/archive/2006/09/26/771554.aspx
4858             Redefining NumKeypad keys
4859             http://blogs.msdn.com/b/michkap/archive/2007/07/04/3690200.aspx
4860             BUT!!!
4861             http://blogs.msdn.com/b/michkap/archive/2010/04/05/9988581.aspx
4862             And backspace/return/etc
4863             http://blogs.msdn.com/b/michkap/archive/2008/10/27/9018025.aspx
4864             kbdutool.exe, run with the /S ==> .c files
4865             Doing one's own WM_DEADKEY processing'
4866             http://blogs.msdn.com/b/michkap/archive/2006/09/10/748775.aspx
4867             Dead keys do not work on SG-Caps
4868             http://blogs.msdn.com/b/michkap/archive/2008/02/09/7564967.aspx
4869             Dynamic keycaps keyboard
4870             http://blogs.msdn.com/b/michkap/archive/2005/07/20/441227.aspx
4871             Backslash/yen/won confusion
4872             http://blogs.msdn.com/b/michkap/archive/2005/09/17/469941.aspx
4873             Unicode output to console
4874             http://blogs.msdn.com/b/michkap/archive/2010/10/07/10072032.aspx
4875             Install/Load/Activate an input method/layout
4876             http://blogs.msdn.com/b/michkap/archive/2007/12/01/6631463.aspx
4877             http://blogs.msdn.com/b/michkap/archive/2008/05/23/8537281.aspx
4878             Reset to a TT font from an application:
4879             http://blogs.msdn.com/b/michkap/archive/2011/09/22/10215125.aspx
4880             How to (not) treat C-A-Q
4881             http://blogs.msdn.com/b/michkap/archive/2012/04/26/10297903.aspx
4882             Treating Brazilian ABNT c1 c2 keys
4883             http://blogs.msdn.com/b/michkap/archive/2006/10/07/799605.aspx
4884             And JIS ¥|-key
4885             (compare with http://www.scs.stanford.edu/11wi-cs140/pintos/specs/kbd/scancodes-7.html
4886             http://hp.vector.co.jp/authors/VA003720/lpproj/others/kbdjpn.htm )
4887             http://blogs.msdn.com/b/michkap/archive/2006/09/26/771554.aspx
4888             Suggest a topic:
4889             http://blogs.msdn.com/b/michkap/archive/2007/07/29/4120528.aspx#7119166
4890            
4891             Installable Keyboard Layouts - Apple Developer (“.keylayout” files; modifiers not editable; cache may create problems;
4892             to enable deadkeys in X11, one may need extra work)
4893            
4894             http://developer.apple.com/technotes/tn2002/tn2056.html
4895             http://wordherd.com/keyboards/
4896             http://stackoverflow.com/questions/999681/how-to-remap-context-menu-key-in-mac-os-x
4897             http://apple.stackexchange.com/questions/21691/ukelele-generated-custom-keyboard-layouts-not-working-in-lion
4898             http://wiki.openoffice.org/wiki/X11Keymaps
4899             http://www.tenshu.net/2012/11/using-caps-lock-as-new-modifier-key-in.html
4900             http://raw.github.com/lreddie/ukelele-steps/master/USExtended.keylayout
4901             http://scripts.sil.org/cms/scripts/page.php?item_id=keylayoutmaker
4902            
4903             ANSI/ISO/ABNT/JIS/Russian Apple’s keyboards
4904            
4905             https://discussions.apple.com/thread/1508293
4906             http://www.dtp-transit.jp/apple/mac/post_1137.html
4907             http://www.dtp-transit.jp/images/apple-keyboards-US-JIS.jpg
4908             http://m10lmac.blogspot.co.il/2007/02/fixing-brazilian-keyboard-layout.html
4909             http://www2d.biglobe.ne.jp/~msyk/keyboard/layout/mac-jiskbd.html
4910             http://commons.wikimedia.org/wiki/File:KB_Russian_Apple_Macintosh.svg
4911            
4912             JIS variations (OADG109 vs A)
4913            
4914             http://ja.wikipedia.org/wiki/JIS%E3%82%AD%E3%83%BC%E3%83%9C%E3%83%BC%E3%83%89
4915            
4916             Different ways to access chars on Mac (1ˢᵗ suggests adding a Discover via plists via Keycaps≠Strings)
4917            
4918             http://apple.stackexchange.com/questions/49565/how-can-i-expand-the-number-of-special-characters-i-can-type-using-my-keyboard
4919             http://developer.apple.com/library/mac/#documentation/cocoa/conceptual/eventoverview/TextDefaultsBindings/TextDefaultsBindings.html#//apple_ref/doc/uid/20000468-CJBDEADF
4920             http://www.hcs.harvard.edu/~jrus/Site/System%20Bindings.html Default keybindings
4921             http://www.hcs.harvard.edu/~jrus/Site/Cocoa%20Text%20System.html
4922             http://hints.macworld.com/article.php?story=2005051118320432 Mystery keys on Mac
4923             http://www.snark.de/index.cgi/0007 Patching ADB drivers
4924             http://www.snark.de/mac/usbkbpatch/index_en.html Patching USB drivers (gives LCtrl vs RCtrl etc???)
4925             http://www.lorax.com/FreeStuff/TextExtras.html (has no docs???)
4926             http://stevelosh.com/blog/2012/10/a-modern-space-cadet/ Combining different approaches
4927             http://brettterpstra.com/2012/12/08/a-useful-caps-lock-key/ (simplified version of ↖)
4928             http://david.rothlis.net/keyboards/microsoft_natural_osx/ Num Lock is claimed as not working
4929            
4930             Compose on Mac requires hacks:
4931            
4932             http://apple.stackexchange.com/questions/31487/add-compose-key-to-os-x
4933            
4934             Convert Apple to MSKLC
4935            
4936             http://typophile.com/node/90606
4937            
4938             Keyboards on Mac:
4939            
4940             http://homepage.mac.com/thgewecke/mlingos9.html
4941             http://web.archive.org/web/20080717203026/http://homepage.mac.com/thgewecke/mlingos9.html
4942            
4943             Tool to produce:
4944            
4945             http://wordherd.com/keyboards/
4946             http://developer.apple.com/library/mac/#technotes/tn2056/_index.html
4947            
4948             VK_OEM_8 Kana modifier - Using instead of AltGr
4949            
4950             http://www.kbdedit.com/manual/ex13_replacing_altgr_with_kana.html
4951            
4952             Limitations of using KANA toggle
4953            
4954             http://www.kbdedit.com/manual/ex12_trilang_ser_cyr_lat_gre.html
4955            
4956             FE (Far Eastern) keyboard source code example (NEC AT is 106 with SPECIAL MULTIVK flags changed on some scancodes, OEM_7/8 producing 0x1e 0x1f, and no OEM_102):
4957            
4958             http://read.pudn.com/downloads3/sourcecode/windows/248345/win2k/private/ntos/w32/ntuser/kbd/fe_kbds/jpn/ibm02/kbdibm02.c__.htm
4959             http://read.pudn.com/downloads3/sourcecode/windows/248345/win2k/private/ntos/w32/ntuser/kbd/fe_kbds/jpn/kbdnecat/kbdnecat.c__.htm
4960             http://read.pudn.com/downloads3/sourcecode/windows/248345/win2k/private/ntos/w32/ntuser/kbd/fe_kbds/jpn/106/kbd106.c__.htm
4961            
4962             Investigation on relation between VK_ assignments, KBDEXT, KBDNUMPAD etc:
4963             http://code.google.com/p/ergo-dvorak-for-developers/source/browse/trunk/kbddvp.c
4964            
4965             PowerShell vs ISE (and how to find them [On Win7: WinKey Accessories])
4966             http://blogs.msdn.com/b/powershell/archive/2009/04/17/differences-between-the-ise-and-powershell-console.aspx
4967             http://blogs.msdn.com/b/michkap/archive/2013/01/23/10387424.aspx
4968             http://blogs.msdn.com/b/michkap/archive/2013/02/15/10393862.aspx
4969             http://blogs.msdn.com/b/michkap/archive/2013/02/19/10395086.aspx
4970             http://blogs.msdn.com/b/michkap/archive/2013/02/20/10395416.aspx
4971            
4972             Google for "Get modification number for Shift key" for code to query the kbd DLL directly ("keylogger")
4973             http://web.archive.org/web/20120106074849/http://debtnews.net/index.php/article/debtor/2008-09-08/1088.html
4974             http://code.google.com/p/keymagic/source/browse/KeyMagicDll/kbdext.cpp?name=0419d8d626&r=d85498403fd59bca9efc04b4e5bb4406d39439a0
4975            
4976             How to read Unicode in an ANSI Window:
4977             http://social.msdn.microsoft.com/Forums/en-US/windowsgeneraldevelopmentissues/thread/d455e846-d18b-4086-98de-822658bcebf0/
4978             http://blog.tavultesoft.com/2011/06/accepting-unicode-input-in-your-windows-application.html
4979            
4980             HTML consolidated entity names and discussion, MES charsets:
4981            
4982             http://www.w3.org/TR/xml-entity-names
4983             http://www.w3.org/2003/entities/2007/w3centities-f.ent
4984             http://www.cl.cam.ac.uk/~mgk25/ucs/mes-2-rationale.html
4985             http://web.archive.org/web/20000815100817/http://www.egt.ie/standards/iso10646/pdf/cwa13873.pdf
4986            
4987             Ctrl2cap
4988            
4989             http://technet.microsoft.com/en-us/sysinternals/bb897578
4990            
4991             Low level scancode mapping
4992            
4993             http://www.annoyances.org/exec/forum/winxp/r1017256194
4994             http://web.archive.org/web/20030211001441/http://www.microsoft.com/hwdev/tech/input/w2kscan-map.asp
4995             http://msdn.microsoft.com/en-us/windows/hardware/gg463447
4996             http://www.annoyances.org/exec/forum/winxp/1034644655
4997             ???
4998             http://netj.org/2004/07/windows_keymap
4999             the free remapkey.exe utility that's in Microsoft NT / 2000 resource kit.
5000            
5001             perl -wlne "BEGIN{$t = {T => q(), qw( X e0 Y e1 )}} print qq( $t->{$1}$2\t$3) if /^#define\s+([TXY])([0-9a-f]{2})\s+(?:_EQ|_NE)\((?:(?:\s*\w+\s*,){3})?\s*([^\W_]\w*)\s*(?:(?:,\s*\w+\s*){2})?\)\s*(?:\/\/.*)?$/i" kbd.h >ll2
5002             then select stuff up to the first e1 key (but DECIMAL is not there T53 is DELETE??? take from MSKLC help/using/advanced/scancodes)
5003            
5004             CapsLock as on typewriter:
5005            
5006             http://web.archive.org/web/20120717083202/http://www.annoyances.org/exec/forum/winxp/1071197341
5007            
5008             Scancodes visible on the low level:
5009            
5010             http://openbsd.7691.n7.nabble.com/Patch-Support-F13-F24-on-PC-122-terminal-keyboard-td224992.html
5011             http://www.seasip.info/Misc/1227T.html
5012            
5013             Scancodes visible on Windows (with USB)
5014            
5015             http://download.microsoft.com/download/1/6/1/161ba512-40e2-4cc9-843a-923143f3456c/translate.pdf
5016            
5017             X11 XKB docs:
5018            
5019             https://www.x.org/releases/X11R7.7/doc/kbproto/xkbproto.html
5020            
5021             ftp://www.x.org/pub/xorg/X11R7.5/doc/input/XKB-Enhancing.html (what is caps:shift* ???)
5022             https://wiki.gentoo.org/wiki/Keyboard_layout_switching
5023             http://webkeys.platonix.co.il/about/use_xkb/#caps-key-types
5024            
5025             https://apt-browse.org/browse/debian/wheezy/main/all/xkb-data/2.5.1-3/file/usr/share/X11/xkb/symbols/keypad
5026             http://misc.openbsd.narkive.com/UK2Xlptl/shift-backspace-in-x
5027             NoSymbol (do not change; do not make array longer; if alphabetic, may be extended to width 2)
5028             vs VoidSymbol (undefine; may actually extend the array. Undocumented in xkbproto??? )
5029             compare with http://kotoistus.tksoft.com/linux/void_no_symbol-en.html
5030            
5031             overlay1= overlay2= How to switch to overlay: see compat/keypad
5032             RadioGroup ???
5033            
5034             Problems on X11:
5035            
5036             http://www.x.org/releases/X11R7.7/doc/kbproto/xkbproto.html (definition of XKB protocol)
5037             http://www.x.org/releases/current/doc/kbproto/xkbproto.html
5038            
5039             http://web.archive.org/web/20050306001520/http://pascal.tsu.ru/en/xkb/
5040            
5041             Some features are removed in libxkbcommon, which is used by many toolkits now:
5042             https://xkbcommon.org/doc/current/md_doc_compat.html
5043             But XKB is implemented in the server???
5044            
5045             http://wiki.linuxquestions.org/wiki/Configuring_keyboards (current???)
5046             http://wiki.linuxquestions.org/wiki/Accented_Characters (current???)
5047             http://wiki.linuxquestions.org/wiki/Altering_or_Creating_Keyboard_Maps (current???)
5048             https://help.ubuntu.com/community/ComposeKey (documents almost 1/2 of the needed stuff)
5049             http://www.gentoo.org/doc/en/utf-8.xml (2005++ ???)
5050             http://en.gentoo-wiki.com/wiki/X.Org/Input_drivers (2009++ HAS: How to make CapsLock change layouts)
5051             http://www.freebsd.org/cgi/man.cgi?query=setxkbmap&sektion=1&manpath=X11R7.4
5052             http://people.uleth.ca/~daniel.odonnell/Blog/custom-keyboard-in-linuxx11
5053             http://shtrom.ssji.net/skb/xorg-ligatures.html (of 2008???)
5054             http://tldp.org/HOWTO/Danish-HOWTO-2.html (of 2005???)
5055             http://www.tux.org/~balsa/linux/deadkeys/index.html (of 1999???)
5056             http://www.x.org/releases/X11R7.6/doc/libX11/Compose/en_US.UTF-8.html
5057             http://cgit.freedesktop.org/xorg/proto/xproto/plain/keysymdef.h
5058            
5059             EIGHT_LEVEL FOUR_LEVEL_ALPHABETIC FOUR_LEVEL_SEMIALPHABETIC PC_SYSRQ : see
5060             http://cafbit.com/resource/mackeyboard/mackeyboard.xkb
5061            
5062             ./xkb in /etc/X11 /usr/local/X11 /usr/share/local/X11 /usr/share/X11
5063             (maybe it is more productive to try
5064             ls -d /*/*/xkb /*/*/*/xkb
5065             ?)
5066             but what dead_diaeresis means is defined here:
5067             Apparently, may be in /usr/X11R6/lib/X11/locale/en_US.UTF-8/Compose /usr/share/X11/locale/en_US.UTF-8/Compose
5068             http://wiki.maemo.org/Remapping_keyboard
5069             http://www.x.org/releases/current/doc/man/man8/mkcomposecache.8.xhtml
5070            
5071             B<Note:> having the XIM input method in GTK disables the Control-Shift-u way of entering HEX Unicode.
5072            
5073             How to contribute:
5074             http://www.freedesktop.org/wiki/Software/XKeyboardConfig/Rules
5075            
5076             B the problems with handling deadkeys via .Compose are that: .Compose is handled by
5077             applications, while keymaps by server (since they may be on different machines, things can
5078             easily get out of sync); .Compose knows nothing about the current "Keyboard group" or of
5079             the state of CapsLock etc (therefore emulating "group switch" via composing is impossible).
5080            
5081             JS code to add "insert these chars": google for editpage_specialchars_cyrilic, or
5082            
5083             http://en.wikipedia.org/wiki/User:TEB728/monobook.jsx
5084            
5085             Latin paleography
5086            
5087             http://en.wikipedia.org/wiki/Latin_alphabet
5088             http://tlt.its.psu.edu/suggestions/international/bylanguage/oenglish.html
5089             http://guindo.pntic.mec.es/~jmag0042/LATIN_PALEOGRAPHY.pdf
5090             http://www.evertype.com/standards/wynnyogh/ezhyogh.html
5091             http://www.wordorigins.org/downloads/OELetters.doc
5092             http://www.menota.uio.no/menota-entities.txt
5093             http://std.dkuug.dk/jtc1/sc2/wg2/docs/n2957.pdf (Incomplete???)
5094             http://skaldic.arts.usyd.edu.au/db.php?table=mufi_char&if=mufi (No prioritization...)
5095            
5096             Summary tables for Cyrillic
5097            
5098             http://ru.wikipedia.org/wiki/%D0%9A%D0%B8%D1%80%D0%B8%D0%BB%D0%BB%D0%B8%D1%86%D0%B0#.D0.A1.D0.BE.D0.B2.D1.80.D0.B5.D0.BC.D0.B5.D0.BD.D0.BD.D1.8B.D0.B5_.D0.BA.D0.B8.D1.80.D0.B8.D0.BB.D0.BB.D0.B8.D1.87.D0.B5.D1.81.D0.BA.D0.B8.D0.B5_.D0.B0.D0.BB.D1.84.D0.B0.D0.B2.D0.B8.D1.82.D1.8B_.D1.81.D0.BB.D0.B0.D0.B2.D1.8F.D0.BD.D1.81.D0.BA.D0.B8.D1.85_.D1.8F.D0.B7.D1.8B.D0.BA.D0.BE.D0.B2
5099             http://ru.wikipedia.org/wiki/%D0%9F%D0%BE%D0%B7%D0%B8%D1%86%D0%B8%D0%B8_%D0%B1%D1%83%D0%BA%D0%B2_%D0%BA%D0%B8%D1%80%D0%B8%D0%BB%D0%BB%D0%B8%D1%86%D1%8B_%D0%B2_%D0%B0%D0%BB%D1%84%D0%B0%D0%B2%D0%B8%D1%82%D0%B0%D1%85
5100             http://en.wikipedia.org/wiki/List_of_Cyrillic_letters - per language tables
5101             http://en.wikipedia.org/wiki/Cyrillic_alphabets#Summary_table
5102             http://en.wiktionary.org/wiki/Appendix:Cyrillic_script
5103            
5104             Extra chars (see also the ordering table on page 8)
5105             http://std.dkuug.dk/jtc1/sc2/wg2/docs/n3194.pdf
5106            
5107             Typesetting Old and Modern Church Slavonic
5108             http://www.sanu.ac.rs/Cirilica/Prilozi/Skup.pdf
5109             http://irmologion.ru/ucsenc/ucslay8.html
5110             http://irmologion.ru/csscript/csscript.html
5111             http://cslav.org/success.htm
5112             http://irmologion.ru/developer/fontdev.html#allocating
5113            
5114             Non-dialogue of Slavists and Unicode experts
5115             http://www.sanu.ac.rs/Cirilica/Prilozi/Standard.pdf
5116             http://kodeks.uni-bamberg.de/slavling/downloads/2008-07-26_white-paper.pdf
5117            
5118             Newer: (+ combining ф)
5119             http://tug.org/pipermail/xetex/2012-May/023007.html
5120             http://www.unicode.org/alloc/Pipeline.html As below, plus N-left-hook, ДЗЖ ДЧ, L-descender, modifier-Ь/Ъ
5121             http://www.synaxis.info/azbuka/ponomar/charset/charset_1.htm
5122             http://www.synaxis.info/azbuka/ponomar/charset/charset_2.htm
5123             http://www.synaxis.info/azbuka/ponomar/roadmap/roadmap.html
5124             http://www.ponomar.net/cu_support.html
5125             http://www.ponomar.net/files/out.pdf
5126             http://www.ponomar.net/files/variants.pdf (5 VS for Mark's chapter, 2 VS for t, 1 VS for the rest)
5127            
5128             http://std.dkuug.dk/jtc1/sc2/wg2/docs/n3772.pdf typikon (+[semi]circled), ε-form
5129             http://std.dkuug.dk/jtc1/sc2/wg2/docs/n3971.pdf inverted ε-typikon
5130             http://std.dkuug.dk/jtc1/sc2/wg2/docs/n3974.pdf two variants of o/O
5131             http://std.dkuug.dk/jtc1/sc2/wg2/docs/n3998.pdf Mark's chapter
5132             http://std.dkuug.dk/jtc1/sc2/wg2/docs/n3563.pdf Reversed tse
5133            
5134             IPA
5135            
5136             http://upload.wikimedia.org/wikipedia/commons/f/f5/IPA_chart_2005_png.svg
5137             http://en.wikipedia.org/wiki/Obsolete_and_nonstandard_symbols_in_the_International_Phonetic_Alphabet
5138             http://en.wikipedia.org/wiki/Case_variants_of_IPA_letters
5139             Table with Unicode points marked:
5140             http://www.staff.uni-marburg.de/~luedersb/IPA_CHART2005-UNICODE.pdf
5141             (except for "Lateral flap" and "Epiglottal" column/row).
5142             (Extended) IPA explained by consortium:
5143             http://unicode.org/charts/PDF/U0250.pdf
5144             IPA keyboard
5145             http://www.rejc2.co.uk/ipakeyboard/
5146            
5147             http://en.wikipedia.org/wiki/International_Phonetic_Alphabet_chart_for_English_dialects#cite_ref-r_11-0
5148            
5149            
5150             Is this discussing KBDNLS_TYPE_TOGGLE on VK_KANA???
5151            
5152             http://mychro.mydns.jp/~mychro/mt/2010/05/vk-f.html
5153            
5154             Windows: fonts substitution/fallback/replacement
5155            
5156             http://msdn.microsoft.com/en-us/goglobal/bb688134
5157            
5158             Problems on Windows:
5159            
5160             http://en.wikipedia.org/wiki/Help:Special_characters#Alt_keycodes_for_Windows_computers
5161             http://en.wikipedia.org/wiki/Template_talk:Unicode#Plane_One_fonts
5162            
5163             Console font: Lucida Console 14 is viewable, but has practically no Unicode support.
5164             Consolas (good at 16) has much better Unicode support (sometimes better sometimes worse than DejaVu)
5165             DejaVu is good at 14 (equal to a GUI font size 9 on 15in 1300px screen; 16px unifont is native at 12 here)
5166             http://cristianadam.blogspot.com/2009/11/windows-console-and-true-type-fonts.html
5167            
5168             Apparently, Windows picks up the flavor (Bold/Italic/Etc) of DejaVu at random; see
5169             http://jpsoft.com/forums/threads/strange-results-with-cp-1252.1129/
5170             - he got it in bold. I''m getting it in italic... Workaround: uninstall
5171             all flavors but one (the BOOK flavor), THEN enable it for the console... Then reinstall
5172             (preferably newer versions).
5173            
5174             Display (how WikiPedia does it):
5175            
5176             http://en.wikipedia.org/wiki/Help:Special_characters#Displaying_special_characters
5177             http://en.wikipedia.org/wiki/Template:Unicode
5178             http://en.wikipedia.org/wiki/Template:Unichar
5179             http://en.wikipedia.org/wiki/User:Ruud_Koot/Unicode_typefaces
5180             In CSS: .IPA, .Unicode { font-family: "Arial Unicode MS", "Lucida Sans Unicode"; }
5181             http://web.archive.org/web/20060913000000/http://en.wikipedia.org/wiki/Template:Unicode_fonts
5182            
5183             Inspect which font is used by Firefox:
5184            
5185             https://addons.mozilla.org/en-US/firefox/addon/fontinfo/
5186            
5187             Windows shortcuts:
5188            
5189             http://windows.microsoft.com/en-US/windows7/Keyboard-shortcuts
5190             http://www.redgage.com/blogs/pankajugale/all-keyboard-shortcuts--very-useful.html
5191             https://skydrive.live.com/?cid=2ee8d462a8f365a0&id=2EE8D462A8F365A0%21141
5192             http://windows.microsoft.com/en-us/windows-8/new-keyboard-shortcuts
5193            
5194             On meaning of Unicode math codepoints
5195            
5196             http://milde.users.sourceforge.net/LUCR/Math/unimathsymbols.pdf
5197             http://milde.users.sourceforge.net/LUCR/Math/data/unimathsymbols.txt
5198             http://www.ams.org/STIX/bnb/stix-tbl.ascii-2006-10-20
5199             http://www.ams.org/STIX/bnb/stix-tbl.layout-2006-05-15
5200             http://mirrors.ibiblio.org/CTAN/macros/latex/contrib/unicode-math/unimath-symbols.pdf
5201             http://mirrors.ibiblio.org/CTAN//biblio/biber/documentation/utf8-macro-map.html
5202             http://tex.stackexchange.com/questions/14/how-to-look-up-a-symbol-or-identify-a-math-symbol-or-character
5203             http://unicode.org/Public/math/revision-09/MathClass-9.txt
5204             http://www.w3.org/TR/MathML/
5205             http://www.w3.org/TR/xml-entity-names/
5206             http://www.w3.org/TR/xml-entity-names/bycodes.html
5207            
5208             Transliteration (via iconv [it is locale-dependent], example rules for Greek)
5209            
5210             http://sourceware.org/bugzilla/show_bug.cgi?id=12031
5211            
5212             Monospaced fonts with combining marks (!)
5213            
5214             https://bugs.freedesktop.org/show_bug.cgi?id=18614
5215             https://bugs.freedesktop.org/show_bug.cgi?id=26941
5216            
5217             Indic ISCII - any hope with it? (This is not representable...:)
5218            
5219             http://unicode.org/mail-arch/unicode-ml/y2012-m09/0053.html
5220            
5221             (Perceived) problems of Unicode (2001)
5222            
5223             http://www.ibm.com/developerworks/library/u-secret.html
5224            
5225             On a need to have input methods for unicode
5226            
5227             http://unicode.org/mail-arch/unicode-ml/y2012-m07/0226.html
5228            
5229             On info on Unicode chars
5230            
5231             http://unicode.org/mail-arch/unicode-ml/y2012-m07/0415.html
5232            
5233             Zapf dingbats encoding, and other fine points of AdobeGL:
5234            
5235             ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/ADOBE/zdingbat.txt
5236             http://web.archive.org/web/20001015040951/http://partners.adobe.com/asn/developer/typeforum/unicodegn.html
5237            
5238             Yet another (IMO, silly) way to handle '; fight: ' vs ` ´
5239            
5240             http://www.cl.cam.ac.uk/~mgk25/ucs/apostrophe.html
5241            
5242             Surrogate characters on IE
5243            
5244             HKEY_CURRENT_USER\Software\Microsoft\Internet Explorer\International\Scripts\42
5245             http://winvnkey.sourceforge.net/webhelp/surrogate_fonts.htm
5246             http://msdn.microsoft.com/en-us/library/aa918682.aspx Script IDs
5247            
5248             Quoting tchrist:
5249             I, C, and C from L if you like.>
5250            
5251             Tom's unicode scripts
5252            
5253             http://search.cpan.org/~bdfoy/Unicode-Tussle-1.03/lib/Unicode/Tussle.pm
5254            
5255             =head2 F<.XCompose>: on docs and examples
5256            
5257             Syntax of C<.XCompose> is (partially) documented in
5258            
5259             http://www.x.org/archive/current/doc/man/man5/Compose.5.xhtml
5260             http://cgit.freedesktop.org/xorg/lib/libX11/tree/man/Compose.man
5261            
5262             # Modifiers are not documented
5263             # (Shift, Alt, Lock, Ctrl with aliases Meta, Caps [Alt/Meta binds Mod1];
5264             # ! means that not mentioned supported modifiers must be off;
5265             # None means that all recognized modifiers are off.)
5266            
5267             The semantics (e.g., which of the keybindings takes precedence) are not documented.
5268             Experiments (see below) show that a longer binding wins; if same
5269             length, one which is loaded later wins (as far as they match exactly, both
5270             the keys, and the set of required modifiers and their states).
5271             Note that a given keypress may match several I lists of
5272             modifier; one defined earlier wins.
5273            
5274             For example, in
5275            
5276             ~Ctrl Shift : "a1"
5277             Shift ~Ctrl : "ab1"
5278             ~Meta Shift : "b1"
5279             ~Ctrl ~Meta Shift : "ba1"
5280             Shift ~Meta : "b2"
5281             Shift ~Meta ~Lock : "b3"
5282            
5283             there is no way to trigger the output C<"a1"> (since the next row captures
5284             essentially the same keypress into a longer binding). The only binding which
5285             is explicitly overwritten is one for C<"b1">. Hence pressing
5286             C would trigger the binding C<"b2">, and there is no way to trigger
5287             the bindings for C<"b3"> and C<"ba1">.
5288            
5289             # (the source of imLcPrs.c shows that the expansion of the
5290             # shorter sequence is stored too - but the presence of
5291             # ->succession means that the code to process the resulting
5292             # tree ignores the expansion).
5293            
5294             The interaction of C<.Compose> with
5295             L
5296             of passed-through C and C modifiers is not documented.
5297            
5298             Before the syntax was documented: For the best approximation,
5299             read the parser's code, e.g., google for
5300            
5301             inurl:compose.c XCompose
5302             site:cgit.freedesktop.org "XCompose"
5303             site:cgit.freedesktop.org "XCompose" filetype:c
5304             _XimParseStringFile
5305            
5306             http://cgit.freedesktop.org/xorg/lib/libX11/tree/modules/im/ximcp/imLcIm.c
5307             http://cgit.freedesktop.org/xorg/lib/libX11/tree/modules/im/ximcp/imLcPrs.c
5308             http://uim.googlecode.com/svn-history/r6111/trunk/gtk/compose.c
5309             http://uim.googlecode.com/svn/tags/uim-1.5.2/gtk/compose.c
5310            
5311             The actual use of the compiled compose table:
5312            
5313             http://cgit.freedesktop.org/xorg/lib/libX11/tree/modules/im/ximcp/imLcFlt.c
5314            
5315             Apparently, the first node (= defined last) in the tree which
5316             matches keysym and modifiers is chosen. So to override C<< >>,
5317             looks like (checked to work!) C<< ~Ctrl >> may be used...
5318             On the other hand, defining both C<< >> and (later) C<< ~Ctrl >>,
5319             one would expect that C<< >> should still trigger the
5320             expansion of C<< >> — but it does not... See also:
5321            
5322             http://cgit.freedesktop.org/xorg/lib/libX11/tree/modules/im/ximcp/imLcLkup.c
5323            
5324             The file F<.XCompose> is processed by X11 I<clients> on startup. The changes
5325             to this file should be seen immediately by all newly started clients
5326             (but GTK or QT applications may need extra config - see below)
5327             unless the directory F<~/.compose-cache> is present and has a cache
5328             file compatible with binary architecture (then until cache
5329             expires - one day after creation - changes are not seen). The
5330             name F<.XCompose> may be overridden by environment variable C<XCOMPOSEFILE>.
5331            
5332             To get (better?) examples, google for C<"multi_key" partial alpha "DOUBLE-STRUCK">.
5333            
5334             # include these first, so they may be overridden later
5335             include "%H/my-Compose/.XCompose-kragen"
5336             include "%H/my-Compose/.XCompose-ootync"
5337             include "%H/my-Compose/.XCompose-pSub"
5338            
5339             Check success: kragen: C<\ space> --> ␣; ootync: C --> ℉; pSub: C<0 0> --> ∞ ...
5340            
5341             Older versions of X11 do not understand %L %S. - but understand %H
5342            
5343             E.g. Debian Squeeze 6.0.6; according to
5344            
5345             http://packages.debian.org/search?keywords=x11-common
5346            
5347             it has C).
5348            
5349             include "/etc/X11/locale/en_US.UTF-8/Compose"
5350             include "/usr/share/X11/locale/en_US.UTF-8/Compose"
5351            
5352             Import default rules from the system Compose file:
5353             usually as above (but supported only on newer systems):
5354            
5355             include "%L"
5356            
5357             detect the success of the lines above: get C<#> by doing C ...
5358            
5359             The next file to include has been generated by
5360            
5361             perl -wlne 'next if /#\s+CIRCLED/; print if />\s+<.*>\s+<.*>\s+<.*/' /usr/share/X11/locale/en_US.UTF-8/Compose
5362             ### Std tables contain quadruple prefix for GREEK VOWELS and CIRCLED stuff
5363             ### only. But there is a lot of triple prefix...
5364             perl -wne 'next if /#\s+CIRCLED/; $s{$1}++ or print qq( $1) if />\s+<.*>\s+<.*>\s+<.*"(.*)"/' /usr/share/X11/locale/en_US.UTF-8/Compose
5365             ## – — ☭ ª º Ǖ ǖ Ǘ ǘ Ǚ ǚ Ǜ ǜ Ǟ ǟ Ǡ ǡ Ǭ ǭ Ǻ ǻ Ǿ ǿ Ȫ ȫ Ȭ ȭ Ȱ ȱ ʰ ʱ ʲ ʳ ʴ ʵ ʶ ʷ ʸ ˠ ˡ ˢ ˣ ˤ ΐ ΰ Ḉ ḉ Ḕ ḕ Ḗ ḗ Ḝ ḝ Ḯ ḯ Ḹ ḹ Ṍ ṍ Ṏ ṏ Ṑ ṑ Ṓ ṓ Ṝ ṝ Ṥ ṥ Ṧ ṧ Ṩ ṩ Ṹ ṹ Ṻ ṻ Ấ ấ Ầ ầ Ẩ ẩ Ẫ ẫ Ậ ậ Ắ ắ Ằ ằ Ẳ ẳ Ẵ ẵ Ặ ặ Ế ế Ề ề Ể ể Ễ ễ Ệ ệ Ố ố Ồ ồ Ổ ổ Ỗ ỗ Ộ ộ Ớ ớ Ờ ờ Ở ở Ỡ ỡ Ợ ợ Ứ ứ Ừ ừ Ử ử Ữ ữ Ự ự ἂ ἃ ἄ ἅ ἆ ἇ Ἂ Ἃ Ἄ Ἅ Ἆ Ἇ ἒ ἓ ἔ ἕ Ἒ Ἓ Ἔ Ἕ ἢ ἣ ἤ ἥ ἦ ἧ Ἢ Ἣ Ἤ Ἥ Ἦ Ἧ ἲ ἳ ἴ ἵ ἶ ἷ Ἲ Ἳ Ἴ Ἵ Ἶ Ἷ ὂ ὃ ὄ ὅ Ὂ Ὃ Ὄ Ὅ ὒ ὓ ὔ ὕ ὖ ὗ Ὓ Ὕ Ὗ ὢ ὣ ὤ ὥ ὦ ὧ Ὢ Ὣ Ὤ Ὥ Ὦ Ὧ ᾀ ᾁ ᾂ ᾃ ᾄ ᾅ ᾆ ᾇ ᾈ ᾉ ᾊ ᾋ ᾌ ᾍ ᾎ ᾏ ᾐ ᾑ ᾒ ᾓ ᾔ ᾕ ᾖ ᾗ ᾘ ᾙ ᾚ ᾛ ᾜ ᾝ ᾞ ᾟ ᾠ ᾡ ᾢ ᾣ ᾤ ᾥ ᾦ ᾧ ᾨ ᾩ ᾪ ᾫ ᾬ ᾭ ᾮ ᾯ ᾲ ᾴ ᾷ ῂ ῄ ῇ ῒ ῗ ῢ ῧ ῲ ῴ ῷ ⁱ ⁿ ℠ ™ שּׁ שּׂ а̏ А̏ е̏ Е̏ и̏ И̏ о̏ О̏ у̏ У̏ р̏ Р̏ 🙌
5366            
5367             The following excerpt from NEO compose tables may be good if you use
5368             keyboards which do not generate dead keys, but may generate Cyrillic keys;
5369             in other situations, edit filtering/naming on the following download
5370             command and on the C<include> line below. (For my taste, most bindings are
5371             useless since they contain keysymbols which may be generated with NEO, but
5372             not with less intimidating keylayouts.)
5373            
5374             (Filtering may be important, since having a large file may
5375             significantly slow down client's startup (without F<~/.compose-cache>???).)
5376            
5377             # perl -wle 'foreach (qw(base cyrillic greek lang math)) {my @i=@ARGV; $i[-1] .= qq($_.module?format=txt); system @i}' wget -O - http://wiki.neo-layout.org/browser/Compose/src/ | perl -wlne 'print unless /<(U[\dA-F]{4,6}>|dead_|Greek_)/' > .XCompose-neo-no-Udigits-no-dead-no-Greek
5378             include "%H/.XCompose-neo-no-Udigits-no-dead-no-Greek"
5379             # detect the success of the line above: get ♫ by doing Compose Compose (but this binding is overwritten later!)
5380            
5381             ###################################### Neo's Math contains junk at line 312
5382            
5383             Print with something like (loading in a web browser after this):
5384            
5385             perl -l examples/filter-XCompose ~/.XCompose-neo-no-Udigits-no-dead-no-Greek > ! o-neo
5386             env LC_ALL=C sort -f o-neo | column -x -c 130 > ! /tmp/oo-neo-x
5387            
5388             =head2 “Systematic” parts of rules in a few F<.XCompose>
5389            
5390             ================== .XCompose b=bepo o=ootync k=kragen p=pSub s=std
5391             b Double-Struck b
5392             o circled ops b
5393             O big circled ops b
5394             r rotated b 8ACETUv ∞
5395            
5396             - sub p
5397             = double arrows po
5398             g greek po
5399             m math p |=Double-Struck rest haphazard...
5400             O circles p Oo
5401             S stars p Ss
5402             ^ sup p added: i -
5403             | daggers p
5404            
5405             Double mathop ok +*&|%8CNPQRZ AE
5406            
5407             # thick-black arrows o
5408             -,Num- arrows o
5409             N/N fractions o
5410             hH pointing hands o
5411             O circled ops o
5412             o degree o
5413             rR roman nums o
5414             \ UP upper modifiers o
5415             \ DN lower modifiers o
5416             { set theoretic o
5417             | arrows |-->flavors o
5418             UP / roots o
5419             LFT DN 6-quotes, bold delim o
5420             RT DN 9-quotes, bold delim o
5421             UP,DN super,sub o
5422            
5423             DOUBLE-separated-by-& op k ( )
5424             in-() circled k xx for tensor
5425             in-[] boxed, dice, play-cards k
5426             BKSP after revert k
5427             < after revert k
5428             ` after small-caps k
5429             ' after hook k
5430             , after hook below k
5431             h after phonetic k
5432            
5433             # musical k
5434             %0 ROMAN k %_0 for two-digit
5435             % roman k %_ for two-digit
5436             * stars k
5437             *. var-greek k
5438             * greek k
5439             ++, 3 triple k
5440             + double k
5441             , quotes k
5442             !, / negate k
5443             6,9 6,9-quotes k
5444             N N fractions k
5445             = double-arrows, RET k
5446             CMP x2 long names k
5447             f hand, pencils k
5448             \ combining??? k
5449             ^ super, up modifier k
5450             _ low modifiers k
5451             |B, |W chess, checkers, B&W k
5452             | double-struck k
5453             ARROWS ARROWS k
5454            
5455             ! dot below s
5456             " diaeresis s
5457             ' acute s
5458             trail < left delimiter s
5459             trail > right delimiter s
5460             trail \ slanted variant s
5461             ( ... ) circled s
5462             ( greek aspirations s
5463             ) greek aspirations s
5464             + horn s
5465             , cedilla s
5466             . dot above s
5467             - hor. bar s
5468             / diag, vert hor. bar s
5469             ; ogonek s
5470             = double hor.bar, ₤₦€¥≠ s
5471             trail = double hor.bar s
5472             ? hook above s
5473             b breve s
5474             c check above s
5475             iota iota below s
5476             trail 0338 negated s
5477             o ring above s
5478             U breve s
5479             SOME HEBREW
5480             ^ circumflex s
5481             ^ _ superscript s
5482             ^ undbr superscript s
5483             _ bar s
5484             _ subscript s
5485             underbr subscript s
5486             ` grave s
5487             ~ greek dieresis s
5488             ~ tilde s
5489             overbar bar s
5490             ´ acute s ´ is not '
5491             ¸ cedilla s ¸ is cedilla
5492            
5493             =head1 LIMITATIONS
5494            
5495             Currently only output for Windows keyboard layout drivers (via MSKLC) is available.
5496            
5497             Currently only the keyboards with US-mapping of hardware keys to "the etched
5498             symbols" are supported (think of German physical keyboards where Y/Z keycaps
5499             are swapped: Z is etched between T and U, and Y is to the left of X, or French
5500             which swaps A and Q, or French or Russian physical keyboards which have more
5501             alphabetical keys than 26).
5502            
5503             While the architecture of assembling a keyboard of small easy-to-describe
5504             pieces is (IMO) elegant and very powerful, and is proven to be useful, it
5505             still looks like a collection of independent hacks. Many of these hacks
5506             look quite similar; it would be great to find a way to unify them, so
5507             reduce the repertoire of operations for assembly.
5508            
5509             The current documentation of the module’s functionality is not complete.
5510            
5511             The implementation of the module is crumbling under its weight. Its
5512             evolution was by bloating (even when some design features were simplified).
5513             Since initially I had very little clue to which level of abstraction and
5514             flexibility the keyboard description would evolve, bloating accumulated
5515             to incredible amounts.
5516            
5517             =head1 COPYRIGHT
5518            
5519             Copyright (c) 2011-2013 Ilya Zakharevich
5520            
5521             This library is free software; you can redistribute it and/or modify
5522             it under the same terms as Perl itself, either Perl version 5.8.0 or,
5523             at your option, any later version of Perl 5 you may have available.
5524            
5525             The distributed examples may have their own copyrights.
5526            
5527             =head1 TODO
5528            
5529             UniPolyK-MultiSymple
5530            
5531             Multiple linked faces (accessible as described in ChangeLog); designated
5532             Primary- and Secondary- switch keys (as Shift-Space and AltGr-Space now).
5533            
5534             C as a deadkey may be not a good idea: following it by a special key
5535             (such as C, or C) may insert the deadkey character???
5536             Hence the character should be highly visible... (Now the key is invisible,
5537             so this is irrelevant...)
5538            
5539             Currently linked layers must have exactly the same number of keys in VK-tables.
5540            
5541             VK tables for TAB, BACK were BS. Same (remains) for the rest of unusual keys... (See TAB-was.)
5542             But UTOOL cannot handle them anyway...
5543            
5544             Define an extra element in VK keys: linkable. Should be sorted first in the kbd map,
5545             and there should be the same number in linked lists. Non-linkable keys should not
5546             be linked together by deadkey access...
5547            
5548             Interaction of FromToFlipShift with SelectRX not intuitive. This works: Diacritic[](SelectRX[[0-9]](FlipShift(Latin)))
5549            
5550             DefinedTo cannot be put on Cyrillic 3a9 (yo to superscript disappears - due to duplication???).
5551            
5552             ... so we do it differently now, but: LinkLayer was not aggressively resolving all the occurrences of a character on a layer
5553             before we started to combine it with Diacritic_if_undef... - and Cyrillic 3a9 is not helped...
5554            
5555             via_parent() is broken - cannot replace for Diacritic_if_undef.
5556            
5557             Currently, we map epigraphic letters to capital letters - is it intuitive???
5558            
5559             dotted circle ◌ 25CC
5560            
5561             DeadKey_Map200A= FlipLayers
5562             #DeadKey_Map200A_0= Id(Russian-AltGr)
5563             #DeadKey_Map200A_1= Id(Russian)
5564             performs differently from the commented variant: it adds links to auto-filled keys...
5565            
5566             Why ¨ on THIN SPACE inserts OGONEK after making ¨ multifaceted???
5567            
5568             When splitting a name on OVER/BELOW/ABOVE, we need both sides as modifiers???
5569            
5570             Ỳ currently unreachable (appears only in Latin-8 Celtic, is not on Wikipedia)
5571            
5572             Somebody is putting an extra element at the end of arrays for layers??? - Probably SPACE...
5573            
5574             Need to treat upside-down as a pseudo-decomposition.
5575            
5576             We decompose reversed-smallcaps in one step - probably better add yet another two-steps variant...
5577            
5578             When creating a treat SYMBOL/SIGN/FINAL FORM/ISOLATED FORM/INITIAL FORM/MEDIAL FORM;
5579             note that SIGN may be stripped: LESS-THAN SIGN becomes LESS-THAN WITH DOT
5580            
5581             We do not do canonical-merging of diacritics; so one needs to specify VARIA in addition to GRAVE ACCENT.
5582            
5583             We use a smartish algorithm to assign multiple diacritics to the same deadkey. A REALLY smart algorithm
5584             would use information about when a particular precombined form was introduced in Unicode...
5585            
5586             Inspector tool for NamesList.txt:
5587            
5588             grep " WITH .* " ! | grep -E -v "(ACUTE|GRAVE|ABOVE|BELOW|TILDE|DIAERESIS|DOT|HOOK|LEG|MACRON|BREVE|CARON|STROKE|TAIL|TONOS|BAR|DOTS|ACCENT|HALF RING|VARIA|OXIA|PERISPOMENI|YPOGEGRAMMENI|PROSGEGRAMMENI|OVERLAY|(TIP|BARB|CORNER) ([A-Z]+WARDS|UP|DOWN|RIGHT|LEFT))$" | grep -E -v "((ISOLATED|MEDIAL|FINAL|INITIAL) FORM|SIGN|SYMBOL)$" |less
5589             grep " WITH " ! | grep -E -v "(ACUTE|GRAVE|ABOVE|BELOW|TILDE|DIAERESIS|CIRCUMFLEX|CEDILLA|OGONEK|DOT|HOOK|LEG|MACRON|BREVE|CARON|STROKE|TAIL|TONOS|BAR|CURL|BELT|HORN|DOTS|LOOP|ACCENT|RING|TICK|HALF RING|COMMA|FLOURISH|TITLO|UPTURN|DESCENDER|VRACHY|QUILL|BASE|ARC|CHECK|STRIKETHROUGH|NOTCH|CIRCLE|VARIA|OXIA|PSILI|DASIA|DIALYTIKA|PERISPOMENI|YPOGEGRAMMENI|PROSGEGRAMMENI|OVERLAY|(TIP|BARB|CORNER) ([A-Z]+WARDS|UP|DOWN|RIGHT|LEFT))$" | grep -E -v "((ISOLATED|MEDIAL|FINAL|INITIAL) FORM|SIGN|SYMBOL)$" |less
5590            
5591             AltGrMap should be made CapsLock aware (impossible: smart capslock works only on the first layer, so
5592             the dead char must be on the first layer). [May work for Shift-Space - but it has a bag of problems...]
5593            
5594             Alas, CapsLock'ing a composition cannot be made stepwise. Hence one must calculate it directly.
5595             (Oups, Windows CapsLock is not configurable on AltGr-layer. One may need to convert
5596             it to VK_KANA???)
5597            
5598             WarnConflicts[exceptions] and NoConflicts translation map parsing rules.
5599            
5600             Need a way to map to a different face, not a different layer.
5601            
5602             Vietnamese: to put second accent over ă, ơ (o/horn), put them over ae/oe; - including
5603             another ˘ which would "cancel the implied one", so will get o-horn itself. - Except
5604             for acute accent which should be replaced by ¨, and hook must be replaced by ˆ. (Over ae/oe
5605             there is only macron and diaeresis over ae.)
5606            
5607             Or: for the purpose of taking a second accent, AltGr-A behaves as Ă (or Â?), AltGr-O
5608             behaves as Ô (or O-horn Ơ?). Then Å and O/ behave as the other one... And ˚ puts the
5609             dot *below*, macron puts a hook. Exception: ¨ acts as ´ on the unaltered AE.
5610            
5611             While Å takes acute accent, one can always input it via putting ˚ on Á.
5612            
5613             If Ê is on the keyboard (and macron puts a hook), then the only problem is how to enter
5614             a hook alone (double circumflex is not precombined), dot below (???), and accents on u-horn ư.
5615            
5616             Mogrification rules for double accents: AE Å OE O/ Ù mogrify into hatted/horned versions; macron
5617             mogrifies into a hook; second hat modifies a hat into a horn. The only problem: one won't be
5618             able to enter double grave on U - use the OTHER combination of ¨ and `... And how to enter
5619             dot below on non-accented aue? Put ¨ on umlaut? What about Ë?
5620            
5621             To allow . or , on VK_DECIMAL: maybe make CapsLock-dependent?
5622            
5623             http://blogs.msdn.com/b/michkap/archive/2006/09/13/752377.aspx
5624            
5625             How to write this diacritic recipe: insert hacheck on AltGr-variant, but only if
5626             the breve on the base layer variant does not insert hacheck (so inserts breve)???
5627            
5628             Sorting diacritics by usefulness: we want to apply one of accents from the
5629             given list to a given key (with l layers of 2 shift states). For each accent,
5630             we have 2l possible variants for composition; assign to 2 variants differing
5631             by Shift the minimum penalty of the two. For each layer we get several possible
5632             combinations of different priority; and for each layer, we have a certain number
5633             of slots open. We can redistribute combinations from the primary layer to
5634             secondary one, but not between secondary layers.
5635            
5636             Work with slots one-by-one (so that the assignment is "monotonic" when the number
5637             of slots increases). Let m be the number of layers where slots are present.
5638             Take highest priority combinations; if the number of "extra" combinations
5639             in the primary layer is at least m, distribute the first m of them to
5640             secondary layers. If only n<m of them are present, fill the k layers which
5641             have no combinations of their own first, then other n-k layers. More precisely,
5642             if n<=k, use the first n of "free" layers; if n>k, fill all free layers, then
5643             the last n-k of non-free layers.
5644            
5645             Repeat as needed (on each step, at most one slot in each layer appears).
5646            
5647             But we do not need to separate case-differing keys! How to fix?
5648            
5649             All done, but this works only on the current face! To fix, need to pass
5650             to the translator all the face-characters present on the given key simultaneously.
5651            
5652             ===== Accent-key TAB accesses extra bindings (including NUM->numbered one)
5653             (may be problematic with some applications???
5654             -- so duplicate it on + and @ if they are not occupied
5655             -- there is nothing related to AT in Unicode)
5656            
5657             Diacritics_0218_0b56_0c34= May create such a thing...
5658             (0b56_0c34 invisible to the user).
5659            
5660             Hmm - how to combine penalized keys with reversion? It looks like
5661             the higher priority bindings would occupy the hottest slots in both
5662             direct and reverse bindings...
5663            
5664             Maybe additional forms Diacrtitics2S_* and Diacrtitics2E_* which fight
5665             for symbols of the same penalty from start and from end (with S winning
5666             on stuff exactly in the middle...). (The E-form would also strip the last |-group.)
5667            
5668             ' Shift-Space (from US face) should access the second level of Russian face.
5669             To avoid infinite cycles, face-switch keys to non-private faces should be
5670             marked in each face...
5671            
5672             "Acute makes sharper" is applicable to () too to get <>-parens...
5673            
5674             Other ways of combining: "OR EQUAL TO", "OR EQUIVALENT TO", "APL FUNCTIONAL
5675             SYMBOL QUAD", "APL FUNCTIONAL SYMBOL *** UNDERBAR", "APL FUNCTIONAL SYMBOL *** DIAERESIS".
5676            
5677             When recognizing symbols for GREEK, treat LUNATE (as NOP). Try adding HEBREW LETTER at start as well...
5678            
5679             Compare with: 8 basic accents: http://en.wikipedia.org/wiki/African_reference_alphabet (English 78)
5680            
5681             When a diacritic on a base letter expands to several variants, use them all
5682             (with penalty according to the flags).
5683            
5684             Problem: acute on acute makes double acute modifier...
5685            
5686             Penalized letters are temporarily completely ignored; need to attach them in the end...
5687             - but not 02dd which should be completely ignored...
5688            
5689             Report characters available on diacritic chains, but not accessible via such chains.
5690             Likewise for characters not accessible at all. Mark certain chains as "Hacks" so that
5691             they are not counted in these lists.
5692            
5693             Long s and "preceded by" are not handled since the table has its own (useless) compatibility decompositions.
5694            
5695             ╒╤╕
5696             ╞╪╡
5697             ╘╧╛
5698             ╓╥╖
5699             ╟╫╢
5700             ╙╨╜
5701             ╔╦╗
5702             ╠╬╣
5703             ╚╩╝
5704             ┌┬┐
5705             ├┼┤
5706             └┴┘
5707             ┎┰┒
5708             ┠╂┨
5709             ┖┸┚
5710             ┍┯┑
5711             ┝┿┥
5712             ┕┷┙
5713             ┏┳┓
5714             ┣╋┫
5715             ┗┻┛
5716             On top of a light-lines grid (3×2, 2×3, 2×2; H, V, V+H):
5717             ┲┱
5718             ╊╉
5719             ┺┹
5720             ┢╈┪
5721             ┡╇┩
5722             ╆╅
5723             ╄╇
5724             ╼†━†╾†╺†╸†╶†─†╴†╌†┄†┈† †╍†┅†┉†
5725             ╼━╾╺╸╶─╴╌┄┈ ╍┅┉
5726            
5727            
5728            
5729            
5730            
5731            
5732            
5733            
5734            
5735             ╎┆┊╏┇┋
5736            
5737             ╲ ╱
5738            
5739             ╭╮
5740             ╰╯
5741             ◤▲◥
5742             ◀■▶
5743             ◣▼◢
5744             ◜△◝
5745             ◁□▷
5746             ◟▽◞
5747             ◕◓◔
5748             ◐○◑
5749            
5750             ▗▄▖
5751             ▐█▌
5752             ▝▀▘
5753             ▛▀▜
5754             ▌ ▐
5755             ▙▄▟
5756            
5757             ░▒▓
5758            
5759            
5760             =head2 Implementation details
5761            
5762             Since the C accessor may have different effects at different moment of
5763             a face C synthesis, here is the order in which C changes:
5764            
5765             ini_layers: essentially, contains what is given in the key “layers” of the face recipe
5766             Later, a version of these layers with exportable keys marked is created as ini_layers_prefix.
5767             ini_filled_layers: adds extra (fake) keys containing control characters and created via-VK-keys
5768             (For these extended layers, the previous version can be inspected via ini_copy1.)
5769             (created when exportable keys are handled.)
5770            
5771             The next modification is done not by modifying the list of names of layers
5772             associated to the face, but by editing the corresponding layers in place.
5773             (The unmodified version of layer, one containing the exportable keys, is
5774             accessible via C.) On this step one adds the missing characters via
5775             from the face specified in the C key.
5776            
5777             =cut
5778            
5779             # '
5780             my (%Globals, $DEBUG); # %Globals: class-wide settings, written by set__value() and read by get__value() when no object is involved
5781            
5782             sub set__value ($$$) {
                 # set__value($class_or_obj, $key, $value): store $value under $key.
                 # When the first argument is a reference (an object) the value goes into
                 # that object's hash; otherwise it goes into the file-scoped %Globals.
5783 0     0 0 0 my($class, $key) = (shift, shift);
5784 0 0       0 (ref $class ? $class->{$key} : $Globals{$key}) = shift; # the ?: picks the lvalue that receives the third argument
5785             }
5786             sub get__value ($$) {
                 # get__value($class_or_obj, $key): return the setting stored under $key.
                 # A defined entry in the object's own hash wins; in every other case
                 # (plain class name, or entry missing/undef) the value in %Globals is returned.
5787 0     0 0 0 my($class, $key) = (shift, shift);
5788 0 0 0     0 if (ref $class and defined(my $v = $class->{$key})) {
5789 0         0 $v;
5790             } else {
5791 0         0 $Globals{$key};
5792             }
5793             }
5794             sub set_NamesList ($$;$) {
                 # set_NamesList($class_or_obj, $names_list [, $age_list]): record both
                 # settings through set__value() under the keys 'NamesList' and 'AgeList'.
                 # If the optional third argument is omitted, 'AgeList' is set to undef.
5795 0     0 0 0 my $class = shift;
5796 0         0 set__value($class, 'NamesList', shift);
5797 0         0 set__value($class, 'AgeList', shift);
5798             }
5799 0     0 0 0 sub get_NamesList ($) { get__value(shift, 'NamesList') } # accessor: the 'NamesList' setting (object value, else the global one)
5800 0     0 0 0 sub get_AgeList ($) { get__value(shift, 'AgeList') } # accessor: the 'AgeList' setting (object value, else the global one)
5801            
5802             sub new ($;$) {
                 # Constructor: new($class_or_obj [, \%initial_data]).
                 # Dies when more than one argument follows the invocant. The optional hash
                 # reference is shallow-copied (top level only), so the caller's hash itself
                 # is not blessed. Works as a class method or as an instance method (the new
                 # object is then blessed into the invocant's class).
5803 0     0 0 0 my $class = shift;
5804 0 0       0 die "too many arguments to UI::KeyboardLayout->new" if @_ > 1;
5805 0 0       0 my $data = @_ ? {%{shift()}} : {};
  0         0  
5806 0   0     0 bless $data, (ref $class or $class);
5807             }
5808            
5809             sub put_deep($$$$@) {
                 # put_deep($hash, $value, @path): store $value in the nested hash $hash at
                 # the position named by the list of keys @path (at least one key).
                 # While keys remain after the current one, recurse into $hash->{$k}; a missing
                 # (or false) intermediate entry is replaced by a fresh empty hash.
5810 0     0 0 0 my($self, $hash, $v, $k) = (shift, shift, shift, shift);
5811 0 0 0     0 return $self->put_deep($hash->{$k} ||= {}, $v, @_) if @_;
5812 0         0 $hash->{$k} = $v; # last key of the path: do the actual assignment
5813             }
5814            
5815             # Sections [foo/bar] [visual -> foo/bar]; directives foo=bar or @foo=bar,baz
5816             # actually: parses configfile string, not file
5817             sub parse_configfile ($$) { # Parses the string into a fresh hash; see parse_add_configstring() for the syntax
                 # Returns whatever parse_add_configstring() returns for an initially empty hash.
5818 0     0 0 0 my ($self, $s) = (shift, shift);
5819 0         0 $self->parse_add_configstring($s, {});
5820             }
5821            
5822             sub parse_add_configstring ($$$) { # Trailing whitespace is ignored, whitespace about "=" is not
                 # parse_add_configstring($string, \%target): parse the configuration text
                 # $string and add its directives to the nested hash %target; returns \%target.
                 # The text consists of section header lines "[a/b]" or "[visual -> a/b]",
                 # each followed by directive lines "name=value"; a leading "@", "/" or "+"
                 # on the name makes the value an array. Lines starting with "#" are skipped.
                 # Dies on a line which is not a directive. The list of section names seen in
                 # this call is stored in $target->{'[keys]'}.
5823 0     0 0 0 my ($self, $s, $vv, @KEYS) = (shift, shift, shift);
5824 0         0 $s =~ s/[^\S\n]+$//gm;
5825 0         0 $s =~ s/^\x{FEFF}//; # BOM are not stripped by Perl from UTF-8 files with -C31
                 # The capturing group in the separator makes split() return
                 # (text before first header, name1, body1, name2, body2, ...); the pairs go
                 # into %f, so a repeated section name keeps only its last body and sections
                 # are visited in hash order.
5826 0         0 (my $pre, my %f) = split m(^\[((?:visual\s*->\s*)?[\w/]*)\]\s*$ \n?)mx, $s; # //x is needed to avoid $\
5827 0 0       0 warn "Part before the first section in configfile ignored: `$pre'" if length $pre;
5828 0         0 for my $k (keys %f) {
5829             # warn "Section `$k'";
5830 0         0 my($v, $V, @V) = $f{$k};
                 # A "visual" section is filed under "[unparsed]/<name>". Everything from its
                 # first line that is neither a comment nor a directive up to the end of the
                 # section is cut out and turned into one synthetic "unparsed_data=" directive.
5831 0 0       0 if ($k =~ s{^visual\s*->\s*}{[unparsed]/}) { # Make sure that prefixes do not allow visual line to be confused with a config
5832 0         0 $v =~ s[(^(?!#|[/\@+]?\w+=).*)]//ms; # find non-comment non-assignment
5833 0         0 @V = "unparsed_data=$1"; # NOTE(review): $1 is used without checking that the substitution matched
5834             }
5835             # warn "xxx: @V";
5836 0         0 push @KEYS, $k;
5837 0         0 my @k = split m(/), $k; # the section name is a "/"-separated path into the target hash
5838 0 0       0 @k = () if "@k" eq ''; # root
5839 0         0 for my $l ((grep !/^#/, split(/\n/, $v)), @V) {
                 # $arr: the prefix character, if any; $at / $slash: set for "@" / "/";
                 # $kk: directive name; $vvv: the raw value
5840 0 0       0 die "unrecognized config file line: `$l' in `$s'"
5841             unless my($arr, $at, $slash, $kk, $vvv) = ($l =~ m[^((?:(\@)|(/)|\+)?)(\w+)=(.*)]s);
                 # "@" splits the value on commas, "/" on slashes; with "+" the pattern (?!)
                 # never matches, so the whole value becomes a single array element
5842 0 0       0 my $spl = $at ? qr/,/ : ( $slash ? qr[/] : qr[(?!)] );
    0          
5843 0 0       0 $vvv = [ length $vvv ? (split $spl, $vvv, -1) : $vvv ] if $arr; # create empty element if $vvv is empty
    0          
5844 0         0 my $slot = $self->get_deep($vv, @k); # get_deep() is defined elsewhere in the file; presumably returns the hash at path @k
5845 0 0 0     0 if ($slot and exists $slot->{$kk}) {
5846 0 0       0 if ($arr) {
5847 0 0 0     0 if (ref($slot->{$kk} || 0) eq 'ARRAY') {
5848 0         0 $vvv = [@{$slot->{$kk}}, @$vvv]; # array directives accumulate: append to the earlier elements
  0         0  
5849             } else {
5850 0         0 warn "Redefinition of non-array entry `$kk' in `$k' by array one, old value ignored"
5851             }
5852             } else {
5853 0         0 warn "Redefinition of entry `$kk' in `$k', old value ignored"
5854             }
5855             }
5856             # warn "Putting to the root->@k->`$kk'";
5857 0         0 $self->put_deep($vv, $vvv, @k, $kk);
5858             }
5859             }
5860 0         0 $vv->{'[keys]'} = \@KEYS;
5861             # warn "config parsed";
5862 0         0 $vv
5863             }
5864            
5865             sub merge_configstrings ($$@) { # merge_configstrings($overwrite, @strings); returns the object
                 # Each configuration string is parsed separately into a fresh hash with
                 # parse_add_configstring(), and that hash is then merged into the object
                 # itself with merge_hash(); $overwrite is passed through to merge_hash().
5866 0     0 0 0 my ($self, $overwrite) = (shift, shift);
5867 0         0 for my $s (@_) {
5868 0         0 my $data = {};
5869 0         0 $self->parse_add_configstring($s, $data); # consolidate arrays into $data
5870 0         0 $self->merge_hash($data, $self, $overwrite);
5871             }
5872             $self
5873 0         0 }
5874            
5875             sub merge_confighash ($$@) { # merge_confighash($overwrite, @hashrefs); returns the object
                 # Merges each of the given (already parsed) hashes into the object itself
                 # with merge_hash(); $overwrite is passed through to merge_hash().
5876 0     0 0 0 my ($self, $overwrite) = (shift, shift);
5877 0         0 for my $data (@_) {
5878 0         0 $self->merge_hash($data, $self, $overwrite);
5879             }
5880             $self
5881 0         0 }
5882            
5883             sub merge_hash ($$$$) {
                 # merge_hash(\%from, \%to, $overwrite): recursively merge %from into %to;
                 # returns the object.
                 # For every key of %from:
                 #   - %to holds a hash there: %from must hold a hash too (else die); recurse;
                 #   - %to holds a non-hash entry: it is replaced only if $overwrite is true;
                 #   - %to has no such key: the value is copied, except that a hash in %from
                 #     dies (NOTE(review): unchanged from the original; confirm that missing
                 #     sub-hashes are indeed meant to be an error).
5884 0     0 0 0 my ($self, $from, $to, $overwrite) = (shift, shift, shift, shift);
5885 0         0 for my $k (keys %$from) {
                 # $old: ref-type of the existing target entry ('' for a plain scalar), or '-'
                 # when the key is absent.  (Fixed: this used the literal key "k" instead of $k.)
5886 0 0       0 my $old = (exists $to->{$k} ? ref($to->{$k}) : '-');
5887 0 0 0     0 if ($old eq 'HASH') {
    0          
    0          
5888 0 0       0 die "Merging non-HASH subentry into a HASH" unless 'HASH' eq ref($from->{$k});
5889 0         0 $self->merge_hash($from->{$k}, $to->{$k}, $overwrite);
5890             } elsif ($old ne '-' and not $overwrite) { # Do nothing
5891             } elsif (ref $from->{$k} eq 'HASH') {
5892 0         0 die "Merging HASH subentry into a non-HASH";
5893             } else {
5894 0         0 $to->{$k} = $from->{$k};
5895             }
5896             }
5897             $self
5898 0         0 }
5899            
5900            
5901             sub process_key_chunk ($$$$$) {
                 # process_key_chunk($name, $skip_first, $chunk, $sep2): convert the text of
                 # one chunk of a key description into a list with one entry per Shift-state.
                 # Each defined entry is returned as [$string, undef, undef, "VisLr=$name"]
                 # (the last element is $name unchanged when $name is false); an undefined
                 # entry stays undef.
5902 0     0 0 0 my $self = shift;
5903 0         0 my $name = shift;
5904 0         0 my $skip_first = shift;
5905 0         0 (my $k = shift) =~ s/\p{Blank}(?=\p{NonspacingMark})//g; # Allow combining marks to be on top of SPACE
5906 0         0 my $sep2 = shift;
5907 0         0 $k = $self->stringHEX2string($k); # defined elsewhere in the file; presumably expands hex notation into characters
5908 0         0 my @k = split //, $k; # default: one character per entry
5909 0 0 0     0 if (defined $sep2 and 3 <= @k and $k =~ /$sep2/) { # Allow separation by $sep2, but only if too long
      0        
5910 0         0 @k = split /$sep2/, $k;
5911 0 0 0     0 shift @k if not length $k[0] and @k == 2; # a leading separator just marks one multi-character entry
5912 0 0 0     0 warn "Zero length expansion in the key slot <$k>\n" if not @k or grep !length, @k;
5913             }
5914 0 0 0     0 undef $k[0] if ($k[0] || '') eq "\0" and $skip_first; # "\0" is the placeholder process_key() puts in place of a leading "--"
      0        
                 # A single one-character entry whose ucfirst() differs gets the ucfirst()
                 # form appended as the second entry
5915 0 0 0     0 push @k, ucfirst $k[0] if @k == 1 and defined $k[0] and 1==length $k[0] and $k[0] ne ucfirst $k[0];
      0        
      0        
5916 0 0       0 $name = "VisLr=$name" if $name;
5917             # warn "Multi-char key in <<@k>>" if grep $_ && 1
5918 0 0       0 warn "More that 2 Shift-states in <<@k>>" if @k > 2;
5919             #warn "Sep2 in $name, $skip_first, <$k> ==> <@k>\n" if defined $sep2 and $k =~ /$sep2/;
5920 0 0       0 map {defined() ? [$_, undef, undef, $name] : $_} @k;
  0         0  
5921             # @k
5922             } # -> list of chars
5923            
5924             sub process_key ($$$$$$;$) { # $sep may appear only in a beginning of the first key chunk
                 # process_key($key, $limit, $sep, \@layer_names, $layer_offset [, $sep2]):
                 # split one key description into at most $limit chunks on $sep (a $sep at the
                 # very start of the string is not a separator) and return one array reference
                 # per chunk, holding what process_key_chunk() returns for that chunk.
                 # Chunk number $_ is labelled with $layer_names->[$layer_offset + $_].
                 # Dies when there are more than $limit chunks.
5925 0     0 0 0 my ($self, $k, $limit, $sep, $ln, $l_off, $sep2, @tr) = (shift, shift, shift, shift, shift, shift, shift);
5926 0         0 my @k = split m((?!^)\Q$sep), $k;
5927 0 0       0 die "Key descriptor `$k' separated by `$sep' has too many parts: expected $limit, got ", scalar @k
5928             if @k > $limit;
                 # A chunk starting with "--" followed by more text gets the "--" replaced by
                 # "\0"; $tr[$_] records this and is passed on as the $skip_first flag
5929 0   0     0 defined $k[$_] and $k[$_] =~ s/^--(?=.)/\0/ and $tr[$_]++ for 0..$#k;
      0        
5930 0 0       0 $k[0] = '' if $k[0] eq '--'; # Allow a filler (multi)-chunk
5931 0 0       0 map [$self->process_key_chunk( $ln->[$l_off+$_], $tr[$_], (defined($k[$_]) ? $k[$_] : ''), $sep2)], 0..$#k;
5932             } # -> list of arrays of chars
5933            
5934             sub decode_kbd_layers ($@) {
                 # decode_kbd_layers(@option_hashes): decode a "visual" keyboard template
                 # (the text in option `unparsed_data') into per-layer lists of keys.
                 # Every option is taken from the first of @option_hashes (then the object
                 # itself, if it is a reference) in which the key exists.
                 # Returns a 3-element list: a hash reference mapping each layer name to its
                 # list of decoded keys, an array reference of key counts per keyboard row,
                 # and the value of option `keyline_offsets'.
                 # Dies on missing required options and on inconsistent templates.
5935 0     0 0 0 my ($self, $lineN, $row, $line_in_row, $cur_layer, @out, $N, $l0) = (shift, 0, -1);
                 # Only the keys of %needed/%extra are used below; the values of %needed look
                 # like sample values. %needed are required options, %extra optional ones.
5936 0         0 my %needed = qw(unparsed_data x visual_rowcount 2 visual_per_row_counts [2;2] visual_prefixes * prefix_repeat 3 in_key_separator / layer_names ???);
5937 0         0 my %extra = (qw(keyline_offsets 1 in_key_separator2), undef);
5938 0         0 my $opt;
5939 0         0 for my $k (keys %needed, keys %extra) {
5940 0 0       0 my ($from) = grep exists $_->{$k}, @_, (ref $self ? $self : ());
5941 0 0 0     0 die "option `$k' not specified" unless $from or exists $extra{$k};
5942 0         0 $opt->{$k} = $from->{$k};
5943             }
5944             die "option `visual_rowcount' differs from length of `visual_per_row_counts': $opt->{visual_rowcount} vs. ",
5945 0 0       0 scalar @{$opt->{visual_per_row_counts}} unless $opt->{visual_rowcount} == @{$opt->{visual_per_row_counts}};
  0         0  
  0         0  
5946 0         0 my @lines = grep !/^#/, split /\s*\n/, $opt->{unparsed_data}; # comment lines of the template are dropped
                 # $C: number of template lines describing one keyboard row; $lc: how many
                 # layers each of these lines carries; $pref: the prefix characters of these lines
5947 0         0 my ($C, $lc, $pref) = map $opt->{$_}, qw(visual_rowcount visual_per_row_counts visual_prefixes);
5948 0 0       0 die "Number of uncommented rows (" . scalar @lines . ") in a visual template not divisible by the rowcount $C: `$opt->{unparsed_data}'"
5949             if @lines % $C;
                 # One regex per template line of a row: each character of `visual_prefixes'
                 # is matched literally, a space matches any whitespace; padded with $C spaces
5950 0 0       0 $pref = [map {$_ eq ' ' ? qr/\s/ : qr/\Q$_/ } split(//, $pref), (' ') x $C];
  0         0  
5951             # my $line_in_row = [];
5952 0         0 my @counts;
5953             my $sep2;
5954 0 0       0 $sep2 = qr/$opt->{in_key_separator2}/ if defined $opt->{in_key_separator2};
5955 0         0 while (@lines) {
5956             # push @out, $line_in_row = [] unless $C % $c;
5957 0 0       0 $row++, $line_in_row = $cur_layer = 0 unless $lineN % $C; # a new keyboard row starts every $C lines
5958 0         0 $lineN++;
5959 0         0 my $l1 = shift @lines;
5960 0         0 my $PREF = qr/(?:$pref->[$line_in_row]){$opt->{prefix_repeat}}/;
5961 0 0       0 $PREF = '\s' if $pref->[$line_in_row] eq qr/\s/; # a whitespace prefix is not required to be repeated (compares stringified regexes)
5962 0 0       0 $l1 =~ s/\s*\x{202c}$// if $l1 =~ s/^[\x{202d}\x{202e}]//; # remove PDF if removed LRO, RLO
5963 0 0       0 die "line $lineN in visual layers has unexpected prefix:\n\tPREF=/$PREF/\n\tLINE=`$l1'" unless $l1 =~ s/^$PREF\s*(?<=\s)//;
5964 0         0 my @k1 = split /\s+(?!\p{NonspacingMark})/, $l1; # whitespace followed by a combining mark does not separate keys
5965 0 0       0 $l0 = $l1, $N = @k1 if $line_in_row == 0; # the first line of a row fixes the expected number of keys
5966             # warn "Got keys: ", scalar @k1;
5967 0 0       0 die sprintf "number of keys in lines differ: %s vs %s in:\n\t`%s'\n\t`%s'\n\t<%s>",
5968             scalar @k1, $N, $l0, $l1, join(">\t<", @k1) unless @k1 == $N; # One can always fill by --
5969 0         0 for my $key (@k1) {
5970 0         0 my @kk = $self->process_key($key, $lc->[$line_in_row], $opt->{in_key_separator}, $opt->{layer_names}, $cur_layer, $sep2);
                 # Chunk number $_ of the key goes to layer $cur_layer + $_; layers for which
                 # the key has no chunk get an empty list
5971 0         0 push @{$out[$cur_layer + $_]}, $kk[$_] || [] # (defined $kk[$_] ? [$kk[$_],undef,undef,$opt->{layer_names}[$cur_layer + $_]] : [])
5972 0   0     0 for 0..($lc->[$line_in_row]-1);
5973             }
5974 0         0 $cur_layer += $lc->[$line_in_row++];
                 # NOTE(review): with a row count of 1, $lineN % $C is always 0, so nothing
                 # would ever be pushed to @counts - confirm whether that case is supported
5975 0 0       0 push @counts, scalar @k1 if 1 == $lineN % $C;
5976             }
5977             # warn "layer[0] = ", join ', ', map "@$_", @{$out[0]};
5978 0         0 die "Got ", scalar @out, " layers, but ", scalar @{$opt->{layer_names}}, " layer names"
5979 0 0       0 unless @out == @{$opt->{layer_names}};
  0         0  
5980 0         0 my(%seen, %out);
5981 0   0     0 $seen{$_}++ and die "Duplicate layer name `$_'" for @{$opt->{layer_names}};
  0         0  
5982 0         0 @out{ @{$opt->{layer_names}} } = @out; # hash slice: layer name => list of keys of that layer
  0         0  
5983 0         0 \%out, \@counts, $opt->{keyline_offsets};
5984             }
5985            
5986             sub decode_rect_layers ($@) {
5987 0     0 0 0 my ($self, $cnt, %extra, $opt, @out) = (shift, 0, qw(empty N/A));
5988 0         0 my %needed = qw(unparsed_data x rect_rows_cols [4;4] rect_horizontal_counts [2;2] layer_names ??? COLgap 0 ROWgap 0);
5989 0         0 for my $k (keys %needed, keys %extra) {
5990 0 0       0 my ($from) = grep exists $_->{$k}, @_, (ref $self ? $self : ());
5991 0 0 0     0 die "option `$k' not specified" unless $from or exists $extra{$k};
5992 0         0 $opt->{$k} = $from->{$k};
5993             }
5994 0         0 $cnt += $_ for @{ $opt->{rect_horizontal_counts} };
  0         0  
5995             die "total of option `rect_horizontal_counts' differs from count of `layer_names': $cnt vs. ",
5996 0 0       0 scalar @{$opt->{layer_names}} unless $cnt == @{$opt->{layer_names}};
  0         0  
  0         0  
5997 0         0 $cnt = @{ $opt->{rect_horizontal_counts} };
  0         0  
5998 0         0 (my $D = $opt->{unparsed_data}) =~ s/^(#.*\n)+//;
5999 0         0 $D =~ s/^(#.*(\n|\z))+\z//m;
6000 0         0 my @lines = split /\s*\n/, $D;
6001 0         0 my ($C, $lc, $pref, $c0, $r0) = map $opt->{$_}, qw(visual_rowcount visual_per_row_counts visual_prefixes COLgap ROWgap);
6002             die "Number of uncommented rows (" . scalar @lines . ") in a visual rect template not matching rows(rect_rows_cols) x cnt(rect_horizontal_counts) = $opt->{rect_rows_cols}[0] x $cnt: `$opt->{unparsed_data}'"
6003 0 0       0 if @lines != $cnt * $opt->{rect_rows_cols}[0] + ($cnt-1)*$r0;
6004 0         0 my $c = 0;
6005 0         0 while (@lines) {
6006 0         0 die "Too many rect vertically: expect only ", scalar @{ $opt->{rect_horizontal_counts} }, " in `" . join("\n",'',@lines,'') . "'"
6007 0 0       0 if $c >= @{ $opt->{rect_horizontal_counts} };
  0         0  
6008 0         0 my @L = splice @lines, 0, $opt->{rect_rows_cols}[0];
6009 0         0 my ($cR, $L) = 0;
6010 0         0 while (++$cR <= $r0) { # Inter-row gap
6011 0 0       0 last unless @lines;
6012 0 0       0 ($L = shift @lines) =~ /^#/ or die "Line expected to be inter-row comment line No. $cR: <<<$L>>>"
6013             }
6014 0         0 my $l = length $L[0];
6015 0   0     0 $l == length or die "Lengths of lines encoding rect do not match: expect $l, got `" . join("\n",'',@L,'') . "'" for @L[1..$#L];
6016             $l == $opt->{rect_rows_cols}[1] * $opt->{rect_horizontal_counts}[$c] + ($opt->{rect_horizontal_counts}[$c] - 1)*$c0
6017             or die "Wrong line length in rect: expect $opt->{rect_rows_cols}[1] * $opt->{rect_horizontal_counts}[$c] gaps=$c0, got $l in `"
6018 0   0     0 . join("\n",'',@L,'') . "'" for @L[1..$#L];
6019 0         0 while (length $L[0]) {
6020 0         0 my @c;
6021 0         0 push @c, split //, substr $_, 0, $opt->{rect_rows_cols}[1], '' for @L;
6022 0   0     0 $_ eq $opt->{empty} and $_ = undef for @c;
6023 0         0 push @out, [map [$_], @c];
6024 0 0 0     0 next unless $c0 and length $L[0]; # Inter-col gap
6025 0         0 for my $i (0..$#L) {
6026 0 0       0 next unless (my $gap = substr $L[$i], 0, $c0, '') =~ /\S/;
6027 0         0 die "Inter-column gap not whitespace: line No. $i (0-based), gap No. $#out: <<<$gap>>>"
6028             }
6029             }
6030 0         0 $c++;
6031             }
6032 0         0 die "Too few vertical rect: got $c, expect ", scalar @{ $opt->{rect_horizontal_counts} }, " in `" . join("\n",'',@lines,'') . "'"
6033 0 0       0 if $c != @{ $opt->{rect_horizontal_counts} };
  0         0  
6034 0         0 my(%seen, %out);
6035 0   0     0 $seen{$_}++ and die "Duplicate layer name `$_'" for @{$opt->{layer_names}};
  0         0  
6036 0         0 @out{ @{$opt->{layer_names}} } = @out;
  0         0  
6037 0         0 for my $i ( 0 .. ($#{ $opt->{layer_names} } - 1) ) {
  0         0  
6038 0         0 my($base,$shift) = ($out[$i], $out[$i+1]);
6039 0   0     0 $out{$opt->{layer_names}[$i] . '²'} ||= [ map [$base->[$_][0], $shift->[$_][0]], 0..$#$base ];
6040 0 0       0 next if $i > $#{ $opt->{layer_names} } - 3;
  0         0  
6041 0         0 ($base,$shift) = ($out[$i+2], $out[$i+3]);
6042 0   0     0 $out{$opt->{layer_names}[$i] . '²⁺'} ||= [ map [$base->[$_][0], $shift->[$_][0]], 0..$#$base ];
6043             }
6044 0         0 my $ii = 0;
6045 0         0 for my $pre_row ( 0 .. @{ $opt->{rect_horizontal_counts} } - 2) {
  0         0  
6046 0         0 my $C = $opt->{rect_horizontal_counts}[$pre_row];
6047 0         0 for my $iii ( 0 .. $C - 1) {
6048 0         0 my $I = $ii + $iii;
6049 0         0 my $i = $I + $C;
6050 0 0       0 next if $i > $#{ $opt->{layer_names} }; # Next row may be shorter
  0         0  
6051 0         0 my($base,$shift) = ($out[$i], $out[$i+1]);
6052 0   0     0 $out{$opt->{layer_names}[$I] . '₁'} ||= [ map [$base->[$_][0]], 0..$#$base ];
6053 0 0       0 next if $i > $#{ $opt->{layer_names} } - 1;
  0         0  
6054 0   0     0 $out{$opt->{layer_names}[$I] . '₂'} ||= [ map [$base->[$_][0], $shift->[$_][0]], 0..$#$base ];
6055 0 0       0 next if $i > $#{ $opt->{layer_names} } - 3;
  0         0  
6056 0         0 ($base,$shift) = ($out[$i+2], $out[$i+3]);
6057 0   0     0 $out{$opt->{layer_names}[$I] . '₂₊'} ||= [ map [$base->[$_][0], $shift->[$_][0]], 0..$#$base ];
6058             }
6059 0         0 $ii += $C;
6060             }
6061 0         0 \%out, [($opt->{rect_rows_cols}[1]) x $opt->{rect_rows_cols}[0]];
6062             }
6063            
6064             sub get_deep ($$@) {
6065 0     0 0 0 my($self, $h) = (shift, shift);
6066 0 0       0 return $h unless @_;
6067 0         0 my $k = shift @_;
6068 0 0       0 return unless exists $h->{$k};
6069 0         0 $self->get_deep($h->{$k}, @_);
6070             }
6071            
6072             sub get_deep_via_parents ($$$@) { # quadratic algorithm
6073 0     0 0 0 my($self, $h, $idx, $IDX) = (shift, shift, shift);
6074             #warn "Deep: `@_'";
6075 0 0       0 ((defined $h) ? return $h : return) unless @_;
    0          
6076 0         0 my $k = pop @_;
6077             {
6078             #warn "Deep::: `@_'";
6079 0         0 my $H = $self->get_deep($h, @_);
  0         0  
6080             (@_ or return), $IDX++, # Start extraction from array
6081 0 0 0     0 pop, redo unless exists $H->{$k};
6082 0         0 my $v = $H->{$k};
6083             #warn "Deep -> `$v'";
6084 0 0 0     0 return $v unless ref($v || 1) and $IDX and defined $idx;
      0        
      0        
6085 0         0 return $v->[$idx];
6086             }
6087 0         0 return;
6088             }
6089            
6090             sub fill_kbd_layers ($$) { # We do not do deep processing here...
6091 0     0 0 0 my($self, $h, %o, %c, %O) = (shift, shift);
6092 0         0 my @K = grep m(^\[unparsed]/(KBD|RECT)\b), @{$h->{'[keys]'}};
  0         0  
6093             # my $H = $h->{'[unparsed]'};
6094 0         0 for my $k (@K) {
6095 0         0 my (@parts, @h) = split m(/), $k;
6096 0   0     0 ref $self and push @h, $self->get_deep($self, @parts[1..$_]) || {} for 0..$#parts;
      0        
6097 0   0     0 push @h, $self->get_deep($h, @parts[1..$_]) || {} for 0..$#parts; # Drop [unparsed]/ prefix...
6098 0   0     0 push @h, $self->get_deep($h, @parts[0..$_]) || {} for -1..$#parts;
6099 0 0       0 my ($in, $counts, $offsets) = ($k =~ m(^\[unparsed]/KBD\b) ? $self->decode_kbd_layers( reverse @h )
6100             : $self->decode_rect_layers( reverse @h ) );
6101 0   0     0 exists $o{$_} and die "Visual spec `$k' overwrites exiting layer `$k'" for keys %$in;
6102 0         0 my $cnt = (@o{keys %$in} = values %$in);
6103 0         0 @c{keys %$in} = ($counts) x $cnt;
6104 0 0       0 @O{keys %$in} = ($offsets) x $cnt if $offsets;
6105             }
6106 0         0 \%o, \%c, \%O
6107             }
6108            
6109             sub key2hex ($$;$) {
6110 0     0 0 0 my ($self, $k, $ignore) = (shift, shift, shift);
6111 0 0 0     0 return -1 if $ignore and not defined $k;
6112 0         0 return sprintf '%04x', ord $k; # if ord $k <= 0xFFFF;
6113             # sprintf '%06x', ord $k;
6114             }
6115            
6116             sub keyORarray2hex ($$;$) {
6117 0     0 0 0 my ($self, $k, $ignore) = (shift, shift, shift);
6118 0 0 0     0 return -1 if $ignore and not defined $k;
6119 0 0 0     0 $k = $k->[0] if $k and ref $k;
6120 0         0 $self->key2hex($k, $ignore);
6121             }
6122            
6123             sub keys2hex ($$;$) {
6124 0     0 0 0 my ($self, $k, $ignore) = (shift, shift, shift);
6125 0 0 0     0 return -1 if $ignore and not defined $k;
6126 0         0 return join '.', map {sprintf '%04x', ord} split //, $k; # if ord $k <= 0xFFFF;
  0         0  
6127             # sprintf '%06x', ord $k;
6128             }
6129            
6130             sub coverage_hex_sub($$$) { # Unfinished!!! XXXX UNUSED
6131 0     0 0 0 my ($self, $layer, $to) = (shift, shift, shift);
6132             ++$to->{ $self->key2hex($_->[0], 'undef_ok') }, ++$to->{ $self->key2hex($_->[1], 'undef_ok') }
6133 0         0 for @{$self->{layers}{$layer}};
  0         0  
6134             }
6135            
6136             # my %MANUAL_MAP = qw( 0020 0020 00a0 00a0 2007 2007 ); # We insert entry for SPACE manually
6137             # my %MANUAL_MAP_ch = map chr hex, %MANUAL_MAP;
6138            
6139             sub coverage_hex($$) {
6140 0     0 0 0 my ($self, $face) = (shift, shift);
6141 0         0 my $layers = $self->{faces}{$face}{layers};
6142 0   0     0 my $to = ($self->{faces}{$face}{'[coverage_hex]'} ||= {}); # or die "Panic!"; # Synthetic faces may not have this...
6143 0         0 my @Layers = map $self->{layers}{$_}, @$layers;
6144 0         0 for my $sub (@Layers) {
6145 0         0 ++$to->{ $self->keyORarray2hex($_, 'undef_ok') } for map +(@$_[0,1]), @$sub;
6146             }
6147             }
6148            
6149             sub deep_copy($$) {
6150 0     0 0 0 my ($self, $o) = (shift, shift);
6151 0 0       0 return $o unless ref $o;
6152 0 0       0 return [map $self->deep_copy($_), @$o] if "$o" =~ /^ARRAY\(/; # We should not have overloaded elements
6153 0 0       0 return {map $self->deep_copy($_), %$o} if "$o" =~ /^HASH\(/;
6154             }
6155             sub DEEP_COPY($@) {
6156 0     0 0 0 my ($self) = (shift);
6157 0         0 map $self->deep_copy($_), @_;
6158             }
6159            
6160             sub deep_undef_by_hash($$@) {
6161 0     0 0 0 my ($self, $h) = (shift, shift);
6162 0         0 for (@_) {
6163 0 0       0 next unless defined;
6164 0 0       0 if (ref $_) {
    0          
6165 0 0       0 die "a reference not an ARRAY in deep_undef_by_hash()" unless 'ARRAY' eq ref $_;
6166 0         0 $self->deep_undef_by_hash($h, @$_);
6167             } elsif ($h->{$_}) {
6168 0         0 undef $_
6169             }
6170             }
6171             }
6172            
6173             # Make symbols from the first half-face ($h1) to be accessible in the second face ($H1/$H2)
6174             sub pre_link_layers ($$$;$$) { # Un-obscure non-alphanum bindings from the first face; assign in the direction $hh ---> $HH
6175 0     0 0 0 my ($self, $hh, $HH, $skipfix, $skipwarn) = (shift, shift, shift, shift, shift); # [Main, AltGr-Main,...], [Secondary, AltGr-Secondary,...]
6176 0         0 my ($hn,$Hn, %seen_deobsc) = map $self->{faces}{$_}{layers}, $hh, $HH;
6177             #warn "Link $hh --> $HH;\t(@$hn) -> (@$Hn)" if "$hh $HH" =~ /00a9/i;
6178 0 0       0 die "Can't link sets of layers `$hh' `$HH' of different sizes: ", scalar @$hn, " != ", scalar @$Hn if @$hn != @$Hn;
6179            
6180 0         0 my $already_linked = $self->{faces}{$hh}{'[linked]'}{$HH}++;
6181 0         0 $self->{faces}{$HH}{'[linked]'}{$hh}++;
6182 0         0 for my $L (@$Hn) {
6183 0 0       0 next if $skipfix;
6184             die "Layer `$L' of face `$HH' is being relinked via `$HH' -> `$hh'???"
6185 0 0       0 if $self->{layers}{'[ini_copy]'}{$L};
6186             #warn "ini_copy: `$L'";
6187 0         0 $self->{layers}{'[ini_copy]'}{$L} = $self->deep_copy($self->{layers}{$L});
6188             }
6189 0         0 for my $K (0..$#{$self->{layers}{$hn->[0]}}) { # key number
  0         0  
6190             #warn "One key data, FROM: K=$K, layer=<", join( '> <', map $self->{layers}{$_}[$K], @$Hn), '>' if "$hh $HH" =~ /00a9/i;
6191 0         0 my @h = map $self->{layers}{$_}[$K], @$hn; # arrays of [lowercase,uppercase]
6192             #warn "One key data, TO: K=$K, layer=<", join( '> <', map $self->{layers}{$_}[$K], @$Hn), '>' if "$hh $HH" =~ /00a9/i;
6193 0         0 my @H = map $self->{layers}{$_}[$K], @$Hn;
6194 0 0 0     0 my @p = map [map {$_ and ref and $_->[2]} @$_], @h; # Prefix
  0         0  
6195 0 0 0     0 my @c = map [map {($_ and ref) ? $_->[0] : $_} @$_], @h; # deep copy, remove extra info
  0         0  
6196 0 0 0     0 my @C = map [map {($_ and ref) ? $_->[0] : $_} @$_], @H;
  0         0  
6197             # Find which of keys on $H[0] obscure symbol keys from $h[0]
6198 0 0 0     0 my @symb0 = grep {$p[0][$_] or ($c[0][$_] || '') =~ /[\W_]/} 0, 1; # not(wordchar but not _): prefix/symbols on $h[0]
  0         0  
6199             defined $H[0][$_] or not defined $C[0][$_] or $skipwarn
6200             or warn "Symbol char `$c[0][$_]' not copied to the second face while the slot is empty"
6201 0   0     0 for @symb0;
      0        
      0        
6202 0 0       0 my @obsc = grep { defined $C[0][$_] and $c[0][$_] ne $C[0][$_]} @symb0; # undefined positions will be copied later
  0         0  
6203             #warn "K=$K,\tobs=@obsc;\tsymb0=@symb0";
6204             # If @obsc == 1, put on non-shifted location; may overwrite only ?-binding if it exists
6205             #return unless @obsc;
6206 0         0 my %map;
6207 0         0 my @free_first = ((grep {not defined $C[1][$_]} 0, 1), grep defined $C[1][$_], 0, 1);
  0         0  
6208 0 0 0     0 @free_first = (1,0) if 1 == ($obsc[0] || 0) and $free_first[0] = 0 and not defined $C[1][1]; # un-Shift ONLY if needed
      0        
      0        
6209 0 0       0 @map{@obsc} = @free_first[0 .. $#obsc] unless $skipfix;
6210             # %map = map +($_, $free_first[$map{$_}]), keys %map;
6211 0         0 for my $k (keys %map) {
6212 0 0 0     0 if ($skipfix) {
    0          
6213 0 0       0 my $s = $k ? ' (shifted)' : '';
6214             warn "Key `$C[0][$k]'$s in layer $Hn->[0] does not match symbol $c[0][$k] in layer $hn->[0], and skipfix is requested...\n"
6215 0 0 0     0 unless ref($skipwarn || '') ? $skipwarn->{$c[0][$k]} : $skipwarn;
    0          
6216             } elsif (defined $C[1][$map{$k}] and $p[0][$k]) {
6217 0         0 warn "Prefix `$c[0][$k]' in layer $hn->[0] obscured on a key with `$C[1][$map{$k}]' in layer=1: $Hn->[0]"
6218             } else {
6219 0 0       0 if (defined $C[1][$map{$k}]) {
6220 0 0       0 next if $seen_deobsc{$c[0][$k]}; # See ъЪ + palochkas obscuring \| on the secondary \|-key in RussianPhonetic
6221             # So far, the only "obscuring" with useful de-obscuring is when the obscuring symbol is a letter
6222 0 0 0     0 die "existing secondary AltGr-binding `$C[1][$map{$k}]' blocks de-obscuring `$c[0][$k]';\n symbols to de-obscure are at positions [@symb0] in [@{$c[0]}]"
  0         0  
6223             unless ($C[0][$k] || '.') =~ /[\W\d_]/;
6224             next
6225 0         0 }
6226 0         0 $H[1][$map{$k}] = $h[0][$k]; # !!!! Modify in place
6227 0         0 $seen_deobsc{$c[0][$k]}++;
6228             }
6229             }
6230             # Inherit keys from $h
6231 0 0       0 for my $L (0..($skipfix? -1 : $#H)) {
6232 0         0 for my $shift (0,1) {
6233 0 0       0 next if defined $H[$L][$shift];
6234 0         0 $H[$L][$shift] = $h[$L][$shift];
6235             }
6236             }
6237 0 0       0 next if $already_linked;
6238 0         0 for my $i (0..@$hn) { # layer type
6239 0         0 for my $j (0,1) { # case
6240             #??? ++$seen_hex[$_]{ key2hex(($_ ? $key2 : $key1)->[$i][$j], 'undef') } for 0,1;
6241 0 0 0     0 push @{$self->{faces}{$hh}{need_extra_keys_to_access}{$HH}}, $H[$i][$j] if defined $C[$i][$j] and not defined $h[$i][$j];
  0         0  
6242 0 0 0     0 push @{$self->{faces}{$HH}{need_extra_keys_to_access}{$hh}}, $h[$i][$j] if defined $c[$i][$j] and not defined $H[$i][$j];
  0         0  
6243            
6244             }
6245             }
6246             }
6247             }
6248            
6249             # Make symbols from the first half-face ($h1) to be accessible in the second face ($H1/$H2)
6250             sub link_layers ($$$;$$) { # Un-obscure non-alphanum bindings from the first keyboard
6251 0     0 0 0 my ($self, $hh, $HH, $skipfix, $skipwarn) = (shift, shift, shift, shift, shift); # [Main, AltGr-Main,...], [Secondary, AltGr-Secondary,...]
6252 0         0 $self->pre_link_layers ($hh, $HH, $skipfix, $skipwarn);
6253             #warn "Linking with FIX: $hh, $HH" unless $skipfix;
6254             # We expect that $hh is base-face, and $HH is a satellite.
6255 0         0 $self->face_make_backlinks($HH, $self->{faces}{$HH}{'[char2key_prefer_first]'}, $self->{faces}{$HH}{'[char2key_prefer_last]'}, $skipfix, 'skipwarn');
6256             # To insert Flip_AltGr_Key into a face, we need to know where it is on the base face, and put it into the corresponding
6257             # slot of the satellite face. After face_make_backlinks(), we can find it in the base face.
6258             # Moreover, we must do it BEFORE calling faces_link_via_backlinks().
6259 0 0       0 if (defined (my $flip = $self->{faces}{$hh}{'[Flip_AltGr_Key]'})) {{
6260 0 0       0 defined ( my $flipped = $self->{faces}{$HH}{'[invAltGr_Accessor]'} ) or last;
  0         0  
6261             # warn "adding AltGr-inv for $hh, accessor=", $self->key2hex($flipped);
6262 0         0 $flip = $self->charhex2key($flip);
6263             # warn "face_back on $hh: ", join ' ', keys %{$self->{face_back}{$hh} || {}};
6264 0 0       0 if (my $where = $self->{face_back}{$hh}{$flip}) {
6265 0         0 my($l, $k, $shift) = @{ $where->[0] };
  0         0  
6266             # warn "Hex face_back l=$l, k=$k, shift-$shift on $hh";
6267 0         0 my($L, $expl, $dead) = ($self->{faces}{$HH}{layers}, '???');
6268 0         0 $L = $self->{layers}{$L->[$l]};
6269 0         0 my $C = my $c = $L->[$k][$shift];
6270 0 0 0     0 $c = $c->[0], $dead = $C->[2], $expl = $C->[3] || '???' if $c and ref $c;
      0        
6271 0   0     0 my $DEAD = $dead || '';
6272 0 0 0     0 warn "adding Flip_AltGr => <<$flipped>> to $hh\'s satellite $HH: already occuplied by <<<$c>>> (via $expl), dead=$DEAD"
      0        
6273             if defined $c and ($c ne $flipped or not $dead);
6274 0         0 $L->[$k][$shift] = [$flipped, undef, 1, 'Prefix for AltGr inversion'];
6275 0         0 delete $self->{faces}{$hh}{'Face_link_map'}{$HH}; # Reuse old copy
6276             # warn "Added to $HH; k=$k\[$l, $shift]";
6277             } else {
6278 0         0 warn "failed: adding AltGr-inv for $hh, flip=$flip, accessor=", $self->key2hex($flipped);
6279             }
6280             }}
6281 0         0 $self->face_make_backlinks($hh, $self->{faces}{$hh}{'[char2key_prefer_first]'}, $self->{faces}{$hh}{'[char2key_prefer_last]'}, 'skip');
6282 0         0 $self->faces_link_via_backlinks($hh, $HH);
6283             # $self->faces_link_via_backlinks($HH, $hh);
6284             }
6285            
6286             sub face_make_backlinks($$$$;$$) { # It is crucial to proceed layers in
6287             # parallel: otherwise the semantic of char2key_prefer_first suffers
6288 0   0 0 0 0 my ($self, $F, $prefer_first, $prefer_last, $skipfix, $skipwarn) = (shift, shift, shift || {}, shift || {}, shift, shift);
      0        
6289             #warn "Making backlinks for `$F'";
6290 0         0 my $LL = $self->{faces}{$F}{layers};
6291 0 0       0 if ($self->{face_back}{$F}) { # reuse old copy
6292 0 0       0 return if $skipfix; # reuse old copy
6293 0         0 die "An obsolete copy of `$F' is cashed";
6294             }
6295 0   0     0 my $seen = ($self->{face_back}{$F} ||= {}); # maps char to array of possitions it appears in, each [key, shift]
6296             # Since prefer_first should better operate in terms of keys, not layers; so the loop in $k should be the external one
6297 0         0 my $last = $#{ $self->{layers}{$LL->[0]} };
  0         0  
6298 0         0 my %warn;
6299 0         0 for my $k (0..$last) {
6300 0         0 for my $Lc (0..$#$LL) {
6301 0         0 my $L = $LL->[$Lc];
6302             # $self->layer_make_backlinks($_, $prefer_first) for @$L;
6303 0         0 my $a = $self->{layers}{$L};
6304 0 0       0 unless ($#$a == $last) { # Detect typos if we can (i.e., if no overflow into special ranges)
6305 0         0 my $fst = 1e100; # infinity
6306 0   0     0 $fst > $_->[0] and $fst = $_->[0] for values %start_SEC;
6307 0 0 0     0 die "Layer `$L' has lastchar $#$a, expected $last" unless $last >= $fst or $#$a >= $fst;
6308             }
6309             ##########
6310 0         0 for my $shift (0..$#{$a->[$k]}) {
  0         0  
6311 0 0       0 next unless defined (my $c = $a->[$k][$shift]);
6312 0 0       0 $c = $c->[0] if 'ARRAY' eq ref $c; # Treat prefix keys as usual chars
6313 0 0       0 if ($prefer_first->{$c}) {
6314             #warn "Layer `$L' char `$c': prefer first";
6315 0 0 0     0 @{ $seen->{$c} } = reverse @{ $seen->{$c} } if $seen->{$c} and $prefer_last->{$c}; # prefer 2nd of 3 (2nd from the end)
  0         0  
  0         0  
6316 0         0 push @{ $seen->{$c} }, [$Lc, $k, $shift];
  0         0  
6317             } else {
6318 0 0 0     0 $warn{$c}++ if @{ $seen->{$c} || [] } and not $prefer_last->{$c} and $c ne ' '; # XXXX Special-case ' ' ????
  0 0 0     0  
6319 0         0 unshift @{ $seen->{$c} }, [$Lc, $k, $shift];
  0         0  
6320             }
6321             }
6322             }
6323             }
6324 0 0 0     0 warn "The following chars appear several times in face `$F', but are not clarified\n\t (by `char2key_prefer_first', `char2key_prefer_last'):\n\t<",
6325             join('> <', sort keys %warn), '>' if %warn and not $skipwarn;
6326             }
6327            
6328             sub flip_layer_N ($$$) { # Increases layer number if number of layers is >2 (good for order Plain/AltGr/S-Ctrl)
6329 0     0 0 0 my ($self, $N, $max) = (shift, shift, shift);
6330 0 0       0 return 0 if $N == $max;
6331 0         0 $N + 1
6332             }
6333            
6334             sub faces_link_via_backlinks($$$;$) { # It is crucial to proceed layers in
6335             # parallel: otherwise the semantic of char2key_prefer_first suffers
6336 0     0 0 0 my ($self, $F1, $F2, $no_inic) = (shift, shift, shift, shift);
6337 0 0       0 return if $self->{faces}{$F1}{'Face_link_map'}{$F2}; # Reuse old copy
6338             #warn "Making links for `$F1' -> `$F2'";
6339 0 0       0 my $seen = $self->{face_back}{$F1} or die "Panic: no backlinks on $F1!"; # maps char to array of positions it appears in, each [layer, key, shift]
6340 0         0 my $LL = $self->{faces}{$F2}{layers};
6341             #!$no_inic and $self->{layers}{'[ini_copy1]'}{$_} and warn "ini_copy1 of `$_' exists" for @$LL;
6342             #!$no_inic and $self->{layers}{'[ini_copy]'}{$_} and warn "ini_copy of `$_' exists" for @$LL;
6343 0   0     0 my @LL = map $self->{layers}{'[ini_copy1]'}{$_} || $self->{layers}{'[ini_copy]'}{$_} || $self->{layers}{$_}, @$LL;
6344 0 0       0 @LL = map $self->{layers}{$_}, @$LL if $no_inic;
6345 0         0 my($maxL, %r, %altR) = $#LL;
6346             # XXXX Must use $self->{layers}{'[ini_copy]'}{$L} for the target
6347 0         0 for my $c (sort keys %$seen) {
6348 0         0 my $arr = $seen->{$c};
6349 0 0       0 warn "Empty back-mapping array for `$c' in face `$F1'" unless @$arr;
6350             # if (@$arr > 1) {
6351             # }
6352             my ($to) = grep defined, (map {
6353             #warn "Check `$c': <@$_> ==> <", (defined $LL[$_->[0]][$_->[1]][$_->[2]] ? $LL[$_->[0]][$_->[1]][$_->[2]] : 'undef'), '>';
6354 0         0 $LL[$_->[0]][$_->[1]][$_->[2]]
  0         0  
6355             } @$arr);
6356 0         0 my ($To) = grep defined, (map { $LL[$self->flip_layer_N($_->[0], $maxL)][$_->[1]][$_->[2]] } @$arr);
  0         0  
6357 0         0 $r{$c} = $to; # Keep prefix keys as array refs
6358 0         0 $altR{$c} = $To; # Ditto
6359             }
6360 0         0 $self->{faces}{$F1}{'Face_link_map'}{$F2} = \%r;
6361 0         0 $self->{faces}{$F1}{'Face_link_map_INV'}{$F2} = \%altR;
6362             }
6363            
6364             sub charhex2key ($$) {
6365 0     0 0 0 my ($self, $c) = (shift, shift);
6366 0 0       0 return chr hex $c if $c =~ /^[0-9a-f]{4,}$/i;
6367 0         0 $c
6368             }
6369            
6370             sub __manyHEX($$) { # for internal use only
6371 0     0   0 my ($self, $s) = (shift, shift);
6372 0         0 $s =~ s/\.?(\b[0-9a-f]{4,}\b)\.?/ chr hex $1 /ieg;
  0         0  
6373 0         0 $s
6374             }
6375            
6376             sub stringHEX2string ($$) { # One may surround HEX by ".", but only if needed. If not needed, "." is preserved...
6377 0     0 0 0 my ($self, $s) = (shift, shift);
6378 0         0 $s =~ s/(?:\b\.)?((?:\b[0-9a-f]{4,}\b(?:\.\b)?)+)/ $self->__manyHEX("$1") /ieg;
  0         0  
6379 0         0 $s
6380             }
6381            
6382             sub layer_recipe ($$) {
6383 0     0 0 0 my ($self, $l) = (shift, shift);
6384 0 0       0 return unless exists $self->{layer_recipes}{$l};
6385 0         0 $self->recipe2str($self->{layer_recipes}{$l})
6386             }
6387            
6388             sub massage_faces ($) {
6389 0     0 0 0 my $self = shift;
6390             # warn "Massaging faces...";
6391 0         0 for my $f (keys %{$self->{faces}}) { # Needed for (pre_)link_layers...
  0         0  
6392 0 0 0     0 next if 'HASH' ne ref $self->{faces}{$f} or $f =~ m(\bVK$); # "parent" taking keys for a child
6393             #warn "Massaging face `$f'...";
6394 0         0 for my $key ( qw( Flip_AltGr_Key Diacritic_if_undef DeadChar_DefaultTranslation DeadChar_32bitTranslation extra_report_DeadChar
6395             PrefixChains ctrl_after_modcol create_alpha_ctrl keep_missing_ctrl output_layers
6396             output_layers_WIN output_layers_XKB skip_extra_layers_WIN Prefix_Base_Altern
6397             layers_modifiers layers_mods_keys mods_keys_KBD AltGrInv_AltGr_as_Ctrl
6398             ComposeKey_Show AltGr_Invert_Show Apple_Override Apple_Duplicate Apple_HexInput
6399             ComposeKey Explicit_AltGr_Invert Auto_Diacritic_Start CapsLOCKoverride) ) {
6400 0         0 $self->{faces}{$f}{"[$key]"} = $self->get_deep_via_parents($self, undef, 'faces', (split m(/), $f), $key);
6401             }
6402             $self->{faces}{$f}{'[char2key_prefer_first]'}{$_}++ # Make a hash
6403 0 0       0 for @{ $self->{faces}{$f}{char2key_prefer_first} || [] } ;
  0         0  
6404             $self->{faces}{$f}{'[char2key_prefer_last]'}{$_}++ # Make a hash
6405 0 0       0 for @{ $self->{faces}{$f}{char2key_prefer_last} || [] } ;
  0         0  
6406 0 0       0 $self->{faces}{$f}{'[AltGrInv_AltGr_as_Ctrl]'} = 1 unless defined $self->{faces}{$f}{'[AltGrInv_AltGr_as_Ctrl]'};
6407            
6408 0         0 my $idx = $self->get_deep($self, 'faces', (split m(/), $f), 'MetaData_Index');
6409             # defined $self->{faces}{$f}{"[$_]"} and not ref $self->{faces}{$f}{"[$_]"}
6410             # or
6411             $self->{faces}{$f}{"[$_]"} = $self->get_deep_via_parents($self, $idx, 'faces', (split m(/), $f), $_)
6412 0         0 for qw(LRM_RLM ALTGR SHIFTLOCK);
6413            
6414 0         0 my %R = qw(ComposeKey_Show ⎄ AltGr_Invert_Show ⤨); # On Apple only
6415 0   0     0 defined $self->{faces}{$f}{"[$_]"} or $self->{faces}{$f}{"[$_]"} = $R{$_} for keys %R;
6416             $self->{faces}{$f}{"[ComposeKey_Show]"}[0] = '⎄' # Make a safe default
6417 0 0 0     0 if ref $self->{faces}{$f}{"[ComposeKey_Show]"} and not length $self->{faces}{$f}{"[ComposeKey_Show]"}[0];
6418            
6419 0         0 my ($compK, %compK) = $self->{faces}{$f}{'[ComposeKey]'};
6420 0 0 0     0 if ($compK and ref $compK) {
    0          
6421 0         0 for my $cK (@$compK) {
6422 0         0 my @kkk = split /,/, $cK;
6423 0 0 0     0 $compK{ $self->key2hex($self->charhex2key($kkk[3])) }++ if defined $kkk[3] and length $kkk[3];
6424             }
6425             } elsif (defined $compK) {
6426 0         0 $compK{ $self->key2hex($self->charhex2key($compK)) }++;
6427             }
6428 0         0 $self->{faces}{$f}{'[ComposeKeys]'} = \%compK;
6429            
6430 0 0       0 unless ($self->{faces}{$f}{layers}) {
6431 0 0       0 next unless $self->{face_recipes}{$f};
6432 0         0 $self->face_by_face_recipe($f, $f);
6433             }
6434 0 0       0 for my $ln ( 0..$#{$self->{faces}{$f}{layers} || []} ) {
  0         0  
6435 0         0 my $ll = my $l = $self->{faces}{$f}{layers}[$ln];
6436 0 0       0 next if $self->{layers}{$l}; # Else, auto-vivify
6437             #warn "Creating layer `$l' for face `$f'...";
6438 0         0 my @r = $self->layer_recipe($l);
6439 0 0       0 $ll = $r[0] if @r;
6440 0         0 warn "Massaging: Using layout_recipe `$ll' for layer '$l'\n" if debug_face_layout_recipes and exists $self->{layer_recipes}{$l};
6441 0         0 $ll = $self->make_translated_layers($ll, $f, [$ln], '0000');
6442             #warn "... Result `@$ll' --> $self->{layers}{$ll->[0]}";
6443 0 0       0 $self->{layers}{$l} = $self->{layers}{$ll->[0]} unless $self->{layers}{$l}; # Could autovivify in between???
6444             }
6445 0         0 (my ($seen, $seen_dead), $self->{faces}{$f}{'[dead_in_VK]'}) = $self->massage_VK($f);
6446 0         0 $self->{faces}{$f}{'[dead_in_VK_array]'} = $seen_dead;
6447 0         0 $self->{faces}{$f}{'[coverage_hex]'}{$self->key2hex($_)}++ for @$seen;
6448 0 0       0 for my $S (@{ $self->{faces}{$f}{AltGrCharSubstitutions} || []}) {
  0         0  
6449 0         0 my $s = $self->stringHEX2string($S);
6450 0         0 $s =~ s/\p{Blank}(?=\p{NonspacingMark})//g;
6451 0 0       0 die "Expect 2 chars in AltGr-char substitution rule; I see <$s> (from <$S>)" unless 2 == (my @s = split //, $s);
6452 0         0 push @{ $self->{faces}{$f}{'[AltSubstitutions]'}{$s[0]} }, [$s[1], 'manual'];
  0         0  
6453 0 0 0     0 push @{ $self->{faces}{$f}{'[AltSubstitutions]'}{lc $s[0]} }, [lc $s[1], 'manual']
  0         0  
6454             if lc $s[0] ne $s[0] and lc $s[1] ne $s[1];
6455 0 0 0     0 push @{ $self->{faces}{$f}{'[AltSubstitutions]'}{uc $s[0]} }, [uc $s[1], 'manual']
  0         0  
6456             if uc $s[0] ne $s[0] and uc $s[1] ne $s[1];
6457             }
6458 0 0       0 s/^\s+//, s/\s+$//, $_ = $self->stringHEX2string($_) for @{ $self->{faces}{$f}{Import_Prefix_Keys} || []};
  0         0  
6459 0 0       0 my %h = @{ $self->{faces}{$f}{Import_Prefix_Keys} || []};
  0         0  
6460 0 0       0 $self->{faces}{$f}{'[imported2key]'} = \%h if %h;
6461 0         0 my ($l0, $c);
6462 0 0       0 unless ($c = $self->{layer_counts}{$l0 = $self->{faces}{$f}{layers}[0]}) {
6463 0         0 $l0 = $self->get_deep_via_parents($self, undef, 'faces', (split m(/), $f), 'geometry_via_layer');
6464 0 0       0 $c = $self->{layer_counts}{$l0} if defined $l0;
6465             }
6466 0 0       0 my $o = $self->{layer_offsets}{$l0} if defined $l0;
6467 0 0       0 $self->{faces}{$f}{'[geometry]'} = $c if $c;
6468 0 0       0 $self->{faces}{$f}{'[g_offsets]'} = $o if $o;
6469             }
6470 0         0 for my $f (keys %{$self->{faces}}) { # Needed for face_make_backlinks: must know which keys in faces will be finally present
  0         0  
6471 0 0 0     0 next if 'HASH' ne ref $self->{faces}{$f} or $f =~ m(\bVK$); # "parent" taking keys for a child
6472 0 0       0 for my $F (@{ $self->{faces}{$f}{AltGrCharSubstitutionFaces} || []}) { # Now has a chance to have real layers
  0         0  
6473 0         0 for my $L (0..$#{$self->{faces}{$f}{layers}}) {
  0         0  
6474 0         0 my $from = $self->{faces}{$f}{layers}[$L];
6475 0 0       0 next unless my $to = $self->{faces}{$F}{layers}[$L];
6476 0         0 $_ = $self->{layers}{$_} for $from, $to;
6477 0         0 for my $k (0..$#$from) {
6478 0 0 0     0 next unless $from->[$k] and $to->[$k];
6479 0         0 for my $shift (0..1) {
6480 0 0 0     0 next unless defined (my $s = $from->[$k][$shift]) and defined (my $ss = $to->[$k][$shift]);
6481 0   0     0 $_ and ref and $_ = $_->[0] for $s, $ss;
      0        
6482 0         0 push @{ $self->{faces}{$f}{'[AltSubstitutions]'}{$s} }, [$ss, "F=$F"];
  0         0  
6483             }
6484             }
6485             }
6486             }
6487             } # ^^^ This is not used yet???
6488 0         0 for my $f (keys %{$self->{faces}}) { # Needed for face_make_backlinks: must know which keys in faces will be finally present
  0         0  
6489 0 0 0     0 next if 'HASH' ne ref $self->{faces}{$f} or $f =~ m(\bVK$); # "parent" taking keys for a child
6490 0 0       0 for my $N (0..$#{ $self->{faces}{$f}{AltGrCharSubstitutionLayers} || []}) { # Now has a chance to have real layers
  0         0  
6491 0         0 my $TO = my $to = $self->{faces}{$f}{AltGrCharSubstitutionLayers}[$N];
6492 0 0       0 my $from = $self->{faces}{$f}{layers}[$N] or next;
6493 0         0 $_ = $self->{layers}{$_} for $from, $to;
6494 0         0 for my $k (0..$#$from) {
6495 0 0 0     0 next unless $from->[$k] and $to->[$k];
6496 0         0 for my $shift (0..1) {
6497 0 0 0     0 next unless defined (my $s = $from->[$k][$shift]) and defined (my $ss = $to->[$k][$shift]);
6498 0   0     0 $_ and ref and $_ = $_->[0] for $s, $ss;
      0        
6499 0         0 push @{ $self->{faces}{$f}{'[AltSubstitutions]'}{$s} }, [$ss, "L=$TO"];
  0         0  
6500             }
6501             }
6502             }
6503             }
6504 0         0 for my $f (keys %{$self->{faces}}) { # Linking uses the number of slots in layer 0 as the limit; fill to make into max
  0         0  
6505 0 0 0     0 next if 'HASH' ne ref $self->{faces}{$f} or $f =~ m(\bVK$); # "parent" taking keys for a child
6506 0         0 my $L = $self->{faces}{$f}{layers};
6507 0         0 my @last = map $#{$self->{layers}{$_}}, @$L;
  0         0  
6508 0         0 my $last = $last[0];
6509 0   0     0 $last < $_ and $last = $_ for @last;
6510 0         0 push @{$self->{layers}{$L->[0]}}, [] for 1..($last-$last[0]);
  0         0  
6511             }
6512 0         0 for my $f (keys %{$self->{faces}}) { # Needed for face_make_backlinks: must know which keys in faces will be finally present
  0         0  
6513 0 0 0     0 next if 'HASH' ne ref $self->{faces}{$f} or $f =~ m(\bVK$); # "parent" taking keys for a child
6514 0 0       0 next unless defined (my $o = $self->{faces}{$f}{LinkFace});
6515 0         0 $self->export_layers($o, $f); # Process recipes
6516 0         0 $self->pre_link_layers($o, $f); # May add keys to $f
6517             # warn("pre_link <$o> <$f>\n") if defined $o;
6518             }
6519 0         0 for my $f (keys %{$self->{faces}}) {
  0         0  
6520 0 0 0     0 next if 'HASH' ne ref $self->{faces}{$f} or $f =~ m(\bVK$); # "parent" taking keys for a child
6521 0         0 $self->face_make_backlinks($f, $self->{faces}{$f}{'[char2key_prefer_first]'}, $self->{faces}{$f}{'[char2key_prefer_last]'});
6522             }
6523 0         0 for my $f (keys %{$self->{faces}}) {
  0         0  
6524 0 0 0     0 next if 'HASH' ne ref $self->{faces}{$f} or $f =~ m(\bVK$); # "parent" taking keys for a child
6525 0         0 my $o = $self->{faces}{$f}{LinkFace};
6526 0 0       0 next unless defined $o;
6527 0         0 $self->faces_link_via_backlinks($f, $o);
6528 0         0 $self->faces_link_via_backlinks($o, $f);
6529             }
6530 0         0 for my $f (keys %{$self->{faces}}) {
  0         0  
6531 0 0 0     0 next if 'HASH' ne ref $self->{faces}{$f} or $f =~ m(\bVK$); # "parent" taking keys for a child
6532 0 0       0 if (defined( my $r = $self->{faces}{$f}{"[CapsLOCKoverride]"} )) {
6533 0         0 warn "Massaging CapsLock for face `$f'...\n" if debug_face_layout_recipes;
6534 0         0 $self->{faces}{$f}{'[CapsLOCKlayers]'} = $self->layers_by_face_recipe($r, $f, $r);
6535             }
6536 0   0     0 my ($DDD, $export, $vk) = map $self->{faces}{$f}{"[$_]"} ||= {}, qw(DEAD export dead_in_VK);
6537 0   0     0 my ($ddd) = map $self->{faces}{$f}{"[$_]"} ||= [], qw(dead);
6538 0         0 $self->coverage_hex($f);
6539 0         0 my $S = $self->{faces}{$f}{layers};
6540 0         0 my ($c,%s,@d) = 0;
6541 0 0       0 for my $D (@{$self->{faces}{$f}{layerDeadKeys} || []}) { # deprecated...
  0         0  
6542 0 0       0 $c++, next unless length $D; # or $D ~= /^\s*--+$/ ; # XXX How to put empty elements in an array???
6543 0         0 $D =~ s/^\s+//;
6544 0         0 (my $name, my @k) = split /\s+/, $D;
6545 0         0 @k = map $self->charhex2key($_), @k;
6546             die "name of layerDeadKeys' element in face `$f' does not match:\n\tin `$D'\n\t`$name' vs `$self->{faces}{$f}{layers}[$c]'"
6547 0 0       0 unless $self->{faces}{$f}{layers}[$c] =~ /^\Q$name\E(<.*>)?$/; # Name might have changed in VK processing
6548 0   0     0 1 < length and die "not a character as a deadkey: `$_'" for @k;
6549 0         0 $ddd->[$c] = {map +($_,1), @k};
6550 0   0     0 ($s{$_}++ or push @d, $_), $DDD->{$_} = 1 for @k;
6551 0         0 $c++;
6552             }
6553 0 0       0 for my $k (split /\p{Blank}+(?:\|{3}\p{Blank}+)?/,
6554             (defined $self->{faces}{$f}{faceDeadKeys} ? $self->{faces}{$f}{faceDeadKeys} : '')) {
6555 0 0       0 next unless length $k;
6556 0         0 $k = $self->charhex2key($k);
6557 0 0       0 1 < length $k and die "not a character as a deadkey: `$k'";
6558 0         0 $ddd->[$_]{$k} = 1 for 0..$#{ $self->{faces}{$f}{layers} }; # still used...
  0         0  
6559 0         0 $DDD->{$k} = 1;
6560 0 0       0 $s{$k}++ or push @d, $k;
6561             }
6562 0 0       0 for my $k (split /\p{Blank}+/, (defined $self->{faces}{$f}{ExportDeadKeys} ? $self->{faces}{$f}{ExportDeadKeys} : '')) {
6563 0 0       0 next unless length $k;
6564 0         0 $k = $self->charhex2key($k);
6565 0 0       0 1 < length $k and die "not a character as an exported deadkey: `$k'";
6566 0         0 $export->{$k} = 1;
6567             }
6568 0 0       0 if (my $LL = $self->{faces}{$f}{'[ini_layers]'}) {
6569 0         0 my @out;
6570 0         0 for my $L ( @$LL ) {
6571 0         0 push @out, "$L++prefix+";
6572 0         0 my $l = $self->{layers}{$out[-1]} = $self->deep_copy($self->{layers}{$L});
6573 0         0 for my $n (0 .. $#$l) {
6574 0         0 my $K = $l->[$n];
6575 0         0 for my $k (@$K) {
6576             #warn "face `$f' layer `$L' ini_layers_prefix: key `$k' marked as a deadkey" if defined $k and $DDD->{$k};
6577 0 0 0     0 $k = [$k] if defined $k and not ref $k; # Allow addition of doc strings
6578 0 0 0     0 if (defined $k and ($DDD->{$k->[0]} or $vk->{$k->[0]})) {
      0        
6579 0   0     0 @$k[1,2] = ($f, $k->[2] || ($export->{$k->[0]} ? 2 : 1)); # Is exportable?
6580             }
6581             }
6582             }
6583             }
6584 0         0 $self->{faces}{$f}{'[ini_layers_prefix]'} = \@out;
6585 0         0 $LL = $self->{faces}{$f}{'[ini_filled_layers]'} = [ @{ $self->{faces}{$f}{layers} } ]; # Deep copy
  0         0  
6586 0         0 my @OUT;
6587 0         0 for my $L ( @$LL ) {
6588 0         0 push @OUT, "$L++PREFIX+";
6589 0         0 my $l = $self->{layers}{$OUT[-1]} = $self->deep_copy($self->{layers}{$L});
6590 0         0 for my $n (0 .. $#$l) {
6591 0         0 my $K = $l->[$n];
6592 0         0 for my $k (@$K) {
6593             #warn "face `$f' layer `$L' layers_prefix: key `$k' marked as a deadkey" if defined $k and $DDD->{$k};
6594 0 0 0     0 $k = [$k] if defined $k and not ref $k; # Allow addition of doc strings
6595 0 0 0     0 if (defined $k and ($DDD->{$k->[0]} or $vk->{$k->[0]})) {
      0        
6596 0   0     0 @$k[1,2] = ($f, $k->[2] || ($export->{$k->[0]} ? 2 : 1)); # Is exportable?
6597             }
6598             }
6599             }
6600             }
6601 0         0 $self->{faces}{$f}{layers} = \@OUT;
6602             } else {
6603 0         0 warn "Face `$f' has no ini_layers";
6604             }
6605 0         0 $self->{faces}{$f}{'[dead_array]'} = \@d;
6606 0 0 0     0 for my $D (@{$self->{faces}{$f}{faceDeadKeys2} || $self->{faces}{$f}{layerDeadKeys2} || []}) { # layerDeadKeys2 obsolete
  0         0  
6607 0         0 $D =~ s/^\s+//; $D =~ s/\s+$//;
  0         0  
6608 0         0 my @k = split //, $self->stringHEX2string($D);
6609 0 0       0 2 != @k and die "not two characters as a chained deadkey: `@k'";
6610             #warn "dead2 for <@k>";
6611 0         0 $self->{faces}{$f}{'[dead2]'}{$k[0]}{$k[1]}++;
6612             # $k[1] is "untranslated"; it is not good for [DEAD]:
6613             #$self->{faces}{"$f###" . $self->key2hex($k[0])}{'[DEAD]'}{$k[1]}++;
6614             }
6615             }
6616             $self
6617 0         0 }
6618            
6619             sub massage_hash_values($) {
                 # Normalizes the character-preference lists of every node named in $self->{'[keys]'}.
                 # Each entry of '[keys]' is a '/'-separated path that is resolved with get_deep(); the
                 # elements of that node's char2key_prefer_first and char2key_prefer_last arrays (either
                 # may be absent) are replaced in place by the result of charhex2key() on the old value.
                 # There is no explicit return value.
6620 0     0 0 0 my($self) = (shift);
6621 0         0 for my $K ( @{$self->{'[keys]'}} ) {
  0         0  
6622 0         0 my $h = $self->get_deep($self, split m(/), $K);
                 # $_ aliases the array elements, so the assignment rewrites the stored lists themselves.
6623 0 0       0 $_ = $self->charhex2key($_) for @{ $h->{char2key_prefer_first} || []}, @{ $h->{char2key_prefer_last} || []};
  0 0       0  
  0         0  
6624             }
6625            
6626             }
6627             #use Dumpvalue;
6628            
6629             sub print_codepoint ($$;$) {
                 # Prints one tab-separated report line for the string $k to the currently selected handle:
                 #   <prefix><hex codes joined by '.'> TAB <the string in angle brackets> TAB <names joined by ' + '>
                 # $prefix is optional and defaults to the empty string.
                 # Hex codes come from key2hex() and names from UName(), both applied per character of $k.
6630 0     0 0 0 my ($self, $k, $prefix) = (shift, shift, shift);
                 # A string matching $rxCombining (defined elsewhere in the file) is shown with a leading space.
6631 0 0       0 my $K = ($k =~ /$rxCombining/ ? " $k" : $k);
6632 0 0       0 $prefix = '' unless defined $prefix;
6633 0         0 my $kk = join '.', map $self->key2hex($_), split //, $k;
6634 0         0 my $UN = join ' + ', map $self->UName($_, 'verbose', 'vbell'), split //, $k;
6635 0         0 printf "%s%s\t<%s>\t%s\n", $prefix, $kk, $K, $UN;
6636             }
6637            
6638             sub require_unidata_age ($) {
                 # Makes sure the Unicode helper tables are loaded, then returns $self.
                 # The value of get_NamesList() is handed to load_compositions() whenever it is defined;
                 # the value of get_AgeList() is handed to load_uniage() only while $self->{Age} is still false.
6639 0     0 0 0 my $self = shift;
6640 0         0 my $f = $self->get_NamesList;
6641 0 0       0 $self->load_compositions($f) if defined $f;
6642            
6643 0         0 $f = $self->get_AgeList;
6644 0 0 0     0 $self->load_uniage($f) if defined $f and not $self->{Age};
6645 0         0 $self;
6646             }
6647            
6648             sub print_coverage_string ($$) {
                 # Prints one print_codepoint() line for every distinct character of the string $s,
                 # in the default (string) sort order of the characters.
                 # %seen is declared in the argument list only to get a fresh, empty hash.
6649 0     0 0 0 my ($self, $s, %seen) = (shift, shift);
6650 0         0 $seen{$_}++ for split //, $s;
6651            
                 # The next four statements repeat the loading steps of require_unidata_age().
6652 0         0 my $f = $self->get_NamesList;
6653 0 0       0 $self->load_compositions($f) if defined $f;
6654            
6655 0         0 $f = $self->get_AgeList;
6656 0 0 0     0 $self->load_uniage($f) if defined $f and not $self->{Age};
6657            
6658 0         0 require Unicode::UCD;
6659            
6660 0         0 $self->print_codepoint($_) for sort keys %seen;
6661             }
6662            
6663             sub print_coverage ($$) {
                 # Prints a plain-text coverage report for face $F to the currently selected handle and
                 # returns whatever print_table_coverage($F) returns.
                 # Output order: a header line, two summary count lines, lists of characters (one
                 # print_codepoint() line each) grouped by the way they can be reached, the per-key table,
                 # and finally two lists of uncovered characters from a fixed set of Unicode ranges.
6664 0     0 0 0 my ($self, $F) = (shift, shift);
6665            
                 # Same loading steps as in require_unidata_age().
6666 0         0 my $f = $self->get_NamesList;
6667 0 0       0 $self->load_compositions($f) if defined $f;
6668            
6669 0         0 $f = $self->get_AgeList;
6670 0 0 0     0 $self->load_uniage($f) if defined $f and not $self->{Age};
6671            
                 # Describe where the layout came from: the file(s) in $self->{'[file]'} (with a plural
                 # 's' when there is more than one) or 'string descriptor', plus the versions when known.
6672 0         0 my $file = $self->{'[file]'};
6673 0   0     0 my $app = (defined $file and @$file > 1 and 's');
6674 0 0       0 $file = (defined $file) ? "file$app @$file" : 'string descriptor';
6675 0         0 my $v = $self->{VERSION};
6676 0 0       0 $file .= " version $v" if defined $v;
6677 0 0       0 $file .= " Unicode tables version $self->{uniVersion}" if defined $self->{uniVersion};
6678            
6679 0         0 print "############# Generated with UI::KeyboardLayout v$UI::KeyboardLayout::VERSION for $file, face=$F\n#\n";
6680            
                 # $cnt32 is referenced only by the commented-out alternatives on the $c1 and $more lines.
6681 0         0 my $is32 = $self->{faces}{$F}{'[32-bit]'};
6682 0 0       0 my $cnt32 = keys %{$is32 || {}};
  0         0  
                 # $c1: number of entries in '[coverage1only]'; $c2: the remaining entries of '[coverage1]'.
6683 0         0 my $c1 = @{ $self->{faces}{$F}{'[coverage1only]'} }; # - $cnt32;
  0         0  
6684 0         0 my $c2 = @{ $self->{faces}{$F}{'[coverage1]'} } - @{ $self->{faces}{$F}{'[coverage1only]'} };
  0         0  
  0         0  
6685 0         0 my $more = ''; #$cnt32 ? " (and up to $cnt32 not available on Windows - at end of this section above FFFF)" : '';
                 # $multi[n]: the multi-character strings found in '[coverageN]' (n = 0, 1);
                 # @multi_c holds the number of distinct such strings for each n.
6686 0         0 my @multi;
6687 0         0 for my $n (0, 1) {
6688 0         0 $multi[$n]{$_}++ for grep 1 < length, @{ $self->{faces}{$F}{"[coverage$n]"} };
  0         0  
6689             }
6690 0         0 my @multi_c = map { scalar keys %{$multi[$_]} } 0, 1;
  0         0  
  0         0  
                 # @comp: entries of '[inCompose]' that appear in neither coverage list, restricted to
                 # single characters below U+10000.
6691 0 0       0 my %comp = %{ $self->{faces}{$F}{'[inCompose]'} || {} };
  0         0  
6692 0         0 delete $comp{$_} for @{ $self->{faces}{$F}{"[coverage0]"} }, @{ $self->{faces}{$F}{"[coverage1]"} };
  0         0  
  0         0  
6693 0 0       0 my @comp = grep {2 > length and 0x10000 > ord} sort keys %comp;
  0         0  
                 # Two summary lines; the arrays are used in numeric context, i.e. as element counts.
6694             printf "######### %i = %i + %i + %i + %i bindings [1-char + base multi-char-strings (MCS) + “extra layers” MCS + only via Compose key]\n",
6695 0         0 @{ $self->{faces}{$F}{'[coverage0]'} } + $c1 + $c2 + @comp,
6696 0         0 @{ $self->{faces}{$F}{'[coverage0]'} } + $c1 + $c2 - $multi_c[0] - $multi_c[1],
  0         0  
6697             $multi_c[0], $multi_c[1], scalar @comp;
6698             printf "######### %i = %i + %i + %i%s [direct + via single prefix keys and “extra layers” (both=%i) + via repeated prefix key] chars\n",
6699 0         0 @{ $self->{faces}{$F}{'[coverage0]'} } + $c1 + $c2 - $multi_c[0] - $multi_c[1],
6700 0         0 scalar @{ $self->{faces}{$F}{'[coverage0]'} } - $multi_c[0],
6701 0         0 $c1 - $multi_c[1], $c2, $more, @{ $self->{faces}{$F}{'[coverage00+]'} } + $c1 - $multi_c[0] - $multi_c[1];
  0         0  
                 # The first list has no header of its own; it directly follows the summary lines.
6702 0         0 for my $k (@{ $self->{faces}{$F}{'[coverage00+]'} }) {
  0         0  
6703 0         0 $self->print_codepoint($k);
6704             }
6705 0         0 print "############# Base multi-char strings:\n";
6706 0         0 for my $k (@{ $self->{faces}{$F}{'[coverage00++]'} }) {
  0         0  
6707 0         0 $self->print_codepoint($k);
6708             }
                 # '[coverage1only]' is walked twice: single characters first, longer strings second.
6709 0         0 print "############# Via single prefix keys:\n";
6710 0         0 for my $k (@{ $self->{faces}{$F}{'[coverage1only]'} }) {
  0         0  
6711 0 0       0 $self->print_codepoint($k) if 2 > length $k;
6712             }
6713 0         0 print "############# Multi-char via single prefix keys:\n";
6714 0         0 for my $k (@{ $self->{faces}{$F}{'[coverage1only]'} }) {
  0         0  
6715 0 0       0 $self->print_codepoint($k) if 1 < length $k;
6716             }
                 # Entries of '[coverage1]' that are present in '[coverage1only_hash]' are skipped below
                 # (presumably that hash mirrors the '[coverage1only]' list printed above -- confirm).
6717 0         0 my $h1 = $self->{faces}{$F}{'[coverage1only_hash]'};
6718 0         0 print "############# Via repeated prefix keys:\n";
6719 0         0 for my $k (@{ $self->{faces}{$F}{'[coverage1]'} }) {
  0         0  
6720 0 0 0     0 $h1->{$k} or $self->print_codepoint($k) if 2 > length $k;
6721             }
6722 0         0 print "############# Multi-char via repeated prefix keys:\n";
6723 0         0 for my $k (@{ $self->{faces}{$F}{'[coverage1]'} }) {
  0         0  
6724 0 0 0     0 $h1->{$k} or $self->print_codepoint($k) if 1 < length $k;
6725             }
6726 0         0 print "############# Only via Compose key:\n";
6727 0         0 for my $k (@comp) {
6728 0         0 $self->print_codepoint($k, '= ');
6729             }
                 # Keys of '[in_dia_chains]' are split by '[coverage_hash]': covered ones that are not in
                 # '[from_dia_chains]' get the '+ ' prefix, uncovered ones get the '- ' prefix.
6730 0         0 print "############# Have lost the competition (for prefixed position), but available elsewhere:\n";
6731 0         0 for my $k (sort keys %{ $self->{faces}{$F}{'[in_dia_chains]'} }) {
  0         0  
6732 0 0 0     0 next unless $self->{faces}{$F}{'[coverage_hash]'}{$k} and not $self->{faces}{$F}{'[from_dia_chains]'}{$k};
6733 0         0 $self->print_codepoint($k, '+ '); # May be in from_dia_chains, but be obscured later...
6734             }
6735 0         0 print "############# Have lost the competition (not counting those explicitly prohibited by \\\\):\n";
6736 0         0 for my $k (sort keys %{ $self->{faces}{$F}{'[in_dia_chains]'} }) {
  0         0  
6737 0 0       0 next if $self->{faces}{$F}{'[coverage_hash]'}{$k};
6738 0         0 $self->print_codepoint($k, '- ');
6739             }
                 # Count all keys of $self->{'[map2diac]'} and those of them missing from '[coverage_hash]'.
6740 0         0 my ($tot_diac, $lost_diac) = (0,0);
6741             $tot_diac++, $self->{faces}{$F}{'[coverage_hash]'}{$_} || $lost_diac++
6742 0   0     0 for keys %{ $self->{'[map2diac]'} };
  0         0  
6743 0         0 print "############# Lost among known classified modifiers/standalone/combining ($lost_diac/$tot_diac):\n";
6744 0         0 for my $k (sort keys %{ $self->{'[map2diac]'} }) {
  0         0  
6745 0 0       0 next if $self->{faces}{$F}{'[coverage_hash]'}{$k};
6746 0         0 $self->print_codepoint($k, '?- ');
6747             }
6748 0         0 print "############# Per key list:\n";
                 # The table is printed here, before the range lists; its return value is kept for the caller.
6749 0         0 my $OOut = $self->print_table_coverage($F);
                 # Each $r is [first code point, number of code points].  A character is listed when it is
                 # missing from '[coverage_hash]' and is either at or below U+A720 or has an entry in
                 # $self->{UNames}.  Ranges starting below 0xa720 are counted in $CC, the others in $CC1.
6750 0         0 my ($OUT, $CC, $CC1) = ('', 0, 0);
6751 0         0 for my $r ([0x2200, 0x40], [0x2240, 0x40], [0x2280, 0x40], [0x22c0, 0x40],
6752             [0x27c0, 0x30], [0x2980, 0x40], [0x29c0, 0x40],
6753             [0x2a00, 0x40], [0x2a40, 0x40], [0x2a80, 0x40], [0x2ac0, 0x40], [0xa720, 0x80-0x20], [0xa780, 0x80] ) {
6754 0 0 0     0 my $C = join '', grep { (0xa720 >= ord $_ or $self->{UNames}{$_}) and !$self->{faces}{$F}{'[coverage_hash]'}{$_} }
  0         0  
6755             map chr($_), $r->[0]..($r->[0]+$r->[1]-1); # before a720, the tables are filled up...
6756 0 0       0 ${ $r->[0] < 0xa720 ? \$CC : \$CC1 } += length $C;
  0         0  
6757 0         0 $OUT .= "-==-\t$C\n";
6758             }
6759 0         0 print "############# Not covered in the math+latin-D ranges ($CC+$CC1):\n$OUT";
                 # Second pass over coarser ranges: the same filter, additionally dropping characters
                 # that are keys of '[in_dia_chains]'.
6760 0         0 ($OUT, $CC, $CC1) = ('', 0, 0);
6761 0         0 for my $r ([0x2200, 0x80], [0x2280, 0x80],
6762             [0x27c0, 0x30], [0x2980, 0x80],
6763             [0x2a00, 0x80], [0x2a80, 0x80], [0xa720, 0x100-0x20] ) {
6764 0         0 my $C = join '', grep {(0xa720 >= ord $_ or $self->{UNames}{$_}) and !$self->{faces}{$F}{'[coverage_hash]'}{$_}
6765 0 0 0     0 and !$self->{faces}{$F}{'[in_dia_chains]'}{$_}} map chr($_), $r->[0]..($r->[0]+$r->[1]-1);
      0        
6766 0 0       0 ${ $r->[0] < 0xa720 ? \$CC : \$CC1 } += length $C;
  0         0  
6767 0         0 $OUT .= "-==-\t$C\n";
6768             }
6769 0         0 print "############# Not competing, in the math+latin-D ranges ($CC+$CC1):\n$OUT";
6770 0         0 $OOut
6771             }
6772            
6773             my %html_esc = qw( & & < < > > );
                 # ^^^ %html_esc: replacement table for the characters & < > (applied in char_2_html_span).
                 # NOTE(review): in this rendered listing every value looks identical to its key; presumably
                 # the real values are HTML entities that were decoded when the report was rendered --
                 # confirm against the .pm file.
                 # %ctrl_special (next statement): looked up in char_2_html_span with the string returned by
                 # control2prt(); the value is the key-combination name shown for that control character.
                 # NOTE(review): "Esc; Control-[" reads as two qw() words here, which would shift the
                 # key/value pairing of everything after it; presumably the separator in the source is not a
                 # plain space -- confirm.
6774             my %ctrl_special = qw( \r Enter \n Control-Enter \b BackSpace \x7f Control-Backspace \t Tab
6775             \x1b Esc; Control-[ \x1d Control-] \x1c Control-\ ^C Control-Break \x1e Control-^ \x1f Control-_ \x00 Control-@);
6776             my %alt_symb;
                 # ^^^ %alt_symb is filled by the bare block below: the key is the character itself (chr of the
                 # 4-digit hex code in the list), the value is the substitute symbol listed after that code.
                 # Warnings of category 'qw' are disabled because the word list contains a bare comma.
6777 1     1   27174 { no warnings 'qw';
  1         3  
  1         150  
6778             # ZWS ZWNJ ZWJ LRM RLM WJ=ZWNBSP Func Times Sep Plus
6779             my %a = (qw(200b ∅ 200c ‸ 200d & 200e → 200f ← 2060 ⊕ 2061 () 2062 × 2063 | 2064 +),
6780             # SPC NBSP obs-N obs-M n m m/3 m/4 m/6 figure=digit punctuation thin hair Soft-hyphen
6781             qw(0020 ␣ 00a0 ⍽ 2000 N 2001 M 2002 n 2003 m 2004 ᵐ⁄₃ 2005 ᵐ⁄₄ 2006 ᵐ⁄₆ 2007 ᵈ 2008 , 2009 ᵐ⁄₅ 200a ᵐ⁄₈ 00ad -),
6782             # LineSep ParSep LRE RLE PopDirForm LRO RLO narrowNBSP
6783             qw(2028 ⏎ 2029 ¶ 202a ⇒ 202b ⇐ 202c ↺ 202d ⇉ 202e ⇇ 202f ⁿ));
                 # keys() and values() walk an unmodified hash in the same order, so the slice pairs up correctly.
6784             @alt_symb{map chr hex, keys %a} = values %a;
6785             }
6786            
6787             # Make: span for control, soft-hyphen, white-space; include in with popup; include in span with special highlight
6788             sub char_2_html_span ($$$$$$;@) {
                 # Builds and returns the HTML markup string for one table cell character.
                 # Arguments, in order:
                 #   $base_c - compared with the face's '[Flip_AltGr_Key]' character and with the character shown;
                 #   $C      - the raw slot: undef, a plain string, or an array ref whose [2] marks a prefix key
                 #             and whose [3] is an explanation used in the tooltip;
                 #   $c      - the character(s) to show;
                 #   $F      - face name (key into $self->{faces});
                 #   $opts   - optional hash ref; a true {ltr} selects 'bdo' with dir=ltr instead of 'span';
                 #   @types  - extra class names; it absorbs every remaining argument.
                 # $expl, $title and $vbell follow the slurpy @types in the list assignment, so they always
                 # start out undefined: they are declared there, not passed in.
                 # NOTE(review): several literals below are empty or no-ops in this listing ($C = "$C",
                 # $fill = "", $prefill = '', $fill .= '', and the returned string has no closing tag).
                 # Presumably their markup was lost when the report was rendered to text -- read the .pm
                 # file before relying on the exact output.
6789 0     0 0 0 my ($self, $base_c, $C, $c, $F, $opts, @types, $expl, $title, $vbell) = @_;
6790 0         0 my $aInv = $self->charhex2key($self->{faces}{$F}{'[Flip_AltGr_Key]'});
6791 0 0 0     0 $expl = $C->[3] if 'ARRAY' eq ref $C and $C->[3];
6792 0 0       0 $expl =~ s/(?=\p{NonspacingMark})/ /g if $expl;
6793 0   0     0 my $prefix = (ref $C and $C->[2]);
                 # $cc keeps the unmodified character; $c is turned into markup below.
6794 0         0 my $cc = $c;
                 # From here on $aInv is a boolean: does $base_c equal the AltGr-flipping key?
6795 0   0     0 $aInv = ($base_c || 'N/A') eq $aInv;
6796 0   0     0 my $docs = ($prefix and $self->{faces}{$F}{'[prefixDocs]'}{$self->key2hex($cc)}); # or $pre and warn "No docs: face=`$F', c=`$cc'\n";
6797 0 0       0 $docs =~ s/([''&])/sprintf '&#x%02x;', ord $1/ge if defined $docs;
  0         0  
6798             # warn "... is_D2: ", $self->array2string([$c, $baseK[$L][$shift]]);
6799 0         0 $c =~ s/(?=$rxCombining)/\x{25cc}/go; # dotted circle ◌ 25CC
6800 0         0 $c =~ s/([&<>])/$html_esc{$1}/g;
                 # The per-face '[create_alpha_ctrl]' setting wins over the file-level default.
6801 0         0 my $create_a_c = $self->{faces}{$F}{'[create_alpha_ctrl]'};
6802 0 0       0 $create_a_c = $create_alpha_ctrl unless defined $create_a_c;
6803 0   0     0 my $alpha_ctrl = ($create_a_c and $cc =~ /[\cA-\cZ]/);
6804 0 0 0     0 my $with_shift = (($create_a_c > 1 and $alpha_ctrl) ? '(Shift-)' : '');
                 # Every control character is replaced by its control2prt() form; $S collects the names of
                 # the key combinations that produce it (the inner $C shadows the argument $C).
6805 0   0     0 $c =~ s{([\x00-\x1F\x7F])}{ my $C = $self->control2prt("$1"); my $S = $ctrl_special{$C} || '';
  0         0  
  0         0  
6806 0 0 0     0 ($S and $S .= ", "), $S .= "Control-$with_shift".chr(0x40+ord $1) if $alpha_ctrl;
6807 0 0       0 $C = "$C" if $S; $C }ge;
  0         0  
6808 0   0     0 my $type = ($cc =~ /[^\P{Blank}\x00-\x1f]/ && 'WS'); # Blank and not control char
6809 0         0 my ($fill, $prefill, $zw) = ('', '');
6810 0 0 0     0 if ($type or $c =~ /($rxZW)$/o) {
                 # NOTE(review): $alt is not used anywhere in the visible text of this sub.
6811 0 0       0 my $alt = ($alt_symb{$cc} ? qq( convention="$alt_symb{$cc}") : '');
6812 0         0 $fill = ""; # Soft hyphen etc
6813             }
6814 0 0       0 if ($type) { # Putting WS inside l makes gaps between adjacent WS blocks
6815 0         0 $prefill = '';
6816 0         0 $fill .= '';
6817             }
6818 0 0       0 push @types, 'no-mirror-rtl' if "\x{34f}" eq $cc; # CGJ
6819 0   0     0 $zw = !!$fill || $cc eq "\x{034f}";
6820 0         0 $vbell = !defined $C;
                 # $title is never assigned before this point, so the block below always runs.
6821 0 0       0 unless (defined $title) {
                 # Code point and name are shown only for a space or for characters from U+0080 up.
6822 0   0     0 $title = ((ord $cc >= 0x80 or $cc eq ' ') && sprintf '%04X %s', ord $cc, $self->UName($cc, 'verbose', $vbell));
6823 0 0 0     0 if ($title and $docs) {
6824 0         0 $title = "$docs (on $title)";
6825             }
6826 0   0     0 $title ||= ($docs || '');
      0        
                 # The '(1 or ...)' term makes the code-point test on the next line always true.
6827 0 0 0     0 if (defined $expl and length $expl and (1 or 0x7f <= ord $cc)) {
      0        
6828 0 0       0 $title .= ' ' if length $title;
6829 0         0 $title .= " {via $expl}";
6830             }
6831 0 0 0     0 $title .= ' (visual bell indicates unassigned keypress)' if $title and !$expl and $vbell;
      0        
                 # For the AltGr-flipping key the fixed text replaces everything collected so far.
6832 0 0       0 $title = 'This prefix key accesses this column with AltGr-invertion' if $aInv;
6833 0 0       0 $title =~ s/([''&])/sprintf '&#x%02x;', ord $1/ge if $title;
  0         0  
6834 0 0       0 $title = qq( title='$title') if $title;
6835             }
                 # Pick at most one extra class: by zero-width/vbell state first, otherwise by matching the
                 # tooltip text (or, for 'paleo', the character itself) against the patterns below.
6836 0 0 0     0 if ($type) { # Already covered
    0 0        
    0 0        
    0 0        
    0 0        
    0          
    0          
    0          
    0          
    0          
6837             } elsif ($zw) {
6838 0         0 push @types,'ZW';
6839             } elsif (not defined $C) {
6840 0         0 push @types,'vbell';
6841             } elsif ($title =~ /(\b(N-ARY|BIG(?!\s+YUS\b)|GREEK\s+PROSGEGRAMMENI|KORONIS|SOF\s+PASUQ|PUNCTUATION\s+(?:GERESH|GERSHAYIM)|PALOCHKA|CYRILLIC\s.*\s(DZE|JE|QA|WE|A\s+IE)|ANO\s+TELEIA|GREEK\s+QUESTION\s+MARK)|"\w+\s+(?:BIG|LARGE))\b.*\s+\[/) { # "0134 BIG GUY#"
6842 0         0 push @types,'nAry';
6843             } elsif ($title =~ /\b(OPERATOR|SIGN|SYMBOL|PROOF|EXISTS|FOR\s+ALL|(DIVISION|LOGICAL)\b.*)\s+\[/) {
6844 0         0 push @types,'operator';
6845             } elsif ($title =~ /\b(RELATION|PERPENDICULAR|PARALLEL\s*TO|DIVIDES|FRACTION\s+SLASH)\s+\[/) {
6846 0         0 push @types,'relation';
6847             } elsif ($title =~ /\[.*\b(IPA)\b|\bCLICK\b/) {
6848 0         0 push @types,'ipa';
6849             } elsif ($title =~ /\bLETTER\s+[AEUIYO]\b/ and
6850             $title =~ /\b(WITH|AND)\s+(HOOK\s+ABOVE|HORN)|(\s+(WITH|AND)\s+(CIRCUMFLEX|BREVE|ACUTE|GRAVE|TILDE|DOT\s+BELOW)\b){2}/) {
6851 0         0 push @types,'viet';
6852             } elsif (0 <= index(lc '⁊ǷꝥƕǶᵹ', lc $cc) or 0xa730 <= ord $cc and 0xa78b > ord $cc or 0xa7fb <= ord $cc and 0xa7ff >= ord $cc) {
6853 0         0 push @types,'paleo';
6854             } elsif ($title =~ /(\s+(WITH|AND)\s+((DOUBLE\s+)?\w+(\s+(BELOW|ABOVE))?)\b){2}/) {
6855 0         0 push @types,'doubleaccent';
6856             }
                 # $1 is set only when the optional group of the pattern took part in the match.
6857 0 0 0     0 push @types, ($1 ? 'withSubst' : 'isSubst') if ($expl || '') =~ /\sSubst\{(\S*\}\s+\S)?/;
    0          
6858 0 0       0 push @types, 'altGrInv' if $aInv;
                 # The class attribute is quoted only when it holds more than one class name.
6859 0 0       0 my $q = ("@types" =~ /\s/ ? "'" : '');
6860             # ($prefill, $fill) = ("$prefill", "$fill");
                 # @types is collapsed into a single ready-made attribute string (or stays empty).
6861 0 0       0 @types = " class=$q@types$q" if @types;
6862 0 0 0     0 my($T,$OPT) = ($opts && $opts->{ltr} ? ('bdo', ' dir=ltr') : ('span', '')); # Just `span´ does not work in FF15
                 # The AltGr-flipping key is shown as a dagger unless the character equals the base character.
6863 0 0 0     0 $c = '†' if $aInv and $cc ne ($base_c || 'N/A'); #  
      0        
6864 0         0 "<$T$OPT@types$title>$prefill$c$fill"
6865             }
6866            
6867             sub print_table_coverage ($$;$$) {
6868 0   0 0 0 0 my ($self, $F, $html, $extra_headers) = (shift, shift, shift, shift || '');
6869 0         0 my $file = $self->{'[file]'};
6870 0   0     0 my $app = (defined $file and @$file > 1 and 's');
6871 0 0       0 my $f = (defined $file) ? "file$app @$file" : 'string descriptor';
6872 0         0 my $v = $self->{VERSION};
6873 0 0       0 $f .= " version $v" if defined $v;
6874 0 0       0 $f .= " Unicode tables version $self->{uniVersion}" if defined $self->{uniVersion};
6875 0 0       0 print <
6876            
6877             "http://www.w3.org/TR/html4/loose.dtd">
6878            
6879            
6880            
6881             $extra_headers
6924            
6925            
6926             [1] />"; $COLS ), next unless $dFace; "; $header\n \n" " if $html; # Do not make RTL chars mix up the order
6927             EOP
6928 0         0 my($LL, $INV, %s, @d, %access, %docs) = ($self->{faces}{$F}{layers}, $self->{faces}{$F}{'[Flip_AltGr_Key]'});
6929 0 0       0 $s{$self->charhex2key($INV)}++ if defined $INV; # Skip in reports '
6930 0         0 my @LL = map $self->{layers}{$_}, @$LL;
6931 0 0 0     0 $s{$_}++ or push @d, $_ for map @{ $self->{faces}{$F}{"[$_]"} || [] }, qw(dead_array dead_in_VK_array extra_report_DeadChar);
  0         0  
6932 0         0 my (@A, %isD2, @Dface, @DfaceKey, %d_seen) = [];
6933 0         0 my $compK = $self->{faces}{$F}{'[ComposeKeys]'};
6934             #warn 'prefix keys to report: <', join('> <', @d), '>';
6935 0         0 for my $ddK (@d) {
6936 0         0 (my $dK = $ddK) =~ s/^\s+//;
6937 0         0 my $c = $self->key2hex($self->charhex2key($dK));
6938 0 0       0 next if $d_seen{$c}++;
6939             ($compK->{$c} or warn("??? Skip non-array prefix key `$c' for face `$F', k=`$dK'")), next
6940 0 0 0     0 unless defined (my $FF = $self->{faces}{$F}{'[deadkeyFace]'}{$c});
6941 0         0 $access{$FF} = [$self->charhex2key($dK)];
6942 0         0 push @Dface, $FF;
6943 0         0 push @DfaceKey, $c;
6944 0         0 $docs{$FF} = $self->{faces}{$F}{'[prefixDocs]'}{$c}; # and warn "Found docs: face=`$F', c=`$c'\n";
6945 0         0 push @A, [$self->charhex2key($dK)];
6946             }
6947            
6948 0         0 my ($lastDface, $prevCol, $COLS, @colOrn, %S, @joinedPairs) = ($#Dface, -1, '', [qw(0 column1)]);
6949 0 0       0 for my $kk (split /\p{Blank}+\|{3}\p{Blank}+/,
6950             (defined $self->{faces}{$F}{faceDeadKeys} ? $self->{faces}{$F}{faceDeadKeys} : ''), -1) {
6951 0         0 my $cnt = 0;
6952 0   0     0 length and $cnt++ for split /\p{Blank}+/, $kk;
6953 0         0 push @joinedPairs, $cnt;
6954             }
6955 0         0 pop @joinedPairs;
6956 0         0 my $done = 0;
6957 0         0 push @colOrn, [$done += $_, 'endPair'] for @joinedPairs;
6958 0         0 my @skip_sections;
6959 0         0 for my $s (values %start_SEC) {
6960 0         0 $skip_sections[$_]++ for $s->[0]..($s->[0]+$s->[1]-1)
6961             }
6962            
6963 0         0 for my $reported (1, 0) {
6964 0 0       0 for my $DD (@{ $self->{faces}{$F}{$reported ? 'LayoutTable_add_double_prefix_keys' : 'faceDeadKeys2'} }) {
  0         0  
6965 0         0 (my $dd = $DD) =~ s/^\s+//;
6966             # XXXX BUG in PERL??? This gives 3: DB<4> x scalar (my ($x, $y) = split //, 'ab')
6967 0 0       0 2 == (my (@D) = split //, $self->stringHEX2string($dd)) or die "Not a double character in LayoutTable_add_double_prefix_keys for `$F': `$DD' -> `", $self->stringHEX2string($dd), "'";
6968 0 0       0 my $map1 = $self->{faces}{$F}{'[deadkeyFaceHexMap]'}{$self->key2hex($D[0])}
    0          
6969             or ($reported ? die "Can't find prefix key face for `$D[0]' in `$F'" : next); # inverted faces bring havoc
6970 0 0       0 defined (my $Dead2 = $map1->{$self->key2hex($D[1])}) or die "Can't map `$D[1]' in `$F'+prefix `$D[0]'"; # in hex already
6971 0 0       0 $Dead2 = $Dead2->[0] if 'ARRAY' eq ref $Dead2;
6972 0 0       0 defined (my $ddd = $self->{faces}{$F}{'[deadkeyFace]'}{$Dead2}) or die "Can't find prefix key face for `$D[1]' -> `$Dead2' in `$F'+prefix `$D[0]'";
6973 0 0       0 next if $S{"@D"}++;
6974 0 0       0 push(@Dface, $ddd), push @DfaceKey, $Dead2 if $reported;
6975 0   0     0 $access{$ddd} ||= \@D;
6976 0         0 $docs{$ddd} = $self->{faces}{$F}{'[prefixDocs]'}{$Dead2};
6977 0 0       0 push @A, \@D if $reported;
6978             # warn "set is_D2: @D";
6979 0         0 $isD2{$D[0]}{$D[1]}++;
6980             }
6981             }
6982 0 0       0 push @colOrn, [$lastDface+1, 'pre_ExtraCols'] if $#Dface != $lastDface;
6983 0         0 for my $orn (@colOrn) {
6984 0         0 my $skip = $orn->[0] - $prevCol - 1;
6985 0 0       0 warn("Multiple classes on columns of report unsupported: face=$F, col [@$orn]"), next if $skip < 0;
6986 0         0 $prevCol = $orn->[0];
6987 0 0       0 my $many = $skip > 1 ? " span=$skip" : '';
6988 0 0       0 $skip = $skip > 0 ? "\n " : '';
6989 0         0 $COLS .= "$skip\n
6990             }
6991 0 0       0 print <
6992            
6993            
6994             EOP
6995 0   0     0 my ($k, $first_ctrl, $post_ctrl, @last_in_row) = (-1, map $self->{faces}{$F}{"[$_]"} || 0, qw(start_ctrl end_ctrl));
6996 0 0       0 $last_in_row[ $k += $_ ]++ for @{ $self->{faces}{$F}{'[geometry]'} || [] };
  0         0  
6997             #warn 'prefix key faces to report: <', join('> <', @Dface), '>';
6998 0         0 my @maps = (undef, map $self->{faces}{$F}{'[deadkeyFaceHexMap]'}{$_}, @DfaceKey); # element of Dface may be false if this is non-autonamed AltGr-inverted face
6999 0 0       0 my $dead = $html ? "\x{2620}" : "\x{2620}";
7000 0 0       0 my $dead_i = $html ? "\x{2620}" : "\x{2620}";
7001 0         0 my $header = '';
7002 0         0 for my $dFace ('', @Dface) { # '' is no-dead
7003 0         0 my $base_t = 'Characters immediately on keys (without prefix keys); the first two are without/with Shift, two others same, but with added AltGr (excluding the special-key zone)';
7004 0         0 my $prefix_t = 'After tapping a prefix key, the base keys are replaced by what is in the column of the prefix key';
7005 0 0       0 $header .= qq( ↓Base Prefix→
7006 0         0 my @a = map {(my $a = $_) =~ s/^(?=$rxCombining)/\x{25cc}/o; $a } @{ $access{$dFace} };
  0         0  
  0         0  
  0         0  
7007 0         0 my $docs = $docs{$dFace};
7008 0 0       0 $docs =~ s/([''&])/sprintf '&#x%02x;', ord $1/ge if $docs;
  0         0  
7009 0 0       0 my $withDocs = (defined $docs ? "@a" : "@a");
7010 0         0 $header .= " $withDocs
7011             }
7012 0 0       0 print "
7013             if $html;
7014 0         0 my $vbell = '♪';
7015 0         0 my $OOut = '';
7016 0         0 for my $n ( 0 .. $#{ $LL[0] } ) {
  0         0  
7017 0         0 my ($out, $out_c, $prev, @KKK, $base_c) = ('', 0, '');
7018 0         0 my @baseK;
7019 0 0 0     0 next if $n >= $first_ctrl and $n < $post_ctrl or $skip_sections[$n];
      0        
7020 0         0 for my $dn (0..@Dface) { # 0 is no-dead
7021 0 0 0     0 next if $dn and not $maps[$dn];
7022 0 0       0 $out .= $html ? '' : ($prev =~ /\X{7}/ ? ' ' : "\t") if length $out;
    0          
    0          
7023 0 0       0 my $is_D2 = $isD2{ @{$A[$dn]} == 1 ? $A[$dn][0] : 'n/a' };
  0         0  
7024             # warn "is_D2: ", $self->array2string([$dn, $is_D2, $A[$dn], $A[$dn][0]]);
7025 0         0 my $o = '';
7026 0         0 for my $L (0..$#$LL) {
7027 0         0 for my $shift (0..1) {
7028 0         0 my $c = $LL[$L][$n][$shift];
7029 0         0 my ($pre, $expl, $C, $expl1, $invert_dead) = ('', '', $c);
7030 0 0       0 $o .= ' ', next unless defined $c;
7031 0         0 $out_c++;
7032 0 0 0     0 $pre = $dead if not $dn and 'ARRAY' eq ref $c and $c->[2];
      0        
7033 0 0       0 $c = $c->[0] if 'ARRAY' eq ref $c;
7034 0 0       0 $KKK[$L][$shift] = $c unless $dn;
7035 0         0 $base_c = $KKK[$L][$shift];
7036             # warn "int_struct -> dead; face `$F', KeyPos=$n, Mods=$L, shift=$shift, ch=$c\n" if $pre;
7037 0 0       0 if ($dn) {
7038 0         0 $C = $c = $maps[$dn]{$self->key2hex($c)};
7039 0 0       0 $c = $vbell unless defined $c;
7040 0 0 0     0 $invert_dead = (3 == ($c->[2] || 0) || (3 << 3) == ($c->[2] || 0)) if ref $c;
7041 0 0 0     0 $pre = $invert_dead ? $dead_i : $dead if 'ARRAY' eq ref $c and $c->[2];
    0          
7042 0 0       0 $c = $c->[0] if 'ARRAY' eq ref $c;
7043 0         0 $c = $self->charhex2key($c);
7044             } else {
7045             # warn "coverage0_prefix -> dead; face `$F', KeyPos=$n, Mods=$L, shift=$shift, ch=$c\n" if $self->{faces}{$F}{'[coverage0_prefix]'}{$c};
7046 0 0 0     0 $invert_dead = (3 == ($c->[2] || 0) || (3 << 3) == ($c->[2] || 0)) if ref $c;
7047 0 0 0     0 $pre = $invert_dead ? $dead_i : $dead if $pre or $self->{faces}{$F}{'[coverage0_prefix]'}{$c};
    0          
7048             }
7049 0 0       0 $baseK[$L][$shift] = $c unless $dn;
7050 0 0 0     0 $pre ||= $dead if $dn and $is_D2->{$baseK[$L][$shift]};
      0        
7051            
7052 0 0       0 if ($html) {
7053 0         0 $c = $self->char_2_html_span($base_c, $C, $c, $F, {ltr => 1}, 'l');
7054             } else {
7055 0         0 $c =~ s/(?=$rxCombining)/\x{25cc}/go; # dotted circle ◌ 25CC
7056 0         0 $c =~ s{([\x00-\x1F\x7F])}{ $self->control2prt("$1") }ge;
  0         0  
7057             }
7058 0         0 $c = "$pre$c";
7059 0         0 $o .= $c;
7060             }
7061             }
7062 0         0 $o =~ s/ +$//;
7063 0         0 $prev = $o;
7064 0         0 $out .= $o;
7065             }
7066 0 0       0 my $class = $last_in_row[$n] ? ' class=lastKeyInKRow' : '';
7067 0 0       0 $out = " $out
7068 0 0       0 $OOut .= "$out\n", print "$out\n" if $out_c;
7069             }
7070 0 0       0 my @extra = map {(my $s = $_) =~ s/^\s+//; "\n\n

$s"} @{ $self->{faces}{$F}{TableSummaryAddHTML} || [] };

  0         0  
  0         0  
  0         0  
7071 0         0 my $create_a_c = $self->{faces}{$F}{'[create_alpha_ctrl]'};
7072 0 0       0 $create_a_c = $create_alpha_ctrl unless defined $create_a_c;
7073 0   0     0 my $extra_ctrl = ($create_a_c >= 1) && '/[/]/\\';
7074 0   0     0 $extra_ctrl .= ($create_a_c >= 2) && '/^/_';
7075 0   0     0 my $more .= ($create_a_c >= 1) && ' Most of Ctrl-letters are omitted from the table; deduce them from reports for C/H/I/J/M/Z.';
7076 0 0       0 print <
7077            
7078            
7079            
7080             @extra

Highlights (homographs and special needs): zero-width or SOFT HYPHEN: , whitespace: , Vietnamese; other double-accent; paleo-Latin;

7081             or IPA.
7082             Or name having RELATION, PERPENDICULAR,
7083             PARALLEL, DIVIDES, FRACTION SLASH; or BIG, LARGE, N-ARY, CYRILLIC PALOCHKA/DZE/JE/QA/WE/A-IE,
7084             ANO TELEIA, KORONIS, PROSGEGRAMMENI, GREEK QUESTION MARK, SOF PASUQ, PUNCTUATION GERESH/GERSHAYIM; or OPERATOR, SIGN,
7085             SYMBOL, PROOF, EXISTS, FOR ALL, DIVISION, LOGICAL; or AltGr-inverter prefix;
7086             or via a rule involving/exposing a “BlueKey” substitution rule.
7087             (Some browsers fail to show highlights for whitespace/zero-width.)
7088            

Vertical lines separate: the column of the base face, paired

7089             prefix keys with “inverted bindings”, and explicitly selected multi-key prefixes. Horizontal lines separate key rows of
7090             the keyboard (including a fake row with the “left extra key” [one with <> or \\| - it is missing on many keyboards]
7091             and the KP_Decimal key [often marked as . Del on numeric keypad]); the last group is for semi-fake keys for
7092             Enter/C-Enter/Backspace/C-Backspace/Tab and C-Break$extra_ctrl (make sense after prefix keys) and special keys explicitly added
7093             in .kbdd files (usually SPACE).$more
7094            

Hover mouse over any appropriate place to get more information.

7095             In popups: brackets enclose Script, Range, “1st Unicode version with this character”;
7096             braces enclose “the reason why this position was assigned to this character” (VisLr means that a visual table was
7097             used; in Subst{HOW}, L=Layer and F=Face mean that a “BlueKey” substitution rule was defined
7098             via a special layer/face).
7099            
7100            
7101             EOP
7102 0         0 $OOut
7103             }
7104            
7105             sub coverage_face0 ($$;$) {
7106 0     0 0 0 my ($self, $F, $after_import, $after) = (shift, shift, shift);
7107 0         0 my $H = $self->{faces}{$F};
7108 0         0 my $LL = $H->{layers};
7109 0 0       0 return $H->{'[coverage0]'} if exists $H->{'[coverage0]'};
7110 0         0 my (%seen, %seen_prefix, %imported);
7111 0 0       0 my $d = { %{ $H->{'[DEAD]'} || {} }, %{ $H->{'[dead_in_VK]'} || {} } };
  0 0       0  
  0         0  
7112             # warn "coverage0 for `$F'" if $after_import;
7113 0         0 for my $l (@$LL) {
7114 0         0 my $L = $self->{layers}{$l};
7115 0         0 for my $k (@$L) {
7116             warn "Face `$F', layer `$l': coverage check is run too late: after the importation translation is performed"
7117 0 0 0     0 if not $after_import and $F !~ /^(.*)##Inv#([a-f0-9]{4,})$/is and grep {defined and ref and $_->[4]} @$k;
  0 0 0     0  
      0        
7118 0 0 0     0 $seen{ref() ? $_->[0] : $_}++ for grep {defined and !(ref and $_->[2]) and !$d->{ref() ? $_->[0] : $_}} @$k;
  0 0 0     0  
    0          
7119 0 0 0     0 $seen_prefix{ref() ? $_->[0] : $_}++ for grep {defined and (ref and $_->[2] or $d->{ref() ? $_->[0] : $_})} @$k;
  0 0 0     0  
    0          
7120 0 0 0     0 $imported{"$_->[0]:$_->[1]"}++ for grep {defined and ref and 2 == ($_->[2] || 0)} @$k; # exportable
  0   0     0  
7121             }
7122 0 0       0 unless ($after++) {
7123 0         0 $H->{'[layer0coverage0]'} = [sort keys %seen];
7124             }
7125             }
7126 0         0 $H->{'[coverage0_prefix]'} = \%seen_prefix;
7127 0         0 $H->{'[coverage0]'} = [sort keys %seen];
7128 0 0       0 $H->{'[coverage00]'} = [grep { 2>length and 0x10000 > ord } @{$H->{'[coverage0]'}}];
  0         0  
  0         0  
7129 0   0     0 $H->{'[coverage0+]'} = [grep {!(2>length and 0x10000 > ord)} @{$H->{'[coverage0]'}}];
  0         0  
  0         0  
7130 0         0 $H->{'[coverage00+]'} = [grep { 2>length } @{$H->{'[coverage0]'}}];
  0         0  
  0         0  
7131 0         0 $H->{'[coverage00++]'} = [grep { 1{'[coverage0]'}}];
  0         0  
  0         0  
7132 0         0 $H->{'[imported]'} = [sort keys %imported];
7133 0         0 $H->{'[coverage00hash]'} = { map { ($_, 1) } @{ $H->{'[coverage00]'} } };
  0         0  
  0         0  
7134 0         0 $H->{'[coverage0]'};
7135             }
7136            
7137             # %imported is analysed: if manual deadkey is specified, this value is used, otherwised new value is generated and rememebered.
7138             # (but is not put in the keymap???]
7139             sub massage_imported ($$) {
7140 0     0 0 0 my ($self, $f) = (shift, shift);
7141 0 0       0 return unless my ($F, $KKK) = $f =~ /^(.*)###([a-f0-9]{4,})$/is;
7142 0         0 my $H = $self->{faces}{$F};
7143 0 0       0 for my $i ( @{ $self->{faces}{$f}{'[imported]'} || [] } ) {
  0         0  
7144 0 0       0 my($k,$face) = $i =~ /^(.):(.*)/s or die "Unrecognized imported: `$i'";
7145 0         0 my $K;
7146 0 0 0     0 if (exists $H->{'[imported2key]'}{$i} or exists $H->{'[imported2key_auto]'}{$i}) {
    0 0        
7147 0 0       0 $K = exists $H->{'[imported2key]'}{$i} ? $H->{'[imported2key]'}{$i} : $H->{'[imported2key_auto]'}{$i};
7148             } elsif ($H->{'[coverage0_prefix]'}{$k} or $H->{'[auto_dead]'}{$k}) { # it is already used
7149             # Assign a fake prefix key to imported map
7150             warn("Imported prefix keys exist, but Auto_Diacritic_Start is not defined in face `$F'"), return
7151 0 0       0 unless defined $H->{'[first_auto_dead]'};
7152 0         0 $K = $H->{'[imported2key_auto]'}{$i} = $self->next_auto_dead($H);
7153             } else { # preserve the prefix key
7154 0         0 $K = $H->{'[imported2key_auto]'}{$i} = $k;
7155 0         0 $H->{'[auto_dead]'}{$k}++;
7156             }
7157 0 0       0 my $LL = $self->{faces}{$face}{'[deadkeyLayers]'}{$self->key2hex($k)}
7158             or die "Cannot import a deadkey `$k' from `$face'";
7159 0         0 $LL = [@$LL]; # Deep copy, so may override
7160 0         0 my $KK = $self->key2hex($K);
7161 0 0       0 if (my $over = $H->{'[AdddeadkeyLayers]'}{$KK}) {
7162             #warn "face `$F': additional bindings for deadkey $KK exist.\n";
7163 0         0 $LL = [$self->make_translated_layers_stack($over, $LL)];
7164             }
7165 0         0 $H->{'[imported2key_all]'}{"$k:$face"} = $self->charhex2key($KK);
7166 0         0 $H->{'[deadkeyLayers]'}{$KK} = $LL;
7167 0         0 my $new_facename = "$F#\@#\@#\@$i";
7168 0         0 $self->{faces}{$new_facename}{layers} = $LL;
7169 0         0 $H->{'[deadkeyFace]'}{$KK} = $new_facename;
7170 0         0 $self->link_layers($F, $new_facename, 'skipfix', 'no-slot-warn');
7171            
7172 0         0 $self->coverage_face0($new_facename);
7173             }
7174             }
7175            
7176             sub massage_imported2 ($$) {
7177 0     0 0 0 my ($self, $f) = (shift, shift);
7178 0         0 warn "... Importing into face=`$f" if debug_import;
7179 0 0       0 return unless my ($F, $KKK) = ($f =~ /^(.*)###([a-f0-9]{4,})$/is); # what about multiple prefixes???
7180 0 0       0 return unless my $HH = $self->{faces}{$F}{'[imported2key_all]'};
7181 0         0 my $H = $self->{faces}{$f};
7182 0         0 warn "Importing into face=`$F' prefix=$KKK" if debug_import;
7183 0         0 my $LL = $H->{layers};
7184 0         0 my @unresolved;
7185 0         0 for my $l (@$LL) {
7186 0         0 my $L = $self->{layers}{$l};
7187 0         0 for my $k (@$L) {
7188 0 0 0     0 for my $kk (grep {defined and ref and $_->[2]} @$k) { # exportable
  0         0  
7189 0         0 $kk = [@$kk]; # deep copy
7190 0 0       0 if (2 == $kk->[2]) { # exportable
7191 0 0       0 my $v = (defined $kk->[4] ? $kk->[4] : $kk->[0]);
7192 0         0 my $j = $HH->{"$v:$kk->[1]"};
7193             # push(@unresolved, "$v:$kk->[1]"),
7194 0 0       0 warn "Can't resolve `$v:$kk->[1]' to an imported dead key, face=`$F' prefix=$KKK; layer=$l"
7195             unless defined $j;
7196 0         0 warn "Importing `$v:$kk->[1]' as `$j', face=`$F' prefix=$KKK; layer=$l" if debug_import;
7197 0         0 @$kk[0,4] = ($j, $v);
7198             } else {
7199             #warn "massage_imported2: shift $kk->[2] <<= 3 key `$kk->[0]' face `$f' layer `$l'\n" if $kk->[2] >> 3;
7200 0         0 $kk->[2] >>= 3; # ByPairs makes <<= 3 !
7201             }
7202             }
7203             }
7204             }
7205 0         0 delete $self->{faces}{$f}{'[coverage0]'};
7206 0         0 $self->coverage_face0($f, 'after_import'); # recalculate
7207             # $H->{'[unresolved_imported]'} = \@unresolved if @unresolved;
7208             }
7209            
7210             sub massage_char_substitutions($$) { # Read $self->{Substitutions}
7211 0     0 0 0 my($self, $data) = (shift, shift);
7212 0 0       0 die "Too late to load char substitutions" if $self->{Compositions};
7213 0 0       0 for my $K (keys %{ $data->{Substitutions} || {}}) {
  0         0  
7214 0         0 my $arr = $data->{Substitutions}{$K};
7215 0         0 for my $S (@$arr) {
7216 0         0 my $s = $self->stringHEX2string($S);
7217 0         0 $s =~ s/\p{Blank}(?=\p{NonspacingMark})//g;
7218 0 0       0 die "Expect 2 chars in substitution rule; I see <$s> (from <$S>)" unless 2 == (my @s = split //, $s);
7219 0         0 $self->{'[Substitutions]'}{""}{$s[0]} = [[0, $s[1]]]; # Format as in Compositions
7220 0 0 0     0 $self->{'[Substitutions]'}{""}{lc $s[0]} = [[0, lc $s[1]]]
7221             if lc $s[0] ne $s[0] and lc $s[1] ne $s[1];
7222 0 0 0     0 $self->{'[Substitutions]'}{""}{uc $s[0]} = [[0, uc $s[1]]]
7223             if uc $s[0] ne $s[0] and uc $s[1] ne $s[1];
7224             }
7225             }
7226             }
7227            
7228             sub new_from_configfile ($$) {
7229 0     0 0 0 my ($class, $F) = (shift, shift);
7230 0 0       0 open my $f, '< :utf8', $F or die "Can't open `$F' for read: $!";
7231 0         0 my $s = do {local $/; <$f>};
  0         0  
  0         0  
7232 0 0       0 close $f or die "Can't close `$F' for read: $!";
7233             #warn "Got `$s'";
7234 0         0 my $self = $class->new_from_configfile_string($s);
7235 0         0 push @{$self->{'[file]'}}, $F;
  0         0  
7236 0         0 $self;
7237             }
7238            
7239             sub parse_add_configfile ($$) {
7240 0     0 0 0 my ($self, $F) = (shift, shift);
7241 0 0       0 open my $f, '< :utf8', $F or die "Can't open `$F' for read: $!";
7242 0         0 my $s = do {local $/; <$f>};
  0         0  
  0         0  
7243 0 0       0 close $f or die "Can't close `$F' for read: $!";
7244             #warn "Got `$s'";
7245 0         0 $self->parse_add_configstring($s, $self);
7246             # Dumpvalue->new()->dumpValue($self);
7247 0         0 push @{$self->{'[file]'}}, $F;
  0         0  
7248 0         0 $self;
7249             }
7250            
7251             sub new_from_configfile_string ($$) {
7252 0     0 0 0 my ($class, $ss) = (shift, shift);
7253 0 0       0 die "too many arguments to UI::KeyboardLayout->new_from_configfile_string" if @_;
7254 0         0 my $self = $class->new;
7255 0         0 $self->parse_add_configstring($ss, $self);
7256             # Dumpvalue->new()->dumpValue($self);
7257 0         0 $self->massage_full;
7258             }
7259            
7260             sub massage_full($) {
7261 0     0 0 0 my $self = shift;
7262 0         0 my ($layers, $counts, $offsets) = $self->fill_kbd_layers($self);
7263 0         0 @{$self->{layers}}{keys %$layers} = values %$layers;
  0         0  
7264 0         0 @{$self->{layer_counts} }{keys %$counts} = values %$counts;
  0         0  
7265 0         0 @{$self->{layer_offsets}}{keys %$offsets} = values %$offsets;
  0         0  
7266 0         0 $self->massage_hash_values;
7267 0         0 $self->massage_diacritics; # Read $self->{Diacritics}
7268 0         0 $self->massage_char_substitutions($self); # Read $self->{Substitutions}
7269 0         0 $self->massage_faces;
7270            
7271 0         0 $self->massage_deadkeys_win($self); # Process (embedded) MSKLC-style deadkey maps
7272 0         0 $self->scan_for_DeadKey_Maps(); # Makes a direct-access synonym, scan for DeadKey_Maps* keys
7273 0         0 $self->create_DeadKey_Maps();
7274 0         0 $self->create_composite_layers; # Needs to be after simple deadkey maps are known
7275            
7276 0         0 for my $F (keys %{ $self->{faces} }) {
  0         0  
7277 0 0 0     0 next if 'HASH' ne ref $self->{faces}{$F} or $F =~ /\bVK$/; # "parent" taking keys for a child
7278 0         0 $self->coverage_face0($F); # creates coverage0, imported array (c0 excludes diacritics), coverage0_prefix hash
7279             }
7280 0         0 for my $F (keys %{ $self->{faces} }) {
  0         0  
7281 0 0 0     0 next if 'HASH' ne ref $self->{faces}{$F} or $F =~ /\bVK$/; # "parent" taking keys for a child
7282 0         0 $self->massage_imported($F); # calc new values for imported prefix keys, augments imported maps with Add-maps
7283             }
7284 0         0 for my $F (keys %{ $self->{faces} }) {
  0         0  
7285 0 0 0     0 next if 'HASH' ne ref $self->{faces}{$F} or $F =~ /\bVK$/; # "parent" taking keys for a child
7286 0         0 $self->massage_imported2($F); # changes imported prefix keys to appropriate values for the target personality
7287             }
7288 0         0 $self->create_prefix_chains;
7289 0         0 $self->create_inverted_faces;
7290 0         0 $self->link_composite_layers; # Needs to be after imported keys are reassigned...
7291 0         0 for my $F (keys %{ $self->{faces} }) { # Fine-tune inverted-AltGr faces
  0         0  
7292 0 0 0     0 next if 'HASH' ne ref $self->{faces}{$F} or $F =~ /\bVK$/; # "parent" taking keys for a child
7293 0 0       0 next if $F =~ /#\@?#\@?(Inv)?#\@?/; # Face-on-a-deadkey
7294            
7295 0         0 my $D = $self->{faces}{$F}{'[deadkeyFace]'};
7296 0         0 my $Ex = $self->{faces}{$F}{'[AltGr_Invert_Show]'};
7297 0         0 for my $d (keys %$D) {
7298 0         0 $self->{faces}{$F}{'[deadkeyFaceHexMap]'}{$d} = $self->linked_faces_2_hex_map($F, $D->{$d});
7299 0 0       0 defined (my $auto_inv_AltGr = $self->{faces}{$F}{'[deadkeyInvAltGrKey]'}{$d}) or next;
7300 0         0 my $b1 = $self->{faces}{$F}{'[deadkeyFaceInvAltGr]'}{my $a = $self->charhex2key($auto_inv_AltGr)};
7301 0 0       0 $self->{faces}{$F}{'[deadkeyFaceHexMapInv]'}{$d} = $self->linked_faces_2_hex_map($F, $b1) if $b1;
7302 0         0 my $D = $self->{faces}{$F}{'[prefixDocs]'}{$d};
7303 0 0       0 $self->{faces}{$F}{'[prefixDocs]'}{$self->key2hex($a)} = 'AltGr-inverted: ' . (defined $D ? $D : "[[$d]]");
7304 0         0 my $S = $self->{faces}{$F}{'[Show]'}{$d};
7305 0 0       0 $self->{faces}{$F}{'[Show]'}{$self->key2hex($a)} = (defined $S ? $S : $self->charhex2key($d)) . $Ex;
7306             }
7307            
7308 0         0 my($flip_AltGr, @protect_chr) = $self->{faces}{$F}{'[Flip_AltGr_Key]'}; # Who put it into deadkeyFace???
7309 0 0       0 if (defined $flip_AltGr) {
7310 0         0 $flip_AltGr = $self->key2hex($self->charhex2key($flip_AltGr));
7311 0         0 push @protect_chr, $flip_AltGr;
7312             $self->{faces}{$F}{'[prefixDocs]'}{$flip_AltGr} = 'AltGr-inverted base face'
7313 0 0       0 unless defined $self->{faces}{$F}{'[prefixDocs]'}{$flip_AltGr};
7314 0 0       0 $self->{faces}{$F}{'[Show]'}{$flip_AltGr} = $Ex unless defined $self->{faces}{$F}{'[Show]'}{$flip_AltGr};
7315             }
7316 0   0     0 my $expl = $self->{faces}{$F}{'[Explicit_AltGr_Invert]'} || [];
7317 0         0 for my $i (1..(@$expl/2)) {
7318 0         0 my @C = map $self->key2hex($expl->[2*$i + $_]), -2, -1;
7319 0         0 push @protect_chr, $C[1];
7320 0         0 my $D = $self->{faces}{$F}{'[prefixDocs]'}{$C[0]};
7321 0 0       0 $self->{faces}{$F}{'[prefixDocs]'}{$C[1]} = 'AltGr-inverted: ' . (defined $D ? $D : "[[$C[0]]]");
7322 0         0 my $S = $self->{faces}{$F}{'[Show]'}{$C[0]};
7323 0 0       0 $self->{faces}{$F}{'[Show]'}{$C[1]} = (defined $S ? $S : $self->charhex2key($C[0])) . $Ex;
7324             }
7325 0         0 $self->{faces}{$F}{'[auto_dead]'}{ord $self->charhex2key($_)}++ for @protect_chr;
7326             # warn " Keys HexMap: ", join ', ', sort keys %{$self->{faces}{$F}{'[deadkeyFaceHexMap]'}};
7327             }
7328            
7329 0         0 for my $F (keys %{ $self->{faces} }) { # Finally, collect the stats
  0         0  
7330 0 0 0     0 next if 'HASH' ne ref $self->{faces}{$F} or $F =~ /\bVK$/; # "parent" taking keys for a child
7331 0 0       0 next if $F =~ /#\@?#\@?(Inv)?#\@?/; # Face-on-a-deadkey
7332 0         0 my %seenExtra;
7333 0 0       0 my @extras = ( "@{ $self->{faces}{$F}{'[output_layers]'} || [''] }" =~ /\bprefix(?:\w*)=([0-9a-fA-F]{4,6}\b|.(?![^ ]))/g );
  0         0  
7334 0         0 my %is_extra = map { ($self->charhex2key($_), 1) } @extras; # extra layers (on bizarre modifiers)
  0         0  
7335 0         0 for my $deadKEY ( sort keys %{ $self->{faces}{$F}{'[deadkeyFace]'}} ) {
  0         0  
7336 0         0 my $deadKey = $self->charhex2key($deadKEY);
7337 0 0       0 next unless $is_extra{$deadKey};
7338 0         0 my $FFF = $self->{faces}{$F}{'[deadkeyFace]'}{$deadKEY};
7339 0 0       0 my $cov1 = $self->{faces}{$FFF}{'[coverage0]'} # XXXX not layer0coverage0 - may slide down to layer0
7340             or warn("Deadkey `$deadKey' on face `$F' -> unmassaged face"), next;
7341             $seenExtra{$_}++
7342 0 0 0     0 for map {ref() ? $_->[0] : $_} grep !(ref and $_->[2]), @$cov1; # Skip 2nd level deadkeys
  0         0  
7343             }
7344 0         0 $self->{faces}{$F}{'[coverageExtra]'} = \%seenExtra;
7345            
7346 0 0       0 next unless my $prefix = $self->{faces}{$F}{'[ComposeKey]'};
7347 0         0 $self->auto_dead_can_wrap($F); # All manual deadkeys are set, so auto may be flexible
7348 0         0 $self->create_composekey($F, $prefix);
7349             }
7350            
7351 0         0 for my $F (keys %{ $self->{faces} }) { # Finally, collect the stats
  0         0  
7352 0 0 0     0 next if 'HASH' ne ref $self->{faces}{$F} or $F =~ /\bVK$/; # "parent" taking keys for a child
7353 0 0       0 next if $F =~ /#\@?#\@?(Inv)?#\@?/; # Face-on-a-deadkey
7354 0         0 my($seen_prefix, %seen0, %seen00, %seen1, %seen1only, %seenExtra) = $self->{faces}{$F}{'[coverage0_prefix]'};
7355             # warn("Face `$F' has no [deadkeyFace]"),
7356 0 0       0 next unless $self->{faces}{$F}{'[deadkeyFace]'};
7357             # next;
7358 0         0 my (%check_later, %coverage1_prefix);
7359             # warn "...... face `$F',\tprefixes0 ", keys %$seen_prefix;
7360             # $seen_prefix = {%$seen_prefix}; # Deep copy
7361             # $seen_prefix->{$_}++ for @{ $self->{faces}{$F}{'[dead_in_VK_array]'} || [] };
7362 0 0       0 my @extras = ( "@{ $self->{faces}{$F}{'[output_layers]'} || [''] }" =~ /\bprefix(?:\w*)=([0-9a-fA-F]{4,6}\b|.(?![^ ]))/g );
  0         0  
7363 0         0 my %is_extra = map { ($self->charhex2key($_), 1) } @extras; # extra layers (on bizarre modifiers)
  0         0  
7364 0         0 for my $deadKEY ( sort keys %{ $self->{faces}{$F}{'[deadkeyFace]'}} ) {
  0         0  
7365 0 0       0 unless (%seen0) { # Do not calculate if $F has no deadkeys...
7366 0         0 $seen0{$_}++ for @{ $self->{faces}{$F}{'[coverage00]'} };
  0         0  
7367 0         0 %seen00 = %seen0;
7368             }
7369             ### XXXXX Directly linked faces may have some chars unreachable via the switch-prefixKey
7370 0         0 my ($deadKey, $not_in_0) = $self->charhex2key($deadKEY);
7371             # It does not make sense to not include it into the summary: 0483 on US is such...
7372 0 0       0 $not_in_0++, $check_later{$deadKey}++ unless $seen_prefix->{$deadKey}; # For multi-prefix maps, and extra layers
7373 0         0 my ($FFF, @dd2) = $self->{faces}{$F}{'[deadkeyFace]'}{$deadKEY};
7374 0 0       0 my $cov1 = $self->{faces}{$FFF}{$is_extra{$deadKey} ? '[coverage0]' : '[coverage00]'} # XXXX not layer0coverage0 - may slide down to layer0
    0          
7375             or warn("Deadkey `$deadKey' on face `$F' -> unmassaged face"), next;
7376             ($seen0{$_}++ or $seen1{$_}++),
7377             ($not_in_0 and not $is_extra{$deadKey}) || $seen00{$_} || $seen1only{$_}++, # Only for multi-prefix maps
7378             $is_extra{$deadKey} && $seenExtra{$_}++ # Only for extra modifiers maps
7379 0 0 0     0 for map {ref() ? $_->[0] : $_} grep !(ref and $_->[2]), @$cov1; # Skip 2nd level deadkeys
  0   0     0  
      0        
      0        
7380 0 0       0 if (my $d2 = $self->{faces}{$F}{'[dead2]'}{$deadKey}) {
7381 0         0 my $map = $self->linked_faces_2_hex_map($F, $FFF);
7382             # warn "linked map (face=$F) = ", keys %$d2;
7383 0 0 0     0 @dd2 = map $self->charhex2key($_), map {($_ and ref $_) ? $_->[0] : $_} map $map->{$self->key2hex($_)}, keys %$d2;
  0         0  
7384             # warn "sub-D2 (face=$F) = ", @dd2;
7385             }
7386             #warn "2nd level prefixes for `$deadKey': ", keys %{$self->{faces}{$FFF}{'[coverage0_prefix]'} || {}};
7387             #warn "2nd level prefixes for `$deadKey': <@dd2> ", keys %{$self->{faces}{$F}{'[dead2]'}{$deadKey} || {}};
7388 0 0       0 unless ($not_in_0) {
7389             # warn "sub-cov0 (face=$F) = ", keys %{ $self->{faces}{$FFF}{'[coverage0_prefix]'} || {} };
7390 0 0       0 $coverage1_prefix{$_}++ for keys %{ $self->{faces}{$FFF}{'[coverage0_prefix]'} || {} };
  0         0  
7391             # warn "sub-D2 (face=$F) = ", @dd2;
7392 0         0 $coverage1_prefix{$_}++ for @dd2;
7393             }
7394             # warn "...... deadkey `$deadKey' reached0 in face `$F'" unless $not_in_0;
7395             }
7396            
7397 0   0     0 my @check = grep { !$coverage1_prefix{$_} and !$is_extra{$_} } keys %check_later;
  0         0  
7398 0 0       0 my @only_extra = grep { !$coverage1_prefix{$_} and $is_extra{$_} } keys %check_later;
  0         0  
7399 0         0 $self->{faces}{$F}{'[only_extra]'} = { map {($_, 1)} @only_extra };
  0         0  
7400            
7401 0 0       0 my $_s = (@check > 1 ? 's' : '');
7402 0 0       0 warn("Prefix key$_s <@check> not reached (without double prefix keys?) in face `$F'; later=", keys %check_later, " ; cov1=", keys %coverage1_prefix) if @check;
7403 0         0 $self->{faces}{$F}{'[coverage1]'} = [sort keys %seen1];
7404 0         0 $self->{faces}{$F}{'[coverage1only]'} = [sort keys %seen1only];
7405 0         0 $self->{faces}{$F}{'[coverage1only_hash]'} = \%seen1only;
7406 0         0 $self->{faces}{$F}{'[coverage_hash]'} = \%seen0;
7407 0         0 $self->{faces}{$F}{'[coverageExtra]'} = \%seenExtra;
7408             }
7409             $self
7410 0         0 }
7411            
7412             sub massage_deadkeys_win ($$) {
7413 0     0 0 0 my($self, $h, @process, @to) = (shift, shift);
7414 0         0 my @K = grep m(^\[unparsed]/DEADKEYS\b), @{$h->{'[keys]'}};
  0         0  
7415             # warn "Found deadkey sections `@K'";
7416             # my $H = $h->{'[unparsed]'};
7417 0         0 for my $k (@K) {
7418 0         0 push @process, $self->get_deep($h, (split m(/), $k), 'unparsed_data');
7419 0         0 (my $k1 = $k) =~ s(^\[unparsed]/)();
7420 0         0 push @to, $k1
7421             }
7422 0         0 @K = grep m(^DEADKEYS\b), @{$h->{'[keys]'}};
  0         0  
7423 0         0 for my $k (@K) {
7424 0         0 my $slot = $self->get_deep($h, split m(/), $k);
7425 0 0       0 next unless exists $slot->{klc_filename};
7426             open my $fh, '< :encoding(UTF-16)', $slot->{klc_filename}
7427 0 0       0 or die "open of =`$slot->{klc_filename}' failed: $!";
7428 0         0 local $/;
7429 0         0 my $in = <$fh>;
7430 0         0 push @process, $in;
7431 0         0 push @to, $k;
7432             }
7433 0         0 for my $k1 (@to) {
7434             #warn "DK sec `$k' -> `$v', <", join('> <', keys %{$h->{'[unparsed]'}{DEADKEYS}{la_ru}}), ">";
7435             #warn "DK sec `$k' -> `$v', <$h->{'[unparsed]'}{DEADKEYS}{la_ru}{unparsed_data}>";
7436 0         0 my $v = shift @process;
7437 0         0 my($o,$d,$t) = $self->read_deadkeys_win($v); # Translation tables, names, rest of input
7438 0         0 my (@parts, @h) = split m(/), $k1;
7439 0         0 my %seen = (%$o, %$d);
7440 0         0 for my $kk (keys %seen) {
7441             #warn "DK sec `$k1', deadkey `$kk'. Map: ", $self->array2string( [%{$o->{$kk} || {}}] );
7442 0         0 my $slot = $self->get_deep($h, @parts, $kk);
7443             warn "Deadkey `$kk' defined for `$k1' conflicts with previous definition"
7444 0 0 0     0 if $slot and grep exists $slot->{$_}, qw(map name);
7445 0 0       0 $self->put_deep($h, $o->{$kk}, @parts, $kk, 'map') if exists $o->{$kk};
7446 0 0       0 $self->put_deep($h, $d->{$kk}, @parts, $kk, 'name') if exists $d->{$kk};
7447             }
7448             }
7449             $self
7450 0         0 }
7451            
7452             # http://bepo.fr/wiki/Pilote_Windows
7453             # http://www.phon.ucl.ac.uk/home/wells/dia/diacritics-revised.htm#two
7454             # http://msdn.microsoft.com/en-us/library/windows/desktop/ms646280%28v=vs.85%29.aspx
7455            
7456 1     1   6942 my %oem_keys = do {{ no warnings 'qw' ; reverse (qw(
  1         2  
  1         6784  
7457             OEM_MINUS -
7458             OEM_PLUS =
7459             OEM_4 [
7460             OEM_6 ]
7461             OEM_1 ;
7462             OEM_7 '
7463             OEM_3 `
7464             OEM_5 \
7465             OEM_COMMA ,
7466             OEM_PERIOD .
7467             OEM_2 /
7468             OEM_102 \#
7469             SPACE #
7470             DECIMAL .#
7471             DECIMAL ,#
7472             ABNT_C1 /#
7473             ABNT_C1 ¥
7474             ABNT_C1 ¦
7475             )) }}; #'# Here # marks "second occurence" of keys...
7476             # Extra bindings: see http://www.fysh.org/~zefram/keyboard/xt_scancodes.txt (after “===”)
7477             # e005 Messenger (or Files); e007 Redo; e008 undo; e009 ApplicationLeft; e00a Paste;
7478             # e00b,e011,e012,e01f ScrollWheel-to-key-emulation
7479             # e013 Word; e014 Excel; e015 Calendar; e016 Log Off; e017 Cut; e018 Copy; e01e ApplicationRight
7480             # e03b -- e044 (Microsoft/Logitech Fkeys_without_Flock, F1...F10)
7481             # e063 Wake; e064 My Pictures [or Keypad-) ]
7482             # For type 4 of keyboard (same as types 1,3, except OEM_AX, (NON)CONVERT, ABNT_C1)
7483             # except KANA,(NON)CONVERT,; scancode of YEN,| for OEM_8 is our invention; after OEM_8 all is junk (non-scancodes???)...
# %scan_codes: VK (virtual-key) name => scancode (a string of hex digits).
# The text below lists "SCANCODE NAME" pairs; `reverse' flips the whole list, so the hash is keyed by NAME.
# Since the flipped list is consumed from its end to its start, a NAME listed several times keeps the
# scancode of its FIRST occurrence in the text (e.g. OEM_MINUS => '0c', TAB => '0F', KANA => '70');
# this is why the entries marked below as junk cannot override the earlier ones.
# Hex digits occur in both cases; output_unit0() upper-cases "SCANCODE NAME" before consulting %double_scan_VK.
7484             my %scan_codes = (reverse qw(
7485             02 1
7486             03 2
7487             04 3
7488             05 4
7489             06 5
7490             07 6
7491             08 7
7492             09 8
7493             0a 9
7494             0b 0
7495             0c OEM_MINUS
7496             0d OEM_PLUS
7497             10 Q
7498             11 W
7499             12 E
7500             13 R
7501             14 T
7502             15 Y
7503             16 U
7504             17 I
7505             18 O
7506             19 P
7507             1a OEM_4
7508             1b OEM_6
7509             1e A
7510             1f S
7511             20 D
7512             21 F
7513             22 G
7514             23 H
7515             24 J
7516             25 K
7517             26 L
7518             27 OEM_1
7519             28 OEM_7
7520             29 OEM_3
7521             2b OEM_5
7522             2c Z
7523             2d X
7524             2e C
7525             2f V
7526             30 B
7527             31 N
7528             32 M
7529             33 OEM_COMMA
7530             34 OEM_PERIOD
7531             35 OEM_2
7532             39 SPACE
7533             56 OEM_102
7534             53 DECIMAL
7535            
7536             01 ESCAPE
7537             0C OEM_MINUS
7538             0D OEM_PLUS
7539             0E BACK
7540             0F TAB
7541             1A OEM_4
7542             1B OEM_6
7543             1C RETURN
7544             1D LCONTROL
7545             27 OEM_1
7546             28 OEM_7
7547             29 OEM_3
7548             2A LSHIFT
7549             2B OEM_5
7550             33 OEM_COMMA
7551             34 OEM_PERIOD
7552             35 OEM_2
7553             36 RSHIFT
7554             37 MULTIPLY
7555             38 LMENU
7556             3A CAPITAL
7557             3B F1
7558             3C F2
7559             3D F3
7560             3E F4
7561             3F F5
7562             40 F6
7563             41 F7
7564             42 F8
7565             43 F9
7566             44 F10
7567             45 NUMLOCK
7568             46 SCROLL
7569             47 HOME
7570             48 UP
7571             49 PRIOR
7572             4A SUBTRACT
7573             4B LEFT
7574             4C CLEAR
7575             4D RIGHT
7576             4E ADD
7577             4F END
7578             50 DOWN
7579             51 NEXT
7580             52 INSERT
7581             e053 DELETE
7582             54 SNAPSHOT
7583             56 OEM_102
7584             57 F11
7585             58 F12
7586             59 CLEAR
7587             5A OEM_WSCTRL
7588             5B OEM_FINISH
7589             5C OEM_JUMP
7590             5C OEM_AX
7591             5D EREOF
7592             5E OEM_BACKTAB
7593             5F OEM_AUTO
7594             62 ZOOM
7595             63 HELP
7596             64 F13
7597             65 F14
7598             66 F15
7599             67 F16
7600             68 F17
7601             69 F18
7602             6A F19
7603             6B F20
7604             6C F21
7605             6D F22
7606             6E F23
7607             6F OEM_PA3
7608             70 KANA
7609             71 OEM_RESET
7610             73 ABNT_C1
7611             76 F24
7612             79 CONVERT
7613             7B NONCONVERT
7614             7B OEM_PA1
7615             7C TAB
7616             7E ABNT_C2
7617             7F OEM_PA2
7618             e010 MEDIA_PREV_TRACK
7619             e019 MEDIA_NEXT_TRACK
7620             e01C RETURN
7621             e01D RCONTROL
7622             e020 VOLUME_MUTE
7623             e021 LAUNCH_APP2
7624             e022 MEDIA_PLAY_PAUSE
7625             e024 MEDIA_STOP
7626             e02E VOLUME_DOWN
7627             e030 VOLUME_UP
7628             e032 BROWSER_HOME
7629             e035 DIVIDE
7630             e037 SNAPSHOT
7631             e038 RMENU
7632             e046 CANCEL
7633             e047 HOME
7634             e048 UP
7635             e049 PRIOR
7636             e04B LEFT
7637             e04D RIGHT
7638             e04F END
7639             e050 DOWN
7640             e051 NEXT
7641             e052 INSERT
7642             e053 DELETE
7643             e05B LWIN
7644             e05C RWIN
7645             e05D APPS
7646             e05E POWER
7647             e05F SLEEP
7648             e065 BROWSER_SEARCH
7649             e066 BROWSER_FAVORITES
7650             e067 BROWSER_REFRESH
7651             e068 BROWSER_STOP
7652             e069 BROWSER_FORWARD
7653             e06A BROWSER_BACK
7654             e06B LAUNCH_APP1
7655             e06C LAUNCH_MAIL
7656             e06D LAUNCH_MEDIA_SELECT
7657             e11D PAUSE
7658            
7659             7D OEM_8
7660            
7661             10 SHIFT
7662             11 CONTROL
7663             12 MENU
7664             15 KANA
7665             15 HANGUL
7666             17 JUNJA
7667             18 FINAL
7668             19 HANJA
7669             19 KANJI
7670             1C CONVERT
7671             1D NONCONVERT
7672             1E ACCEPT
7673             1F MODECHANGE
7674             29 SELECT
7675             2A PRINT
7676             2B EXECUTE
7677            
7678             60 NUMPAD0
7679             61 NUMPAD1
7680             62 NUMPAD2
7681             63 NUMPAD3
7682             64 NUMPAD4
7683             65 NUMPAD5
7684             66 NUMPAD6
7685             67 NUMPAD7
7686             68 NUMPAD8
7687             69 NUMPAD9
7688             6C SEPARATOR
7689             B4 MEDIA_LAUNCH_MAIL
7690             B5 MEDIA_LAUNCH_MEDIA_SELECT
7691             B6 MEDIA_LAUNCH_APP1
7692             B7 MEDIA_LAUNCH_APP2
7693            
7694             E5 PROCESSKEY
7695             E7 PACKET
7696             F6 ATTN
7697             F7 CRSEL
7698             F8 EXSEL
7699             FA PLAY
7700             FC NONAME
7701             FD PA1
7702             FE OEM_CLEAR
7703            
7704             )); # http://www.opensource.apple.com/source/WebCore/WebCore-1C25/platform/gdk/KeyboardCodes.h
7705             # the part after PAUSE is junk...
7706            
# Control-key bindings of the keys  [  ]  \  space.
# A value is the character the key must produce followed by the 4-hex-digit code of the
# control character to emit; the SPACE entry has the code only.
my %oem_control = (
    OEM_4   => '[001b',
    OEM_6   => ']001d',
    OEM_5   => '\001c',
    SPACE   => '0020',
    OEM_102 => '\001c',
);      # In ru layouts, only entries which match the char are present

# %do_control: produced character => hex code of the control character.
my %do_control;
for my $spec (values %oem_control) {
    my ($char, $hex) = $spec =~ /^(.)(.+)/ or next;
    $do_control{$char} = $hex;
}
$do_control{' '} = '0020';
delete $do_control{0};          # by-product of splitting the SPACE entry, which has no leading char
7718            
# Default bindings of the keys outside the main block: VK name => list of groups; the first group is
# [unshifted, shifted]; the second one (when present) is consulted by output_unit00() for the Control column.
my %default_bind = (
    TAB      => [["\t", "\t"]],
    ADD      => [["+", "+"]],
    SUBTRACT => [["-", "-"]],
    MULTIPLY => [["*", "*"]],
    DIVIDE   => [["/", "/"]],
    RETURN   => [["\r", "\r"],   ["\n"]],
    BACK     => [["\b", "\b"],   ["\x7f"]],
    ESCAPE   => [["\e", "\e"],   ["\e"]],
    CANCEL   => [["\cC", "\cC"], ["\cC"]],
);
# The numeric keypad digits produce themselves (unshifted only)
$default_bind{"NUMPAD$_"} = [[$_]] for 0..9;
7730            
# get_VK($self, $f): the VK table of the face $f (a `/'-separated path), looked up with
# get_deep_via_parents() under `faces'; an empty hash ref if nothing (true) is found.
sub get_VK ($$) {
    my ($self, $f) = @_;
    my @path = ('faces', (split m(/), $f), 'VK');
    my $found = $self->get_deep_via_parents($self, undef, @path);
    # $self->{faces}{$f}{VK} || {}
    return $found || {};
}
7736            
my $min_sec;            # the smallest start offset among the sections of %start_SEC (computed lazily)

# last_pre_funckeys($self, $l0): the number of leading positions of the layer named $l0 which form the
# "main" part.  This is the length of the layer if it ends before the first section of %start_SEC;
# otherwise the start of that section, moved back over trailing positions having no defined binding.
sub last_pre_funckeys($$) {
    my ($self, $l0) = @_;
    if (not defined $min_sec) {
        $min_sec = 1e300;
        for my $section (values %start_SEC) {
            $min_sec = $section->[0] if $min_sec > $section->[0];
        }
    }
    my $layer = $self->{layers}{$l0};
    my $post_main = @$layer;
    return $post_main if $post_main < $min_sec;

    for ($post_main = $min_sec; $post_main > 0; $post_main--) {
        my $unit = $layer->[$post_main - 1] || [];
        # An entry is either a plain char, or an array ref with the char in its element 0
        my $bound = grep { defined(ref() ? $_->[0] : $_) } grep defined, @$unit;
        last if $bound;
    }
    return $post_main;
}
7754            
7755             sub massage_VK ($$) {
                 # massage_VK($self, $f): merge the VK (virtual-key) bindings of the face $f and the generated
                 # Control-character slots into per-face copies of the face's layers.
                 # Side effects on $self->{faces}{$f}: sets '[non_VK]', '[VK_off]', '[scancodes]', '[ini_layers]',
                 # '[start_ctrl0]', '[start_ctrl]', '[end_ctrl]'; replaces {layers} by the names of the copies
                 # ("OLDNAME<$f>", stored in $self->{layers}, with a copy of the old layer kept under '[ini_copy1]').
                 # The entries of the table returned by get_VK($f) are rewritten in place (scancode filled in,
                 # bindings converted to characters).  Dies if a VK key has neither an explicit scancode nor an
                 # entry in %scan_codes, or on an empty binding.
                 # Returns ([keys seen in the VK bindings; '' stands for `-1'], \@dead, \%seen_dead); @dead lists
                 # the keys marked as dead (prefix) in the order of their first appearance.
7756 0     0 0 0 my ($self, $f, %seen, %seen_dead, @dead, @ctrl) = (shift, shift);
7757 0         0 my $l0 = $self->{faces}{$f}{layers}[0];
7758 0         0 my $post_main = $self->last_pre_funckeys($l0);
7759            
                 # The extra slots must start after the main part of the first layer of LinkFace as well
7760 0 0       0 if (my $LF = $self->{faces}{$f}{LinkFace}) {
7761 0         0 my $l00 = ($self->export_layers($LF))->[0];
7762 0         0 my $post_main0 = $self->last_pre_funckeys($l00);
7763 0 0       0 $post_main = $post_main0 if $post_main0 > $post_main;
7764             }
7765            
7766 0 0       0 if (defined (my $b = $self->{faces}{$f}{BaseLayer})) { # Cannot bump into known keycodes
7767 0 0 0     0 $b = $self->make_translated_layers($b, $f, [0])->[0] if defined $b and not $self->{layers}{$b};
7768 0         0 my $post_main0 = $self->last_pre_funckeys($b);
7769 0 0       0 $post_main = $post_main0 if $post_main0 > $post_main;
7770             }
7771             ## warn "post_main=$post_main; layer=$l0 min_sec=$min_sec";
7772            
7773 0         0 $self->{faces}{$f}{'[non_VK]'} = $post_main;
7774 0         0 my $create_a_c = $self->{faces}{$f}{'[create_alpha_ctrl]'};
7775 0 0       0 $create_a_c = $create_alpha_ctrl unless defined $create_a_c;
                 # $EXTR: pairs of control characters which get slots of their own in the first layer
7776 0 0       0 my $EXTR = [ ["\r","\n"], ["\b","\x7F"], ["\t","\cC"], ["\x1b","\x1d"], # Enter/C-Enter/Bsp/C-Bsp/Tab/Cancel/Esc=C-[/C-]
    0          
7777             ["\x1c", ($create_a_c ? "\cZ" : ())], ($create_a_c>1 ? (["\x1e", "\x1f"], ["\x00"]) : ())]; # C-\ C-z, C-^ C-_
7778 0 0       0 if ($create_a_c) {
                 # Pad $EXTR (in pairs) with those of chr(1)..chr(26) which are not in it yet;
                 # @ctrl records the number of pairs before and after the padding
7779 0         0 my %s;
7780 0         0 push @ctrl, scalar @$EXTR;
7781 0         0 $s{$_}++ for $self->flatten_arrays($EXTR);
7782 0         0 my @ctrl_l = grep !$s{$_}, map chr($_), 1..26;
7783 0         0 push @$EXTR, [shift @ctrl_l, shift @ctrl_l] while @ctrl_l > 1;
7784 0 0       0 push @$EXTR, [@ctrl_l] if @ctrl_l;
7785 0         0 push @ctrl, scalar @$EXTR;
7786             }
                 # $extra[$l] is what gets inserted into the layer No. $l: $EXTR for the first layer, the same
                 # number of empty placeholders for the others.
                 # NOTE(review): `([]) x N' repeats ONE array reference, so the placeholders of a layer share
                 # a single array -- confirm that nothing stores into them later.
7787 0         0 my @extra = ( $EXTR, map [([]) x @$EXTR], 1..$#{ $self->{faces}{$f}{layers} } );
  0         0  
7788 0         0 my $VK = $self->get_VK($f);
7789 0         0 $self->{faces}{$f}{'[VK_off]'} = \ my %VK_off;
7790 0         0 $self->{faces}{$f}{'[scancodes]'} = \ my %scan;
7791 0         0 for my $K (reverse sort keys %$VK) { # want SPACE to come before ABNT_* and OEM_102
                 # $v is [scancode, binding, binding, ...]; an empty scancode is looked up in %scan_codes
7792 0         0 my ($v, @C) = $VK->{$K};
7793 0 0 0     0 $v->[0] = $scan_codes{$K} or die("Can't find the scancode for the VK key `$K'")
7794             unless length $v->[0];
7795 0         0 $scan{$K} = $v->[0];
7796             # warn 'Key: <', join('> <', @$v), '>';
7797 0         0 my $c = 0;
7798 0         0 $VK_off{$K} = @{ $extra[0] }; # Where in the layouts is the VK key
  0         0  
7799 0         0 for my $k (@$v[1..$#$v]) {
                 # A binding is `-1' (none) or a key spec for charhex2key(); a trailing `@' marks a dead key.
                 # The bindings No. 2*L and 2*L+1 are the unshifted/shifted ones of the layer No. L.
7800 0 0       0 ($k, my $dead) = ($k =~ /^(.+?)(\@?)$/) or die "Empty key in VK list";
7801 0 0       0 $seen{$k eq '-1' ? '' : ($k = $self->charhex2key($k))}++;
7802 0 0 0     0 $seen_dead{$k}++ or push @dead, $k if $dead and $k ne '-1';
      0        
7803 0 0       0 my $kk = ($k eq '-1' ? undef : $k);
7804 0 0       0 push @{ $extra[int($c/2)] }, [] unless $c % 2;
  0         0  
7805 0 0       0 push @{ $extra[int($c/2)][-1] }, ($dead ? [$kk, undef, 1] : $kk); # $extra[$N] is [[$k0, $k1] ...]
  0         0  
7806 0 0       0 $kk .= $dead if defined $kk;
7807 0         0 push @C, $kk;
7808 0         0 $c++;
7809             }
7810             # warn 'Key: <', join('> <', @C), '>';
7811 0         0 @$v = ($v->[0], @C); # update the entry in %$VK
7812             }
7813 0         0 $self->{faces}{$f}{'[ini_layers]'} = [ @{ $self->{faces}{$f}{layers} } ]; # Deep copy
  0         0  
7814 0 0       0 if (@extra) {
7815 0         0 my($start_append, @Ln);
7816 0         0 for my $l (0 .. $#{ $self->{faces}{$f}{layers} } ) { # Assume that in every layer a few positions after end of the
  0         0  
7817 0         0 my $oLn = my $Ln = $self->{faces}{$f}{layers}[$l]; # first layer are empty
7818 0         0 my $L = $self->{layers}{$Ln};
7819 0 0       0 unless ($l) {
                 # Done once (for the first layer): fix the insertion point, and convert the offsets
                 # in %VK_off from "index in @extra" to "position in the layer"
7820 0         0 $start_append = $post_main;
7821 0         0 $self->{faces}{$f}{'[start_ctrl0]'} = $start_append;
7822 0   0     0 $self->{faces}{$f}{'[start_ctrl]'} = $start_append + ($ctrl[0]||0);
7823 0   0     0 $self->{faces}{$f}{'[end_ctrl]'} = $start_append + ($ctrl[1]||0);
7824 0         0 $_ += $start_append for values %VK_off;
7825             }
7826 0         0 my @L = map [$_->[0], $_->[1]], @$L; # Each element is []; 1-level deep copy
7827 0 0       0 warn "Main keys + ctrl slots overwrite FUNKEYS sections" if $start_append + @{ $extra[$l] } > $min_sec;
  0         0  
7828 0   0     0 $L[$start_append+$_] ||= [] for 0..$#{ $extra[$l] }; # Avoid splicing after the end of array
  0         0  
                 # Overwrite (not insert): as many old positions are removed as new ones are put in
7829 0         0 splice @L, $start_append, @{ $extra[$l] }, @{ $extra[$l] };
  0         0  
  0         0  
7830 0         0 push @Ln, ($Ln .= "<$f>");
7831 0         0 $self->{layers}{$Ln} = \@L;
7832             # At this moment ini_copy should not exist yet
7833 0 0       0 warn "ini_copy of `$oLn' exists; --> `$Ln'" if $self->{layers}{'[ini_copy]'}{$oLn};
7834             # $self->{layers}{'[ini_copy]'}{$Ln} = $self->{layers}{'[ini_copy]'}{$oLn} if $self->{layers}{'[ini_copy]'}{$oLn};
7835             #??? Why does not this works???
7836             #warn "ini_copy1: `$Ln' --> `$oLn'";
7837 0         0 $self->{layers}{'[ini_copy1]'}{$Ln} = $self->deep_copy($self->{layers}{$oLn});
7838             }
7839 0         0 $self->{faces}{$f}{layers} = \@Ln;
7840             }
7841 0         0 ([keys %seen], \@dead, \%seen_dead)
7842             }
7843            
# format_key($self, $k, $dead, $used): the representation of the binding $k in a column of a key line.
#   undefined $k                -> -1
#   one ASCII letter or digit   -> itself
#   other single char <= U+FFFF -> $self->key2hex($k)
#   anything else               -> '%%'   (the caller queues such columns as ligatures)
# For a dead key ($dead is true) `@' is appended (except to '%%'), and $used->{$k} is incremented.
sub format_key ($$$$) {
    my ($self, $k, $dead, $used) = @_;
    defined $k or return -1;
    my $mod = '';
    if ($dead) {
        $mod = '@';
        $used->{$k}++;
    }
    if ($k =~ /^[A-Z0-9]$/i) {
        return "$k$mod";
    }
    return '%%' unless 1 == length($k) and ord($k) <= 0xFFFF;
    return $self->key2hex($k) . $mod;
}
7852            
7853             # wget -O - http://cgit.freedesktop.org/xorg/proto/xproto/plain/keysymdef.h | perl -C31 -wlne 'next unless /\bXK_(\w+)\s+0x00([a-fA-F\d]+)/; print chr hex $2, qq(\t$1)' > ! oooo1
7854             # wget -O - http://cgit.freedesktop.org/xorg/proto/xproto/plain/keysymdef.h | perl -C31 -wlne "next unless /\bXK_(\w+)\s+0x([a-fA-F\d]+)\s+\/\*(?:\(?|\s+)U\+([a-fA-F\d]+)/; print chr hex $3, qq(\t$1)" > oooo3
7855            
7856             # See XK_ARMENIAN for an alternative way to encode Unicode to XK_: 0x1000587 /* U+0587
# %KeySyms:    keysym name (without XK_) => character;
# %invKeySyms: character => keysym name (only for entries whose U+XXXX comment is not parenthesized);
# %deadSyms:   the names found after XK_dead_.
my(%KeySyms,%deadSyms,%invKeySyms);

# load_KeySyms($self): fill the tables above (once) from the files listed in the `KeySyms' value,
# which are expected to be in the format of X11's keysymdef.h.  Unreadable files are skipped with a warning.
sub load_KeySyms($) {
    return if %KeySyms;                         # already loaded
    my ($self) = @_;
    my $names = $self->get__value('KeySyms');
    return unless $names;
    my %macro;                                  # numeric code => name of its first non-duplicate definition
  FILE:
    for my $fn (@$names) {
        my $fh;
        unless (open $fh, '<', $fn) {
            warn("Cannot open $fn: $!");
            next FILE;
        }
      LINE:
        while (defined(my $l = <$fh>)) {
            chomp $l;
            if ($l =~ /\bXK_dead_(\w+)\s+0x([a-fA-F\d]+)\b/) {
                $deadSyms{$1}++;
            }
            # Entries commented as obsolete/alias are expected to repeat an earlier code
            my $dup = ( $l =~ m[\bXK_(\w+)\s+0x([a-fA-F\d]+)\s+/\*.*\b(obsolete|alias)\b] );
            my ($name, $code, $paren, $uni)
                = ($l =~ m[\bXK_(\w+)\s+0x([a-fA-F\d]+)\s+/\*\s*(\()?U\+([a-fA-F\d]+)])
                or next LINE;
            warn "not yet defined: <$l>" if $dup and not $macro{$code};
            warn "sym re-defined: <$l>" if $KeySyms{$name};
            # warn "macro re-defined: <$l>\n" if $macro{$code} and not $dup;      # several offenders
            my $c = chr hex $uni;
            $KeySyms{$name} = $c;
            $invKeySyms{$c} = $name unless $paren;
            $macro{$code}   = $name unless $dup;
        }
    }
}
7879            
# format_key_XKB($self, $k, $dead, $used): an XKB-style symbol name for the binding $k.
#   undefined $k  -> 'NoSymbol';   several chars -> "multichar=<...>???"
#   otherwise the name from %invKeySyms, or Uxxxx (Uxxxxxx above U+FFFF) if there is none.
# For a dead key the result is "dead_NAME" if NAME is known in %deadSyms -- NAME being the name of $k,
# or of a char on the diacritic list which '[map2diac]' assigns to $k (possibly with a leading
# `ascii' stripped); failing that, "<dead_NAME>???".  $used->{$k} is incremented for dead keys.
sub format_key_XKB ($$$$) {             ##### Unfinished
    my ($self, $k, $dead, $used) = @_;
    defined $k or return 'NoSymbol';
    %KeySyms or $self->load_KeySyms;
    my $mod = '';
    if ($dead) {
        $mod = 'dead_';
        $used->{$k}++;
    }
    return "multichar=<$k>???" unless 1 == length $k;

    my $sym = $invKeySyms{$k};
    if (defined $sym) {
        return "$mod$sym" if !$mod or exists $deadSyms{$sym};
    }
    if ($mod) {
        if (my $D = $self->{'[map2diac]'}{$k}) {
            my $DD = $self->{'[diacritics]'}{$D};
            # warn "... diac($k): ", join ' ', map @$_, @$DD;
            for my $c (map @$_, @$DD) {         # flatten the list
                my $SYM = $invKeySyms{$c};
                next unless defined $SYM;
                return "$mod$SYM" if exists $deadSyms{$SYM};    # Try other chars on the same diacritic-list
                $SYM =~ s/^ascii// or next;
                return "$mod$SYM" if exists $deadSyms{$SYM};    # Try other chars on the same diacritic-list
            }
        }
    }
    unless (defined $sym) {
        my $width = (ord($k) > 0xFFFF ? 6 : 4);
        $sym = sprintf 'U%0' . $width . 'x', ord $k;
    }
    return $mod ? "<$mod$sym>???" : $sym;
}
7902            
# auto_capslock($self, $u): 1 if in the pair $u = [unshifted, shifted] both are defined, differ, and the
# second is the uppercase (or the titlecase) form of the first -- so CapsLock may act as Shift; 0 otherwise.
sub auto_capslock($$) {
    my ($self, $u) = @_;
    my %fix = qw( ӏ Ӏ );                # Perl 5.8.8 uc is wrong
    my $plain = $u->[0];
    return 0 unless defined $plain;
    my $shifted = $u->[1];
    return 0 unless defined $shifted;
    return 0 if $plain eq $shifted;
    my $fixed = $fix{$plain};
    for my $upper ($fixed || uc($plain), $fixed || ucfirst($plain)) {
        return 1 if $upper eq $shifted;
    }
    return 0;
}
7911            
7912             sub flatten_unit ($$$$) {
                 # flatten_unit($self, $face, $N, $E): gather what the key at the position $N produces in the layers
                 # to output.  $E (optional) is an array ref of layer specs; a spec is either a number (an index
                 # into the face's {layers}), or `prefix=KEY', `prefixNOTSAME=KEY', `prefixNOTSAMEcase=KEY', with KEY
                 # naming a prefix key whose layers are taken from {'[deadkeyLayers]'}.  Without $E all the layers
                 # of the face are used.
                 # Dies on a too large layer number, a malformed spec, or an unknown prefix key.
                 # Returns nothing if no binding is found (or if $N is undefined); otherwise a reference to an array
                 # of [char, SPEC_INDEX - NUMBER_OF_FACE_LAYERS, is_prefix_key], the entries of the spec No. $f being
                 # spliced in at the offset 2*$f.
7913 0     0 0 0 my ($self, $face, $N, $E) = (shift, shift, shift, shift);
7914 0         0 my(%ss, $cnt); # Set Control-KEY if is [ or ] or \
7915 0         0 my @KK;
7916 0         0 my $L = $self->{faces}{$face}{layers};
7917 0         0 my $b = @$L;
                 # Pass 1: %ss records (as "FLAG+CHAR") what the numbered layers already produce on this key.
                 # NOTE(review): @CC, @pp, @OK, %s1, @was and @p are not used in this pass.
7918 0 0       0 if ($E) { # = $self->{faces}{$face}{'[output_layers_XKB]'} || $self->{faces}{$face}{'[output_layers]'}) {
7919 0         0 for my $Ln (@$E) { # Construct $ss from the numbered layers
7920 0 0       0 next unless $Ln =~ /^\s*\d+\s*$/;
7921 0 0       0 die "Layer number too large in output_layers" unless $Ln < $b;
7922 0         0 my $LL = $L->[$Ln]; # Numeric are for numbered layers
7923 0         0 my $l = $self->{layers}{$LL}[$N];
7924 0         0 my(@CC, @pp, @OK);
7925 0         0 my(%s1, @was);
7926 0         0 for my $sh (0..$#$l) { # These `map´s have 1 arg
7927 0 0       0 my @C = map {defined() ? (ref() ? $self->dead_with_inversion(!'hex', $_, $face, $self->{faces}{$face}) : $_) : $_} $l->[$sh];
  0 0       0  
7928 0 0       0 my @p = map {defined() ? (ref() ? $_->[2] : 0 ) : 0 } $l->[$sh];
  0 0       0  
7929 0 0       0 next unless defined (my $c = $C[0]);
7930 0   0     0 my $pref = !!$p[0] || 0;
7931 0         0 $ss{"$pref$c"}++;
7932             }
7933             }
7934             }
                 # Pass 2: collect the columns of every spec
7935 0   0     0 my $extra = $E || [0..$b-1];
7936 0 0 0     0 if ($extra and defined $N) { # $N not supported on VK...
7937 0         0 for my $f (0..$#$extra) {
7938             # warn "Extra layer number $f, base=$b requested while the character N=$N has " . (scalar @$u) . " layers" if $f+$b <= $#$u;
7939 0         0 my($notsame, $case, $LL, $num);
7940 0 0       0 if ((my $lll = $extra->[$f]) =~ /^\s*\d+\s*$/) {
7941 0 0       0 die "Layer number too large in output_layers" unless $lll < $b;
7942 0         0 $LL = [$L->[$lll]]; # Numeric are for numbered layers
7943 0         0 $num = 1;
7944             } else {
7945 0 0       0 $lll =~ s/^prefix(NOTSAME(case)?)?=// or die "Extra layer: expected `prefix=PREFIX', see: `$extra->[$f]'";
7946 0         0 ($notsame, $case) = ($1,$2);
7947 0         0 my $c = $self->key2hex($self->charhex2key($lll));
7948 0 0       0 $LL = $self->{faces}{$face}{'[deadkeyLayers]'}{$c} or die "Unknown prefix character `$c´ in extra layers";
7949             }
7950 0         0 my @L = map $self->{layers}{$_}[$N], @$LL;
                 # Per shift state $sh: $CC[$sh] is the chosen char, $pp[$sh] its prefix-key flag, and $OK[$sh]
                 # is set when the choice is not something %ss already has
7951 0         0 my(@CC, @pp, @OK);
7952             # With notsame, squeeze a face into a layer; dups are marked “free”, so have a chance to squeeze Alt to Shift (w/o “case”)
7953 0 0       0 for my $l (@L[0 .. ($notsame ? $b-1 : 0)]) {
7954 0         0 my(%s1, @unsh); # s1 is "seen in this layer"
7955 0         0 for my $sh (0..$#$l) { # These `map´s have 1 arg
7956 0 0       0 my @C = map {defined() ? (ref() ? $self->dead_with_inversion(!'hex', $_, $face, $self->{faces}{$face}) : $_) : $_} $l->[$sh];
  0 0       0  
7957 0 0       0 my @p = map {defined() ? (ref() ? $_->[2] : 0 ) : 0 } $l->[$sh];
  0 0       0  
7958 0 0       0 next unless defined (my $c = $C[0]);
7959 0   0     0 my $pref = !!$p[0] || 0;
7960 0 0       0 ($CC[$sh], $pp[$sh]) = ($c, $pref) unless defined $CC[$sh]; # fallback
                 # $cnt is only tested for truth at the end, so counting a binding twice (here and below) is harmless
7961 0 0       0 $cnt++ if defined $CC[$sh];
7962 0 0       0 next if $num;
7963             # $ss{$C[0]}++ if $num;
7964 0 0 0     0 ($CC[$sh], $pp[$sh], $OK[$sh], $s1{"$pref$c"}) = ($c, $pref, 1,1) if !$OK[$sh] and not $ss{"$pref$c"};
7965             ($CC[$sh], $pp[$sh], $OK[$sh], $s1{"$unsh[1]$unsh[0]"}) = (@unsh, 1,1) # use unshifted if needed
7966 0 0 0     0 if $sh and !$OK[$sh] and defined $unsh[0] and not $ss{"$unsh[1]$unsh[0]"} and not $s1{"$unsh[1]$unsh[0]"};
      0        
      0        
      0        
7967 0 0 0     0 @unsh = ($c, $pref) unless $case or $sh; # move AltGr-LETTER to Shift-LETTER if free (may omit `or $sh´)
7968 0 0       0 $cnt++ if defined $CC[$sh];
7969             }
7970             }
7971             # Avoid read-only values (can get via $#KK) which cannot be autovivified
7972 0 0       0 push @KK, ([]) x (2*$f - @KK) if @KK < 2*$f; # splice can't do with a gap after the end of array
7973 0         0 splice @KK, 2*$f, 0, map [$CC[$_], $f-$b, $pp[$_]], 0..$#CC;
7974             }
7975             }
7976 0 0       0 return unless $cnt;
7977 0         0 return \@KK;
7978             }
7979            
# "SCANCODE VK" of a key => "SCANCODE VK" of a second key which should get the same bindings;
# output_unit0() consults this table (with the first string upper-cased) and queues the second key
# for output_added_units().
my %double_scan_VK = (
    '56 OEM_102'    => '7D OEM_8',              # ISO vs JIS (right) keyboard
    '34 OEM_PERIOD' => '7E ABNT_C2',            # period vs ABNT (numpad)
    '7B NONCONVERT' => '79 CONVERT',            # JIS keyboard: left of SPACE, right of SPACE
    # Disabled:
    # '73 ABNT_C1'  => '7E ABNT_C2',            # ABNT (right) = JIS (left) keyboard vs ABNT (numpad)
    # '53 DECIMAL'  => '7E ABNT_C2',            # NUMPAD-period vs ABNT (numpad) [Does not work??? DECIMAL too late?]
);

# VK name => control character for the second Control column (used by output_unit00() when create_alpha_ctrl > 1)
my %shift_control_extra = (
    2         => "\x00",
    6         => "\x1e",
    OEM_MINUS => "\x1f",
);
7986            
7987             { my(%seen, %seen_scan, %seen_VK, @add_scan_VK, @ligatures, @decimal);
# reset_units($self): forget everything accumulated by the output_unit*() family (seen keys/scancodes,
# queued extra keys, queued ligatures, the stashed DECIMAL key).
sub reset_units ($) {
    %seen = %seen_VK = %seen_scan = ();
    @add_scan_VK = @ligatures = @decimal = ();
}
7989            
7990             sub output_unit00 ($$$$$$$;$$) { # $U->[$i] is the entry for the key in the layer No. $i
                 # output_unit00($self, $face, $k, $UU, $N, $deadkeys, $Used, $known_scancode, $skippable):
                 # prepare the key with the VK name $k, at the position $N of the layers, for output.
                 # The scancode is $known_scancode or $scan_codes{$k}; without either, warns and returns nothing.
                 # The columns come from flatten_unit() (empty columns, one per element of @$UU, if it finds nothing);
                 # the Control columns are synthesized from %do_control, %default_bind, '[create_alpha_ctrl]' and
                 # %shift_control_extra, and are spliced in at the column '[ctrl_after_modcol]' (default: 2*$ctrl_after).
                 # Returns nothing for a $skippable key with no bindings, and for DECIMAL (which is stashed in @decimal,
                 # to be emitted by output_added_units()); otherwise returns what output_unit_KK() returns.
                 # $deadkeys is not used here.
7991 0     0 0 0 my ($self, $face, $k, $UU, $N, $deadkeys, $Used, $known_scancode, $skippable) = (shift, shift, shift, shift, shift, shift, shift, shift, shift);
7992 0 0 0     0 my $sc = ($known_scancode or $scan_codes{$k}) or warn("Can't find the scancode for the key `$k'"), return;
7993 0         0 my ($cnt, @KK) = 0;
7994 0         0 my $skip = grep $k eq $_, @{$self->{faces}{$face}{'[skip_extra_layers_WIN]'}};
  0         0  
                 # Keys listed in '[skip_extra_layers_WIN]' get the plain layers only (flatten_unit() without specs)
7995             my $flat = $self->flatten_unit($face, $N,
7996             (!$skip and $self->{faces}{$face}{'[output_layers_WIN]'}
7997 0 0 0     0 || $self->{faces}{$face}{'[output_layers]'}))
7998             and $cnt++;
7999 0 0       0 @KK = ($cnt ? @$flat : map [], @$UU);
8000            
8001 0         0 my(@cntrl); # Set Control-KEY if is [ or ] or \ etc
                 # @U: the chars of the first 4 columns; @u groups them as [unshifted, shifted] per layer
8002 0         0 my @U = @KK;
8003 0 0       0 $#U = 3 if $#U > 3;
8004 0   0     0 $_ and ref $_ and $_ = $_->[0] for @U;
      0        
8005 0         0 my @u = [@U[0,1]];
8006 0 0       0 $u[1] = [@U[2,3]] if @U > 2;
8007 0         0 my $b = $KK[0];
8008 0 0 0     0 $b = $b->[0] if $b and ref $b;
                 # Later assignments to @cntrl override the earlier ones
8009 0 0 0     0 @cntrl = chr hex $do_control{$b} if $do_control{$b || 'N/A'}; # \ ---> ^\
8010 0 0 0     0 @cntrl = @{ $default_bind{$k}[1] } if !@cntrl and $default_bind{$k}[1];
  0         0  
8011 0         0 my $create_a_c = $self->{faces}{$face}{'[create_alpha_ctrl]'};
8012 0 0       0 $create_a_c = $create_alpha_ctrl unless defined $create_a_c;
8013 0 0 0     0 @cntrl = (chr(0x1F & ord $k)) x $create_a_c if $k =~ /^[A-Z]$/ and $create_a_c;
8014 0 0 0     0 @cntrl = (undef, $shift_control_extra{$k}) if $create_a_c > 1 and $shift_control_extra{$k};
8015 0   0     0 $cnt ||= @cntrl;
8016 0 0 0     0 return if $skippable and not $cnt;
8017            
8018 0         0 my $CL;
8019 0 0 0     0 if (my $Caps = $self->{faces}{$face}{'[CapsLOCKlayers]'} and defined $N) { # $N not supported on VK...
8020 0         0 $CL = [map $self->{layers}{$_}[$N], @$Caps];
8021             # warn "See CapsLock layers: <<<", join('>>> <<<', @$Caps), ">>>";
8022             }
8023 0 0       0 if ($skippable) {
                 # Fill the still empty unshifted/shifted columns from %default_bind
8024 0         0 for my $shft (0,1) {
8025 0 0 0     0 $KK[$shft] = [$default_bind{$k}[0][$shft], 0] if not defined $KK[$shft][0] and defined $default_bind{$k}[0][$shft];
8026             ### $KK[$shft] = [$decimal[$shft], 0] if $k eq 'DECIMAL' and @decimal;
8027             }
8028             }
8029 0         0 my $pre_ctrl = $self->{faces}{$face}{'[ctrl_after_modcol]'};
8030 0 0       0 $pre_ctrl = 2*$ctrl_after unless defined $pre_ctrl;
                 # Truncate/pad @cntrl to exactly $create_a_c columns
8031 0         0 $#cntrl = $create_a_c - 1; # if $pre_ctrl < 2*@$u or $self->{faces}{$face}{'[keep_missing_ctrl]'};
8032 0 0       0 warn "cac=$create_a_c #cntrl=$#cntrl pre=$pre_ctrl \@KK=", scalar @KK if $pre_ctrl > @KK;
8033 0         0 splice @KK, $pre_ctrl, 0, map [$_, 0], @cntrl;
8034 0 0       0 splice @KK, 15, 0, [undef, 0] if @KK >= 16; # col=15 is the fake one
8035            
8036 0 0       0 if ($k eq 'DECIMAL') { # may be described both via visual maps and NUMPAD
8037 0 0       0 my @d = @{ $decimal[1] || [] };
  0         0  
8038 0   0     0 defined $KK[$_][0] or $KK[$_] = $d[$_] for 0..$#d; # fill on the second round
8039 0         0 @decimal = ([$k, \@u, $sc, $Used], [@KK]);
8040 0         0 return;
8041             }
8042             # warn "Undefined \$N ==> <<<", join '>>> <<<', map $_->[0], @KK unless defined $N; # SPACE and ABNT_C1 ???
8043 0         0 $self->output_unit_KK($k, \@u, $sc, $Used, $CL, @KK);
8044             }
8045            
8046             sub output_unit_KK($$@) {
                 # output_unit_KK($self, $k, $u, $sc, $Used, $CL, @KK): format the columns @KK of the key $k (scancode $sc).
                 # Every column is [char, index into @$Used, is_dead] and is formatted by format_key(); the columns
                 # which come out as `%%' are queued in @ligatures as [$k, column number, char(s)].
                 # $u holds the [unshifted, shifted] chars of the first layer and (optionally) of the second one;
                 # auto_capslock() on them supplies the bits 1 and 4 of the CapsLock field.
                 # $CL (may be undef) is the list of the entries of this key in the CapsLock layers; if the first of
                 # them has a defined binding, the CapsLock field is based on 2 (shown as `SGCap' when no other bit
                 # is set), and $extra becomes the formatted columns of the additional (CapsLock) line.
                 # Marks $sc and $k as seen.
                 # NOTE(review): in this listing the statement building the return value looks truncated (the
                 # introducer of the here-document ending at EOP is missing after `$fill,').  The callers use the
                 # elements 0..4 of the returned list as scancode, VK name, fill, text, and the CapsLock columns --
                 # confirm against the pristine source.
8047 0     0 0 0 my ($self, $k, $u, $sc, $Used, $CL, @KK) = @_;
8048 0   0     0 my @K = map $self->format_key($_->[0], $_->[2], $Used->[$_->[1] || 0]), @KK;
8049             #warn "keys with ligatures: <@K>" if grep $K[$_] eq '%%', 0..$#K;
8050 0         0 push @ligatures, map [$k, $_, $KK[$_][0]], grep $K[$_] eq '%%', 0..$#K;
8051 0         0 my $keys = join "\t", @K;
8052 0         0 my @kk = map $_->[0], @KK;
                 # VK names shorter than 8 chars get an extra TAB
8053 0 0       0 my $fill = ((8 <= length $k) ? '' : "\t");
                 # The comment part: the chars themselves, control chars being shown as ^X
8054 0 0       0 my $expl = join ", ", map +(defined() ? (0x20 > ord() ? '^'.chr(0x40+ord) : $_) : ' '), @kk;
    0          
8055 0 0       0 my $expl1 = exists $self->{UNames} ? "\t// " . join ", ", map +((defined $_) ? $self->UName($_) : ' '), @kk : '';
    0          
8056 0   0     0 my($CL0, $extra) = ($CL and $CL->[0]);
                 # Ignore the CapsLock layer if it defines nothing on this key
8057 0 0 0     0 undef $CL0 unless $CL0 and @$CL0 and grep defined, map { ($_ and ref $_) ? $_->[0] : $_ } @$CL0;
  0 0 0     0  
      0        
8058             # warn "u0($k) = <$u->[0]>" if defined $u->[0];
8059 0 0       0 my $capslock = (defined $CL0 ? 2 : $self->auto_capslock($u->[0]));
8060             # warn "u1($k) = <$u->[1]>" if defined $u->[1];
8061 0         0 $capslock |= (($self->auto_capslock($u->[1])) << 2);
8062 0 0       0 $capslock = 'SGCap' if $capslock == 2; # Not clear if we can combine a string SGCap with 0x4 in a .klc file
8063 0 0       0 if ($CL0) {
                 # Start with the first two columns (swapped when CapsLock would act as Shift on this key),
                 # then override them by what the CapsLock layer defines
8064 0         0 my $a_cl = $self->auto_capslock($u->[0]);
8065 0 0       0 my @KKK = @KK[$a_cl ? (1,0) : (0,1)];
8066 0 0 0     0 defined(($CL0->[$_] and ref $CL0->[$_]) ? $CL0->[$_][0] : $CL0->[$_]) and $KKK[$_] = $CL0->[$_] for 0, 1;
      0        
8067             # my @c = map { ($_ and ref $_) ? $_->[0] : $_ } @$CL0;
8068             # my @d = map { ($_ and ref $_) ? $_->[2] : {} } @$CL0; # dead
8069             # my @f = map $self->format_key($c[$_], $d[$_], ), 0 .. $#$CL0;
8070             # $extra = [@f];
8071 0   0     0 $extra = [map $self->format_key($_->[0], $_->[2], $Used->[$_->[1] || 0]), @KKK];
8072             }
8073 0         0 $seen_scan{$sc}++;
8074 0         0 $seen_VK{$k}++;
8075 0         0 ($sc, $k, $fill, <
8076             $capslock\t$keys\t// $expl$expl1
8077             EOP
8078             }
8079            
# output_unit0(...): same arguments as output_unit00(), which does the work; returns the finished text
# for the key (plus the additional CapsLock line when there is one), or nothing if output_unit00()
# returns nothing.  If %double_scan_VK names a second key for this scancode + VK name, that key is
# queued (with the same bindings) in @add_scan_VK for output_added_units().
sub output_unit0 ($$$$$$$;$$) {
    my @i = &output_unit00;             # called with our @_ as is
    return unless @i;
    my ($sc, $vk, $fill, $text, $caps) = @i;
    my $twins = $double_scan_VK{uc "$sc $vk"} || '';
    #warn "<<<<< Secondary key <$twins> for <$sc $vk>" if $twins;
    for my $twin (grep $_, split '/', $twins) {
        push @add_scan_VK, [split(/ /, $twin), $fill, $text];
    }
    my $add = '';
    $add = "-1\t-1\t\t0\t" . join("\t", @$caps) . "\n" if $caps;
    return "$sc\t$vk$fill\t$text$add";
}
8088            
# output_added_units($self): the texts for the keys which were postponed by the output_unit*() family:
# the secondary keys queued in @add_scan_VK (unless their scancode or VK name was output already),
# followed by the DECIMAL key stashed in @decimal.
sub output_added_units ($) {
    my ($self) = @_;
    # Cannot duplicate either one...
    my @pending = grep { not($seen_scan{$_->[0]} or $seen_VK{$_->[1]}) } @add_scan_VK;
    if ($decimal[0]) {
        # @decimal = ([$k, $u, $sc, $Used], [@KK]);
        my ($k, $u, $sc, $Used) = @{ $decimal[0] };
        push @pending, [ $self->output_unit_KK($k, $u, $sc, $Used, undef, @{ $decimal[1] }) ];
    }
    return map {
        my $add = ($_->[4] ? "-1\t-1\t\t0\t" . join("\t", @{ $_->[4] }) . "\n" : '');
        "$_->[0]\t$_->[1]$_->[2]\t$_->[3]$add";
    } @pending;
}
8106            
my $enc_UTF16LE;        # the Encode object for UTF-16LE (loaded on demand)

# to_UTF16LE_units($k): the string $k with every char above U+FFFF replaced by its two UTF-16 surrogates
# (as separate chars); a string without such chars is returned as is.
sub to_UTF16LE_units ($) {
    my ($k) = @_;
    return $k if $k =~ /^[\x00-\x{FFFF}]*$/;
    unless ($enc_UTF16LE) {
        require Encode;
        $enc_UTF16LE = Encode::find_encoding('UTF-16LE');
    }
    $enc_UTF16LE or die "Can't arrange encoding to UTF-16LE";
    my $bytes = $enc_UTF16LE->encode($k);
    # Can't use decode() on surrogates: convert the 16-bit units by hand
    return join '', map chr, unpack 'v*', $bytes;
}
8120            
# output_ligatures($self): the lines describing the bindings queued in @ligatures by output_unit_KK():
# VK name, column number, up to 4 UTF-16 units in hex (padded by empty fields) and, if $self->{UNames}
# exists, a comment with the names of the chars.  A repeated VK name + column is skipped with a warning.
sub output_ligatures ($) {
    my ($self) = @_;
    my (@o, %s);
    for my $lig (@ligatures) {
        my ($vk, $col, $chars) = @$lig;
        if ($s{"$vk $col"}++) {
            warn("Repeated LIGATURE $vk $col");
            next;
        }
        my @units = map $self->key2hex($_), split //, to_UTF16LE_units($chars);
        push @units, ('') x 4;
        my $fill = ((8 <= length $vk) ? '' : "\t");
        my @row = ("$vk$fill", $col, @units[0..3]);
        push @row, "// " . join " + ", map $self->UName($_), split //, $chars
            if exists $self->{UNames};
        push @o, join("\t", @row) . "\n";
    }
    return @o;
}
8133            
# base_unit($self, $basesub, $u, $ingroup): the name to use for the key at the position $u.
# Unless $ingroup: the name is derived from the first defined unshifted binding of this position among
# the layers named in @$basesub: upper-cased, a space replaced by `#', and with `#' appended if the
# same name was used already (tracked in %seen); warns if no layer has a binding.
# Returns [0, NAME, CHAR], CHAR being the bound char if the binding is stored as an array, undef otherwise.
# With $ingroup: returns [1, NAME], NAME being produced by the naming routine of the section of
# %start_SEC (entries are [start, count, routine]) containing $u (undef if there is none).
sub base_unit ($$$$) {
    my ($self, $basesub, $u, $ingroup, $k) = (shift, shift, shift, shift);
    if (!$ingroup) {
        my @c = map $self->{layers}{$_}[$u][0], @$basesub;
        my($c) = grep defined, @c;
        # Declare unconditionally: the behaviour of `my $x = ... if COND' is documented as undefined
        my $c0;
        $c0 = $c = $c->[0] if 'ARRAY' eq ref $c;
        warn "base_u($u) undefined" unless defined $c;
        $c .= '#' if $seen{uc $c}++;
        $c = '#' if $c eq ' ';
        $c = uc $c;
        return [0, $c, $c0]
    }                                           # Now do the VK groups
    for my $v (values %start_SEC) {
        $k = $v->[2]($self, $u, $v), last if $v->[0] <= $u and $v->[0] + $v->[1] > $u;
    }
    [1, $k]
}
8151            
# output_unit($self, $face, $layers, $u, $deadkeys, $Used, $canskip, $baseK): output the key at the
# position $u ($u is an ordinal of a key) under the VK name $baseK->[$u] via output_unit0();
# returns nothing for a position without a name.
sub output_unit ($$$$$$$$) {
    my ($self, $face, $layers, $u, $deadkeys, $Used, $canskip, $baseK) = @_;
    my $U = [ map { $self->{layers}{$_}[$u] } @$layers ];       # the entries of this key, by layer
    my $k = $baseK->[$u];
    return unless defined $k;
    return $self->output_unit0($face, $k, $U, $u, $deadkeys, $Used, undef, $canskip);
}
8158             }
8159            
8160             sub output_layout_win ($$$$$$$) {
                 # Call output_unit() for every index of @$baseK, in order, and return the results of that map.
                 # Indices at or above $cnt are passed with a true "canskip" flag.
8161 0     0 0 0 my ($self, $face, $layers, $deadkeys, $Used, $cnt, $baseK) = (shift, shift, shift, shift, shift, shift, shift);
8162             # die "Count of non-VK entries mismatched: $cnt vs ", scalar @{$self->{layers}{$layers->[0]}}
8163             # unless $cnt <= scalar @{$self->{layers}{$layers->[0]}};
8164 0         0 map $self->output_unit($face, $layers, $_, $deadkeys, $Used, $_ >= $cnt, $baseK), 0..$#$baseK;
8165             }
8166            
8167             sub output_VK_win ($$$) {
                 # Emit one output_unit0() result per entry of the face's '[VK_off]' table.
                 # For each key $k the '[VK_off]' value is passed as the position argument and the
                 # '[scancodes]' entry for the same key as the known scancode; no layer data (undef)
                 # and an empty deadkey list are supplied.
                 # Returns the accumulated list. Keys are visited in hash order (no sort is applied).
8168 0     0 0 0 my ($self, $face, $Used, @O) = (shift, shift, shift);
8169 0         0 my $VK = $self->{faces}{$face}{'[VK_off]'};
8170 0         0 for my $k (keys %$VK) {
8171 0         0 my $off = $VK->{$k};
8172 0         0 my $scan = $self->{faces}{$face}{'[scancodes]'}{$k};
8173 0         0 push @O, $self->output_unit0($face, $k, undef, $off, [], $Used, $scan);
8174             # my ($self, $face, $k, $U, $N, $deadkeys, $Used, $known_scancode, $skippable) = (shift, shift, shift, shift, shift, shift, shift, shift, shift);
8175             }
8176             @O
8177 0         0 }
8178            
8179             sub read_deadkeys_win ($$) {
                 # Extract the DEADKEY and KEYNAME_DEAD sections from the text $t of an MSKLC map file.
                 # Both sections are cut out of $t and replaced by the placeholders "DEADKEYS" and
                 # "KEYNAMES_DEAD" (each followed by an empty line).
                 # Returns a 3-element list:
                 #   \%o - for every deadkey id (lower-cased): hash of hex "from" => hex "to"
                 #   \%d - names from KEYNAME_DEAD, keyed by hex id (a lower-cased copy of each key is added)
                 #   $t  - the input text with comments/trailing blanks removed and the sections replaced
                 # Side effect: $self->{'[seen_knames]'} is updated with the names, keyed by character.
8180 0     0 0 0 my ($self, $t, $dead, $next, @p, %o) = (shift, shift, '', '');
8181            
8182 0         0 $t =~ s(\s*//.*)()g; # remove comments
8183 0         0 $t =~ s([^\S\n]+$)()gm; # remove trailing whitespace (including \r!)
8184             # deadkey lines, empty lines, HEX HEX keymap lines
                 # $1 captures the whole run of deadkey material; $2 (lookahead) is the first line after it
8185 0 0       0 $t =~ s/(^(?=DEADKEY)(?:(?:(?:DEADKEY|\s*[0-9a-f]{4,})\s+[0-9a-f]{4,})?(?:\n|\Z))*)(?=(.*))/DEADKEYS\n\n/mi
8186             and ($dead, $next) = ($1, $2);
8187 0 0 0     0 warn "Unknown keyword follows deadkey descriptions in MSKLC map file: `$next'; dead=<$dead>"
8188             if length $next and not $next =~ /^(KEYNAME|LIGATURE|COPYRIGHT|COMPANY|LOCALENAME|LOCALEID|VERSION|SHIFTSTATE|LAYOUT|ATTRIBUTES|KEYNAME_EXT|KEYNAME_DEAD|DESCRIPTIONS|LANGUAGENAMES|ENDKBD)$/i;
8189             # $dead =~ /\S/ or warn "EMPTY DEADKEY section";
8190             #warn "got `$dead' from `$t'";
8191            
8192             # when a pattern has parens, split does not remove the leading empty fields (?!!!)
                 # after dropping that leading field, the split yields (id, body, id, body, ...) pairs
8193 0         0 (undef, my %d) = split /^DEADKEY\s+([0-9a-f]+)\s*\n/im, $dead;
8194 0         0 for my $d (keys %d) {
8195             #warn "split `$d' from `$d{$d}'";
8196 0         0 @p = split /\n+/, $d{$d};
8197 0         0 my @bad;
                 # every non-empty body line must be exactly "HEX HEX"
8198 0 0       0 die "unrecognized part in deadkey map for $d: `@bad'"
8199             if @bad = grep !/^\s*([0-9a-f]+)\s+([0-9a-f]+)$/i, @p;
8200 0         0 %{$o{lc $d}} = map /^\s*([0-9a-f]+)\s+([0-9a-f]+)/i, @p;
  0         0  
8201             }
8202            
8203             # empty lines, HEX "NAME" lines
8204 0 0       0 if ($t =~ s/^KEYNAME_DEAD\n((?:(?:\s*[0-9a-f]{4,}\s+".*")?(?:\n|\Z))*)(?=(.*))/KEYNAMES_DEAD\n\n/mi) {
    0          
8205 0         0 ($dead, $next) = ($1,$2);
8206 0 0 0     0 warn "Unknown keyword follows deadkey names descriptions in MSKLC map file: `$next'"
8207             if length $next and not $next =~ /^(DEADKEY|KEYNAME|LIGATURE|COPYRIGHT|COMPANY|LOCALENAME|LOCALEID|VERSION|SHIFTSTATE|LAYOUT|ATTRIBUTES|KEYNAME_EXT|KEYNAME_DEAD|DESCRIPTIONS|LANGUAGENAMES|ENDKBD)$/i;
8208 0 0       0 $dead =~ /\S/ or warn "EMPTY KEYNAME_DEAD section";
                 # %d is reused here: from now on it maps hex id => quoted name
8209 0         0 %d = map /^([0-9a-f]+)\s+"(.*)"\s*$/i, split /\n\s*/, $dead;
8210 0         0 $d{lc $_} = $d{$_} for keys %d;
8211 0   0     0 $self->{'[seen_knames]'} ||= {};
8212 0         0 @{$self->{'[seen_knames]'}}{map {chr hex $_} keys %d} = values %d; # XXXX Overwrites older values
  0         0  
  0         0  
8213             } elsif ($dead =~ /\S/) {
8214 0         0 warn "no KEYNAME_DEAD section found" if 0;
8215             }
                 # NOTE(review): when no KEYNAME_DEAD section matched, %d still holds the
                 # (deadkey id => raw section body) pairs from the split above - confirm callers expect that.
8216 0         0 \%o, \%d, $t; # %o - translation tables; %d - names; $t is what is left of input
8217             }
8218            
8219             sub massage_template ($$$) {
                 # Fill in a template: every occurrence in $t of a key of the hash ref $r is replaced
                 # by the corresponding value. Keys present in $r but never found in $t are reported
                 # with a single warning. Returns the resulting text.
8220 0     0 0 0 my ($self, $t, $r, %seen, %miss) = (shift, shift, shift);
                 # NOTE(review): the keys are interpolated into the regex without quotemeta, so they
                 # are treated as patterns - presumably all keys are plain identifiers; confirm.
8221 0 0       0 my $keys = join '|', sort {length $b <=> length $a or $a cmp $b} keys %$r; # Prefer matching a longer key
  0         0  
8222 0         0 $t =~ s/($keys)/ # warn "Plugging in `$1'";
8223 0         0 $seen{$1}++, $r->{$1} /ge; # Can't use \b: see SORT_ORDER_ID_ LOCALE_ID
8224 0   0     0 $seen{$_} or $miss{$_}++ for keys %$r;
8225 0 0       0 warn "The following parts missing in the template: ", join ' ', sort keys %miss if %miss;
8226 0         0 $t
8227             }
8228            
8229             # http://msdn.microsoft.com/en-us/library/dd373763
8230             # http://msdn.microsoft.com/en-us/library/dd374060
8231             my $template_win = <<'EO_TEMPLATE';
8232             KBD DLLNAME "LAYOUTNAME"
8233            
8234             COPYRIGHT "(c) COPYR_YEARS COMPANYNAME"
8235            
8236             COMPANY "COMPANYNAME"
8237            
8238             LOCALENAME "LOCALE_NAME"
8239            
8240             LOCALEID "SORT_ORDER_ID_LOCALE_ID"
8241            
8242             VERSION 1.0
8243            
8244             SHIFTSTATE
8245            
8246             BITS_TEMPLATE
8247             ATTRIBS
8248             LAYOUT ;an extra '@' at the end is a dead key
8249            
8250             //SC VK_ Cap COL_HEADERS
8251             //-- ---- ---- COL_EXPL
8252             LAYOUT_KEYS
8253             DO_LIGA
8254             DEADKEYS
8255            
8256             KEYNAME
8257            
8258             01 Esc
8259             0e Backspace
8260             0f Tab
8261             1c Enter
8262             1d Ctrl
8263             2a Shift
8264             36 "Right Shift"
8265             37 "Num *"
8266             38 Alt
8267             39 Space
8268             3a "Caps Lock"
8269             3b F1
8270             3c F2
8271             3d F3
8272             3e F4
8273             3f F5
8274             40 F6
8275             41 F7
8276             42 F8
8277             43 F9
8278             44 F10
8279             45 Pause
8280             46 "Scroll Lock"
8281             47 "Num 7"
8282             48 "Num 8"
8283             49 "Num 9"
8284             4a "Num -"
8285             4b "Num 4"
8286             4c "Num 5"
8287             4d "Num 6"
8288             4e "Num +"
8289             4f "Num 1"
8290             50 "Num 2"
8291             51 "Num 3"
8292             52 "Num 0"
8293             53 "Num Del"
8294             54 "Sys Req"
8295             57 F11
8296             58 F12
8297             5C AX
8298             70 KANA
8299             73 "ABNT C1"
8300             79 CONVERT
8301             7c F13
8302             7d F14
8303             7e F15
8304             7f F16
8305             80 F17
8306             81 F18
8307             82 F19
8308             83 F20
8309             84 F21
8310             85 F22
8311             86 F23
8312             87 F24
8313            
8314             KEYNAME_EXT
8315            
8316             1c "Num Enter"
8317             1d "Right Ctrl"
8318             35 "Num /"
8319             37 "Prnt Scrn"
8320             38 "Right Alt"
8321             45 "Num Lock"
8322             46 Break
8323             47 Home
8324             48 Up
8325             49 "Page Up"
8326             4b Left
8327             4d Right
8328             4f End
8329             50 Down
8330             51 "Page Down"
8331             52 Insert
8332             53 Delete
8333             54 <00>
8334             56 Help
8335             5b "Left Windows"
8336             5c "Right Windows"
8337             5d Application
8338            
8339             KEYNAMES_DEAD
8340            
8341             DESCRIPTIONS
8342            
8343             LOCALE_ID LAYOUTNAME
8344            
8345             LANGUAGENAMES
8346            
8347             LOCALE_ID LANGUAGE_NAME
8348            
8349             ENDKBD
8350            
8351             EO_TEMPLATE
8352             # "
8353            
8354             my $template_osx = <<'EO_TEMPLATE';
8355            
8356            
8357            
8358            
8359            
8360            
8361            
8362            
8363            
8364            
8365            
8366            
8367            
8368            
8369            
8370            
8371            
8372            
8373            
8374            
8375            
8376            
8377            
8378            
8379            
8380            
8381            
8382            
8383            
8384            
8385            
8386            
8387            
8388            
8389            
8390            
8391            
8392            
8393            
8394            
8395            
8396            
8397            
8398            
8399            
8400            
8401            
8402            
8403            
8404            
8405             OSX_KEYMAP_0_AND_COMMAND
8406            
8407            
8408            
8409             OSX_KEYMAP_SHIFT
8410            
8411            
8412            
8413             OSX_KEYMAP_CAPS
8414            
8415            
8416            
8417             OSX_KEYMAP_OPTION
8418            
8419            
8420            
8421             OSX_KEYMAP_OPTION_SHIFT
8422            
8423            
8424            
8425             OSX_KEYMAP_OPTION_CAPS
8426            
8427            
8428            
8429             OSX_KEYMAP_OPTION_COMMAND
8430            
8431            
8432            
8433             OSX_KEYMAP_CTRL
8434            
8435            
8436            
8437             OSX_KEYMAP_COMMAND
8438            
8439            
8440            
8441            
8442             OSX_ACTIONS_BASE
8443            
8444             OSX_ACTIONS
8445            
8446            
8447            
8448             OSX_TERMINATORS_BASE
8449            
8452             OSX_TERMINATORS2
8453            
8454            
8455             EO_TEMPLATE
8456             # "
8457            
8458             sub KEY2hex ($$) {
                 # key2hex() that also accepts an array-ref key description.
                 # A non-array $k is passed straight to key2hex(). For an array ref a one-level copy is
                 # returned whose element 0 has been converted with key2hex(); the remaining elements
                 # are carried over unchanged and the caller's array is not modified.
8459 0     0 0 0 my ($self, $k) = (shift, shift);
8460 0 0       0 return $self->key2hex($k) unless 'ARRAY' eq ref $k;
8461             #warn "see a deadkey `@$k'";
8462 0         0 $k = [@$k]; # deeper copy
8463 0         0 $k->[0] = $self->key2hex($k->[0]);
8464 0         0 $k;
8465             }
8466            
8467             sub linked_faces_2_hex_map ($$$$) {
                 # Convert the link map from face $name to face $b into hex form.
                 #   $inv - true selects 'Face_link_map_INV', false selects 'Face_link_map'
                 # Returns a hash ref: key2hex(key) => KEY2hex(value), or undef for an undefined value.
                 # Dies when $b is a different face and no map for it is stored; when $b is the same
                 # face and no map exists, the result is an empty hash.
8468 0     0 0 0 my ($self, $name, $b, $inv) = (shift, shift, shift, shift);
8469 0         0 my $L = $self->{faces}{$name};
8470 0 0       0 my $remap = $L->{$inv ? 'Face_link_map_INV' : 'Face_link_map'}{$b};
                 # `!=` on references compares their addresses, i.e. "is $b a different face object"
8471 0         0 die "Face `$b' not linked to face `$name'; HAVE: <", join('> <', keys %{$L->{Face_link_map}}), '>'
8472 0 0 0     0 if $self->{faces}{$b} != $L and not $remap;
8473             ### my $cover = $L->{'[coverage_hex]'} or die "Face $name not preprocessed";
8474             # warn "Keys of the Map `$name' -> '$b': <", join('> <', keys %$remap), '>';
8475             # $remap ||= {map +(chr hex $_, chr hex $remap->{$_}), keys %$cover}; # This one in terms of chars, not hex
8476 0         0 my @k = keys %$remap;
8477             # warn "Map `$name' -> '$b': <", join('> <', map +($self->key2hex($_), $self->key2hex($remap->{$_})), @k), '>';
8478 0 0       0 return { map +($self->key2hex($_), (defined $remap->{$_} ? $self->KEY2hex($remap->{$_}) : undef)), @k }
8479             }
8480            
8481             my $dead_descr;
8482             #my %control = split / /, "\n \\n \r \\r \t \\t \b \\b \cC \\x03 \x7f \\x7f \x1b \\x1b \x1c \\x1c \x1d \\x1d";
                 # %control: printable spellings of control characters. The four characters LF, CR, TAB
                 # and BS map to the two-character escapes \n \r \t \b; every other code 1..26 maps to
                 # caret notation (^A .. ^Z). `||=` keeps the escapes for the codes listed first.
8483             my %control = split / /, "\n \\n \r \\r \t \\t \b \\b";
8484             $control{$_->[0]} ||= $_->[1] for map [chr($_), '^'.chr(0x40+$_)], 1..26;
8485             sub control2prt ($$) {
                 # Return a printable form of the character $c: characters other than the codes below
                 # 0x20 and 0x7f are returned unchanged; otherwise the %control spelling is used, falling
                 # back to a \xNN escape (two lower-case hex digits).
8486 0     0 0 0 my($self, $c) = (shift, shift);
8487 0 0 0     0 return $c unless ord $c < 0x20 or ord $c == 0x7f;
8488 0 0       0 $control{$c} or sprintf '\\x%02x', ord $c;
8489             }
8490            
8491             sub dead_with_inversion ($$$$$) {
                 # Resolve the target of an array-ref map entry $to, applying the face's
                 # '[deadkeyInvAltGrKey]' translation when the entry asks for it.
                 #   $is_hex - true when $to->[0] is already in hex form; the result is then hex as well
                 #   $nameF  - face name, used only in the error message
                 #   $H      - the face hash holding '[deadkeyInvAltGrKey]'
                 # Returns $to->[0], translated when element 2 (or element 2 shifted right by 3) equals 3.
                 # Dies when a translation is requested but the table has no entry.
8492 0     0 0 0 my($self, $is_hex, $to, $nameF, $H) = (shift, shift, shift, shift, shift);
8493 0   0     0 my $invert_dead = (3 == ($to->[2] || 0) or 3 == (($to->[2] || 0) >> 3));
8494 0         0 $to = $to->[0];
8495 0 0       0 if ($invert_dead) {
                 # the table is looked up with the hex form of the key
8496 0 0       0 $to = $self->key2hex($to) unless $is_hex;
                 # NOTE(review): $to is overwritten by the lookup before the message is built, so on
                 # failure the message interpolates undef instead of the key that could not be inverted.
8497 0 0       0 defined ($to = $H->{'[deadkeyInvAltGrKey]'}{$to}) or die "Cannot invert prefix key `$to' in face `$nameF'";
8498             # warn "invert $to in face=$nameF, inv=$invertAlt0 --> $inv\n";
8499 0 0       0 $to = $self->key2hex($to) if $is_hex;
8500             }
8501 0         0 $to;
8502             }
8503            
8504             sub output_deadkeys ($$$$$$;$) {
                 # Build the "DEADKEY" text section(s) for deadkey $d of face $nameF.
                 #   $d                     - deadkey id; used as key of '[deadkeyFaceHexMap]' and printed after "DEADKEY"
                 #   $Dead2                 - hash ref (or false) indexed by charhex2key($d); its entry marks targets that are dead keys
                 #   $flip_AltGr_hex        - id compared (by `eq`) with $d to detect the AltGr-flip prefix; may be undef
                 #   $prefix_flippedMap_hex - id printed for the second ('Inv') map, when that map exists
                 #   $OUT_Apple             - optional hash ref; receives $OUT_Apple->{$n}{$d} = [$to, undef, dead flag]
                 #                            for every emitted key $n outside the control range
                 # Returns (true-if-any-keys, generated text, $OUT_Apple).
8505 0     0 0 0 my ($self, $nameF, $d, $Dead2, $flip_AltGr_hex, $prefix_flippedMap_hex, $OUT_Apple) = (shift, shift, shift, shift, shift, shift, shift);
8506 0         0 my $H = $self->{faces}{$nameF};
8507             # warn "emit `$nameF' d=`$d' f=$H->{'[deadkeyFace]'}{$d}";
8508             # if (my $unres = $H->{'[unresolved_imported]'}) {
8509             # warn "Can't resolve `@$unres' to an imported dead key; face=`$nameF'" unless $H->{'[unresolved_imported_warned]'}++;
8510             # }
8511             #warn "See dead2 in <$nameF> for <$d>" if $dead2;
8512 0   0     0 my $dead2 = ($Dead2 || {})->{$self->charhex2key($d)} || {};
                 # entries of the SPACE key with a trailing '@' removed (a lone '@' is kept)
8513 0 0       0 my(@sp, %sp) = map {(my $in = $_) =~ s/(?<=.)\@$//s; $in} @{ ($self->get_VK($nameF))->{SPACE} || [] };
  0         0  
  0         0  
  0         0  
8514 0         0 @sp = map $self->charhex2key($_), @sp;
                 # %sp: character => position; the first character after the scancode gets 0, so it
                 # tests false in the `$sp{...}` checks used for ordering below
8515 0         0 @sp{@sp[1..$#sp]} = (0..$#sp); # The leading elt is the scancode
8516            
                 # the plain map and, when defined, the 'Inv' map of this deadkey
8517 0         0 my @maps = map $H->{"[deadkeyFaceHexMap$_]"}{$d}, '', 'Inv';
8518 0 0       0 pop @maps unless defined $maps[-1];
                 # $D keeps the original id; @DD holds the id printed for map 0 and for map 1
8519 0         0 my($D, @DD) = ($d, $d, $prefix_flippedMap_hex);
8520 0         0 my ($OUT, $keys) = '';
8521             # There are 3 situations:
8522             # 0) process one map without AltGr-inversion; 1) Process one map which is the AltGr-inversion of the principal one;
8523             # 2) process one map with AltGr-inversion (in 1-2 the inversion may have a customization put over it).
8524             # The problem is to recognize when deadkeys in the inversion come from non-inverted one, or from customization
8525             # And, in case (1), we must consider flip_AltGr specially... (the case (2) is now treated during face preparation)
8526 0   0     0 my($is_invAltGr_Base_with_chain, $AMap, $default) = ($D eq ($flip_AltGr_hex || 'n/a') and $H->{'[have_AltGr_chain]'});
8527 0         0 $default = $self->default_char($nameF);
8528 0 0       0 $default = $self->key2hex($default) if defined $default;
8529 0 0 0     0 if ($#maps or $is_invAltGr_Base_with_chain) { # One of the maps we will process is AltGr-inverted; calculate AltGr-inversion
8530 0         0 $self->faces_link_via_backlinks($nameF, $nameF, 'no_ini'); # Create AltGr-invert self-mapping
8531 0         0 $AMap = $self->linked_faces_2_hex_map($nameF, $nameF, 1);
8532             #warn "deadkey=$D flip=$flip_AltGr_hex" if defined $default;;
8533             }
8534 0         0 my($docs, $map_AltGr_over, $over_dead2) = ($H->{'[prefixDocs]'}{$D}, {}, {});
8535 0 0       0 if ($is_invAltGr_Base_with_chain) {
8536 0 0       0 if (my $override_InvAltGr = $H->{'[InvAltGrFace]'}{''}) { # NOW: needed only for invAltGr
8537 0         0 $map_AltGr_over = $self->linked_faces_2_hex_map($nameF, $override_InvAltGr);
8538             }
8539 0 0 0     0 $over_dead2 = $Dead2->{$self->charhex2key($flip_AltGr_hex)} || {} if defined $flip_AltGr_hex; # used in CyrPhonetic v0.04
                 # for the inverted base face the dead-target table is the union of '[DEAD]' and '[dead_in_VK]'
8540 0         0 $dead2 = { %{ $H->{'[DEAD]'} }, %{ $H->{'[dead_in_VK]'} } };
  0         0  
  0         0  
8541             # $docs ||= 'AltGr-inverted base face';
8542             }
                 # optional extra bindings from '[Prefix_Base_Altern]' (plain and inverted); the double
                 # braces form a bare block so that `last` below can leave it early
8543 0         0 my @enhMap = ({}, {});
8544 0 0 0     0 if (!$OUT_Apple and ($D ne ($flip_AltGr_hex || 'n/a')) and defined( my $extra = $H->{'[Prefix_Base_Altern]'} )) {{
      0        
      0        
8545 0         0 $self->export_layers($extra, $nameF); # Process recipes
  0         0  
8546 0 0       0 my $dF = $self->{faces}{$nameF}{'[deadkeyFace]'}{$d} or last; # may be Compose etc
8547             # or warn "d=$d; <", join(' ', keys %{$self->{faces}{$nameF}{'[deadkeyFace]'}}), '>';
8548 0         0 $self->face_make_backlinks($extra, undef, undef, 'skipfix'); # no prefer-1st/last; without skipfix errors on 2nd call
8549 0         0 $self->faces_link_via_backlinks($extra, $dF);
8550 0         0 @enhMap = map $self->linked_faces_2_hex_map($extra, $dF, $_), 0, 1;
8551             }}
8552            
8553             # warn "output map for `$D' invert=", !!$is_invAltGr_Base_with_chain, ' <',join('> <', sort keys %$dead2),'>';
8554 0         0 for my $invertAlt0 (0..$#maps) {
8555 0   0     0 my $invertAlt = $invertAlt0 || $is_invAltGr_Base_with_chain;
8556 0         0 my $map = $maps[$invertAlt0];
8557 0         0 $d = $DD[$invertAlt0];
8558 0         0 my $enhMap = $enhMap[$invertAlt0];
                 # entries of the real map take precedence over the extra ones
8559 0         0 $map = {%$enhMap, %$map};
8560 0 0       0 my $docs1 = (defined $docs ? sprintf("\t// %s%s", ($invertAlt0 ? 'AltGr inverted: ' : ''), $docs) : '');
    0          
8561 0         0 $OUT .= "DEADKEY\t$d$docs1\n\n";
8562 0         0 my $OUT_Apple_map = $d;
8563             # Good order: first alphanum, then punctuation, then space
8564 0         0 my @keys = sort keys %$map; # Sorting not OK for 6-byte keys - but can't have them on Win
8565             @keys = (grep(( lc(chr hex $_) ne uc(chr hex $_)and not $sp{chr hex $_} ), @keys),
8566             grep(((lc(chr hex $_) eq uc chr hex $_ and (chr hex $_) !~ /\p{Blank}/) and not $sp{chr hex $_}), @keys),
8567 0   0     0 grep((((lc(chr hex $_) eq uc chr hex $_ and (chr hex $_) =~ /\p{Blank}/) or $sp{chr hex $_}) and $_ ne '0020'), @keys),
      0        
      0        
8568             grep( $_ eq '0020', @keys)); # make SPACE last
8569 0         0 for my $n (@keys) { # Not OK for 6-byte keys (impossible on Win)
8570             # warn "doing $n\n";
8571 0         0 my ($to, $import_dead, $EXPL) = $map->{$n};
                 # array-ref entry: element 3 is an explanation text, element 2 a flag word (>= 1 means dead)
8572 0 0 0     0 if ($to and 'ARRAY' eq ref $to) {
8573 0         0 $EXPL = $to->[3];
8574 0 0       0 $EXPL =~ s/(?=\p{NonspacingMark})/ /g if $EXPL;
8575 0   0     0 $import_dead = (1 <= ($to->[2] || 0)); # was: exportable; now: any dead
8576 0         0 $to = $self->dead_with_inversion('hex', $to, $nameF, $H);
8577             }
8578 0 0 0     0 warn "0000: face `$nameF' d=`$d': $n --> $to" if $to and $to eq '0000';
8579 0         0 my $map_n = $map->{$n};
8580 0 0 0     0 $map_n = $map_n->[0] if $map_n and ref $map_n;
                 # keys or values above U+FFFF are recorded in '[32-bit]'; such a key is dropped, such a
                 # value is replaced by '[DeadChar_32bitTranslation]' when that is defined
8581 0 0 0     0 $H->{'[32-bit]'}{chr hex $map_n}++, next if hex $n > 0xFFFF and $map_n; # Cannot be put in a map...
8582 0 0 0     0 if ($to and hex $to > 0xFFFF) { # Value cannot be put in a map...
8583             # warn "32-bit: n=$n map{n}=$map_n to=$to";
8584 0         0 $H->{'[32-bit]'}{chr hex $map_n}++;
8585 0 0       0 next unless defined ($to = $H->{'[DeadChar_32bitTranslation]'});
8586 0         0 $to =~ s/^\s+//; $to =~ s/\s+$//;
  0         0  
8587 0         0 $to = $self->key2hex($to);
8588             }
8589 0         0 my $was_to = $to;
                 # a false target falls back to the face default; with no default either, the key is skipped
8590 0 0 0     0 $to ||= $default or next;
8591             # Tricky: dead keys may come from the override map (which is indexed by NOT-INVERTED KEYS!); it is already merged into
8592             # the map - unless for inverted base face
8593             my ($alt_n, $use_dead2) = (($is_invAltGr_Base_with_chain and defined $map_AltGr_over->{$n})
8594             ? ($n, $over_dead2)
8595 0 0 0     0 : (($invertAlt ? $AMap->{$n} : $n), $dead2));
    0          
8596 0 0 0     0 $alt_n = $alt_n->[0] if $alt_n and ref $alt_n; # AMap may have "complex" values
8597             #warn "$D --> $d, `$n', `$alt_n', `$AMap->{$n}'; `$map_AltGr_over->{$n}' i=$invertAlt i0=$invertAlt0 d=$use_dead2->{chr hex $alt_n}";
8598             #warn "... n=`$n', alt=`$alt_n' Amap=`$AMap->{$n}'\n" if $AMap->{$n};
                 # '@' marks the target as a dead key in the emitted line
8599 0 0 0     0 my $DEAD = ( (defined $alt_n and $use_dead2->{chr hex $alt_n}) ? '@' : '' );
8600             #warn "AltGr flip: $nameF:$D: $n --> $H->{'[dead2_AltGr_chain]'}{$D}" if $n eq ($flip_AltGr_hex || 'n/a');
8601 0         0 my $from = $self->control2prt(chr hex $n);
8602             # This is now done inside the map:
                 # (the leading `0 and` disables this branch; it never executes)
8603 0         0 if (0 and (hex $n) == hex ($flip_AltGr_hex || 'ffffff') and @maps == 2 and !$invertAlt) {
8604             if (defined $was_to or $DEAD) {
8605             warn "AltGr_Flip key=", hex $n, " overwrites '$was_to', DEAD=", $DEAD||$import_dead||0, " on face=$nameF\[$d]";
8606             }
8607             ($DEAD, $to) = ('@', $DD[1]); # Join Inv to not-Inv on $flip_AltGr_hex; Do not overwrite existing binding... Warn???
8608             }
                 # a non-dead control-character target reached from a control-character key is replaced by the default
8609 0 0 0     0 $to = $default
      0        
      0        
      0        
      0        
      0        
8610             if !($DEAD or $import_dead)
8611             and defined $default and (0x7f == hex $to or 0x20 > hex $to) and (0x7f == hex $n or 0x20 > hex $n);
                 # a dead target equal to the deadkey being emitted would loop back immediately
8612 0 0 0     0 if (($DEAD or $import_dead) and $d eq $to) {
      0        
8613 0 0 0     0 if (($flip_AltGr_hex or 'n/a') eq $d) { # This is what routinely happens in Flip_AltGr face
8614 0         0 $import_dead = $DEAD = '';
8615 0   0     0 $to = $H->{'[DeadChar_32bitTranslation]'} || '003f'; # ? = U+003f
8616 0         0 $to =~ s/^\s+//; $to =~ s/\s+$//;
  0         0  
8617 0         0 $to = $self->key2hex($to);
8618 0         0 $EXPL = 'removal of immediate deadkey loop';
8619             } else {
8620 0         0 warn "Immediate deadkey loop: face `$nameF' d=`$d': $n --> $to";
8621             }
8622             }
8623 0 0       0 my $expl = exists $self->{UNames} ? "\t// " . join "\t-> ", # map $self->UName($_),
8624             # chr hex $n, chr hex $map->{$n} : '';
8625             $self->UName(chr hex $n), $self->UName(chr hex $to, 'verbose', 'vbell') : '';
8626 0 0 0     0 $expl .= " (via $EXPL)" if $expl and $EXPL;
8627 0         0 my $to1 = $self->control2prt(chr hex $to);
8628             # warn "Both import_dead and DEAD properties hold for `$from' --> '$to1' via deadkey $d face=$nameF" if $DEAD and $import_dead;
8629 0 0       0 $DEAD = '@' if $import_dead;
                 # emitted line: "<from hex> TAB <to hex>[@] TAB // <from> -> <to>[ names]"
8630 0         0 $OUT .= sprintf "%s\t%s%s\t// %s -> %s%s\n", $n, $to, $DEAD, $from, $to1, $expl;
8631 0 0 0     0 $OUT_Apple->{$n}{$OUT_Apple_map} = [$to, undef, $DEAD && 1] if $OUT_Apple and 0x20 <= hex $n and 0x7f != hex $n;
      0        
      0        
8632             }
8633 0         0 $OUT .= "\n";
                 # counts the candidate keys of the first non-empty map (including keys skipped above)
8634 0   0     0 $keys ||= @keys;
8635             }
8636 0 0       0 warn "DEADKEY $d for face `$nameF' empty" unless $keys;
8637 0         0 (!!$keys, $OUT, $OUT_Apple)
8638             }
8639            
8640             sub massage_diacritics ($) { # "
                 # Preprocess $self->{Diacritics} (name => array ref of strings) into two tables:
                 #   $self->{'[diacritics]'}{name} - array ref of array refs: the characters of every
                 #                                   string, with \p{Blank} characters removed
                 #   $self->{'[map2diac]'}{char}   - the diacritic name under which the character
                 #                                   occurs at the smallest position
                 # Positions are counted separately inside strings 0..3 joined together and inside
                 # strings 4.. joined together.
8641 0     0 0 0 my ($self) = (shift);
8642 0         0 my %char2dia;
8643 0         0 for my $dia (sort keys %{$self->{Diacritics}}) { # Make order deterministic
  0         0  
8644 0         0 my @v = map { (my $v = $_) =~ s/\p{Blank}//g; $v } @{ $self->{Diacritics}{$dia} };
  0         0  
  0         0  
  0         0  
8645             # $self->{'[map2diac]'}{$_} = $dia for split //, join '', @v; # XXXX No check for duplicates???
8646 0         0 my @vv = map [ split // ], @v;
8647 0         0 for my $cc ( [ map @$_, @vv[0..3] ], [ map @$_, @vv[4..$#v] ] ) { # modifiers, combining
                 # record at which position each character appears; a later name at the same position overwrites an earlier one
8648 0         0 $char2dia{$cc->[$_]}{$_} = $dia for 0..$#$cc; # XXXX No check for duplicates at the same distance???
8649             }
8650 0         0 $self->{'[diacritics]'}{$dia} = \@vv;
8651             }
8652 0         0 for my $c (keys %char2dia) {
8653 0         0 my @pos = sort {$a <=> $b} keys %{ $char2dia{$c} };
  0         0  
  0         0  
8654             # warn("map2diac( $c ): @pos; ", join '; ', values %{ $char2dia{$c} });
8655 0         0 $self->{'[map2diac]'}{$c} = $char2dia{$c}{$pos[0]}; # prefer the earliest possible occurence
8656             }
8657             }
8658            
8659             sub extract_diacritic ($$$$$$@) {
                 # Pick character number $idx (0-based) out of a flattened list of character groups.
                 #   $dia, $which - used only in the error message
                 #   $need        - indices at or above $need may be missing without an error
                 #   $skip2       - false: groups are concatenated as they are; true: the first two
                 #                  characters of every group after the first are moved to the end of the
                 #                  list; 'skip2-include0' additionally re-appends the first two of the first group
                 #   remaining args - array refs of characters (the groups)
                 # Only characters with code >= 0x80 are counted. Returns the selected character (may be undef).
                 # Dies when $idx < $need and no character is available at $idx.
8660 0     0 0 0 my ($self, $dia, $idx, $which, $need, $skip2, @elt0) = (shift, shift, shift, shift, shift, shift);
8661 0         0 my @v = map @$_, my $elt0 = shift; # first one full
8662 0 0       0 push @v, map @$_[($skip2 ? 2 : 0)..$#$_], @_; # join the rest, omitting the first 2 (assumed: accessible in other ways)
8663 0 0 0     0 @elt0 = $elt0 if $skip2 and $skip2 eq 'skip2-include0';
                 # the skipped leading pairs go last; undefined slots of short groups are dropped
8664 0 0       0 push @v, grep defined, map @$_[0..1], @elt0, @_ if $skip2;
8665             # @v = grep +((ord $_) >= 128 and $_ ne $dia), @v;
8666 0         0 @v = grep +(ord $_) >= 0x80, @v;
8667 0 0 0     0 die "diacritic ` $dia ' has no $which no.$idx (0-based) assigned"
8668             unless $idx >= $need or defined $v[$idx];
8669             # warn "Translating for dia=<$dia>: idx=$idx <$which> -> <$v[$idx]> of <@v>" if defined $v[$idx];
8670 0         0 return $v[$idx];
8671             }
8672            
8673             sub diacritic2self ($$$$$$$$$) {
                 # Decide what the diacritic prefix $dia produces when followed by the key binding $c.
                 #   $dia         - a character listed in $self->{'[map2diac]'} (dies otherwise)
                 #   $c           - the binding: a character, or an array ref whose element 0 is the
                 #                  character and element 2 a prefix-key flag; undef is returned unchanged
                 #   $face        - face name, used to look up '[Flip_AltGr_Key]'
                 #   $N           - index of the binding's group within the key (see diacritic2self_2)
                 #   $space       - hash ref: character => slot number, for the characters bound on SPACE
                 #   $c_base      - the key's [0][0] binding, as passed by diacritic2self_2
                 #   $c_noalt     - the group-0 binding at the same shift position
                 #   $seen_before - hash ref of non-prefix characters already processed on this key
                 # Returns the selected character, or undef when no rule applies. The rules below are
                 # tried in order and the first match returns; extract_diacritic() may die.
8674 0     0 0 0 my ($self, $dia, $c, $face, $N, $space, $c_base, $c_noalt, $seen_before) = (shift, shift, shift, shift, shift, shift, shift, shift, shift);
8675             # warn("Translating for dia=<$dia>: got undef"),
8676 0 0       0 return $c unless defined $c;
8677             # $c = $c->[0] if 'ARRAY' eq ref $c; # Prefix keys behave as usual keys
8678             # return undef if
8679 0   0     0 my $prefix = (ref $c and $c->[2]); # Ignore deadkeys (unless we act on $c_base or $c_noalt - UNIMPLEMENTED);
8680 0   0     0 $_ and 'ARRAY' eq ref $_ and $_ = $_->[0] for $c, $c_base, $c_noalt; # Prefix keys behave as usual keys
      0        
8681             #warn " Translating for dia=<$dia>: got <$c>";
8682 0 0       0 die "` $dia ' not a known diacritic" unless my $name = $self->{'[map2diac]'}{$dia};
8683 0 0       0 my $v = $self->{'[diacritics]'}{$name} or die "Panic!";
                 # SPACE yields the first non-7bit character of the first group
8684 0 0       0 my ($first) = grep 0x80 <= ord, @{$v->[0]} or die "diacritic ` $dia ' does not define any non-7bit modifier";
  0         0  
8685 0 0       0 return $first if $c eq ' ';
8686 0         0 my $spaces = keys %$space;
8687 0         0 my $flip_AltGr = $self->{faces}{$face}{'[Flip_AltGr_Key]'};
8688 0 0       0 $flip_AltGr = $self->charhex2key($flip_AltGr) if defined $flip_AltGr;
8689 0 0       0 $flip_AltGr = 'n/a' unless defined $flip_AltGr;
                 # NOTE(review): $is_flip_AltGr is not referenced again within this sub
8690 0   0     0 my $is_flip_AltGr = (defined $flip_AltGr and $prefix and $c eq $flip_AltGr);
                 # the diacritic's own prefix key pressed again: first character of group 4
8691 0 0 0     0 if ($c eq $dia and $prefix) {
8692             #warn "Translating2combining dia=<$dia>: got <$c> --> <$v->[4][0]>";
8693             # This happens with caron which reaches breve as the first:
8694             # warn "The diacritic ` $dia ' differs from the first non-7bit entry ` $first ' in its list" unless $dia eq $first;
8695 0 0       0 die "diacritic ` $dia ' has no default combining char assigned" unless defined $v->[4][0];
8696 0         0 return $v->[4][0];
8697             }
                 # NOTE(review): the fallback has 7 elements (indices 0..6) but $limits->[7] is read
                 # below, which is then undef - confirm whether 8 elements were intended.
8698 0   0     0 my $limits = $self->{Diacritics_Limits}{ALL} || [(0) x 7];
8699 0 0 0     0 if ($space->{$c}) { # SPACE is handled above (we assume it is on index 0)...
    0 0        
    0 0        
8700             # ~ and ^ have only 3 spacing variants; one of them must be on ' ' - and we omit the first 2 of non-principal block...
8701 0         0 return $self->extract_diacritic($dia, $space->{$c}, 'spacing variant', $limits->[0], 'skip2', @$v[0..3]);
8702             } elsif (0 <= (my $off = index "\r\t\n\x1b\x1d\x1c\b\x7f\x1e\x1f\x00", $c)
8703             and not $prefix) { # Enter, Tab, C-Enter, C-[, C-], C-\, Bspc, C-Bspc, C-^, C-_, C-@
8704             # ~ and ^ have only 3 spacing variants; one of them must be on ' '
8705 0         0 return $self->extract_diacritic($dia, $spaces + $off, 'spacing variant', $limits->[0], 'skip2', @$v[0..3]);
8706             } elsif (!$spaces and $c =~ /^\p{Blank}$/ and not $prefix) { # NBSP and, (eg) Thin space 2007 -> second/third modifier
8707             # ~ and ^ have only 3 spacing variants; one of them must be on ' '
8708 0         0 my @pre = grep /^\p{Blank}$/, keys %$seen_before; # no prefix keys in $seen_before
8709 0 0       0 push @pre, 'something' unless $seen_before->{' '}; # there is no sense to address slot number 0
8710 0         0 return $self->extract_diacritic($dia, scalar @pre, 'spacing variant', $limits->[0], 'skip2', @$v[0..3]);
8711             }
                 # NOTE(review): `and` binds tighter than `or`, so this condition (and the two similar
                 # ones for "/" "?" and "_" "-" below) parses as  A or (B and not $prefix); the
                 # `not $prefix` test therefore does not apply to the first alternative - confirm intent.
8712 0 0 0     0 if ($c eq "|" or $c eq "\\" and not $prefix) {
      0        
8713             #warn "Translating2vertical dia=<$dia>: got <$c> --> <$v->[4][0]>"; # Skip2 would hurt, since macron+\ is defined:
8714 0         0 return $self->extract_diacritic($dia, ($c eq "|"), 'vertical+etc spacing variant', $limits->[2], !'skip2', @$v[2..3]);
8715             }
8716 0 0 0     0 if ($N == 1 and $c_noalt and ($c_noalt eq "|" or $c_noalt eq "\\")) {
      0        
      0        
8717             #warn "Translating2vertical dia=<$dia>: got <$c> --> <$v->[4][0]>"; # Skip2 would hurt, since macron+\ is defined:
8718 0         0 return $self->extract_diacritic($dia, ($c_noalt eq "|"), 'vertical+dotlike combining', $limits->[6], 'skip2', @$v[6,7,4,5]);
8719             }
8720 0 0 0     0 if ($c eq "/" or $c eq "?" and not $prefix) {
      0        
8721 0         0 return $self->extract_diacritic($dia, ($c eq "?"), 'prime-like+etc spacing variant', $limits->[3], 'skip2', @$v[3]);
8722             }
8723 0 0 0     0 if ($c_noalt and ($c_noalt eq "'" or $c_noalt eq '"')) {
      0        
8724 0         0 return $self->extract_diacritic($dia, 1 + ($c_noalt eq '"') + 2*$N, 'combining', $limits->[4], 'skip2', @$v[4..7]); # 1 for double-prefix
8725             }
8726 0 0 0     0 if ($c eq "_" or $c eq "-" and not $prefix) {
      0        
8727 0         0 return $self->extract_diacritic($dia, ($c eq "_"), 'lowered+etc spacing variant', $limits->[1], 'skip2', @$v[1..3]);
8728             }
8729 0 0 0     0 if ($N == 1 and $c_noalt and ($c_noalt eq "_" or $c_noalt eq "-")) {
      0        
      0        
8730 0         0 return $self->extract_diacritic($dia, ($c_noalt eq "_"), 'lowered combining', $limits->[5], 'skip2', @$v[5..7,4]);
8731             }
8732 0 0 0     0 if ($N == 1 and $c_noalt and ($c_noalt eq ";" or $c_noalt eq ":")) {
      0        
      0        
8733 0         0 return $self->extract_diacritic($dia, ($c_noalt eq ":"), 'combining for symbols', $limits->[7], 'skip2', @$v[7,4..6]);
8734             }
                 # the position of the base character inside the literal string selects the combining variant
8735 0 0 0     0 if ($N == 1 and defined $c_base and 0 <= (my $ind = index "`1234567890=[],.'", $c_base)) {
      0        
8736 0         0 return $self->extract_diacritic($dia, 2 + $ind, 'combining', $limits->[4], 'skip2-include0', @$v[4..7]); # -1 for `, 1+2 for double-prefix and AltGr-/?
8737             }
8738 0 0 0     0 if ($N == 0 and 0 <= (my $ind = index "[{]}", $c) and not $prefix) {
      0        
8739 0         0 return $self->extract_diacritic($dia, 2 + $ind, 'combining for symbols', $limits->[7], 'skip2-include0', @$v[7,4..6]);
8740             }
8741 0 0 0     0 if ($N == 1 and $c_noalt and ($c_noalt eq "/" or $c_noalt eq "?")) {
      0        
      0        
8742 0         0 return $self->extract_diacritic($dia, 6 + ($c_noalt eq "?"), 'combining for symbols', $limits->[7], 'skip2-include0', @$v[7,4..6]);
8743             }
8744 0         0 return undef;
8745             }
8746            
8747             sub diacritic2self_2 ($$$$$$) { # Takes a key: array of arrays [lc,uc]
                 # Apply diacritic2self() to every binding of one key.
                 #   $c - the key: array ref of groups, each group an array ref of bindings (one per shift position)
                 # Returns a list with one array ref per group, holding the diacritic2self() result for
                 # each shift position of that group.
8748 0     0 0 0 my ($self, $dia, $c, $face, $space, @out, %seen) = (shift, shift, shift, shift, shift);
8749 0         0 my $c0 = $c->[0][0]; # Base character
8750 0         0 for my $N (0..$#$c) {
8751 0         0 my($c1, @res) = $c->[$N];
8752 0         0 for my $shift (0..$#$c1) {
8753 0         0 my($c2, $pref) = $c1->[$shift];
                 # also passed: the base character and the group-0 binding at the same shift position
8754 0         0 push @res, $self->diacritic2self($dia, $c2, $face, $N, $space, $c0, $c->[0][$shift], \%seen);
                 # %seen records the plain (non-prefix) characters already handled on this key;
                 # the update happens after the call, so a binding never sees itself in %seen
8755 0 0       0 $pref = $c2->[2], $c2 = $c2->[0] if ref $c2;
8756 0 0 0     0 $seen{$c2}++ if defined $c2 and not $pref;
8757             }
8758 0         0 push @out, \@res;
8759             }
8760             @out
8761 0         0 }
8762            
8763             # Combining stuff:
8764             # perl -C31 -MUnicode::UCD=charinfo -le 'sub n($) {(charinfo(ord shift) || {})->{name}} for (0x20..0x10ffff) {next unless (my $c = chr) =~ /\p{NonspacingMark}/; (my $n = n($c)) =~ /^COMBINING\b/ or next; printf qq(%04x\t%s\t%s\n), $_, $c, $n}' >cc
8765             # perl -C31 -MUnicode::UCD=charinfo -le 'sub n($) {(charinfo(ord shift) || {})->{name}} for (0x20..0x10ffff) {next unless (my $c = chr) =~ /\p{NonspacingMark}/; (my $n = n($c)) =~ /^COMBINING\b/ and next; printf qq(%04x\t%s\t%s\n), $_, $c, $n}' >cc
8766            
8767             sub cache_dialist ($@) { # downstream, it is crucial that a case pair comes from "one conversion"
8768 0     0 0 0 my ($self, %seen, %caseseen, @out) = (shift);
8769 0         0 warn("caching dia: [@_]") if warnCACHECOMP;
8770 0         0 for my $d (@_) {
8771 0 0       0 next unless my $h = $self->{Compositions}{$d};
8772 0         0 $seen{$_}++ for keys %$h;
8773             }
8774 0         0 for my $c (keys %seen) {
8775 0 0       0 next if $caseseen{$c};
8776             # uc may include a wrong guy: uc(ſ) is S, and this may break the pair s/S if ſ comes before s, and S gets a separate binding;
8777             # so be very conservative with which case pair we include...
8778 0 0 0     0 my @case = grep { $_ ne $c and $seen{$_} and lc $_ eq lc $c } lc $c, uc $c or next;
  0 0       0  
8779 0         0 push @case, $c;
8780 0         0 $caseseen{$_} = \@case, delete $seen{$_} for @case;
8781             } # Currently (?), downstream does not distinguish case pairs from Shift-pairs...
8782 0         0 for my $cases ( values %caseseen, map [$_], keys %seen ) { # To avoid pairing symbols, keep them in separate slots too
8783 0         0 my (@dia, $to);
8784 0         0 for my $dia (@_) {
8785 0 0       0 push @dia, $dia if grep $self->{Compositions}{$dia}{$_}, @$cases;
8786             }
8787 0         0 for my $diaN (0..$#dia) {
8788             $to = $self->{Compositions}{$dia[$diaN]}{$_} and
8789             (warnCACHECOMP and warn("cache dia; c=`$_' of `@$cases'; dia=[$dia[$diaN]]")),
8790 0   0     0 $out[$diaN]{$_} = $to for @$cases;
8791             }
8792             }
8793             #warn("caching dia --> ", scalar @out);
8794             @out
8795 0         0 }
8796            
8797             my %cached_aggregate_Compositions;
8798             sub dia2list ($$) {
8799 0     0 0 0 my ($self, $dia, @dia) = (shift, shift);
8800             #warn "Split dia `$dia'";
8801 0 0       0 if ((my ($pre, $mid, $post) = split /(\+|--)/, $dia, 2) > 1) { # $mid is not counted in that "2"
8802 0         0 for my $p ($self->dia2list($pre)) {
8803 0         0 push @dia, map "$p$mid$_", $self->dia2list($post);
8804             }
8805             # warn "Split dia to `@dia'";
8806 0         0 return @dia;
8807             }
8808 0 0       0 return $dia if $dia =~ /^!?\\/; # (De)Penalization lists
8809 0         0 $dia = $self->charhex2key($dia);
8810 0 0       0 unless ($dia =~ /^-?(\p{NonspacingMark}|<(?:font=)?[-\w!]+>|(maybe_)?[ul]c(first)?|dectrl)$/) {
8811 0 0       0 die "` $dia ' not a known diacritic" unless my $name = $self->{'[map2diac]'}{$dia};
8812 0 0       0 my $v = $self->{'[diacritics]'}{$name} or die "A spacing character <$dia> was requested to be treated as a composition one, but we do not know translation";
8813 0 0       0 die "Panic!" unless defined ($dia = $v->[4][0]);
8814             }
8815 0 0       0 if ($dia =~ /^(-)?<(reverse-)?any(1)?-(other-)?\b([-\w]+?)\b((?:-![-\w]+\b)*)>$/) {
8816 0   0     0 my($neg, $rev, $one, $other, $match, $rx, $except, @except)
8817             = ($1||'', $2, $3, $4, $5, "(?:(?
8818 0         0 my $cached;
8819 0         0 (my $dia_raw = $dia) =~ s/^-//;
8820 0 0       0 $cached = $cached_aggregate_Compositions{$dia_raw} and return map "$neg$_", @$cached;
8821            
8822 0         0 @except = map { s/^(?=\w)/\\b/; s/(?<=\w)$/\\b/; $_} @except;
  0         0  
  0         0  
  0         0  
8823 0 0       0 $except = join('|', @except[1..$#except]), $except = qr($except) if @except;
8824             #warn "Exceptions: $except" if @except;
8825 0         0 $rx =~ s/-/\\b\\W+\\b/g;
8826 0         0 my ($A, $B, $AA, $BB);
8827 0         0 my @out = keys %{$self->{Compositions}};
  0         0  
8828 0         0 @out = grep !/^Cached\d+=
8829 0 0 0     0 @out = grep {length > 1 ? /$rx/ : (lc $self->UName($_) || '') =~ /$rx/ } @out;
  0         0  
8830 0 0 0     0 @out = grep {length > 1 ? !/$except/ : (lc $self->UName($_) || '') !~ /$except/ } @out;
  0         0  
8831             # make before ; penalize those with and/over inside
8832 0         0 @out = sort {($A=$a) =~ s/>/\cA/g, ($B=$b) =~ s/>/\cA/g; ($AA=$a) =~ s/\w+\W*/a/g, ($BB=$b) =~ s/\w+\W*/a/g; # Number of words
  0         0  
  0         0  
8833 0 0 0     0 /.\b(and|over)\b./ and s/^/~/ for $A,$B; $AA cmp $BB or $A cmp $B or $a cmp $b} @out;
  0   0     0  
8834 0 0       0 @out = grep length($match) != length, @out if $other;
8835 0 0       0 @out = grep !/\bAND\s/, @out if $one;
8836 0 0       0 @out = reverse @out if $rev; # xor $reverse;
8837 0 0 0     0 if (!dontCOMPOSE_CACHE and @out > 1 and not $neg) { # Optional caching; will modify composition tables
8838 0         0 my @cached = $self->cache_dialist(@out); # but not decomposition ones, hence `not $neg'
8839 0         0 @out = map "Cached$_=$dia_raw", 0..$#cached;
8840 0         0 $self->{Compositions}{$out[$_]} = $cached[$_] for 0..$#cached;
8841 0         0 $cached_aggregate_Compositions{$dia} = \@out;
8842             }
8843 0 0       0 @out = map "-$_", @out if $neg;
8844 0         0 return @out;
8845             } else { # etc
8846             #warn "Dia=`$dia'";
8847 0         0 return $dia;
8848             }
8849             }
8850            
8851             sub flatten_arrays ($$) {
8852 0     0 0 0 my ($self, $a) = (shift, shift);
8853 0 0       0 warn "method flatten_arrays() takes one argument" if @_;
8854 0 0 0     0 return $a unless ref($a || '') eq 'ARRAY';
8855 0         0 map $self->flatten_arrays($_), @$a;
8856             }
8857            
8858             sub array2string ($$) {
8859 0     0 0 0 my ($self, $a) = (shift, shift);
8860 0 0       0 warn "method array2string() takes one argument" if @_;
8861 0 0       0 return '(undef)' unless defined $a;
8862 0 0 0     0 return "<$a>" unless ref($a || '') eq 'ARRAY';
8863 0         0 '[ ' . join(', ', map $self->array2string($_), @$a) . ' ]';
8864             }
8865            
8866             sub dialist2lists ($$) {
8867 0     0 0 0 my ($self, $Dia, @groups) = (shift, shift);
8868 0         0 for my $group (split /\|/, $Dia, -1) {
8869 0         0 my @dia;
8870 0         0 for my $dia (split /,/, $group) {
8871 0         0 push @dia, $self->dia2list($dia);
8872             }
8873 0         0 push @groups, \@dia; # Do not omit empty groups
8874             } # Now get all the chars, and precompile results for them
8875             @groups
8876 0         0 }
8877            
8878             sub document_char ($$$;$) {
8879 0     0 0 0 my ($self, $c, $doc, $old) = (shift, shift, shift, shift);
8880 0 0 0     0 return $c if not defined $c or not defined $doc;
8881 0 0 0     0 $doc = "$old->[3] ⇒ $doc" if $old and ref $old and defined $old->[3];
      0        
8882 0 0       0 $c = [$c] unless ref $c;
8883 0 0       0 $c->[3] = $doc if defined $doc;
8884 0         0 $c
8885             }
8886            
8887             sub document_chars_on_key ($$$;$) { # Usable with all_layers
8888 0     0 0 0 my ($self, $c, $doc, $old, @o) = (shift, shift, shift, shift);
8889 0         0 for my $layer (@$c) {
8890 0         0 push @o, [ map {$self->document_char($_, $doc, $old)} @$layer ];
  0         0  
8891             }
8892             @o
8893 0         0 }
8894            
8895             #use Dumpvalue;
8896             my %translators = ( Id => sub ($) {shift}, Empty => sub ($) { return undef },
8897             dectrl => sub ($) {defined (my $c = shift) or return undef; $c = $c->[0] if 'ARRAY' eq ref $c;
8898             return undef if 0x20 <= ord $c; chr(0x40 + ord $c)},
8899             maybe_ucfirst => sub ($) {defined (my $c = shift) or return undef; $c = $c->[0] if 'ARRAY' eq ref $c; ucfirst $c},
8900             maybe_lc => sub ($) {defined (my $c = shift) or return undef; $c = $c->[0] if 'ARRAY' eq ref $c; lc $c},
8901             maybe_uc => sub ($) {defined (my $c = shift) or return undef; $c = $c->[0] if 'ARRAY' eq ref $c; uc $c},
8902             ucfirst => sub ($) {defined (my $c = shift) or return undef; $c = $c->[0] if 'ARRAY' eq ref $c;
8903             my $c1 = ucfirst $c; return undef if $c1 eq $c; $c1},
8904             lc => sub ($) {defined (my $c = shift) or return undef; $c = $c->[0] if 'ARRAY' eq ref $c;
8905             my $c1 = lc $c; return undef if $c1 eq $c; $c1},
8906             uc => sub ($) {defined (my $c = shift) or return undef; $c = $c->[0] if 'ARRAY' eq ref $c;
8907             my $c1 = uc $c; return undef if $c1 eq $c; $c1} );
8908             sub make_translator ($$$$$) { # translator may take some values from "environment"
8909             # (such as which deadkey is processed), so caching is tricky: if does -> $used_deadkey reflects this
8910             # The translator should return exactly one value (possibly undef) so that map TRANSLATOR, list works intuitively.
8911 0   0 0 0 0 my ($self, $name, $deadkey, $face, $N, $used_deadkey) = (shift, shift, shift || 0, shift, shift, ''); # $deadkey used eg for diagnostics
8912 0 0       0 die "Undefined recipe in a translator for face `$face', layer $N on deadkey `$deadkey'" unless defined $name;
8913 0 0       0 if ($name =~ /^Imported\[([\/\w]+)(?:,([\da-fA-F]{4,}))?\]$/) {
8914 0 0       0 my($d, @sec) = (($2 ? "$2" : undef), split m(/), "$1");
8915 0 0       0 $d = $deadkey, $used_deadkey ="/$deadkey" unless defined $d;
8916 0 0       0 my $fromKBDD = $self->get_deep($self, 'DEADKEYS', @sec, lc $d, 'map') # DEADKEYS/bepo with 00A4 ---> DEADKEYS/bepo/00a4
8917             or die "DEADKEYS section for `$d' with parts `@sec' not found";
8918             # indexed by lc hex
8919 0 0   0   0 return sub { my $cc=my $c=shift; return $c unless defined $c; $c = $c->[0] if 'ARRAY' eq ref $c; defined($c = $fromKBDD->{$self->key2hex($c)}) or return $c; $self->document_char(chr hex $c, $name, $cc) }, '';
  0 0       0  
  0 0       0  
  0         0  
  0         0  
  0         0  
8920             }
8921 0 0       0 die "unrecognized Imported argument: `$1'" if $name =~ /^Imported(\[.*)/s;
8922 0 0       0 return $translators{$name}, '' if $translators{$name};
8923 0 0       0 if ($name =~ /^PrefixDocs\[(.+)\]$/) {
8924 0         0 $self->{faces}{$face}{'[prefixDocs]'}{$deadkey} = $1;
8925 0         0 return $translators{Empty}, '';
8926             }
8927 0 0       0 if ($name =~ /^Show\[(.+)\]$/) {
8928 0         0 $self->{faces}{$face}{'[Show]'}{$deadkey} = $self->stringHEX2string($1);
8929 0         0 return $translators{Empty}, '';
8930             }
8931 0 0       0 if ($name =~ /^HTML_classes\[(.+)\]$/) {
8932 0 0       0 (my @c = split /,/, "$1") % 3 and die "HTML_classes[] for key `$deadkey' not come in triples";
8933 0   0     0 my $C = ( $self->{faces}{$face}{'[HTML_classes]'}{$deadkey || ''} ||= {} ); # Above, deadkey is ||= 0
      0        
8934             # warn "I create HTML_classes for face=$face, prefix=`$deadkey'";
8935 0         0 while (@c) {
8936 0         0 my ($where, $class, $chars) = splice @c, 0, 3;
8937 0         0 ( $chars = $self->stringHEX2string($chars) ) =~ s/\p{Blank}(?=\p{NonspacingMark})//g;
8938 0         0 push @{ $C->{$where}{$_} }, $class for split //, $chars;
  0         0  
8939             }
8940 0         0 return $translators{Empty}, '';
8941             }
8942 0 0       0 if ($name =~ /^Space(Self)?2Id(?:\[(.+)\])?$/) {
8943 0 0       0 my $dia = $self->charhex2key((defined $2) ? $2 : do {$used_deadkey = "/$deadkey"; $deadkey}); # XXXX `do' is needed, comma does not work
  0         0  
  0         0  
8944 0 0       0 my $self_OK = $1 ? $dia : 'n/a';
8945 0 0 0 0   0 return sub ($) { my $c = (shift() || '[none]'); $c = $c->[0] if 'ARRAY' eq ref $c; # Prefix key as usual letter
  0         0  
8946 0 0 0     0 ($c eq ' ' or $c eq $self_OK and defined $dia) ? $self->document_char($dia, $name) : undef }, $used_deadkey;
  0         0  
8947             }
8948 0 0       0 if ($name =~ /^ShiftFromTo\[(.+)\]$/) {
8949 0         0 my ($f,$t) = split /,/, "$1";
8950 0         0 $_ = hex $self->key2hex($self->charhex2key($_)) for $f, $t;
8951 0         0 $t -= $f; # Treat prefix keys as usual keys:
8952 0 0   0   0 return sub ($) { my $cc=my $c=shift; return $c unless defined $c; $c = $c->[0] if 'ARRAY' eq ref $c; $self->document_char(chr($t + ord $c), $name, $cc) }, '';
  0 0       0  
  0         0  
  0         0  
  0         0  
8953             }
8954 0 0       0 if ($name =~ /^SelectRX\[(.+)\]$/) {
8955 0         0 my ($rx) = qr/$1/; # Treat prefix keys as usual keys:
8956 0 0   0   0 return sub ($) { my $cc = my $c=shift; defined $c or return $c; $c = $c->[0] if 'ARRAY' eq ref $c; return undef unless $c =~ $rx; $cc }, '';
  0 0       0  
  0 0       0  
  0         0  
  0         0  
  0         0  
8957             }
8958 0 0       0 if ($name =~ /^FlipShift$/) {
8959 0 0   0   0 return sub ($) { my $c = shift; defined $c or return $c; map [@$_[1,0]], @$c }, '', 'all_layers';
  0         0  
  0         0  
  0         0  
8960             }
8961 0 0       0 if ($name =~ /^AssignTo\[(\w+),(\d+)\]$/) {
8962 0         0 my ($sec, $cnt) = ($1, $2);
8963 0 0       0 $cnt = 0, warn "Unrecognized section `$sec' in AssignTo" unless my $S = $start_SEC{$sec};
8964 0 0       0 warn("Too many keys ($cnt) put into section `$sec', max=$S->[1]"), $cnt = $S->[1] if $cnt > $S->[1];
8965 0 0   0   0 my $toTarget = sub { my $slot = shift; return unless $slot < $cnt; $slot + $S->[0] };
  0         0  
  0         0  
  0         0  
8966 0     0   0 return sub ($) { @{shift()} }, '', ['all_layers', $toTarget];
  0         0  
  0         0  
8967             }
8968 0 0       0 if ($name =~ /^FromTo(FlipShift)?\[(.+)\]$/) {
8969 0         0 my $flip = $1;
8970 0         0 my ($f,$t) = split /,/, "$2", 2;
8971             exists $self->{layers}{$_} or $_ = ($self->make_translated_layers($_, $face, [$N], $deadkey))->[0]
8972 0   0     0 for $f, $t; # Be conservative for caching...
8973 0         0 my $B = "~~~{$f>>>$t}";
8974 0         0 $_ = $self->{layers}{$_} for $f, $t;
8975 0         0 my (%h, $kk);
8976 0         0 for my $k (0..$#$f) {
8977 0 0 0     0 my @fr = map {($_ and ref) ? $_->[0] : $_} @{$f->[$k]};
  0         0  
  0         0  
8978 0 0 0     0 my @to = map {($_ and ref) ? $_->[0] : $_} @{$t->[$k]};
  0         0  
  0         0  
8979 0 0       0 if ($flip) {
8980 0 0       0 $h{defined($kk = $fr[$_]) ? $kk : ''} = $to[1-$_] for 0,1;
8981             } else {
8982 0 0       0 $h{defined($kk = $fr[$_]) ? $kk : ''} = $to[$_] for 0,1;
8983             }#
8984             } # Treat prefix keys as usual keys:
8985 0 0   0   0 return sub ($) { my $cc = my $c = shift; defined $c or return $c; $c = $c->[0] if 'ARRAY' eq ref $c; $self->document_char($h{$c}, $name, $cc) }, $B;
  0 0       0  
  0         0  
  0         0  
  0         0  
8986             }
8987 0 0       0 if ($name =~ /^InheritPrefixKeys\[(.+)\]$/) {
8988 0         0 my $base = $1;
8989             exists $self->{layers}{$_} or $_= ($self->make_translated_layers($_, $face, [$N], $deadkey))->[0]
8990 0   0     0 for $base;
8991 0         0 my $baseL = $self->{layers}{$base};
8992 0         0 my (%h);
8993 0         0 for my $k (0..$#$baseL) {
8994 0         0 for my $shift (0..1) {
8995 0 0       0 my $C = $baseL->[$k][$shift] or next;
8996 0 0 0     0 next unless ref $C and $C->[2]; # prefix
8997 0         0 $h{"$N $k $shift $C->[0]"} = $C;
8998             }
8999             } # Treat prefix keys as usual keys:
9000 0 0 0 0   0 return sub ($) { my $c = shift; defined $c or return $c; return $c if 'ARRAY' eq ref $c and $c->[2]; $h{"@_ $c"} or $c }, $base;
  0 0       0  
  0 0       0  
  0         0  
  0         0  
9001             }
9002 0 0       0 if ($name =~ /^ByColumns\[(.+)\]$/) {
9003 0 0       0 my @chars = map {length() ? $self->charhex2key($_) : undef} split /,/, "$1";
  0         0  
9004 0 0       0 my $g = $self->{faces}{$face}{'[geometry]'}
9005             or die "Face `$face' has no associated layer with geometry info; did you set geometry_via_layer?";
9006 0   0     0 my $o = ($self->{faces}{$face}{'[g_offsets]'} or [(0) x @$g]);
9007 0         0 $o = [@$o]; # deep copy
9008 0         0 my ($tot, %c) = 0;
9009             # warn "geometry: [@$g] [@$o]";
9010 0         0 for my $r (@$g) {
9011 0         0 my $off = shift @$o;
9012 0         0 $c{$tot + $_} = $_ + $off for 0..($r-1);
9013 0         0 $tot += $r;
9014             }
9015 0 0 0 0   0 return sub ($$$$) { (undef, my ($L, $k, $shift)) = @_; return undef if $L or $shift or $k >= $tot; $self->document_char($chars[$c{$k}], "ByColumn[$c{$k}]") }, '';
  0   0     0  
  0         0  
  0         0  
9016             }
9017 0 0       0 if ($name =~ /^ByRows\[(.+)\]$/) {
9018 0         0 s(^\s+(?!\s|///\s+))(), s((?
9019 0         0 my (@recipes, @subs) = split m(\s+///\s+), $recipes;
9020 0         0 my $LL = $#{ $self->{faces}{$face}{layers} }; # Since all_layers, we are called only for layer 0; subrecipes may need more
  0         0  
9021 0         0 for my $rec (@recipes) {
9022 0 0   0   0 push(@subs, sub {return undef}), next unless length $rec;
  0         0  
9023             #warn "recipe=`$rec'; face=`$face'; N=$N; deadkey=`$deadkey'; last_layer=$LL";
9024 0         0 my ($tr) = $self->make_translator_for_layers( $rec, $deadkey, $face, [0..$LL] );
9025             #warn " done";
9026 0         0 push @subs, $tr;
9027             }
9028 0 0       0 my $g = $self->{faces}{$face}{'[geometry]'}
9029             or die "Face `$face' has no associated layer with geometry info; did you set geometry_via_layer?";
9030 0         0 my ($tot, $row, %r) = (0, 0);
9031             # warn "geometry: [@$g] [@$o]";
9032 0         0 for my $r (@$g) {
9033 0         0 $r{$tot + $_} = $row for 0..($r-1);
9034 0         0 $tot += $r;
9035 0         0 $row++;
9036             }
9037             # return sub ($$$$) { (undef, undef, my $k) = @_; return undef if $k >= $tot; return undef if $#recipes < (my $r = $r{$k});
9038             # die "Undefined recipe: row=$row; face=`$face'; N=$N; deadkey=`$deadkey'; ARGV=(@_)" unless $subs[$r];
9039             # goto &{$subs[$r]} }, '';
9040 0 0 0 0   0 return sub ($$) { (undef, my $k) = @_; return [] if $k >= $tot or $#recipes < (my $r = $r{$k});
  0         0  
9041 0 0       0 die "Undefined recipe: row=$row; face=`$face'; N=$N; deadkey=`$deadkey'; ARGV=(@_)" unless $subs[$r];
9042 0         0 goto &{$subs[$r]} }, '', 'all_layers';
  0         0  
  0         0  
9043             }
9044 0 0       0 if ($name =~ /^(?:Diacritic|Mutate)(SpaceOK)?(Hack)?(2Self)?(DupsOK)?(32OK)?(?:\[(.+)\])?$/) {
9045 0         0 my ($spaceOK, $hack, $toSelf, $dupsOK, $w32OK) = ($1, $2, $3, $4, $5);
9046 0 0       0 my $Dia = ((defined $6) ? $6 : do {$used_deadkey ="/$deadkey"; $deadkey}); # XXXX `do' is needed, comma does not work
  0         0  
  0         0  
9047 0 0       0 if ($toSelf) {
9048 0 0       0 die "Mutate2Self does not make sense with SpaceOK/Hack/DupsOK/32OK" if grep $_, $hack, $spaceOK, $dupsOK, $w32OK;
9049 0         0 $Dia = $self->charhex2key($Dia);
9050 0 0       0 my(@sp, %sp) = map {(my $in = $_) =~ s/(?<=.)\@$//s; $in} @{ ($self->get_VK($face))->{SPACE} || [] };
  0         0  
  0         0  
  0         0  
9051 0         0 @sp = map $self->charhex2key($_), @sp;
9052 0         0 my $flip_AltGr = $self->{faces}{$face}{'[Flip_AltGr_Key]'};
9053 0 0       0 $flip_AltGr = $self->charhex2key($flip_AltGr) if defined $flip_AltGr;
9054 0 0       0 @sp = grep $flip_AltGr ne $_, @sp if defined $flip_AltGr; # It has a different function...
9055 0         0 @sp{@sp[1..$#sp]} = (0..$#sp); # The leading elt is the scancode
9056             # warn "SPACE on $Dia: <", join('> <', %sp), '>';
9057             return sub ($) {
9058 0     0   0 $self->document_chars_on_key([$self->diacritic2self_2($Dia, shift, $face, \%sp)], $name)
9059 0         0 }, $used_deadkey, 'all_layers';
9060             }
9061            
9062 0         0 my $isPrimary;
9063 0 0       0 $Dia =~ s/^\+// and $isPrimary++; # Wait until are expanded
9064            
9065 0         0 my $f = $self->get_NamesList;
9066 0 0       0 $self->load_compositions($f) if defined $f;
9067            
9068 0         0 $f = $self->get_AgeList;
9069 0 0 0     0 $self->load_uniage($f) if defined $f and not $self->{Age};
9070             # New processing: - = strip 1 from end; -3/ = strip 1 from the last 3
9071             #warn "Doing `$Dia'";
9072             #print "Doing `$Dia'\n";
9073             #warn "Age of <à> is <$self->{Age}{à}>";
9074 0         0 $Dia =~ s(){ (my $R = $1) =~ s/-/_/g;
  0         0  
9075 0 0       0 die "Named recipe `$1' unknown" unless exists $self->{faces}{$face}{"Named_DIA_Recipe__$R"};
9076             # (my $r = $self->{faces}{$face}{"Named_DIA_Recipe__$R"}) =~ s/^\s+//;
9077 0         0 $self->recipe2str($self->{faces}{$face}{"Named_DIA_Recipe__$R"}) }ge;
9078 0 0       0 $Dia =~ s/\|{3,4}/|/g if $isPrimary;
9079 0         0 my($skip, $limit, @groups, @groups2, @groups3) = (0);
9080 0         0 my($have4, @Dia) = (1, split /\|\|\|\|/, $Dia, -1);
9081 0 0       0 $have4 = 0, @Dia = split /\|\|\|/, $Dia, -1 if 1 == @Dia;
9082 0 0       0 if (1 < @Dia) {
9083 0 0       0 die "Too many |||- or ||||-sections in <$Dia>" if @Dia > 3;
9084 0         0 my @Dia2 = split /\|\|\|/, $Dia[1], -1;
9085 0 0       0 die "Too many |||-sections in the second ||||-section in <$Dia>" if @Dia2 > 2;
9086             # splice @Dia, 1, 1, @Dia2;
9087 0 0       0 @Dia2 = @Dia, shift @Dia2 unless $have4;
9088 0 0       0 $skip = (@Dia2 > 1 ? 1 + ($Dia2[0] =~ tr/|/|/) : 0);
9089 0 0 0     0 $Dia[1] .= "|$Dia[2]", pop @Dia if not $have4 and @Dia == 3;
9090             # $limit = 1 + ($Dia[-1] =~ tr/|/|/) + $skip;
9091 0         0 $limit = 0; # Not needed with the current logic...
9092 0         0 my @G = map [$self->dialist2lists($_)], @Dia; # will reverse when merging many into one cached...
9093 0         0 @groups = @{shift @G};
  0         0  
9094 0 0       0 @groups2 = @{shift @G} if @G;
  0         0  
9095 0 0       0 @groups3 = @{shift @G} if @G;
  0         0  
9096             } else {
9097 0         0 @groups = $self->dialist2lists($Dia);
9098             }
9099             #warn "Dia `$Dia' -> ", $self->array2string([$limit, $skip, @groups]);
9100 0         0 my $L = $self->{faces}{$face}{layers};
9101 0         0 my @L = map $self->{layers}{$_}, @$L;
9102 0   0     0 my $Sub = $self->{faces}{$face}{'[AltSubstitutions]'} || {};
9103             # warn "got AltSubstitutions: <",join('> <', %$Sub),'>' if $Sub;
9104             return sub {
9105 0     0   0 my $K = shift; # bindings of the key
9106 0 0       0 return ([]) x @$K unless grep defined, $self->flatten_arrays($K); # E.g, ByPairs and SelectRX produce many empty entries...
9107             #warn "Undefined base key for diacritic <$Dia>: <", join('> <', map {defined() ? $_ : '[undef]'} $self->flatten_arrays($K)), '>' unless defined $K->[0][0];
9108             #warn "Input for <$Dia>: <", join('> <', map {defined() ? $_ : '[undef]'} $self->flatten_arrays($K)), '>';
9109 0         0 my $base = $K->[0][0];
9110 0 0       0 $base = '' unless defined $base;
9111 0 0       0 $base = $base->[0] if ref $base;
9112 0 0 0     0 return ([]) x @$K if not $spaceOK and $base eq ' '; # Ignore possiblity that SPACE is a deadKey
9113 0         0 my $sorted = $self->sort_compositions(\@groups, $K, $Sub, $dupsOK, $w32OK);
9114 0         0 my ($sorted2, $sorted3, @idx_sorted3);
9115 0 0       0 $sorted2 = $self->sort_compositions(\@groups2, $K, $Sub, $dupsOK, $w32OK) if @groups2;
9116 0 0       0 $sorted3 = $self->sort_compositions(\@groups3, $K, $Sub, $dupsOK, $w32OK) if @groups3;
9117 0 0       0 @idx_sorted3 = @$sorted + (@groups2 ? @$sorted2 : 0) if @groups3; # used for warnings only
    0          
9118             $self->{faces}{$face}{'[in_dia_chains]'}{$_}++
9119 0 0 0     0 for grep defined, ($hack ? () : map {($_ and ref) ? $_->[0] : $_}
  0 0       0  
9120             # index as $res->[group][penalty_N][double_occ][layer][NN][shift]
9121 0 0       0 map {$_ ? @$_ : ()} map {$_ ? @$_ : ()} map {$_ ? @$_ : ()} map {$_ ? @$_ : ()} map {$_ ? @$_ : ()}
  0 0       0  
  0 0       0  
  0 0       0  
  0 0       0  
9122 0 0       0 @$sorted, @{$sorted2 || []}, @{$sorted3 || []});
  0 0       0  
9123             # map {($_ and ref) ? $_->[0] : $_} map @{$_||[]}, @out
9124 0         0 require Dumpvalue if printSORTEDLISTS;
9125 0         0 Dumpvalue->new()->dumpValue(["Key $base", $sorted]) if printSORTEDLISTS;
9126 0         0 warn $self->report_sorted_l($base, [@$sorted, @{$sorted2 || []}, @{$sorted3 || []}], [scalar @$sorted, $skip + scalar @{$sorted || []}, @idx_sorted3])
9127             if warnSORTEDLISTS;
9128 0         0 my $LLL = '';
9129 0 0       0 if ($sorted2) {
9130 0         0 my (@slots, @LL);
9131 0         0 for my $l (0..$#L) {
9132 0         0 push @slots, $self->shift_pop_compositions($sorted2, $l, !'from end', !'omit', $limit, $skip, my $ll = []);
9133 0         0 push @LL, $ll;
9134 0         0 print 'From Layers <', join('> <', map {defined() ? $_ : 'undef'} @$ll), ">\n" if printSORTEDLISTS;
9135 0         0 $LLL .= ' | ' . join(' ', map {defined() ? $_ : 'undef'} @$ll) if warnSORTEDLISTS;
9136             }
9137 0         0 print 'TMP Extracted ', $self->array2string($slots[0]), "\n" if printSORTEDLISTS;
9138 0         0 print 'TMP Extracted ', $self->array2string([@slots[1..$#slots]]), " deadKey=$deadkey\n" if printSORTEDLISTS;
9139 0   0     0 my $appended = $self->append_keys($sorted3 || $sorted2, \@slots, \@LL, !$sorted3 && 'prepend');
      0        
9140 0         0 Dumpvalue->new()->dumpValue(["Key $base; II", $sorted2]) if printSORTEDLISTS;
9141 0         0 if (warnSORTEDLISTS) {
9142             $LLL =~ s/^[ |]+//;
9143             $_++ for @idx_sorted3; # empty or 1 elt
9144             warn "TMP Extracted: ", $self->array2string(\@slots), " from layers $LLL\n"; # 1 is for what is prepended by append_keys()
9145             warn $self->report_sorted_l($base, [@$sorted, @$sorted2, @{$sorted3 || []}], # Where to put bold/dotted-bold separators:
9146             [scalar @$sorted, !!$appended + $skip + scalar @$sorted, @idx_sorted3], ($appended ? [1 + scalar @$sorted] : ()));
9147             }
9148             }
9149 0         0 my(@out, %seen);
9150 0         0 for my $Ln (0..$#L) {
9151 0         0 $out[$Ln] = $self->shift_pop_compositions($sorted, $Ln);
9152 0 0 0     0 $seen{$_}++ for grep defined, map {($_ and ref) ? $_->[0] : $_} @{$out[$Ln]};
  0         0  
  0         0  
9153             }
9154 0         0 for my $L (@out) { # $L is an array indexed by shift state
9155 0 0 0     0 $L = [map {(not $_ or ref $_) ? $_ : [$_,undef,undef,'Diacritic operator']} @$L];
  0         0  
9156             }
9157             # Insert non-yet-inserted characters from $sorted2, $sorted3
9158 0         0 for my $extra (['from end', $sorted2, 2], [0, $sorted3, 3]) {
9159 0 0       0 next unless $extra->[1];
9160 0         0 $self->deep_undef_by_hash(\%seen, $extra->[1]);
9161 0         0 for my $Ln (0..$#L) {
9162 0         0 my $o = $out[$Ln];
9163 0 0 0     0 unless (defined $o->[0] and defined $o->[1]) {
9164 0         0 my $o2 = $self->shift_pop_compositions($extra->[1], $Ln, $extra->[0], !'omit', !'limit', 0, undef, defined $o->[0], defined $o->[1]);
9165 0 0 0     0 $o2 = [map {(!defined $_ or ref) ? $_ : [$_,undef,undef,"Diacritic operator (choice $extra->[2])"]} @$o2];
  0         0  
9166 0   0     0 defined $o->[$_] or $o->[$_] = $o2->[$_] for 0,1;
9167 0 0 0     0 $seen{$_}++ for grep defined, map {($_ and ref) ? $_->[0] : $_} @$o;
  0         0  
9168             }
9169             }
9170             }
9171 0         0 print 'Extracted ', $self->array2string(\@out), " deadKey=$deadkey\n" if printSORTEDLISTS;
9172 0         0 warn 'Extracted ', $self->array2string(\@out), " deadKey=$deadkey\n" if warnSORTEDLISTS;
9173             $self->{faces}{$face}{'[from_dia_chains]'}{$_}++
9174 0 0 0     0 for grep defined, ($hack ? () : map {($_ and ref) ? $_->[0] : $_} map @{$_||[]}, @out);
  0 0       0  
  0 0       0  
9175             #warn "Age of <à> is <$self->{Age}{à}>";
9176             #warn "Output: <", join('> <', map {defined() ? $_ : '[undef]'} $self->flatten_arrays(\@out)), '>';
9177 0         0 return @out;
9178 0         0 }, $used_deadkey, 'all_layers';
9179             }
9180 0 0       0 if ($name =~ /^DefinedTo\[(.+)\]$/) {
9181 0         0 my $to = $self->charhex2key($1);
9182 0 0   0   0 return sub ($) { my $c = shift; defined $c or return $c; $self->document_char($to, 'DefinedTo', $c) }, '';
  0         0  
  0         0  
  0         0  
9183             }
9184 0 0       0 if ($name =~ /^ByPairs((Inv)?Prefix)?(Apple)?\[(.+)\]$/) {
9185 0         0 my ($prefix, $invert, $Apple, $in, @Pairs, %Map) = ($1, $2, $3, $4);
9186 0         0 $in =~ s/^\s+//;
9187 0         0 @Pairs = split /\s+(?!\p{NonspacingMark})/, $in;
9188 0         0 for my $p (@Pairs) {
9189 0         0 while (length $p) {
9190 0 0       0 die "Odd number of characters in a ByPairs map <$in>"
9191             unless $p =~ s/^((?:\p{Blank}\p{NonspacingMark}|(?:\b\.)?[0-9a-f]{4,}\b(?:\.\b)?|.){2})//i;
9192 0         0 (my $Pair = $1) =~ s/\p{Blank}//g;
9193             #warn "Pair = <$Pair>";
9194             # Cannot do it earlier, since HEX can introduce new blanks
9195 0         0 $Pair =~ s/(?<=[0-9a-f]{4})\.$//i; # Remove . which was on \b before extracting substring
9196 0         0 $Pair = $self->stringHEX2string($Pair);
9197             #warn " --> <$Pair>";
9198 0 0       0 die "Can't split ByPairs rule into a pair: I see <$Pair>" unless 2 == scalar (my @c = split //, $Pair);
9199             die qq("From" character <$c[0] duplicated in a ByPairs map <$in>)
9200 0 0       0 if exists $Map{$c[0]};
9201 0 0       0 $Map{$c[0]} = ($prefix ? [$c[1], undef, ($invert ? 3 : 1)<<3] : $c[1]); # massage_imported2 makes >> 3
    0          
9202             }
9203             }
9204 0 0       0 die "Empty ByPairs map <$in>" unless %Map; # Treat prefix keys as usual keys:
9205 0 0       0 if ($Apple) {
9206 0         0 $self->{faces}{$face}{'[AppleMap]'}[$N]{$_} = $Map{$_} for keys %Map;
9207 0         0 %Map = ();
9208             }
9209 0 0   0   0 return sub ($) { my $c = shift; defined $c or return $c; $c = $c->[0] if 'ARRAY' eq ref $c; $self->document_char($Map{$c}, 'explicit tuneup') }, '';
  0 0       0  
  0         0  
  0         0  
  0         0  
9210             }
9211 0         0 my $map = $self->get_deep($self, 'DEADKEYS', split m(/), $name);
9212 0 0       0 die "Can't resolve character map `$name'" unless defined $map;
9213 0 0       0 unless (exists $map->{map}) {{
9214 0         0 my($k1) = keys %$map;
  0         0  
9215 0 0 0     0 die "Character map `$name' does not contain HEX: `$k1'" if %$map and not $k1 =~ /^[0-9a-f]{4,}$/;
9216 0 0       0 die "Character map is a parent-type map, but no deadkey to use specified" unless defined $deadkey;
9217 0         0 my $Map = { map +(chr hex $_, $map->{$_}), keys %$map };
9218             die "Character map `$name' does not contain `$deadkey', contains <", (join '> <', keys %$map), ">"
9219 0 0       0 unless exists $Map->{chr hex $deadkey};
9220 0 0       0 $map = $Map->{chr hex $deadkey}, $used_deadkey = "/$deadkey" if %$Map;
9221 0 0       0 $map = {map => {}}, warn "Character map for `$name' empty" unless %$map;
9222             }}
9223 0 0       0 die "Can't resolve character map `$name' `map': <", (join '> <', %$map), ">" unless defined $map->{map};
9224 0         0 $map = $map->{map};
9225 0         0 my $Map = { map +(chr hex $_, chr hex($map->{$_})), keys %$map }; # hex form is not unique
9226             ( sub ($) { # Treat prefix keys as usual keys:
9227 0 0   0   0 my $c = shift; defined $c or return $c; $c = $c->[0] if 'ARRAY' eq ref $c; $self->document_char($Map->{$c}, "DEADKEYS=$name")
  0 0       0  
  0         0  
  0         0  
9228 0         0 }, $used_deadkey )
9229             }
9230            
sub depth1_A_translator($$) {
  # Lift the per-char translator $tr to a translator of arrays: the returned
  # closure takes a ref to an array of chars and returns a ref to a new array
  # holding the result of $tr applied to each element in turn.
  my ($self, $tr) = @_;
  my $array_translator = sub ($) {
    my ($chars) = @_;
    my @translated = map { $tr->($_) } @$chars;
    \@translated
  };
  return $array_translator;
}
9238            
sub depth2_translator($$) {
  # $tr is a ref to an array of per-layer translators.  The returned closure
  # takes ($in, $k): $in is a ref to an array (indexed by layer) of arrays of
  # chars, $k is passed through to the translators.  The translator of layer L
  # is called as ->(char, L, $k, position) for every position of $in->[L];
  # the closure returns one array ref of results per layer.
  my ($self, $translators) = @_;
  return sub ($$) {
    my ($key_layers, $key_n) = @_;
    my @translated;
    for my $layer_n (0 .. $#$key_layers) {
      my $layer_tr = $translators->[$layer_n];
      defined $layer_tr
        or die "Undefined translator for layer=$layer_n; total=", scalar @$translators;
      my $last_pos = $#{ $key_layers->[$layer_n] };
      push @translated,
        [ map { $layer_tr->($key_layers->[$layer_n][$_], $layer_n, $key_n, $_) } 0 .. $last_pos ];
    }
    return @translated;
  };
}
9251            
sub make_translator_for_layers ($$$$$) {
  # The translator may take some values from the "environment" (such as which
  # deadkey is processed), so caching is tricky: if it does, the returned
  # "used" strings reflect this.
  # The translator should return exactly one value (possibly undef) so that
  #   map TRANSLATOR, list
  # works intuitively.
  #
  # Returns (translator, ref to array of "used" strings, $cvt).  make_translator()
  # is first asked about layer $NN->[0]; if its third return value is true
  # (or is an array ref whose first element is true), its translator is
  # returned as is.  Otherwise one translator is made per element of $NN and
  # they are combined via depth2_translator().
  my $self    = shift;
  my $name    = shift;
  my $deadkey = shift || 0;		# $deadkey is used, e.g., for diagnostics
  my $face    = shift;
  my $NN      = shift;

  my ($Tr, $used, $for_layers) = $self->make_translator($name, $deadkey, $face, $NN->[0]);
  my $cvt;
  ($for_layers, $cvt) = @$for_layers if ref $for_layers;
  if ($for_layers) {
    my @used_names = map { "$used![$_]" } @$NN;
    return ($Tr, \@used_names, $cvt);
  }

  my @per_layer   = map { [ $self->make_translator($name, $deadkey, $face, $_) ] } @$NN;
  my @translators = map { $_->[0] } @per_layer;
  my @used_names  = map { $_->[1] } @per_layer;
  return ($self->depth2_translator(\@translators), \@used_names, $cvt);
}
9262            
sub make_translated_layers_tr ($$$$$$$) {
  # Apply the translation map named $tr to the layers named in @$layers.
  # Returns the names of the resulting layers (one per element of $NN); the
  # layers are stored into $self->{layers} unless all of these names are
  # already present there.
  my ($self, $layers, $tr, $append, $deadkey, $face, $NN) = @_;
  my ($Tr, $used, $cvt) = $self->make_translator_for_layers($tr, $deadkey, $face, $NN);
  #warn "  tr=<$tr>, key=<$deadkey>, used=<$used>";

  my @new_names;
  for my $i (0 .. $#$NN) {
    my $suffix = ($append and $NN->[$i]);
    push @new_names, "$tr$used->[$i]($layers->[$i])$append" . $suffix;
  }
  my $missing = grep { not exists $self->{layers}{$_} } @new_names;
  return @new_names unless $missing;

  my @L = map { $self->{layers}{$_} } @$layers;		# the source layers
  my @LL;						# the translated layers
  for my $n (0 .. $#{ $L[0] }) {			# key number
    # rearrange one key into $X[$Layer][$shift]
    my @C = $Tr->([ map { $L[$_][$n] } 0 .. $#L ], $n);
    if ($cvt) {		# $cvt maps the key number to its slot in the output
      for (0 .. $#L) {
        defined $cvt->($n) or next;
        $LL[$_][ $cvt->($n) ] = $C[$_];
      }
    }
    else {
      push @{ $LL[$_] }, $C[$_] for 0 .. $#L;
    }
  }
  $self->{layers}{ $new_names[$_] } = $LL[$_] for 0 .. $#L;
  return @new_names;
}
9282            
sub key2string ($$) {
  # Short printable form of one key (a ref to an array of bindings):
  #   '<>'  for an undefined key,
  #   '[]'  for a key with no defined binding,
  #   otherwise "[...]" listing each binding: 'undef' for an undefined one,
  #   the first element (or '' if that is undefined) for an array-ref binding,
  #   the binding itself otherwise.
  my ($self, $key) = @_;
  return '<>' unless defined $key;
  return '[]' unless grep { defined $_ } @$key;
  my @shown;
  for my $binding (@$key) {
    if    (not defined $binding)    { push @shown, 'undef' }
    elsif (not ref $binding)        { push @shown, $binding }
    elsif (defined $binding->[0])   { push @shown, $binding->[0] }
    else                            { push @shown, '' }
  }
  return "[@shown]";
}
9293            
sub layer2string ($$) {
  # Printable form of a layer (a ref to an array of keys): the key2string()
  # forms joined by spaces, cut after the last key whose form is longer than
  # 2 chars (i.e. is neither '<>' nor '[]'); '...' marks that a tail was cut.
  my ($self, $layer) = @_;
  my @shown = map { $self->key2string($_) } @$layer;
  my $last = -1;
  for my $i (0 .. $#shown) {
    $last = $i if length($shown[$i]) > 2;
  }
  my $rest = ($last == $#shown) ? '' : '...';
  return join(' ', @shown[0 .. $last]) . $rest;
}
9301            
sub make_translated_layers_stack ($$@) {
  # Stacking.  Every argument after $self is a ref to an array of layer names;
  # all of them must have the same length.  For every layer number the layers
  # of this number are merged: each slot takes its value from the first
  # argument which has the slot defined.  The merged layer is stored in
  # $self->{layers} under the space-joined names of its sources (unless a
  # layer of this name already exists).  Returns the names of the merged layers.
  my $self = shift;
  my @out;
  my $c = @{ $_[0] };
  for (@_) {
    @$_ == $c
      or die "Stacking: number of layers ", scalar(@$_), " != number of layers $c of the first elt";
  }
  for my $lN (0 .. $c - 1) {		# layer Number
    my @layers  = map { $_->[$lN] } @_;
    my $stacked = "@layers";
    push @out, $stacked;
    if (debug_stacking) {
      warn "Stack in-layer $lN `$_': ", $self->layer2string($self->{layers}{$_}), "\n" for @layers;
    }
    next if exists $self->{layers}{$stacked};

    my @L = map { $self->{layers}{$_} } @layers;
    my @keys;
    for my $lI (0 .. $#L) {
      my $l = $L[$lI];
      #    warn "... Layer$lN: `$layers[$lI]'..." if debug_stacking;
      for my $k (0 .. $#$l) {
        for my $kk (0 .. $#{ $l->[$k] }) {
          my $char = $l->[$k][$kk];
          if (debug_STACKING and defined $char) {
            my $cc = ref $char ? $char->[0] : $char;
            warn "...... On $k/$kk (${lI}th lN=$lN): I see `$cc': ", !defined $keys[$k][$kk], "\n" ;
          }
          # Shallow copy: an array-ref binding is shared with the source layer
          $keys[$k][$kk] = $char if defined $char and not defined $keys[$k][$kk];
        }
        $keys[$k] ||= [];
      }
    }
    $self->{layers}{$stacked} = \@keys;
    warn "Stack out-layer $lN `@layers':\n\t", $self->layer2string(\@keys), "\n" if debug_stacking;
  }
  warn 'Stack out-layers:', (join "\n\t", '', @out), "\n" if debug_stacking;
  return @out;
}
9335            
sub make_translated_layers_noid ($$$@) {
  # Like stacking, but omits what is already present in a reference face.
  # $refr and every later argument are refs to arrays of layer names of the
  # same length.  For every layer number the remaining arguments are merged
  # (the first defined value of a slot wins), but a char is taken only if
  #   - ($whole false) it differs from the char in the same slot of the
  #     reference layer of the same number;
  #   - ($whole true)  it appears nowhere on the same key in any layer of the
  #     reference face.
  # Array-ref bindings are compared by their first element.
  # Returns the names of the created layers.
  my $self  = shift;
  my $whole = shift;
  my $refr  = shift;
  my (@out, @seen);
  my $c = @$refr;
  #warn "noid: join ", scalar @_, " faces of $c layers; ref=[@$refr] first=[@{$_[0]}]";
  for (@_) {
    @$_ == $c
      or die "Stacking: number of layers ", scalar(@$_), " != number of layers $c of the reference face";
  }
  my @R = map { $self->{layers}{$_} } @$refr;
  if ($whole) {		# collect, per key, all the chars of the reference face
    for my $key (0 .. $#{ $R[0] }) {
      for my $l (@R) {
        for my $char (grep { defined $_ } @{ $l->[$key] }) {
          $seen[$key]{ ref($char) ? $char->[0] : $char }++;
        }
      }
    }
  }
  my $name = 'NOID([' . join('], [', map { join ' +++ ', @$_ } @_) . '])';
  for my $l (0 .. $c - 1) {
    my @layers = map { $_->[$l] } @_;
    if ($whole) {
      # Keep names of layers distinct, but since they are all interdependent,
      # do not construct basing on layer names
      $name .= "'";
    }
    else {
      $name = "NOID[$refr->[$l]](" . join(' +++ ', @layers) . ')';
    }
    push @out, $name;
    next if exists $self->{layers}{$name};

    my @L = map { $self->{layers}{$_} } @layers;
    my @keys;
    for my $ll (@L) {
      for my $k (0 .. $#$ll) {
        for my $kk (0 .. $#{ $ll->[$k] }) {
          my $ch  = $ll->[$k][$kk];
          my $rch = $R[$l][$k][$kk];
          $ch  = $ch->[0]  if $ch  and ref $ch;
          $rch = $rch->[0] if $rch and ref $rch;
          next unless defined $ch;
          next if defined $keys[$k][$kk];
          my $is_new = $whole ? !$seen[$k]{$ch}
                              : $ch ne (defined $rch ? $rch : '');
          # The binding itself (not only its first element) is stored
          $keys[$k][$kk] = $ll->[$k][$kk] if $is_new;
        }
        $keys[$k] ||= [];
      }
    }
    $self->{layers}{$name} = \@keys;
  }
  warn "NOID --> <@out>\n" if debug_noid;
  return @out;
}
9385            
sub paren_match_q ($$) {
  # True if the string has as many '(' as ')'.  Only the counts are compared;
  # the order of the parentheses is not checked.
  my (undef, $text) = @_;
  my $opening = ($text =~ tr/(//);
  my $closing = ($text =~ tr/)//);
  return $opening == $closing;
}
9390            
sub brackets_match_q ($$) {
  # True if the string has as many '[' as ']'.  Only the counts are compared;
  # the order of the brackets is not checked.
  my (undef, $text) = @_;
  my $opening = ($text =~ tr/[//);
  my $closing = ($text =~ tr/]//);
  return $opening == $closing;
}
9395            
sub join_min_paren_brackets_matched ($$@) {
  # Regroup the pieces (the arguments after $join): a piece is appended (with
  # $join in between) to the preceding group as long as that group has
  # unmatched counts of () or of []; otherwise it starts a new group.
  # Returns the list of groups.
  my $self = shift;
  my $join = shift;
  my @out  = (shift);
  #warn 'joining <', join('> <', @out, @_),'>';
  for my $piece (@_) {
    if ($self->paren_match_q($out[-1]) and $self->brackets_match_q($out[-1])) {
      push @out, $piece;
    }
    else {
      $out[-1] .= $join . $piece;
    }
  }
  return @out;
}
9407            
sub face_by_face_recipe ($$$) {
  # Create the layers of the face $f from its face recipe, using the face
  # $base as the base.  Does nothing if the face already has layers, or if
  # there is no face recipe for it.  Dies if numLayers of $base is not defined.
  my ($self, $f, $base) = @_;
  return if $self->{faces}{$f}{layers};
  return unless $self->{face_recipes}{$f};
  my $n = $self->{faces}{$base}{numLayers};
  defined $n
    or die "Can't determine number of layers in face `$f': face_recipe exists, but not numLayers";
  warn "Massaging face `$f': use face_recipes...\n" if debug_face_layout_recipes;
  # Preliminary value, in place while the recipe is processed (so know the length???)
  $self->{faces}{$f}{layers} = [ ('Empty') x $n ];
  return $self->{faces}{$f}{layers} = $self->layers_by_face_recipe($f, $base);
}
9418            
sub layers_by_face_recipe ($$$;$) {
  # Make the layers of the face $face by its recipe, with $base as the base
  # face; store the list of layer names in $self->{faces}{$face}{layers} and
  # return it.  The recipe is $self->{face_recipes}{$face} if that is true,
  # otherwise the optional argument $r.  If neither is defined and the face
  # name is a word followed by ⁴ or ₄, a default "Layers(...)" recipe is built
  # from the word.
  my ($self, $face, $base, $r) = @_;
  my $R = $self->{face_recipes}{$face};
  if (not defined $r and not defined $R and $face =~ /^(\w+)(?:(⁴)|₄)$/) {
    my ($stem, $superscript) = ($1, $2);
    $R = $superscript ? "Layers(${stem}²+${stem}²⁺)" : "Layers(${stem}²+${stem}₂)";
  }
  die "No face recipe for `$face' found" unless $R or defined $r;
  $r = $R if $R;
  $r = $self->recipe2str($r);
  #print "face recipe `$face'\n";
  my $LL = $self->{faces}{$base}{layers};
  warn "Using face_recipes for `$face', base=$base ==> `$r'\n" if debug_face_layout_recipes;
  my $L = $self->make_translated_layers($r, $base, [0 .. $#$LL]);
  $self->{faces}{$face}{layers} = $L;
  #print "face recipe `$face'  -> ", $self->array2string($L), "\n";
  #  warn "Using face_recipes `$face' -> ", $self->array2string($L) if debug_face_layout_recipes;
  warn "Massaged face `$face' ->", (join "\n\t", '', @$L), "\n" if debug_face_layout_recipes;
  #warn "face recipe `$face' --> ", $self->array2string([map $self->{layers}{$_}, @$L]);
  return $L;
}
9440            
sub export_layers ($$$;$) {
  # The list of layers of the face $face, the first true value of:
  #   (unless $full is true) its '[ini_layers_prefix]', then its '[ini_layers]';
  #   its 'layers';
  #   the result of layers_by_face_recipe($face, $base).
  my ($self, $face, $base, $full) = @_;
  #  warn "Doing FullFace on <$face>, base=<$base>\n" if $full;
  unless ($full) {
    my $ini = $self->{faces}{$face}{'[ini_layers_prefix]'} || $self->{faces}{$face}{'[ini_layers]'};
    return $ini if $ini;
  }
  return $self->{faces}{$face}{layers} || $self->layers_by_face_recipe($face, $base);
}
9448            
sub pseudo_layer ($$$$;$) {
  # Resolve layer number $N of the pseudo-face described by $recipe (relative
  # to the face $face) to the name of an existing layer.  The name given by
  # pseudo_layer0() is replaced by its layer recipe if there is one; if the
  # result is not yet a known layer, it is created via make_translated_layers().
  # ($deadkey is accepted, but is not used here.)
  my ($self, $recipe, $face, $N, $deadkey) = @_;
  my $named = $self->pseudo_layer0($recipe, $face, $N);
  my $layer = $named;
  #  warn "Pseudo-layer recipe `$recipe', face=`$face', N=$N ->\n\t$l\n" if $recipe =~ /Greek__/;
  if (exists $self->{layer_recipes}{$named}) {
    #warn("layer recipe: `$l'"),
    $layer = $self->layer_recipe($named);
    $layer =~ s/^\s+//;
  }
  warn "pseudo_layer(`$recipe'): Using layout_recipe `$layer' for layer '$named'\n"
    if debug_face_layout_recipes and exists $self->{layer_recipes}{$named};
  return $layer if $self->{layers}{$layer};
  my $made = $self->make_translated_layers($layer, $face, [$N]);
  return $made->[0];
  #  die "Component `$l' of a pseudo-layer cannot be resolved"
}
9460            
sub pseudo_layer0 ($$$$) {
  # Return the name of the layer number $N selected by the face-type recipe
  # $recipe, interpreted relative to the face named $face.  Recognized recipes:
  #   LinkFace, Self, FlipLayersLinkFace, FlipLayers,
  #   Layers(A+B+...)			- the $N-th name of the list,
  #   [Full]Face(F), [Full]FlipLayers(F)	- layer of the face F,
  #   prefix=KEY, prefixNOTSAME[case]=KEY	- layer of a prefix key.
  # Dies on any other recipe.
  #
  # Fixes relative to the previous version (all in the prefixNOTSAME branch):
  #   - the number of keys was fetched as $face->{'[non_VK]'}, which
  #     dereferences the face *name*; it is now read from $self->{faces}{$face};
  #   - the chars of the base face were collected by iterating over the layer
  #     *names* instead of over the layers themselves;
  #   - a cache hit returned the layer data instead of the layer name, unlike
  #     every other return of this sub.
  my ($self, $recipe, $face, $N) = (shift, shift, shift, shift);
  if ($recipe eq 'LinkFace') {
    my $L = $self->{faces}{$face}{LinkFace} or die "Face `$face' has no LinkFace";
    return ($self->export_layers($L, $face))->[$N];
  }
  return ($self->export_layers($face, $face))->[$N] if $recipe eq 'Self';
  if ($recipe =~ /^Layers\((.*\+.*)\)$/) {
    my @L = split /\+/, "$1";
    return $L[$N];
  }
  my $N1 = $self->flip_layer_N($N, $#{ $self->{faces}{$face}{layers} });
  if ($recipe eq 'FlipLayersLinkFace') {
    my $L = $self->{faces}{$face}{LinkFace} or die "Face `$face' has no LinkFace";
    return ($self->export_layers($L, $face))->[$N1];
  }
  #warn "Doing flip/face via `$recipe', N=$N, N1=$N1, face=`$face'";
  return ($self->export_layers($face, $face))->[$N1] if $recipe eq 'FlipLayers';
  #  my $gr_debug = ($recipe =~ /Greek__/);
  if (debug_PERL_dollar1_scoping) {
    return ($self->export_layers("$3", $face, !!$1))->[$2 ? $N : $N1]
      if $recipe =~ /^(Full)?(?:(Face)|FlipLayers)\((.*)\)$/;
  } else {
    my $m1;	# Apparently, in perl5.10, if replace $m1 by $1 below, $1 loses its TRUE value between match and evaluation of $1
    # ($gr_debug and warn "Pseudo-layer `$recipe', face=`$face', N=$N, N1=$N1\n"),
    return ($self->export_layers("$3", $face, !!$1))->[$m1 ? $N : $N1]
      if $recipe =~ /^(Full)?(?:(Face)|FlipLayers)\((.*)\)$/ and ($m1 = $2, 1);
  }
  if ($recipe =~ /^prefix(NOTSAME(case)?)?=(.+)$/) {	# `case´ unsupported
    # Analogue of NOID with the principal layers as reference, and layers of DeadKey as sources
    # Copy the captures before calling anything which may run a regex of its own
    my ($notsame, $case, $prefix) = ($1, $2, $3);
    my $hexPrefix = $self->key2hex($self->charhex2key($prefix));
    $self->ensure_DeadKey_Map($face, $hexPrefix);
    my $layers = $self->{faces}{$face}{'[deadkeyLayers]'}{$hexPrefix} or die "Unknown prefix character `$hexPrefix´ in layers-from-prefix-key";
    return $layers->[$N] if $N or not $notsame;
    # From here on $N is 0 and $notsame is true
    my $name = "NOTSAME[$face]$layers->[$N]";
    return $name if $self->{layers}{$name};		# already created: return its *name*
    my @LL = map $self->{layers}{$_}, @$layers;	# the layers of the prefix key
    my $L0 = $self->{faces}{$face}{layers};
    my @L0 = map $self->{layers}{$_}, @$L0;		# the principal layers of the face
    my @OUT;
    for my $charN (0 .. $self->{faces}{$face}{'[non_VK]'} - 1) {
      my (@L, %ss) = map $_->[$charN], @LL;
      # %ss: the chars present on this key in the principal layers
      for my $layers0 (map $_->[$charN], @L0) {
        for my $sh (@$layers0) {
          $ss{ref($sh) ? $sh->[0] : $sh}++ if defined $sh;
        }
      }
      my(@CC, @pp, @OK);
      for my $l (@L[0 .. (($notsame && !$N) ? @{ $self->{faces}{$face}{layers} } - 1 : 0)]) {
        my(%s1, @was);
        for my $sh (0..$#$l) {		# $self->dead_with_inversion(!'hex', $_, $face, $self->{faces}{$face})
          my @C = map {defined() ? (ref() ? $_->[0] : $_) : $_} $l->[$sh];
          my @p = map {defined() ? (ref() ? $_->[2] : 0 ) : 0 } $l->[$sh];
          ($CC[$sh], $pp[$sh]) = ($C[0], $p[0]) if not defined $CC[$sh] and defined $C[0];
          ($CC[$sh], $pp[$sh], $OK[$sh], $s1{$C[0]}) = ($C[0], $p[0], 1,1) if !$OK[$sh] and defined $C[0] and not $ss{$C[0]};
          ($CC[$sh], $pp[$sh], $OK[$sh], $s1{$was[0]}) = (@was, 1,1)		# use unshifted if needed
            if $sh and !$OK[$sh] and defined $C[0] and defined $was[0] and not $ss{$was[0]} and not $s1{$was[0]};
          @was = ($C[0], $p[0]) unless $sh;		# may omit `unless´
          #        $cnt++ if defined $CC[$sh];
        }
      }
      push @OUT, \@CC;
    }
    $self->{layers}{$name} = \@OUT;
    return $name;
  }
  die "Unrecognized Face recipe `$recipe'"
}
9530            
9531             # my @LL = map $self->{layers}{'[ini_copy1]'}{$_} || $self->{layers}{'[ini_copy]'}{$_} || $self->{layers}{$_}, @$LL;
9532            
9533             # A stand-alone word is either LinkFace, or is interpreted as a name of
9534             # translation function applied to the current face.
9535             # A name which is an argument to a function is allowed to be a layer name
9536             # (but note that then both layers of the face will be mapped to that same
9537             # layer - unless one restricts the recipe to a particular layer 0/1 of the
9538             # face).
9539             # In particular: to specify a layer, use Id(LayerName).
9540             #use Dumpvalue;
sub make_translated_layers ($$$$;$$) {		# support Self/FlipLayers/LinkFace/FlipShift, stacking and maps
  # Process the recipe $recipe for the layers with numbers @$NN of the face
  # $face; returns a ref to an array of names of the resulting layers.
  #   - "Shortcut(NAME)" is replaced by the face recipe NAME;
  #   - several whitespace-separated parts (with matched () and []) are each
  #     processed recursively, then stacked (or NOID-combined if $noid is true);
  #   - face-type recipes are resolved via pseudo_layer();
  #   - otherwise the recipe is FUNC, FUNC(ARG) or FUNC[...](ARG): ARG is
  #     resolved to layers, and FUNC is applied via make_translated_layers_tr().
  my ($self, $recipe, $face, $NN, $deadkey, $noid) = @_;
  my $append = '';
  my $ARG;
  # XXX We can't cache created layer by name, since it depends on $recipe and $N too???
  #  return $recipe if exists $self->{layers}{$recipe};
  #  my $FACE = $recipe . join '===', '', @$NN, '';
  #  return $self->{faces}{$FACE}{layers} if exists $self->{faces}{$FACE}{layers};
  while ($recipe =~ /^Shortcut\(([^()]+)\)$/) {	# Same as Face(), but does not disable $deadkey; no caching...
    my $r = $self->{face_recipes}{$1}
      or die "No face recipe for `$1' found";
    $recipe = $self->recipe2str($r);
    warn "Using face_recipes for `$1', base=$face ==> `$recipe'\n" if debug_face_layout_recipes;
  }

  my @parts = grep { /\S/ } $self->join_min_paren_brackets_matched('', split /(\s+)/, $recipe);
  @parts or die "Whitespace face recipe `$recipe'?!";
  if (@parts > 1) {
    #warn "parts of the translation spec: <", join('> <', @parts), '>';
    my @layers = map { $self->make_translated_layers($_, $face, $NN, $deadkey) } @parts;
    warn "Stacking/NOID for layers `@parts'", (join "\n\t", '', map {join ' &&& ', @$_} @layers), "\n" if debug_noid or debug_stacking;
    #print "Stacking for `$recipe'\n" if $DEBUG;
    #Dumpvalue->new()->dumpValue(\@layers) if $DEBUG;
    if ($noid) {
      return [ $self->make_translated_layers_noid($noid eq 'NotSameKey', @layers) ];
    }
    return [ $self->make_translated_layers_stack(@layers) ];
  }

  if ($recipe =~ /^(prefix(?:NOTSAME(?:case)?)?=.*|(FlipLayers)?LinkFace|FlipLayers|Self|((Full)?(Face|FlipLayers)|Layers)\([^()]+\))$/) {
    return [ map { $self->pseudo_layer($recipe, $face, $_) } @$NN ];
  }

  $recipe =~ s/^(FlipShift)$/$1(Self)/;
  if ($recipe !~ /\)$/) {
    $ARG = '';
  }
  elsif ($recipe =~ /^[^(]*\[/) {		# Tricky: allow () inside Func[](args)
    # Find the first "](" such that [] are matched in the part up to it
    my $pos;
    while ($recipe =~ /(?=\]\()/g) {
      next unless $self->brackets_match_q(substr $recipe, 0, 1 + pos $recipe);
      $pos = 1 + pos $recipe;
      last;
    }
    die "Can't parse `$recipe' as Func[Arg1](Arg2)" unless $pos;
    $ARG    = substr $recipe, $pos + 1, length($recipe) - $pos - 2;
    $recipe = substr $recipe, 0, $pos;
  }
  else {
    my $o = $recipe;
    ($recipe, $ARG) = ($recipe =~ /^(.*?)\((.*)\)$/s) or warn "Can't parse recipe `$o'";
  }
  #warn "Translation sub-spec: recipe = <$recipe>, ARG=<$ARG>";

  if ($recipe =~ /^If(Not)?Prefix\[(.*)\]$/s) {	# No embedded \\]
    my $neg = $1;
    my @prefix = map { $self->key2hex($self->charhex2key($_)) } split /,/, "$2";
    ### warn "dk=<$deadkey> prefix=<@prefix>" if defined $deadkey;
    if (defined($deadkey) and ($neg xor grep { $_ eq $deadkey } @prefix)) {
      return $self->make_translated_layers($ARG, $face, $NN, $deadkey, $noid);
    }
    ($recipe, $ARG) = ('Empty', [ ('Empty') x @$NN ]);
  }

  if (not length $ARG) {		# no argument: apply to the layers of the face
    $ARG    = [ map { $self->{faces}{$face}{layers}[$_] } @$NN ];
    $append = "#$face#";
  }
  elsif (exists $self->{layers}{$ARG}) {	# the argument is the name of a layer
    $ARG = [ ($ARG) x @$NN ];
  }
  elsif (!ref $ARG) {			# the argument is a recipe itself
    my $arg_name = $ARG;
    if (exists $self->{layer_recipes}{$arg_name}) {
      $ARG = $self->layer_recipe($ARG);
      $ARG =~ s/^\s+//;
    }
    warn "make_translated_layers: Using layout_recipe `$ARG' for layer '$arg_name'\n" if debug_face_layout_recipes and exists $self->{layer_recipes}{$arg_name};
    ($noid) = ($recipe =~ /^(NotId|NotSameKey)$/);
    $ARG = $self->make_translated_layers($ARG, $face, $NN, $deadkey, $noid);
    return $ARG if $noid;
  }
  # Either we saw (), or $recipe is not a face recipe!
  return [ $self->make_translated_layers_tr($ARG, $recipe, $append, $deadkey, $face, $NN) ];
}
9608            
sub massage_translated_layers ($$$$;$) {
  # If the face $face (a /-separated path below `faces') has a true
  # '[Diacritic_if_undef]' recipe, translate layers @$NN by this recipe, and
  # stack the result below the layers @$in (so it fills only the slots
  # undefined in @$in).  Otherwise return $in unchanged.
  my ($self, $in, $face, $NN, $deadkey) = @_;
  #warn "Massaging `$deadkey' for `$face':$N";
  my @path = split m(/), $face;
  my $r = $self->get_deep($self, 'faces', @path, '[Diacritic_if_undef]');
  return $in unless $r;
  $r =~ s/^\s+//;
  #warn "  -> end recipe `$r'";
  my $post = $self->make_translated_layers($r, $face, $NN, $deadkey);
  return [ $self->make_translated_layers_stack($in, $post) ];
}
9618            
sub default_char ($$) {
  # The '[DeadChar_DefaultTranslation]' setting of the face $F, with leading
  # whitespace removed and converted via charhex2key(); undef if not set.
  my ($self, $F) = @_;
  my $default = $self->get_deep($self, 'faces', $F, '[DeadChar_DefaultTranslation]');
  if (defined $default) {
    $default =~ s/^\s+//;
    $default = $self->charhex2key($default);
  }
  return $default;
}
9625            
sub create_inverted_face ($$$$$) {
  # Create (via patch_face() in 'invert' mode) the inverted face for the prefix
  # key $KK (hex; '' stands for the principal layers) of the face $F, and link
  # it to $F.  If $flip_AltGr is defined and $chain has an entry for this
  # prefix key, $flip_AltGr is bound to that entry in the new face.
  # Returns the name of the new face.
  my ($self, $F, $KK, $chain, $flip_AltGr) = @_;
  my $H            = $self->{faces}{$F};
  my $auto_chr     = $H->{'[deadkeyInvAltGrKey]'}{$KK};
  my $new_facename = $H->{'[deadkeyFaceInvAltGr]'}{$auto_chr};
  my $LL = $H->{'[deadkeyLayers]'}{$KK};
  $LL = $H->{layers} if $KK eq '';
  my %Map;
  if (defined $flip_AltGr and defined $chain->{$KK and $self->charhex2key($KK)}) {
    my $next_in_chain = $chain->{$KK and $self->charhex2key($KK)};
    %Map = ($flip_AltGr, [$next_in_chain, undef, 1, 'AltGrInv-faces-chain']);
  }
  $self->patch_face($LL, $new_facename, $H->{"[InvdeadkeyLayers]"}{$KK}, $KK, \%Map, $F, 'invert');

  #  warn "Joining <$F>, <$new_facename>";
  $self->link_layers($F, $new_facename, 'skipfix', 'no-slot-warn');
  if ($KK eq '' and defined $flip_AltGr) {
    $H->{'[deadkeyFace]'}{$self->key2hex($flip_AltGr)} = $H->{'[deadkeyFaceInvAltGr]'}{$auto_chr};
  }
  if ($H->{"[InvdeadkeyLayers]"}{$KK}) {	# There are overrides for the inverted face.  Make a map for them...
    #warn "Overriding face for inverted `$KK' in face $F; new_facename=$new_facename";
    my $override = "$new_facename\@override";
    $H->{'[InvAltGrFace]'}{$KK}       = $override;
    $self->{faces}{$override}{layers} = $H->{"[InvdeadkeyLayers]"}{$KK};
    $self->link_layers($F, $override, 'skipfix', 'no-slot-warn');
  }
  return $new_facename;
}
9650            
sub auto_dead_can_wrap ($$) {
  # Call after all the manually set prefix keys are already established, so
  # one can avoid them.  Increments the '[ad_can_wrap]' counter of the face $F
  # (the flag consulted by next_auto_dead()); returns its previous value.
  my ($self, $F) = @_;
  return $self->{faces}{$F}{'[ad_can_wrap]'}++;
}
9655            
sub next_auto_dead ($$) {
  # Allocate the next free auto-assigned prefix char for the face hash $H and
  # return it.  Candidates are taken from the counter $H->{'[first_auto_dead]'};
  # every examined code point is marked in $H->{'[auto_dead]'}.
  # Before wrapping: the first code point not yet marked is taken, except that
  # a marked one is accepted if it is >= 0x1000 and '[ad_can_wrap]' is set;
  # in that case the counter is reset to 0xa0 and '[autodead_wrapped]' is set
  # (bug in kbdutool).
  # After wrapping: code points which are marked, or are keys of
  # '[deadkeyFaceHexMap]' (in key2hex() form), are skipped.
  my ($self, $H) = @_;
  my $o;
  unless ($H->{'[autodead_wrapped]'}) {
    while (1) {
      $o = $H->{'[first_auto_dead]'}++;
      last unless $H->{'[auto_dead]'}{$o}++;
      last unless $o < 0x1000 or not $H->{'[ad_can_wrap]'};
    }
    if ($o >= 0x1000 and $H->{'[ad_can_wrap]'}) {
      $H->{'[first_auto_dead]'} = 0xa0 unless $H->{'[autodead_wrapped]'}++;
    }
  }
  if ($H->{'[autodead_wrapped]'}) {	# This does not deal with manual assignment of inverted prefixes???  Inv_AltGr???
    while (1) {
      $o = $H->{'[first_auto_dead]'}++;
      next if $H->{'[auto_dead]'}{$o}++;
      last unless $H->{'[deadkeyFaceHexMap]'}{$self->key2hex(chr $o)};
    }
    #    if ($o == 0x00a3) {
    #      warn "$o: Keys HexMap: ", join ', ', sort keys %{$H->{'[deadkeyFaceHexMap]'}};
    #    }
  }
  return chr $o;
}
9670            
9671             sub recipe2str ($$) {
                 # Convert a recipe to a single string.  The recipe is either a string, or a
                 # ref to an array of fragments which are trimmed and concatenated.  Leading
                 # whitespace of the result is removed.  The first argument is ignored.
9672 0     0 0 0 (undef, my $recipe) = (shift, shift);
9673 0 0       0 if ('ARRAY' eq ref $recipe) {
                 # Work on a copy of the array, so that the edits below do not change the
                 # caller's fragments
9674 0         0 $recipe = [@$recipe]; # deep copy
                 # Strip trailing and leading whitespace of every fragment
9675 0         0 s/\s+$//, s/^\s+// for @$recipe;
                 # NOTE(review): the next statement is cut off after `s/(?' in this listing,
                 # so what it does to the fragments cannot be determined from here; restore
                 # it from the source of the module before relying on this text.
9676 0         0 s/(?
9677 0         0 $recipe = join '', @$recipe;
9678             }
9679 0         0 $recipe =~ s/^\s+//;
9680 0         0 $recipe
9681             }
9682            
sub scan_for_DeadKey_Maps ($) {		# Makes a direct-access synonym, scan for DeadKey_Maps* keys
  # For every section `faces/...' listed in $self->{'[keys]'} (except those
  # ending in /VK, and those marked PartialFace):
  #   - if the face name contains `/' and the section has `layers', make the
  #     section accessible as $self->{faces}{NAME};
  #   - collect the keys DeadKey_[Inv|Add]Map[HEX][_N] of the section and of
  #     all its parents into '[DeadKey__Maps]'{HEX or ''}{Inv/Add/''}{N or 'All'}
  #     of the section; a value found earlier (closer to the section) wins.
  my ($self) = @_;
  #Dumpvalue->new()->dumpValue($self);
  my @sections = grep { m(^faces(/.*)?$) } @{ $self->{'[keys]'} };
  for my $section (@sections) {
    (my $F = $section) =~ s(^faces/?)();
    my @path = split m(/), $section;
    next if @path == 1 or $path[-1] eq 'VK';

    my @chain;				# the section, then all the parents
    my @prefix = @path;
    while (@prefix) {
      push @chain, $self->get_deep($self, @prefix);
      pop @prefix;
    }
    my $H = $chain[0];
    next if $H->{PartialFace};
    $self->{faces}{$F} = $H if $F =~ m(/) and exists $H->{layers};	# Make a direct-access copy
    #warn "Face section `${FF}'s parents: ", scalar @HH;
    #warn "Mismatch of hashes for `$FF'" unless $self->{faces}{$F} == $H;

    #    warn "compositing: faces `$F'; -> <", (join '> <', %$H), ">";
    for my $HH (@chain) {
      for my $k (keys %$HH) {
        #        warn "\t`$k' -> `$HH->{$k}'";
        $k =~ m(^DeadKey_(Inv|Add)?Map([0-9a-f]{4,})?(?:_(\d+))?$)i or next;
        #warn "\t`$k' -> `$HH->{$k}'";
        my ($inv, $key, $layers) = ($1 || '', $2, $3);
        $key = $self->key2hex($self->charhex2key($key)) if defined $key;	# get rid of uc/lc hex problem
        # XXXX The problem is that the parent may define layers in different ways (_0,_1 or no); ignore it for now...
        my $which = defined $layers ? $layers : 'All';
        $H->{'[DeadKey__Maps]'}{$key || ''}{$inv}{$which} ||= $HH->{$k};
      }
    }
  }
}
9713            
9714             #use Dumpvalue;
sub ensure_DeadKey_Map_by_recipe ($$$$;$$) {
  # Create, at most once per (face, $inv, prefix key, list of layers), the
  # layers of the prefix key $hexPrefix of the face $F according to $recipe,
  # and store their names in "[${inv}deadkeyLayers]"{$hexPrefix} of the face.
  # $layers is a ref to an array of layer numbers (a false value means all the
  # layers of the face).  Unless $inv is true or the recipe ends in
  # " NoDefaultTranslation", the result goes through massage_translated_layers().
  my ($self, $F, $hexPrefix, $recipe, $layers, $inv) = @_;
  $inv ||= '';
  my $H = $self->{faces}{$F};
  my $layers_id = ($layers and "@$layers");
  return if $H->{"[${inv}deadkeyLayersCreated]"}{$hexPrefix}{$layers_id}++;
  #Dumpvalue->new()->dumpValue($self);
  my $massage = 1;
  $massage = 0 if $recipe =~ s/\s+NoDefaultTranslation$//;
  $layers ||= [ 0 .. $#{ $self->{faces}{$F}{layers} } ];
  #warn "Doing key `$hexPrefix' inv=`$inv' face=`$F', recipe=`$recipe'";
  my $new = $self->make_translated_layers($recipe, $F, $layers, $hexPrefix);
  if ($massage and not $inv) {
    $new = $self->massage_translated_layers($new, $F, $layers, $hexPrefix);
  }
  for my $i (0 .. $#$layers) {	# Create a layer according to the spec
    #warn "DeadKey Layer for face=$F; layer=$layer, k=$k:\n\t$HH->{$k}, key=`", ($hexPrefix||''),"'\n\t\t";
    #$DEBUG = $hexPrefix eq '0192';
    #print "Doing key `$hexPrefix' face=$F  layer=`$layer' recipe=`$recipe'\n" if $DEBUG;
    #Dumpvalue->new()->dumpValue($self->{layers}{$new}) if $DEBUG;
    #warn "new=<<<", join('>>> <<<', @$new),'>>>';
    $H->{"[${inv}deadkeyLayers]"}{$hexPrefix}[ $layers->[$i] ] = $new->[$i];
    #warn "Face `$F', layer=$layer key=$hexPrefix\t=> `$new'" if $H->{layers}[$layer] =~ /00a9/i;
    #Dumpvalue->new()->dumpValue($self->{layers}{$new}) if $self->charhex2key($hexPrefix) eq chr 0x00a9;
  }
}
9736            
sub ensure_DeadKey_Map ($$$;$) {
  # Make sure that the layers for the prefix key $hexPrefix of the face $F are
  # created, using the recipes stored in '[DeadKey__Maps]' of the face under
  # the key $hexPrefixWas (which defaults to $hexPrefix).  For an inverted map
  # of the prefix 0000 the layers are created for the prefix ''.
  my ($self, $F, $hexPrefix, $hexPrefixWas) = @_;
  $hexPrefixWas = $hexPrefix unless defined $hexPrefixWas;
  my $H  = $self->{faces}{$F};
  my $v0 = $H->{'[DeadKey__Maps]'}{$hexPrefixWas};
  for my $inv (keys %$v0) {
    my $v1 = $v0->{$inv};
    my $K  = $hexPrefix;
    $K = '' if $inv and "$inv $hexPrefix" eq "Inv 0000";
    for my $which (keys %$v1) {
      my $recipe = $self->recipe2str($v1->{$which});
      my $layers = ($which eq 'All') ? '' : [$which];
      $self->ensure_DeadKey_Map_by_recipe($F, $K, $recipe, $layers, $inv);
    }
  }
}
9752            
9753             sub create_DeadKey_Maps ($) {
                 # Walk all faces and make sure the deadkey layers described in each face's '[DeadKey__Maps]' exist.
                 # An entry with the empty key '' is applied to every key of '[DEAD]' (except the Flip_AltGr key)
                 # which has neither its own '[DeadKey__Maps]' entry nor a '[ComposeKeys]' entry.
                 # NOTE(review): %h and $expl are declared but not used anywhere in this sub.
9754 0     0 0 0 my ($self, %h, $expl) = (shift);
9755             #Dumpvalue->new()->dumpValue($self);
9756 0         0 for my $F (keys %{ $self->{faces} }) {
  0         0  
9757 0 0 0     0 next if 'HASH' ne ref $self->{faces}{$F} or $F =~ /\bVK$/; # "parent" taking keys for a child
9758 0         0 my $H = $self->{faces}{$F};
9759 0         0 my $flip_AltGr = $H->{'[Flip_AltGr_Key]'};
                 # 'N/A' is a placeholder compared against the keys of '[DEAD]' below when no flip key is configured
9760 0 0       0 $flip_AltGr = (defined $flip_AltGr) ? $self->charhex2key($flip_AltGr) : 'N/A';
9761             # Treat first the specific maps (for one deadkey) then the deadkeys which were not seen via the universal map
                 # NOTE(review): the loop follows the order of keys(); the grep below excludes keys with specific maps
                 # whatever that order is - confirm that no ordering between '' and the specific keys is relied upon.
9762 0         0 for my $key (keys %{$H->{'[DeadKey__Maps]'}}) {
  0         0  
                 # NOTE(review): $v0 is assigned but not used in this loop body.
9763 0         0 my $v0 = $H->{'[DeadKey__Maps]'}{$key};
9764             my @keys = (($key ne '')
9765             ? $key
9766 0   0     0 : (grep {not $H->{'[DeadKey__Maps]'}{$_} and not $H->{'[ComposeKeys]'}{$_}}
9767 0 0       0 map $self->key2hex($_), grep $_ ne $flip_AltGr, keys %{ $H->{'[DEAD]'} }));
  0         0  
                 # Third argument: the recipes are looked up under $key, even when applied to another deadkey
9768 0         0 $self->ensure_DeadKey_Map($F, $_, $key) for @keys;
9769             }
9770             }
9771             }
9772            
9773             #use Dumpvalue;
9774             sub create_composite_layers ($) {
                 # For every deadkey KK of every complete face F with '[deadkeyLayers]': fill the missing layers with a
                 # shared empty layer, register the layer list as the new face "F###KK", and record that name in
                 # $H->{'[deadkeyFace]'}{KK}.  Also converts '[Auto_Diacritic_Start]' into the number '[first_auto_dead]'.
                 # Returns $self.
                 # NOTE(review): %h and $expl are declared but not used anywhere in this sub.
9775 0     0 0 0 my ($self, %h, $expl) = (shift);
9776             #Dumpvalue->new()->dumpValue($self);
9777 0         0 for my $F (keys %{ $self->{faces} }) {
  0         0  
9778 0 0 0     0 next if 'HASH' ne ref $self->{faces}{$F} or $F =~ /\bVK$/; # "parent" taking keys for a child
9779 0         0 my $H = $self->{faces}{$F};
9780 0 0       0 next if $H->{PartialFace};
9781 0 0       0 next unless $H->{'[deadkeyLayers]'}; # Are we in a no-nonsense Face-hash with defined deadkeys?
9782             #warn "Face: <", join( '> <', %$H), ">";
9783 0         0 my $layerL = @{ $self->{layers}{ $H->{layers}[0] } }; # number of keys in the face (in the principal layer)
  0         0  
9784 0         0 my $first_auto_dead = $H->{'[Auto_Diacritic_Start]'};
9785 0 0       0 $H->{'[first_auto_dead]'} = ord $self->charhex2key($first_auto_dead) if defined $first_auto_dead;
9786 0         0 for my $KK (sort keys %{$H->{'[deadkeyLayers]'}}) { # Given a deadkey: join layers into a face, and link to it
  0         0  
9787 0         0 for my $layer ( 0 .. $#{ $H->{layers} } ) {
  0         0  
9788             #warn "Checking for empty layers, Face `$face', layer=$layer key=$KK";
                 # One statement: create (once, via ||=) the layer "[empty$layerL]" of $layerL empty arrays, and point
                 # the undefined slot to it by name; the layer is thus shared by all the slots of this length
9789             $self->{layers}{"[empty$layerL]"} ||= [map[], 1..$layerL], $H->{'[deadkeyLayers]'}{$KK}[$layer] = "[empty$layerL]"
9790 0 0 0     0 unless defined $H->{'[deadkeyLayers]'}{$KK}[$layer]
9791             }
9792             # Join the syntetic layers (now well-formed) into a new synthetic face:
9793 0         0 my $new_facename = "$F###$KK";
                 # The new face holds the very same array ref as '[deadkeyLayers]'{$KK} (no copy is made)
9794 0         0 $self->{faces}{$new_facename}{layers} = $H->{'[deadkeyLayers]'}{$KK};
9795 0         0 $H->{'[deadkeyFace]'}{$KK} = $new_facename;
9796             #warn "Joining <$F>, <$new_facename>";
9797             # $self->link_layers($F, $new_facename, 'skipfix', 'no-slot-warn'); # Now moved to link_composite_layers
9798             }
9799             }
9800             $self
9801 0         0 }
9802            
9803             sub create_prefix_chains ($) {
                 # Process the '[PrefixChains]' of every face listed in $self->{'[keys]'} (entries "faces/..."; the
                 # bare "faces" entry and names ending in the component "VK" are skipped).  Each chain is a comma-separated
                 # list START,P1,P2,...; for consecutive Pi the map %KK gets: pressing START after Pi gives P(i+1).
                 # The deadkey face of every such Pi is then replaced by a patched face "F*==>*Chain*HEX".  Returns $self.
                 # NOTE(review): %h and $expl are declared but not used anywhere in this sub.
9804 0     0 0 0 my ($self, %h, $expl) = (shift);
9805 0         0 my @F = grep m(^faces(/.*)?$), @{$self->{'[keys]'}};
  0         0  
9806 0         0 for my $FF (@F) {
9807 0         0 (my $F = $FF) =~ s(^faces/?)();
9808 0         0 my(@FF, @HH) = split m(/), $FF;
9809 0 0 0     0 next if @FF == 1 or $FF[-1] eq 'VK';
                 # Collect the hashes for the full path and then for each shorter prefix of it; $HH[0] is the face itself
9810 0         0 push(@HH, $self->get_deep($self, @FF)), pop @FF while @FF;
9811 0         0 my($H, %KK) = $HH[0];
9812 0 0       0 for my $chain ( @{ $H->{'[PrefixChains]'} || [] } ) {
  0         0  
9813 0         0 (my $c = $chain) =~ s/^\s+//;
                 # LIMIT -1 keeps a trailing empty field.  NOTE(review): because of `$_ and', an item which is
                 # false as a string (such as "0") is kept as is and not passed through charhex2key - confirm intended.
9814 0 0       0 my @prefix = map { $_ and $self->charhex2key($_) } split /,/, $c, -1; # trailing empty means all are prefixes
  0         0  
                 # $trail_nonprefix remembers the last item; an empty last item is only a marker and is dropped
9815 0 0       0 length(my $trail_nonprefix = $prefix[-1]) or pop @prefix;
9816 0         0 my $start = shift @prefix;
9817 0 0       0 warn "PrefixChain for `$start' in font `$F' is empty" unless @prefix > 1;
9818 0         0 for my $Kn (1..$#prefix) {
9819 0         0 my($from, $to) = @prefix[$Kn-1, $Kn];
                 # Third element is true unless this is the last link and the chain ends in a non-empty last item
9820 0   0     0 $KK{$from}{$start} = [$to, undef, $Kn != $#prefix || !$trail_nonprefix, 'PrefixChains'];
9821             }
9822             }
9823 0         0 for my $K (keys %KK) {
9824 0         0 my $KK = $self->key2hex($K);
9825 0 0       0 die "Key `$KK=$K' in PrefixChain for font=`$F' is not a prefix" unless my $KF = $H->{'[deadkeyFace]'}{$KK};
9826 0         0 my $new_facename = "$F*==>*Chain*$KK";
9827 0         0 my $LL = $H->{'[deadkeyLayers]'}{$KK};
                 # Arguments: layers, new name, no prefix layers, map id, the char map, source face, and a false "inv" flag
9828 0         0 $self->patch_face($LL, $new_facename, undef, "chain-in-$KK", $KK{$K}, $F, !'invert');
                 # From now on the deadkey resolves to the patched face and its layers
9829 0         0 $H->{'[deadkeyFace]'}{$KK} = $new_facename;
9830 0         0 $H->{'[deadkeyLayers]'}{$KK} = $self->{faces}{$new_facename}{layers};
9831 0         0 $self->coverage_face0($new_facename, 'after import');
9832             }
9833             }
9834             $self
9835 0         0 }
9836            
9837             sub link_composite_layers ($) { # as above, but finish
                 # For every face listed in $self->{'[keys]'} (same selection as in create_prefix_chains), call
                 # link_layers() between the face and each of the faces named in its '[deadkeyFace]' hash.  Returns $self.
                 # NOTE(review): %h and $expl are declared but not used anywhere in this sub.
9838 0     0 0 0 my ($self, %h, $expl) = (shift);
9839 0         0 my @F = grep m(^faces(/.*)?$), @{$self->{'[keys]'}};
  0         0  
9840 0         0 for my $FF (@F) {
9841 0         0 (my $F = $FF) =~ s(^faces/?)();
9842 0         0 my(@FF, @HH) = split m(/), $FF;
                 # Skip the bare "faces" entry and the paths whose last component is "VK"
9843 0 0 0     0 next if @FF == 1 or $FF[-1] eq 'VK';
                 # $HH[0] is the hash for the full path; the later entries are for successively shorter paths
9844 0         0 push(@HH, $self->get_deep($self, @FF)), pop @FF while @FF;
9845 0         0 my $H = $HH[0];
9846 0         0 for my $new_facename (values %{$H->{'[deadkeyFace]'}}) {
  0         0  
9847             #warn "Joining <$F>, <$new_facename>";
9848 0         0 $self->link_layers($F, $new_facename, 'skipfix', 'no-slot-warn');
9849             }
9850             }
9851             $self
9852 0         0 }
9853            
9854             sub create_inverted_faces ($) {
                 # For every face with '[deadkeyLayers]': choose a prefix character for the AltGr-inverted variant of each
                 # deadkey face which has something defined above layer 0, record the choices in '[deadkeyInvAltGrKey]',
                 # '[deadkeyFaceInvAltGr]' and '[invAltGr_Accessor]'; then, if the face has a '[Flip_AltGr_Key]', build
                 # the inverted faces through create_inverted_face().  Returns $self.
9855 0     0 0 0 my ($self) = (shift);
9856             #Dumpvalue->new()->dumpValue($self);
9857 0         0 for my $F (keys %{$self->{faces} }) {
  0         0  
9858 0 0 0     0 next if 'HASH' ne ref $self->{faces}{$F} or $F =~ /\bVK$/; # "parent" taking keys for a child
9859 0         0 my $H = $self->{faces}{$F};
9860 0 0       0 next unless $H->{'[deadkeyLayers]'}; # Are we in a no-nonsense Face-hash with defined deadkeys?
                 # '[Explicit_AltGr_Invert]' is a flat list of pairs; an odd-sized list is ignored with a warning
9861 0   0     0 my $expl = $H->{'[Explicit_AltGr_Invert]'} || [];
9862 0 0       0 $expl = [], warn "Odd number of elements of Explicit_AltGr_Invert in face $F, ignore" if @$expl % 2;
                 # Both elements of each pair go through charhex2key(); the result is a hash: first of pair => second
9863 0         0 $expl = {map $self->charhex2key($_), @$expl};
9864            
9865             #warn "Face: <", join( '> <', %$H), ">";
                 # NOTE(review): $layerL is not used in the rest of this sub.
9866 0         0 my $layerL = @{ $self->{layers}{ $H->{layers}[0] } }; # number of keys in the face (in the principal layer)
  0         0  
9867 0         0 for my $KK (sort keys %{$H->{'[deadkeyLayers]'}}) { # Create AltGr-inverted face if there is at least one key in the AltGr face:
  0         0  
9868 0         0 my $LL = $H->{'[deadkeyLayers]'}{$KK};
9869             # To check that a key is defined, we do not care about whether a shift-state is encoded as a string, or as an array:
                 # Requires '[first_auto_dead]', and at least one defined entry in the layers 1 .. last of this deadkey
9870 0 0 0     0 next unless defined $H->{'[first_auto_dead]'} and grep defined, map $self->flatten_arrays($_), map $self->{layers}{$_}, @$LL[1..$#$LL];
9871 0 0       0 $H->{'[deadkeyInvAltGrKey]'}{''} = $self->next_auto_dead($H) unless exists $H->{'[deadkeyInvAltGrKey]'}{''}; # Prefix key for principal invertred face
                 # An explicitly configured character wins; otherwise a new one is taken from next_auto_dead()
9872             my $auto_chr = $H->{'[deadkeyInvAltGrKey]'}{$KK} =
9873 0 0       0 ((exists $expl->{$self->charhex2key($KK)}) ? $expl->{$self->charhex2key($KK)} : $self->next_auto_dead($H));
9874 0         0 $H->{'[deadkeyFaceInvAltGr]'}{$auto_chr} = "$F##Inv#$KK";
9875 0         0 $self->{faces}{ $H->{'[deadkeyFace]'}{$KK} }{'[invAltGr_Accessor]'} = $auto_chr;
9876             }
                 # Everything below is done only for the faces having a Flip_AltGr key
9877 0 0       0 next unless defined (my $flip_AltGr = $H->{'[Flip_AltGr_Key]'});
9878 0         0 $flip_AltGr = $self->charhex2key($flip_AltGr);
9879 0 0       0 $H->{'[deadkeyFaceInvAltGr]'}{ $H->{'[deadkeyInvAltGrKey]'}{''} } = "$F##Inv#" if exists $H->{'[deadkeyInvAltGrKey]'}{''};
                 # %chain maps the previous key of 'chainAltGr' ('' for the first one) to the inversion prefix of the next
9880 0         0 my ($prev, %chain) = '';
9881 0 0       0 for my $k ( @{ $H->{chainAltGr} || [] }) {
  0         0  
9882 0         0 my $K = $self->charhex2key($k);
9883 0         0 my $KK = $self->key2hex($K);
9884             warn("Deadkey ` $K ' of face $F has no associated AltGr-inverted face"), next
9885 0 0       0 unless exists $H->{'[deadkeyInvAltGrKey]'}{$KK};
9886 0         0 $chain{$prev} = $H->{'[deadkeyInvAltGrKey]'}{$KK};
9887             #warn "chain `$prev' --> `$K' => $H->{'[deadkeyInvAltGrKey]'}{$KK}";
9888             # $H->{'[dead2_AltGr_chain]'}{(length $prev) ? $self->key2hex($prev) : ''}++;
9889 0         0 $prev = $K;
9890             }
                 # $prev is non-empty exactly when at least one chain element was accepted above
9891 0 0       0 $H->{'[have_AltGr_chain]'} = 1 if length $prev;
9892 0         0 for my $KK (keys %{$H->{'[deadkeyInvAltGrKey]'}}) { # Now know which deadkeys take inversion, and via what prefix
  0         0  
9893 0         0 my $new = $self->create_inverted_face($F, $KK, \%chain, $flip_AltGr);
9894 0         0 $self->coverage_face0($new);
9895             }
9896             # We do not link the AltGr-inverted faces to the "parent" faces here. Currently, it should be done when
9897             # outputting a kbd description...
9898             }
9899             $self
9900 0         0 }
9901            
9902             #use Dumpvalue;
9903             sub patch_face ($$$$$$$;$) { # flip layers paying attention to linked AltGr-inverted faces, and overrides
                 # Create the face $newname whose layers are a stack (via make_translated_layers_stack) of:
                 #   - $prefix, when true;
                 #   - when %$Map is non-empty: the layers of face $face translated char-by-char through $Map;
                 #   - the layers named in @$LL (with the order changed via flip_layer_N() when $inv is true).
                 # $mapId is used only to make the names of the translated layers unique.
9904 0     0 0 0 my ($self, $LL, $newname, $prefix, $mapId, $Map, $face, $inv, @K) = (shift, shift, shift, shift, shift, shift, shift, shift);
9905 0 0       0 if (%$Map) { # Borrow from make_translated_layer_tr()
                 # NOTE(review): with the `$o' row commented out, the closure's value is that of its last statement,
                 # the assignment `my $o = $Map->{$c}' (i.e. the mapped value, or undef when $c is not in the map)
9906 0 0   0   0 my $Tr = sub ($) { my $c = shift; defined $c or return $c; $c = $c->[0] if ref $c; my $o = $Map->{$c} ;
  0 0       0  
  0         0  
  0         0  
9907             #warn "Tr: `$c' --> `$o'" if defined $o;
9908             #$o
9909 0         0 };
9910 0         0 $Tr = $self->depth1_A_translator($Tr);
9911 0         0 my $LLL = $self->{faces}{$face}{layers};
9912 0 0       0 my $mod_name = ($inv ? 'AltGr' : '');
9913 0         0 for my $n (0..$#$LL) { # Layer number
9914 0         0 my $new_Name = "$face##Chain$mod_name#$n.." . $mapId;
9915             #warn "AltGr-chaining: name=$new_Name; `$chainKey' => `$nextL'";
                 # ||= : a translated layer of this name created by an earlier call is reused
9916 0   0     0 $self->{layers}{$new_Name} ||= [ map $Tr->($_), @{ $self->{layers}{ $LLL->[$n] } }];
  0         0  
9917 0         0 push @K, $new_Name;
9918             }
9919             }
9920 0 0       0 my @prefix = $prefix ? $prefix : ();
9921 0         0 my @n1 = (0..$#$LL);
9922 0 0       0 @n1 = map $self->flip_layer_N($_, $#$LL), @n1 if $inv;
9923 0         0 my @invLL = @$LL[@n1];
                 # The translated layers (if any) are placed after $prefix and before the (possibly flipped) originals
9924 0 0       0 push @prefix, \@K if @K;
9925 0         0 $self->{faces}{$newname}{layers} = [$self->make_translated_layers_stack(@prefix, \@invLL)];
9926             }
9927            
9928             # use Dumpvalue;
9929             my %subst_Shift = qw( -- - -S S t- t tS T ); # There is no space for 8 MODs, so we contract tS into T
9930             sub fmt_bitmap_mods ($$$;$) {
                 # Describe the modifier bitmap $b (bit N stands for the N-th name of @b: Shift, Ctrl, Alt, ...).
                 #   $col   - column number; column 15 is always reported as 'Invalid'.
                 #   $short - if true: one letter per modifier ('-' when absent), Shift moved to the end, and the final
                 #            two positions (bit 7 `t' and Shift) contracted into one char via %subst_Shift;
                 #            if false: tab-separated full names, with empty fields for the absent modifiers.
                 # Returns the resulting string.
9931 0     0 0 0 my ($self, $b, $col, $short, @b) = (shift, shift, shift, shift, qw(Shift Ctrl Alt Kana Roya Loya Z t));
9932 0 0       0 my ($j, $empty, @ind) = ($short ? ('', '-', 1..$#b, 0) : ("\t", '', 0..$#b)); # better have Shift at end (Ctrl-Alt-Shift)...
9933 0 0       0 my $O = join $j, map {($b & (1<<$_)) ? ($short ? substr $b[$_], 0, 1 : $b[$_]) : $empty} @ind;
  0 0       0  
9934 0 0       0 $O =~ s/(..)$/$subst_Shift{$1}/ if $short;
                 # In the long form the absent trailing modifiers leave a run of tabs at the end; drop it
9935 0         0 $O =~ s/\t+$//;
9936 0 0       0 $O = 'Invalid' if $col == 15;
9937 0         0 $O
9938             }
9939            
9940             sub BaseKeys ($$) {
                 # Compute (and cache in the face hash) two arrays indexed by the key unit number:
                 #   baseKeysWin - the name used for the unit (the value of base_unit(), mapped through %oem_keys unless
                 #                 it is a single char A-Z or 0-9), and
                 #   baseKeysRaw - the third element of what base_unit() returns (or the name, outside the main branch).
                 # $K is the path to the face, as in [qw(faces US)].  Returns the baseKeysWin array ref (the cached
                 # one on repeated calls); returns early with nothing after the "Can't find a key" warning.
9941 0     0 0 0 my($self, $K) = (shift, shift);
9942 0         0 my $F = $self->get_deep($self, @$K); # Presumably a face hash, as in $K = [qw(faces US)]
9943 0 0       0 return $F->{baseKeysWin} if $F->{baseKeysWin};
9944 0         0 my $cnt = $F->{'[non_VK]'};
9945 0         0 my $b = $F->{BaseLayer};
9946 0         0 my $layers = $F->{layers};
                 # A BaseLayer which is not the name of an existing layer is treated as a recipe to build the layer from
9947 0 0 0     0 $b = $self->make_translated_layers($b, $K->[-1], [0])->[0] if defined $b and not $self->{layers}{$b};
9948 0 0       0 my $basesub = [((defined $b) ? $b : ()), $F->{layers}[0]];
                 # $max: the largest unit index needed - covers the layers of $basesub and every range of %start_SEC
9949 0         0 my $max = -1;
9950 0   0     0 $max < $#{$self->{layers}{$_}} and $max = $#{$self->{layers}{$_}} for @$basesub;
  0         0  
  0         0  
9951 0   0     0 $max < $_->[0] + $_->[1] and $max = $_->[0] + $_->[1] for values %start_SEC;
9952             # warn "Basekeys: max=$max; cnt=$cnt";
9953 0         0 my(@o, @oo);
9954             #
9955             # warn("base: max=$max cnt=$cnt");
9956 0         0 for my $u (0..$max) {
9957 0         0 my $c = $self->base_unit($basesub, $u, $u >= $cnt); # [0 || 1 (in_main_island), VK, raw]
9958 0         0 my($k, $kk) = ($c->[1], $c->[2]); # uc(With prepended #), orig (or undef if not array)
9959 0 0       0 if (!$c->[0]) { # Main island of keyboard
                 # NOTE(review): the message interpolates the array ref $c (prints as ARRAY(0x...)), and by then $k
                 # has been overwritten by the failed %oem_keys lookup - the offending VKEY name is probably lost.
9960 0 0 0     0 $k = $oem_keys{$k} or warn("Can't find a key with VKEY `$c', unit=$u, lim=$cnt"), return
9961             unless $k =~ /^[A-Z0-9]$/;
9962             } else {
                 # NOTE(review): $U already holds the entries at index $u of each layer, yet the next row indexes
                 # each of them by $u once more - confirm which one is intended.
9963 0         0 my $U = [map $self->{layers}{$_}[$u], @$layers];
9964 0         0 my $keys = grep defined, map $self->flatten_arrays($_->[$u]), @$U;
9965 0 0 0     0 $keys and warn "Can't find the range of keys to which unit `$u' belongs (max=$max; cnt=$cnt)" unless defined $k;
9966 0         0 $kk = $k;
9967             }
9968 0         0 push @o, $k;
9969 0         0 push @oo, $kk;
9970             }
                 # '[VK_off]' maps a VK name to its unit index; a slot which is already taken is kept (with a warning)
9971 0         0 my $o = $F->{'[VK_off]'};
9972 0         0 for my $b (\@o, \@oo) { # Explicitly add via-VK keys
9973 0         0 for my $vk (keys %$o) {
9974 0 0       0 warn "[@$K]: $vk defined on \@$o->{$vk} as $b->[$o->{$vk}]" if defined $b->[$o->{$vk}];
9975 0 0       0 $b->[$o->{$vk}] = $vk unless defined $b->[$o->{$vk}];
9976             # warn "[@$K]: $vk \@ $o->{$vk}"; # SPACE @ 116 (on izKeys)
9977             }
9978             }
9979             # warn "BaseKeys: @o";
9980 0         0 $F->{baseKeysRaw} = \@oo;
                 # Last statement: its value (the array ref) is what the sub returns
9981 0         0 $F->{baseKeysWin} = \@o;
9982             }
9983            
9984            
9985             sub fill_win_template ($$$;$$) {
                 # Collect into %h the substitutions for $template_win describing the face at path $k (as in
                 # [qw(faces US)]), and return the result of massage_template($template_win, \%h).
                 #   $dummy     - if true, the layout/deadkey parts are replaced by fixed minimal placeholders;
                 #   $dummyDscr - if true, LAYOUTNAME is replaced by "KBD Layout <DLLNAME>".
                 # NOTE(review): the argument $t is unpacked but not used in the rows visible here.
9986 0     0 0 0 my @K = qw( COMPANYNAME LAYOUTNAME COPYR_YEARS LOCALE_NAME LOCALE_ID DLLNAME SORT_ORDER_ID_ LANGUAGE_NAME );
9987 0         0 my ($self, $t, $k, $dummy, $dummyDscr, %h) = (shift, shift, shift, shift, shift);
9988 0         0 $self->reset_units;
9989 0         0 my $B = $self->BaseKeys($k);
9990             # Dumpvalue->new()->dumpValue($self);
9991 0         0 my $idx = $self->get_deep($self, @$k, 'MetaData_Index');
9992 0         0 $h{$_} = $self->get_deep_via_parents($self, $idx, @$k, $_) for @K;
9993 0 0       0 $h{LAYOUTNAME} = "KBD Layout $h{DLLNAME}" if $dummyDscr; # error "the required resource DATABASE is missing" from setup.exe
                 # Characters at or above U+10000 are counted twice (presumably as UTF-16 code units - TODO confirm)
9994 0         0 my $LLL = length($h{LAYOUTNAME}) + grep ord >= 0x10000, split //, $h{LAYOUTNAME};
9995 0 0       0 warn "The DESCRIPTION of the layout [@$k] is longer than 63 chars;\n the name shown in LanguageBar/Settings may be empty"
9996             if $LLL > 63;
9997 0         0 $h{LAYOUTNAME} =~ s/([\\""])/\\$1/g; # C-like syntax (directly copied to resource files???)
9998             # warn "Translate: ", %h;
9999 0         0 my $F = $self->get_deep($self, @$k); # Presumably a face hash, as in $k = [qw(faces US)]
                  # One fresh empty hash per layer
10000 0         0 $F->{'[dead-used]'} = [map {}, @{$F->{layers}}]; # Which of deadkeys are reachable on the keyboard
  0         0  
10001 0         0 my $cnt = $F->{'[non_VK]'};
                  # ATTRIBS lists those of LRM_RLM ALTGR SHIFTLOCK for which the face has a true "[NAME]" entry
10002 0 0       0 if (grep $F->{"[$_]"}, qw(LRM_RLM ALTGR SHIFTLOCK)) {
10003 0         0 $h{ATTRIBS} = (join "\n ", "\nATTRIBUTES", grep $F->{"[$_]"}, qw(LRM_RLM ALTGR SHIFTLOCK)) . "\n" ;
10004             } else {
10005 0         0 $h{ATTRIBS} = ''; # default
10006             }
10007 0 0       0 if ($dummy) {
10008 0         0 @h{qw(DO_LIGA COL_HEADERS COL_EXPL KEYNAMES_DEAD DEADKEYS)} = ('') x 5;
                  # NOTE(review): the here-doc openers of the next row look truncated in this listing (only "(<" is
                  # left); the rows up to the two EOT markers are the bodies for LAYOUT_KEYS and BITS_TEMPLATE.
10009 0         0 @h{qw(LAYOUT_KEYS BITS_TEMPLATE)} = (<
10010             10 Q 0 q -1 -1 // LATIN SMALL LETTER Q, ,
10011             EOT
10012             0 // Column 4 :
10013             1 // Column 5 : Shift
10014             2 // Column 6 : Ctrl
10015             3 // Column 7 : Shift Ctrl
10016             6 // Column 12 : Ctrl Alt t
10017             7 // Column 13 : Shift Ctrl Alt t
10018             EOT
10019             } else {
10020 0         0 $h{LAYOUT_KEYS} = join '', $self->output_layout_win($k->[-1], $F->{layers}, $F->{'[dead]'}, $F->{'[dead-used]'}, $cnt, $B);
10021             # $h{LAYOUT_KEYS} .= join '', $self->output_VK_win($k->[-1], $F->{'[dead-used]'});
10022 0         0 $h{LAYOUT_KEYS} .= join '', $self->output_added_units();
10023            
10024 0         0 $h{DO_LIGA} = join '', $self->output_ligatures();
                  # NOTE(review): the next statement is truncated in this listing (here-doc opener and whatever
                  # follows it are lost); the rows up to EOPREF are the here-doc body with the LIGATURE table header.
10025 0 0       0 $h{DO_LIGA} = <
10026            
10027             LIGATURE
10028            
10029             // VK_ ModCol# Char0 Char1 Char2 Char3
10030             // --------- ------- ----- ----- ----- -----
10031            
10032            
10033             EOPREF
10034            
10035             ### Deadkeys??? need_extra_keys_to_access???
10036 0         0 my ($OUT, $OUT_NAMES) = ('', "KEYNAME_DEAD\n\n");
10037            
10038 0         0 my $f = $self->get_AgeList;
10039 0 0 0     0 $self->load_uniage($f) if defined $f and not $self->{Age};
10040            
10041 0         0 my($flip_AltGr_hex, %nn) = $F->{'[Flip_AltGr_Key]'};
10042 0 0       0 $flip_AltGr_hex = $self->key2hex($self->charhex2key($flip_AltGr_hex)) if defined $flip_AltGr_hex;
                  # Emit one deadkey table per entry of '[deadkeyFaceHexMap]', and gather in %nn the names for the
                  # deadkey (and for its AltGr-inverted companion, if any)
10043 0         0 for my $deadKey ( sort keys %{ $F->{'[deadkeyFaceHexMap]'} } ) {
  0         0  
10044 0 0       0 next if $F->{'[only_extra]'}{$self->charhex2key($deadKey)};
10045 0         0 my $auto_inv_AltGr = $F->{'[deadkeyInvAltGrKey]'}{$deadKey};
10046 0 0       0 $auto_inv_AltGr = $self->key2hex($auto_inv_AltGr) if defined $auto_inv_AltGr;
10047             #warn "flipkey=$flip_AltGr_hex, dead=$deadKey" if defined $flip_AltGr_hex;
10048 0         0 (my $nonempty, my $MAP) = $self->output_deadkeys($k->[-1], $deadKey, $F->{'[dead2]'}, $flip_AltGr_hex, $auto_inv_AltGr);
10049 0         0 $OUT .= "$MAP\n";
10050 0 0       0 my @K = ($deadKey, ($auto_inv_AltGr ? $auto_inv_AltGr : ()));
                  # Name: the first true one of the four sources, in this order
10051 0   0     0 my @N = map $self->{DEADKEYS}{$_} || $self->{'[seen_knames]'}{chr hex $_} || $F->{'[prefixDocs]'}{$_} || $self->UName($_), @K;
                  # Backslash-escape the double quotes and backslashes, as the names are emitted inside "..." below
10052 0         0 s/(?=[""\\])/\\/g for @N;
10053             # if (defined $N and length $N) {
10054 0         0 $nn{$K[$_]} = $N[$_] for 0..$#K;
10055             # }# else { warn "DeadKey `$deadKey' for face `@$k' has no name associated" }
10056             }
10057             # Apparently, if the name table is too long, the keyboard is not activatable (installs OK on Win7_64,
10058             # is in Settings' list, but is not in the panel's list). Omit the multiple-Compose entries as a workaround...
10059 0   0     0 $nn{$_} =~ /\bCompose\s+(Compose\b|(?!key)\S+)/ or $OUT_NAMES .= qq($_\t"$nn{$_}"\n) for sort keys %nn;
10060             #warn "Translate: ", %h;
10061 0         0 $h{DEADKEYS} = $OUT;
10062 0         0 $h{KEYNAMES_DEAD} = $OUT_NAMES;
                  # Modifier letter => bit value (R and L share the values of X and Y)
10063 0         0 my %mods = qw( S 1 C 2 A 4 K 8 X 16 Y 32 Z 64 T 128 R 16 L 32);
10064 0         0 $_ += 0 for values %mods; # Convert to numbers, so | works as expected
10065 0         0 my @cols;
10066 0 0       0 my %tr_mods_keys = ( @{ $F->{'[mods_keys_KBD]'} || [qw(rA CA)] } );
  0         0  
10067 0   0     0 my $mods_keys = $F->{'[layers_mods_keys]'} || ['', 'rA'];
10068 0   0     0 my $mods = $F->{'[layers_modifiers]'} || []; # || ['', 'CA']; # Plain, and Control-Alt
                  # Have at least as many entries in @$mods as in @$mods_keys (the added ones are undef)
10069 0 0       0 $#$mods = $#$mods_keys if $#$mods < $#$mods_keys;
                  # One bitmask per layer: from the explicit modifier letters, or (if those are blank/undefined)
                  # from the layer's keys of @$mods_keys translated through %tr_mods_keys
10070 0         0 for my $MOD ( @$mods ) {
10071 0         0 my $mask = 0;
10072 0 0       0 my $mod = ((defined $MOD) ? $MOD : ''); # Copy
10073 0 0       0 unless ($mod =~ /\S/) {
10074 0         0 my @K = grep /./, split /(?<=[A-Z])(?=[rl]?[A-Z])/, $mods_keys->[scalar @cols];
10075             #warn "cols=(@cols), K=(@K)\n";
10076 0         0 $mod = join '', map $tr_mods_keys{$_}, @K;
10077             }
10078 0         0 $mask |= $mods{$_} for split //, $mod;
10079 0         0 push @cols, $mask;
10080             }
10081 0         0 @cols = map {($_, $_ | $mods{S})} @cols; # Add shift
  0         0  
10082            
                  # NOTE(review): $ctrl_F is computed here, but its only use is in the commented-out row below.
10083 0   0     0 my($ctrl_f,$ctrl_F) = ($mods{C}, $tr_mods_keys{lC} || $tr_mods_keys{C} || $tr_mods_keys{rC} || 'C'); # Prefer left-Ctrl
10084             # $ctrl_f |= $mods{$_} for split //, $ctrl_F; # kbdutool complains if there is no column for 'C'
10085            
                  # Position of the Control column(s): per-face setting, else derived from the file-level $ctrl_after
10086 0         0 my $pre_ctrl = $self->get_deep($self, @$k, '[ctrl_after_modcol]');
10087 0 0       0 $pre_ctrl = 2*$ctrl_after unless defined $pre_ctrl;
10088 0         0 my $create_a_c = $self->get_deep($self, @$k, '[create_alpha_ctrl]');
10089 0 0       0 $create_a_c = $create_alpha_ctrl unless defined $create_a_c;
10090 0 0       0 splice @cols, $pre_ctrl, 0, $ctrl_f, ($create_a_c>1 ? $ctrl_f|$mods{S} : ()); # Control (and maybe Control-Shift)
10091 0 0       0 splice @cols, 15, 0, $mods{A} if @cols >= 16; # col=15 is the fake one; assigning it to Alt is the best palliative to fixing MSKLC
10092 0         0 $h{COL_HEADERS} = join "\t", map sprintf('%-3d[%d]', $cols[$_], $_), 0..$#cols;
10093 0         0 $h{COL_EXPL} = join "\t", map $self->fmt_bitmap_mods($cols[$_], $_, 'short'), 0..$#cols;
10094 0         0 $h{BITS_TEMPLATE} = join "\n", map { "$cols[$_]\t// Column " . (4+$_) . " :\t" . $self->fmt_bitmap_mods($cols[$_], $_) } 0..$#cols;
  0         0  
10095             # $h{BITS_TEMPLATE} =~ s(^(?=.*\bInvalid$))(#)m; # XXX Actually, MSKLC is not ignoring the leading #
10096             }
10097 0         0 $self->massage_template($template_win, \%h);
10098             }
10099            
10100             sub AppleMap_i_j ($$$$$;$$$$);
                  # This row and the previous one are forward declarations (prototype only, no body) of two subs
                  # which fill_osx_template calls as methods; the definitions are presumably later in the file.
10101             sub AppleMap_prefix ($$;$$$$$$);
10102            
10103             # https://developer.apple.com/library/mac/technotes/tn2056/_index.html
10104             sub fill_osx_template ($$) {
                  # Collect into %h the substitutions for $template_osx describing the face at path $k (as in
                  # [qw(faces US)]): the layout name (optionally decorated by the version), id, creator/date info,
                  # one keymap per modifier combination of %how, and the action/terminator tables.
                  # Returns the result of massage_template($template_osx, \%h).
10105 0     0 0 0 my @K = qw( OSX_LAYOUTNAME LAYOUTNAME OSX_ID OSX_ADD_VERSION OSX_DUP_KEYS COPYR_YEARS COMPANYNAME );
10106 0         0 my ($self, $k, %h, %ids) = (shift, shift);
10107 0         0 $self->reset_units;
                  # NOTE(review): $B is not used in the rows visible here; the call may be kept for what BaseKeys()
                  # stores in the face hash - confirm.
10108 0         0 my $B = $self->BaseKeys($k);
10109             # Dumpvalue->new()->dumpValue($self);
10110 0         0 my $idx = $self->get_deep($self, @$k, 'MetaData_Index');
10111 0         0 $h{$_} = $self->get_deep_via_parents($self, $idx, @$k, $_) for @K;
10112            
                  # LAYOUTNAME serves only as a fallback for OSX_LAYOUTNAME, and is not passed to the template
10113 0   0     0 $h{OSX_LAYOUTNAME} ||= $h{LAYOUTNAME};
10114 0         0 delete $h{LAYOUTNAME};
10115 0 0       0 $h{OSX_ID} = -17 unless defined $h{OSX_ID}; # (Arbitrary) Negative number
10116 0         0 my $v = $self->{VERSION};
                  # OSX_ADD_VERSION = N chooses where " v<VERSION>" goes: N > 0 - after the N-th word; N == -1 - at the
                  # end; otherwise (the remaining case, e.g. 0) - "v<VERSION> " at the start.  For N < -1 see the note below.
10117 0 0 0     0 if (defined $v and defined $h{OSX_ADD_VERSION}) {
10118 0 0       0 if ($h{OSX_ADD_VERSION} > 0) {
    0          
    0          
10119 0         0 my $c = $h{OSX_ADD_VERSION} - 1;
10120 0         0 $h{OSX_LAYOUTNAME} =~ s/^(\s*(\S+($|\s+)){$c}\S+)(?!\S)/$1 v$v/;
10121             } elsif ($h{OSX_ADD_VERSION} < -1) {
10122 0         0 my $c = -$h{OSX_ADD_VERSION} - 2;
                  # NOTE(review): the substitution on the next row is truncated in this listing; presumably it
                  # places the version counting the words from the end - verify against the module itself.
10123 0         0 $h{OSX_LAYOUTNAME} =~ s/((?
10124             } elsif ($h{OSX_ADD_VERSION} == -1) {
10125 0         0 $h{OSX_LAYOUTNAME} =~ s/\z/ v$v/;
10126             } else {
10127 0         0 $h{OSX_LAYOUTNAME} =~ s/^/v$v /;
10128             }
10129             }
10130 0         0 delete $h{OSX_ADD_VERSION};
                  # NOTE(review): $dupk is converted to a hash ref here, but is not used in the rows visible below
                  # ($ov->{dup} is filled from '[Apple_Duplicate]' instead) - confirm.
10131 0         0 my $dupk = delete $h{OSX_DUP_KEYS};
10132 0 0       0 $dupk = {@$dupk} if $dupk;
10133            
10134             # OSX_CREATOR version OSX_CREATOR_VERSION on OSX_EDIT_DATE
10135 0         0 my $file = $self->{'[file]'};
                  # $app is the plural suffix 's' when more than one file is recorded
10136 0   0     0 my $app = (defined $file and @$file > 1 and 's');
10137 0 0       0 $file = (defined $file) ? "keyboard layout file$app @$file" : 'string descriptor';
10138 0 0       0 $file .= " version $v" if defined $v;
10139 0 0       0 $file .= " Unicode tables version $self->{uniVersion}" if defined $self->{uniVersion};
10140 0         0 $h{OSX_CREATOR} = "UI::KeyboardLayout";
10141 0         0 $h{OSX_CREATOR_VERSION} = "$UI::KeyboardLayout::VERSION with $file";
                  # (year, month, day, hour, min, sec) in GMT; year and month are adjusted to the human form next
10142 0         0 my @t = (gmtime)[5,4,3,2,1,0];
10143 0         0 $t[0] += 1900; $t[1]++;
  0         0  
10144 0         0 $h{OSX_EDIT_DATE} = sprintf '%d-%02d-%02d at %d:%02d:%02d GMT', @t;
10145            
10146 0         0 my $F = $self->get_deep($self, @$k);
                  # NOTE(review): %nn is declared but not used in the rows visible here.
10147 0         0 my($flip_AltGr_hex, %nn) = $F->{'[Flip_AltGr_Key]'};
10148 0 0       0 $flip_AltGr_hex = $self->key2hex($self->charhex2key($flip_AltGr_hex)) if defined $flip_AltGr_hex;
10149 0         0 my %map; # Indexed by hex (??? What about UTF-16???)
                  # Same walk over the deadkeys as in fill_win_template, but output_deadkeys() gets \%map as an extra
                  # argument and its return value is ignored
10150 0         0 for my $deadKey ( sort keys %{ $F->{'[deadkeyFaceHexMap]'} } ) {
  0         0  
10151 0 0       0 next if $F->{'[only_extra]'}{$self->charhex2key($deadKey)};
10152 0         0 my $auto_inv_AltGr = $F->{'[deadkeyInvAltGrKey]'}{$deadKey};
10153 0 0       0 $auto_inv_AltGr = $self->key2hex($auto_inv_AltGr) if defined $auto_inv_AltGr;
10154             #warn "flipkey=$flip_AltGr_hex, dead=$deadKey" if defined $flip_AltGr_hex;
10155 0         0 $self->output_deadkeys($k->[-1], $deadKey, $F->{'[dead2]'}, $flip_AltGr_hex, $auto_inv_AltGr, \%map);
10156             }
10157            
                  # Template slot => up to four ';'-separated values, split below into ($l, $shift, $capsl, $use_base)
10158 0         0 my %how = qw( OSX_KEYMAP_0_AND_COMMAND 0;0;0
10159             OSX_KEYMAP_SHIFT 0;1;0
10160             OSX_KEYMAP_CAPS 0;0;1
10161             OSX_KEYMAP_OPTION 1;0;0
10162             OSX_KEYMAP_OPTION_SHIFT 1;1;0
10163             OSX_KEYMAP_OPTION_CAPS 1;0;1
10164             OSX_KEYMAP_OPTION_COMMAND 1;0;0
10165             OSX_KEYMAP_CTRL 0;0;0;-1
10166             OSX_KEYMAP_COMMAND 0;0;0;1
10167             ); # In US Extended, OPT-CMD is the same as OPT
10168             # OSX_KEYMAP_COMMAND_AS_BASE 0;0;0;0
                  # $ov starts undefined, and is autovivified into a hash by the assignments below
10169 0   0     0 my($OVERR, $ov) = $F->{'[Apple_Override]'} || [];
                  # Each override is "KEY,DEAD,OUT"; an OUT of hex4 or hex6 is kept literally (tagged 'lit'), anything
                  # else is converted by stringHEX2string() (tagged 0)
10170 0         0 for my $o (@$OVERR) {
10171 0         0 my($K, $dead, $out) = split /,/, $o, 3;
10172 0 0       0 if ($out =~ /^hex[46]\z/) {
10173 0         0 $out = ['lit', $out]
10174             } else {
10175 0         0 $out = [0, $self->stringHEX2string($out)]
10176             }
10177 0         0 $ov->{$K} = [$out->[1], undef, $dead, $out->[0]];
10178             }
10179 0   0     0 my $DUP = $F->{'[Apple_Duplicate]'} || [0x6e, 10, 0x47, 10, 0x66, 49, 0x68, 49]; # Mnu => ISO, KP_Clear => ISO, L/R-SPace => Space
10180 0         0 $ov->{dup} = {@$DUP};
10181 0         0 $ov->{extra_actions} = {};
                  # All the keymaps share \%ids, \%map and $ov
10182 0         0 for my $m (keys %how) {
10183 0         0 my($l, $shift, $capsl, $use_base) = split /;/, $how{$m};
10184 0         0 $h{$m} = $self->AppleMap_i_j ($k, $l, $shift, $capsl, $use_base, \%ids, \%map, $ov);
10185             }
10186             # warn "Need separate OSX_KEYMAP_COMMAND for k=$k\n" unless $h{OSX_KEYMAP_COMMAND} eq $h{OSX_KEYMAP_0_AND_COMMAND};
10187             # my $vbell = $self->get_deep_via_parents($self, undef, @$k, '[DeadChar_DefaultTranslation]');
10188             # $vbell =~ s/^\s+(?=.)//, $vbell = $self->charhex2key($vbell) if defined $vbell;
10189             # undef $vbell; # Terminators are used as visual feedback when prefix is pressed!
10190 0         0 my($S, %act) = $F->{'[Show]'};
                  # The map runs over ('', 'term') and yields two values per item (with a true, then with a false second
                  # argument), so the four slots are filled in the order: ''/true, ''/false, 'term'/true, 'term'/false
10191 0         0 @h{qw(OSX_ACTIONS_BASE OSX_ACTIONS OSX_TERMINATORS_BASE OSX_TERMINATORS2)}
10192             = map +($self->AppleMap_prefix(\%ids, 'do_initiating', $_, \%map, $S, $ov, \%act),
10193             $self->AppleMap_prefix(\%ids, !'do_initiating', $_, \%map, $S, $ov, \%act)), '', 'term';
10194            
10195 0         0 $self->massage_template($template_osx, \%h);
10196             }
10197            
10198             my $unused = <<'EOR';
10199             # extract compositions, add to char downgrades; -> composition, => compatibility composition
10200             perl -wlne "$k=$1, next if /^([\da-f]+)/i; undef $a; $a = qq($k -> $1) if /^\s+:\s*([0-9A-F]+(?:\s+[0-9A-F]+)*)/; $a = qq($k => $2 $1) if /^\s+#\s*((?:<.*?>\s+)?)([0-9A-F]+(?:\s+[0-9A-F]+)*)/; next unless $a; $a =~ s/\s*$/ / unless $a =~ />\s+\S.*\s\S/; print $a" NamesList.txt >compose2b-NamesList.txt
10201             # expand recursively
10202             perl -wlne "/^(.+?)\s+([-=])>\s+(.+?)\s*$/ or die; $t{$1} = $3; $h{$1}=$2; sub t($); sub t($) {my $i=shift; return $n{$i} if exists $n{$i}; return $i unless $t{$i}; $t{$i} =~ /^(\S+)(.*)/ or die $i; return t($1).$2} END{print qq($_\t:$h{$_} ), join q( ), sort split /\s+/, t($_) for sort {hex $a <=> hex $b} keys %t}" compose2b-NamesList.txt >compose3c-NamesList.txt
10203            
10204             #### perl -wlne "($k,$r)=/^(\S+)\s+:[-=]\s+(.*?)\s*$/ or die; $k{$r} = $k; $r{$k}=$r; END { for my $k (sort {hex $a <=> hex $b} keys %r) { my @r = split /\s+/, $r{$k}; for my $o (1..$#r) {my @rr = @r; splice @rr, $o, 1; my ($rr,$kk) = join q( ), @rr; print qq($k\t<= $kk ), $r[$o] if $kk = $k{$rr}} } }" compose3c-NamesList.txt >compose4-NamesList.txt
10205             perl -wlne "($k,$h,$r)=/^(\S+)\s+:([-=])\s+(.*?)\s*$/ or die; $k{$r} = $k; $r{$k}=$r; $hk{$k}=$hr{$r}= ($h eq q(=)); END { for my $k (sort {hex $a <=> hex $b} keys %r) { my $h = $hk{$k}; my @r = split /\s+/, $r{$k}; print qq($k\t:$h $r{$k}) and next if @r == 2; for my $o (1..$#r) {my @rr = @r; splice @rr, $o, 1; my ($rr,$kk) = join q( ), @rr; print qq($k\t<= $kk ), $r[$o] if $kk = $k{$rr}} } }" compose3c-NamesList.txt >compose4-NamesList.txt
10206            
10207            
10208             # Recursively decompose; :- composition, := compatibility composition
10209             perl -wlne "/^(.+?)\s+([-=])>\s+(.+?)\s*$/ or die; $t{$1} = $3; $h{$1}=$2 if $2 eq q(=); sub t($); sub t($) {my $i=shift; return $n{$i} if exists $n{$i}; return $i unless $t{$i}; $t{$i} =~ /^(\S+)(.*)/ or die $i; my @rr = t($1); return $rr[0].$2, $h{$i} || $rr[1]} END{my(@rr, $h); @rr=t($_), $h = $rr[1] || q(-), (@i = split /\s+/, $rr[0]), print qq($_\t:$h ), join q( ), $i[0], sort @i[1..$#i] for sort {hex $a <=> hex $b} keys %t}" compose2b-NamesList.txt >compose3e-NamesList.txt
10210             # Recompose parts to get "merge 2" decompositions; <- and <= if involve composition, :- and := otherwise
10211             perl -wlne "($k,$h,$r)=/^(\S+)\s+:([-=])\s+(.*?)\s*$/ or die; $k{$r} = $k; $r{$k}=$r; $hk{$k}=$hr{$r}= ($h eq q(=) ? q(=) : undef); END { for my $k (sort {hex $a <=> hex $b} keys %r) { my $h = $hk{$k} || q(-); my @r = split /\s+/, $r{$k}; print qq($k\t:$h $r{$k}) and next if @r == 2; my %s; for my $o (1..$#r) {my @rr = @r; next if $s{$rr[$o]}++; splice @rr, $o, 1; my ($rr,$kk) = join q( ), @rr; print qq($k\t<), $hk{$k} || $hr{$kk} || q(-), qq( $kk ), $r[$o] if $kk = $k{$rr}} } }" compose3e-NamesList.txt >compose4b-NamesList.txt
10212             # List of possible modifiers for each char, introduced by -->, separated by //
10213             perl -C31 -wlne "sub f($) {my $i=shift; return $i unless $i=~/^\w/; qq($i ).chr hex $i} sub ff($) {join q( ), map f($_), split /\s+/, shift} my($c,$B,$m) = /^(\S+)\s+[:<][-=]\s+(\S+)\s+(\S+)\s*$/ or die; push @{$c{$B}}, ff qq($m $c); END { for my $k (sort {hex $a <=> hex $b} keys %c) { print f($k), qq(\t--> ), join q( // ), sort @{$c{$k}} } }" compose4b-NamesList.txt >compose5d-NamesList.txt
10214             # Find what appears as modifiers:
10215             perl -F"\s+//\s+|\s+-->\s+" -wlane "s/\s+[0-9A-F]{4,}(\s\S+)?\s*$//, print for @F[1..$#F]" ! | sort -u >!-words
10216            
10217             Duplicate: 0296 <== [ 003F ] ==> <1 0295> (prefered)
10218             <ʖ> LATIN LETTER INVERTED GLOTTAL STOP
10219             <ʕ> LATIN LETTER PHARYNGEAL VOICED FRICATIVE at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
10220             Duplicate: 0384 <== [ 0020 0301 ] ==> <1 00B4> (prefered)
10221             <΄> GREEK TONOS
10222             <´> ACUTE ACCENT at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
10223             Duplicate: 1D43 <== [ 0061 ] ==> <1 00AA> (prefered)
10224             <ᵃ> MODIFIER LETTER SMALL A
10225             <ª> FEMININE ORDINAL INDICATOR at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
10226             Duplicate: 1D52 <== [ 006F ] ==> <1 00BA> (prefered)
10227             <ᵒ> MODIFIER LETTER SMALL O
10228             <º> MASCULINE ORDINAL INDICATOR at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
10229             Duplicate: 1D9F <== [ 0065 ] ==> <1 1D4C> (prefered)
10230             <ᶟ> MODIFIER LETTER SMALL REVERSED OPEN E
10231             <ᵌ> MODIFIER LETTER SMALL TURNED OPEN E at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
10232             Duplicate: 1E7A <== [ 0055 0304 0308 ] ==> <0 01D5> (prefered)
10233             <Ṻ> LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS
10234             <Ǖ> LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
10235             Duplicate: 1E7B <== [ 0075 0304 0308 ] ==> <0 01D6> (prefered)
10236             <ṻ> LATIN SMALL LETTER U WITH MACRON AND DIAERESIS
10237             <ǖ> LATIN SMALL LETTER U WITH DIAERESIS AND MACRON at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
10238             Duplicate: 1FBF <== [ 0020 0313 ] ==> <1 1FBD> (prefered)
10239             <᾿> GREEK PSILI
10240             <᾽> GREEK KORONIS at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
10241             Duplicate: 2007 <== [ 0020 ] ==> <1 00A0> (prefered)
10242             < > FIGURE SPACE
10243             < > NO-BREAK SPACE at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
10244             Duplicate: 202F <== [ 0020 ] ==> <1 00A0> (prefered)
10245             < > NARROW NO-BREAK SPACE
10246             < > NO-BREAK SPACE at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
10247             Duplicate: 2113 <== [ 006C ] ==> <1 1D4C1> (prefered)
10248             <ℓ> SCRIPT SMALL L
10249             <퓁> MATHEMATICAL SCRIPT SMALL L at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
10250             Duplicate: 24B8 <== [ 0043 ] ==> <1 1F12B> (prefered)
10251             <Ⓒ> CIRCLED LATIN CAPITAL LETTER C
10252             <> CIRCLED ITALIC LATIN CAPITAL LETTER C at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
10253             Duplicate: 24C7 <== [ 0052 ] ==> <1 1F12C> (prefered)
10254             <Ⓡ> CIRCLED LATIN CAPITAL LETTER R
10255             <> CIRCLED ITALIC LATIN CAPITAL LETTER R at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
10256             Duplicate: 2E1E <== [ 007E ] ==> <1 2A6A> (prefered)
10257             <⸞> TILDE WITH DOT ABOVE
10258             <⩪> TILDE OPERATOR WITH DOT ABOVE at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
10259             Duplicate: 33B9 <== [ 004D 0056 ] ==> <1 1F14B> (prefered)
10260             <㎹> SQUARE MV MEGA
10261             <> SQUARED MV at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
10262             Duplicate: FC03 <== [ 064A 0649 0654 ] ==> <1 FBF9> (prefered)
10263             <ﰃ> ARABIC LIGATURE YEH WITH HAMZA ABOVE WITH ALEF MAKSURA ISOLATED FORM
10264             <ﯹ> ARABIC LIGATURE UIGHUR KIRGHIZ YEH WITH HAMZA ABOVE WITH ALEF MAKSURA ISOLATED FORM at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
10265             Duplicate: FC68 <== [ 064A 0649 0654 ] ==> <1 FBFA> (prefered)
10266             <ﱨ> ARABIC LIGATURE YEH WITH HAMZA ABOVE WITH ALEF MAKSURA FINAL FORM
10267             <ﯺ> ARABIC LIGATURE UIGHUR KIRGHIZ YEH WITH HAMZA ABOVE WITH ALEF MAKSURA FINAL FORM at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
10268             Duplicate: FD55 <== [ 062A 062C 0645 ] ==> <1 FD50> (prefered)
10269             <ﵕ> ARABIC LIGATURE TEH WITH MEEM WITH JEEM INITIAL FORM
10270             <ﵐ> ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
10271             Duplicate: FD56 <== [ 062A 062D 0645 ] ==> <1 FD53> (prefered)
10272             <ﵖ> ARABIC LIGATURE TEH WITH MEEM WITH HAH INITIAL FORM
10273             <ﵓ> ARABIC LIGATURE TEH WITH HAH WITH MEEM INITIAL FORM at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
10274             Duplicate: FD57 <== [ 062A 062E 0645 ] ==> <1 FD54> (prefered)
10275             <ﵗ> ARABIC LIGATURE TEH WITH MEEM WITH KHAH INITIAL FORM
10276             <ﵔ> ARABIC LIGATURE TEH WITH KHAH WITH MEEM INITIAL FORM at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
10277             Duplicate: FD5D <== [ 0633 062C 062D ] ==> <1 FD5C> (prefered)
10278             <ﵝ> ARABIC LIGATURE SEEN WITH JEEM WITH HAH INITIAL FORM
10279             <ﵜ> ARABIC LIGATURE SEEN WITH HAH WITH JEEM INITIAL FORM at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
10280             Duplicate: FD87 <== [ 0644 062D 0645 ] ==> <1 FD80> (prefered)
10281             <ﶇ> ARABIC LIGATURE LAM WITH MEEM WITH HAH FINAL FORM
10282             <ﶀ> ARABIC LIGATURE LAM WITH HAH WITH MEEM FINAL FORM at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
10283             Duplicate: FD8C <== [ 0645 062C 062D ] ==> <1 FD89> (prefered)
10284             <ﶌ> ARABIC LIGATURE MEEM WITH JEEM WITH HAH INITIAL FORM
10285             <ﶉ> ARABIC LIGATURE MEEM WITH HAH WITH JEEM INITIAL FORM at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
10286             Duplicate: FD92 <== [ 0645 062C 062E ] ==> <1 FD8E> (prefered)
10287             <ﶒ> ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM
10288             <ﶎ> ARABIC LIGATURE MEEM WITH KHAH WITH JEEM INITIAL FORM at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
10289             Duplicate: FDB5 <== [ 0644 062D 0645 ] ==> <1 FD88> (prefered)
10290             <ﶵ> ARABIC LIGATURE LAM WITH HAH WITH MEEM INITIAL FORM
10291             <ﶈ> ARABIC LIGATURE LAM WITH MEEM WITH HAH INITIAL FORM at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
10292             Duplicate: FE34 <== [ 005F ] ==> <1 FE33> (prefered)
10293             <︴> PRESENTATION FORM FOR VERTICAL WAVY LOW LINE
10294             <︳> PRESENTATION FORM FOR VERTICAL LOW LINE at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
10295            
10296             Duplicate: 0273 <== [ 006E ] ==> <1 014B> (prefered)
10297             <ɳ> LATIN SMALL LETTER N WITH RETROFLEX HOOK
10298             <ŋ> LATIN SMALL LETTER ENG at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10299             Duplicate: 1DAF <== [ 006E ] ==> <1 1D51> (prefered)
10300             <ᶯ> MODIFIER LETTER SMALL N WITH RETROFLEX HOOK
10301             <ᵑ> MODIFIER LETTER SMALL ENG at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10302             Duplicate: 2040 <== [ 007E ] ==> <1 203F> (prefered)
10303             <⁀> CHARACTER TIE
10304             <‿> UNDERTIE at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10305             Duplicate: 207F <== [ 004E ] ==> <1 014A> (prefered)
10306             <ⁿ> SUPERSCRIPT LATIN SMALL LETTER N
10307             <Ŋ> LATIN CAPITAL LETTER ENG at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10308             Duplicate: 224B <== [ 007E ] ==> <1 2248> (prefered)
10309             <≋> TRIPLE TILDE
10310             <≈> ALMOST EQUAL TO at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10311             Duplicate: 2256 <== [ 003D ] ==> <1 224D> (prefered)
10312             <≖> RING IN EQUAL TO
10313             <≍> EQUIVALENT TO at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10314             Duplicate: 2257 <== [ 003D ] ==> <1 224D> (prefered)
10315             <≗> RING EQUAL TO
10316             <≍> EQUIVALENT TO at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10317             Duplicate: 225E <== [ 225F ] ==> <1 225C> (prefered)
10318             <≞> MEASURED BY
10319             <≜> DELTA EQUAL TO at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10320             Duplicate: 2263 <== [ 003D ] ==> <1 2261> (prefered)
10321             <≣> STRICTLY EQUIVALENT TO
10322             <≡> IDENTICAL TO at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10323             Duplicate: 2277 <== [ 003D 0338 ] ==> <1 2276> (prefered)
10324             <≷> GREATER-THAN OR LESS-THAN
10325             <≶> LESS-THAN OR GREATER-THAN at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10326             Duplicate: 2279 <== [ 003D ] ==> <1 2278> (prefered)
10327             <≹> NEITHER GREATER-THAN NOR LESS-THAN
10328             <≸> NEITHER LESS-THAN NOR GREATER-THAN at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10329             Duplicate: 2279 <== [ 003D 0338 0338 ] ==> <1 2278> (prefered)
10330             <≹> NEITHER GREATER-THAN NOR LESS-THAN
10331             <≸> NEITHER LESS-THAN NOR GREATER-THAN at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10332             Duplicate: 2982 <== [ 003A ] ==> <1 2236> (prefered)
10333             <⦂> Z NOTATION TYPE COLON
10334             <∶> RATIO at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10335             Duplicate: 2993 <== [ 0028 ] ==> <1 2985> (prefered)
10336             <⦓> LEFT ARC LESS-THAN BRACKET
10337             <⦅> LEFT WHITE PARENTHESIS at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10338             Duplicate: 2994 <== [ 0029 ] ==> <1 2986> (prefered)
10339             <⦔> RIGHT ARC GREATER-THAN BRACKET
10340             <⦆> RIGHT WHITE PARENTHESIS at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10341             Duplicate: 2995 <== [ 0029 ] ==> <1 2986> (prefered)
10342             <⦕> DOUBLE LEFT ARC GREATER-THAN BRACKET
10343             <⦆> RIGHT WHITE PARENTHESIS at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10344             Duplicate: 2996 <== [ 0028 ] ==> <1 2985> (prefered)
10345             <⦖> DOUBLE RIGHT ARC LESS-THAN BRACKET
10346             <⦅> LEFT WHITE PARENTHESIS at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10347             Duplicate: 29BC <== [ 0025 ] ==> <1 2030> (prefered)
10348             <⦼> CIRCLED ANTICLOCKWISE-ROTATED DIVISION SIGN
10349             <‰> PER MILLE SIGN at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10350             Duplicate: 2A17 <== [ 222B ] ==> <1 2A10> (prefered)
10351             <⨗> INTEGRAL WITH LEFTWARDS ARROW WITH HOOK
10352             <⨐> CIRCULATION FUNCTION at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10353             Duplicate: 2A34 <== [ 00D7 ] ==> <1 22C9> (prefered)
10354             <⨴> MULTIPLICATION SIGN IN LEFT HALF CIRCLE
10355             <⋉> LEFT NORMAL FACTOR SEMIDIRECT PRODUCT at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10356             Duplicate: 2A35 <== [ 00D7 ] ==> <1 22CA> (prefered)
10357             <⨵> MULTIPLICATION SIGN IN RIGHT HALF CIRCLE
10358             <⋊> RIGHT NORMAL FACTOR SEMIDIRECT PRODUCT at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10359             Duplicate: 2A36 <== [ 00D7 ] ==> <1 2A2F> (prefered)
10360             <⨶> CIRCLED MULTIPLICATION SIGN WITH CIRCUMFLEX ACCENT
10361             <⨯> VECTOR OR CROSS PRODUCT at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10362             Duplicate: 2A50 <== [ 00D7 ] ==> <1 2A33> (prefered)
10363             <⩐> CLOSED UNION WITH SERIFS AND SMASH PRODUCT
10364             <⨳> SMASH PRODUCT at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10365             Duplicate: 2ACF <== [ 25C1 ] ==> <1 2A1E> (prefered)
10366             <⫏> CLOSED SUBSET
10367             <⨞> LARGE LEFT TRIANGLE OPERATOR at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10368             Duplicate: 2AFB <== [ 2223 ] ==> <1 2AF4> (prefered)
10369             <⫻> TRIPLE SOLIDUS BINARY RELATION
10370             <⫴> TRIPLE VERTICAL BAR BINARY RELATION at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10371             Duplicate: 2AFB <== [ 007C ] ==> <1 2AF4> (prefered)
10372             <⫻> TRIPLE SOLIDUS BINARY RELATION
10373             <⫴> TRIPLE VERTICAL BAR BINARY RELATION at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10374             Duplicate: 2AFD <== [ 002F ] ==> <1 2215> (prefered)
10375             <⫽> DOUBLE SOLIDUS OPERATOR
10376             <∕> DIVISION SLASH at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10377             Duplicate: 2AFF <== [ 007C ] ==> <1 2AFE> (prefered)
10378             <⫿> N-ARY WHITE VERTICAL BAR
10379             <⫾> WHITE VERTICAL BAR at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10380             Duplicate: 3018 <== [ 0028 ] ==> <1 27EE> (prefered)
10381             <〘> LEFT WHITE TORTOISE SHELL BRACKET
10382             <⟮> MATHEMATICAL LEFT FLATTENED PARENTHESIS at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10383             Duplicate: 3019 <== [ 0029 ] ==> <1 27EF> (prefered)
10384             <〙> RIGHT WHITE TORTOISE SHELL BRACKET
10385             <⟯> MATHEMATICAL RIGHT FLATTENED PARENTHESIS at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10386             Duplicate: A760 <== [ 0059 ] ==> <1 A73C> (prefered)
10387             <Ꝡ> LATIN CAPITAL LETTER VY
10388             <Ꜽ> LATIN CAPITAL LETTER AY at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10389             Duplicate: A761 <== [ 0079 ] ==> <1 A73D> (prefered)
10390             <ꝡ> LATIN SMALL LETTER VY
10391             <ꜽ> LATIN SMALL LETTER AY at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10392             Duplicate: 1D4C1 <== [ 006C ] ==> <1 2113> (prefered)
10393             <𝓁> MATHEMATICAL SCRIPT SMALL L
10394             <ℓ> SCRIPT SMALL L at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10395             Duplicate: 1F12B <== [ 0043 ] ==> <1 24B8> (prefered)
10396             <🄫> CIRCLED ITALIC LATIN CAPITAL LETTER C
10397             <Ⓒ> CIRCLED LATIN CAPITAL LETTER C at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10398             Duplicate: 1F12C <== [ 0052 ] ==> <1 24C7> (prefered)
10399             <🄬> CIRCLED ITALIC LATIN CAPITAL LETTER R
10400             <Ⓡ> CIRCLED LATIN CAPITAL LETTER R at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10401             Duplicate: 1F14B <== [ 004D 0056 ] ==> <1 33B9> (prefered)
10402             <🅋> SQUARED MV
10403             <㎹> SQUARE MV MEGA at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10404             Duplicate: A789 <== [ 003A ] ==> <1 02F8> (prefered)
10405             <꞉> MODIFIER LETTER COLON
10406             <˸> MODIFIER LETTER RAISED COLON at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 8032, <$f> line 39278.
10407             Duplicate: 02EF <== [ 0020 0306 ] ==> <1 02EC> (prefered)
10408             <˯> 02EF MODIFIER LETTER LOW DOWN ARROWHEAD
10409             <ˬ> 02EC MODIFIER LETTER VOICING at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 8634, <$f> line 39278.
10410             Duplicate: 2B95 <== [ 2192 ] ==> <1 27A1> (prefered)
10411             <⮕> 2B95 RIGHTWARDS BLACK ARROW
10412             <➡> 27A1 BLACK RIGHTWARDS ARROW at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 9828, <$f> line 43944.
10413             Duplicate: 1F7C6 <== [ 2727 ] ==> <1 2726> (prefered)
10414             <🟆> 1F7C6 FOUR POINTED BLACK STAR
10415             <✦> 2726 BLACK FOUR POINTED STAR at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 9828, <$f> line 43944.
10416             Duplicate: 27C2 <== [ 005F ] ==> <1 221F> (prefered)
10417             <⟂> 27C2 PERPENDICULAR
10418             <∟> 221F RIGHT ANGLE at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 10537, <$f> line 43944.
10419             Duplicate: 2ADB <== [ 0028 ] ==> <1 220B> (prefered)
10420             <⫛> 2ADB TRANSVERSAL INTERSECTION
10421             <∋> 220B CONTAINS AS MEMBER at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 10537, <$f> line 43944.
10422             Duplicate: 1F5A4 <== [ 2661 ] ==> <1 2665> (prefered)
10423             <🖤> 1F5A4 BLACK HEART
10424             <♥> 2665 BLACK HEART SUIT at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 10488, <$f> line 48770.
10425             EOR
10426            
10427             my (%known_dups) = map +($_,1), # As of Unicode 9.0 (questionable: 2982 2ACF 2ADB)
                  # Set-like hash: each hex code point listed below becomes a key with value 1.
                  # The codes are the same ones reported by the "Duplicate: XXXX <== ..." messages in the text above.
                  # NOTE(review): the consumer of %known_dups is outside this section; presumably it is used to
                  # silence those already-known "Duplicate" warnings -- confirm against the caller.
10428             qw(0296 0384 1D43 1D52 1D9F 1E7A 1E7B 1FBF 2007
10429             202F 2113 24B8 24C7 2E1E 33B9 FC03 FC68 FD55 FD56 FD57 FD5D FD87 FD8C
10430             FD92 FDB5 FE34 2B95 1F7C6 27C2 2ADB 1F5A4
10431             0273 1DAF 2040 207F 224B 2256 2257 225E 2263 2277 2279 2982 2993 2994 2995 2996 29BC
10432             2A17 2A34 2A35 2A36 2A50 2ACF 2AFB 2AFD 2AFF 3018 3019 A760 A761 1D4C1 1F12B 1F12C 1F14B A789 02EF);
10433            
10434             sub decompose_r($$$$); # recursive
                  # decompose_r($self, $t, $i, $cache): memoized recursive expansion of the key $i through the table $t.
                  #   $t     - hash ref; $t->{$i}, when present, is an array ref of alternatives for $i.  Each alternative
                  #            $in is read here as [$compat, $first, @rest]: $first is expanded recursively, while
                  #            @rest (elements 2..$#$in) is appended to every expansion unchanged.
                  #   $i     - the key to expand (also the key under which the result is stored in %$cache).
                  #   $cache - hash ref used as the memo table; it is both read and written.
                  # Returns an array ref; every element is [$compat, @expand] with $compat being 0 or 1
                  # (presumably the "compatibility decomposition" flag -- confirm against the code filling $t).
                  # A key with no entry in $t expands to itself: [[0, $i]].
10435             sub decompose_r($$$$) { # returns array ref, elts are [$compat, @expand]
                  # @expand is not covered by the four shifts, so it starts empty; it accumulates the result.
10436 0     0 0 0 my ($self, $t, $i, $cache, @expand) = (shift, shift, shift, shift);
10437 0 0       0 return $cache->{$i} if $cache->{$i};
                  # No decomposition known for $i: it is terminal, with the flag 0.
10438 0 0       0 return $cache->{$i} = [[0, $i]] unless my $In = $t->{$i};
10439 0         0 for my $in (@$In) {
10440 0         0 my $compat = $in->[0];
10441             #warn "i=<$i>, compat=<$compat>, rest=<$in->[1]>";
10442 0         0 my $expand_in = $self->decompose_r($t, $in->[1], $cache);
                  # The recursive call may return the very array stored in %$cache, and the shift() below modifies
                  # the inner arrays; presumably deep_copy() (defined elsewhere) yields an independent copy so that
                  # the cached value stays intact -- confirm.
10443 0         0 $expand_in = $self->deep_copy($expand_in);
10444             #warn "Got: $in->[1] -> <@$expand> from $i = <@$in>";
10445 0         0 for my $expand (@$expand_in) {
                  # Every expansion must be [flag, at least one element] with the flag being 0 or 1.
10446 0 0 0     0 warn "Expansion funny: <@$expand>" if @$expand < 2 or $expand->[0] !~ /^[01]$/;
                  # Strip the flag of the sub-expansion and OR it into the flag of this alternative.
                  # NOTE(review): $compat is declared once per $in and updated in place, so a flag of 1 coming from
                  # an earlier $expand carries over to all the later expansions of the same $in -- confirm intended.
10447 0         0 $compat = ( shift(@$expand) | $compat);
                  # After removing the flag, the first element should be a key, not something that looks like a flag.
10448 0 0       0 warn "!Malformed: $i -> $compat <@$expand>" if $expand->[0] =~ /^[01]$/;
10449 0         0 push @expand, [ $compat, @$expand, @$in[2..$#$in] ];
10450             }
10451             }
10452 0         0 return $cache->{$i} = \@expand;
10453             }
10454            
10455 0 0   0 0 0 sub fromHEX ($) { my $i = shift; $i =~ /^\w/ and hex $i} # hex($i) if $i starts with a word character; otherwise the (false) result of the failed match
  0         0  
10456            
10457             my %operators = (DOT => ['MIDDLE DOT', 'FULL STOP'], RING => ['DEGREE SIGN'], DIAMOND => ['WHITE DIAMOND'],
                  # Each key is an operator word; the value is an array ref of Unicode character names.
                  # NOTE(review): the consumer is outside this section; presumably the names are the characters
                  # which may stand for this operator when names are analysed -- confirm against the caller.
10458             'DOUBLE SOLIDUS' => ['PARALLEL TO'], MINUS => ['HYPHEN-MINUS']);
10459            
10460             # THIS IS A MULTIMAP (later entry for a TARGET wins)! ■□ ◼◻ ◾◽ ◇◆◈⟐⟡⟢⟣⌺ △▲▵▴▽▼▿▾⟁⧊⧋
                  # Hand-maintained tables: each key names a transformation, and its value is an array ref holding a flat
                  # list of characters.  The lists are read as consecutive pairs (e.g. greek2coptic lists a Greek letter
                  # followed by its Coptic counterpart); the same first element may appear in several pairs.
                  # NOTE(review): the code consuming these pairs is outside this section -- confirm the pair direction
                  # and the "later entry wins" rule there.
                  # '#' and ',' are given outside of the qw() lists (see superize, subize).
10461             my %uni_manual = (phonetized => [qw( 0 ə s ʃ z ʒ j ɟ v ⱱ n ɳ N ⁿ n ŋ V ɤ ! ǃ ? ʔ ¿ ʕ | ǀ f ʄ F ǂ x ʘ X ǁ
10462             g ʛ m ɰ h ɧ d ᶑ C ʗ)], # z ɮ (C ʗ is "extras")
10463             phonetize2 => [qw( e ɘ E ɞ i ɻ I ɺ)], # Use some capitalized sources (no uc variants)...
10464             phonetize3 => [qw( a ɒ A Ɒ e ɜ E ɝ)], # Use some capitalized sources (no uc variants)...
10465             phonetize0 => [qw( e ə)],
10466             paleo => [qw( & ⁊ W Ƿ w ƿ h ƕ H Ƕ G Ȝ g ȝ )],
10467             # cut&paste from http://en.wikipedia.org/wiki/Coptic_alphabet
10468             # perl -C31 -wne "chomp; ($uc,$lc,undef,undef,$gr) = split /\t/;($ug,$lg)=split /,\s+/, $gr; print qq( $lg $lc $ug $uc)" coptic2 >coptic-tr
10469             # Fix stigma, koppa; p/P are actually 900; a/A are for AKHMIMIC KHEI (variant of KHEI on h/H);
10470             # 2e17 ⸗ double hyphen; sampi's are duplicated in both places
10471             greek2coptic => [qw(
10472             α ⲁ Α Ⲁ β ⲃ Β Ⲃ γ ⲅ Γ Ⲅ δ ⲇ Δ Ⲇ ε ⲉ Ε Ⲉ ϛ ⲋ Ϛ Ⲋ ζ ⲍ Ζ Ⲍ η ⲏ Η Ⲏ ϙ ϭ Ϙ Ϭ ϡ ⳁ Ϡ Ⳁ
10473             θ ⲑ Θ Ⲑ ι ⲓ Ι Ⲓ κ ⲕ Κ Ⲕ λ ⲗ Λ Ⲗ μ ⲙ Μ Ⲙ ν ⲛ Ν Ⲛ ξ ⲝ Ξ Ⲝ ο ⲟ Ο Ⲟ
10474             π ⲡ Π Ⲡ ρ ⲣ Ρ Ⲣ σ ⲥ Σ Ⲥ τ ⲧ Τ Ⲧ υ ⲩ Υ Ⲩ φ ⲫ Φ Ⲫ χ ⲭ Χ Ⲭ ψ ⲯ Ψ Ⲯ ω ⲱ Ω Ⲱ )],
10475             latin2extracoptic => [qw( - ⸗
10476             s ϣ S Ϣ f ϥ F Ϥ x ϧ X Ϧ h ϩ H Ϩ j ϫ J Ϫ t ϯ T Ϯ p ⳁ P Ⳁ a ⳉ A Ⳉ )],
10477             addline => [qw( 0 ∅ ∅ ⦱ + ∦ ∫ ⨏ • ⊝ / ⫽ ⫽ ⫻ ∮ ⨔ × ⨳ × ⩐ )], # ∮ ⨔ a cheat
10478             addhline => [qw( = ≣ = ≡ ≡ ≣ † ‡ + ∦ / ∠ | ∟ . ∸ ∨ ⊻ ∧ ⊼ ◁ ⩤ * ⩮
10479             ⊨ ⫢ ⊦ ⊧ ⊤ ⫧ ⊥ ⫨ ⊣ ⫤ ⊳ ⩥ ⊲ ⩤ ⋄ ⟠ ∫ ⨍ ⨍ ⨎ • ⦵ ( ∈ ) ∋
10480             ∪ ⩌ ∩ ⩍ ≃ ≅ ⨯ ⨲ )], # conflict with modifiers: qw( _ ‗ ); ( ∈ ) ∋ not very useful - but logical - with ∈∋ as bluekeys... 2 ƻ destructive
10481             addvline => [qw( ⊢ ⊩ ⊣ ⫣ ⊤ ⫪ ⊥ ⫫ □ ⎅ | ‖ ‖ ⦀ ∫ ⨒ ≢ ⩨ ⩨ ⩩ • ⦶
10482             \ ⫮ ° ⫯ . ⫰ ⫲ ⫵ ∞ ⧞ = ⧧ ⧺ ⧻ + ⧺ ∩ ⨙ ∪ ⨚ 0 ⦽ _ ⟂ _ ∟ )], # + ⫲
10483             addtilde => [qw( 0 ∝ / ∡ \ ∢ ∫ ∱ ∮ ⨑ : ∻ - ≂ ≠ ≆ ~ ≋ ~ ≈ ∼ ≈ ≃ ≊ ≈ ≋ = ≌
10484             ≐ ≏ ( ⟅ ) ⟆ ∧ ⩄ ∨ ⩅ ∩ ⩆ ∪ ⩇ )], # not on 2A**
10485             adddot => [qw( : ⫶ " ∵ ∫ ⨓ ∮ ⨕ □ ⊡ ◇ ⟐ ( ⦑ ) ⦒ ≟ ≗ ≐ ≑
10486             - ┄ — ┄ ─ ┈ ━ ┅ ═ ┉ | ┆ │ ┊ ┃ ┇ ║ ┋ )], # ⫶ is tricolon, not vert. … "; (m-)dash/bar, (b)[h/v]draw, bold/dbl
10487             adddottop => [qw( + ∔ )],
10488             addleft => [qw( = ≔ × ⨴ × ⋉ \ ⋋ + ⨭ → ⧴ ∫ ⨐ ∫ ⨗ ∮ ∳ ⊂ ⟈ ⊃ ⫐ ⊳ ⧐ ⊢ ⊩ ⊩ ⊪ ⊣ ⟞
10489             ◇ ⟢ ▽ ⧨ ≡ ⫢ • ⥀ ⋈ ⧑ ≟ ⩻ ≐ ≓ | ⩘ ≔ ⩴ ⊲ ⫷)], # × ⨴ is hidden
10490             addright => [qw( = ≕ × ⨵ × ⋊ / ⋌ + ⨮ - ∹ ∫ ⨔ ∮ ∲ ⊂ ⫏ ⊃ ⟉ ⊲ ⧏ ⊢ ⟝ ⊣ ⫣
10491             ◇ ⟣ △ ⧩ • ⥁ ⋈ ⧒ ≟ ⩼ ≐ ≒ | ⩗ ⊳ ⫸ : ⧴)], # × ⨵ is hidden
10492             sharpen => [qw( < ≺ > ≻ { ⊰ } ⊱ ( ⟨ ) ⟩ ∧ ⋏ ∨ ⋎ . ⋄ ⟨ ⧼ ⟩ ⧽ ∫ ⨘
10493             ⊤ ⩚ ⊥ ⩛ ◇ ⟡ ▽ ⧍ • ⏣ ≟ ≙ + ⧾ - ⧿)], # ⋆
10494             unsharpen => [qw( < ⊏ > ⊐ ( ⟮ ) ⟯ ∩ ⊓ ∪ ⊔ ∧ ⊓ ∨ ⊔ . ∷ ∫ ⨒ ∮ ⨖ { ⦉ } ⦊
10495             / ⧄ \ ⧅ ° ⧇ ◇ ⌺ • ⌼ ≟ ≚ ≐ ∺ ( 〘 ) 〙 )], # + ⊞ - ⊟ * ⊠ . ⊡ × ⊠, ( ⦗ ) ⦘ ( 〔 ) 〕
10496             whiten => [qw( [ ⟦ ] ⟧ ( ⟬ ) ⟭ { ⦃ } ⦄ ⊤ ⫪ ⊥ ⫫ ; ⨟ ⊢ ⊫ ⊣ ⫥ ⊔ ⩏ ⊓ ⩎ ∧ ⩓ ∨ ⩔ _ ‗ = ≣
10497             : ⦂ | ⫾ | ⫿ • ○ < ⪡ > ⪢ ⊓ ⩎ ⊔ ⩏ )], # or blacken □ ■ ◻ ◼ ◽ ◾ ◇ ◆ △ ▲ ▵ ▴ ▽ ▼ ▿ ▾
10498             quasisynon => [qw( ∈ ∊ ∋ ∍ ≠ ≶ ≠ ≷ = ≸ = ≹ ≼ ⊁ ≽ ⊀ ≺ ⋡ ≻ ⋠ < ≨ > ≩ Δ ∆
10499             ≤ ⪕ ≥ ⪖ ⊆ ⊅ ⊇ ⊄ ⊂ ⊉ ⊃ ⊈ ⊏ ⋣ ⊐ ⋢ ⊳ ⋬ ⊲ ⋭ … ⋯ / ⟋ \ ⟍
10500             ( ⦇ ) ⦈ [ ⨽ ] ⨼ ∅ ⌀
10501             ⊤ ⫟ ⊥ ⫠ ⟂ ⫛ □ ∎ ▽ ∀ ‖ ∥ ≟ ≞ ≟ ≜ ~ ‿ ~ ⁀ ■ ▬ )], # ( ⟬ ) ⟭ < ≱ > ≰ ≤ ≯ ≥ ≮ * ⋆
10502             amplify => [qw( < ≪ > ≫ ≪ ⋘ ≫ ⋙ ∩ ⋒ ∪ ⋓ ⊂ ⋐ ⊃ ⋑ ( ⟪ ) ⟫ ∼ ∿ = ≝ ∣ ∥ . ⋮
10503             ∈ ∊ ∋ ∍ - − / ∕ \ ∖ √ ∛ ∛ ∜ ∫ ∬ ∬ ∭ ∭ ⨌ ∮ ∯ ∯ ∰ : ⦂ ` ⎖
10504             : ∶ ≈ ≋ ≏ ≎ ≡ ≣ × ⨯ + ∑ Π ∏ Σ ∑ ρ ∐ ∐ ⨿ ⊥ ⟘ ⊤ ⟙ ⟂ ⫡ ; ⨾ □ ⧈ ◇ ◈
10505             ⊲ ⨞ ⊢ ⊦ △ ⟁ ∥ ⫴ ⫴ ⫼ / ⫽ ⫽ ⫻ • ● ⊔ ⩏ ⊓ ⩎ ∧ ⩕ ∨ ⩖ ▷ ⊳ ◁ ⊲
10506             ⋉ ⧔ ⋊ ⧕ ⋈ ⧓ ⪡ ⫷ ⪢ ⫸ ≟ ≛ ≐ ≎ ⊳ ⫐ ⊲ ⫏ { ❴ } ❵ × ⨶ )], # ` ⋆ ☆ ⋆ ★ ; ˆ ∧ conflicts with combining-ˆ; * ∏ stops propagation *->×->⋈, : ⦂ hidden; ∥ ⫴; × ⋈ not needed; ∰ ⨌ - ???; ≃ ≌ not useful
10507             turnaround => [qw( ∧ ∨ ∩ ∪ ∕ ∖ ⋏ ⋎ ∼ ≀ ⋯ ⋮ … ⋮ ⋰ ⋱ _ ‾
10508             8 ∞ ∆ ∇ Α ∀ Ε ∃ ∴ ∵ ≃ ≂
10509             ∈ ⫛ ∈ ∋ ∋ ⫙ ∉ ∌ ∊ ∍ ∏ ∐ ± ∓ ⊓ ⊔ ≶ ≷ ≸ ≹ ⋀ ⋁ ⋂ ⋃ ⋉ ⋊ ⋋ ⋌ ⋚ ⋛ ≤ ⋜ ≥ ⋝ ≼ ⋞ ≽ ⋟ )], # XXXX Can't do both directions
10510             superize => [qw( h ʱ ' ʹ < ˂ > ˃ ^ ˑ ( ˓ ) ˒ ⊢ ˫ 0 ᵊ * ˟ × ˟ ~ ﹋ ≈ ﹌ ─ ‾
10511             □ ⸋ . ⸳ @ ♭), '#' => '♯'], # ' Additions to !
10512             subize => [qw( < ˱ > ˲ _ ˍ ' ˏ " ˶ ˵ ˵ . ˳ ° ˳ ˘ ˯ ˘ ˬ ( ˓ ) ˒ 0 ₔ ~ ﹏ ═ ‗), ',' => '¸'], # '
10513             subize2 => [qw( < ˂ > ˃ )], # these are in older Unicode, so would override if in subize
10514             # Most of these are for I/O on very ancient systems (only ∘ and ∅ are not auto-detected on quadapl):
10515             aplbox => [qw( | ⌷ = ⌸ ÷ ⌹ ◇ ⌺ ∘ ⌻ ○ ⌼ / ⍁ \ ⍂ < ⍃ > ⍄ ← ⍇ → ⍈ ∨ ⍌ Δ ⍍ ↑ ⍐ ∧ ⍓ ∇ ⍔ ↓ ⍗ ' ⍞ : ⍠ ≠ ⍯ ? ⍰ ∅ ⎕ )], #'
10516             round => [qw( < ⊂ > ⊃ = ≖ = ≗ = ≍ ∫ ∮ ∬ ∯ ∭ ∰ ∼ ∾ - ⊸ □ ▢ ∥ ≬ ‖ ≬ • ⦁
10517             … ∴ ≡ ≋ ⊂ ⟃ ⊃ ⟄ ⊤ ⫙ ⊥ ⟒ ( ⦖ ) ⦕ ( ⦓ ) ⦔ ( ⦅ ) ⦆ ⊳ ⪧ ⊲ ⪦ ≟ ≘ ≐ ≖ . ∘
10518             [ ⟬ ] ⟭ { ⧼ } ⧽ % ⦼ % ‰ × ⦻ ⨯ ⨷ ∧ ∩ ∨ ∪ )]); # = ≈
10519            
10520             sub parse_NameList ($$) {
10521 0     0 0 0 my ($self, $f, $k, $kk, $name, $_c, %basic, %cached_full, %compose, $version,
10522             %into2, %ordered, %candidates, %N, %comp2, %NM, %BL, $BL, %G, %NS) = (shift, shift);
10523 0         0 binmode $f; # NameList.txt is in Latin-1, not unicode
10524 0         0 while (my $s = <$f>) { # extract compositions, add to char downgrades; -> composition, => compatibility composition
10525 0 0       0 if ($s =~ /^\@\@\@\s+The\s+Unicode\s+Standard\s+(.*?)\s*$/i) {
10526 0         0 $version = $1;
10527             }
10528 0 0       0 if ($s =~ /^([\da-f]+)\b\s*(.*?)\s*$/i) {
10529 0         0 my ($K, $Name, $C, $t) = ($1, $2, $self->charhex2key("$1"));
10530 0         0 $N{$Name} = $K;
10531 0         0 $NM{$C} = $Name; # Not needed for compositions, but handy for user-visible output
10532 0         0 $BL{$C} = $self->charhex2key($BL); # Used for sorting
10533             # Finish processing of preceding text
10534 0 0       0 if (defined $kk) { # Did not see (official) decomposition
10535             # warn("see combining: $K $C $Name"),
10536 0 0 0     0 $NS{$_c}++ if $name =~ /\bCOMBINING\b/ and not ($_c =~ /\p{NonSpacingMark}/);
10537 0 0       0 if ($name =~ /^(.*?)\s+(?:(WITH)\s+|(?=(?:OVER|ABOVE|PRECEDED\s+BY|BELOW(?=\s+LONG\s+DASH))\s+\b(?!WITH\b|AND\b)))(.*?)\s*$/) {
10538 0         0 push @{$candidates{$k}}, [$1, $3];
  0         0  
10539 0         0 my ($b, $with, $ext) = ($1, $2, $3);
10540 0         0 my @ext = split /\s+AND\s+/, $ext;
10541 0 0 0     0 if ($with and @ext > 1) {
10542 0         0 for my $i (0..$#ext) {
10543 0         0 my @ext1 = @ext;
10544 0         0 splice @ext1, $i, 1;
10545 0         0 push @{$candidates{$k}}, ["$b WITH ". (join ' AND ', @ext1), $ext[$i]];
  0         0  
10546             }
10547             }
10548             }
10549 0 0       0 if ($name =~ /^(.*)\s+(?=OR\s)(.*?)\s*$/) { # Find the latest possible...
10550 0         0 push @{$candidates{$k}}, [$1, $2];
  0         0  
10551             }
10552 0 0       0 if (($t = $name) =~ s/\b(COMBINING(?=\s+CYRILLIC\s+LETTER)|BARRED|SLANTED|APPROXIMATELY|ASYMPTOTICALLY|(?
10553 0         0 push @{$candidates{$k}}, [$t, "calculated-$+"];
  0         0  
10554 0 0       0 $candidates{$k}[-1][1] .= '-epigraphic' if $t =~ /\bEPIGRAPHIC\b/; # will be massaged away from $t later
10555             $candidates{$k}[-1][0] =~ s/\s+SYMBOL$// and $candidates{$k}[-1][1] .= '-symbol'
10556 0 0 0     0 if $candidates{$k}[-1][1] =~ /\bLUNATE\b/;
10557             # warn("smallcapital $name"),
10558 0 0       0 $candidates{$k}[-1][1] .= '-smallcaps' if $t =~ /\bSMALL\s+CAPITAL\b/; # will be massaged away from $t later
10559             # warn "Candidates: <$candidates{$k}[0]>; <$candidates{$k}[1]>";
10560             }
10561 0 0       0 if (($t = $name) =~ s/\b(WHITE|BLACK|CIRCLED)\s+//) {
10562 0         0 push @{$candidates{$k}}, [$t, "fake-$1"];
  0         0  
10563             }
10564 0 0       0 if (($t = $name) =~ s/\bBLACK\b/WHITE/) {
10565 0         0 push @{$candidates{$k}}, [$t, "fake-black"];
  0         0  
10566             }
10567 0 0       0 if (($t = $name) =~ s/^(?:RAISED|MODIFIER\s+LETTER(?:\s+RAISED)?(\s+LOW)?)\s+//) {
10568 0 0       0 push @{$candidates{$k}}, [$t, $1 ? "fake-sub" : "fake-super"];
  0         0  
10569             }
10570 0 0       0 if (($t = $name) =~ s/\bBUT\s+NOT\b/OR/) {
10571 0         0 push @{$candidates{$k}}, [$t, "fake-but-not"];
  0         0  
10572             }
10573 0 0       0 if (($t = $name) =~ s/(^LATIN\b.*\b\w)UM((?:\s+ROTUNDA)?)$/$1$2/) { # Paleo-latin
10574 0         0 push @{$candidates{$k}}, [$t, "fake-umify"];
  0         0  
10575             }
10576 0 0 0     0 if ((0xa7 == ((hex $k)>>8)) and ($t = $name) =~ s/\b(\w|CO|VEN)(?!\1)(\w)$/$2/) { # Paleo-latin (CON/VEND + digraph)
10577 0         0 push @{$candidates{$k}}, [$t, "fake-paleocontraction-by-last"];
  0         0  
10578             }
10579 0 0       0 if (($t = $name) =~ s/(?:(\bMIDDLE-WELSH)\s+)?\b(\w)(?=\2$)//) {
10580 0 0       0 push @{$candidates{$k}}, [$t, "fake-doubleletter" . ($1 ? "-$1" : '')];
  0         0  
10581             }
10582 0 0       0 if (($t = $name) =~ s/\b(APL\s+FUNCTIONAL\s+SYMBOL)\s+\b(.*?)\b\s*\b((?:UNDERBAR|TILDE|DIAERESIS|VANE|STILE|JOT|OVERBAR|BAR)(?!$))\b\s*/$2/) {
10583             #warn "APL: $k ($name) --> <$t>; <$1> <$3>";
10584 0         0 push @{$candidates{$k}}, [$t, "calculated-$1-$3apl"];
  0         0  
10585 0         0 my %s = qw(UP DOWN DOWN UP); # misprint in the official name???
10586 0         0 $candidates{$k}[-1][0] =~ s/\b(UP|DOWN)(?=\s+TACK\b)/$s{$1}/;
10587             }
10588             # Allow QUAD at end only if $2 is not-empty
10589 0 0       0 if (($t = $name) =~ s/\b(APL\s+FUNCTIONAL\s+SYMBOL)\s+\b(.*?)\b\s*\b(QUAD(?:(?!$)|(?!\2))|(?:UNDERBAR|TILDE|DIAERESIS|VANE|STILE|JOT|OVERBAR|BAR)$)\b\s*/$2/) {
    0          
10590             #warn "APL: $k ($name) --> <$t>; <$1> <$3>";
10591 0         0 push @{$candidates{$k}}, [$t, "calculated-$1-$3apl"];
  0         0  
10592 0         0 my %s = qw(UP DOWN DOWN UP); # misprint in the official name???
10593 0         0 $candidates{$k}[-1][0] =~ s/\b(UP|DOWN)(?=\s+TACK\b)/$s{$1}/;
10594             } elsif (($t = $name) =~ s/\b(APL\s+FUNCTIONAL\s+SYMBOL)\s+//) {
10595             #warn "APL: $k ($name) --> <$t>; <$1> <$3>";
10596 0         0 push @{$candidates{$k}}, [$t, "calculated-$1"];
  0         0  
10597 0         0 my %s = qw(UP DOWN DOWN UP); # misprint in the official name???
10598 0         0 $candidates{$k}[-1][0] =~ s/\b(UP|DOWN)(?=\s+TACK\b)/$s{$1}/;
10599             }
10600 0 0       0 if (($t = $name) =~ s/\b(LETTER\s+SMALL\s+CAPITAL)/CAPITAL LETTER/) {
10601 0         0 push @{$candidates{$k}}, [$t, "smallcaps"];
  0         0  
10602             }
10603 0 0 0     0 if (($t = $name) =~ s/\b(LETTER\s+)E([SZN])[HG]$/$1$2/ # esh/eng/ezh
      0        
      0        
      0        
10604             # next two not triggered since this is actually decomposed:
10605             or ($t = $name) =~ s/(?<=\bLETTER\sV\s)WITH\s+RIGHT\s+HOOK$//
10606             or ($t = $name) =~ s/\bDOTLESS\s+J\s+WITH\s+STROKE$/J/
10607             or $name eq 'LATIN SMALL LETTER SCHWA' and $t = 'DIGIT ZERO') {
10608 0         0 push @{$candidates{$k}}, [$t, "phonetized"] if 0;
10609             }
10610             }
10611 0         0 ($k, $name, $_c) = ($K, $Name, $C);
10612 0 0       0 $G{$k} = $name if $name =~ /^GREEK\s/; # Indexed by hex
10613 0         0 $kk = $k;
10614 0         0 next;
10615             }
10616 0 0       0 if ($s =~ /^\@\@\s+([\da-f]+)\b/i) {
10617 0 0       0 die unless $s =~ /^\@\@\s+([\da-f]+)\s.*\s([\da-f]+)\s*$/i;
10618 0         0 $BL = $1;
10619             }
10620 0         0 my $a; # compatibility_p, composed, decomposition string
10621 0 0       0 $a = [0, split /\s+/, "$1"] if $s =~ /^\s+:\s*([0-9A-F]+(?:\s+[0-9A-F]+)*)/;
10622 0 0 0     0 $a = [1, split /\s+/, "$2"], ($1 and push @$a, $1)
10623             if $s =~ /^\s+#\s*(?:(<.*?>)\s+)?([0-9A-F]+(?:\s+[0-9A-F]+)*)/; # Put at end
10624 0 0       0 next unless $a;
10625 0 0       0 if ($a->[-1] eq '') {{ # Clarify
10626 0         0 my ($math, $type) = ('', '');
  0         0  
10627             # warn("Unexpected name with : <$name>"), unless $name =~ s/^MATHEMATICAL\s+// and $math = "math-";
10628 0 0 0     0 warn("Unexpected name with : $k <$name>"), last # In BMP, MATHEMATICAL is omited
      0        
      0        
10629             unless $name =~ /^(?:MATHEMATICAL\s+)?((?:(?:BLACK-LETTER|FRAKTUR|BOLD|ITALIC|SANS-SERIF|DOUBLE-STRUCK|MONOSPACE|SCRIPT)\b\s*?)+)(?=\s+(?:SMALL|CAPITAL|DIGIT|NABLA|PARTIAL|N-ARY|\w+\s+SYMBOL)\b)/
10630             or $name =~ /^HEBREW\s+LETTER\s+(WIDE|ALTERNATIVE)\b/
10631             or $name =~ /^(ARABIC\s+MATHEMATICAL(?:\s+(?:INITIAL|DOTLESS|STRETCHED|LOOPED|TAILED|DOUBLE-STRUCK))?)\b/
10632             or $name =~ /^(PLANCK|INFORMATION)/; # information source
10633 0 0       0 $type = $1 if $1;
10634 0         0 $type =~ s/BLACK-LETTER/FRAKTUR/; # http://en.wikipedia.org/wiki/Black-letter#Unicode
10635 0         0 $type =~ s/INFORMATION/Letterlike/; # http://en.wikipedia.org/wiki/Letterlike_Symbols_%28Unicode_block%29
10636 0 0       0 $type = '=' . join '-', map lc($_), split /\s+/, $type if $type;
10637 0         0 $a->[-1] = "";
10638             }}
10639 0 0       0 push @$a, '' unless @$a > 2;
10640 0         0 push @{$basic{$k}}, $a; # 1 2044 --\
  0         0  
10641 0 0 0     0 undef $kk unless $a->[-1] eq '' # Disable guesswork processing
      0        
      0        
      0        
10642             or @$a == 3 and (chr hex $a->[-2]) =~ /\W|\p{Lm}/ and $a->[-1] !~ /^[-1]) =~ /\w/;
10643             # print "@$a";
10644             }
10645             # $candidates{'014A'} = ['LATIN CAPITAL LETTER N', 'faked-HOOK']; # Pretend on ENG...
10646             # $candidates{'014B'} = ['LATIN SMALL LETTER N', 'faked-HOOK']; # Pretend on ENG...
10647             # XXXX Better have this together with pseudo-upgrade???
10648 0         0 push @{$candidates{'00b5'}}, ['GREEK SMALL LETTER MU', 'faked-calculated-SYMBOL']; # Pretend on MICRO SIGN...
  0         0  
10649             # $candidates{'00b5'} = ['GREEK SMALL LETTER MU', 'calculated-SYMBOL']; # Pretend on MICRO SIGN...
10650 0         0 for my $k (keys %basic) { # hex
10651 0         0 for my $exp (@{$basic{$k}}) {
  0         0  
10652 0         0 my $base = $exp->[1]; # hex
10653 0         0 my $name = $NM{$self->charhex2key($base)};
10654 0 0 0     0 next if not $name and ($k =~ /^[12]?F[89A]..$/ or hex $base >= 0x4E00 and hex $base <= 0x9FCC); # ideographs; there is also 3400 region...
      0        
10655 0 0       0 warn "Basic: `$k' --> `@$exp', base=`$base' --> `",$self->charhex2key($base),"'" unless $name;
10656 0 0       0 if ((my $NN = $name) =~ s/\s+OPERATOR$//) {
10657             #warn "operator: `$k' --> <$NN>, `@$exp', base=`$base' --> `",$self->charhex2key($base),"'";
10658 0 0       0 push @{$candidates{$k}}, [$_, @$exp[2..$#$exp]] for $NN, @{ $operators{$NN} || []};
  0         0  
  0         0  
10659             }
10660             }
10661             }
10662 0         0 for my $how (keys %uni_manual) { # Some stuff is easier to describe in terms of char, not names
10663 0         0 my $map = $uni_manual{$how};
10664 0 0       0 die "manual translation map for $how has an odd number of entries" if @$map % 2;
10665             # for my $from (keys %$map) {
10666 0         0 while (@$map) {
10667 0         0 my $to = pop @$map; # Give precedence to later entries
10668 0         0 my $from = pop @$map;
10669 0         0 for my $shift (0,1) {
10670 0 0       0 if ($shift) {
10671 0         0 my ($F, $T) = (uc $from, uc $to);
10672 0 0 0     0 next unless $F ne $from and $T ne $to;
10673 0         0 ($from, $to) = ($F, $T);
10674             }
10675 0         0 push @{$candidates{uc $self->key2hex($to)}}, [$NM{$from}, "manual-$how"];
  0         0  
10676             }
10677             }
10678             }
10679 0         0 for my $g (keys %G) {
10680 0 0       0 (my $l = my $name = $G{$g}) =~ s/^GREEK\b/LATIN/ or die "Panic";
10681 0 0       0 next unless my $L = $N{$l}; # is HEX
10682             #warn "latinize: $L\t$l";
10683 0         0 push @{$candidates{$L}}, [$name, 'faked-latinize'];
  0         0  
10684 0 0       0 next unless my ($lat, $first, $rest, $add) = ($l =~ /^(LATIN\s+(?:SMALL|CAPITAL)\s+LETTER\s+(\w))(\w+)(?:\s+(\S.*))?$/);
10685 0 0       0 $lat =~ s/P$/F/, $first = 'F' if "$first$rest" eq 'PHI';
10686 0 0       0 die unless my $LL = $N{$lat};
10687 0 0       0 $add = (defined $add ? "-$add" : ''); # None of 6.1; only iIuUaAgGdf present of 6.1
10688 0         0 push @{$candidates{$L}}, [$lat, "faked-greekize$add"];
  0         0  
10689             #warn "latinize++: $L\t$l;\t`$add'\t$lat";
10690             }
10691 0         0 my %iu_TR = qw(INTERSECTION CAP UNION CUP);
10692 0         0 my %_TR = map { (my $in = $_) =~ s/_/ /g; $in } qw(SMALL_VEE LOGICAL_OR
  0         0  
  0         0  
10693             UNION_OPERATOR_WITH_DOT MULTISET_MULTIPLICATION
10694             UNION_OPERATOR_WITH_PLUS MULTISET_UNION
10695             DEL NABLA
10696             QUOTE APOSTROPHE
10697             SQUISH VERTICAL_LINE
10698             SLASH SOLIDUS
10699             BACKSLASH REVERSE_SOLIDUS
10700             DIVIDE DIVISION_SIGN
10701             QUESTION QUESTION_MARK
10702             UP_CARET LOGICAL_AND
10703             DOWN_CARET LOGICAL_OR
10704             JOT DEGREE_SIGN);
10705 0         0 my($_TR_rx) = map qr/$_/, join '|', keys %_TR;
10706 0         0 for my $c (keys %candidates) { # Done after all the names are known; hex of the char
10707 0         0 my ($CAND, $app, $t, $base, $b) = ($candidates{$c}, '');
10708 0         0 for my $Cand (@$CAND) { # (all keys in hex) [MAYBE_CHAR_NAME, how_obtained]
10709             #warn "candidates: $c <$Cand->[0]>, <@$Cand[1..$#$Cand]>";
10710             # An experiment shows that the FORMS are properly marked as non-canonical decompositions; so they are not needed here
10711 0 0       0 (my $with = my $raw = $Cand->[1]) =~ s/\s+(SIGN|SYMBOL|(?:FINAL|ISOLATED|INITIAL|MEDIAL)\s+FORM)$//
10712             and $app = " $1"; # $app is just a candidate; actually, not useful at all
10713 0         0 for my $Mod ( (map ['', $_], $app, '', ' SIGN', ' SYMBOL', ' OF', ' AS MEMBER', ' TO'), # `SUBSET OF', `CONTAINS AS MEMBER', `PARALLEL TO'
10714             (map [$_, ''], 'WHITE ', 'WHITE UP-POINTING ', 'N-ARY '), ['WHITE ', ' SUIT'] ) {
10715 0         0 my ($prepend, $append) = @$Mod;
10716 0 0 0     0 next if $raw =~ /-SYMBOL$/ and 0 <= index($append, "SYMBOL"); #
10717 0         0 warn "raw=`$raw', prepend=<$prepend>, append=<$append>, base=$Cand->[0]\n" if debug_GUESS_MASSAGE;
10718 0         0 $t++;
10719 0         0 $b = "$prepend$Cand->[0]$append";
10720 0 0       0 $b =~ s/\bTWO-HEADED\b/TWO HEADED/ unless $N{$b};
10721 0 0       0 $b =~ s/\bTIMES\b/MULTIPLICATION SIGN/ unless $N{$b};
10722 0 0       0 $b =~ s/(?:(?<=\bLEFT)|(?<=RIGHT))(?=\s+ARROW\b)/WARDS/ unless $N{$b};
10723 0 0       0 $b =~ s/\bLINE\s+INTEGRATION\b/CONTOUR INTEGRAL/ unless $N{$b};
10724 0 0       0 $b =~ s/\bINTEGRAL\s+AVERAGE\b/INTEGRAL/ unless $N{$b};
10725 0 0       0 $b =~ s/\s+(?:SHAPE|OPERATOR|NEGATED)$// unless $N{$b};
10726 0 0       0 $b =~ s/\bCIRCLED\s+MULTIPLICATION\s+SIGN\b/CIRCLED TIMES/ unless $N{$b};
10727 0 0       0 $b =~ s/^(CAPITAL|SMALL)\b/LATIN $1 LETTER/ unless $N{$b}; # TURNED SMALL F
10728 0 0       0 $b =~ s/\b(CAPITAL\s+LETTER)\s+SMALL\b/$1/ unless $N{$b}; # Q WITH HOOK TAIL
10729 0 0       0 $b =~ s/\bEPIGRAPHIC\b/CAPITAL/ unless $N{$b}; # XXXX is it actually capital?
10730             $b =~ s/^LATIN\s+LETTER\s+SMALL\s+CAPITAL\b/LATIN CAPITAL LETTER/ # and warn "smallcapital -> <$b>"
10731 0 0 0     0 if not $N{$b} or $with=~ /smallcaps/; # XXXX is it actually capital?
10732 0 0       0 $b =~ s/^GREEK\s+CAPITAL\b(?!=\s+LETTER)/GREEK CAPITAL LETTER/ unless $N{$b};
10733 0 0       0 $b =~ s/^GREEK\b(?!\s+(?:CAPITAL|SMALL)\s+LETTER)/GREEK SMALL LETTER/ unless $N{$b};
10734 0 0       0 $b =~ s/^CYRILLIC\b(?!\s+(?:CAPITAL|SMALL)\s+LETTER)(?=\s+LETTER\b)/CYRILLIC SMALL/ unless $N{$b};
10735 0 0       0 $b =~ s/\bEQUAL(\s+TO\s+SIGN\b)?/EQUALS SIGN/ unless $N{$b};
10736 0 0       0 $b =~ s/\bMINUS\b/HYPHEN-MINUS/ unless $N{$b};
10737 0 0       0 $b =~ s/\b(SQUARE\s+)(INTERSECTION|UNION)(?:\s+OPERATOR)?\b/$1$iu_TR{$2}/ unless $N{$b};
10738 0 0       0 $b =~ s/(?<=WARDS)$/ ARROW/ unless $N{$b}; # APL VANE
10739             # warn "_TR: <$1> in $b; <>" if $b =~ /\b($_TR_rx)\b/ and not $_TR{$1};
10740 0 0       0 $b =~ s/\b($_TR_rx)\b/$_TR{$1}/ unless $N{$b};
10741             $b = "GREEK SMALL LETTER $b" and ($b =~ /\bDELTA\b/ and $b =~ s/\bSMALL\b/CAPITAL/)
10742 0 0 0     0 if not $N{$b} and $N{"GREEK SMALL LETTER $b"};
      0        
      0        
10743             # $b =~ s/\bDOT\b/FULL STOP/ unless $N{$b};
10744             # $b =~ s/^MICRO$/GREEK SMALL LETTER MU/ unless $N{$b};
10745            
10746 0         0 warn " b =`$b', prepend=<$prepend>, append=<$append>, base=$Cand->[0]\n" if debug_GUESS_MASSAGE;
10747 0 0       0 if (defined ($base = $N{$b})) {
10748 0 0       0 undef $base, next if $base eq $c;
10749 0 0       0 $with = $raw if $t;
10750 0         0 warn "<$Cand->[0]> WITH <$Cand->[1]> resolved via SIGN/SYMBOL/.* FORM: strip=<$app> add=<$prepend/$append>\n"
10751             if debug_GUESS_MASSAGE and ($append or $app or $prepend);
10752             last
10753 0         0 }
10754             }
10755 0 0       0 if (defined $base) {
    0          
10756 0         0 $base = [$base];
10757             } elsif ($raw =~ /\bOPERATOR$/) {
10758 0 0       0 $base = [map $N{$_}, @{ $operators{$Cand->[0]} }] if exists $operators{$Cand->[0]};
  0         0  
10759             }
10760 0 0       0 (warnUNRES and warn("Unresolved: <$Cand->[0]> WITH <$Cand->[1]>")), next unless defined $base;
10761 0         0 my @modifiers = split /\s+AND\s+/, $with;
10762 0 0       0 @modifiers = map { s/\s+/-/g; /^[\da-f]{4,}$/i ? $_ : "" } @modifiers;
  0         0  
  0         0  
10763             #warn " $c --> <@$base>; <@modifiers>...\t$b <- $NM{chr hex $c}" ;
10764 0         0 unshift @{$basic{$c}}, [1, $_, @modifiers] for @$base;
  0         0  
10765 0 0       0 if ($b =~ s/\s+(OPERATOR|SIGN)$//) { # ASTERISK (note that RING is a valid name, but has no relation to RING OPERATOR
10766 0 0       0 unshift @{$basic{$c}}, [1, $base, @modifiers] if defined ($base = $N{$b}); # ASTERISK
  0         0  
10767             #$base = '[undef]' unless defined $base;
10768             #warn("operator via <$b>, <$c> => `$base'");
10769             (debug_OPERATOR and warn "operator: `$c' ==> `$_', <@modifiers> via <$b>\n"),
10770 0 0       0 unshift @{$basic{$c}}, [1, $_, @modifiers] for map $N{$_}, @{ $operators{$b} || [] }; # ASTERISK
  0         0  
  0         0  
10771             }
10772             # push @{$candidates{$k}}, [$_, @$exp[2..$#$exp]] for $NN, @{ $operators{$NN} || []};
10773             # $basic{$c} = [ [1, $base, @modifiers ] ]
10774             }
10775             }
10776 0         0 $self->decompose_r(\%basic, $_, \%cached_full) for keys %basic; # Now %cached_full is fully expanded - has trivial expansions too
10777 0 0       0 for my $c (sort {fromHEX $a <=> fromHEX $b or $a cmp $b} keys %cached_full) { # order of chars in Unicode matters (all keys in hex)
  0         0  
10778 0         0 my %seen_compose;
10779 0         0 for my $exp (@{ $cached_full{$c} }) {
  0         0  
10780 0         0 my @exp = @$exp; # deep copy
10781 0 0       0 die "Expansion too short: <@exp>" if @exp < 2;
10782 0 0       0 next if @exp < 3; # Skip trivial decompositions
10783 0         0 my $compat = shift @exp;
10784 0         0 my @PRE = @exp;
10785 0         0 my $base = shift @exp;
10786 0 0       0 @exp = ($base, sort {fromHEX $a <=> fromHEX $b or $a cmp $b} @exp); # Any order will do; do not care about Unicode rules
  0         0  
10787             #warn "Malformed: [@exp]" if "@exp" =~ /^
10788 0 0       0 next if $seen_compose{"$compat; @exp"}++; # E.g., WHITE may be added in several ways...
10789 0 0       0 push @{$ordered{$c}}, [$compat, @exp > 3 ? @exp : @PRE]; # with 2 modifiers order does not matter for the algo below, but we catch U"¯ vs U¯".
  0         0  
10790 0         0 warn qq(Duplicate: $c <== [ @exp ] ==> <@{$compose{"@exp"}[0]}> (prefered)\n\t<), chr hex $c,
10791             qq(>\t$c\t$NM{chr hex $c}\n\t<), chr hex $compose{"@exp"}[0][1], qq(>\t$compose{"@exp"}[0][1]\t$NM{chr hex $compose{"@exp"}[0][1]})
10792 0 0 0     0 if $compose{"@exp"} and "@exp" !~ /<(font|pseudo-upgrade)>/ and $c ne $compose{"@exp"}[0][1] and not $known_dups{$c};
      0        
      0        
10793             #warn "Compose rule: `@exp' ==> $compat, `$c'";
10794 0         0 push @{$compose{"@exp"}}, [$compat, $c];
  0         0  
10795             }
10796             } # compose mapping done
10797 0 0       0 for my $c (sort {fromHEX $a <=> fromHEX $b or $a cmp $b} keys %ordered) { # all nontrivial! Order of chars in Unicode matters...
  0         0  
10798 0         0 my(%seen_compose, %seen_contract) = ();
10799 0         0 for my $v (@{ $ordered{$c} }) { ## When (FOO and FOO OPERATOR) + tilde are both remapped to X: X+operator == X
  0         0  
10800 0         0 my %seen;
10801 0         0 for my $off (reverse(2..$#$v)) {
10802             # next if $seen{$v->[$off]}++; # chain of compat, or 2A76 -> ?2A75 003D < = = = >
10803 0         0 my @r = @$v; # deep copy
10804 0         0 splice @r, $off, 1;
10805 0         0 my $compat = shift @r;
10806             #warn "comp: $compat, $c; $off [@$v] -> $v->[$off] + [@r]";
10807 0 0       0 next if $seen_compose{"$compat; $v->[$off]; @r"}++;
10808             # next unless my $contracted = $compose{"@r"}; # This omits trivial compositions
10809 0 0       0 my $contracted = [@{$compose{"@r"} || []}]; # Deep copy
  0         0  
10810             # warn "Panic $c" if @$contracted and @r == 1;
10811 0 0       0 push @$contracted, [0, @r] if @r == 1; # Not in %compose
10812             # QUAD-INT: may be INT INT INT INT, may be INT amp INT INT etc; may lead to same compositions...
10813             #warn "contraction: $_->[0]; $compat; $c; $v->[$off]; $_->[1]" for @$contracted;
10814 0 0       0 @$contracted = grep {$_->[1] ne $c and not $seen_contract{"$_->[0]; $compat; $v->[$off]; $_->[1]"}++} @$contracted;
  0         0  
10815             #warn " contraction: $_->[0]; $compat; $c; $v->[$off]; $_->[1]" for @$contracted;
10816 0         0 for my $contr (@$contracted) { # May be empty: Eg, fractions decompose into 2 3 and cannot be composed in 2 steps
10817 0   0     0 my $calculated = $contr->[0] || $off != $#$v;
10818 0         0 push @{ $into2{$self->charhex2key($c)} }, [(($compat | $contr->[0])<<1)|$calculated, $self->charhex2key($contr->[1]), $self->charhex2key($v->[$off])]; # each: compat, char, combine
  0         0  
10819 0         0 push @{ $comp2{$v->[$off]}{$contr->[1]} }, [ (($compat | $contr->[0])<<1)|$calculated, $c]; # each: compat, char
  0         0  
10820             }
10821             }
10822             }
10823             } # (de)compose-into-2 mapping done
10824 0         0 for my $h2 (values %comp2) { # Massage into the natural order - prefer canonical (de)compositions
10825 0         0 for my $h (values %$h2) { # RValues!!! [compat, charHEX] each
10826             # my @a = sort { "@$a" cmp "@$b" } @$h;
10827 0 0       0 my @a = sort { $a->[0] <=> $b->[0] or $self->charhex2key($a->[1]) cmp $self->charhex2key($b->[1]) } @$h;
  0         0  
10828 0         0 $h = \@a;
10829             }
10830             }
10831 0         0 \%into2, \%comp2, \%NM, \%BL, \%NS, $version
10832             }
10833            
10834             sub print_decompositions($;$) {
                  # Print a decomposition table, one line per key, to the currently selected output handle.
                  # Args: $self; optional hash ref mapping a key to an array of records.
                  # Without the optional argument the table is $self->{Decompositions}; before reading it,
                  # load_compositions() is called on the value of get_NamesList() when that value is defined.
                  # Keys are visited in plain string-sort order.  Each record is shown as its elements 1 and 2
                  # (space-separated), prefixed by '? ' when element 0 is true and by '= ' otherwise.
10835 0     0 0 0 my $self = shift;
10836 0 0       0 my $dec = @_ ? shift : do { my $f = $self->get_NamesList;
  0         0  
10837 0 0       0 $self->load_compositions($f) if defined $f;
10838 0         0 $self->{Decompositions}} ;
10839 0         0 for my $c (sort keys %$dec) {
10840 0         0 my $arr = $dec->{$c};
10841 0 0       0 my @out = map +($_->[0] ? '? ' : '= ') . "@$_[1,2]", @$arr;
10842 0         0 print "$c\t->\t", join(",\t", @out), "\n";
10843             }
10844             }
10845            
10846             sub print_compositions($$) {
                  # Print a two-level composition table $comp->{composing}{base} = [records...].
                  # Called with the invocant only, it hands control (goto &sub, same @_) to print_compositions_ch.
                  # Both key levels are ordered by fromHEX() value with a string comparison as tie-break
                  # (fromHEX is defined outside this block).
                  # Each record is printed as '?' (element 0 true) or '=' (element 0 false) followed by element 1.
10847 0 0   0 0 0 goto &print_compositions_ch if @_ == 1;
10848 0         0 my ($self, $comp) = (shift, shift);
10849 0 0       0 for my $c (sort {fromHEX $a <=> fromHEX $b or $a cmp $b} keys %$comp) { # composing char
  0         0  
10850 0         0 print "$c\n";
10851 0 0       0 for my $b (sort {fromHEX $a <=> fromHEX $b or $a cmp $b} keys %{$comp->{$c}}) { # base char
  0         0  
  0         0  
10852 0         0 my $arr = $comp->{$c}{$b};
10853 0 0       0 my @out = map +($_->[0] ? '?' : '=') . $_->[1], @$arr;
10854 0         0 print "\t$b\t->\t", join(",\t\t", @out), "\n";
10855             }
10856             }
10857             }
10858            
10859             sub print_compositions_ch($$) {
                  # Same output layout as print_compositions, but keys are visited in plain string-sort order
                  # and the marker is '? ' / '= ' (with a trailing space).
                  # Without the optional table argument, $self->{Compositions} is used; before reading it,
                  # load_compositions() is called on the value of get_NamesList() when that value is defined.
10860 0     0 0 0 my $self = shift;
10861 0 0       0 my $comp = @_ ? shift : do { my $f = $self->get_NamesList;
  0         0  
10862 0 0       0 $self->load_compositions($f) if defined $f;
10863 0         0 $self->{Compositions}} ;
10864 0         0 for my $c (sort keys %$comp) { # composing char
10865 0         0 print "$c\n";
10866 0         0 for my $b (sort keys %{$comp->{$c}}) { # base char
  0         0  
10867 0         0 my $arr = $comp->{$c}{$b};
10868 0 0       0 my @out = map +($_->[0] ? '? ' : '= ') . $_->[1], @$arr;
10869 0         0 print "\t$b\t->\t", join(",\t\t", @out), "\n";
10870             }
10871             }
10872             }
10873            
10874             sub load_compositions($$) {
                  # Read the file named by the second argument through parse_NameList() and cache the results
                  # on the object.  Returns $self.  Does nothing (beyond returning $self) when
                  # $self->{Compositions} is already true.
                  # Fills: Decompositions, UNames, UBlock, exComb, uniVersion, Compositions; may also replace
                  # $rxCombining (declared outside this block) when exComb has keys.
                  # Dies when the file cannot be opened or closed.
                  # NOTE(review): @comb is declared but never used in this sub.
10875 0     0 0 0 my ($self, $comp, @comb) = (shift, shift);
10876 0 0       0 return $self if $self->{Compositions};
                  # Start from a copy of the '[Substitutions]' table (or empty); parsed data is merged on top.
10877 0 0       0 my %comp = %{ $self->{'[Substitutions]'} || {} };
  0         0  
10878 0 0       0 open my $f, '<', $comp or die "Can't open $comp for read";
                  # From here on $comp no longer holds the file name: it receives the second return value
                  # of parse_NameList().
10879 0         0 ($self->{Decompositions}, $comp, $self->{UNames}, $self->{UBlock}, $self->{exComb}, $self->{uniVersion}) = $self->parse_NameList($f);
                  # NOTE(review): $comp was reassigned on the previous statement, so this message would show
                  # the stringified reference rather than the file name; neither die message includes $!.
10880 0 0       0 close $f or die "Can't close $comp for read";
10881             #warn "(De)Compositions and UNames loaded";
10882             # Having hex as index is tricky: is it 4-digits or more? Is it in uppercase?
                  # Re-key the parsed table through charhex2key() at both levels and for both record elements.
                  # NOTE(review): element 0 of each record is also passed through charhex2key(); confirm that
                  # this is intended for whatever parse_NameList() stores there.
10883 0 0       0 for my $c (sort {fromHEX $a <=> fromHEX $b or $a cmp $b} keys %$comp) { # composing char
  0         0  
10884 0 0       0 for my $b (sort {fromHEX $a <=> fromHEX $b or $a cmp $b} keys %{$comp->{$c}}) { # base char
  0         0  
  0         0  
10885 0         0 my $arr = $comp->{$c}{$b};
10886 0         0 my @out = map [$self->charhex2key($_->[0]), $self->charhex2key($_->[1])], @$arr;
10887 0         0 $comp{$self->charhex2key($c)}{$self->charhex2key($b)} = \@out;
10888             }
10889             }
10890 0         0 $self->{Compositions} = \%comp;
10891 0         0 my $comb = join '', keys %{$self->{exComb}}; # should not have metachars here...
  0         0  
10892 0 0       0 $rxCombining = qr/\p{nonSpacingMark}|[$comb]/ if $comb;
10893 0         0 $self
10894             }
10895            
10896             sub load_uniage($$) {
                  # Slurp the file $fn in one read and store parse_derivedAge()'s result in $self->{Age}.
                  # Returns $self; dies (message includes $!) if the file cannot be opened or closed.
10897 0     0 0 0 my ($self, $fn) = (shift, shift);
10898             # get_AgeList
10899 0 0       0 open my $f, '<', $fn or die "Can't open `$fn' for read: $!";
                  # Undefine the input record separator for the rest of this sub so <$f> returns the whole file.
10900 0         0 local $/;
10901 0         0 my $s = <$f>;
10902 0 0       0 close $f or die "Can't close `$fn' for read: $!";
10903 0         0 $self->{Age} = $self->parse_derivedAge($s);
10904 0         0 $self
10905             }
10906            
10907             sub load_unidata($$) {
                  # Convenience wrapper: load_compositions($comp), then, if a further argument was passed,
                  # load_uniage() on it.  Returns $self on the short path, otherwise whatever load_uniage() returns.
10908 0     0 0 0 my ($self, $comp) = (shift, shift);
10909 0         0 $self->load_compositions($comp);
10910 0 0       0 return $self unless @_;
10911 0         0 $self->load_uniage(shift);
10912             }
10913            
10914             my(%charinfo, %UName_v); # Unicode::UCD::charinfo extremely slow
10915             sub UName($$$;$) {
                  # Return a printable name for character $c (first normalized through charhex2key()).
                  # Args: $c; $verbose (append " [script; block; age]" details); $vbell (optional flag that
                  # changes how U+266A is labelled and cached).
                  # %charinfo memoizes Unicode::UCD::charinfo() results; %UName_v memoizes verbose answers.
                  # The verbose cache is neither consulted nor filled for U+266A unless $vbell is true.
10916 0     0 0 0 my ($self, $c, $verbose, $vbell, $app, $n, $i, $A) = (shift, shift, shift, shift, '');
10917 0         0 $c = $self->charhex2key($c);
10918 0 0 0     0 return $UName_v{$c} if $verbose and exists $UName_v{$c} and ($vbell or 0x266a != ord $c);
      0        
      0        
                  # Slow path: taken when no UNames table is loaded, or whenever verbose output is requested.
10919 0 0 0     0 if (not exists $self->{UNames} or $verbose) {
10920 0         0 require Unicode::UCD;
10921 0   0     0 $i = ($charinfo{$c} ||= Unicode::UCD::charinfo(ord $c) || {});
      0        
10922 0         0 $A = $self->{Age}{$c};
                  # Name preference: loaded UNames entry, then charinfo's name, then the char itself in <>.
10923 0   0     0 $n = $self->{UNames}{$c} || ($i->{name}) || "<$c>";
10924 0 0 0     0 if ($verbose and (%$i or $A)) {
      0        
10925 0         0 my $scr = $i->{script};
10926 0         0 my $bl = $i->{block};
10927 0         0 $scr = join '; ', grep defined, $scr, $bl, $A;
10928 0 0 0     0 $scr = "Com/MiscSym1.1" if $vbell and 0x266a == ord $c; # EIGHT NOTE: we use as "visual bell"
10929 0 0       0 $app = " [$scr]" if length $scr;
10930             }
10931 0 0 0     0 return($UName_v{$c} = "$n$app") if $verbose and ($vbell or 0x266a != ord $c);
      0        
10932 0         0 return "$n$app"
10933             }
                  # Fast path: loaded name, else "[hex]" for chars matching the class below, else "[char]".
10934 0 0       0 $self->{UNames}{$c} || ($c =~ /[\x{d800}-\x{dfff}\x00-\x1f\x7f-\xAF]/ ? '['.$self->key2hex($c).']' : "[$c]")
    0          
10935             }
10936            
10937             sub parse_derivedAge ($$) {
                  # Parse the text $s (split on newlines) into a hash ref: character => version string.
                  # Blank lines and lines whose first non-space char is '#' are skipped.
                  # Accepted line shape: HEX[..HEX] ; D.D  (case-insensitive hex, at least 4 digits each);
                  # every code point in the range gets the captured version.  Any other line is fatal.
                  # NOTE(review): the version pattern is (\d\.\d), i.e. one digit on each side of the dot,
                  # so a line carrying a version such as "10.0" would hit the die below.
10938 0     0 0 0 my ($self, $s, %C) = (shift, shift);
10939 0         0 for my $l (split /\n/, $s) {
10940 0 0       0 next if $l =~ /^\s*(#|$)/;
10941 0 0       0 die "Unexpected line in DerivedAge: `$l'"
10942             unless $l =~ /^([0-9a-f]{4,})(?:\.\.([0-9a-f]{4,}))?\s*;\s*(\d\.\d)\b/i;
10943 0   0     0 $C{chr $_} = $3 for (hex $1) .. hex($2 || $1);
10944             }
10945 0         0 \%C;
10946             }
10947            
10948             # use Dumpvalue;
10949             # my $first_time_dump;
10950             my %warned_decomposed;
10951             sub get_compositions ($$$$;$) { # Now only the undo-branch is used...
                  # Apply (or, with $undo, reverse) the mappings listed in @$m to the character $C.
                  # Args: $m - array ref of mapping names, tried in order; $C - a character, or an array ref
                  # whose element 0 is the character; $undo - true to decompose instead of compose;
                  # $unAltGr - optional hash ref indexed by $C.
                  # Returns a single result (element 1 of the first matching record) or undef.
                  # %warned_decomposed keeps the "multiple answers" warning to once per ($C, $M) pair.
10952 0     0 0 0 my ($self, $m, $C, $undo, $unAltGr, @out) = (shift, shift, shift, shift, shift);
10953             # return unless defined $C and defined (my $r = $self->{Compositions}{$m}{$C});
10954             # Dumpvalue->new()->dumpValue($self->{Compositions}) unless $first_time_dump++;
10955 0 0       0 return undef unless defined $C;
10956 0 0       0 $C = $C->[0] if 'ARRAY' eq ref $C; # Treat prefix keys as usual keys
10957 0         0 warn "doing <$C> <@$m>: undo=$undo C=", $self->key2hex($C), ", maps=", join ' ', map $self->key2hex($_), @$m if warnDO_COMPOSE; # if $m eq 'A';
10958 0 0       0 if ($undo) {
10959 0 0       0 return undef unless my $dec = $self->{Decompositions}{$C};
10960             # order in @$m matters; so does one in Decompositions - but less so
10961             # Hence the external loop should be in @$m
10962 0         0 for my $M (@$m) {
                  # Keep the decomposition records whose element 2 equals this mapping name.
10963 0         0 push @out, $_ for grep $M eq $_->[2], @$dec;
10964 0 0       0 if (@out) { # We took the first guy from $m which allows such decomposition
10965             warn "Decomposing <$C> <$M>: multiple answers: <", (join '> <', map "@$_", @out), ">"
10966 0 0 0     0 if @out > 1 and not $warned_decomposed{$C,$M}++;
10967 0         0 warn "done undo <$C> <@$m>: -> ", $self->array2string(\@out) if warnDO_COMPOSE; # if $m eq 'A';
10968 0         0 return $out[0][1]
10969             }
10970             }
10971 0         0 return undef;
10972             }
                  # Double braces: the inner bare block lets `last' leave this section early.
10973 0 0       0 if ($unAltGr) {{
10974 0 0       0 last unless $unAltGr = $unAltGr->{$C};
  0         0  
10975 0         0 my(@seen, %seen);
10976 0         0 for my $comp ( @$m ) {
                  # $a1: records for the $unAltGr->{$C} character under this mapping; $a2: records for $C itself.
10977 0         0 my $a1 = $self->{Compositions}{$comp}{$unAltGr};;
10978 0 0 0     0 push @seen, $a1 if $a1 and not $seen{$a1->[0][1]}++;
10979             #warn "Second binding `$a1->[0][1]' for `$unAltGr' (on `$C') - after $seen[0][0][1]" if @seen == 2;
10980 0 0 0     0 next unless defined (my $a2 = $self->{Compositions}{$comp}{$C}) or @seen == 2;
10981             #warn " --> AltGr-binding `$a2->[0][1]' (on `$C')" if @seen == 2 and defined $a2;
10982 0 0 0     0 warn "Conflict between the second binding `$a1->[0][1]' for `$unAltGr' and AltGr-binding `$a2->[0][1]' (on `$C')"
      0        
      0        
10983             if $a2 and $a1 and @seen == 2 and $a1->[0][1] ne $a2->[0][1];
10984 0   0     0 return ((@seen == 2 and $a1) or $a2)->[0][1];
10985             }
10986             }}
                  # Default: first mapping in @$m for which compound_composition() gives a defined result.
10987 0 0       0 return undef unless my ($r) = grep defined, map $self->compound_composition($_,$C), @$m;
10988 0 0 0     0 warn "Composing <$C> <@$m>: multiple answers: <", (join '> <', map "@$_", @$r), ">" unless @$r == 1 or $C eq ' ';
10989             # warn("done <$C> <$m>: <$r->[0][1]>"); # if $m eq 'A';
10990 0         0 $r->[0][1]
10991             }
10992            
10993             sub compound_composition ($$$) {
                  # Apply the compound mapping $M to $C and return an array ref of [flag, char, doc] records,
                  # or undef when $C is undefined or any step produces nothing.
                  # $C is a character, or an array ref: element 0 is the character, element 3 (if defined)
                  # is a description that is prepended, followed by " ⇒ ", to the doc string of each result.
                  # $M is split on '+' and on a '-' that is directly followed by another '-'; the parts are
                  # applied right to left.  A part starting with '-' is reversed via get_compositions(..., 'undo');
                  # lc / uc / ucfirst / dectrl are built-in transformations; any other part is looked up in
                  # $self->{Compositions}.
                  # NOTE(review): %seen and the outer @res declared here are never used; the loop declares its own @res.
10994 0     0 0 0 my ($self, $M, $C, $doc, $doc1, @res, %seen) = (shift, shift, shift, '', '');
10995 0 0       0 return undef unless defined $C;
10996 0 0 0     0 $doc1 = $C->[3] if 'ARRAY' eq ref $C and defined $C->[3]; # may be used via
10997 0 0       0 $doc = "$doc1 ⇒ " if length $doc1;
10998 0 0       0 $C = $C->[0] if 'ARRAY' eq ref $C;
10999 0         0 warn "composing `$M' with base <$C>" if warnDO_COMPOSE;
11000 0         0 $C = [[0, $C, $doc1]]; # Emulate element of return of Compositions ("one translation, explicit")
11001 0         0 for my $m (reverse split /\+|-(?=-)/, $M) {
11002 0         0 my @res;
11003 0 0       0 if ($m =~ /^(?:-|(?:[ul]c(?:first)?|dectrl)$)/) {
11004 0 0       0 if ($m =~ s/^-//) {
    0          
    0          
    0          
    0          
11005 0         0 @res = map $self->get_compositions([$m], $_->[1], 'undo'), @$C;
11006 0         0 @res = map [[0,$_]], grep defined, @res;
11007             } elsif ($m eq 'lc') {
                  # Case steps keep a char only if the conversion changes it and yields exactly one character.
11008 0 0 0     0 @res = map {($_->[1] eq lc($_->[1]) or 1 != length lc($_->[1])) ? () : [[0, lc $_->[1]]]} @$C
  0         0  
11009             } elsif ($m eq 'uc') {
11010 0 0 0     0 @res = map {($_->[1] eq uc($_->[1]) or 1 != length uc($_->[1])) ? () : [[0, uc $_->[1]]]} @$C
  0         0  
11011             } elsif ($m eq 'ucfirst') {
11012 0 0 0     0 @res = map {($_->[1] eq ucfirst($_->[1]) or 1 != length ucfirst($_->[1])) ? () : [[0, ucfirst $_->[1]]]} @$C
  0         0  
11013             } elsif ($m eq 'dectrl') {
                  # Only code points below 0x20 are mapped: the result is the code point plus 0x40.
11014 0 0       0 @res = map {(0x20 <= ord($_->[1])) ? () : [[0, chr(0x40 + ord $_->[1])]]} @$C
  0         0  
11015             } else {
11016 0         0 die "Panic"
11017             }
11018             } else {
11019             #warn "compose `$m' with bases <", join('> <', map $_->[1], @$C), '>';
11020 0         0 @res = map $self->{Compositions}{$m}{$_->[1]}, @$C;
11021             }
                  # Flatten the per-base lists of records; an empty result aborts the whole compound mapping.
11022 0         0 @res = map @$_, grep defined, @res;
11023 0 0       0 return undef unless @res;
11024 0         0 $C = [map [$_->[0], $_->[1], "$doc$M"], @res];
11025             }
11026             $C
11027 0         0 }
11028            
11029             sub compound_composition_many ($$$$) { # As above, but takes an array of [char, docs]
                  # Run compound_composition($M, ...) on every defined element of @$CC and concatenate the results.
                  # Returns an array ref, or undef when $CC is false or nothing was produced.
                  # When $ini is a reference with a defined element 3, each element's doc string is wrapped
                  # as "<that element> ⇒ Subst{...}"; otherwise the element's own doc ($C->[1]) is passed unchanged.
11030 0     0 0 0 my ($self, $M, $CC, $ini, @res) = (shift, shift, shift, shift);
11031 0 0       0 return undef unless $CC;
11032 0 0 0     0 my $doc = (($ini and ref $ini and defined $ini->[3]) ? "$ini->[3] ⇒ Subst{" : '');
                  # Closing brace for the wrapper; stays empty when no prefix was built.
11033 0   0     0 my $doc1 = $doc && '}';
11034 0         0 for my $C (@$CC) {
11035             # $C = $C->[0] if 'ARRAY' eq ref $C;
11036 0 0       0 next unless defined $C;
11037 0         0 my $in = $self->compound_composition($M, [$C->[0], undef, undef, "$doc$C->[1]$doc1"]);
11038 0 0       0 push @res, @$in if defined $in;
11039             }
11040 0 0       0 return undef unless @res;
11041             \@res
11042 0         0 }
11043            
11044             # Design goals: we assign several diacritics to a prefix key (possibly with
11045             # AltGr on the "Base key" and/or other "multiplexers" in between). We want:
11046             # *) a lc/uc paired result to sit on Shift-paired keypresses;
11047             # *) avoid duplication among multiplexers (a secondary goal);
11048             # *) allow some diacritics in the list to be preferred ("groups" below);
11049             # *) when there is a choice, prefer non-bizarre (read: with smaller Unicode
11050             # "Age" version) binding to be non-multiplexed.
11051             # We allow something which was not on AltGr to acquire AltGr when it gets a
11052             # diacritic.
11053            
11054             # It MAY happen that an earlier binding has empty slots,
11055             # but a later binding exists (to preserve lc/uc pairing, and shift-state)
11056            
11057             ### XXXX Unclear: how to catenate something in front of such a map...
11058             # we do $composition->[0][1], which means we ignore additional compositions! And we ignore HOW, instead of putting it into penalty
11059            
11060             sub sort_compositions ($$$$$;$) {
11061 0     0 0 0 my ($self, $m, $C, $Sub, $dupsOK, $w32OK, @res, %seen, %Penalize, %penalize, %OK, %ok, @C) = (shift, shift, shift, shift, shift, shift);
11062 0         0 warn "compounding ", $self->array2string($C) if warnSORTCOMPOSE;
11063 0         0 for my $c (@$C) {
11064 0 0 0     0 push @C, [map {($_ and 'ARRAY' eq ref $_) ? $_->[0] : $_} @$c]
  0         0  
11065             }
11066 0         0 my $char = $C[0][0];
11067 0 0       0 $char = 'N/A' unless defined $char;
11068 0         0 for my $MM (@$m) { # |-groups
11069 0         0 my(%byPenalty, @byLayers);
11070 0         0 for my $M (@$MM) { # diacritic in a group; may flatten each layer, but do not flatten separately each shift state: need to pair uc/lc
11071 0 0       0 if ((my $P = $M) =~ s/^(!)?\\(\\)?//) {
11072 0         0 my($neg, $strong) = ($1, $2);
11073             # warn "Penalize: <$P>"; # Actually, it is not enough to penalize; one should better put it in a different group...
11074 0 0       0 if ($P =~ s/\[(.*)\]$//) {
11075             #$P = $self->stringHEX2string($P);
11076 0         0 my $match;
11077 0   0     0 $char eq $_ and $match++ for split //, $self->stringHEX2string("$1");
11078 0 0       0 next unless $match;
11079             }
11080             #$P = $self->stringHEX2string($P);
11081 0 0       0 if ($neg) {
11082 0 0       0 $strong ? $OK{$_}++ : $ok{$_}++ for split //, $P;
11083             } else {
11084 0 0       0 $strong ? $Penalize{$_}++ : $penalize{$_}++ for split //, $P;
11085             }
11086             next
11087 0         0 }
11088 0         0 for my $L (0..$#C) { # Layer number; indexes a shift-pair
11089             # my @res2 = map {defined($_) ? $self->{Compositions}{$M}{$_} : undef } @{ $C[$L] };
11090 0         0 my @Res2 = map $self->compound_composition($M, $_), @{ $C->[$L] }; # elt: [$synth, $char]
  0         0  
11091 0         0 my @working_with = grep defined, @{ $C[$L] }; # ., KP_Decimal gives [. undef]
  0         0  
11092 0         0 warn "compound `$M' of [@working_with] -> ", $self->array2string(\@Res2) if warnSORTCOMPOSE;
11093 0         0 (my $MMM = $M) =~ s/(^|\+)$//; # Hack: the rule always fails if present, empty always succeeds
11094             my @Res3 = map $self->compound_composition_many($MMM, (defined() ? $Sub->{($_ and ref) ? $_->[0] : $_} : $_), $_),
11095 0 0 0     0 @{ $C->[$L] };
  0 0       0  
11096 0         0 warn "compound+ `$M' of [@working_with] -> ", $self->array2string(\@Res3) if warnSORTCOMPOSE;
11097 0         0 for my $shift (0..$#Res3) {
11098 0 0       0 if (defined $Res2[$shift]) {
11099 0 0       0 push @{ $Res2[$shift]}, @{$Res3[$shift]} if $Res3[$shift]
  0         0  
  0         0  
11100             } else {
11101 0         0 $Res2[$shift] = $Res3[$shift]
11102             }
11103             }
11104             # defined $Res2[$_] ? ($Res3[$_] and push @{$Res2[$_]}, @{$Res2[$_]}) : ($Res2[$_] = $Res3[$_]) for 0..$#Res3;
11105 0         0 @Res2 = $self->DEEP_COPY(@Res2);
11106 0         0 my ($ok, @ini_compat);
11107 0         0 do {{ # Run over found translations
11108 0 0       0 my @res2 = map {defined() ? $_->[0] : undef} @Res2; # process next unprocessed translations
  0         0  
  0         0  
11109 0   0     0 defined and (shift(@$_), (@$_ or undef $_)) for @Res2; # remove what is being processed
      0        
11110 0         0 $ok = grep $_, @res2;
11111 0 0 0     0 @res2 = map {(not defined() or (!$dupsOK and $seen{$_->[1]}++)) ? undef : $_} @res2; # remove duplicates
  0         0  
11112 0 0       0 my @compat = map {defined() ? $_->[0] : undef} @res2;
  0         0  
11113 0 0       0 my @_from_ = map {defined() ? $_->[2] : undef} @res2;
  0         0  
11114 0   0     0 defined and s/((?
11115 0 0       0 @res2 = map {defined() ? $_->[1] : undef} @res2;
  0         0  
11116 0 0 0     0 @res2 = map {0x10000 > ord($_ || 0) ? $_ : undef} @res2 unless $w32OK; # remove those needing surrogates
  0 0       0  
11117 0   0     0 defined $ini_compat[$_] or $ini_compat[$_] = $compat[$_] for 0..$#compat;
11118 0 0       0 my @extra_penalty = map {!!$compat[$_] and $ini_compat[$_] < $compat[$_]} 0..$#compat;
  0         0  
11119 0 0       0 next unless my $cnt = grep defined, @res2;
11120 0         0 my($penalty, $p) = [('zzz') x @res2]; # above any "5.1", "undef" ("unassigned"???)
11121             # Take into account the "compatibility", but give it lower precedence than the layer:
11122             # for no-compatibility: do not store the level;
11123             defined $res2[$_] and $penalty->[$_] gt ( $p = ($OK{$res2[$_]} ? '+' : '-') . ($self->{Age}{$res2[$_]} || 'undef') .
11124             ($ok{$res2[$_]} ? '+' : '-') . "#$extra_penalty[$_]#" . ($self->{UBlock}{$res2[$_]} || '') )
11125 0 0 0     0 and $penalty->[$_] = $p for 0..$#res2;
    0 0        
      0        
      0        
11126 0   0     0 my $have1 = not (defined $res2[0] and defined $res2[1]); # Prefer those with both entries
11127             # Break a non-lc/uc paired translations into separate groups
11128 0   0     0 my $double_occupancy = ($cnt == 2 and $res2[0] ne $res2[1] and lc $res2[0] eq lc $res2[1]); # Case fold
11129 0         0 warn " seeing random-double, penalties <$penalty->[0]>, <$penalty->[1]>\n" if warnSORTCOMPOSE;
11130 0 0 0     0 next if $double_occupancy and grep {defined and $Penalize{$_}} @res2;
  0 0       0  
11131 0 0 0     0 if ($double_occupancy and grep {defined and $penalize{$_}} @res2) {
  0 0       0  
11132 0   0     0 defined $res2[$_] and $penalty->[$_] = "zzz$penalty->[$_]" for 0..$#res2;
11133             } else {
11134 0   0     0 defined and $Penalize{$_} and $cnt--, $have1=1, undef $_ for @res2;
      0        
11135 0   0     0 defined $res2[$_] and $penalize{$res2[$_]} and $penalty->[$_] = "zzz$penalty->[$_]" for 0..$#res2;
      0        
11136             }
11137 0 0       0 next unless $cnt;
11138 0 0 0     0 if (not $double_occupancy and $cnt == 2 and (1 or $penalty->[0] ne $penalty->[1])) { # Break (penalty here is not a good idea???)
      0        
11139 0         0 warn " breaking random-double, penalties <$penalty->[0]>, <$penalty->[1]>\n" if warnSORTCOMPOSE;
11140 0         0 push @{ $byPenalty{"$penalty->[0]1"}[0][$L] }, [ [$res2[0],undef,undef,$_from_[0]]];
  0         0  
11141 0         0 push @{ $byPenalty{"$penalty->[1]1"}[0][$L] }, [undef, [$res2[1],undef,undef,$_from_[1]]];
  0         0  
11142 0         0 next; # Now: $double_occupancy or $cnt == 1 or $penalty->[0] eq $penalty->[1]
11143             }
11144 0 0       0 $p = (defined $res2[0] ? $penalty->[0] : 'zzz'); # may have been undef()ed due to Penalty...
11145 0 0 0     0 $p = $penalty->[1] if @$penalty > 1 and defined $res2[1] and $p gt $penalty->[1];
      0        
11146 0         0 push @{ $byPenalty{"$p$have1"}[$double_occupancy][$L] },
11147             # [map {defined $res2[$_] ? $res2[$_] : undef} 0..$#res2];
11148 0 0       0 [map {defined $res2[$_] ? [$res2[$_],undef,undef,$_from_[$_]] : undef} 0..$#res2];
  0         0  
11149             }} while $ok;
11150 0         0 warn " --> combined of [@working_with] -> ", $self->array2string([\@res, %byPenalty]) if warnSORTCOMPOSE;
11151             }
11152             } # sorted bindings, per Layer
11153 0         0 push @res, [ @byPenalty{ sort keys %byPenalty } ]; # each elt is an array ref indexed by layer number; elt of this is [lc uc]
11154             }
11155             #warn 'Compositions: ', $self->array2string(\@res);
11156             \@res
11157 0         0 } # index as $res->[group][penalty_N][double_occ][layer][NN][shift]
11158            
11159             sub equalize_lengths ($$@) {
                  # Pad the trailing arguments, IN PLACE, with spaces up to a common length.
                  #   $self  - invocant (not used in the body)
                  #   $extra - number of additional columns to add to the common length (false/omitted: 0)
                  #   @_     - the strings to pad; they are modified through the aliasing of @_,
                  #            so the caller's variables change
                  # The common length is the maximal length() among the strings, plus $extra.
                  # The return value is whatever the last statement yields; the caller visible
                  # in this file (report_sorted_l) ignores it.
11160 0   0 0 0 0 my ($self, $extra, $l) = (shift, shift || 0, 0);
                  # Find the maximal length among the remaining arguments
11161 0   0     0 $l <= length and $l = length for @_;
11162 0         0 $l += $extra;
                  # Append the missing number of spaces to every string shorter than the target
11163 0   0     0 $l > length and $_ .= ' ' x ($l - length) for @_;
11164             }
11165            
11166             sub report_sorted_l ($$$;$$) { # 6 levels: |-group, priority, double-occupancy, layer, count, shift
                  # Format a nested "sorted" structure as a multi-row text table; returns the rows
                  # joined by "\n", with a trailing "\n".
                  #   $k      - label put at the start of the first row; if an array ref, its first element is used
                  #   $sorted - array ref indexed as [group][priority][double-occupancy][layer][count][shift]
                  #   $bold   - optional array ref of group indices whose leading separator is '┃'
                  #   $bold1  - optional array ref of group indices whose leading separator is '┋'
                  #             (an index present in both lists gets '┃', since $bold is applied last)
                  # Other groups are introduced by '║'; priorities inside a group are separated by '│',
                  # double-occupancy classes by '┆'.
                  # Row number 2*layer + shift collects the entries of this layer and shift state.
                  # NOTE(review): $top2 is declared but not used in this sub.
11167 0     0 0 0 my ($self, $k, $sorted, $bold, $bold1, $top2, %bold) = (shift, shift, shift, shift, shift);
11168 0 0 0     0 $k = $k->[0] if 'ARRAY' eq ref($k || 0);
11169 0 0       0 $k = '' unless defined $k;
                  # A label which is empty, or consists only of separator characters/whitespace,
                  # is wrapped in <> so that it cannot be confused with the table decoration
11170 0 0 0     0 $k = "<$k>" if defined $k and $k !~ /[^┃┋║│┆\s]/;
11171 0         0 my @L = ($k, ''); # Up to 100 layers - an overkill, of course??? One extra level to store separators...
11172 0 0       0 $bold{$_} = '┋' for @{$bold1 || []};
  0         0  
11173 0 0       0 $bold{$_} = '┃' for @{$bold || []};
  0         0  
11174 0         0 for my $group (0..$#$sorted) { # Top level
                  # Align all the rows, then start the group with its separator in every row
11175 0         0 $self->equalize_lengths(0, @L);
11176 0   0     0 $_ .= ' ' . ($bold{$group} || '║') for @L;
11177 0         0 my $prio2;
11178 0         0 for my $prio (@{ $sorted->[$group] }) {
  0         0  
                  # Separator goes before every priority class except the first one in the group
11179 0 0       0 if ($prio2++) {
11180 0         0 $self->equalize_lengths(0, @L);
11181 0         0 $_ .= ' │' for @L;
11182             }
11183 0         0 my $double2;
                  # The double-occupancy classes are output in reverse order of their index
11184 0         0 for my $double (reverse @$prio) {
11185 0 0       0 if ($double2++) {
11186 0         0 $self->equalize_lengths(0, @L);
11187 0         0 $_ .= ' ┆' for @L;
11188             }
11189 0         0 for my $layer (0..$#$double) {
11190 0         0 for my $set (@{$double->[$layer]}) {
  0         0  
11191 0         0 for my $shift (0,1) {
11192 0 0       0 next unless defined (my $k = $set->[$shift]);
                  # An entry may be a plain string, or a reference whose element 0 is the string
11193 0 0       0 $k = $k->[0] if ref $k;
                  # $rxCombining is defined outside of this sub; presumably it matches combining
                  # characters, which get an extra space to attach to - TODO confirm
11194 0 0       0 $k = " $k" if $k =~ /$rxCombining/;
11195 0 0       0 if (2*$layer + $shift >= $#L) { # Keep last layer pristine for correct separators...
                  # Add the missing rows as copies of the last row; the code never appends entries
                  # to the last row, so the copies carry only what has been added to every row
11196 0         0 my $add = 2*$layer + $shift - $#L + 1;
11197 0         0 push @L, ($L[-1]) x $add;
11198             }
11199 0         0 $L[ 2*$layer + $shift ] .= " $k";
11200             }
11201             }
11202             }
11203             }
11204             }
11205             }
                  # Drop the trailing rows which contain nothing but separators and whitespace
11206 0   0     0 pop @L while @L and $L[-1] !~ /[^┃┋║│┆\s]/;
11207 0         0 join "\n", @L, '';
11208             }
11209            
11210             sub append_keys ($$$$;$) { # $KK is [[lc,uc], ...]; modifies $C in place
                  # Wrap the bindings of $KK into one new group (with one penalty level) and add it to @$C.
                  #   $C       - array ref of groups; the new group is pushed to its end, or put at its
                  #              start if $prepend is true
                  #   $KK      - array ref indexed by layer; each defined element is [lc, uc], where an
                  #              entry is a string or a reference whose element 0 is the string
                  #   $LL      - optional; $LL->[layer][shift] tells from which layer the binding was taken
                  #   $prepend - if true, add the group at the start of @$C
                  # Returns 1 if a group was added; returns empty (and leaves @$C intact) if $KK had
                  # no layer with a defined binding.
                  # The new group is [[@KKK]], with @KKK indexed as [paired][layer][count] -> [lc, uc];
                  # "paired" is 1 for lc/uc case pairs, 0 otherwise.
11211 0     0 0 0 my ($self, $C, $KK, $LL, $prepend, @KKK, $cnt) = (shift, shift, shift, shift, shift);
11212 0         0 for my $L (0..$#$KK) { # $LL contains info about from which layer the given binding was stolen
11213 0         0 my $k = $KK->[$L];
11214 0 0 0     0 next unless defined $k and (defined $k->[0] or defined $k->[1]);
      0        
11215 0         0 $cnt++;
                  # The bare strings (references replaced by their element 0) to use in the case check
11216 0 0       0 my @kk = map {$_ and ref $_ ? $_->[0] : $_} @$k;
  0 0       0  
                  # Paired: exactly two defined entries which differ, the first being lc() of the second
11217 0   0     0 my $paired = (@$k == 2 and defined $k->[0] and defined $k->[1] and $kk[0] ne $kk[1] and $kk[0] eq lc $kk[1]);
                  # An entry on a non-0 layer is "special" if $LL records layer 0 as its origin;
                  # such entries are stored in the layer-0 slot instead of the slot of layer $L
11218 0 0 0     0 my @need_special = map { $LL and $L and defined $k->[$_] and defined $LL->[$L][$_] and 0 == $LL->[$L][$_]} 0..$#$k;
  0   0     0  
      0        
11219 0 0       0 if (my $special = grep $_, @need_special) { # count
                  # All the defined entries are special: store the whole element in slot 0; done with this layer.
                  # (Inside a slot, the element is pushed if $prepend, and unshifted otherwise.)
11220 0 0       0 ($prepend ? push(@{ $KKK[$paired][0] }, $k) : unshift(@{ $KKK[$paired][0] }, $k)),
  0 0       0  
  0         0  
11221             next if $special == grep defined, @$k;
                  # Only a part is special: split the element into the special entries (going to slot 0)
                  # and the rest (going to slot $L below); the halves are stored as unpaired
11222 0         0 $paired = 0;
11223 0 0       0 my $to_level0 = [map { $need_special[$_] ? $k->[$_] : undef} 0..$#$k];
  0         0  
11224 0 0       0 $k = [map {!$need_special[$_] ? $k->[$_] : undef} 0..$#$k];
  0         0  
11225 0 0       0 $prepend ? push @{ $KKK[$paired][0] }, $to_level0 : unshift @{ $KKK[$paired][0] }, $to_level0;
  0         0  
  0         0  
11226             }
11227 0 0       0 $prepend ? push @{ $KKK[$paired][$L] }, $k : unshift @{ $KKK[$paired][$L] }, $k; # 0: layer has only one slot
  0         0  
  0         0  
11228             }
11229             #print "cnt=$cnt\n";
                  # Nothing was found: do not add an empty group
11230 0 0       0 return unless $cnt;
11231 0 0       0 push @$C, [[@KKK]] unless $prepend; # one group of one level of penalty
11232 0 0       0 unshift @$C, [[@KKK]] if $prepend; # one group of one level of penalty
11233 0         0 1
11234             }
11235            
11236             sub shift_pop_compositions ($$$;$$$$) { # Limit is how many groups to process
                  # Extract (removing it from $C, in place) the next [lc, uc] binding available for layer $L.
                  #   $C             - array ref of groups, each indexed as [penalty][double_occupancy][layer][count] -> [lc, uc]
                  #   $L             - layer number; for non-0 $L the layer-0 slot is inspected before the slot of $L
                  #   $backwards     - if true, the groups and the elements of a slot are scanned from the end
                  #   $omit          - number of leading groups (in the scan order) to skip; default 0
                  #   $limit         - maximal number of groups to scan; a false value means no limit (1e100)
                  #   $ignore_groups - matches found in this many first scanned groups are removed from $C,
                  #                    but are not reported; default 0
                  #   $store_level   - optional array ref; on return it is filled with the layers (0 or $L)
                  #                    from which the lc and uc entries were taken
                  #   $skip_lc, $skip_uc - if true, the unshifted/shifted entry is not looked for
                  # Returns an array ref [lc, uc]; entries which were not found are missing or undef.
                  # If both lc and uc are requested, an element of the double-occupancy class is
                  # preferred; otherwise the result may be assembled from two separate elements.
11237 0   0 0 0 0 my($self, $C, $L, $backwards, $omit, $limit, $ignore_groups, $store_level, $skip_lc, $skip_uc)
      0        
      0        
11238             = (shift, shift, shift, shift, shift || 0, shift || 1e100, shift || 0, shift, shift, shift);
11239 0         0 my($do_lc, $do_uc) = (!$skip_lc, !$skip_uc);
                  # Only $both gets a value here (true if both cases are requested); the rest start as undef/empty
11240 0   0     0 my($both, $first, $out_lc, $out_uc, @out, @out_levels, $have_out, $groupN) = ($do_lc and $do_uc);
11241 0 0       0 my @G = $backwards ? reverse @$C : @$C;
11242 0         0 for my $group (@G[$omit..$#G]) {
11243 0 0       0 last if --$limit < 0;
11244 0         0 $groupN++;
11245 0         0 for my $penalty_group (@$group) { # each $penalty_group is indexed by double_occupancy and layer
11246             # each layer in sorted; if $both, we prefer to extract a paired translation; so it is enough to check the first elt on each layer
11247 0         0 my $group_both = $both;
11248 0 0       0 if ($both) {
                  # Use the double-occupancy class only if it is non-empty on layer $L or on layer 0
11249 0 0 0     0 $group_both = 0 unless $penalty_group->[1] and @{ $penalty_group->[1][$L] || [] } or @{ $penalty_group->[1][0] || [] };
  0 0 0     0  
  0 0       0  
11250             } # if $group_both == 0, and $both: double-group is empty, so we can look only in single/unrelated one.
11251             # if $both = $group_both == 0: may not look in double group, so can look only in single/unrelated one
11252             # if $both = $group_both == 1: must look in double-group only.
                  # Each $Set is [layer, slot of this layer]
11253 0 0       0 for my $Set (($L ? [0, $penalty_group->[$group_both][0]] : ()), [$L, $penalty_group->[$group_both][$L]]) {
11254 0         0 my $set = $Set->[1];
11255 0 0 0     0 next unless $set and @$set; # @$set consists of [unshifted, shifted] pairs
11256 0 0       0 if ($group_both) { # we know we meet a double element at start of the group
11257 0 0       0 my $OUT = $backwards ? pop @$set : shift @$set; # we know we meet a double element at start of the group
                  # In an ignored group the element is consumed, but an empty result is returned
11258 0 0       0 return [] if $groupN <= $ignore_groups;
11259 0 0       0 @$store_level = ($Set->[0]) x 2 if $store_level;
11260 0         0 return $OUT;
11261             }
11262             ## or ($both and defined $elt->[0] and defined $elt->[1]);
                  # $spliced counts the elements removed during the forward scan, to correct the later indices
11263 0         0 my $spliced = 0;
11264 0 0       0 for my $eltA ($backwards ? map($#$set - $_, 0..$#$set) : 0..$#$set) {
11265 0         0 my $elt = $eltA - $spliced;
11266 0   0     0 my $lc_ok = ($do_lc and defined $set->[$elt][0]);
11267 0   0     0 my $uc_ok = ($do_uc and defined $set->[$elt][1]);
11268 0 0 0     0 next if not ($lc_ok or $uc_ok);
11269 0   0     0 my $have_both = (defined $set->[$elt][0] and defined $set->[$elt][1]);
11270 0   0     0 my $found_both = ($lc_ok and $uc_ok); # If defined $have_out, cannot have $found_both; moreover $have_out ne $uc_ok
11271 0 0 0     0 die "Panic!" if defined $have_out and ($found_both or $have_out eq $uc_ok);
      0        
11272             # next if not $found_both and defined $have_out and $have_out eq $uc_ok;
11273 0 0       0 my $can_splice = $have_both ? $both : 1;
11274 0 0       0 my $can_return = $both ? $have_both : 1;
11275 0         0 my $OUT = my $out = $set->[$elt]; # Can't return yet: @out may contain a part of info...
                  # $uc_ok (false or 1) is used as the index: 0 is the lc slot, 1 is the uc slot
11276 0 0 0     0 unless ($groupN <= $ignore_groups or defined $have_out and $have_out eq $uc_ok) { # In case !$do_return or $have_out
      0        
11277 0         0 $out[$uc_ok] = $out->[$uc_ok]; # In case !$do_return or $have_out
11278 0         0 $out_levels[$uc_ok] = $Set->[0];
11279             }
11280             #warn 'Doing <', join('> <', map {defined() ? $_ : 'undef'} @{ $set->[$elt] }), "> L=$L; splice=$can_splice; return=$can_return; lc=$lc_ok uc=$uc_ok";
11281 0 0       0 if ($can_splice) { # Now: $both and not $have_both; must edit in place
                  # The whole element is consumed: remove it from the slot
11282 0         0 splice @$set, $elt, 1;
11283 0 0       0 $spliced++ unless $backwards;
11284             } else { # Must edit in place
                  # Only one entry of a two-entry element is consumed: return a copy, and blank
                  # the consumed entry in the element which remains in the slot
11285 0         0 $OUT = [@$out]; # Deep copy
11286 0         0 undef $out->[$uc_ok]; # only one matched...
11287             }
11288 0 0       0 $OUT = [] if $groupN <= $ignore_groups;
11289 0 0       0 if ($can_return) {
11290 0 0       0 if ($found_both) {
11291 0 0       0 @$store_level = map {$_ and $Set->[0]} @$OUT if $store_level;
  0 0       0  
11292 0         0 return $OUT;
11293             } else {
11294 0 0       0 @$store_level = @out_levels if $store_level;
11295 0         0 return \@out;
11296             }
11297             # return($found_both ? $OUT : \@out);
11298             } # Now: had $both and !$had_both; must condinue
                  # One half is found: remember which one, and continue looking for the other half only
11299 0         0 $have_out = $uc_ok;
11300 0         0 $both = 0; # $group_both is already FALSE
11301 0 0       0 ($lc_ok ? $do_lc : $do_uc) = 0;
11302             #warn "lc/uc: $do_lc/$do_uc";
11303             }
11304             }
11305             }
11306             }
                  # The groups are exhausted: return what was collected (possibly nothing, or one half only)
11307 0 0       0 @$store_level = @out_levels if $store_level;
11308             return \@out
11309 0         0 }
11310            
11311             my ($rebuild_fake, $rebuild_style) = ("\n\t\t\t/* To be auto-generated */\n", <<'EOR');
11312            
11313             .klayout span, .klayout-wrapper .over-shift {
11314             font-size: 29pt ;
11315             font-weight: bolder;
11316             text-wrap: none;
11317             white-space: nowrap;
11318             }
11319             .klayout kbd, .asSpan { display: inline-block; }
11320             .asSpan2 { display: inline-table; }
11321            
11322             /* Not used; allows /-diagonals to be highlighted with nth-last-of-type() */
11323             .klayout kbd.hidden-align { display: none; }
11324            
11325             kbd span.lc, kbd span.uc { display: inline; }
11326            
11327             /* Hide lc only if in .uc or hovering over -uc and not inside; similarly for uc */
11328             /* States: .klayout-wrapper:not(:hover) | .klayout.uclc:hover NORMAL = UCLC
11329             .klayout-uc:hover .klayout:not(:hover) UC
11330             .klayout-wrapper:hover .klayout-uc:not(:hover) LC */
11331             .klayout.lc kbd span.uc, .klayout.uc kbd span.lc,
11332             .klayout-uc:hover:not(:active) .klayout:not(.lc):not(:hover) kbd span.lc,
11333             .klayout-uc:hover:active .klayout:not(.uc):not(:hover) kbd span.uc,
11334             .klayout-wrapper:hover:not(:active) .klayout-uc:not(:hover) .klayout:not(.uc) kbd span.uc,
11335             .klayout-wrapper:hover:active .klayout-uc:not(:hover) .klayout:not(.lc) kbd span.lc { display: none; }
11336            
11337             /* These should be active unless hovering over wrapper, and not internal .klayout */
11338             .klayout.uclc:hover kbd span.uc, .klayout.uclc:hover kbd span.lc,
11339             .klayout.uclc.force kbd span.uc, .klayout.uclc.force kbd span.lc,
11340             .klayout-wrapper:not(:hover) .klayout-uc .klayout.uclc:not(.do-alt) kbd span.uc,
11341             .klayout-wrapper:not(:hover) .klayout-uc .klayout.uclc:not(.do-alt) kbd span.lc {
11342             font-size: 70%;
11343             }
11344             .klayout.uclc:hover kbd span.uc, .klayout.uclc:hover kbd span.lc,
11345             .klayout.uclc:not(.in-wrapper) kbd span.uc, .klayout.uclc:not(.in-wrapper) kbd span.lc,
11346             .klayout.uclc.force kbd span.uc, .klayout.uclc.force kbd span.lc,
11347             .klayout-wrapper:not(:hover) .klayout-uc .klayout.uclc.do-alt kbd span.uc,
11348             .klayout.uclc.do-alt:hover kbd span.uc,
11349             .klayout-wrapper:not(:hover) .klayout-uc .klayout.uclc.do-alt kbd span.lc,
11350             .klayout.uclc.do-alt:hover kbd span.lc,
11351             .klayout-wrapper:not(:hover) .klayout-uc .klayout.uclc:not(.do-alt) kbd span.uc,
11352             .klayout-wrapper:not(:hover) .klayout-uc .klayout.uclc:not(.do-alt) kbd span.lc {
11353             position: absolute;
11354             z-index: 10;
11355             border: 1px dotted green;
11356             line-height: 0.8em; /* decreasing this moves up; should be changed with padding-bottom */
11357             }
11358             .klayout-wrapper:not(:hover) .klayout-uc .klayout.uclc kbd span.uc,
11359             .klayout-wrapper .klayout-uc .klayout.uclc:hover kbd span.uc,
11360             .klayout.uclc kbd span.uc {
11361             right: 0.2em;
11362             top: -0.05em;
11363             padding-bottom: 0.15em; /* Less makes _ not fit inside border... */
11364             }
11365             .klayout-wrapper:not(:hover) .klayout-uc .klayout.uclc kbd span.lc,
11366             .klayout-wrapper .klayout-uc .klayout.uclc:hover kbd span.lc,
11367             .klayout.uclc kbd span.lc {
11368             left: 0.2em;
11369             bottom: 0em;
11370             }
11371             /* Same for left/right placement */
11372             .klayout-wrapper:not(:hover) .klayout-uc .klayout.uclc kbd span.uc.on-left,
11373             .klayout-wrapper .klayout-uc .klayout.uclc:hover kbd span.uc.on-left,
11374             .klayout.uclc:not(.in-wrapper) kbd span.uc.uc.on-left { /* repeat is needed to protect against :not(.base) about 25lines below */
11375             left: 0.35em;
11376             right: auto;
11377             }
11378             .klayout-wrapper:not(:hover) .klayout-uc .klayout.uclc kbd span.lc.on-left,
11379             .klayout-wrapper .klayout-uc .klayout.uclc:hover kbd span.lc.on-left,
11380             .klayout.uclc:not(.in-wrapper) kbd span.lc.lc.on-left {
11381             left: 0.0em;
11382             }
11383             .klayout-wrapper:not(:hover) .klayout-uc .klayout.uclc kbd span.uc.on-right,
11384             .klayout-wrapper .klayout-uc .klayout.uclc:hover kbd span.uc.on-right,
11385             .klayout.uclc:not(.in-wrapper) kbd span.uc.uc.on-right {
11386             right: 0.0em;
11387             }
11388             .klayout-wrapper:not(:hover) .klayout-uc .klayout.uclc kbd span.lc.on-right,
11389             .klayout-wrapper .klayout-uc .klayout.uclc:hover kbd span.lc.on-right,
11390             .klayout.uclc:not(.in-wrapper) kbd span.lc.lc.on-right {
11391             left: auto;
11392             right: 0.35em;
11393             }
11394             .klayout kbd span:not(.base):not(.base-uc):not(.base-lc).on-right
11395             { left: auto; right: 0.0em; position: absolute; }
11396             .klayout kbd span:not(.base):not(.base-uc):not(.base-lc).on-left
11397             { left: 0.0em; right: auto; position: absolute; }
11398             .klayout kbd .on-right:not(.prefix), .on-right-ex { color: firebrick; }
11399             .klayout kbd .on-right:not(.prefix).vbell { color: Coral; }
11400             .klayout kbd .on-left { z-index: 10; }
11401             .klayout kbd .on-right { z-index: 9; }
11402            
11403             .klayout-wrapper:hover .klayout.uclc:not(:hover) kbd.shift {outline: 6px dotted green;}
11404            
11405             kbd span, kbd div { vertical-align: bottom; } /* no effect ???!!! */
11406            
11407             kbd {
11408             color: #444;
11409             /* line-height: 1.6em; */
11410             width: 1.4em; /* +0.24em border +0.08em margin; total 1.72em */
11411            
11412             /* +0.3em border; */
11413             min-height: 0.83em; /* These two should be changed together to get uc letters centered... */
11414             line-height: 0.75em; /* Increasing by the same amount works fine??? */
11415             /* One also needs to change the vertical offsets of arrows from_*, and System-key icon */
11416            
11417             text-align: center;
11418             cursor: pointer;
11419             padding: 0.0em 0.0em 0.0em 0.0em;
11420             margin: 0.04em;
11421             white-space: nowrap;
11422             vertical-align: top;
11423             position: relative;
11424            
11425             background-color: #FFFFFF;
11426            
11427             background-image: -moz-linear-gradient(left, rgba(0,0,0,0.2), rgba(64,64,64,0.2), rgba(64,64,64,0.2), rgba(128,128,128,0.2));
11428             background-image: -webkit-gradient(linear, left top, right top, color-stop(0%,rgba(0,0,0,0.2)), color-stop(33%,rgba(64,64,64,0.2)), color-stop(66%,rgba(64,64,64,0.2)), color-stop(100%,rgba(128,128,128,0.2)));
11429             background-image: -webkit-linear-gradient(left, rgba(0,0,0,0.2) 0%, rgba(64,64,64,0.2) 33%, rgba(64,64,64,0.2) 66%, rgba(128,128,128,0.2) 100%);
11430             background-image: -o-linear-gradient(left, rgba(0,0,0,0.2) 0%, rgba(64,64,64,0.2) 33%, rgba(64,64,64,0.2) 66%, rgba(128,128,128,0.2) 100%);
11431             background-image: -ms-linear-gradient(left, rgba(0,0,0,0.2) 0%, rgba(64,64,64,0.2) 33%, rgba(64,64,64,0.2) 66%, rgba(128,128,128,0.2) 100%);
11432             background-image: linear-gradient(0deg, rgba(0,0,0,0.2) 0%, rgba(64,64,64,0.2) 33%, rgba(64,64,64,0.2) 66%, rgba(128,128,128,0.2) 100%);
11433             filter: progid:DXImageTransform.Microsoft.gradient( startColorstr='#dddddd', endColorstr='#e5e5e5',GradientType=1 );
11434            
11435             border-top: solid 0.1em #CCC;
11436             border-right: solid 0.12em #AAA;
11437             border-bottom: solid 0.2em #999;
11438             border-left: solid 0.12em #BBB;
11439             -webkit-border-radius: 0.22em;
11440             -moz-border-radius: 0.22em;
11441             border-radius: 0.22em;
11442             z-index: 0;
11443            
11444             -webkit-box-shadow:
11445             0.03em 0.1em 0.1em 0.06em #888,
11446             0.05em 0.1em 0.06em 0.06em #aaa;
11447             -moz-box-shadow:
11448             0.03em 0.1em 0.1em 0.06em #888,
11449             0.05em 0.1em 0.06em 0.06em #aaa;
11450             box-shadow:
11451             0.03em 0.1em 0.1em 0.00em #888 ,
11452             0.05em 0.1em 0.06em 0.0em #aaa ;
11453             }
11454            
11455             kbd:hover, .klayout-wrapper:hover .klayout:not(:hover) kbd.shift {
11456             color: #222;
11457             background-image: -moz-linear-gradient(left, rgba(128,128,128,0.2), rgba(192,192,192,0.2), rgba(192,192,192,0.2), rgba(255,255,255,0.2));
11458             background-image: -webkit-gradient(linear, left top, right top, color-stop(0%,rgba(128,128,128,0.2)), color-stop(33%,rgba(192,192,192,0.2)), color-stop(66%,rgba(192,192,192,0.2)), color-stop(100%,rgba(255,255,255,0.2)));
11459             background-image: -webkit-linear-gradient(left, rgba(128,128,128,0.2) 0%, rgba(192,192,192,0.2) 33%, rgba(192,192,192,0.2) 66%, rgba(255,255,255,0.2) 100%);
11460             background-image: -o-linear-gradient(left, rgba(128,128,128,0.2) 0%, rgba(192,192,192,0.2) 33%, rgba(192,192,192,0.2) 66%, rgba(255,255,255,0.2) 100%);
11461             background-image: -ms-linear-gradient(left, rgba(128,128,128,0.2) 0%, rgba(192,192,192,0.2) 33%, rgba(192,192,192,0.2) 66%, rgba(255,255,255,0.2) 100%);
11462             background-image: linear-gradient(0deg, rgba(128,128,128,0.2) 0%, rgba(192,192,192,0.2) 33%, rgba(192,192,192,0.2) 66%, rgba(255,255,255,0.2) 100%);
11463             filter: progid:DXImageTransform.Microsoft.gradient( startColorstr='#e5e5e5', endColorstr='#ffffff',GradientType=1 );
11464             }
11465             kbd:active, kbd.selected, .klayout-uc:hover:not(:active) .klayout:not(:hover) kbd.shift, .klayout-wrapper:active .klayout-uc:not(:hover) kbd.shift {
11466             margin-top: 0.14em; /* This variant is with "solid" buttons, the commented one is with "rubber" ones */
11467             border-top: solid 0.10em #CCC;
11468             border-right: solid 0.12em #9a9a9a; /* Make right/bottom a tiny way darker */
11469             border-bottom: solid 0.1em #8a8a8a;
11470             border-left: solid 0.12em #BBB;
11471             /* margin-top: 0.11em;
11472             border-top: solid 0.13em #999;
11473             border-right: solid 0.12em #BBB;
11474             border-bottom: solid 0.1em #CCC;
11475             border-left: solid 0.12em #AAA; */
11476             padding: 0.0em 0.0em 0.0em 0.0em;
11477            
11478             -webkit-box-shadow:
11479             0.05em 0.03em 0.1em 0.1em #aaa;
11480             -moz-box-shadow:
11481             0.05em 0.03em 0.1em 0.1em #aaa;
11482             box-shadow:
11483             0.05em 0.03em 0.1em 0em #aaa;
11484            
11485             }
11486             kbd img {
11487             padding-left: 0.25em;
11488             vertical-align: middle;
11489             height: 22px; width: 22px;
11490             opacity: 0.8;
11491             }
11492             kbd:hover img {
11493             opacity: 1;
11494             }
11495             kbd span.shrink {
11496             font-size: 85%;
11497             }
11498             .klayout.do-altgr kbd span.shrink.altgr {
11499             font-size: 72%;
11500             }
11501             kbd .small {
11502             font-size: 62%;
11503             }
11504             kbd .vsmall {
11505             font-size: 39%;
11506             }
11507            
11508             kbd .base, kbd .base-lc, kbd .base-uc {
11509             -webkit-touch-callout: none;
11510             -webkit-user-select: none;
11511             -khtml-user-select: none;
11512             -moz-user-select: none;
11513             -ms-user-select: none;
11514             -o-user-select: none;
11515             user-select: none;
11516             }
11517            
11518             /* Special rules for do-alt-display. Without alt2, places the base on left and right;
11519             with alt2, places base on the left (unless base-right is present) */
11520            
11521             /* .klayout.do-alt.uclc kbd span.lc, .klayout.do-alt.uclc kbd span.uc { */
11522             .klayout.do-alt.uclc:not(.in-wrapper) kbd span.uc, .klayout.do-alt.uclc:not(.in-wrapper) kbd span.lc,
11523             .klayout.do-alt.uclc:hover kbd span.uc, .klayout.do-alt.uclc:hover kbd span.lc,
11524             .klayout.do-alt.uclc.force kbd span.uc, .klayout.do-alt.uclc.force kbd span.lc,
11525             .klayout-wrapper:not(:hover) .klayout-uc .klayout.do-alt.uclc kbd span.uc,
11526             .klayout-wrapper:not(:hover) .klayout-uc .klayout.do-alt.uclc kbd span.lc {
11527             font-size: 85%;
11528             }
11529            
11530             .klayout.do-alt.sz125 kbd span.uc, .klayout.do-alt.sz125 kbd span.lc, /* exclude below: too specific otherwise */
11531             .klayout.do-alt.sz125 kbd span:not(.lc):not(.uc):not(.base):not(.base-uc):not(.base-lc):not(.shrink):not(.small):not(.vsmall) {
11532             font-size: 125%;
11533             line-height: 0.98em; /* decreasing this moves up; should be changed with padding-bottom */
11534             /* padding-bottom: 0.1em; */ /* Less makes _ not fit inside border... */
11535             }
11536             .klayout.do-alt.sz120 kbd span.uc, .klayout.do-alt.sz120 kbd span.lc, /* exclude below: too specific otherwise */
11537             .klayout.do-alt.sz120 kbd span:not(.lc):not(.uc):not(.base):not(.base-uc):not(.base-lc):not(.shrink):not(.small):not(.vsmall) {
11538             font-size: 120%;
11539             line-height: 1.02em; /* decreasing this moves up; should be changed with padding-bottom */
11540             /* padding-bottom: 0.1em; */ /* Less makes _ not fit inside border... */
11541             }
11542             .klayout.do-alt kbd span.uc, .klayout.do-alt kbd span.lc, /* exclude below: too specific otherwise */
11543             .klayout.do-alt.sz115 kbd span.uc, .klayout.do-alt.sz115 kbd span.lc,
11544             .klayout.do-alt kbd span:not(.lc):not(.uc):not(.base):not(.base-uc):not(.base-lc):not(.shrink):not(.small):not(.vsmall),
11545             .klayout.do-alt.sz115 kbd span:not(.lc):not(.uc):not(.base):not(.base-uc):not(.base-lc):not(.shrink):not(.small):not(.vsmall) {
11546             font-size: 115%;
11547             line-height: 1.05em; /* decreasing this moves up; should be changed with padding-bottom */
11548             /* padding-bottom: 0.1em; */ /* Less makes _ not fit inside border... */
11549             }
11550             .klayout.do-alt.sz110 kbd span.uc, .klayout.do-alt.sz110 kbd span.lc, /* exclude below: too specific otherwise */
11551             .klayout.do-alt.sz110 kbd span:not(.lc):not(.uc):not(.base):not(.base-uc):not(.base-lc):not(.shrink):not(.small):not(.vsmall) {
11552             font-size: 110%;
11553             line-height: 1.12em; /* decreasing this moves up; should be changed with padding-bottom */
11554             /* padding-bottom: 0.1em; */ /* Less makes _ not fit inside border... */
11555             }
11556             .klayout.do-alt.sz100 kbd span.uc, .klayout.do-alt.sz100 kbd span.lc, /* exclude below: too specific otherwise */
11557             .klayout.do-alt.sz100 kbd span:not(.lc):not(.uc):not(.base):not(.base-uc):not(.base-lc):not(.shrink):not(.small):not(.vsmall) {
11558             line-height: 1.2em; /* decreasing this moves up; should be changed with padding-bottom */
11559             /* padding-bottom: 0.1em; */ /* Less makes _ not fit inside border... */
11560             }
11561            
11562             .klayout.do-alt kbd span.base-lc, .klayout.do-alt kbd span.base-uc {
11563             font-size: 90%;
11564             }
11565             .klayout.do-alt.alt2 kbd span.base-lc, .klayout.do-alt.alt2 kbd span.base-uc {
11566             font-size: 80%;
11567             }
11568            
11569             .klayout.do-alt kbd span.base-uc {
11570             right: 15%;
11571             top: 35%; /* Combine rel-parent and rel-us offsets : */
11572             }
11573             .klayout.do-alt kbd span.base-lc {
11574             left: 15%;
11575             bottom: 25%; /* Combine rel-parent and rel-us offsets : */
11576             }
11577             .klayout.do-alt.alt2 kbd span.base-uc {
11578             left: 35%;
11579             top: 30%; /* Combine rel-parent and rel-us offsets : */
11580             }
11581             .klayout.do-alt.alt2 kbd span.base-lc {
11582             left: 15%;
11583             bottom: 25%; /* Combine rel-parent and rel-us offsets : */
11584             }
11585             .klayout.do-alt.alt2.base-right kbd span.base-uc {
11586             right: 15%;
11587             left: auto; /* Combine rel-parent and rel-us offsets : */
11588             }
11589             .klayout.do-alt.alt2.base-right kbd span.base-lc {
11590             right: 35%;
11591             left: auto; /* Combine rel-parent and rel-us offsets : */
11592             }
11593             .klayout.do-alt.alt2.base-center kbd span.base-uc {
11594             left: 60%; /* Combine rel-parent and rel-us offsets : */
11595             }
11596             .klayout.do-alt.alt2.base-center kbd span.base-lc {
11597             left: 40%; /* Combine rel-parent and rel-us offsets : */
11598             }
11599            
11600             .klayout.do-alt kbd span.base {
11601             font-size: 120%;
11602             left: 25%;
11603             top: 65%; /* Combine rel-parent and rel-us offsets : */
11604             }
11605             .klayout.do-alt.large-base.large-base kbd span.base { /* Make .large-base override .alt2 */
11606             font-size: 200%;
11607             left: 50%;
11608             top: 50%; /* Combine rel-parent and rel-us offsets : */
11609             }
11610             .klayout.do-alt.alt2 kbd span.base {
11611             font-size: 110%;
11612             left: 25%;
11613             top: 75%; /* Combine rel-parent and rel-us offsets : */
11614             }
11615             .klayout.do-alt.alt2.base-right kbd span.base {
11616             right: 25%;
11617             left: auto; /* Combine rel-parent and rel-us offsets : */
11618             }
11619             .klayout.do-alt.alt2.base-center kbd span.base {
11620             left: 50%; /* Combine rel-parent and rel-us offsets : */
11621             }
11622             .klayout.do-alt kbd span.base, .klayout.do-alt kbd span.base-lc, .klayout.do-alt kbd span.base-uc {
11623             position: absolute;
11624             z-index: -1;
11625            
11626             opacity: 0.25;
11627             filter: alpha(opacity=25); /* IE6-IE8 */
11628            
11629             color: blue;
11630             line-height: 1em; /* Tight-fitting box */
11631             height: 1em;
11632             width: 1em;
11633             margin: -0.5em -0.5em -0.5em -0.5em; /* -0.5em is the geometric center */
11634             }
11635             .klayout.do-alt kbd {
11636             min-height: 1.2em; /* Should be changed together to get uc letters centered... */
11637             line-height: 1.2em; /* Increasing by the same amount works fine??? */
11638             }
11639             .klayout.do-altgr span.altgr {outline: 9px dotted green;}
11640            
11641             kbd.with_x-NONONO:before {
11642             position: absolute;
11643             z-index: -10;
11644            
11645             opacity: 0.25;
11646             filter: alpha(opacity=25); /* IE6-IE8 */
11647            
11648             content: "✖";
11649             color: red;
11650             font-size: 120%;
11651            
11652             line-height: 1em; /* Tight-fitting box */
11653             height: 1em;
11654             width: 1em;
11655            
11656             top: 50%; /* Combine rel-parent and rel-us offsets : */
11657             left: 50%;
11658             margin: -0.43em 0 0 -0.5em; /* -0.5em is the geometric center; but it is not in the center of ✖...*/
11659             }
11660             kbd.from_sw:after, kbd.from_ne:after, kbd.from_nw:after, kbd.to_ne:after, kbd.to_nw:before, kbd.to_w:after, kbd.from_w:after {
11661             position: absolute;
11662             z-index: 1;
11663             font-size: 80%;
11664             color: red;
11665             text-shadow: 1px 1px #ffff88, -1px -1px #ffff88, -1px 1px #ffff88, 1px -1px #ffff88;
11666             text-shadow: 1px 1px rgba(255,255,0,0.3), -1px -1px rgba(255,255,0,0.3), -1px 1px rgba(255,255,0,0.3), 1px -1px rgba(255,255,0,0.3);
11667             }
11668             kbd.from_sw.grn:after, kbd.from_ne.grn:after, kbd.from_nw.grn:after, kbd.to_ne.grn:after, kbd.to_nw.grn:before, kbd.to_w.grn:after, kbd.from_w.grn:after {
11669             color: green;
11670             }
11671             kbd.from_sw.blu:after, kbd.from_ne.blu:after, kbd.from_nw.blu:after, kbd.to_ne.blu:after, kbd.to_nw.blu:before, kbd.to_w.blu:after, kbd.from_w.blu:after {
11672             color: blue;
11673             }
11674             kbd.from_sw.ylw:after, kbd.from_ne.ylw:after, kbd.from_nw.ylw:after, kbd.to_ne.ylw:after, kbd.to_nw.ylw:before, kbd.to_w.ylw:after, kbd.from_w.ylw:after {
11675             color: #FFB400;
11676             }
11677             kbd.from_sw:not(.pure), kbd.xfrom_sw, kbd.from_ne:not(.pure), kbd.from_nw:not(.pure), kbd.to_ne:not(.pure), kbd.to_nw:not(.pure) {
11678             text-shadow: 1px 1px yellow, -1px -1px yellow, -1px 1px yellow, 1px -1px yellow;
11679             }
11680             kbd.from_sw:after {
11681             left: -0.0em;
11682             bottom: -0.65em;
11683             }
11684             kbd.from_sw:after, kbd.to_ne:after {
11685             content: "⇗";
11686             }
11687             kbd.from_se:after, kbd.to_nw:before {
11688             content: "⇖";
11689             }
11690             kbd.from_ne:after, kbd.from_nw:after {
11691             top: -0.55em;
11692             }
11693             kbd.to_ne:after, kbd.to_nw:before { top: -0.85em;}
11694             kbd.to_nw:before { left: 0.01em;}
11695             kbd.from_ne:after { content: "⇙"; }
11696             kbd.from_ne:after, kbd.to_ne:after { right: -0.0em; }
11697             kbd.from_nw:after { content: "⇘"; left: -0.0em; }
11698             kbd.to_w:after, kbd.from_w:after {
11699             top: 45%;
11700             left: -0.7em;
11701             }
11702             kbd.to_w.high:after, kbd.from_w.high:after {
11703             top: -15%;
11704             left: -0.5em;
11705             }
11706             kbd.to_w:after { content: "⇐"; }
11707             kbd.from_w:after { content: "⇒"; }
11708            
11709             /* Compensate for higher keys */
11710             .klayout.do-alt kbd.from_sw:after {
11711             bottom: -0.90em;
11712             }
11713             .klayout.do-alt kbd.from_ne:after, .klayout.do-alt kbd.from_nw:after {
11714             top: -0.85em;
11715             }
11716            
11717             span.prefix {
11718             color: yellow;
11719             text-shadow: 1px 1px black, -1px -1px black, -1px 1px black, 1px -1px black;
11720             }
11721             span.prefix.prefix2 {
11722             text-shadow: 1px 1px black, -1px -1px black, -1px 1px black, 1px -1px black,
11723             3px 0px firebrick, -3px 0px firebrick, 0px 3px firebrick, 0px -3px firebrick;
11724             }
11725             span.very-special {
11726             text-shadow: 1px 1px lime, -1px -1px lime, -1px 1px lime, 1px -1px lime;
11727             }
11728             span.special {
11729             text-shadow: 2px 2px dodgerblue, -2px -2px dodgerblue, -2px 2px dodgerblue, 2px -2px dodgerblue;
11730             }
11731             .thinspecial span.special {
11732             text-shadow: 1px 1px dodgerblue, -1px -1px dodgerblue, -1px 1px dodgerblue, 1px -1px dodgerblue;
11733             }
11734             span.not-surr:not(.prefix) {
11735             text-shadow: 2px 2px white, -2px -2px white, -2px 2px white, 2px -2px white;
11736             }
11737             span.need-learn {
11738             text-shadow: 1px 1px coral, -1px -1px coral, -1px 1px coral, 1px -1px coral;
11739             }
11740             span.need-learn.on-right {
11741             text-shadow: 1px 1px black, -1px -1px black, -1px 1px black, 1px -1px black,
11742             2px 2px coral, -2px -2px coral, -2px 2px coral, 2px -2px coral;
11743             }
11744             span.may-guess {
11745             text-shadow: 1px 1px yellow, -1px -1px yellow, -1px 1px yellow, 1px -1px yellow;
11746             }
11747            
11748             kbd.win_logo.ubuntu:before {
11749             content: url(http://linux.bihlman.com/wp-content/plugins/wp-useragent/img/24/os/ubuntu-2.png);
11750             }
11751             kbd.win_logo:before {
11752             position: absolute;
11753             z-index: -10;
11754            
11755             content: url(40px-computer_glenn_rolla_01.svg.med.png);
11756             height: 100%;
11757             width: 100%;
11758            
11759             top: 0%; /* Combine rel-parent and rel-us offsets : */
11760             left: 0%;
11761             /* margin: -0.5em -0.5em -0.5em -0.5em; */ /* -0.5em is the geometric center */
11762             }
11763             .do-alt kbd.win_logo:before { /* How to vcenter automatically??? */
11764             top: 20%;
11765             }
11766            
11767             /* Mark vowel's diagonals (for layout of diacritics) */
11768             .ddiag .arow > kbd:nth-of-type(2), .ddiag .arow > kbd:nth-last-of-type(7),
11769             .diag .arow > kbd:nth-of-type(2), .diag .arow > kbd:nth-of-type(7),
11770             .diag .drow > kbd:nth-of-type(2), .diag .drow > kbd:nth-of-type(7),
11771             .diag .arow > kbd:nth-of-type(10), .diag .drow > kbd:nth-of-type(10), kbd.red-bg
11772             { background-color: #ffcccc; }
11773             .ddiag .arow > kbd:nth-last-of-type(6), .ddiag .arow > kbd:nth-of-type(4),
11774             .diag .arow > kbd:nth-of-type(8), .diag .arow > kbd:nth-of-type(3),
11775             .diag .drow > kbd:nth-of-type(8), .diag .drow > kbd:nth-of-type(3), kbd.green-bg
11776             { background-color: #ccffcc; }
11777             .ddiag .arow > kbd:nth-last-of-type(8), .ddiag .arow > kbd:nth-last-of-type(5),
11778             .diag .arow > kbd:nth-of-type(9), .diag .arow > kbd:nth-of-type(4),
11779             .diag .drow > kbd:nth-of-type(9), .diag .drow > kbd:nth-of-type(4), kbd.blue-bg
11780             { background-color: #ccccff; }
11781            
11782             /* Mark non-vowel's diagonals (for layout of diacritics) */
11783             .hide45end .arow > kbd:nth-of-type(5), .hide45end .arow > kbd:nth-of-type(6),
11784             .hide45end .arow > kbd:nth-of-type(11),
11785             .hide45end .drow > kbd:nth-of-type(5), .hide45end .drow > kbd:nth-of-type(6),
11786             .hide45end .drow > kbd:nth-of-type(11), kbd.semi-hidden
11787             { opacity: 0.45; }
11788            
11789             span.vbell { color: SandyBrown; }
11790             span.three-cases { outline: 3px dotted yellow; }
11791             span.three-cases-long { outline: 3px dotted MediumSpringGreen; }
11792            
11793             span.withSubst { outline: 1px dotted blue; outline-offset: -1px; }
11794             span.isSubst { outline: 1px solid blue; outline-offset: -1px; }
11795            
11796             .use-operator span.operator { background-color: rgb(255,192,203) /*pink*/; }
11797             span.relation { background-color: rgb(255,160,122) /*lightsalmon*/; }
11798             span.ipa { background-color: rgb(173,255,47) /*greenyellow*/; }
11799             span.nAry { background-color: rgb(144,238,144) /*lightgreen*/; }
11800             span.paleo { background-color: rgb(240,230,140) /*Khaki*/; }
11801             .use-viet span.viet { background-color: rgb(220,220,220) /*Gainsboro*/; }
11802             div:not(.no-doubleaccent) span.doubleaccent { background-color: rgb(255,228,196) /*Bisque*/; }
11803             span.ZW { background-color: rgb(220,20,60) /*crimson*/; }
11804             span.WS { background-color: rgb(128,0,0) /*maroon*/; }
11805            
11806             .use-operator span.operator { background-color: rgba(255,192,203,0.5) /*pink*/; }
11807             span.relation { background-color: rgba(255,160,122,0.5) /*lightsalmon*/; }
11808             span.ipa { background-color: rgba(173,255,47,0.5) /*greenyellow*/; }
11809             span.nAry { background-color: rgba(144,238,144,0.5) /*lightgreen*/; }
11810             span.paleo { background-color: rgba(240,230,140,0.5) /*Khaki*/; }
11811             .use-viet span.viet { background-color: rgba(220,220,220,0.5) /*Gainsboro*/; }
11812             div:not(.no-doubleaccent) span.doubleaccent { background-color: rgba(255,228,196,0.5) /*Bisque*/; }
11813             span.ZW { background-color: rgba(220,20,60,0.5) /*crimson*/; }
11814             span.WS { background-color: rgba(128,0,0,0.5) /*maroon*/; }
11815            
11816             span.lFILL[convention]:before { content: attr(convention);
11817             color: white;
11818             font-size: 50%; }
11819            
11820             span.lFILL:not([convention]) { margin: 0ex 0.35ex; }
11821             span.l-NONONO { margin: 0ex 0.06ex; }
11822             span.yyy { padding: 0px !important; }
11823            
11824             div.rtl-hover:hover div:not(:hover) kbd span:not(.no-mirror-rtl):not(.base):not(.base-uc):not(.base-lc) { direction: rtl; }
11825            
11826             div.zero { position: relative;}
11827             div.zero div.over-shift { position: absolute; height: 1.13em; z-index: 999;} /* z-index applies because the box is positioned */
11828             /* div.zero div.over-shift { outline: 3px dotted yellow;} */
11829             .do-alt + div.zero div.over-shift { height: 1.5em; }
11830             div.zero.l div.over-shift { left: 0.04pt; width: 4.24em;}
11831             div.zero.r div.over-shift { left: 21.12em; width: 3.56em;} /* (1.72em - 0.04em) × 10 + 4.24em + 0.08 */
11832             div.zero.tp div.over-shift { top: 7.8em;}
11833             .over-shift-outline div.zero.btm div.over-shift { outline: 3px dotted blue;}
11834             div.zero.btm div.over-shift { bottom: 1.13em;}
11835             .do-alt + div.zero.btm div.over-shift { bottom: 1.5em;}
11836             /* div.zero:hover { outline: 6px dotted yellow;} */
11837            
11838             EOR
11839            
11840             sub apply_filter_div ($$;$) {
                  # Arguments: $self, the text $txt, and an optional option hash ref $opt (defaults to {}).
                  # Runs a single s///msge (no /g) over a copy of $txt.  The replacement keeps the captured
                  # opening text ($1) and closing text ($3) and puts between them either $rebuild_fake
                  # (when $opt->{fake} is true) or the result of $self->html_keyboard_diagram() called with
                  # the captured value of the kbd_rebuild="..." attribute ($2) and $opt.
                  # Returns the (possibly modified) copy of the text.
                  # NOTE(review): the pattern as shown in this listing looks truncated (part of it is
                  # presumably not visible here) — confirm its exact shape against the module source.
11841 0   0 0 0 0 my($self, $txt, $opt) = (shift, shift, shift || {});
11842 0 0       0 $txt =~ s(^(]*\skbd_rebuild="([^""]*?)"[^'">]*>).*?^(
  0         0  
11843 0         0 ( $1 . ($opt->{fake} ? $rebuild_fake : $self->html_keyboard_diagram("$2", $opt)) . $3 )msge;
11844             $txt;
11845             }
# Replace the text found between the CSS comment markers
#     /* START auto-generated style */   and   /* END auto-generated style */
# in a copy of $txt.  The markers themselves are kept.  The text put between them is
# $rebuild_fake when $opt->{fake} is true, and $rebuild_style otherwise.
# $opt is optional (defaults to {}).  Returns the resulting text.
sub apply_filter_style ($$;$) {
    my ($self, $txt, $opt) = @_;
    $opt ||= {};
    # Chosen up front: the substitution has no /g, so at most one span is replaced.
    my $replacement = $opt->{fake} ? $rebuild_fake : $rebuild_style;
    $txt =~ s{^(\s*/\*\s*START\s+auto-generated\s+style\s*\*/).*?(/\*\s*END\s+auto-generated\s+style\s*\*/)}
             { $1 . $replacement . $2 }msge;
    return $txt;
}
11852            
11853             my @HTML_KBD_FIXED = ('
11854            
11855             ',
11856             'Backspace
11857            
11858            
Tab',
11859             '
11860            
11861            
CapsLock',
11862             'Enter
11863            
11864            
Shift',
11865             'Shift
11866            
11867            
CtrlAlt',
11868             'AltGrMenuCtrl
11869            
11870             '); # Six fixed text fragments (carrying the labels Backspace, Tab, CapsLock, Enter, Shift, Ctrl,
                  # Alt, AltGr and Menu).  Unless the oneRow option is given, keys2html_diagram() emits the
                  # first fragment before the keys, one more at each row break, and whatever remains after
                  # the last key.
                  # NOTE(review): as shown here the fragments hold only labels and blank lines; presumably
                  # the markup around the labels is not visible in this listing — confirm against the source.
11871 0     0 0 0
# Collect the distinct class names registered in $h_classes for the characters of one key.
#
# Invoked as a method; arguments after $self:
#   $h_classes    hash ref of the form { SELECTOR => { CHAR => [class names] } }
#   $opt          array ref of option names
#   $layer        value appended to a selector prefix to form the layer-specific selectors
#   $lc0, $uc0    "background" chars, looked up under the selectors built from $k_base
#   $lc, $uc      "foreground" chars, looked up under the selectors built from $k
#   $k_base, $k   selector prefixes (the callers in this file pass 'k', 'K' or 'c', 'C')
#
# For a prefix P the selectors consulted are  P,  P=OPT,  P$layer  and  P$layer=OPT  (for every
# OPT in @$opt).  Undefined chars are skipped.  Returns the class names as a list without
# duplicates, in no particular order.
sub classes_by_chars ($$$$$$$$$$) {
    my ($self, $h_classes, $opt, $layer, $lc0, $uc0, $lc, $uc, $k_base, $k) = @_;
    my %cl;

    # Strictly read-only lookup.  Dereferencing  @{ $h_classes->{SEL}{CHAR} }  directly in a `for'
    # list would autovivify both hash levels on every miss, filling the caller's table with
    # empty entries; so each level is fetched separately and a miss contributes nothing.
    my $count_classes = sub {
        my ($selector, $char) = @_;
        my $by_char = $h_classes->{$selector} or return;
        $cl{$_}++ for @{ $by_char->{$char} || [] };
        return;
    };

    for my $L ('', $layer) {
        # k / k=opt go with the background chars of the key; K / K=opt with the foreground chars
        for my $group ([$k_base, $lc0, $uc0], [$k, $lc, $uc]) {
            my ($prefix, @chars) = @$group;
            for my $c (grep defined, @chars) {
                $count_classes->("$prefix$L", $c);
                for my $o (@$opt) {
                    $count_classes->("$prefix$L=$o", $c);
                }
            }
        }
    }
    keys %cl;
}
11891 0     0 0 0
# Translate the char $c through the map $kmap (a hash ref keyed by $self->key2hex(CHAR)).
#   - Without a map, $c is returned untouched (array ref or not).
#   - If $c is an array ref, its element 0 is the char which is looked up.
#   - A char with no (defined) entry in the map gives undef.
#   - A plain entry is a hex code and gives the corresponding character.
#   - An array-ref entry gives a fresh (one level deep) copy of the array whose element 0 is
#     converted from a hex code to the character; the entry stored in the map is not modified.
sub apply_kmap($$$) {
    my ($self, $kmap, $c) = @_;
    return $c unless $kmap;

    my $char   = ref $c ? $c->[0] : $c;
    my $mapped = $kmap->{ $self->key2hex($char) };
    return $mapped unless defined $mapped;      # no binding: undef
    return chr hex $mapped unless ref $mapped;

    my @entry = @$mapped;                       # copy of the top level only
    $entry[0] = chr hex $entry[0];
    return \@entry;
}
11902 0     0 0 0
11903 0         0 sub do_keys ($$$@) { # calculate classes related to the “whole key”, and emit the “content” of the key
                  # Arguments after $self and $opt: one array ref per layer entry of the key, of the form
                  #   [lc, uc, face, kmap, layerN, h_classes, name, classes...].
                  # lc/uc of every entry are translated via apply_kmap() when the entry has a kmap.  The
                  # translated lc/uc of the first entry become $lc0/$uc0, which are passed to
                  # classes_by_chars() (selectors 'k'/'K') and to a_pair() together with every entry.
                  # Returns the concatenation of the a_pair() results of all the entries.
                  # NOTE(review): $q/$cl are built from the collected classes but are not used anywhere in
                  # the code as shown; the empty "" and '' in the final join are presumably the places
                  # where markup using $cl is not visible in this listing — confirm against the source.
11904 0         0 my ($self, $opt, $base, $out, $lc0, $uc0, %c_classes) = (shift, shift, 1, '');
11905 0   0     0 for my $in (@_) {
11906 0   0     0 my ($lc, $uc, $f, $kmap, $layerN, $h_classes, $name, @classes) = @$in;
11907 0 0       0 $kmap and $_ = $self->apply_kmap($kmap, $_) for ($lc, $uc);
11908             ref and $_ = $_->[0] for $lc, $uc;
11909 0         0 ($lc0, $uc0) = ($lc, $uc), $base = 0 if $base;
11910             # k/K for key-based-on-(background/foreground) char; k=opt/K=opt likewise
11911 0         0 $c_classes{$_}++ for $self->classes_by_chars($h_classes, $opt, $layerN, $lc0, $uc0, $lc, $uc, 'k', 'K');
11912 0 0       0 }
11913 0 0       0 my @extra = sort keys %c_classes;
11914             my $q = ("@extra" =~ /\s/ ? '"' : '');
11915 0         0 my $cl = @extra ? " class=$q@extra$q" : '';
11916             # push @extra, 'from_se' if $k[0][0] =~ /---/i; # lc, uc, $h_classes, name, classes:
11917             join '', $out, "", (map $self->a_pair($opt, $lc0, $uc0, $self->apply_kmap($_->[3], $_->[0]),
11918             $self->apply_kmap($_->[3], $_->[1]),
11919             $_->[2], $_->[4], $_->[5], $_->[6], [@$_[7..$#$_]]), @_), ''
11920 0     0 0 0 }
  0         0  
11921            
# Return a copy of the argument in which every occurrence of the characters & < >
# is replaced by its entry in the file-level table %html_esc.
sub h($) {
    my ($text) = @_;
    $text =~ s/([&<>])/$html_esc{$1}/g;
    return $text;
}
# tags_by_rx(STRING, TAG1 => RX1, TAG2 => RX2, ...)
# Return (in the order given) the tags whose pattern matches STRING.
# Dies unless the tags and the patterns come in pairs (i.e. the argument count is odd).
sub tags_by_rx {
    my ($string, @pairs) = @_;
    die "Need odd number of arguments" if @pairs % 2;
    my @matched;
    while (my ($tag, $rx) = splice @pairs, 0, 2) {
        push @matched, $tag if $string =~ $rx;
    }
    return @matched;
}
11932 0   0 0 0 0
11933             sub a_pair ($$$$$$$$$$;@) {
                  # Build the output for the lowercase/uppercase chars $LC/$UC of one layer entry of a key.
                  # $LC/$UC are either plain chars or array refs: element 0 is the char, and a true element 2
                  # adds the class 'prefix' (the values 3 and 3<<3 are what may add 'prefix2').
                  # An undefined char is replaced by '♪' for output.
                  # If $uc and $lc are case counterparts of each other (see the condition below; 'ß' is
                  # excluded), or both are prefix entries with equal chars, one item is produced from $uc.
                  # Otherwise one item per case is produced, with the class 'lc'/'uc' (or "$name-lc"/"$name-uc"
                  # when $name is given); an undefined char additionally gets the class 'vbell'.
                  # An item is h(CHAR) when $name is 'base', else the result of $self->char_2_html_span().
                  # Further classes come from $extra and from classes_by_chars() with the selectors 'c'/'C'.
                  # Dies if $uc differs from $lc in the one-item case and either of them has a true element 2.
                  # NOTE(review): $e, $opt, $q, $cl, @e_lc and @e_uc are assigned but not used in the code as
                  # shown, and the "" around h(...) are empty; presumably markup using them is not visible
                  # in this listing — confirm against the module source.
11934             my($self, $opts, $lc0, $uc0, $LC, $UC, $F, $layerN, $h_classes, $name, $extra) =
11935 0 0 0     0 (shift, shift, shift, shift, shift, shift, shift, shift, shift, shift, shift || []);
  0         0  
11936             # warn "See lc prefix $LC->[0] " if ref $LC and $LC->[2];
11937 0         0 my ($lc1, $uc1) = map {(defined and ref()) ? $_->[0] : $_} $LC, $UC;
11938 0         0
11939             $extra = [@$extra]; # private copy: the pushes/unshifts below do not touch the caller's array
11940 0 0       0 my $e = @$extra;
  0         0  
11941            
11942             my ($lc, $uc) = map {defined() ? $_ : '♪'} $lc1, $uc1;
11943 0         0 # return join '', map {defined() ? $_ : ''} $lc, $uc;
  0         0  
11944 0   0     0
11945 0   0     0 my $opt = { map {($_, 1)} @$opts };
11946 0 0 0     0 my $base = (($name || '') eq 'base');
      0        
      0        
      0        
11947 0 0       0 my $prefix2 = (ref($LC) and ref($UC) and $LC->[2] and $UC->[2] && $uc eq $lc);
11948 0   0     0 if ($prefix2 or ($uc eq ucfirst $lc and $lc eq lc $uc and $lc ne 'ß' and defined($lc1) == defined($uc1))) {
      0        
11949             if ($uc ne $lc) {
11950 0 0 0     0 ref and $_->[2] and die "Do not expect a character `$_->[0]' to be a deadkey..." for $LC, $UC;
  0   0     0  
      0        
11951 0 0 0     0 }
      0        
11952 0 0 0     0 my @pref_i = map { ref $_ and (3 == ($_->[2] || 0) or (3 << 3) == ($_->[2] || 0)) } $LC, $UC;
      0        
11953 0         0 $prefix2 and $pref_i[1] and not $pref_i[0] and unshift @$extra, 'prefix2';
11954             $LC and ref $LC and $LC->[2] and unshift @$extra, 'prefix';
11955             push @$extra, $self->classes_by_chars($h_classes, $opts, $layerN, $lc0, undef, $lc1, undef, 'c', 'C');
11956 0 0 0     0 # unshift @$extra, tags_by_rx $lc, 'need-learn' => ($opt->{cyr} ? qr/N-A/i : qr/[ϝϙϲͻϿϾͲ℧ϗ]N-A/i);
    0          
11957             # push @$extra, 'vbell' unless defined $lc1;
11958 0 0       0 push @$extra, (1 < length uc $lc1 ? 'three-cases-long' : 'three-cases')
11959 0 0       0 if defined $lc1 and uc $lc1 ne ucfirst $lc1;
11960 0         0 push @$extra, $name if $name;
11961 0 0       0 my $q = ("@$extra" =~ /\s/ ? '"' : '');
11962 0 0       0 @$extra = sort @$extra;
11963             my $cl = @$extra ? " class=$q@$extra$q" : '';
11964             $base ? "" . h($uc) . "" : $self->char_2_html_span(undef, $UC, $uc, $F, {}, @$extra)
11965 0         0 # "" . $out . "";
11966 0         0 } else {
11967             my (@e_lc, @e_uc);
11968 0   0     0 my @do = ([$lc, [], 'lc', $LC, $lc0, $lc1], [$uc, [], 'uc', $UC, $uc0, $uc1]);
  0   0     0  
      0        
11969 0   0     0 # warn "See lc prefix $LC->[0] " if ref $LC and $LC->[2];
  0   0     0  
      0        
      0        
11970 0         0 $_->[3] and ref $_->[3] and $_->[3][2] and push @{$_->[1]}, 'prefix' for @do;
11971             $_->[3] and ref $_->[3] and (3 == ($_->[3][2] || 0) or (3 << 3) == ($_->[3][2] || 0)) and push @{$_->[1]}, 'prefix2' for @do;
11972 0         0 push @{$_->[1]}, $self->classes_by_chars($h_classes, $opts, $layerN, $_->[4], undef, $_->[5], undef, 'c', 'C'),
11973 0         0 tags_by_rx $_->[0], 'not-surr' => qr/[„‚“‘”’«‹»›‐–—―‒‑‵‶‷′″‴⁗〃´]/i # white
  0         0  
11974             for @do;
11975 0 0       0 push @{$_->[1]}, 'vbell' for grep !defined $_->[5], @do;
  0         0  
  0         0  
11976 0         0 join '', map {
  0         0  
11977 0 0       0 push @{$_->[1]}, ($name ? "$name-$_->[2]" : $_->[2]);
11978 0 0       0 my $ee = [sort @$extra, @{$_->[1]}];
11979             my $q = ("@$ee" =~ /\s/) ? '"' : '';
11980             my $o = ($base ? "" . h($_->[0]) . ""
11981             : $self->char_2_html_span(undef, $_->[3], $_->[0], $F, {}, @$ee));
11982             # "[2]$q>$o";
11983             } @do;
11984             }
11985             }
11986            
11987 0     0 0 0 my $kbdrow = 0; # incremented by keys2html_diagram() on every call made with the oneRow option
11988 0 0       0 sub keys2html_diagram ($$$$@) {
                  # Arguments: $self; $opts (array ref of options, each either FLAG or NAME=VALUE); $cnt (the
                  # number of keys); $new_row (hash ref: a true value for a key's ordinal means that the next
                  # fixed fragment is emitted before that key); then one array ref per layer of the form
                  #   [layer, face, kmap, layerN, class_hash, name, classes...].
                  # Options used here: oneRow (the fixed fragments of @HTML_KBD_FIXED are replaced by a single
                  # fragment made of 0, 2 or 4 no-break spaces, cycling with $kbdrow), startKey and cntKeys
                  # (the keys emitted are cntKeys consecutive ordinals starting at startKey, wrapping around
                  # modulo $cnt; by default all $cnt keys starting with 0).
                  # For every key, elements 0 and 1 of $layer->[$kn] of every layer are passed, together with
                  # the rest of the layer's description, to do_keys().  Returns the accumulated string.
                  # NOTE(review): the label KEY and the inner `my ($symb, @keys, $last)' (whose @keys hides
                  # the outer @keys inside the loop; $last can never receive a value) look like leftovers;
                  # `$out .= ""' is presumably a place where markup is not visible in this listing.
  0         0  
11989 0   0     0 my ($self, $opts, $cnt, $new_row) = (shift, shift, shift, shift);
11990 0         0 my %opts = map { /^\w+=/ ? split /=/, $_, 2 : ($_, 1)} @$opts;
11991 0 0       0 my $off = (($opts{oneRow} && $kbdrow++) || 0) % 3;
11992 0         0 $off = "\xA0" x (2*$off);
11993             my @fixed = ($opts{oneRow} ? ("$off") : @HTML_KBD_FIXED);
11994 0         0 my $out = shift @fixed;
11995 0   0     0 # $cnt = $#{$layers_info->[0]} if $cnt > $#{$layers_info->[0]};
11996 0   0     0 my @keys = (0..($cnt-1));
11997 0         0 my $start = ($opts{startKey} || 0) % $cnt;
11998 0         0 my $CNT = $opts{cntKeys} || $cnt;
11999             @keys = (@keys) x ( 1 + int( ($start+$CNT-1)/$cnt ) );
12000 0         0 @keys = @keys[$start .. ($start + $CNT - 1)];
12001 0 0 0     0 KEY:
12002 0         0 for my $kn (@keys) { # Ordinal of keyboard's key
12003 0         0 $out .= (shift(@fixed) || '') if $new_row->{$kn};
12004 0         0 my ($symb, @keys, $last) = 0;
12005 0         0 for my $KK (@_) { # Layers
  0         0  
12006             my($layer, @rest) = @$KK; # rest = face, kmap, layerN, class_hash, name, classes
12007 0         0 push @keys, [@{$layer->[$kn]}[0,1], @rest];
12008             }
12009 0         0 $out .= $self->do_keys($opts, @keys);
12010 0 0       0 }
12011 0         0 $out .= join '', @fixed;
12012             $out .= "" if $opts{oneRow};
12013             $out
12014             }
12015 0     0 0 0
12016 0         0 sub html_keyboard_diagram ($$$) {
                  # Build a keyboard diagram from the whitespace-separated description $OPT.
                  # Exactly one word of $OPT must be  /opt=LIST  (LIST: comma-separated options, which are
                  # passed on to keys2html_diagram()).  Every other word describes one layer to show:
                  #     CLASSES+NAME=FACE,PREFIX,WHICH          (examples are in the comment below)
                  #   CLASSES  comma-separated class names (may be empty);  NAME may be empty too;
                  #   FACE     a face name, or a key of $self->{face_shortcuts};
                  #   PREFIX   empty, or the dead key (4 hex digits or a char) selecting the face's map
                  #            from '[deadkeyFaceHexMap]' which is applied to the layer;
                  #   WHICH    the index of the layer among the layers of FACE; or `-', which makes FACE
                  #            be used as the name of a layer directly.
                  # The geometry is $self->{faces}{FACE}{'[geometry]'} of the first FACE whose WHICH is not
                  # `-'; its last element is dropped, and one key more than the sum of the rest is shown.
                  # With the option rtl-hover (or rtl-hover-Trivia) the diagram is surrounded by extra text.
                  # Dies on an unrecognized word, on an unknown face or prefix key, when the number of
                  # /opt= words is not 1, and when no geometry is found.
                  # NOTE(review): $global_opt, $is_layer and %tr are not used in the code as shown; one of the
                  # messages below names the sub output_html_keyboard_diagram(); the two-part string assigned
                  # to ($pre, $post) presumably contains markup not visible in this listing — confirm.
12017 0         0 my($self, $OPT, $global_opt, @opt, @layers, $face0, $is_layer) = (shift, shift, shift);
12018 0 0       0 my %tr = qw(l 0 c 1 h 2);
12019 0 0       0 for my $arg (split /\s+/, $OPT) {
12020             push(@opt, $arg), next if $arg =~ s(^/opt=)(); # BELOW: `base' becomes NAME, `on-right' becomes CLASSES
12021 0 0       0 die "unrecognized `rebuild' option: `$arg'" # +=l,0,0 +base=l,0,0 +=l,0,1 +=l,ƒ,0 on-right+=c,0,1
12022 0 0 0     0 unless my($classes, $name, $f, $prefix, $which) = ( $arg =~ m{^((?:[-\w]+(?:,[-\w]+)*)?)\+([-\w]*)=(\w+),([\da-f]{4}|[^\x20-\x7e][^,]*|[02]?),(\d+|-)$}i );
12023 0         0 $f = $self->{face_shortcuts}{$f} if exists $self->{face_shortcuts}{$f};
12024 0         0 $face0 ||= $f unless $which eq '-';
12025 0   0     0 $prefix =~ s/◌(?=\p{NonspacingMark})//g;
12026 0 0 0     0 $prefix = $self->charhex2key($prefix);
12027             my $L = ($which eq '-' and $which = 0, [$f]);
12028 0 0 0     0 warn "unknown layer $L->[0]" if $L and not $self->{layers}{$L->[$which]};
      0        
12029 0 0 0     0 die "html_keyboard_diagram(): unknown face `$f'"
12030             unless $L ||= ($self->{faces}{$f}{layers} or $self->export_layers($f, $f));
12031             my $kmap = $self->{faces}{$f}{'[deadkeyFaceHexMap]'}{$self->key2hex($prefix)}
12032             or not length $prefix or die "output_html_keyboard_diagram(): Unknown prefix key `$prefix' for face $f";
12033 0   0     0 # create_composite_layers() translates 0000 key to ''
12034 0         0 # warn "I see HTML_classes for face=$f, prefix=`$prefix'" if $self->{faces}{$f}{'[HTML_classes]'}{length $prefix ? $self->key2hex($prefix) : ''};
12035             my $h_classes = $self->{faces}{$f}{'[HTML_classes]'}{length $prefix ? $self->key2hex($prefix) : ''} || {};
12036 0 0       0 push(@layers, [$self->{layers}{$L->[$which]}, $f, $kmap, $which, $h_classes, $name, split /,/, $classes]);
12037 0         0 }
12038 0 0       0 die "there must be exactly one /opt= argument in <<$OPT>>" unless @opt == 1;
  0         0  
12039 0 0       0 my $opt = [split /,/, $opt[0], -1];
12040 0         0 my ($cnt, @g, %new_row) = (0, @{ $self->{faces}{$face0}{'[geometry]'} || [] }); # keep only 1 from the last row
12041 0         0 @g or die "Face `$face0' has no associated layer with geometry info; did you set geometry_via_layer?";
12042 0         0 pop @g;
12043 0 0       0 $new_row{ $cnt += $_ }++ for @g;
12044             my ($pre, $post) = ('', '');
12045 0 0       0 ($pre, $post) = ("\n
", "
\nHover mouse here to see how characters look in RTL context.\n")
12046 0         0 if grep /^rtl-hover(-Trivia)?$/, @$opt;
12047             $post .= " Trivia: note mirroring of <{[()]}>." if grep /^rtl-hover-Trivia$/, @$opt;
12048             $pre . $self->keys2html_diagram($opt, $cnt+1, \%new_row, @layers) . $post;
12049             }
12050            
12051            
12052 1     1   40894 # These preloaded symbols are enough to cover single-UTF-16 bindings in .Compose (except circled katakana/hangul)
  1         4  
  1         2855  
12053             my @enc_dotcompose; # Have many-to-1, inverting hash would lose info; Do not distinguish Left/leftarrow etc.
12054             { no warnings 'qw';
12055             @enc_dotcompose = (qw#
12056             ` grave
12057             ' apostrophe
12058             " quotedbl
12059             ~ asciitilde
12060             ! exclam
12061             ? question
12062             @ at
12063             #, # `
12064             qw!
12065             # numbersign
12066             $ dollar
12067             % percent
12068             ^ asciicircum
12069             & ampersand
12070             * asterisk
12071             ( parenleft
12072             ) parenright
12073             [ bracketleft
12074             ] bracketright
12075             { braceleft
12076             } braceright
12077             - minus
12078             + plus
12079             = equal
12080             _ underscore
12081             < less
12082             > greater
12083             \ backslash
12084             / slash
12085             | bar
12086             , comma
12087             . period
12088             : colon
12089             ; semicolon
12090             _bar underbar
12091            
12092            
12093             ¡ exclamdown
12094             ¢ cent
12095             £ sterling
12096             ¤ currency
12097             ¥ yen
12098             ¦ brokenbar
12099             § section
12100             ¨ diaeresis
12101             © copyright
12102             ª ordfeminine
12103             « guillemotleft
12104             ¬ notsign
12105             ­ hyphen
12106             ® registered
12107             ¯ macron
12108             ° degree
12109             ± plusminus
12110             ² twosuperior
12111             ³ threesuperior
12112             ´ acute
12113             µ mu
12114             ¶ paragraph
12115             · periodcentered
12116             ¸ cedilla
12117             ¹ onesuperior
12118             º masculine
12119             » guillemotright
12120             ¼ onequarter
12121             ½ onehalf
12122             ¾ threequarters
12123             ¿ questiondown
12124             À Agrave
12125             Á Aacute
12126             Â Acircumflex
12127             Ã Atilde
12128             Ä Adiaeresis
12129             Å Aring
12130             Æ AE
12131             Ç Ccedilla
12132             È Egrave
12133             É Eacute
12134             Ê Ecircumflex
12135             Ë Ediaeresis
12136             Ì Igrave
12137             Í Iacute
12138             Î Icircumflex
12139             Ï Idiaeresis
12140             Ð ETH
12141             Ð Eth
12142             Ñ Ntilde
12143             Ò Ograve
12144             Ó Oacute
12145             Ô Ocircumflex
12146             Õ Otilde
12147             Ö Odiaeresis
12148             × multiply
12149             Ø Oslash
12150             Ø Ooblique
12151             Ù Ugrave
12152             Ú Uacute
12153             Û Ucircumflex
12154             Ü Udiaeresis
12155             Ý Yacute
12156             Þ THORN
12157             Þ Thorn
12158             ß ssharp
12159             à agrave
12160             á aacute
12161             â acircumflex
12162             ã atilde
12163             ä adiaeresis
12164             å aring
12165             æ ae
12166             ç ccedilla
12167             è egrave
12168             é eacute
12169             ê ecircumflex
12170             ë ediaeresis
12171             ì igrave
12172             í iacute
12173             î icircumflex
12174             ï idiaeresis
12175             ð eth
12176             ñ ntilde
12177             ò ograve
12178             ó oacute
12179             ô ocircumflex
12180             õ otilde
12181             ö odiaeresis
12182             ÷ division
12183             ø oslash
12184             ø ooblique
12185             ù ugrave
12186             ú uacute
12187             û ucircumflex
12188             ü udiaeresis
12189             ý yacute
12190             þ thorn
12191             ÿ ydiaeresis
12192            
12193             Cyr_ђ Serbian_dje
12194             ѓ Macedonia_gje
12195             є Ukrainian_ie
12196             Cyr_ѕ Macedonia_dse
12197             Cyr_і Ukrainian_i
12198             Cyr_ї Ukrainian_yi
12199             Cyr_ћ Serbian_tshe
12200             Cyr_ќ Macedonia_kje
12201             ґ Ukrainian_ghe_with_upturn
12202             Cyr_ў Byelorussian_shortu
12203             № numerosign
12204             Cyr_Ђ Serbian_DJE
12205             Ѓ Macedonia_GJE
12206             Є Ukrainian_IE
12207             Cyr_Ѕ Macedonia_DSE
12208             Cyr_І Ukrainian_I
12209             Cyr_Ї Ukrainian_YI
12210             Cyr_Ћ Serbian_TSHE
12211             Cyr_Ќ Macedonia_KJE
12212             Ґ Ukrainian_GHE_WITH_UPTURN
12213             Cyr_Ў Byelorussian_SHORTU
12214            
12215             ’sq rightsinglequotemark
12216             ‘sq leftsinglequotemark
12217             • enfilledcircbullet
12218             ♀ femalesymbol
12219             ♂ malesymbol
12220             NBSP nobreakspace
12221             … ellipsis
12222             ∩# intersection
12223             ∫ integral
12224             ≤ lessthanequal
12225             ≥ greaterthanequal
12226            
12227             d` dead_grave
12228             d' dead_acute
12229             d^ dead_circumflex
12230             d~ dead_tilde
12231             d¯ dead_macron
12232             dd# dead_breve----
12233             d^. dead_abovedot
12234             d" dead_diaeresis
12235             d^° dead_abovering
12236             d'' dead_doubleacute
12237             d^v dead_caron
12238             d, dead_cedilla
12239             dd# dead_ogonek---
12240             d_ι dead_iota
12241             d_voiced dead_voiced_sound
12242             d_½voiced dead_semivoiced_sound
12243             d. dead_belowdot
12244             dd# dead_hook---
12245             dd# dead_horn---
12246             d/ dead_stroke
12247             d^, dead_abovecomma
12248             dd# dead_abovereversedcomma---
12249             d`` dead_doublegrave
12250             d``# dead_double_grave
12251             d_° dead_belowring
12252             d__ dead_belowmacron
12253             dd# dead_belowcircumflex---
12254             d_~ dead_belowtilde
12255             dd# dead_belowbreve---
12256             d_" dead_belowdiaeresis
12257             d_invbrev dead_invertedbreve
12258             d_inv_brev dead_inverted_breve
12259             d_, dead_belowcomma
12260             dd# dead_currency
12261            
12262             d^( dead_dasia
12263             d^) dead_psili
12264            
12265             Ś Sacute
12266             Š Scaron
12267             Ş Scedilla
12268             Ť Tcaron
12269             Ź Zacute
12270             Ž Zcaron
12271             Ż Zabovedot
12272             ą aogonek
12273             ˛ ogonek
12274             ł lstroke
12275             ľ lcaron
12276             ś sacute
12277             ˇ caron
12278             š scaron
12279             ş scedilla
12280             ť tcaron
12281             ź zacute
12282             ˝ doubleacute
12283             ž zcaron
12284             ż zabovedot
12285             Ŕ Racute
12286             Ă Abreve
12287             Ĺ Lacute
12288             Ć Cacute
12289             Č Ccaron
12290             Ę Eogonek
12291             Ě Ecaron
12292             Ď Dcaron
12293             Đ Dstroke
12294             Ń Nacute
12295             Ň Ncaron
12296             Ő Odoubleacute
12297             Ř Rcaron
12298             Ů Uring
12299             Ű Udoubleacute
12300             Ţ Tcedilla
12301             ŕ racute
12302             ă abreve
12303             ĺ lacute
12304             ć cacute
12305             č ccaron
12306             ę eogonek
12307             ě ecaron
12308             ď dcaron
12309             đ dstroke
12310             ń nacute
12311             ň ncaron
12312             ő odoubleacute
12313             ř rcaron
12314             ů uring
12315             ű udoubleacute
12316             ţ tcedilla
12317             ˙ abovedot
12318            
12319             Ŗ Rcedilla
12320             Ĩ Itilde
12321             Ļ Lcedilla
12322             Ē Emacron
12323             Ģ Gcedilla
12324             Ŧ Tslash
12325             ŗ rcedilla
12326             ĩ itilde
12327             ļ lcedilla
12328             ē emacron
12329             ģ gcedilla
12330             ŧ tslash
12331             Ŋ ENG
12332             ŋ eng
12333             Ā Amacron
12334             Į Iogonek
12335             Ė Eabovedot
12336             Ī Imacron
12337             Ņ Ncedilla
12338             Ō Omacron
12339             Ķ Kcedilla
12340             Ų Uogonek
12341             Ũ Utilde
12342             Ū Umacron
12343             ā amacron
12344             į iogonek
12345             ė eabovedot
12346             ī imacron
12347             ņ ncedilla
12348             ō omacron
12349             ķ kcedilla
12350             ų uogonek
12351             ũ utilde
12352             ū umacron
12353            
12354             Ơ Ohorn
12355             ơ ohorn
12356             Ư Uhorn
12357             ư uhorn
12358            
12359             < leftcaret
12360             > rightcaret
12361             ∨ downcaret
12362             ∧ upcaret
12363             ¯ overbar
12364             ⊤ downtack
12365             ∩ upshoe
12366             ⌊ downstile
12367             _ underbar
12368             ∘ jot
12369             ⎕ quad
12370             ⊥ uptack
12371             ○ circle
12372             ⌈ upstile
12373             ∪ downshoe
12374             ⊃ rightshoe
12375             ⊂ leftshoe
12376             ⊣ lefttack
12377             ⊢ righttack
12378            
12379             ≤ lessthanequal
12380             ≠ notequal
12381             ≥ greaterthanequal
12382             ∫ integral
12383             ∴ therefore
12384             ∝ variation
12385             ∞ infinity
12386             ∇ nabla
12387             ∼ approximate
12388             ≃ similarequal
12389             ⇔ ifonlyif
12390             ⇒ implies
12391             ≡ identical
12392             √ radical
12393             ⊂ includedin
12394             ⊃ includes
12395             ∩ intersection
12396             ∪ union
12397             ∧ logicaland
12398             ∨ logicalor
12399             ∂ partialderivative
12400             ƒ function
12401             ← leftarrow
12402             ↑ uparrow
12403             → rightarrow
12404             ↓ downarrow
12405             ◆ soliddiamond
12406             ▒ checkerboard
12407            
12408             CP Multi_key
12409            
12410             +# KP_Add
12411             -# KP_Subtract
12412             *# KP_Multiply
12413             /# KP_Divide
12414             .# KP_Decimal
12415             =# KP_Equal
12416             SPC# KP_Space
12417            
12418             ← Left → Right ↑ Up ↓ Down
12419             !, map {("$_#", "KP_$_")} 0..9);
12420             } # `
12421            
12422             my %dec_dotcompose = reverse @enc_dotcompose;
12423             # perl -C31 -wne "/^(.)\tCyrillic_(\w+)/ and print qq($2 $1 )" oooo3 >oooo-cyr
12424             # perl -C31 -wne "/^(.)\thebrew_(\w+)/ and print qq($2 $1 )" oooo3 >oooo-heb
12425             my %cyr = qw( GHE_bar Ғ ghe_bar ғ ZHE_descender Җ zhe_descender җ KA_descender Қ ka_descender қ KA_vertstroke Ҝ ka_vertstroke ҝ
12426             EN_descender Ң en_descender ң U_straight Ү u_straight ү U_straight_bar Ұ u_straight_bar ұ HA_descender Ҳ
12427             ha_descender ҳ CHE_descender Ҷ che_descender ҷ CHE_vertstroke Ҹ che_vertstroke ҹ SHHA Һ shha һ SCHWA Ә schwa ә
12428             I_macron Ӣ i_macron ӣ O_bar Ө o_bar ө U_macron Ӯ u_macron ӯ io ё je ј lje љ nje њ dzhe џ IO Ё JE Ј LJE Љ NJE Њ
12429             DZHE Џ yu ю a а be б tse ц de д ie е ef ф ghe г ha х i и shorti й ka к el л em м en н o о pe п ya я er р es с te т
12430             u у zhe ж ve в softsign ь yeru ы ze з sha ш e э shcha щ che ч hardsign ъ YU Ю A А BE Б TSE Ц DE Д IE Е EF Ф GHE Г
12431             HA Х I И SHORTI Й KA К EL Л EM М EN Н O О PE П YA Я ER Р ES С TE Т U У ZHE Ж VE В SOFTSIGN Ь YERU Ы ZE З SHA Ш E Э
12432             SHCHA Щ CHE Ч HARDSIGN Ъ );
12433             my %heb = qw( doublelowline ‗ aleph א bet ב gimel ג dalet ד he ה waw ו zain ז chet ח tet ט yod י finalkaph ך kaph כ lamed ל
12434             finalmem ם mem מ finalnun ן nun נ samech ס ayin ע finalpe ף pe פ finalzade ץ zade צ qoph ק resh ר shin ש taw ת
12435             beth ב gimmel ג daleth ד samekh ס zayin ז het ח teth ט zadi צ kuf ק taf ת );
12436             my %grk = qw( ALPHAaccent Ά EPSILONaccent Έ ETAaccent Ή IOTAaccent Ί IOTAdieresis Ϊ OMICRONaccent Ό UPSILONaccent Ύ
12437             UPSILONdieresis Ϋ OMEGAaccent Ώ accentdieresis ΅ horizbar ― alphaaccent ά epsilonaccent έ etaaccent ή iotaaccent ί
12438             iotadieresis ϊ iotaaccentdieresis ΐ omicronaccent ό upsilonaccent ύ upsilondieresis ϋ upsilonaccentdieresis ΰ
12439             omegaaccent ώ ALPHA Α BETA Β GAMMA Γ DELTA Δ EPSILON Ε ZETA Ζ ETA Η THETA Θ IOTA Ι KAPPA Κ LAMDA Λ LAMBDA Λ MU Μ
12440             NU Ν XI Ξ OMICRON Ο PI Π RHO Ρ SIGMA Σ TAU Τ UPSILON Υ PHI Φ CHI Χ PSI Ψ OMEGA Ω alpha α beta β gamma γ delta δ
12441             epsilon ε zeta ζ eta η theta θ iota ι kappa κ lamda λ lambda λ mu μ nu ν xi ξ omicron ο pi π rho ρ sigma σ
12442             finalsmallsigma ς tau τ upsilon υ phi φ chi χ psi ψ omega ω );
12443             $dec_dotcompose{"Cyrillic_$_"} = "Cyr_$cyr{$_}" for keys %cyr;
12444             $dec_dotcompose{"hebrew_$_"} = "heb_$heb{$_}" for keys %heb;
12445             $dec_dotcompose{"Greek_$_"} = "Gr_$grk{$_}" for keys %grk;
12446 0     0 0 0
12447 0         0 sub shorten_dotcompose ($$;$) { # Shorten but leave readable disambiguous (to allow more concise printout)
12448 0         0 shift; # self [Later we massage out Cyr_ Gr_ uni_ prefixes
12449 0         0 (my $in = shift) =~ s/\b(Cyr|Ukr|Gr|heb|Ar)[a-z]+(?=_)/$1/;
12450 0 0       0 $in =~ s/\b(dead)(?=_)/d/;
  0         0  
12451 0         0 $in =~ s/\b(Gr_\w+dier|d_diaer)esis/$1/;
12452             $in =~ s/^U([a-fA-F\d]{4,6})$/ 'uni_' . chr hex $1 /e if shift;
12453             $in
12454             }
12455 0     0 0 0
12456 0         0 sub dec_dotcompose ($$;$) {
12457 0 0       0 my($self, $in, $dec_U) = (shift, shift, shift);
12458 0 0       0 my($pre, $post) = split /:/, $in, 2;
12459 0 0       0 $post or warn("Can't parse <<$in>>"), return;
12460 0         0 my @pre = ($pre =~ /<(\w+)>/g) or warn("Unknown format of IN in <<$in>>"), return;
12461             my($p) = ($post =~ /"(.+?)"/) or warn("Unknown format of OUT in <<$in>>"), return;
12462 0 0       0 @pre = map { exists $KeySyms{$_}
    0          
12463 0         0 ? $KeySyms{$_}
12464             : ( exists $dec_dotcompose{$_} ? $dec_dotcompose{$_} : $self->shorten_dotcompose($_, $dec_U) ) } @pre;
12465             (@pre, $p)
12466             }
12467            
12468 0     0 0 0 # Stats: about 250 in: egrep "CP.*d_|d_.*CP" o-std
12469 0         0 sub process_dotcompose ($$$;$) {
12470 0 0       0 my($self, $fh, $sub, $dec_U) = (shift, shift, shift, shift);
12471 0 0       0 while (<$fh>) {
12472 0 0       0 next if /^\s*(#|include\b)/;
12473 0         0 next unless /\S/;
12474             next unless my @in = $self->dec_dotcompose($_, $dec_U);
12475             $sub->($self, $in[-1], @in[0..$#in-1]);
12476             }
12477             }
12478 0   0 0 0 0
12479             sub filter_dotcompose ($;$) {
12480 0     0   0 my ($self, $fh) = (shift, shift || \*ARGV);
12481 0         0 $self->process_dotcompose($fh, sub ($$@) {
12482 0         0 my($self, $out) = (shift, shift);
12483             print "@_ $out\n"; # Two spaces to allow for combining marks
12484             });
12485             }
12486 0     0 0 0
12487 0 0       0 sub put_val_deep ($$$$@) {
12488 0         0 my($self, $h, $term, $val, $k) = (shift, shift, shift, shift, shift);
12489 0         0 die "No key(s) in put_val_deep()" unless @_;
12490 0 0       0 while (@_) {
12491 0         0 my $oh = $h;
12492 0 0       0 $h->{$k} = {} unless defined $h->{$k};
12493 0 0       0 $h = $h->{$k};
12494 0         0 if ('HASH' ne ref $h) {
12495 0         0 die "Encountered non-HASH in put_val_deep(): <$k>" unless $term;
12496             my $ov = $h;
12497 0         0 $h = $oh->{$k} = { $term => $ov };
12498             }
12499 0 0       0 $k = shift;
12500 0 0 0     0 }
    0          
12501 0         0 if (exists $h->{$k}) {
12502             if (not ref $h->{$k}) {
12503 0         0 $h->{$k} = $val; # later rule wins
12504             } elsif ($term and 'HASH' eq ref $h) {
12505 0         0 $h->{$k}{$term} = $val;
12506             } else {
12507             die "Encountered non-HASH in put_val_deep(): <$k>";
12508 0         0 }
12509             } else {
12510             $h->{$k} = $val; # later rule wins
12511             }
12512             }
12513 0     0 0 0
12514 0         0 sub compose_array_2_hash ($$$$) {
12515 0         0 my($self, $a, $h, $opt) = (@_);
12516 0 0       0 for my $l (@$a) {
12517 0         0 my($out, $term, @in) = @$l;
12518 0 0       0 my $Term = (ref $term ? $term->{term} : $term) ;
12519             $self->put_val_deep( $h, $term, $self->key2hex($out), map $self->key2hex($_), @in);
12520             $self->put_val_deep( $opt, $term, $term, map $self->key2hex($_), @in) if ref $term;
12521             }
12522             }
12523 0     0 0 0
12524 0 0       0 sub compose_line_2_array ($$$$$@) {
12525 0         0 my($self, $a, $out, $massage, $term, @in) = (@_);
12526             if ($massage) {
12527 0 0       0 s/^(uni|Gr|Cyr|heb)_(?![\x00-\x7e])(?=.$)//, s/^space$/ / for @in; # copy
12528 0         0 #warn "compose: @in $out";
12529             return unless $in[0] eq 'CP';
12530             shift @in;
12531             }
12532             # Filter warnings via: egrep -v " d[^ ]|#" 00b | egrep -- "^---CP:" >00b2
12533 0 0 0     0 (printSkippedComposeKey and warn("---CP: @in $out")), # The last make sense only in the context of keysymbol operations???
  0 0 0     0  
      0        
12534             return if 1 != length $out or 0x10000 <= ord $out
12535 0         0 or grep {1 != length or 0x10000 <= ord} @in or grep $out eq $_, @in; # Allow for one char only
12536             #warn "CP: @in $out";
12537             push @$a, [$out, $term, @in];
12538             }
12539 0     0 0 0
12540             sub compose_2_array ($$$$@) {
12541 0 0       0 my($self, $method, $fh, $a) = (shift, shift, shift, shift);
    0          
    0          
12542            
12543 0     0   0 if ($method eq 'dotcompose') {
12544 0         0 $self->process_dotcompose($fh, sub ($$@) {
12545 0         0 my($self, $out) = (shift, shift);
12546             $self->compose_line_2_array($a, $out, 'massage', !!'terminate', @_);
12547 0         0 }, 'decode U');
12548 0 0       0 } elsif ($method eq 'entity') {
12549 0         0 while (my $line = <$fh>) {
12550 0         0 next unless $line =~ /^\s*
12551 0         0 my($out, @in) = (chr hex "$1", split /\s*,\s*/, "$2");
12552 0         0 $in[0] =~ s/\s+$//;
  0         0  
12553 0         0 @in = split /\s*,\s*/, $in[0];
12554 0 0 0     0 @in = sort {length($a) <=> length($b)} @in;
12555 0         0 for my $in (@in) { # Avoid entries more than 2x longer than the shortest possible
12556 0         0 next if length($in) > $avoid_overlong_synonims_Entity*length $in[0] or length($in) > $maxEntityLen;
12557             my @IN = split //, $in;
12558             $self->compose_line_2_array($a, $out, !'massage', $self->key2hex(' '), @IN);
12559             }
12560 0         0 }
12561 0         0 } elsif ($method eq 'rfc1345') { # http://tools.ietf.org/html/rfc1345
12562 0 0       0 my %cvt = qw(gt > lt < amp &);
12563 0 0       0 while (my $line = <$fh>) {
12564 0         0 next unless ($line =~ /^\s+SP\s+0020\s+SPACE\s*$/) .. ($line =~ /^
12565 0 0       0 next unless $line =~ /^\s+(\S+)\s+([a-fA-F\d]{4})\s/;
12566 0         0 my($out, $in) = (chr hex "$2", "$1");
12567 0 0       0 next if "$2" =~ /^e0/i; # Skip private parts
12568 0         0 $in =~ s/&([lg]t|amp);/$cvt{$1}/g;
12569 0         0 next if 1 == length $in;
12570             my @IN = split //, $in;
12571 0         0 $self->compose_line_2_array($a, $out, !'massage', $self->key2hex(' '), @IN);
12572             }
12573 0         0 $self->compose_line_2_array($a, '€', !'massage', $self->key2hex(' '), 'E', 'u'); # http://en.wikipedia.org/wiki/Unicode_input#Character_mnemonics
12574             } else {
12575             die "Unknown compose parser: $method";
12576             }
12577             }
12578 0     0 0 0
12579 0 0       0 sub composefile_2_array ($$$$@) {
12580 0         0 my($self, $method, $fn, $a) = (shift, shift, shift, shift);
12581 0 0       0 open my $fh, '< :encoding(utf8)', $fn or die "Can't open `$fn' for read: $!";
12582             $self->compose_2_array($method, $fh, $a);
12583             close $fh or die "Can't close `$fn' for read: $!";
12584             }
12585 0     0 0 0
12586 0         0 sub merge_hash_to ($$$) { # We do NOT do deep copy
12587 0 0 0     0 my($self, $from, $to) = (shift, shift, shift);
12588 0 0       0 for my $k (keys %$from) { # ignore if the existing value is not hash
12589 0         0 next if 'HASH' ne ref($to->{$k} || {}); # existing non-hash (terminator) wins over a terminator or a longer binding
12590             $to->{$k} = $from->{$k}, next unless exists $to->{$k}; # existing hash wins over new terminator.
12591             $self->merge_hash_to($from->{$k}, $to->{$k});
12592             }
12593             }
12594 0     0 0 0
12595 0 0       0 sub create_composeArray ($$$) {
12596 0         0 my ($self, $key, $method) = (shift, shift, shift);
12597 0         0 my $names = $self->get__value($key) or return;
12598 0         0 my @A;
12599 0         0 for my $fn (@$names) {
12600             $self->composefile_2_array($method, $fn, my $a = []);
12601             push @A, $a;
12602             # $self->compose_array_2_hash($a, my $h = {});
12603             # $self->merge_hash_to($h, $H);
12604             # warn "CP< ", join ', ', keys %$h;
12605 0         0 }
12606             # warn "CP= ", join ', ', keys %$H;
12607             \@A;
12608             }
12609 0     0 0 0
12610 0         0 sub compose_Array_2_hash ($$) {
12611 0         0 my ($self, $A) = (shift, shift);
12612 0         0 my($H, $OPT) = ({}, {}); # indexed by HEX
12613 0         0 for my $a (@$A) {
12614 0         0 $self->compose_array_2_hash($a, my $h = {}, my $opt = {});
12615             $self->merge_hash_to($h, $H);
12616             $self->merge_hash_to($opt, $OPT);
12617             # warn "CP< ", join ', ', keys %$h;
12618 0         0 }
12619             # warn "CP= ", join ', ', keys %$H;
12620             $H;
12621             }
12622 0     0 0 0
12623 0   0     0 sub composehash_2_prefix ($$$$$$$$) {
12624 0 0       0 my($self, $F, $prefix, $h, $n, $prefixCompose, $show, $comp_show) = (shift, shift, shift, shift, shift, shift, shift, shift);
  0         0  
12625 0 0 0     0 my($H, $added) = ($self->{faces}{$F}, $h->{'[Added]'} || {});
  0   0     0  
12626 0 0       0 my(%orig, %map, %seen) = map { ( $_, exists($added->{$_}) ? $added->{$_} : $_ ) } keys %$h;
12627 0         0 for my $c (sort {($added->{$a} || '') cmp ($added->{$b} || '') or $a cmp $b} keys %$h) { # order affects the order of auto-prefixes
12628 0 0 0     0 next if $c =~ /^\[(G?Prefix(_Show)?|Added)\]$/;
    0          
12629 0         0 my $v = $h->{$c};
12630             if (ref $v and $seen{"$v"}) {
12631 0   0     0 $v = $seen{"$v"};
12632 0         0 } elsif (ref $v) {
12633             my $p = $v->{'[Prefix]'} || $self->key2hex($self->next_auto_dead($H));
12634 0 0       0 my $cc = $c; # Name should not reflect linking
12635 0         0 # warn(" [@$n] $cc => $added->{$c}"),
12636 0 0       0 $cc = $added->{$c} if exists $added->{$c};
12637 0 0       0 my $name_append = my $name_show = chr hex $cc;
12638 0 0       0 $name_append = 'Compose' if $name_append eq $self->charhex2key($prefixCompose);
12639             $name_show = '⎄' if $name_show eq $self->charhex2key($prefixCompose);
12640 0         0 $name_append = $self->key2hex($name_append) if $name_append =~ /\s/;
12641 0         0 # $name_show = $self->key2hex($name_show) if $name_show =~ /\s/ and $name_show ne ' ';
12642 0         0 my $c;
12643 0 0       0 ($name_show = "$show$name_show")
12644 0         0 =~ s[^((⎄[₁₂₃₄₅₆₇₈₉]?|\Q$comp_show\E){2,})][ $2 . (($c = length($1)/length($2)) =~ tr/0-9/⁰¹²³⁴⁵⁶⁷⁸⁹/, $c) ]e;
12645 0         0 $name_show = $v->{'[Prefix_Show]'} if defined $v->{'[Prefix_Show]'};
12646 0         0 $self->composehash_2_prefix($F, $p, $v, my $nn = [@$n, $name_append], $prefixCompose, $name_show, $comp_show);
12647 0         0 $self->{faces}{$F}{'[prefixDocs]'}{$p} = "@$nn";
12648             $self->{faces}{$F}{'[Show]'}{$p} = $name_show;
12649 0         0 $v = $seen{"$v"} = [$p, undef, 1];
12650 0         0 } else {
12651             $H->{'[inCompose]'}{$self->charhex2key($v)}++;
12652 0         0 $v = [$v];
12653             }
12654 0         0 $map{$c} = $v;
12655             }
12656             $H->{'[deadkeyFaceHexMap]'}{$prefix} = \%map;
12657             }
12658 0     0 0 0
12659 0         0 sub composehash_add_linked ($$$$) {
12660 0 0       0 my($self, $hexH, $charH, $prefCharH, $delay, %add) = (shift, shift, shift, shift, {});
12661 0 0       0 for my $h (keys %$hexH) {
12662 0 0       0 $self->composehash_add_linked($hexH->{$h}, $charH, $prefCharH) if ref $hexH->{$h};
12663 0         0 next unless defined (my $to = $charH->{my $c = chr hex $h});
12664 0         0 $to = $to->[0] if ref $to;
12665 0 0       0 my $toC = $self->charhex2key($to);
12666 0         0 my $back = $prefCharH->{$toC};
12667 0 0       0 $back = $back->[0] if ref $back;
12668             my $now = $h eq $self->key2hex($back);
12669             next if exists $hexH->{$to = $self->key2hex($to)};
12670 0         0 # warn " ... link $c to $toC (now=$now, back = $prefCharH->{$toC}) @{$prefCharH->{$toC}||[]})";
12671 0 0       0 # warn " ... link $c to $toC (now=$now, back = $back)";
12672             $add{$to} = $h;
12673 0 0       0 ($now ? $hexH : $delay)->{$to} = $hexH->{$h};
12674             }
12675 0 0       0 $hexH->{'[Added]'} = \%add if %add;
12676             # warn " ... almost done";
12677             %$hexH = (%$delay, %$hexH) if keys %$delay;
12678             }
12679 0     0 0 0
12680 0         0 sub create_composekey ($$$) {
12681 0   0     0 my($self, $F, $prefix, @PREFIX) = (shift, shift, shift);
12682 0 0 0     0 my $linkedF = $self->{faces}{$F}{LinkFace};
  0   0     0  
12683 0   0     0 my $linked = $linkedF && $self->{faces}{$linkedF}{Face_link_map}{$F};
12684             $linked &&= {map {ref($_ || 0) ? $_->[0] : $_} %$linked};
12685             my $rlinked = $linked && $self->{faces}{$F}{Face_link_map}{$linkedF};
12686             # $linked ||= {};
12687             # warn " Compose: $F: F linked to $linked->{F}" if $linked and $linked->{F};
12688             # $F eq 'Latin' and
12689 0 0 0     0 # warn " Compose: $F: ", join ', ', sort keys %{$self->{faces}{$linkedF}{Face_link_map}{$F}}
12690 0         0 # if $self->{faces}{$linkedF}{Face_link_map}{$F};
  0         0  
12691 0   0     0 if ($prefix and ref $prefix) {
      0        
12692 0         0 @PREFIX = map { my @a = split /,/;
12693             defined $a[$_] and length $a[$_] and $a[$_] = $self->key2hex($self->charhex2key($a[$_])) for 3,4;
12694 0         0 [@a]} @$prefix;
12695 0         0 } else {
12696             $prefix = $self->key2hex($self->charhex2key($prefix));
12697             @PREFIX = ( ['ComposeFiles', 'dotcompose', 'warn', $prefix, ''],
12698             ['EntityFiles', 'entity', 'warn', '', $prefix],
12699 0         0 ['rfc1345Files', 'rfc1345', 'warn', '', $prefix]);
12700 0         0 }
12701 0         0 $self->load_KeySyms;
12702 0 0       0 my $p0 = my $first_prefix = $PREFIX[0][3]; # use for first found map
  0         0  
12703 0 0       0 my @Hashes;
12704 0         0 my @Arrays = @{ $self->{'[ComposeArrays]'} || [] };
12705 0         0 unless (@Arrays) { # Shared between faces
12706 0         0 my @Show;
12707 0         0 for my $i (0..$#PREFIX) { # FileList, type, OK_to_miss, prefix, prefix-in-last ... prefix-in-pre-last ...
12708 0 0 0     0 my $pref = $PREFIX[$i];
12709 0 0       0 my $arr;
12710 0         0 unless ($arr = $self->create_composeArray($pref->[0], $pref->[1]) and @$arr) {
12711             warn "Compose list of type $pref->[1] could not be created from FileList variable $pref->[0]" if $pref->[2];
12712 0         0 next;
12713 0         0 }
12714             push @Arrays, [$arr, $pref];
12715 0         0 push @Show, $i;
12716 0         0 }
12717             $self->{'[ComposeArrays]'} = \@Arrays;
12718 0         0 $self->{'[ComposeShowIdx]'} = \@Show;
12719             }
12720 0         0 my($v, $vv) = map $self->{faces}{$F}{$_}, qw( [coverage00hash] [coverageExtra] );
12721 0         0 # warn "Filter hashes $F ", scalar keys %$v, ' ', scalar keys %$vv, ' ', scalar @{$self->{faces}{$F}{'[coverage00]'}};
12722 0         0 for my $A (@Arrays) { # one per type
12723 0         0 my($arr, $pref) = @$A;
12724 0         0 my @NN;
12725 0         0 for my $a (@$arr) { # $a one per input file
12726 0         0 my @N;
12727 0 0 0     0 for my $l (@$a) {
  0         0  
12728             my($out, $term, @in) = @$l;
12729             next if grep {not ($v->{$_} or $vv->{$_})} @in;
12730 0         0 # my $c;
12731             # warn "in=<@in>, k=$c, 00=", !!$v->{$c}, " Extra=", !!$vv->{$c} if ($c) = grep {ord() <= 0x30ff and ord >= 0x30f0} @in;
12732 0         0 push @N, $l;
12733             }
12734             push @NN, \@N;
12735             }
12736 0         0 # warn "Compose face=$F: keys <@$arr> @$pref";
12737             # warn "Compose face=$F: keys ", join ' ', map scalar @$_, @$arr;
12738 0         0 push @Hashes, [$self->compose_Array_2_hash(\@NN), $pref];
12739 0         0 }
12740 0         0 my @hashes;
12741 0         0 my $Comp_show = $self->{faces}{$F}{'[ComposeKey_Show]'};
12742 0         0 my $IDX = $self->{'[ComposeShowIdx]'};
12743 0         0 for my $i (0..$#Hashes) { # Now process separately for every personality --- NOT YET
12744 0         0 my $H = $Hashes[$i];
12745 0 0       0 my($chained, $hash, $pref) = ('G', @$H); # Global
12746 0         0 $hash = $self->deep_copy($hash);
12747 0         0 $self->composehash_add_linked($hash, $linked, $rlinked) if $linked;
12748 0 0 0     0 my $pref0 = $pref->[3];
    0 0        
12749 0         0 my $prefix_repeat;
12750 0 0       0 if (@hashes and defined $pref->[4] and length $pref->[4]) {
12751 0         0 die "Chain-ComposeKey $pref->[4] already bound in the previous ComposeHash, keys = ", join ', ', keys %{$hashes[-1]{$pref->[4]}}
12752 0         0 if $hashes[-1]{$pref->[4]};
12753             $hashes[-1]{$pref->[4]} = $hash; # Bind to double/etc press
12754 0         0 $chained = '';
12755 0         0 } elsif ($first_prefix) { # The previous type could be not found; use the first defined accessor
12756             $pref0 = $first_prefix;
12757 0         0 undef $first_prefix;
12758             } else {
12759 0         0 warn "Hanging ComposeHash (no access prefix key) for ", join('///', @$pref);
12760 0 0       0 }
12761 0 0 0     0 push @hashes, $hash;
12762             $hash->{"[${chained}Prefix]"} = $pref0 if length $pref0;
12763 0 0       0 $hash->{"[Prefix_Show]"} = $Comp_show->[$IDX->[$i]] if ref $Comp_show and length $Comp_show->[$IDX->[$i]];
12764 0         0 }
12765 0         0 return unless @hashes;
12766 0         0 my @idx = split //, '₁₂₃₄₅₆₇₈₉';
12767 0         0 my $c = 0;
12768 0         0 for my $i ( 0..$#hashes ) {
12769 0 0       0 my $h = $hashes[$i];
12770 0 0       0 my $I = $IDX->[$i];
12771 0         0 next unless my $p = $h->{'[GPrefix]'}; # Not chained (chained are processed as subhashes by composehash_2_prefix()
12772 0 0       0 my $post = ($c ? "[$c]" : '');
12773 0         0 my $comp_show = $h->{'[Prefix_Show]'};
12774 0 0       0 unless (defined $comp_show) {
12775 0 0       0 my $c1;
12776 0         0 my $spost = ($c ? (($c1 = $c) =~ tr/0-9/₀₁₂₃₄₅₆₇₈₉/, $c1) : '');
12777             if (ref $Comp_show) { # Elt0 has a sane default
12778 0         0 $comp_show = "$Comp_show->[0]$spost";
12779             } else {
12780             $comp_show = "$Comp_show$spost";
12781 0         0 }
12782             }
12783 0         0 $self->{faces}{$F}{'[Show]'}{$p} = $comp_show;
12784 0         0 # push @Show, (ref $comp_show ? $comp_show->[$i] : $comp_show);
12785 0         0 $self->composehash_2_prefix($F, $p, $h, ["Compose$post"], $p0, $comp_show, $comp_show);
12786             $self->{faces}{$F}{'[prefixDocs]'}{$p} = "Compose$post key";
12787             ++$c;
12788             }
12789             }
12790 0   0 0 0 0
      0        
      0        
12791             sub XKB_key ($$$;$$$$) { # unfinished ( ##### is for Apple parts needing work)
12792 0         0 my($self, $K, $i, $use_base, $dd, $map, $override) =
12793 0         0 (shift, shift, shift, shift, shift || {}, shift || {}, shift || {dup => {}});
12794 0         0 my($sh, $caps, $l); ##### were needed on Apple
12795 0         0 my $A2l; ##### = [ @{ $self->AppleMap_Base($K) } ]; # Deep copy
12796 0         0 my $dup = $override->{dup};
12797             for my $from (keys %$dup) {
12798 0         0 $A2l->[$from] = $A2l->[$dup->{$from}];
12799 0         0 }
  0         0  
12800 0         0 my $F = $self->get_deep($self, @$K); # Presumably a face hash, as in $K = [qw(faces US)]
12801 0   0     0 my $L = [map $self->{layers}{$_}, @{$F->{layers}}];
12802 0 0 0     0 $L = $L->[$l];
  0 0 0     0  
12803 0         0 my $B = $use_base && $self->BaseKeys($K); # Partially implemented: use BaseKeys instead of the real $F (VK_ code)
12804 0 0       0 $B = [map {defined() && /^\w$/ ? lc $_ : $_} @$B] if ($use_base || 0) > 0;
12805 0         0 my @AppleMap; ##### = _AppleMap unless @AppleMap;
12806 0 0 0     0 warn 'AppleMap too long' if $#AppleMap >= 127;
12807             my($I, $d, $c, $force_o) = ($A2l->[$i], 0); # offset inside the layout array
12808 0 0 0     0 $c = $override->{"$l-$sh-$caps-vk=$i"} || $override->{"$l-$sh--vk=$i"} unless $use_base; # $caps is 0 or 1
    0          
12809 0 0 0     0 # $force_o++ if defined $use_base and $use_base eq '0';
    0          
12810 0         0 $c = $use_base ? $B->[$I] : $L->[$I][$sh] if not defined $c and defined $I;
12811 0 0       0 if (($use_base || 0) < 0) { # Control
    0          
    0          
12812             $force_o++;
12813 0         0 if (!defined $c) { # ignore
12814             } elsif ($c =~ /^[A-Z]$/) {
12815             $c = chr( 1 + ord($c) - ord 'A');
12816             } elsif ($c !~ /^[-0-9=.*\/+]$/) {
12817             ##### $c = $OEM2ctrl{$c}; # mostly undef
12818 0         0 }
12819 0 0       0 } elsif ($use_base) {
    0          
12820             my $tr;
12821             if (!defined $c) { # ignore
12822             ##### } elsif (defined($tr = $OEM2cmd{$c})) {
12823 0         0 ##### $c = $tr;
12824 0         0 } elsif (defined($tr = $oem_control{$c})) {
12825             $tr =~ s/(?<=.).*//;
12826 0         0 $c = $tr;
12827             } else {
12828             undef $c;
12829 0 0       0 }
12830             }
12831             $c = $AppleMap[$i] unless defined $c; # Fallback to US (apparently, there is no unbound "ASCII" keys in maps???); dbg to "\xffff" #
12832            
12833             ##### $o .= <
12834 0 0 0     0 #####
12835 0 0       0 #####EOK
12836             $d = $c->[2] || 0 if ref $c;
12837             $c = $c->[0] if ref $c;
12838 0 0 0     0 # On windows, CapsLock flips the case; on Mac, it upcases
12839 0 0       0 # ($c) = grep {$_ ne $c} uc $c, ucfirst lc $c, lc $c if !$d and $caps and (lc $c ne uc $c or lc $c ne ucfirst lc $c);
12840 0 0       0 $c = uc $c if !$d and $caps;
12841 0   0     0 $dd->{$c}[1]++ if $d > 0; # 0 for normal char, 1 for base prefix; not for hex4/hex6
12842 0 0       0 $override->{extra_actions}{$c}++ if $d < 0;
12843 0 0 0     0 my $M = (!$force_o and $d >= 0 and $map->{$self->keys2hex($c)});
12844 0 0 0     0 my $pr = $M ? 'a_' : '';
    0 0        
    0          
12845 0 0       0 $dd->{$c}[0] = $c if $M or $d > 0; # 0 for normal char, 1 for base prefix
12846 0         0 my($how, $pref) = ($d || $M) ? ('action', ($M ? 'a_' : '') . ($d > 0 ? 'pr_' : (!$d && '_'))) : ('output', '');
12847             ($how eq 'output') ? XML_format_UTF_16 $c : XML_format $c;
12848             return <
12849            
12850             EOK
12851             }
12852            
# CapsLock mask --> XKB key type name.  output_unit_XKB() builds the mask by concatenating
# the auto_capslock() result of each pair of columns of a key and dies on a mask missing here.
# NOTE(review): presumably a digit 1 means "this pair behaves alphabetically under CapsLock";
# confirm against auto_capslock().
12853             my %CapsTypes = qw(
12854             0 TWO_LEVEL 1 ALPHABETIC
12855             00 FOUR_LEVEL 10 FOUR_LEVEL_SEMIALPHABETIC
12856             01 FOUR_LEVEL_ANTISEMIALPHABETIC 11 FOUR_LEVEL_ALPHABETIC
12857             );
12858            
12859             # Some untested:
# Key name (a VK name or the literal character on the key) --> XKB key name.
# Seeded here with the irregular names; XKB_map() below adds the regular ones.
# Names with a leading '#' are apparently the keypad twins of the plain names.
my %XKB_map = (
  qw(
    `		TLDE
    \		BKSL
    OEM_102	LSGT
    ABNT_C1	AE13
    SPACE	SPCE
    ESCAPE	ESC
    PRSC	PRSC
    SCLK	SCLK
    PAUS	PAUS
    DECIMAL	KODL
    ABNT_C2	KPPT
    APP		MENU
    RETURN	RTRN
    DIVIDE	KPDV
    MULTIPLY	KPMU
    SUBTRACT	KPSU
    ADD		KPAD
    #RETURN	KPEN
    #DELETE	KPDE
  ),
  ' ' => 'SPCE',
);					# `

# Fill %XKB_map with the regular entries: the four rows of the main island,
# the function keys, the numeric keypad and the navigation cluster.
sub XKB_map () {
  # Rows are lettered downwards starting from 'E' (digit row) to 'B' (bottom row);
  # columns are numbered from 01 left to right.
  my $row_letter = 'E';
  for my $row (qw( 1234567890-= qwertyuiop[] asdfghjkl;' zxcvbnm,./ )) {	# '
    my $col = 0;
    $XKB_map{uc $_} = sprintf "A$row_letter%02d", ++$col for split //, $row;
    $row_letter = chr(ord($row_letter) - 1);
  }
  for my $fn (1..24) {			# Mac Aluminium has F19
    $XKB_map{"F$fn"} = sprintf 'FK%02d', $fn;
  }
  for my $digit (0..9) {
    $XKB_map{"NUMPAD$digit"} = "KO$digit";
  }
  # Navigation keys, listed in the order of the keypad digits they share a key with.
  my @vk  = qw(INSERT END DOWN NEXT LEFT CLEAR RIGHT HOME UP PRIOR DELETE);
  my @xkb = qw(INS    END DOWN PGDN LEFT KP5   RGHT  HOME UP PGUP  DELE);
  for my $n (0 .. $#vk) {
    $XKB_map{"#$vk[$n]"} = "KP$n" unless $n > 9;	# XXX ??? Not supported yet
    $XKB_map{$vk[$n]} = $xkb[$n];
  }
}
XKB_map;
12882 0     0 0  
# Format one key of face $face as a line of an XKB symbols section.
#
#   $N        position of the key in the layer arrays (may be undefined)
#   $k, $kraw names of the key, looked up in %XKB_map in this order
#   $decimal  array reference shared between calls, holding the state of the
#             two-pass treatment of DECIMAL: [ [$N], [bindings], finalize-flag ]
#   $Used     array reference, indexed by the second element of a binding and
#             passed through to format_key_XKB()
#
# Returns the formatted line, or nothing if the key is skipped: neither name is
# defined, flatten_unit() gives nothing, or this is the first pass over DECIMAL.
# Dies if the CapsLock mask of the key has no entry in %CapsTypes.
sub output_unit_XKB ($$$$$$$) {
  my ($self, $face, $N, $k, $kraw, $decimal, $Used) = (shift, shift, shift, shift, shift, shift, shift);
  return unless defined $k or defined $kraw;

  # Without a known XKB name warn and emit a placeholder, so the defect is visible in the output.
  my $sc = $XKB_map{$k} || $XKB_map{$kraw};
  unless ($sc) {
    warn("Can't find the scancode for the key `$k', kraw=`$kraw'");
    $sc = "k=$k";
  }

  my $flat = $self->flatten_unit($face, $N,
     $self->{faces}{$face}{'[output_layers_XKB]'} || $self->{faces}{$face}{'[output_layers]'})
    or return;
  my @KK = @$flat;

  # NOTE(review): $CL was consumed only by a tail of this function which could never run
  # (it followed an unconditional return) and is removed now.  The lookup itself is kept,
  # since evaluating it may autovivify entries of $self->{layers}; confirm before dropping it.
  my $CL;
  if (my $Caps = $self->{faces}{$face}{'[CapsLOCKlayers]'} and defined $N) {	# $N not supported on VK...
    $CL = [map $self->{layers}{$_}[$N], @$Caps];
  }

  # Nothing bound in the first two columns: fall back to the default bindings of this key.
  if (not defined $KK[0][0] and not defined $KK[1][0]) {
    for my $shft (0,1) {
      $KK[$shft] = [$default_bind{$k}[0][$shft], 0] if defined $default_bind{$k}[0][$shft];
    }
  }

  if ($k eq 'DECIMAL') {	# may be described both via visual maps and NUMPAD
    my @d = @{ $decimal->[1] || [] };
    my $finalize = $decimal->[2];
    defined $KK[$_][0] or $KK[$_] = $d[$_] for 0..$#d;	# fill on the second round
    # First pass: only remember the position and the bindings; the caller sets the
    # finalize flag and calls again to get the line.
    @$decimal = ([$N], [@KK]), return unless $finalize;
  }

  my $keys = join ",\t", map $self->format_key_XKB($_->[0], $_->[2], $Used->[$_->[1] || 0]), @KK;
  # Separate CL chars not easily supported on XKB ???  Need up to 8 keysyms per entry?
  my $u = [[@KK[0,1]], [@KK[2,3]]];
  my $cl_idx = join '', map $self->auto_capslock($_), @$u;
  my $cl_type = $CapsTypes{$cl_idx} or die "Unknown CapsLock mask: $cl_idx";
  return qq( key $sc\t{ type="$cl_type",\t[ $keys ] };\n);
}
12946 0     0 0  
# Produce the XKB "key" lines for the face addressed by the path $k
# (presumably something like [qw(faces US)]); returns them joined into one string.
# Side effect: resets the face's '[dead-usedX]' slot to one fresh hash per layer
# (which of deadkeys are reachable on the keyboard).
sub output_layout_XKB ($$) {
  my ($self, $path) = @_;
  my $base_vk = $self->BaseKeys($path);
  my $face_hash = $self->get_deep($self, @$path);
  $face_hash->{'[dead-usedX]'} = [ map +{}, @{ $face_hash->{layers} } ];
  my $base_raw = $face_hash->{baseKeysRaw};
  my $face = join '/', @$path[1..$#$path];

  # Shared with output_unit_XKB(): it parks DECIMAL here on the first encounter.
  my $decimal = [];
  my @lines;
  for my $pos (0 .. $#$base_vk) {
    push @lines, $self->output_unit_XKB($face, $pos, $base_vk->[$pos], $base_raw->[$pos],
					$decimal, $face_hash->{'[dead-usedX]'});
  }
  # Second round for DECIMAL: raise the finalize flag and format the parked key.
  if (@$decimal and ++$decimal->[2]) {
    my $pos = $decimal->[0][0];
    push @lines, $self->output_unit_XKB($face, $pos, $base_vk->[$pos], $base_raw->[$pos],
					$decimal, $face_hash->{'[dead-usedX]'});
  }
  return join '', @lines;
}
12965            
12966             my(@AppleSym, %AppleSym);
12967             sub _AppleMap () { # http://forums.macrumors.com/archive/index.php/t-780577.html
12968 0     0     # https://github.com/tekezo/Karabiner/blob/version_10.7.0/src/bridge/generator/keycode/data/KeyCode.data
12969             # It has a definition of 0x34; moreover, it also defines some keys above 0x80 (including ≤ 0x80 on some German keyboard???)
12970             chomp(my $lst = <<'EOF'); # 0..50; 65..92; 93..95 ↱KEYPAD; · = special ↱JIS (≥93=0x5d)
12971             asdfhgzxcv§bqweryt123465=97-80]ou[ip·lj'k;\,/nm.· `··············.·*·+·····/··-··=01234567·89¥_,
12972 0           EOF
12973 0           # ' # KEYPAD above starts on 65=0x41
12974             my @lst = split //, $lst;
12975 0           my $last = $#lst;
12976             # in addition to US Extended, we defined 64, 73 (BR), 102, 104 (hex 40 49 66 68) and 93-95 from JIS
12977             my @kVK_ = split /\n/, <
12978             24 Return 0d
12979             30 Tab 09
12980             ####31 Space
12981             33 Delete 08
12982             34 Enter_PowerBook 03 # Same as KeypadEnter
12983             35 Escape 1b
12984             37 Command
12985             38 Shift
12986             39 CapsLock
12987             3A Option
12988             3B Control
12989             3C RightShift
12990             3D RightOption
12991             3E RightControl
12992             3F Function
12993             40 F17 +
12994             42 ????????????? 1d # Same as RightArrow
12995             46 ?????????????? 1c # Same as LeftArrow
12996             47 ANSI_KeypadClear 1b # ??? Same as Escape
12997             48 VolumeUp 1f # ??? Same as DownArrow
12998             49 VolumeDown + # C1 of ABNT: /
12999             4A Mute
13000             ###4B ANSI_KeypadDivide /
13001             4C ANSI_KeypadEnter 03
13002             4D ??????? 1e # Same as UpArrow
13003             4F F18 +
13004             50 F19 +
13005             5A F20
13006             60 F5 +
13007             61 F6 +
13008             62 F7 +
13009             63 F3 +
13010             64 F8 +
13011             65 F9 +
13012             67 F11 +
13013             69 F13 +
13014             6A F16 +
13015             6B F14 +
13016             6D F10 +
13017             6E __PC__Menu +
13018             6F F12 +
13019             71 F15 +
13020             72 Help 05
13021             73 Home 01
13022             74 PageUp 0b
13023             75 ForwardDelete 7f
13024             76 F4 +
13025             77 End 04
13026             78 F2 +
13027             79 PageDown 0c
13028             7A F1 +
13029             7B LeftArrow 1c
13030             7C RightArrow 1d
13031             7D DownArrow 1f
13032             7E UpArrow 1e
13033             # ISO keyboards only
13034             ####0A ISO_Section §
13035             # JIS keyboards only
13036             ####5D JIS_Yen ¥
13037             ####5E JIS_Underscore _
13038             ####5F JIS_KeypadComma ,
13039             66 JIS_Eisu SPACE # Left of space (On CapsLock on Windows; compare http://commons.wikimedia.org/wiki/File:MacBookProJISKeyboard-1.jpg with http://en.wikipedia.org/wiki/Keyboard_layout#Japanese)
13040             68 JIS_Kana SPACE # Right of space (as on Windows, but without intervening key)
13041             # Defined in US Extended:
13042             6C ?????? +
13043             70 ?????? +
13044             # ?????
13045             ###BRIGHTNESS_DOWN 0x91
13046             ###BRIGHTNESS_UP 0x90
13047             ###DASHBOARD 0x82
13048             ###EXPOSE_ALL 0xa0
13049             ###LAUNCHPAD 0x83
13050             ###MISSION_CONTROL 0xa0
13051             #
13052             ###GERMAN_PC_LESS_THAN 0x80
13053 0           ###PC_POWER 0x7f
13054 0           EOF
13055 0 0         my %seen;
13056 0           for my $i (0..$#lst) {
13057             if ($lst[$i] eq '·') {
13058 0   0       undef $lst[$i];
13059 0           } else {
13060             my $pref = (defined $AppleSym{$lst[$i]} and '#');
13061             $AppleSym{"$pref$lst[$i]"} = $i;
13062             }
13063 0           }
13064 0           # $AppleSym{'#'} = $AppleSym{' '}; # Space is in a table as #
13065 0 0         my %map = ('+' => "\x10", 'SPACE' => ' ');
13066 0 0         for my $kVK (@kVK_) {
13067 0           warn ("unexpected OSX scan: <<$kVK>>"), next unless $kVK =~ /^\s*(#)|([A-F\d]{2})\s+(\?+|\w+)\s*(.*)/i;
13068 0           next if $1;
13069 0           my($hex, $name, $rest, $comment) = ($2, $3, $4);
13070 0 0         $AppleSym[hex $hex] = $name;
13071 0 0         $AppleSym{$name} = hex $hex;
13072             if (length $rest) {
13073 0 0         warn ("unexpected OSX scan expansion in $hex/$name: <<$rest>>"), next
    0          
13074 0 0         unless ( my($HEX,$lit,$sp), $comment) = ( $rest =~ /^(?:(?:([A-F\d]{2})|([^\w\s+])|(SPACE|\+))\s*)?(?:#\s*(.*))?$/i );
13075             if ($sp) {
13076 0           $rest = $map{$sp} or warn "Bad map in OSX basemap"
13077             } elsif ($HEX) {
13078 0           $rest = chr hex $HEX;
13079             } else {
13080 0           $rest = $lit;
13081 0 0 0       }
13082 0           my $idx = hex $hex;
13083             $idx > $last or not defined $lst[$idx] or warn "Non-special <<$lst[$idx]>> when overriding offset=$idx=hex($hex) in OSX basemap";
13084             $lst[$idx] = $rest;
13085             }
13086 0           }
13087             @lst
13088             }
13089            
13090             my @AppleMap;
13091            
13092             # Extra keys on Windows side: INSERT, and duplication-by-NumLock of the keypad.
13093             # Extra keys on Apple side: CLEAR on the KP, and KP-Equal.
13094            
13095             # Current solution: merge win-KP_Clear with apple-KP_CLear (1st in the center, 2nd in the ul-corner!)
13096             # merge INSERT with KP=
13097            
13098             # How to work with NumLock-modifications? There are 3 states: NumLock-, Base-, Shift.
13099            
13100             # Not in Apple maps:
13101 1     1   3893 # F21-F24 HOME UP PRIOR DIVIDE LEFT CLEAR RIGHT MULTIPLY END DOWN NEXT SUBTRACT INSERT DELETE RETURN ADD NUMPAD0-NUMPAD9
  1         2  
  1         4245  
# Key name on the Windows side --> key of %AppleSym to use instead; AppleMap_Base() consults
# this when the name itself is not a key of %AppleSym.  A leading '#' in a value selects the
# second occurence of a character in the Apple table (_AppleMap() registers repeated
# characters under '#' . char).  The list contains '#', hence the suppressed 'qw' warning.
13102             my %Apple_recode;
13103             { no warnings 'qw';
13104             %Apple_recode = (qw(
13105             DIVIDE #/ MULTIPLY * SUBTRACT #- ADD + DECIMAL #.
13106             RETURN ANSI_KeypadEnter DELETE ForwardDelete #\ § OEM_102 §
13107             PRIOR PageUp CLEAR ANSI_KeypadClear NEXT PageDown INSERT #=
13108             ABNT_C1 VolumeDown APP __PC__Menu
13109             ), SPACE => ' ', map +("NUMPAD$_", "#$_"), 0..9);
13110             }
# Keys without an Apple counterpart which AppleMap_Base() drops silently instead of
# listing them in its "Not in Apple maps" warning.
13111             my %Apple_skip = map +($_, 1), (map "F$_", 21..24); #, (map "NUMPAD$_", 0..9);
13112             # ==> HOME UP PRIOR LEFT CLEAR RIGHT END DOWN NEXT INSERT DELETE RETURN
13113             # ==> PRIOR CLEAR NEXT INSERT
13114 0     0 0  
# Build, cache and return the correspondence between positions in the layout arrays
# of a face and Apple virtual key codes.
#
#   $K   path to the face hash, as in [qw(faces US)]
#
# Stores two array references in the face hash:
#   layout2Apple   position in the layout --> Apple key code
#   Apple2layout   Apple key code --> position in the layout (this one is returned)
# On later calls the cached Apple2layout is returned immediately.
# Warns about keys with no Apple counterpart (except those in %Apple_skip) and about
# two positions mapped to the same Apple key code.
sub AppleMap_Base ($$) {
  my($self, $K) = (shift, shift);
  my $F = $self->get_deep($self, @$K);		# Presumably a face hash, as in $K = [qw(faces US)]
  return $F->{Apple2layout} if $F->{Apple2layout};
  @AppleMap = _AppleMap unless @AppleMap;	# this fills %AppleSym as well
  warn 'AppleMap too long' if $#AppleMap >= 127;
  $self->reset_units;
  my $BB = $self->BaseKeys($K);			# VK per position (except via-VK keys)
  my $B = $F->{baseKeysRaw};			# chars on key (if the first occurence???) OR VK
  my(@o, @A, @AA);				# A: kbdd --> Apple; AA: Apple --> kbdd
  $_ = [@$_] for $B, $BB;			# 1-level deep copy
  my $o = $F->{'[VK_off]'};
  for my $b (()) {				# Explicitly add via-VK keys (disabled: the list is empty)
    for my $vk (keys %$o) {
      warn "[@$K]: $vk defined on \@$o->{$vk} as $b->[$o->{$vk}]" if defined $b->[$o->{$vk}];
      $b->[$o->{$vk}] = $vk unless defined $b->[$o->{$vk}];
    }
  }
  for my $i (0..$#$B) {				# Primary mappings
    my $k = $B->[$i];
    my $kk = $BB->[$i];
    next unless defined $k;
    # Try the names from the most to the least specific one; the first known to %AppleSym wins.
    $A[$i] = $AppleSym{$kk}, next if exists $AppleSym{$kk};
    $A[$i] = $AppleSym{$Apple_recode{$kk}}, next if exists $AppleSym{$Apple_recode{$kk} || 123};
    $A[$i] = $AppleSym{$k}, next if exists $AppleSym{$k};
    $A[$i] = $AppleSym{$Apple_recode{$k}}, next if exists $AppleSym{$Apple_recode{$k} || 123};
    # LEFT --> LeftArrow, HOME --> Home etc.  Store the key code as the branches above do:
    # @A is used as an index into @AA below, and a name would numify to 0 there.
    $A[$i] = $AppleSym{"\u\L$k" . 'Arrow'}, next if exists $AppleSym{"\u\L$k" . 'Arrow'};
    $A[$i] = $AppleSym{"\u\L$k"}, next if exists $AppleSym{"\u\L$k"};
    next if $Apple_skip{$k};
    push @o, $k;
  }
  for my $i (0..126) {				# Primary backwards mappings
    next unless defined $A[$i];
    warn "Duplicate backward Apple mapping: old=$AA[$A[$i]] --> $A[$i] <-- $i=new" if defined $AA[$A[$i]];
    $AA[$A[$i]] = $i;
  }
  for my $i (0..126) {				# Secondary backwards mappings
    next if defined $AA[$i] or ($AppleSym[$i] || '') !~ /^#(.)$/ or not defined $AA[$AppleSym{$1}];
    $AA[$i] = $AA[$AppleSym{$1}]
  }
  warn "Not in Apple maps: @o" if @o;
  $F->{layout2Apple} = \@A;
  $F->{Apple2layout} = \@AA;
}
13162            
13163             # fake is needed (apparently, the compiler does not allocate the named states smartly???)
# Flat lists of (group name, number of states) pairs.  The "NofM" names are the keys
# through which output_hex_input() reads its [first, last] state ranges.
# NOTE(review): presumably one of these lists is what gets passed to alloc_slots(); the
# caller is not in this part of the file - confirm.
13164             my @state_cnt = qw( 4of4 4096 3of4 256 2of4 16 1of4 0 0of4 0
13165             1of6 0 2of6 2 3of6 16 4of6 256 0of6 0
13166             );
# Variants a, b, c differ only in the sizes of the trailing 5of6 and 6of6 groups.
13167             my @state_cnt_a = (@state_cnt, qw(
13168             5of6 4 6of6 64
13169             )); # At end, so may be skipped via merge_states_6_and_4
13170             my @state_cnt_b = (@state_cnt, qw(
13171             5of6 64 6of6 64
13172             ));
# Number of consecutive 4of6 states handled by one declaration under plan (C);
# output_hex_input() reads it for $in_group and $spread_next.
13173             my $in_group_4of6_plan_c = 2;
13174             my @state_cnt_c = (@state_cnt, '5of6' => 16 * $in_group_4of6_plan_c, '6of6' => 64);
13175             my $use_plan_b; # unimplemented
13176             my $use_plan_c = 1; # untested
13177 0     0 0  
# Lay out consecutive ranges of slots starting at $tot.
#   $tot  the first free slot
#   $a    reference to a flat list of (name, count) pairs; it is not modified
# Returns a reference to a hash: name --> [first slot, last slot].  A count of 0
# gives an empty range (last == first - 1) and consumes no slots.
sub alloc_slots ($$) {
  my ($next_free, $plan) = @_;
  my %range_of;
  for (my $at = 0; $at < @$plan; $at += 2) {
    my ($name, $count) = @$plan[$at, $at + 1];
    $range_of{$name} = [$next_free, $next_free + $count - 1];
    $next_free += $count;
  }
  return \%range_of;
}
13188 0     0 0  
13189 0 0         sub output_state_range ($$$$$$) { # Apparently, only ranges up to 256 states are supported.
13190 0           my($self, $from, $to, $mult, $next, $out, $o) = (shift, shift, shift, shift, shift, shift, ''); # $out is the ord(OUTPUT)
13191 0           $o .= "\t\t\t\n" if $to - $from > 255;
13192 0           while ($to - $from > 255) {
13193 0 0         $o .= $self->output_state_range($from, $from+255, $mult, $next, $out);
13194 0 0         $from += 256;
13195             $out += 256*$mult if defined $out;
13196 0 0         $next += 256*$mult if defined $next;
13197 0           }
13198 0 0         XML_format($out = chr $out) if defined $out;
13199 0 0         my @out;
13200 0           push @out, qq(next="$next") if defined $next;
13201             push @out, qq(output="$out") if defined $out ;
13202             $o .= <
13203 0          
13204             EOS
13205             $o
13206             }
13207            
13208             my $merge_states_6_and_4 = 1;
13209             my $do_hex5 = 0; # Won’t install with this… (Even with $merge_states_6_and_4)
13210 0     0 0  
13211 0 0         sub output_hex_input ($$$) { # only 4-hex-digits input supported now. First state in $states{'1of4'}[0].
13212 0 0         my($self, $states, $HEX, $o) = (shift, shift, shift, '');
13213             unless ($HEX =~ /[0-9a-f]/i) {
13214             return $do_hex5 ? <
13215            
13216            
13217            
13218             EOS
13219            
13220            
13221 0           EOS
13222 0           }
  0            
13223 0           my $i = hex $HEX;
13224             my @O = map { [$states->{($_+1).'of4'}[0] + $i] } 0..3;
13225             $O[4] = [undef, $i];
13226 0           # $O[4] = qq(output="$HEX;");
13227             # $O[4] = qq(next="5000");
13228             $o .= <
13229            
13230            
13231             EOS
13232             #
13233             #
13234             #
13235             #
13236             $o .= <output_state_range($states->{"${_}of4"}[0], $states->{"${_}of4"}[1], 16, $O[$_][0], $O[$_][1])
13237 0          
13238             EOS
13239             for 2..4; # ($HEX eq '9' ? 4 : 3); # 2..4; bisect installation problems here
13240            
13241 0           # return $o unless 15 >= hex $HEX; # debugging only
  0            
13242 0          
13243 0           @O = map { [$states->{($_+1).'of6'}[0] + $i] } 0..5;
13244 0 0         $O[2][0]--; # We start with U+01..., not U+00....
13245             $O[6] = [undef, 0xDC00 + $i];
13246             $o .= $do_hex5 ? <
13247            
13248             EOS
13249            
13250             EOS
13251             # $states->{"2of6"}[0] is U+0xxxxx=hex5 hex5 and hex6 differs only in treatment of 0, and of 1 0
13252             # $states->{"2of6"}[1] is U+1xxxxx hex5: 1 0 —→ U+010xxx
13253             # $states->{"3of6"}[0] is U+01xxxx hex6: 1 0 —→ U+10xxxx
13254 0 0         # $states->{"3of6"}[1] is U+10xxxx hex5: 0 —→ hex4, 1 —→ U+01xxxx, rest X —→ U+0Xxxx
13255             # hex6: 0 —→ hex5, 1 —→ U+1xxxxx, rest X —→ U+0Xxxx
13256             $o .= <
13257            
13258 0 0 0       EOS
13259             # What follows is a complete mess, since with $do_hex5 the resulting layout won’t install
13260             $o .= <
13261            
13262            
13263 0 0 0      
13264             EOS
13265             $o .= <
13266 0 0 0      
13267             EOS
13268             $o .= <
13269 0 0        
13270             EOS
13271             $o .= <
13272            
13273            
13274            
13275 0 0        
13276             EOS
13277             $o .= <
13278            
13279 0 0 0      
13280             EOS
13281             $o .= <
13282            
13283             EOS
13284             $o .= <output_state_range($states->{"${_}of6"}[0], $states->{"${_}of6"}[1], 16, $O[$_][0], $O[$_][1])
13285 0          
13286             EOS
13287             for 3; # ($HEX eq '9' ? 4 : 3); # 2..4; bisect installation problems here
13288             # VARIANT (A): for every one of 256 states, individually emit a surrogate (with multiplier 4), and set the next state (in B..B+3)
13289             # VARIANT (C): for every $in_group of 256 states, emit its surrogate (with multiplier 4).
13290             # This creates a spread of "next states" of size M-3, with M = 4*$in_group.
13291 0 0 0       # Create next state in ranges (B .. B+M-3) (B+M .. B+2M-3) (B+2M .. B+3M-3) (B+3M .. B+4M-3)
13292 0 0         # depending on ($i & 3). [Later, we should process every range with multiplier=0.]
13293 0 0         my $next_base = ($merge_states_6_and_4 and not $use_plan_c) ? $states->{"3of4"}[0] + 0xDC : $states->{"5of6"}[0];
13294 0 0         my $in_group = $use_plan_c ? $in_group_4of6_plan_c : 1;
13295             my $spread_next = $use_plan_c ? 4*$in_group_4of6_plan_c - 3 : 1;
13296             $o .= $use_plan_c ? <
13297            
13301             EOS
13302 0          
13303 0           EOS
13304 0           for my $j (0 .. ((0x100/$in_group)-1)) {
13305 0 0         my($J, $n, $O) = ($states->{"4of6"}[0] + $j*$in_group, $next_base + ($i & 0x3)*$spread_next, 0xD800 + 4*$j*$in_group + ($i>>2));
13306 0           XML_format($O = chr $O);
13307 0           if ($use_plan_c) {
13308             my $T = $J + $in_group_4of6_plan_c -1;
13309             $o .= <
13310            
13311 0           EOS
13312             } else {
13313             $o .= <
13314            
13315             EOS
13316             # ($HEX eq '9' ? 4 : 3); # 2..4; bisect installation problems here
13317 0 0         }
13318 0 0         }
13319 0           if ($use_plan_c) {
13320             my $doc = $merge_states_6_and_4 ? '; redirect to low surrogates' : '';
13321             $o .= <
13322            
13325             EOS
13326 0           for my $k (1 .. $in_group_4of6_plan_c) {
13327 0           # for my $j (0 .. 3) {
13328 0 0         my $n = $next_base + ($k-1)*4;
13329 0           my $T = $n + 3;
13330             my $next = ($merge_states_6_and_4 ? $states->{"4of4"}[0] + 0xDC0 + $i: $O[5][0]);
13331             $o .= <
13332            
13333             EOS
13334             }
13335 0 0         }
13336            
13337 0 0         unless ($merge_states_6_and_4) {
13338             $o .= $self->output_state_range($states->{"${_}of6"}[0], $states->{"${_}of6"}[1], 16, $O[$_][0], $O[$_][1])
13339             for ($use_plan_c ? 6 : 5) .. 6; # ($HEX eq '9' ? 4 : 3); # 2..4; bisect installation problems here
13340 0           }
13341             $o
13342             }
13343 0     0 0  
13344 0           sub output_hex_term ($$) { # only 4-hex-digits input supported now. First state in $states{'1of4'}[0].
13345             my($self, $states) = (shift, shift);
13346             my $o = <
13347            
13348 0          
13349 0           EOS
13350 0           my @hd = (0..9, 'A'..'F');
13351 0           for my $n (1 .. 3) {
13352 0           for my $i (0 .. ((16**$n)-1)) {
13353 0           my $N = $n + 1;
13354 0           my $I = $states->{"${N}of4"}[0] + $i;
13355             my $hex = sprintf "%0${n}X", $i;
13356             $o .= <
13357            
13358             EOS
13359 0 0         }
13360             }
13361             $o .= $do_hex5 ? <
13362            
13363            
13364            
13365             EOS
13366            
13367            
13368 0           EOS
13369            
13370 0           return $o; # the rest creates problems: see iz-Latin-hex6-vis3a.keylayout
13371            
13372             $o .= <
13373            
13374 0          
13375 0           EOS
13376 0           for my $n (2 .. 3) {
13377 0           for my $i (0 .. ((16**($n-1))-1)) {
13378 0           my $N = $n + 1;
13379 0           my $I = $states->{"${N}of6"}[0] + $i;
13380             my $hex = sprintf "%0${n}X", $i + 16**($n-2);
13381             $o .= <
13382            
13383             EOS
13384             }
13385 0           }
13386             $o
13387             }
13388            
13389             my $junkHEX = <
13390             After +0yz or +10z (16*16 states); instead of 4434 should put 4434 + 0..3
13391            
13392            
13393             WRONG!!! Need different multipliers for next and for output; so need 256 individual declarations
13394             Instead: use multiplier="4" (so that the output char is correct; next state takes 4K values, out of which we
13395             need only last two bits (manually inserted via next="" above); so we need 1K declarations for per-ultimate???
13396            
13397             So: maybe have 16 declarations for "After +0yz or +10z"; this way, next state takes 64 values, of which
13398             we may make account for by 16 declarations. (32 total per 22 chars 0-9a-fA-F.)
13399            
13400             Or: maybe have 16 declarations for "After +0yz or +10z"; each creates a range of 64 possible "next" states;
13401             but we create 4 groups of such states. So we may make account for by 4 declarations. (20 total per 22 chars 0-9a-fA-F.)
13402             EOJ
13403            
13404             #sub XML_format ($) { $_[0] =~ s/([&""''\x00-\x1f\x7f-\x9f\s<>]|$rxCombining|$rxZW)/ sprintf '&#x%04X;', ord $1 /ego;
13405             # # Avoid "Malformed UTF-8 character (fatal)" by not puting in a REx
13406 0     0 0   # $_[0] =~ s/(.)/ sprintf '&#x%04X;', ord $1 /ego if length $_[0] eq 1 and 0xd000 <= ord $_[0] and 0xdfff >= ord $_[0]}
# Replace, in place (the caller's variable is changed through $_[0]), every character
# unsafe in the XML output by a hexadecimal character reference; the result is returned too.
# Characters in U+D000..U+DFFF are converted without a regular expression, to avoid
# "Malformed UTF-8 character (fatal)" on surrogates.
sub XML_format ($) {
  my @escaped;
  for my $char (split //, $_[0]) {
    my $code = ord $char;
    if ($code >= 0xd000 and $code <= 0xdfff) {
      push @escaped, sprintf '&#x%04X;', $code;
      next;
    }
    (my $safe = $char) =~ s/([&""''\x00-\x1f\x7f-\x9f\s<>]|$rxCombining|$rxZW)/ sprintf '&#x%04X;', ord $1 /ego;
    push @escaped, $safe;
  }
  $_[0] = join '', @escaped;
}
# As XML_format(), but the argument is first replaced by the result of to_UTF16LE_units().
# Works in place on the caller's variable; returns what XML_format() returns.
sub XML_format_UTF_16 ($) {
  my $units = to_UTF16LE_units($_[0]);
  $_[0] = $units;
  return XML_format($_[0]);
}
13422            
# Used by AppleMap_i_j() when the Control variant of the map is built ($use_base < 0):
# a key name which is neither a capital letter nor one of -0-9=.*/+ is translated
# through this hash (most names are absent and give undef).
13423             my %OEM2ctrl = (qw( OEM_102 0 OEM_MINUS), "\x1f", OEM_4 => "\x1b", OEM_5 => "\x1c", OEM_6 => "\x1d",
13424             CLEAR => "\x1b"); # [, \, ]
# Used by AppleMap_i_j() for the other base-key variants (true, non-negative $use_base);
# tried before the SPACE special case and %oem_control.
13425 0   0 0 0   my %OEM2cmd = (qw( OEM_102 § OEM_MINUS - ));
      0        
      0        
13426             sub AppleMap_i_j ($$$$$;$$$$) { # http://forums.macrumors.com/archive/index.php/t-780577.html
13427 0           my($self, $K, $l, $sh, $caps, $use_base, $dd, $map, $override) =
  0            
13428 0           (shift, shift, shift, shift, shift, shift, shift || {}, shift || {}, shift || {dup => {}});
13429 0           my $A2l = [ @{ $self->AppleMap_Base($K) } ]; # Deep copy
13430 0           my $dup = $override->{dup};
13431             for my $from (keys %$dup) {
13432 0           $A2l->[$from] = $A2l->[$dup->{$from}];
13433 0           }
  0            
13434 0           my $F = $self->get_deep($self, @$K); # Presumably a face hash, as in $K = [qw(faces US)]
13435 0   0       my $L = [map $self->{layers}{$_}, @{$F->{layers}}];
13436 0 0 0       $L = $L->[$l];
  0 0 0        
13437 0 0         my $B = $use_base && $self->BaseKeys($K); # Partially implemented: use BaseKeys instead of the real $F (VK_ code)
13438 0 0         $B = [map {defined() && /^\w$/ ? lc $_ : $_} @$B] if ($use_base || 0) > 0;
13439 0           @AppleMap = _AppleMap unless @AppleMap;
13440 0           warn 'AppleMap too long' if $#AppleMap >= 127;
13441 0           my $o = '';
13442 0 0 0       for my $i (0..127) {
13443             my($I, $d, $c, $force_o) = ($A2l->[$i], 0); # offset inside the layout array
13444 0 0 0       $c = $override->{"$l-$sh-$caps-vk=$i"} || $override->{"$l-$sh--vk=$i"} unless $use_base; # $caps is 0 or 1
    0          
13445 0 0 0       # $force_o++ if defined $use_base and $use_base eq '0';
    0          
13446 0           $c = $use_base ? $B->[$I] : $L->[$I][$sh] if not defined $c and defined $I;
13447 0 0         if (($use_base || 0) < 0) { # Control
    0          
    0          
13448             $force_o++;
13449 0           if (!defined $c) { # ignore
13450             } elsif ($c =~ /^[A-Z]$/) {
13451 0           $c = chr( 1 + ord($c) - ord 'A');
13452             } elsif ($c !~ /^[-0-9=.*\/+]$/) {
13453             $c = $OEM2ctrl{$c}; # mostly undef
13454             }
13455 0           } elsif ($use_base) {
13456 0 0         # warn "COMMAND-SPACE: c=<$c> OEM=<$OEM2cmd{$c}> ctrl=$oem_control{$c}" if 49 == $i;
    0          
    0          
    0          
13457             my $tr;
13458 0           if (!defined $c) { # ignore
13459             } elsif (defined($tr = $OEM2cmd{$c})) {
13460 0           $c = $tr;
13461             } elsif ($c eq 'SPACE') { # %oem_control does follow the pattern below
13462 0           $c = ' ';
13463 0           } elsif (defined($tr = $oem_control{$c})) {
13464             $tr =~ s/(?<=.).*//;
13465 0           $c = $tr;
13466             } else {
13467             undef $c;
13468 0 0         }
13469             }
13470 0 0         $c = $AppleMap[$i] unless defined $c; # Fallback to US (apparently, there is no unbound "ASCII" keys in maps???); dbg to "\xffff" #
13471            
13472             $o .= <
13473 0 0 0      
13474 0 0         EOK
13475             $d = $c->[2] || 0 if ref $c;
13476             $c = $c->[0] if ref $c;
13477 0 0 0       # On windows, CapsLock flips the case; on Mac, it upcases
13478 0 0         # ($c) = grep {$_ ne $c} uc $c, ucfirst lc $c, lc $c if !$d and $caps and (lc $c ne uc $c or lc $c ne ucfirst lc $c);
13479 0 0         $c = uc $c if !$d and $caps;
13480 0   0       $dd->{$c}[1]++ if $d > 0; # 0 for normal char, 1 for base prefix; not for hex4/hex6
13481 0 0         $override->{extra_actions}{$c}++ if $d < 0;
13482 0 0 0       my $M = (!$force_o and $d >= 0 and $map->{$self->keys2hex($c)});
13483 0 0 0       my $pr = $M ? 'a_' : '';
    0 0        
    0          
13484 0 0         $dd->{$c}[0] = $c if $M or $d > 0; # 0 for normal char, 1 for base prefix
13485 0           my($how, $pref) = ($d || $M) ? ('action', ($M ? 'a_' : '') . ($d > 0 ? 'pr_' : (!$d && '_'))) : ('output', '');
13486             ($how eq 'output') ? XML_format_UTF_16 $c : XML_format $c;
13487             $o .= <
13488            
13489             EOK
13490 0           }
13491             $o
13492             }
13493            
13494 0   0 0 0   my $hex_states;
      0        
                  # $hex_states (declared just above) is a file-scoped lexical: AppleMap_prefix()
                  # assigns it from alloc_slots(), and it is handed to output_hex_input() below.
                  #
                  # AppleMap_prefix_map: build and return the text describing one key/state $kk.
                  # Arguments are taken positionally with shift:
                  #   $self     - the object; used here for key2hex() and output_hex_input()
                  #   $kk       - the key being described
                  #   $pref     - true: the default action is next="st_..."; false: output="..."
                  #   $M        - hash ref, hex-string key => array ref (elements [0] and [2] are read);
                  #               defaults to {}
                  #   $v        - array ref; $v->[0] supplies the default action's value
                  #   $doHEX    - when true, output_hex_input() text is appended if $v->[0] is a single
                  #               character out of - u space _ + = 0-9 a-f (case-insensitive)
                  #   $override - hash ref of "+STATE+KEY" => replacement entry; defaults to {}
                  # Returns the accumulated string.
                  # NOTE(review): the here-doc bodies following each `$o .= <` are not visible in this
                  # listing, so which of $k/$pr/$prefix/$todo/$st/$T they interpolate cannot be confirmed here.
13495 0           sub AppleMap_prefix_map ($$$$$;$$) {
13496 0 0         my($o, $self, $kk, $pref, $M, $v, $doHEX, $override) = ('', shift, shift, shift, shift || {}, shift, shift, shift || {});
                  # Work on a copy so $kk itself stays raw for the key2hex()/override lookups below
                  # (presumably XML_format escapes its argument in place -- confirm).
13497 0 0         XML_format (my $k = $kk);
                  # NOTE(review): $M was defaulted with `shift || {}` above, so it is always true here
                  # and $pr is always 'a_' -- confirm whether the '' branch is meant to be reachable.
13498 0           my $pr = $M ? 'a_' : '';
13499             my $prefix = $pref ? 'pr_' : '_';
13500             $o .= <
13501            
13502 0           EOK
13503 0 0         # A character and a prefix key with the same ordinal differ only in this:
13504 0           XML_format (my $oo = $v->[0]);
13505             my $todo = $pref ? qq(next="st_$oo") : qq(output="$oo");
13506             $o .= <
13507 0 0        
  0            
13508 0           EOK
                  # One pass per key of %$M, in sorted (string) order of the hex keys.
13509 0           for my $st (sort keys %{$M || {}}) {
13510 0           my $v0 = $M->{$st};
                  # The key is a hex code point: $st0 keeps the raw character, $st goes through XML_format.
13511 0           XML_format ($st = my $st0 = chr hex $st);
13512             my $KK = $self->key2hex($kk);
13513 0   0       my $ST0 = $self->key2hex($st0);
                  # An override may be keyed by the raw characters or by their key2hex() forms (all four
                  # combinations are tried); otherwise the map entry $v0 is used.
                  # This `my $v` shadows the sub's $v argument for the rest of the loop body.
13514 0   0       my $v = $override->{"+$st0+$kk"} || $override->{"+$ST0+$kk"}
13515 0 0         || $override->{"+$st0+$KK"} || $override->{"+$ST0+$KK"} || $v0;
                  # $d is the entry's flag ($v->[2], 0 when absent); $T starts undefined.
13516 0 0         my($d, $T) = $v->[2] || 0;
    0          
                  # For $d >= 0 the entry's [0] is a hex code point; for $d < 0 it is used verbatim below.
13517 0           $T = chr hex $v->[0] if $d >= 0;
13518 0           if ($d > 0) {
13519             XML_format $T;
13520 0           $T = qq(next="st_$T");
13521             } elsif ($d < 0) { # Literal state
13522 0           $T = qq(next="$v->[0]");
13523 0           } else {
13524             XML_format_UTF_16 $T;
13525 0           $T = qq(output="$T");
13526             }
13527             $o .= <
13528            
13529 0 0 0       EOK
13530 0           }
                  # Outside the loop $v is the sub's argument again.
13531             $o .= $self->output_hex_input($hex_states, $v->[0]) if $doHEX and $v->[0] =~ /^[-u\x20_+=0-9a-f]\z/i;
13532             $o .= <
13533 0          
13534             EOK
13535             $o;
13536             }
13537 0   0 0 0  
13538 0           sub AppleMap_prefix ($$;$$$$$$) { # http://forums.macrumors.com/archive/index.php/t-780577.html
                  # Build and return the text for a set of states/keys, in one of several modes selected
                  # by $do_initing and $term.
                  # Arguments are taken positionally with shift:
                  #   $self       - the object (key2hex, keys2hex, charhex2key, output_hex_term,
                  #                 AppleMap_prefix_map are called on it)
                  #   $dd         - hash ref, key => array ref; element [1] is tested as a flag
                  #   $do_initing - only entries of %$dd whose [1] has the same truthiness are processed
                  #   $term       - true: emit one line per entry from $show instead of calling
                  #                 AppleMap_prefix_map(); with !$do_initing, $act replaces $dd
                  #   $map        - hash ref keyed by keys2hex() of a key; defaults to {}
                  #   $show       - hash ref keyed by key2hex() of a key, giving the text to show
                  #   $override   - hash ref; "+..." keys are bindings, {extra_actions} is a hash of names
                  #   $act        - hash ref, filled in here on the first call (while it is still empty)
                  # Returns the accumulated string.
                  # NOTE(review): $override and $act are dereferenced without a default -- assumes the
                  # caller always passes hash refs; confirm.
                  # NOTE(review): here-doc bodies after `<` are not visible in this listing.
13539             my($self, $dd, $do_initing, $term, $map, $show, $override, $act) = (shift, shift, shift, shift, shift || {}, shift, shift, shift);
13540 0 0         my $o = '';
  0            
13541 0 0 0      
                  # NOTE(review): this copies one level only (the keys/values of extra_actions); that is
                  # enough to keep the ++ below from touching $override as long as the values are plain scalars.
13542 0           my %e = %{ $override->{extra_actions} || {}}; # Deep copy
                  # hex4 implies hex6, and also hex5 when $do_hex5 (defined outside this sub) is set.
13543 0           ($do_hex5 and $e{hex5}++), $e{hex6}++ if $e{hex4};
13544 0 0         my @o = @$override{grep /^\+/, keys %$override}; # honest bindings, not extra_actions/etc
13545 0           @o = map chr hex $_->[0], grep $_->[2] > 0, @o; # dead keys
                  # First call only (%$act still empty): count the states and allocate $hex_states.
13546 0           unless (%$act) { # Treat states created by the actions only
13547 0           my %states;
13548 0           $states{$_}++ for keys(%e), @o, grep $dd->{$_}[1], keys %$dd;
13549 0 0         for my $v (values %$map) { # hash indexed by the prefix key
13550 0           for my $out (values %$v) {
                  # Only entries with a true [2] flag name a further state.
13551 0           next if not $out->[2];
13552 0           my $k = $self->charhex2key($out->[0]);
13553 0 0 0       $states{$k}++;
                  # This inner $v shadows the loop variable; it is only used for the test on the next line.
13554             my $v;
13555             $act->{$k} = [$k] unless $v = $dd->{$k} and $v->[1]; # Skip if terminator was already created; do not create fake values
13556 0           }
13557 0 0         }
    0          
13558             my $states = 10 + keys(%states); # Was 4100; 10: "just in case"
                  # The counter array is chosen by $use_plan_c / $use_plan_b (both defined outside this sub).
13559             $hex_states = alloc_slots( $states, $use_plan_c ? \@state_cnt_c : ($use_plan_b ? \@state_cnt_b : \@state_cnt_a));
13560 0 0 0       }
13561 0          
13562             if ($term and not $do_initing) { # Treat states created by the actions only
13563             $dd = $act; # A terminator MUST be created for every state
13564 0           }
13565 0          
                  # Number of hex4/hex5/hex6 actions requested (used as a boolean below).
13566 0           my $doHEX = grep $e{"hex$_"}, 4,5,6;
13567 0           for my $kk (sort keys %$dd) {
13568 0 0         my $v = $dd->{$kk};
13569             XML_format (my $k = $kk);
                  # Skip entries whose [1] flag does not match the requested mode.
13570 0 0         next if !!$do_initing != !!$v->[1];
13571 0          
13572 0 0         if ($term) {
                  # NOTE(review): key2hex() here vs keys2hex() below for the same $kk -- confirm both are intended.
13573 0           my $Show = $show->{$self->key2hex($kk)};
13574 0           $Show = $kk unless defined $Show;
                  # Put a space in front of text that starts with a match of $rxCombining.
13575 0           $Show =~ s/^(?=$rxCombining)/ /;
13576 0           XML_format $Show;
13577             $o .= qq(\t\n);
13578             next;
13579 0           }
13580 0          
13581             my $M = $map->{$self->keys2hex($kk)};
                  # $do_initing is passed in the $pref position of AppleMap_prefix_map().
13582 0 0 0       $o .= $self->AppleMap_prefix_map($kk, $do_initing, $M, $v, $doHEX, $override);
13583 0   0       }
                  # Extra actions are emitted only in the initing, non-terminator mode.
13584             for my $a ( ($do_initing and not $term) ? sort keys %e : () ) {
13585             my $add = ($a =~ /^hex4\z/ and ($do_hex5 ? <
13586            
13587            
13588             EOS
13589 0          
13590             EOS
13591             $o .= <
13592            
13593            
13594             $add
13595 0 0 0       EOS
      0        
13596 0           }
13597             $o .= $self->output_hex_term($hex_states) if $term and $doHEX and not $do_initing; # Do only once, at the end
13598             $o
13599             }
13600            
13601             1;
13602            
13603             __END__