File Coverage

blib/lib/UI/KeyboardLayout.pm
Criterion Covered Total %
statement 33 3842 0.8
branch 1 2610 0.0
condition 1 1769 0.0
subroutine 10 208 4.8
pod 0 172 0.0
total 45 8601 0.5


line stmt bran cond sub pod time code
1             package UI::KeyboardLayout;
2            
3             $VERSION = $VERSION = "0.70"; # Module version; assigned twice, presumably to silence the "used only once" warning — TODO confirm
4            
5             binmode $DB::OUT, ':utf8' if $DB::OUT; # (older) Perls had "Wide char in Print" in debugger otherwise
6             binmode $DB::LINEINFO, ':utf8' if $DB::LINEINFO; # (older) Perls had "Wide char in Print" in debugger otherwise
7            
8 1     1   30094 use strict;
  1         3  
  1         40  
9 1     1   1194 use utf8;
  1         12  
  1         8  
10 1   50 1   244 BEGIN { my $n = ($ENV{UI_KEYBOARDLAYOUT_DEBUG} || 0); # Debug bitmask is taken from the environment (0 when unset)
11 1 50       11 if ($n =~ /^0x/i) { # A value spelled with a leading 0x is read as hexadecimal
12 0         0 $n = hex $n;
13             } else {
14 1         3 $n += 0; # Anything else is forced to a number
15             }
16 1         67 eval "sub debug() { $n }"; # Compile debug() as an empty-prototype sub returning the whole mask
17             # Bit values of the flags, in order: 1 2 4 8 0x10 0x20
18 1         6 my @dbg = (qw( debug_face_layout_recipes debug_GUESS_MASSAGE debug_OPERATOR debug_import debug_stacking debug_noid ),
19             # 0x40 0x80 0x100 0x200 0x400 0x800 0x1000
20             qw(warnSORTEDLISTS printSORTEDLISTS warnSORTCOMPOSE warnDO_COMPOSE warnCACHECOMP dontCOMPOSE_CACHE warnUNRES),
21             # 0x2000 0x4000
22             qw(debug_STACKING printSkippedComposeKey),
23             '_debug_PERL_dollar1_scoping'); # 16th name, so it receives bit 0x8000
24 1         2 my $c = 0; # Index of the next bit to hand out. (printSORTEDLISTS: Dumpvalue to STDOUT — implementation detail!)
25 1         19 my @dbg_b = map $n & (1<<$_), 0..31; # $dbg_b[$i] is the mask restricted to bit $i (0 when the bit is clear)
26 1         4 for (@dbg) {
27 16         1094 eval "sub $_ () {$dbg_b[$c++]}"; # One empty-prototype sub per flag name, taking successive bits
28             }
29             }
30             sub debug_PERL_dollar1_scoping () { debug & 0x1000000 } # Tests bit 0x1000000 of the debug() mask (separate from the generated _debug_PERL_dollar1_scoping flag)
31            
32             my $ctrl_after = 1; # In "pairs of nonShift/Shift-columns" (1 simplifies output of BACK/ESCAPE/RETURN/CANCEL)
33             my $create_alpha_ctrl = 2; # NOTE(review): the meaning of the value 2 is not visible in this part of the file
34             my %start_SEC = (FKEYS => [96, 24, sub { my($self,$u,$v)=@_; 'F' . (1+$u-$v->[0]) }], # Naming sub: offset of $u from the first array element (96) gives F1, F2, ...
35             ARROWS => [128, 16, # Entries look like [first value, presumably the number of values — TODO confirm, naming sub]
36             sub { my($self,$u,$v)=@_; # Name is selected by the offset of $u from the first array element
37             (qw(HOME UP PRIOR DIVIDE LEFT CLEAR RIGHT MULTIPLY END DOWN NEXT SUBTRACT INSERT DELETE RETURN ADD))[$u-$v->[0]]}],
38             NUMPAD => [144, 16,
39             sub { my($self,$u,$v)=@_; # Offsets yield NUMPAD7..9, F14, NUMPAD4..6, F15, NUMPAD1..3, F16, NUMPAD0, DECIMAL
40             ((map { ($_ > 10 ? 'F' : "NUMPAD") . $_} 7..9,14,4..6,15,1..3,16,0), 'DECIMAL')[$u-$v->[0]]}]);
41             my $maxEntityLen = 111; # Avoid overflow of prefix char above 0fff in kbdutool (but now can channel them to smaller values)
42             my $avoid_overlong_synonims_Entity = 20; # These two are currently disabled
43            
44 84     84 0 546 sub toU($) { substr+(qq(\x{fff}).shift),1 } # Prepends U+0FFF to the argument and then drops that first character again; per the original note, this workaround is needed to make perl's Unicode 8-bit-aware
45            
46             #use subs qw(chr lc);
47 1     1   2650 use subs qw(chr lc uc ucfirst);
  1         52  
  1         10  
48            
49             #BEGIN { *CORE::GLOBAL::chr = sub ($) { toU CORE::chr shift };
50             # *CORE::GLOBAL::lc = sub ($) { CORE::lc toU shift };
51             #}
52             ### Remove ß ẞ :
53             ## my %fix = qw( ԥ Ԥ ԧ Ԧ ӏ Ӏ ɀ Ɀ ꙡ Ꙡ ꞑ Ꞑ ꞧ Ꞧ ɋ Ɋ ꞩ Ꞩ ȿ Ȿ ꞓ Ꞓ ꞥ Ꞥ ); # Perl 5.8.8 uc is wrong with palochka, 5.10 with z with swash tail
54             my %fix = qw( ԥ Ԥ ԧ Ԧ ӏ Ӏ ɀ Ɀ ꙡ Ꙡ ꞑ Ꞑ ꞧ Ꞧ ɋ Ɋ ß ẞ ꞩ Ꞩ ȿ Ȿ ꞓ Ꞓ ꞥ Ꞥ ℊ Ɡ ϳ Ϳ ); # Lowercase => uppercase overrides: Perl 5.8.8 uc is wrong with palochka, 5.10 with z with swash tail
55             my %unfix = reverse %fix; # Inverse table (uppercase => lowercase), consulted by lc()
56            
57 84     84   178 sub chr($) { local $^W = 0; toU CORE::chr shift } # Replacement for chr: CORE::chr passed through toU; $^W is zeroed locally to avoid illegal character 0xfffe etc warnings...
  84         185  
58 0 0   0     sub lc($) { my $in = shift; $unfix{$in} || CORE::lc toU $in } # Replacement for lc: an %unfix entry for the whole argument wins; otherwise CORE::lc of the toU-processed argument
  0            
59 0 0   0     sub uc($) { my $in = shift; $fix{$in} || CORE::uc toU $in } # Replacement for uc: a %fix entry for the whole argument wins; otherwise CORE::uc of the toU-processed argument
  0            
60 0 0   0     sub ucfirst($) { my $in = shift; $fix{$in} || CORE::ucfirst toU $in } # Replacement for ucfirst: %fix is looked up with the whole argument (not only its first character); otherwise CORE::ucfirst of the toU-processed argument
  0            
61            
62             # We use this for printing, not for reading (so we can use //o AFTER the UCD is read)
63 1     1   519 my $rxCombining = qr/\p{NonspacingMark}/; # Initial version of the pattern: the nonspacing marks known to the running Perl
  1         5  
  1         21  
64             my $rxZW = qr/\p{Line_Break: ZW}|[\xAD\x{200b}-\x{200f}\x{2060}-\x{2064}\x{fe00}-\x{fe0f}]/; # Matches Line_Break=ZW characters, U+00AD, U+200B..U+200F, U+2060..U+2064 and U+FE00..U+FE0F
65            
66 0     0 0   sub rxCombining { $rxCombining } # Accessor returning the current value of the file-level lexical $rxCombining
67            
68             =pod
69            
70             =encoding UTF-8
71            
72             =head1 NAME
73            
74             UI::KeyboardLayout - Module for designing keyboard layouts
75            
76             =head1 SYNOPSIS
77            
78             #!/usr/bin/perl -wC31
79             use UI::KeyboardLayout;
80             use strict;
81            
82             # Download from http://www.unicode.org/Public/UNIDATA/
83             UI::KeyboardLayout::->set_NamesList("$ENV{HOME}/Downloads/NamesList.txt");
84            
85             UI::KeyboardLayout::->set__value('ComposeFiles', # CygWin too
86             ['/usr/share/X11/locale/en_US.UTF-8/Compose']);
87             UI::KeyboardLayout::->set__value('EntityFiles',
88             ["$ENV{HOME}/Downloads/bycodes.html"]);
89             UI::KeyboardLayout::->set__value('rfc1345Files',
90             ["$ENV{HOME}/Downloads/rfc1345.html"]);
91            
92             my $i = do {local $/; open $in, '<', 'MultiUni.kbdd' or die; <$in>};
93             # Init from in-memory copy of the configfile
94             my $k = UI::KeyboardLayout:: -> new_from_configfile($i)
95             -> fill_win_template( 1, [qw(faces CyrillicPhonetic)] );
96             print $k;
97            
98             open my $f, '<', "$ENV{HOME}/Downloads/NamesList.txt" or die;
99             my $k = UI::KeyboardLayout::->new();
100             my ($d,$c,$names,$blocks,$extraComb,$uniVersion) = $k->parse_NameList($f);
101             close $f or die;
102             $k->print_decompositions($d);
103             $k->print_compositions ($c);
104            
105             UI::KeyboardLayout::->set_NamesList("$ENV{HOME}/Downloads/NamesList.txt",
106             "$ENV{HOME}/Downloads/DerivedAge.txt");
107             my $l = UI::KeyboardLayout::->new();
108             $l->print_compositions;
109             $l->print_decompositions;
110            
111             UI::KeyboardLayout::->set_NamesList("$ENV{HOME}/Downloads/NamesList-6.1.0d8.txt",
112             "$ENV{HOME}/Downloads/DerivedAge-6.1.0d13.txt");
113             my $l = UI::KeyboardLayout::->new_from_configfile('examples/EurKey++.kbdd');
114             for my $F (qw(US CyrillicPhonetic)) {
115             # Open file, select()
116             print $l->fill_win_template(1,[qw(faces US)]);
117             $l->print_coverage(q(US));
118             print $l->fill_osx_template([qw(faces US)]);
119             }
120            
121             perl -wC31 UI-KeyboardLayout\examples\grep_nameslist.pl "\b(ALPHA|BETA|GAMMA|DELTA|EPSILON|ZETA|ETA|THETA|IOTA|KAPPA|LAMDA|MU|NU|XI|OMICRON|PI|RHO|SIGMA|TAU|UPSILON|PHI|CHI|PSI|OMEGA)\b" ~/Downloads/NamesList.txt >out-greek
122            
123             =head1 AUTHORS
124            
125             Ilya Zakharevich, ilyaz@cpan.org
126            
127             =head1 DESCRIPTION
128            
129             In this section, a "keyboard" has a certain "character repertoir" (which characters may be
130             entered using this keyboard), and a mapping associating a character in the repertoir
131             to a keypress or to several (sequential or simultaneous) keypresses. A small enough keyboard
132             may have a pretty arbitrary mapping and remain useful (witness QWERTY
133             vs Dvorak vs Colemak). However, if a keyboard has a sufficiently large repertoir,
134             there must be a strong logic ("orthogonality") in this association - otherwise
135             the most part of the repertoir will not be useful (except for people who have an
136             extraordinary memory - and are ready to invest part of it into the keyboard).
137            
138             "Character repertoir" needs of different people vary enormously; observing
139             the people around me, I get a very narrow point of view. But it is the best
140             I can do; what I observe is that many of them would use 1000-2000 characters
141             if they had a simple way to enter them; and the needs of different people do
142             not match a lot. So to be helpful to different people, a keyboard should have
143             at least 2000-3000 different characters in the repertoir. (Some ballpark
144             comparisons: MES-3B
145             has about 2800 characters; the Adobe Glyph List corresponds
146             to about 3600 Unicode characters.)
147            
148             To access these characters, how much structure one needs to carry in memory? One can
149             make a (trivial) estimate from below: on Windows, the standard US keyboard allows
150             entering 100 - or 104 - characters (94 ASCII keys, SPACE, ENTER, TAB - moreover, C-ENTER,
151             BACKSPACE and C-BACKSPACE also produce characters; so do C-[, C-] and C-\
152             C-Break in most layouts!). If one needs about 30 times more, one could do
153             with 5 different ways to "mogrify" a character; if these mogrifications
154             are "orthogonal", then there are 2^5 = 32 ways of combining them, and
155             one could access 32*104 = 3328 characters.
156            
157             Of course, the characters in a "reasonable repertoir" form a very amorphous
158             mass; there is no way to introduce a structure like that which is "natural"
159             (so there is a hope for "ordinary people" to keep it in memory). So the
160             complexity of these mogrifications is not in their number, but in their
161             "nature". One may try to decrease this complexity by having very easy to
162             understand mogrifications - but then there is no hope in having 5 of them
163             - or 10, or 15, or 20.
164            
165             However, we B<know> that many people I<are> able to memorise the layout of
166             70 symbols on a keyboard. So would they be able to handle, for example, 30
167             different "natural" mogrifications? And how large a repertoir of characters
168             one would be able to access using these mogrifications?
169            
170             This module does not answer these questions directly, but it provides tools
171             for investigating them, and tools to construct the actually working keyboard
172             layouts based on these ideas. It consists of the following principal
173             components:
174            
175             =over 4
176            
177             =item Unicode table examiner
178            
179             distills relations between different Unicode characters from the Unicode tables,
180             and combines the results with user-specified "manual mogrification" rules.
181             From these automatic/manual mogrifications, it constructs orthogonal scaffolding
182             supporting Unicode characters (we call it I, but it
183             is a major generalization of the corresponding Unicode consortium's terms).
184            
185             =item Layout constructor
186            
187             allows building keyboard layouts based on the above mogrification rules, and
188             on other visual and/or logical directives. It combines the bulk-handling
189             ability of automatic rule-based approach with a flexibility provided by
190             a system of manual overrides. (The rules are read from a F<.kbdd> L<Keyboard
191             Description file|/"Keyboard description files">.)
192            
193             =item System-specific software layouts
194            
195             may be created basing on the "theoretical layout" made by the layout
196             constructor — currently only on Windows (only via F<KBDUTOOL> route) and OS X.
197            
198             =item Report/Debugging framework
199            
200             creates human-readable descriptions of the layout, and/or debugging reports on
201             how the layout creation logic proceeded.
202            
203             =back
204            
205             The last (and, probably, the most important) component of the distribution is
206             L created using this toolset.
207            
208             =head1 Keyboard description files
209            
210             =head2 Syntax
211            
212             I could not find an appropriate existing configuration file format, so was
213             forced to invent yet-another-config-file-format. Sorry...
214            
215             Config file is for initialization of a tree implementing a hash of hashes of
216             hashes etc whose leaves are either strings or arrays of strings, and keys are
217             words. The file consists of I<"sections">; each section fills a certain hash
218             in the tree.
219            
220             Sections are separated by "section names" which are sequences of word
221             character and C (possibly empty) enclosed in square brackets.
222             C<[]> is a root hash, then C<[word]> is a hash reference by key C in the
223             root hash, then C<[word/another]> is a hash referenced by element of the hash
224             referenced by C<[word]> etc. Additionally, a section separator may look like
225             C<< [visual -> wordsAndSlashes] >>.
226            
227             Sections are of two types: normal and visual. A normal section
228             consists of comments (starting with C<#>) and assignments. An assignment is
229             in one of 4 forms:
230            
231             word=value
232             +word=value
233             @word=value,value,value,value
234             /word=value/value/value/value
235            
236             The first assigns a string C to the key C in the hash of the
237             current section. The second adds a value to an array referenced by the key
238             C; the other two add several values. Trailing whitespace is stripped.
239            
240             Any string value without end-of-line characters and trailing whitespace
241             can be added this way (and values without commas or without slash can
242             be added in bulk to arrays). In particular, there may be no whitespace before
243             C<=> sign, and the whitespace after C<=> is a part of the value.
244            
245             Visual sections consist of comments, assignments, and C, which
246             is I of the section. Comments
247             after the last assignment become parts of the content. The content is
248             preserved as a whole, and assigned to the key C; trailing
249             whitespace is stripped. (This is the way to insert a value containing
250             end-of-line-characters.)
251            
252             In the context of this distribution, the intent of visual sections is to be
253             parsed by a postprocessor. So the only purpose of explicit assignments in a
254             visual section is to configure how I is parsed; after the parsing
255             is done (and the result is copied elsewhere in the tree) these values should
256             better be not used.
257            
258             =head2 Semantic of visual sections
259            
260             Two types of visual sections are supported: C and C. A content of
261             C section is just an embedded (part of) F<.klc> file. We can read deadkey
262             mappings and deadkey names from such sections. The name of the section becomes the
263             name of the mapping functions which may be used inside the C rule
264             (or in a recipe for a computed layer).
265            
266             A content of C section consists of C<#>-comment lines and "the mapping
267             lines"; every "mapping line" encodes one row in a keyboard (in one or several
268             layouts). (But the make up of rows of this keyboard may be purely imaginary;
269             it is normal to have a "keyboard" with one row of numbers 0...9.)
270             Configuration settings specify how many lines are per row, and how many layers
271             are encoded by every line, and what are the names of these layers:
272            
273             visual_rowcount # how many config lines per row of keyboard
274             visual_per_row_counts # Array of length visual_rowcount
275             visual_prefixes # Array of chars; <= visual_rowcount (miss=SPACE)
276             prefix_repeat # How many times prefix char is repeated (n/a to SPACE)
277             in_key_separator # If several layers per row, splits a key-descr
278             layer_names # Where to put the resulting keys array
279             in_key_separator2 # If one of entries is longer than 1 char, join by this
280             # (optional)
281            
282             Each line consists of a prefix (which is ignored except for sanity checking), and
283             whitespace-separated list of key descriptions. (Whitespace followed by a
284             combining character is not separating.) Each key description is split using
285             C into slots, one slot per layout. (The leading
286             C is not separating.) Each key/layout
287             description consists of one or two entries. An entry is either two dashes
288             C<--> (standing for empty), or a hex number of length >=4, or a string.
289             (A hex number must be separated by C<.> from neighbor word
290             characters.) A loner character which has a different uppercase is
291             auto-replicated in uppercase (more precisely, titlecase) form. Missing or empty key/layout description
292             gives two empty entries (note that the leading key/layout description cannot
293             be empty; same for "the whole key description" - use the leading C<-->).
294            
295             If one of the entries in a slot is a string of length ≥ 2, one must separate
296             the entries by C. Likewise, if a slot has only one entry,
297             and it is longer than 1 char, it must be started or terminated by C.
298            
299             To simplify BiDi keyboards, a line may optionally be prefixed with the L<C<LRO>|http://en.wikipedia.org/wiki/Unicode_character_property#Bidirectional_writing>
300             character; if so, it may optionally be ended by spaces and the L<C<PDF>|http://en.wikipedia.org/wiki/Unicode_character_property#Bidirectional_writing> character.
301             For compatibility with other components, layer names should not contain characters C<+()[]>.
302            
303             =head2 Inclusion of F<.klc> files
304            
305             Instead of including a F<.klc> file (or its part) verbatim in a visual
306             section, one can make a section C with
307             a key C. Filename will be included and parsed as a C
308             visual section (with name C???). (Currently only UTF-16
309             files are supported.)
310            
311             =head2 Metadata
312            
313             A metadata entry is either a string, or an array. A string behaves as
314             if it were an array with the string repeated sufficiently many times. Each
315             personality defines C which chooses the element of the arrays.
316             The entries
317            
318             COMPANYNAME LAYOUTNAME COPYR_YEARS LOCALE_NAME LOCALE_ID
319             DLLNAME SORT_ORDER_ID_ LANGUAGE_NAME
320            
321             should be defined in the personality section, or above this section in the
322             configuration tree. (Used when output Windows F<.klc> files and OS X
323             F<.keylayout> files.)
324            
325             OSX_ADD_VERSION OSX_LAYOUTNAME
326            
327             The first one is the ordinal of the word after which to insert the version
328             into C (OS X allows layout names longer than the limit of 64 UTF-16
329             codepoints of Windows); the second one allows a completely different name.
330            
331             Optional metadata currently consists only of C key (the protocol
332             version; hardwired now as C<1.0>).
333            
334             =head2 Layer/Face/Prefix-key Recipes
335            
336             The sections C and C contain instructions how
337             to build Layers and Faces out of simpler elements. Similar recipes appear
338             as values of C entries in a face. Such a "recipe" is
339             executed with I: a base face name, a layer number, and a prefix
340             character (the latter is undefined when the recipe is a layer recipe or
341             face recipe). (The recipe is free to ignore the parameters; for example, most
342             recipes ignore the prefix character even when they are "prefix key" recipes.)
343            
344             The recipes and the visual sections are the most important components of the description
345             of a keyboard group.
346            
347             To construct layers of a face, a face recipe is executed several times with different
348             "layer number" parameter. In contrast, in simplest cases a layer recipe is executed
349             once. However, when the layer is a part of a compound ("parent") recipe, it inherits
350             the "parameters" from the parent. In particular, it may be executed several times with
351             different face name (if used in different faces), or with different layer number (if used
352             - explicitly or implicitly - in different layer slots; for example, C
353             in a face/prefix-key recipe will execute the C recipe separately for all the
354             layer numbers; or one can use C together with
355             C). Depending on the recipe, these calls may result in the same layout
356             of the resulting layers, or in different layouts.
357            
358             A recipe may be of three kinds: it is either a "first comer wins" which is a space-separated collection of
359             simpler recipes, or C, or a "mutator": C or just C.
360             All recipes must be C<()>-balanced
361             and C<[]>-balanced; so must be the C; in turn, the C is either a
362             layer name, or another recipe. A layer name must be defined either in a visual C section,
363             or be a key in the C section (so it should not have C<+()[]> characters),
364             or be the literal C.
365             When C is processed, first, the resulting layer(s) of the C recipe
366             are calculated; then the layer(s) are processed by the C (one key at a time).
367            
368             The most important C keywords are C (with argument a face name, defined either
369             via a C section, or via C) and C (with argument
370             of the form C, with layer names defined as above). Both
371             select the layer (out of a face, or out of a list) with number equal to the "layer number parameter" in the context
372             of the recipe. The C builder is similar to C, but chooses the "other"
373             layer ("cyclically the next" layer if more than 2 are present).
374            
375             The other selectors are C, C and C; they
376             operate on the base face or face associated to the base face.
377            
378             The simplest forms of C are C (note that
379             C/C/C return C when case-conversion results in no
380             change; use C/C/C if one wants them to behave
381             as Perl operators). Recall that a layer
382             is nothing more than a structure associating a pair "unshifted/shifted character" to the key number, and that
383             these characters may be undefined. These simplest mutators modify these characters
384             independently of their key numbers and shift state (with C making all of
385             them undefined). Similar user-defined simple mutators are C;
386             here C consists of pairs "FROM TO" of characters (with optional spaces between pairs);
387             characters not appearing as FROM become undefined by C.
388             (As usual, characters may be replaced by hex numbers with 4 or more hex digits;
389             separate the number from a neighboring word character by C<.> [dot].)
390            
391             All mutators must have a form C or C, with C
392             C<(),[]>-balanced. Other simple mutators are C (converts
393             control-char [those between 0x00 and 0x1f] to the corresponding [uppercase] character),
394             C (adds a constant to the [numerical code of the] input character
395             so that C becomes C), C (keeps input characters
396             which match, converts everything else to C), C
397             (similar to C, but pairs all characters in the layers based on their position),
398             C (all defined characters are converted to C).
399            
400             The mutator C is similar to , but takes the F<.klc>-style
401             visual C section as the description of the mutation. C may
402             be followed by a character as in C; if not, C is the prefix key from
403             the recipe's execution parameters.
404            
405             The simple mutator C has flavors: one can append C or C
406             to the name, and the resulting characters become prefix keys (the “C-inverted”
407             prefix followed by C behaves as non-inverted prefix followed by C).
408            
409             Some mutators pay attention not only to what the character is, but how it is
410             accessible on the given key: such are C, C,
411             C. Some other mutators also take into
412             account how the key is positioned with respect to the other keys.
413            
414             C assigns a character
415             to a particular column of the keyboard. Which keys are in which columns is
416             governed by how the corresponding
417             visual layer is formatted (shifted to the right by C array of the
418             visual layer). This visual layer is one associated to the face by the
419             C key (and the face is the parameter face of the
420             mutator). C is a comma-separated list;
421             empty positions map to the undefined character.
422            
423             C chooses a mutator based on the row of the keyboard. On the top row,
424             it is the first mutator which is chosen, etc. The list C is separated by C
425             surrounded by whitespace.
426            
427             The mutator C converts some non-prefix characters to prefix
428             characters; the conversion happens if the argument of the mutator coincides with
429             what is at the corresponding position in C, and this position contains
430             a prefix character. (Nowadays this mutator is not very handy — most of its uses
431             may be accomplished by having I prefix characters in appropriate faces.)
432            
433             The mutators C, C process their
434             argument in a special way: the characters in C which duplicated the characters
435             present (on the same key, and possibly with the same modifiers) in C are
436             ignored. The remaining characters are combined “as usual” with “the first comer wins”.
437            
438             The most important mutator is C (and its flavors). (See L mutator>.)
439            
440             Note that C is similar to a selector;
441             it is the only way to insert a
442             layer without a selector, since a bareword is interpreted as a C; C is a synonym
443             of C (repeated as many times as there are layers
444             in the parameter "base face").
445            
446            
447             The recipes in a space-separated list of recipes ("first comer wins") are
448             interpreted independently to give a collection of layers to combine; then,
449             for every key number and both shift states, one takes the leftmost recipe
450             which produces a defined character for this position, and the result is put
451             into the resulting layer.
452            
453             Keep in mind that to understand what a recipe does, one should trace
454             its description in right-to-left order: for example, C creates
455             a layout where C<:> is at position of C<.>, but on the second [=other] layer (essentially,
456             if the base layout is the standard one, it binds the character C<:> to the keypress C).
457            
458             To simplify formatting of F<.kbdd> files, a recipe may be an array reference.
459             The string may be split on spaces, or split after comma or C<|>.
460            
461             =head2 The C mutator
462            
463             The essence of C is to have several mutation rules and choose I
464             of the results of application of these rules. Grouping the rules allows
465             one a flexible way to control what I actually means. The rules may
466             be separated by comma, by C<|>, or by C<|||> (interchangeable with C<||||>).
467            
468             In the simplest case of grouping, C form a C<|>-separated list, and
469             each group consists of one rule. Then I result is one coming from
470             an earlier rule. The groups are separated by C<|>, and the rules inside the
471             group are separated by comma; if more than one rule appears in a group, a
472             different kind of competition appears (inside the group).
473            
474             The I of the generated characters is a list C
475             UNICODE_BLOCK, IN_CASE_PAIR, FROM_NON_ALTGR_POSITION>
476             with lexicographical order (the earlier element is stronger than the ones after it).
477             Here C describes whether a character is generated by
478             Unicode compositing (versus “compatibility compositing” or other
479             “artificially generated” mogrifiers); the older age wins, as well as
480             honest compositing, earlier Unicode blocks, as well as case pairs and
481             characters from non-C-positions. (Experience shows that these rules
482             have a pretty good correlation with being “more suitable for human consumption”.)
483            
484             Moreover, quality in case-pairs is equalized by assigning the strongest
485             I of two. Such pairs are always considered “tied together” when
486             they compete with other characters. (In particular, if a single character
487             with higher quality occupies one of C positions, a
488             case pair with lower quality is completely ignored; so the “other” position
489             may be taken by a single character with yet lower quality.)
490            
491             In addition, the characters which lost the competition for
492             non-C-positions are considered I on C-positions. (With
493             boosted priority compared to mutated C-characters; see above.)
494            
495             This mutator comes in several flavors: one can append to its name
496             C/C/C/C<32OK> (in this
497             order). Unless C is specified, it will not modify characters on a key
498             which produces C when used without modifiers. Unless C<32OK> is specified, it
499             will not produce Unicode characters after C<0xFFFF> (the default is to follow
500             the brain-damaged semantic of prefix keys on Windows). Unless C is
501             specified, the result is optimized by removing duplicates (per key) generated
502             by application of C. With the C modifier, the generated characters
503             are not counted as “obtained by logical rules” when statistics for the generated
504             keyboard layout are calculated.
505            
506             =head2 Linked prefixes
507            
508             On top of what is explained above, there is a way to arrange “linking” of two prefix keys;
509             this linking allows characters which cannot be fit on one (prefixed) key to
510             “migrate” to unassigned positions on the otherwise-prefixed key. (This is
511             similar to migration from non-C-position to C-position.)
512             This is achieved by using mutator rules of the following form:
513            
514             primary = +PRE-GROUPS1|||SHARED||||POST-GROUPS1
515             secondary = PRE-GROUPS2||||PRE-GROUPS1|||SHARED||||POST-GROUPS2
516            
517             Groups with digits are not shared (specific to a particular prefix); C is
518             (effectively) reverted when accessed from the secondary prefix; for the
519             secondary key, the recipes from C which were used in the primary
520             key are removed from C, and are appended to the end of C;
521             the C are skipped when finding assignments for the secondary
522             prefix.
523            
524             In the primary recipe, C<|||> and C<||||> are interchangeable with C<|>.
525             Moreover, if C is empty, the secondary recipe should be written as
526            
527             secondary = PRE-GROUPS2|||PRE-GROUPS1|||SHARED
528            
529             if C is empty, this should be written as one of
530            
531             secondary = PRE-GROUPS2|||SHARED
532             secondary = PRE-GROUPS2||||SHARED
533             secondary = PRE-GROUPS2||||SHARED||||POST-GROUPS2
534            
535             These rules are to allow macro-ization of the common parts of the primary
536             and secondary recipe. Put the common parts as a value of the key
537             C (here C<***> denotes a word), and replace them by
538             the macro C<< >> in the recipes.
539            
540             B: the primary key recipe starts with the C<+> character; it
541             forces interpretation of C<|||> and C<||||> as of ordinary C<|>.
542            
543             If not I, the top-level groups are formed by C<||||> (if present), otherwise by C<|||>.
544             The number of top-level groups should be at most 3. The second of C<||||>-groups
545             may have at most 2 C<|||>-groups; there should be no other subdivision. This way,
546             there may be up to 4 groups with different roles.
547            
548             The second of 3 toplevel C<|||>-groups, or the first of two sublevel C<|||>-groups
549             is the “skip” group. The last of two or three toplevel C<|||>-groups (or of
550             sublevel C<|||>-groups, or the 2nd toplevel C<||||>-group without subdivisions) is the
551             inverted group; the 3rd of toplevel C<||||>-groups is the “extra” group.
552            
553             “Penalize/prohibit” lists start anew in every top-level group.
554            
555             =head2 Atomic mutators rules
556            
557             As explained above, the individual RULES in C may be
558             separated by C<,> or C<|>, or C<|||> or C<||||>. Such an individual
559             rule is a combination of I combined by C<+> operators,
560             and/or preceded by C<-> prefix (with understanding that C<+-> must
561             be replaced by C<-->). The prefix C<-> means I of the
562             rule; the operator C<+> is the composition of the rules.
563            
564             B the atomic rule C<< >> converts its input character into
565             its superscript forms (if such forms exist; for example, C may
566             be converted to C<ᵃ> or C<ª>). The atomic rules C, C, C
567             behave the same as the corresponding MUTATORs. The atomic rule C
568             converts a control-character to the corresponding “uppercase” character:
569             C<^A> is converted to C, and C<^\> is converted to C<\>. (The last
570             4 rules cannot be inverted by C<->.)
571            
572             The composition is performed (as usual) from right to left. B the
573             individual rule C<< +lc+dectrl >> converts C<^A> to C<ᵃ> or C<ª>.
574            
575             In addition to rules listed above, the atomic rules may be of the
576             following types:
577            
578             =over
579            
580             =item *
581            
582             A hex number with ≥4 digits, or a character: implements the composition
583             inverting (compatibility or not) Unicode decompositions into two characters;
584             the character in the rule must be the first character of the decomposition.
585             Here “Unicode decompositions” are either deduced from Unicode decomposition
586             rules (with compatibility decompositions having lower priority), or deduced
587             basing on splitting the name of the character into parts.
588            
589             =item *
590            
591             C<< >> is an inversion of a Unicode decomposition which goes from
592             1 character to 1 character.
593            
594             =item *
595            
596             Flavors of characters C<< >> from Unicode tables come from Unicode
597             1-character to 1-character decompositions
598             marked with C<< >>. B C<< >> for a subscript form;
599             or C<< >>.
600            
601             =item *
602            
603             C<< >> rules TBC ..........................................
604            
605             =item *
606            
607             Calculated rules C<< >> are extracted by a
608             heuristic algorithm which tries to parse the Unicode name of the character.
609            
610             For the best understanding of what these rules produce, inspect
611             results of print_compositions(), print_decompositions() methods documented
612             in L<"SYNOPSIS">. The following “keywords” are processed by the algorithm:
613            
614             WITH, OVER, ABOVE, PRECEDED BY, BELOW (only with LONG DASH)
615            
616             are separators;
617            
618             COMBINING CYRILLIC LETTER, BARRED, SLANTED, APPROXIMATELY, ASYMPTOTICALLY,
619             SMALL (not near LETTER), ALMOST, SQUARED, BIG, N-ARY, LARGE, LUNATE,
620             SIDEWAYS DIAERESIZED, SIDEWAYS OPEN, INVERTED, ARCHAIC, EPIGRAPHIC,
621             SCRIPT, LONG, MATHEMATICAL, AFRICAN, INSULAR, VISIGOTHIC, MIDDLE-WELSH,
622             BROKEN, TURNED, INSULAR, SANS-SERIF, REVERSED, OPEN, CLOSED, DOTLESS, TAILLESS, FINAL
623             BAR, SYMBOL, OPERATOR, SIGN, ROTUNDA, LONGA, IN TRIANGLE, SMALL CAPITAL (as smallcaps)
624            
625             are modifiers. For an C, one scans for
626            
627             QUAD, UNDERBAR, TILDE, DIAERESIS, VANE, STILE, JOT, OVERBAR, BAR
628            
629             TBC ..........................................
630            
631             =item *
632            
633             Additionally, C are considered C variants of
634             their middle letter, as well as C of C<0>.
635            
636             =item *
637            
638             C<< >> rules are obtained by scanning the name for
639            
640             WHITE, BLACK, CIRCLED, BUT NOT
641            
642             as well as for C (as C), paleo-Latin digraphs and C
643             (as C), doubled-letters
644             (as C), C doubled-letters
645             (as C), C (possibly with C
646             or C; as C).
647            
648             =item *
649            
650             Manual prearranged rules TBC ..........................................
651            
652             =item *
653            
654             C<< >> Explicit named substitution rules TBC ..........................................
655            
656             =item *
657            
658             C<< >> Prohibits handling non-substituted input TBC ..........................................
659            
660             =item *
661            
662             C<< >> rules TBC ..........................................
663            
664             =back
665            
666             =head2 Input substitution in atomic rules
667            
668             TBC ..........................................
669            
670             =head2 The C mutator
671            
672             TBC ..............................
673            
674             =head2 Pseudo-mutators for generation of documentation
675            
676             A few mutators do not introduce any characters (in other words, they behave as
677             C) but are used for their side effects: in prefix-key recipes,
678             C introduces documentation of what the prefix key is intended
679             for. Likewise, C allows adding CSS classes to highlight
680             parts of HTML output generated by this module, the parts corresponding to selected
681             characters in a face.
682            
683             C is a comma-separated list, every triple in the
684             list being C. C is one of C/C (which
685             add formatting to the key containing one of the C) or C/C
686             (which add formatting to an individual character displayed on the key),
687             one can add a digit to C to limit to a particular layer in the face
688             (useful when a character appears several times in a face).
689             The lower-case variants select characters basing on the I of a key.
690             One can also append C<=CONTEXT> to C, then the class is added only if
691             C appears as one of the options for the HTML output generator.
692            
693             The CSS rules generated by this module support several classes directly; the
694             rest should be supported by the user-supplied rules. The classes with existing
695             support are: on keys
696            
697             to_w from_w # generate arrows between keys
698             from_nw from_ne to_nw to_ne # generate arrows between keys; will yellow-outline
699             pure # unless combined with this
700             red-bg green-bg blue-bg # tint the key as the whole (as background)
701            
702             On characters
703            
704             very-special need-learn may-guess # provide green/brown/yellow-outlines
705             special # provide blue outline (thick unless combined with
706             thinspecial # <-- this)
707            
708             =head2 Extra CSS classes for documentation
709            
710             In addition, several CSS classes are auto-generated basing on Unicode
711             properties of the character. TBC ........................
712            
713             =head2 Debugging mutators
714            
715             If the bit 0x40 of the environment variable C
716             (decimal or C<0xHEX>) is set, debugging output for mutators is enabled:
717            
718             r ║ ║ ┆ ║ ṙ ṛ ┆ ║ ║ ║ ║ ⓡ ┆
719             ║ ║ ┆ ║ Ṙ Ṛ ┆ ║ ║ ║ ║ Ⓡ ┆
720             ║ ║ ặ ┆ ║ ┆ ║ ║ ║ ║ ┆
721             ║ ║ Ặ ┆ ║ ┆ ║ ║ ║ ║ ┆
722             Extracted [ …list… ] deadKey=00b0
723            
724             The output contains a line per character assigned to the keyboard key (if
725             there are 2 layers, each with lc/uc variants, there are 4 lines); empty lines are
726             omitted. The first column indicates the base character (lc of the 1st layer) of
727             the key; the separator C<║> indicates C<|>-groups in the mutator. Above, the first
728             group produces no mutations, the second group mutates only the characters in
729             the second layer, and the third group produces two mutations per a character in
730             the first layer. The 7th group is also producing mogrifications on the 1st layer.
731            
732             The next example clarifies C<┆>-separator: to the left of it are mogrifications which
733             come in case pairs, to the right are mogrifications where mogrified-lc is not
734             a case pair of mogrified-uc:
735            
736             t ║ ║ ᵵ ║ ꞇ ┆ ʇ ║ ┆ ║
737             ║ ║ ║ Ꞇ ┆ ᴛ ║ ┆ ║
738             ║ ║ ║ ┆ ║ ꝧ ┆ ║
739             ║ ║ ║ ┆ ║ Ꝧ ┆ ║
740             Extracted [ …list… ] deadKey=02dc
741            
742             In this one, C<│> separates mogrifications with different priorities (based on
743             Unicode ages, whether the atomic mutator was compatibility/synthetic one, and the
744             Unicode block).
745            
746             / ║ ║ ║ ║ ║ │ ∴ ║ ║
747             ║ ║ ║ ║ ║ │ ≘ ≗ ║ ║
748             ║ ║ ║ ║ ║ / │ ⊘ ║ ║
749             Extracted [ …list… ] deadKey=00b0
750            
751             For secondary mogrifiers, where the distinction between C<|||> and C<|>
752             matters, some of the C<║>-separators are replaced by C<┃>. Additionally,
753             there are two rounds of extraction: first the characters corresponding
754             to the primary mogrifier are TMP-extracted (from the groups PRE-GROUPS1,
755             COMMON); then what is extracted from COMMON is put back at the
756             effective end (at the end of POST-GROUPS2, or, if no such, at
757             the beginning of COMMON):
758            
759             t ║ ║ ᵵ ┃ ┃ ʇ │ │ ꞇ ┆ ║
760             ║ ║ ┃ ┃ │ ᴛ │ Ꞇ ┆ ║
761             ║ ║ ┃ ┃ │ │ ꝧ ┆ ║
762             ║ ║ ┃ ┃ │ │ Ꝧ ┆ ║
763             TMP Extracted: <…list…> from layers 0 0 | 0 0
764             t ║ ║ ᵵ ┃ ꞇ ┆ ʇ ┋ ┃ ┆ │ ┆ │ ┆ ║
765             ║ ║ ┃ Ꞇ ┆ ᴛ ┋ ┃ ┆ │ ┆ │ ┆ ║
766             ║ ║ ┃ ┆ ┋ ┃ ┆ │ ┆ │ ꝧ ┆ ║
767             ║ ║ ┃ ┆ ┋ ┃ ┆ │ ┆ │ Ꝧ ┆ ║
768             Extracted [ …list… ] deadKey=02dc
769            
770             In the second part of the debugging output, the part of common which is put
771             back is separated by C<┋>.
772            
773             When bit 0x80 is set, much more lower-level debugging info is printed. The
774             arrays at separate depth mean: group number, priority, not-cased-pair, layer
775             number, subgroup, is-uc. When bit 0x100 is set, the debugging output for
776             combining atomic mutators is enabled.
777            
778             =head2 Personalities
779            
780             A personality C is defined in the section C. (C may
781             include slashes - untested???)
782            
783             An array C gives the list of layers forming the face. (As of version
784             0.03, only 2 layers are supported.) The string C is a “fallback”
785             face: if a keypress is not defined by C, it would be taken from
786             C; additionally, it affects the C key bindings: for example,
787             if C has C where C has C<γ>, and there is a binding for
788             C, the same binding applies for C.
789             .........
790            
791             =head2 Substitutions
792            
793             In section C one defines composition rules which may be
794             used on par with composition rules extracted from I.
795             An array C is converted to a hash accessible as C<< >> from
796             a C filter of satellite face processor. An element of the array
797             must consist of two characters (the first is mapped to the second one). If
798             both characters have upper-case variants, the translation between these variants
799             is also included.
800            
801             =head2 Classification of diacritics
802            
803             The section C contains arrays each describing a class of
804             diacritic marks. Each array may contain up to 7 elements, each
805             consisting of diacritic marks in the order of similarity to the
806             "principal" mark of the array. Combining characters may be
807             preceded by horizontal space. Seven elements should contain:
808            
809             Surrogate chars; 8bit chars; Modifiers
810             Modifiers below (or above if the base char is below)
811             Vertical (or Comma-like or Doubled or Dotlike or Rotated or letter-like) Modifiers
812             Prime-like or Centered modifiers
813             Combining
814             Combining below (or above if base char is below)
815             Vertical combining and dotlike Combining
816            
817             These lists determine what a C filter of satellite face processor
818             will produce when followed by whitespace characters
819             (possibly with modifiers) C. (So, if F<.kbdd> file
820             uses C) this determines what diacritic prefix keys produce.
821            
822             =head2 Compose Key
823            
824             The scalar configuration variable C controls the ID of the prefix
825             key to access F<.Compose> composition rules. The rules are read from files
826             in the class/object variable; set this variable with
827            
828             $self->set__value('ComposeFiles', [@Files]); # Class name (instead of $self) is OK here
829            
830             The format of the files is the same as for X11’s F<.Compose> (but C are
831             not supported); only compositions starting with C<< >>, having no
832             deadkeys, and (on Windows) expanding to 1 UTF-16 codepoint are processed. (See
833             L<“systematic” parts of rules in the standard
834             F<.XCompose>|"“Systematic” parts of rules in a few .XCompose"> — see lines with postfix C.)
835            
836             Repeating this prefix twice accesses characters via their HTML/MathML entity names. The files
837             are as above (the variable name is C); the format is the same as in
838             F.
839            
840             Repeating this prefix 3 times accesses characters via their C codes;
841             the variable C contains files in the format of F.
842             It is recommended to download these files (or the later flavors)
843            
844             http://www.x.org/releases/X11R7.6/doc/libX11/Compose/en_US.UTF-8.html
845             http://www.w3.org/TR/xml-entity-names/bycodes.html
846             http://tools.ietf.org/html/rfc1345
847            
848             See L<"SYNOPSIS"> for an example. Note that this mechanism does not assign this
849             prefix key to any particular position on the keyboard layout; this should be
850             done elsewhere. Implementation detail: if some of these 3 maps cannot be created,
851             they are skipped (so less than 3 chained maps are created).
852            
853             For more control, one can make this configuration variable into an array. The
854             value C is equivalent to the array with elements
855            
856             ComposeFiles,dotcompose,warn,KEY
857             EntityFiles,entity,warn,,KEY
858             rfc1345Files,rfc1345,warn,,KEY
859            
860             Five comma-separated fields are: the variable controlling the filelist,
861             the type of files in the filelist (only the 3 listed types are supported now),
862             whether to warn when a particular flavor
863             of composition table could not be loaded, the global access prefix, the prefix
864             for access from the previous element (chained access).
865            
866             If C (etc.) has more than 1 file, bindings from earlier files
867             take precedence over bindings from the later ones. If the same sequence is
868             bound several times inside a file, a later binding takes precedence.
869            
870             =head2 Names of prefix keys
871            
872             Section C defines naming of prefix keys. If not named there (or in
873             processed F<.klc> files), the C property will be used; if none,
874             Unicode name of the character will be used.
875            
876             =head2 More than 2 layers and/or exotic modifier keys
877            
878             This is controlled by C, C, and C
879             configuration arrays. TBC..................................
880            
881             =head2 CAVEATS for German/French/BÉPO/Neo keyboards
882            
883             Non-US keycaps: the character "a" is on C<(VK_)A>, but its scancode is now different.
884             E.g., French's A is on 0x10, which is US's Q. Our table of scancodes is
885             currently hardwired. Some pictures and tables are available on
886            
887             http://bepo.fr/wiki/Pilote_Windows
888            
889             With this module, the scancode and the C-code for a position in a layout
890             are calculated via the C configuration variable; the first recognized
891             character at the given position of this layer is translated to
892             the C-code (using a hardwired table). The mapping of C-codes
893             to scancodes is currently hardwired.
894            
895             For “unusual” keys, one can use the C subsection of the face to describe
896             its scancode (the first entry in the array) and the bindings. If the scancode
897             is empty, the name of the key is translated to a scancode using the hardwired
898             tables.
899            
900             =head1 Keyboards: on ease of access (What makes an easy-to-use keyboard layout)
901            
902             The content of this section has no I relationship to the functionality
903             of this module. However, we feel that it is better that the user of this
904             module understands these concerns. Moreover, it is these concerns which
905             lead to the principles underlying the functionality of this module.
906            
907             =head2 On the needs of keyboard layout users
908            
909             Let's start with trivialities: different people have different needs
910             with respect to keyboard layouts. For a moment, ignore the question
911             of the repertoire of characters available via keyboard; then the most
912             crucial distinction corresponds to a certain scale. In the absence of
913             a better word, we use a provisional name "the required typing speed".
914            
915             One example of people on the "quick" (or "rabid"?) pole of this scale are
916             people who type a lot of text which is either "already prepared", or for
917             which the "quality of prose" is not crucial. Quite often, these people may
918             type in excess of 100 words per minute. For them, the most important
919             questions are of physical exhaustion from typing. The position
920             of most frequent letters relative to the "rest" finger position, whether
921             frequently typed together letters are on different hands (or at least
922             not on the same/adjacent fingers), the distance fingers must travel
923             when typing common words, how many keypresses are needed to reach
924             a letter/symbol which is not "on the face of the keyboard" - their
925             primary concerns are of this kind.
926            
927             On the other, "deliberate", pole these concerns cease to be crucial.
928             On this pole are people who type while they "create" the text, and
929             what takes most of their focus is this "creation" process. They may
930             "polish their prose", or the text they write may be overburdened by
931             special symbols - anyway, what they concentrate on is not the typing itself.
932            
933             For them, the details of the keyboard layout are important mostly in
934             the relation to how much they I the writer from the other
935             things the writer is focused on. The primary question is now not
936             "how easy it is to type this", but "how easy it is to I how
937             to type this". The focus transfers from the mechanics of finger movements
938             to the psycho/neuro/science of memory.
939            
940             These questions are again multifaceted: there are symbols one encounters
941             every minute; after you recall once how to access them, most probably
942             you won't need to recall them again - until you have a long interval when
943             you do not type. The situation is quite different with symbols you need
944             once per week - most probably, each time you will need to recall them again
945             and again. If such rarely used symbols/letters are frequent (since I
946             of them appear), it is important to have an easy way to find how to type them;
947             on the other hand, probably there is very little need for this way to
948             be easily memorizable. And for symbols which you need once per day, one needs
949             both an easy way to find how to type them, I the way to type them should
950             better be easily memorizable.
951            
952             Now add to this the fact that for different people (so: different usage
953             scenarios) this division into "all the time/every minute/every day/every week"
954             categories is going to be different. And one should not forget the important
955             scenario of going on vacation: when you return, you need to "reboot" your
956             typing skills from the dormant state.
957            
958             =head2 On “mixing” several “allied” layouts
959            
960             On the other hand, note that the questions discussed above are more or less
961             orthogonal: if the logic of recollection requires ω to be related in some
962             way to the W-key,
963             then it does not matter where the W-key is on the keyboard - the same logic
964             is applicable to the QWERTY base layout, or BÉPO one, or Colemak, or Dvorak.
965             This module concerns itself I with the questions of "consistency" and
966             the related question of "the ease of recall"; we care only about which symbols
967             relate to which "base keys", and do not care about where the base keys sit on
968             the physical keyboard.
969            
970             B The “main island” of the keyboard contains a 4×10 rectangle
971             of keys. So if a certain collection of special keys may be easily memorized
972             as a rectangular table, it is nice to be able to map this table to the
973             physical keyboard layout. This module contains tools making this task easy.
974            
975             Now consider the question of the character repertoire: a person may need ways
976             to type "continuously" in several languages; quite often one must type
977             a “standalone” foreign word in a sentence; in addition to this, there may
978             be a need to I type "standalone" characters or symbols outside
979             the repertoire of these languages. Moreover, these languages may use different
980             scripts (such as Polish/Bulgarian/Greek/Arabic/Japanese), or may share a
981             "bulk" of their characters, and differ only in some "exceptional letters".
982             To add insult to injury, these "exceptional letters" may be rare in the language
983             (such as ÿ in French or à in Swedish) or may have a significant letter frequency
984             (such as é in French) or be somewhere in between (such as ñ in Spanish).
985            
986             And the non-language symbols do not need to be the I symbols (although
987             often they are). An English-language discussion of etymology at the coffee table
988             may lead to a need to write down a word in polytonic Greek, or Old Norse;
989             next moment one would need to write a phonetic transcription in IPA/APA
990             symbols. A discussion of keyboard layout may involve writing down symbols
991             for non-character keys of the keyboard. A typography freak would optimize
992             a document by fine-tuned whitespaces. Almost everybody needs arrow symbols,
993             and many people would use box drawing characters if they had simple access
994             to them.
995            
996             Essentially, this means that as far as it does not impact other accessibility
997             goals, it makes sense to have unified memorizable access to as many
998             symbols/characters as possible. (An example of impacting other aspects:
999             MicroSoft's (and IBM's) "US International" keyboards steal characters C<`~'^">:
1000             typing them produces "unexpected results" - they are deadkeys. This
1001             significantly simplifies entering characters with accents, but makes it
1002             harder to enter non-accented characters.)
1003            
1004             =head2 The simplest rules of design of “large” keyboard layouts
1005            
1006             One of the most known principles of design of human-machine interaction
1007             is that "simple common tasks should be simple to perform, and complicated
1008             tasks should be possible to perform". I strongly disagree with this
1009             principle - IMO, it lacks a very important component: "a gradual increase
1010             in complexity". When a certain way of doing things is easy to perform, and another
1011             similar way is still "possible to perform", but on a very elevated level
1012             of complexity, this leads to a significant psychological barrier erected
1013             between these two ways. Even when switching from the first way to the other one
1014             has significant benefits, this barrier leads to self-censorship. Essentially,
1015             people will
1016             ignore the benefits even if they exceed the penalty of "the elevated level of
1017             complexity" mentioned above. And IMO self-censorship is the worst type of
1018             censorship. (There is a certain similarity between this situation and that
1019             of "self-fulfilling prophecies". "People won't want to do this, so I would not
1020             make it simpler to do" - and now people do not want to do this...)
1021            
1022             So I would add another clause to the law above: "and moderately complicated
1023             tasks should remain moderately hard to perform". What does it tell us in
1024             the situation of keyboard layout? One can separate several levels of
1025             complexity.
1026            
1027             =over 10
1028            
1029             =item Basic:
1030            
1031             There should be some "base keyboards": keyboard layouts used for continuous
1032             typing in a certain language or script. Access from one base keyboard to
1033             letters of another should be as simple as possible.
1034            
1035             =item By parts:
1036            
1037             If a symbol can be thought of as a combination of certain symbols accessible
1038             on the base keyboard, one should be able to "compose" the symbol: enter it
1039             by typing a certain "composition prefix" key then the combination (as far
1040             as the combination is unambiguously associated to one symbol).
1041            
1042             The "thoughts" above should be either obvious (as in "combining a and e should
1043             give æ") or governed by simple mnemonic rules; the rules should cover as
1044             wide a range as possible (as in "Greek/Coptic/Hebrew/Russian letters are
1045             combined as G/C/H/R and the corresponding Latin letter; the correspondence is
1046             phonetic, or, in presence of conflicts, visual").
1047            
1048             =item Quick access:
1049            
1050             As many non-basic letters as possible (of those expected to appear often)
1051             should be available via shortcuts. Same should be applicable to starting
1052             sequences of composition rules (such as "instead of typing C
1053             and C<'> one can type C").
1054            
1055             =item Smart access
1056            
1057             Certain non-basic characters may be accessible by shortcuts which are not
1058             based on composition rules. However, these shortcuts should be deducible
1059             by using simple mnemonic rules (such as "to get a vowel with `-accent,
1060             type C-key with the physical keyboard's key sitting below the vowel key").
1061            
1062             =item Superdeath:
1063            
1064             If everything else fails, the user should be able to enter a character by
1065             its Unicode number (preferably in the most frequently referenced format:
1066             hexadecimal).
1067            
1068             =back
1069            
1070             =over
1071            
1072             B This does not seem to be easily achievable, but it looks like a very nifty
1073             UI: a certain HotKey is reserved (e.g., C);
1074             when it is tapped, and a character-key is pressed (for example, B) a
1075             menu-driven interface pops up where user may navigate to different variants
1076             of B, Beta, etc - each of variants with a hotkey to reach I, and with
1077             instructions how to reach it later from the keyboard without this UI.
1078            
1079             Also: if a certain timeout passes after pressing the initial HotKey, an instruction
1080             what to do next should appear.
1081            
1082             =back
1083            
1084             =head2 The finer rules of design of “large” keyboard layouts
1085            
1086             Here are the finer points elaborating on the levels of complexity discussed above:
1087            
1088             =over 4
1089            
1090             =item 1
1091            
1092             It looks reasonable to allow "fuzzy mnemonic rules": the rules which specify
1093             several possible variants where to look for the shortcut (up to 3-4 variants).
1094             If/when one forgets the keying of the shortcut, but remembers such a rule,
1095             a short experiment with these positions allows one to reconstruct the lost
1096             memory.
1097            
1098             =item
1099            
1100             The "base keyboards" (those used for continuous typing in a certain language
1101             or script) should be identical to some "standard" widely used keyboards.
1102             These keyboards should differ from each other in position of keys used by the
1103             scripts only; the "punctuation keys" should be in the same position. If a
1104             script B has more letters than a script A, then a lot of
1105             "punctuation" on the layout A will be replaced by letters in the layout B.
1106             This missing punctuation should be made available by pressing a modifier
1107             (C? compare with L's top row).
1108            
1109             =item
1110            
1111             If more than one base keyboard is used, there must be a quick access:
1112             if one needs to enter one letter from layout B when the active layout is A, one
1113             should not be forced to switch to B, type the letter, then switch back
1114             to A. It should better be available I on a prefixed combination "C".
1115            
1116             =item
1117            
1118             One should consider what the C does when the layouts A
1119             and B are identical on a particular key (e.g., punctuation). One can go with the "Occam's
1120             razor" approach and make the C prefix into the do-nothing identity map.
1121             The alternative is to make it access some symbols useful both for
1122             script A and script B. It is a judgement call.
1123            
1124             Note that there is a gray area when layouts A and B are not identical,
1125             but a key C produces punctuation in layout A, and a letter in layout
1126             B. Then when in layout B, this punctuation is available on C,
1127             so, in principle, C would duplicate the functionality
1128             of C. Compare with "there is more than one way to do it" below;
1129             remember that OS (or misbehaving applications) may make some keypresses
1130             "unavailable". I feel that in these situations, “having duplication” is
1131             a significant advantage over “having some extra symbols available”.
1132            
1133             =item
1134            
1135             The considerations in two preceding parts are applicable also in the
1136             case when there are more “allied” layouts than A and B. Ways to make it possible
1137             are numerous: one can have several alternative C’s, B one
1138             can use a I prefix key C. With a large enough
1139             collection of layouts, a combination of both approaches may be visualized
1140             as a chain of layouts
1141            
1142             S< >… C B C
1143            
1144             here we have two quick access prefix keys, the left one C, and the right one
1145             C. Superscripts C<² ³ …> mean “pressing the prefix key several times”;
1146             the prefix keys move one left/right along the chain of layouts.
1147            
1148             =item
1149            
1150             The three preceding parts were concerned with entering one character from
1151             an “allied” layout. To address another frequent need, entering one word
1152             from an “allied” layout, yet another approach may be needed. The solution may
1153             be to use a certain combination of modifier keys. (How to choose useful
1154             combinations? See: L<"A convenient assignment of KBD* bitmaps to modifier keys">.)
1155            
1156             (Using “exotic” modifier keys may be impossible in some badly coded applications.
1157             This should not stop one from implementing this feature: sometimes one has a choice
1158             from several applications performing the same task. Moreover, since this feature
1159             is a “frill”, there is no pressing need to have it I available.)
1160            
1161             =item
1162            
1163             Paired symbols (such as ≤≥, «», ‹›, “”, ‘’) should be put on paired
1164             keyboard's keys: <> or [] or ().
1165            
1166             =item
1167            
1168             "Directional symbols" (such as arrows) should be put either on numeric keypad
1169             or on a 3×3 subgrid on the letter-part of the keyboard (such as QWE/ASD/ZXC).
1170             (Compare with [broken?] implementation in L.)
1171            
1172             =item
1173            
1174             For symbols that are naturally thought of as sitting in a table, one can
1175             create intuitive mapping of quite large tables to the keyboard. Split each
1176             key in halves by a horizontal line, think of C as sitting in the
1177             top half. Then ignoring C<`~> key and most of punctuation on the right
1178             hand side, keyboard becomes an 8×10 grid. Taking into account C
1179             modifier (either as an extra bit, or as splitting a key by a horizontal line),
1180             one can map up to 8×10×2 (or 8×20) table to a keyboard.
1181            
1182             B Think of L.
1183            
1184             =item
1185            
1186             Cheatsheets are useful. And there are people who are ready to dedicate a
1187             piece of their memory to where on a layout is a particularly useful to them
1188             symbol. So even if there is no logical position for a certain symbol, but
1189             there is an empty slot on layout, one should not hesitate in using this slot.
1190            
1191             However, this I<is> distracting to people who do not want to dedicate
1192             their memory to "special cases". So it makes sense to have three kinds of
1193             cheatsheets for layouts: one with special cases ignored (useful for most
1194             people), one with all general cases ignored (useful for checks "is this
1195             symbol available in some place I do not know about" and for memorization),
1196             and one with all the bells and whistles.
1197            
1198             (Currently this module allows emitting HTML keyboard layouts with such
1199             information indicated by classes in markup. The details may be treated
1200             by the CSS rules.)
1201            
1202             =item
1203            
1204             "There is more than one way to do it" is not a defect, it is an asset.
1205             If it is a reasonable expectation to find a symbol X on keypress K', and
1206             the same holds for keypress K'' I they both do not conflict with other
1207             "being intuitive" goals, go with both variants. Same for 3 variants, 4
1208             - now you get my point.
1209            
1210             B The standard Russian phonetic layout has Ё on the C<^>-key; on the
1211             other hand, Ё is a variant of Е; so it makes sense to have Ё available on
1212             C as well. Same for Ъ and Ь.
1213            
1214             =item
1215            
1216             Dead keys which are "abstract" (as opposed to being related to letters
1217             engraved on physical keyboard) should better be put on modified state
1218             of "zombie" keys of the keyboard (C, C, C, C).
1219            
1220             B Making C a prefix key may lead to usability issues
1221             for people used to type CAPITALIZED PHRASES by keeping C pressed
1222             all the time. As a minimum, the symbols accessed via C
1223             should be strikingly different from those produced by C so that
1224             such problems are noted ASAP. Example: on the first sight, producing
1225             C on C or C
1226             looks like a good idea. Do not do this: the visually undistinguishable
1227             C would lead to significantly hard-to-debug problems if
1228             it was unintentional.
1229            
1230             =back
1231            
1232            
1233             =head2 Explanation of keyboard layout terms used in the docs
1234            
1235             The aim of this module is to make keyboard layout design as simple as
1236             possible. It turns out that even very elaborate designs can be made
1237             quickly and the process is not very error-prone. It looks like certain
1238             avenues not tried before are now made possible; at least I'm not aware of
1239             other attempts in this direction. One can make layouts which can be
1240             "explained" very concisely, while they contain thousand(s) of accessible
1241             letters.
1242            
1243             Unfortunately, being in uncharted territory, in my explanations I'm
1244             forced to use home-grown terms. So be patient with me... The terms are
1245             I, I, I and I. (One may want to compare them
1246             with what ISO 9995 does: L…. On
1247             the other hand, most parts of ISO 9995 look as remote from being ergonomic
1248             [in the sense discussed in these sections] as one may imagine!)
1249            
1250             In what follows,
1251             the words I and I are used interchangeably. A I
1252             means a physical key on a keyboard tapped (possibly together with
1253             one of modifiers C, C - or, rarely, L<[right] C|http://www.microsoft.com/resources/msdn/goglobal/keyboards/kbdcan.html>;
1254             more advanced layouts may use “extra” modifiers). The key C
1255             is often marked as such on the keycap, otherwise it is just the "right" C key; at least
1256             on Windows, for many simple layouts it can be replaced by C. What is a I?
1257             Tapping such a key does not produce any letter, but modifies what the next
1258             keypress would do (sometimes it is called a I; in C terms,
1259             it is probably a I. Sometimes, prefix keys may be “chained”; then
1260             insertion of a character happens not on the second keypress, but on the third one [or fourth/etc]).
1261            
1262             To describe which character (or a prefix) is produced by a keypress one must describe
1263             I: which prefix keys were already tapped, and which modifier keys are
1264             currently pressed. It is natural to consider the C modifier specially: let’s
1265             remove it from the context; now given a context, a keypress may produce two characters:
1266             one with C, one without. A I describes such a pair of characters (or
1267             prefixes) for every key of the keyboard.
1268            
1269             So, the plain I is the part of keyboard layout accessible by using only
1270             non-prefix keys (possibly in combination with C). Many keyboard layouts
1271             have up to 2 additional layers accessible without prefix keys: the C-layer and C-layer.
1272            
1273             On the simplest layouts, such as "US" or "Russian", there are no prefix keys or “extra”
1274             modifier keys -
1275             but this is only feasible for languages which use very few characters with
1276             diacritic marks. However, note that most layouts do not use
1277             C-layer - sometimes it is claimed that this causes problems with
1278             system/application interaction.
1279            
1280             A I consists of the layers of the layout accessible with a particular
1281             combination of prefix keys. The I consists of the plain layer
1282             and “additional prefix-less layers” of the layout;
1283             it is the part of layout accessible without switching "sticky state" and
1284             without using prefix keys. There may be up to 3 layers (Plain, C, C)
1285             per face on the standard Windows keyboard layouts. A I is a face exposed after pressing
1286             a prefix key (or a chain of prefix keys).
1287            
1288             A I is a collection of faces: the primary face, plus one face per
1289             a defined prefix-key (or a prefix chain). Finally, a I is a collection of personalities
1290             (switchable by sticky keys [like C] and/or in other system-specific ways)
1291             designed to work smoothly together. For example, in multi-script settings, there may be:
1292            
1293             =over 4
1294            
1295             =item *
1296            
1297             one personality per script (e.g., Latin/Greek/Cyrillic/Arabic);
1298            
1299             =item *
1300            
1301             every personality may have several script-specific additional (“satellite”) faces (one per a particular diacritic for Latin
1302             personality, one for regional/historic “flavors” for Cyrillic personality, one per aspiration type for Greek personality, etc);
1303            
1304             =item *
1305            
1306             every personality may also have “liaison” faces accessing the base faces of other personalities;
1307            
1308             =item *
1309            
1310             with chained prefixes, it is easy to design intuitive ways to access satellite faces of other personalities;
1311             then every personality will also contain the satellite faces of I personalities (on different prefix chains!).
1312            
1313             =item *
1314            
1315             For access to “technical symbols” (currencies/math/IPA etc), the personalities may share a certain collection
1316             of faces assigned to the same prefix keys.
1317            
1318             =back
1319            
1320             =head2 Example of keyboard layout groups
1321            
1322             Start with a I elaborate example (it is more or less a simplified variant
1323             of the L layout|http://k.ilyaz.org>). A keyboard layout group may consist of
1324             phonetically matched Latin and Cyrillic personalities, and visually matched Greek
1325             and Math personalities. Several prefix-keys may be shared by all 4 of these
1326             personalities; in addition, there would be 4 prefix-keys allowing access to primary
1327             faces of these 4 personalities from other personalities of the group. Also, there
1328             may be specialised prefix keys tuned for particular need of entering Latin script,
1329             Cyrillic script, Greek script, and Math.
1330            
1331             Suppose that there are 8 specialized-for-Latin prefix-keys (for example, name them
1332            
1333             grave/tilde/hat/breve/ring_above/macron/acute/diaeresis
1334            
1335             although in practice each one of them may do more than the name suggests).
1336             Then the Latin personality will have the following 13 faces:
1337            
1338             Primary/Latin-Primary/Cyrillic-Primary/Greek-Primary/Math-Primary
1339             grave/tilde/hat/breve/ring_above/macron/acute/diaeresis
1340            
1341             B Here Latin-Primary is the face one gets when one presses
1342             the Access-Latin prefix-key when in Latin mode; it may be convenient to define
1343             it to be the same as Primary - or maybe not. For example, if one defines it
1344             to be Greek-Primary, then this prefix-key has a convenient semantic of flipping
1345             between Latin and Greek modes for the next typed character: when in
1346             Latin, C would enter α, when in Greek, the same keypresses
1347             [now meaning "Latin-PREFIX-KEY α"] would enter "a".
1348            
1349             Assume that the only “extra” modifier used by the layout is C. Then each of
1350             these faces would consist of two layers: the plain one, and the C-
1351             one. For example, pressing C with a key on Greek face could add
1352             diaeresis to a vowel, or use a modified ("final" or "symbol") "glyph" for
1353             a consonant (as in σ/ς θ/ϑ). Or, on Latin face, C may produce æ. Or, on a
1354             Cyrillic personality, AltGr-я (ya) may produce ѣ (yat').
1355            
1356             Likewise, the Greek personality may define special prefix-keys to access polytonic
1357             Greek vowels. “Chaining” these prefix keys after the C prefix
1358             key would make it possible to enter polytonic Greek letters from non-Greek
1359             personalities without switching to the Greek personality.
1360            
1361             With such a keyboard layout group, to type one Greek word in a Cyrillic text one
1362             would switch to the Greek personality, then back to Cyrillic; but when all one
1363             needs to type now is only one Greek letter, it may be easier to use the
1364             "Greek-PREFIX-KEY letter" combination, and save switching back to the
1365             Cyrillic personality. (Of course, for this to work the letter should be
1366             on the primary face of the Greek personality.)
1367            
1368             How to make it possible to easily enter a short Greek word when in Cyrillic mode?
1369             If one uses one more “extra” modifier key (say, C), one could
1370             reserve combinations of modifiers with this key to “use” other personality. Say,
1371             C would enter Greek β, C would enter
1372             Cyrillic б, etc.
1373            
1374             =head2 “Onion rings” approach to keyboard layout groups
1375            
1376             Looks too complicated? Try to think about it in a different way: there
1377             are many faces in a keyboard layout group; break them into 3 "onion rings":
1378            
1379             =over 4
1380            
1381             =item I faces
1382            
1383             one can "switch to such a face" and type continuously using
1384             this face without pressing prefix keys. In other words, these faces
1385             can be made "active" (in an OS-dependent way).
1386            
1387             When one CORE face is active, the letters in another CORE face are still
1388             accessible by pressing one particular prefix key before each of these
1389             letters. This prefix key does not depend on which core face is
1390             currently "active".
1391            
1392             =item I faces
1393            
1394             one cannot "switch to them", however, letters
1395             in these faces are accessible by pressing one particular prefix key
1396             before this letter. This prefix key does not depend on which
1397             core face is currently "active".
1398            
1399             =item I faces
1400            
1401             one cannot "switch to them", and letters in these faces
1402             are accessible from one particular core face only. One must press a
1403             prefix key before every letter in such faces.
1404            
1405             (In presence of “chained prefixes”, the description is less direct:
1406             these faces are much easier to access from one particular CORE face.
1407             From another CORE face, one must precede this prefix key by the
1408             access-that-CORE-face prefix.)
1409            
1410             =back
1411            
1412             For example, when entering a mix of Latin/Cyrillic scripts and math,
1413             it makes sense to make the base-Latin and base-Cyrillic faces into
1414             the core; it is convenient when (several) Math faces and a Greek face
1415             can be made universally accessible. On the other hand, faces containing
1416             diacritized Latin letters and diacritized Cyrillic letters should better
1417             be made satellite; this avoids a proliferation of prefix keys which would
1418             make typing slower.
1419            
1420             Comparing to the terms of the preceding section, the CORE faces correspond
1421             to personalities. A personality I the base face from other personalities;
1422             it may also import satellite faces from other personalities.
1423            
1424             In a personality, one should make access to satellite faces, the imported
1425             CORE faces, and the universally accessible faces as simple as possible.
1426             If “other” satellite faces are imported, the access to them may be more
1427             cumbersome.
1428            
1429             =head2 Large Latin layouts: on access to diacritic marks
1430            
1431             Every prefix key has a numeric I. On Windows, there are situations
1432             when this numeric ID may be visible to the user. (This module makes every
1433             effort to make this happen as rarely as possible. However, this effort
1434             blows up the size of the layout DLL, and at some moment one may hit the
1435             L.
1436             To reduce the size of the DLL, the module makes a triage, and won’t protect the ID from leaking in some rare cases.)
1437             When such a leak happens, what the user sees is the character with this codepoint.
1438             So it makes sense to choose the ID to be the codepoint of a character “related
1439             to what the prefix key ‘does’”.
1440            
1441             The logic: if the prefix keys add some diacritic, the ID should be the
1442             I related to this diacritic: either
1443             C’s 8-bit characters with high bit set, or
1444             if none with the needed glyph, suitable non-Latin-1 "spacing modifier letters" or
1445             "spacing clones of diacritics".
1446            
1447             If followed by “special keys”, one should be able to access other related
1448             modifier letters and combining characters (see L<"Classification of diacritics">
1449             and the section C in L
1450             layout|http://search.cpan.org/~ilyaz/UI-KeyboardLayout/examples/izKeys.kbdd>);
1451             one possible convenient choice is:
1452            
1453             =over 4
1454            
1455             =item The second press of the prefix key
1456            
1457             The principal combining mark;
1458            
1459             =item SPACE
1460            
1461             The primary non-ASCII spacing modifier letter;
1462            
1463             =item SPACE-related (NBSP, or C, or C)
1464            
1465             The secondary/ternary/etc modifier letter;
1466            
1467             =item digits (possibly with C and/or C)
1468            
1469             related combining marks (with C and/or C, other categories
1470             from L<"Classification of diacritics">).
1471            
1472             =item C<'> or C<"> (possibly with C)
1473            
1474             secondary/ternary/etc combining marks (or, if these are on
1475             digits, replace by prime-shape modifier chars).
1476            
1477             =back
1478            
1479             =head2 The choice of prefix keys
1480            
1481             Some stats on prefix keys: C uses 41 prefix keys for diacritics (but 15 are fake, see below!);
1482             L uses 24|http://www.macfreek.nl/memory/Mac_Keyboard_Layout> (not counting prefix №, action=specials
1483             on L:
1484            
1485             "'@2#3%5^67*8AaCcEeGghHjJ KkMmNnQqRrsUuvwWYyZz‘’“ default=terminator
1486             №ʺʹƧƨƐɛƼƽƄƅ⁊ȢȣƏəƆɔƎǝƔɣƕǶƞȠ K’ĸƜɯŊŋƢƣƦʀſƱʊʌƿǷȜȝƷʒʻʼʽ №
1487            
1488             ); bépo uses 20, while EurKey uses 8, and L uses 5|http://www.macfreek.nl/memory/Mac_Keyboard_Layout>.
1489             On the other end of spectrum, there are 10 US keyboard keys with "calculatable" relation to Latin diacritics:
1490            
1491             `~^-'",./? --- grave/tilde/hat/macron/acute/diaeresis/cedilla/dot/stroke/hook-above
1492            
1493             To this list one may add a "calculatable" key C<$> as I;
1494             on the other hand, one should probably remove C since C should better
1495             be "set in stone" to denote C<¿>. If one adds Greek, then the calculatable positions
1496             for aspiration are on C<[ ]> (or on C<( )>). Of widely used Latin diacritics, this
1497             leaves out I (and doubled I);
1498             these diacritics should be either “mixed in” with similar "calculatable" diacritics
1499             (for example, may either create a character with cedilla, or with
1500             ogonek — depending on the character), or should be assigned on less intuitive positions.
1501            
1502             Extra prefix keys of L|http://www.pentzlin.com/info2-9995-3-V3.pdf>:
1503             I.
1504             Additionally, the following diacritics produce only 4 precomposed characters: ṲṳḀḁ, so their use as prefix characters is questionable:
1505             I
1506             (Here ↓ is a shortcut for C, same with ↑ for C, and ↗ for C; ↺ means C, and ² means C.
1507             Combined arrows expand to multiple diacritics.)
1508            
1509             (Keep in mind that this list is just a conjecture; the standard does not distinguish combining characters
1510             and prefix keys, so it is not clear which keypresses produce combining characters, and which are prefix keys.)
1511            
1512             =head2 What follows is partially deprecated
1513            
1514             Parts of the following subsections are better explained in
1515             L;
1516             some other parts duplicate
1517            
1518             =head2 On principles of intuitive design of Latin keyboard
1519            
1520             Using tricks described below, it is easy to create a convenient map of vowels
1521             with 3 diacritics `¨´ to the QWERTY keyboard. However, some common
1522             (meaning: from Latin-1–10 of ISO 8859) letters from Latin alphabet
1523             cannot be composed this way; they are B<ÆÐÞÇIJØŒß>
1524             (one may need to add B<ªº>, as well as B<¡¿> for non-alphabetical symbols). It is crucial
1525             that these letters may be entered by an intuitively clear key of the keyboard.
1526             There is an obvious ASCII letter associated to each of these (e.g., B associated to the thorn
1527             B<Þ>), and in the best world just pressing this letter with C-modifier
1528             would produce the desired symbol.
1529            
1530             Note that ª may be associated to @; then º may be mapped to the nearby 2.
1531            
1532             There is only one conflict: both B<Ø>,B<Œ> "want" to be entered as C;
1533             this is the ONLY piece of arbitrariness in the design so far. After
1534             resolving this conflict, C-keys B are assigned their meanings,
1535             and cannot carry other letters (call them the “stuck in stone keys”).
1536            
1537             (Other keys "stuck in stone" are dead keys: it is important to have the
1538             glyph etched on these keyboard's keys similar to the task they perform.)
1539            
1540             Then there are several non-alphabetical symbols accessible through ISO 8859
1541             encodings. Assigning them C- access is another important task to perform.
1542             Some of these symbols come in pairs, such as ≤≥, «», ‹›, “”, ‘’; it makes
1543             sense to assign them to paired keyboard's keys: <> or [] or ().
1544            
1545             However, this task is in conflict of interests with yet another (!) task, so
1546             let us explain the needs answered by that task first.
1547            
1548             One can always enter accented letters using dead keys; but many people desire a
1549             quicker way to access them, by just pressing AltGr-key (possibly with
1550             shift). The most primitive keyboard designs (such as IBM International
1551             or Apple’s US (Extended)
1552            
1553             http://www.borgendale.com/uls.htm
1554             http://www.macfreek.nl/memory/Mac_Keyboard_Layout
1555            
1556             ) omit this step and assign only the NECESSARY letters for AltGr- access.
1557             (Others, like MicroSoft International, assign only a very small set.)
1558            
1559             This problem breaks into two tasks: choosing a repertoire of letters which
1560             will be typable this way, and mapping them to the keys of the keyboard.
1561             For example, EurKey choses to use ´¨`-accented characters B (except
1562             for B<Ỳ>), plus B<ÅÑ>; MicroSoft International does C<ÄÅÉÚÍÓÖÁÑß> only (and IBM
1563             International does
1564             none); Bepo does only B<ÉÈÀÙŸ> (but also has the Azeri B<Ə> available - which is
1565             not in ISO 8859 - and has B<Ê> on the 105th key "C<2nd \|>"),
1566             L has none
1567             (at least if one does not count uc characters without lc counterparts), same for L
1568            
1569             http://bepo.fr/wiki/Manuel
1570             http://bepo.fr/wiki/Utilisateur:Masaru # old version of .klc
1571             http://www.jlg-utilities.com/download/us_jlg.klc
1572             http://tlt.its.psu.edu/suggestions/international/accents/codemacext.html
1573             or look for "a graphic of the special characters" on
1574             http://web.archive.org/web/20080717203026/http://homepage.mac.com/thgewecke/mlingos9.html
1575            
1576             =head2 Our solution
1577            
1578             First, the answer (the alternative, illustrated description is on
1579             L):
1580            
1581             =over 10
1582            
1583             =item Rule 0:
1584            
1585             non-ASCII letters which are not accented by B<` ´ ¨ ˜ ˆ ˇ ° ¯ ⁄> are entered by
1586             C-keys "obviously associated" to them. Supported: B<ÆÐÞÇIJŒß>.
1587            
1588             =item Rule 0a:
1589            
1590             Same is applicable to B<Ê> and B<Ñ>.
1591            
1592             =item Rule 1:
1593            
1594             Vowels B accented by B<¨´`> are assigned the so called I<"natural position">:
1595             3 “alphabetic” rows of keyboard are allocated to accents (B<¨> is the top, B<´> is the middle, B<`> is
1596             the bottom row of 3 alphabetic-rows on keyboard - so B<À> is on B-row),
1597             and are on the same diagonal as the base letter. For left-hand
1598             vowels (B,B) the diagonal is in the direction of \, for right hand
1599             vowels (B,B,B,B) - in the direction of /.
1600            
1601             =item Rule 1a:
1602            
1603             If the "natural position" is occupied, the neighbor key in the
1604             direction of "the other diagonal" is chosen. (So for B,B it is
1605             the /-diagonal, and for right-hand vowels B it is the \-diag.)
1606            
1607             =item Rule 1b:
1608            
1609             This neighbor key is below unless the key is on bottom row - then it is above.
1610            
1611             Supported by rules "1": all but B<ÏËỲ>.
1612            
1613             =item Rule 2:
1614            
1615             Additionally, B<Å>,B<Ø>,B<Ì> are available on keys B,B<P>,B.
1616             B<ª> is on B<@>, and B<º> is on the nearby B<2>.
1617            
1618             =back
1619            
1620             =head2 Clarification:
1621            
1622             B<0.> If you remember only Rule 0, you still can enter all Latin-1 letters using
1623             Rule 0; all you need to remember is that most of the dead keys are at “obvious”
1624             positions: for L|http://k.ilyaz.org> it is B<`';"~^.,-/> for B<`´¨¨˜ˆ°¸¯ ̸>
1625             (B<¨> is repeated on B<;">!) and B<6> for B<ˇ> (memorizable as “opposite” of B<^> for B<ˆ>).
1626            
1627             (What the rule 0 actually says is: "You do not need to memorize me". ;-)
1628            
1629             (If you need a diacritic which is only I to one of the listed diacritics,
1630             there is a good chance that the dead key above L.)
1631            
1632             B<1.> If all you remember are rules 1,1a, you can calculate the position of the
1633             AltGr-key for AEYUIO accented by `´¨ up to a choice of 3 keys (the "natural
1634             key" and its 2 neighbors) - which are quick to try all if you forgot the
1635             precise position. If you remember rules 1,1ab, then this choice is down to
1636             2 possible candidates.
1637            
1638             Essentially, all you must remember in detail is that the "natural positions"
1639             form a B — \ on left, / on right, and in case of bad luck you
1640             should move in the direction of other diagonal one step. Then a letter is
1641             either in its "obvious position", or in one of 3 modifications of the
1642             “natural position”.
1643            
1644             Note that these rules cover I the Latin letters appearing in
1645             Latin-1..Latin-10, I we resolve the B<Œ/Ø>-conflict by putting B<Œ> to the key B (since
1646             B<Ø> may be entered using CB)!
1647            
1648             =head2 Motivations:
1649            
1650             It is important to have a logical way to quickly understand whether a letter
1651             is quickly accessible from a keyboard, and on which key. (Or, maybe, to find
1652             a small set of keys on which a letter may be present — then, if one forgets,
1653             it is possible to quickly un-forget by trying a small number of keys).
1654            
1655             In fact, the problem of choosing “the optimal” assignment (by minimizing the
1656             rules to remember) has an almost unique solution. Understanding this solution
1657             (to a problem which is essentially combinatorial optimization) may be a great help
1658             in memorizing the rules.
1659            
1660             The idea: we assign alphabetical Latin characters only to alphabetical keys
1661             on the keyboard; this frees the way to use (paired) symbol keys to enter (paired)
1662             Unicode symbols. Now observe the diagonals on the alphabetic part of the
1663             keyboard: \-diagonals (like B) and /-diagonals (like B). Each diagonal
1664             contains 3 (or fewer) alphabetic keys; what we want is to assign ¨-accent to the top
1665             one, ´-accent to the middle one, and `-accent to the bottom one.
1666            
1667             On the left-hand part of the keyboard, use \-diagonals, on the right-hand
1668             part use /-diagonals; now each diagonal contains EXACTLY 3 alphabetic keys.
1669             Moreover, the diagonals which contain vowels B do not intersect!
1670            
1671             If we have not decided to have keys set in stone, this would be all - we
1672             would get "completely predictable" access to B<´¨`>-accented characters B.
1673             For example, B<Ÿ> would be accessible on CB, B<Ý> on CB, B<Ỳ> on CB.
1674             Unfortunately, the diagonals contain keys C set in stone. So we need
1675             a way to "move away" from these keys. The rule is very simple: we move
1676             one step away in the direction of "other" diagonal (/-diagonal on the left
1677             half, and \-diagonal on the right half) one step down (unless we start
1678             on keys B, B where "down" is impossible and we move up to B or B).
1679            
1680             Examples: B<Ä> is on B, B<Á> "wants to be" on B (used for C<Æ>), so it is moved to
1681             C; B<Ö> wants to be on B (already used for B<Ø> or B<Œ>), and is moved away to B;
1682             B<È> wants to be on B (occupied by B<Ç>), but is moved away to B.
1683            
1684             There is no way to enter B<Ï> using this layout (unless we agree to move it
1685             to the "8*" key, which may conflict with convenience of entering typographic
1686             quotation marks). Fortunately, this letter is rare (compared even to B<Ë>
1687             which is quite frequent in Dutch). So there is no big deal that it is not
1688             available for "handy" input - remember that one can always use deadkeys.
1689            
1690             http://en.wikipedia.org/wiki/Letter_frequency#Relative_frequencies_of_letters_in_other_languages
1691            
1692             Note that the keys B<P> and B are not engaged by this layout; since B<P>
1693             is a neighbor of B, it is natural to use it to resolve the conflict
1694             between B<Ø> or B<Œ> (which both want to be set in stone on B). This leaves
1695             only the key B unengaged; but what we do not cover are two keys B<Å> and B<Ñ>
1696             which are relatively frequent in Latin-derived European languages.
1697            
1698             Note that B<Ì> is moderately frequent in Italian, but B<Ñ> is much more frequent
1699             in Spanish. Since B<Ì> and B<Ñ> want to be on the same key (which on many keyboards is taken by
1700             B<Ñ>), it makes sense to prefer B<Ñ>… Likewise, B<Ê> is much more frequent
1701             than B<Ë>; switch them.
1702            
1703             This leaves only the key B unassigned, I a very rare B<Ỳ> on B. In
1704             L|http://k.ilyaz.org>, one puts B<Å> and B<Ì> there. This completes
1705             the explanation of the rule 2.
1706            
1707             =head2 On possibilities of merging 2 diacritics on one prefix key
1708            
1709             With many diacritics, and the limited mnemonically-viable positions on
1710             the keyboard, it makes sense to merge several diacritics on the same prefix key.
1711             Possible candidates are cedilla/ogonek/comma-below (on C),
1712             dot-above/ring-above/dot-below (on C), caron/breve, circumflex/inverted-breve (on C
1713             In some cases, only one of the diacritics would be applicable to a particular character.
1714             Otherwise, one must decide which of several choices to prefer. The notes below may be
1715             useful when designing such preferences. (This module can take most of such choices
1716             automatically due to knowledge of L
1717             of characters; this age correlates well with expected frequency of use.)
1718            
1719             Another trick discussed below is implementing a rare diacritic X by applying the diacritic Y to a character
1720             with pre-composed diacritic Z.
1721            
1722             U-caron: ǔ, Ǔ which is used to indicate u in the third tone of Chinese language pinyin.
1723             But U-breve ŭ/Ŭ is used in Latin encodings.
1724             Ǧ/ǧ (G with caron) is used, but only in "exotic" or old languages (has no
1725             combined form - while G-breve ğ/Ğ is in Latin encodings).
1726             A-breve Ă: A-caron Ǎ is not in Latin-N; apparently, is used only in pinyin,
1727             Zarma, Hokkien, Vietnamese, IPA, transliteration of Old Latin, Bible and Cyrillic's big yus.
1728            
1729             In EurKey: only a takes breve, the rest take caron (including G but not U)
1730            
1731             Merging ° and dot-accent ˙ in Latin-N: only A and U take °, and they
1732             do not take dot-accent. In EurKey: also small w,y take ring accent; same in
1733             Bepo - but they do not take dot accent in Latin-N.
1734            
1735             Double-´ and cornu (both on a,u only) can be taken by ¨ or ˙ on letters with
1736             ¨ precombined (in Unicode ¨ is not precombined with diaeresis or dots).
1737             But one must special-case Ë and Ï and Ø (have Ê and IJ instead; IJ takes no accents,
1738             but Ê takes acute, grave, tilde and dot below...)! Æ takes acute and macron; Ø takes acute.
1739            
1740             Actually, cornu=horn is only on o,u, so using dot/ring on ö and ü is very viable...
1741            
1742             So for using AltGr-letter after deadkeys: diaeresis can take dot above, hat and wedge, diaeresis.
1743             Likewise, ` and ´ are not precombined together (but there is a combined
1744             combining mark). So one can do something else on vowels (ogonek?).
1745            
1746             Applying ´ to `-accented forms: we do not have ỳ (on AltGr-keys), so must use "the natural position"
1747             which is mixed with Ñ (takes no accents) and Ç (takes acute!!!).
1748            
1749             s, t do not precombine with `; so can use for the "alternative cedilla".
1750            
1751             Only a/u/w/y take ring, and they do not take cedilla. Can merge.
1752            
1753             Bepo's hook above; ảɓƈɗẻểƒɠɦỉƙɱỏƥʠʂɚƭủʋⱳƴỷȥ ẢƁƇƊẺỂƑƓỈƘⱮỎƤƬỦƲⱲƳỶȤ
1754            
1755             perl -wlnae "next unless /HOOK/; push @F, shift @F; print qq(@F)" NamesList.txt | sort | less
1756            
1757             Of capital letters only T and Y take different kinds of hooks... (And for T both are in Latin-Extended-B...)
1758            
1759            
1760             =head1 Useful tidbits from Unicode mailing list
1761            
1762             =for html
1763            
1764            
1765             =head2 On keyboards
1766            
1767             On MS keyboard (absolutely wrong!)
1768            
1769             http://unicode.org/mail-arch/unicode-ml/y2012-m05/0268.html
1770            
1771             Symbols for Keyboard keys:
1772            
1773             http://unicode.org/mail-arch/unicode-ml/Archives-Old/UML009/0204.html
1774             “Menu key” variations:
1775             http://unicode.org/mail-arch/unicode-ml/Archives-Old/UML009/0239.html
1776             Role of ISO/IEC 9995, switchable keycaps
1777             http://unicode.org/mail-arch/unicode-ml/Archives-Old/UML009/0576.html
1778            
1779             On the other hand, having access to text only math symbols makes it possible to implement it in computer languages, making source code easier to read.
1780            
1781             Right now, I feel there is a lack of keyboard maps. You can develop them on your own, but that is very time consuming.
1782            
1783             http://unicode.org/mail-arch/unicode-ml/y2011-m04/0117.html
1784            
1785             Fallback in “smart keyboards” interacting with Text-Service unaware applications
1786            
1787             http://unicode.org/mail-arch/unicode-ml/y2014-m03/0165.html
1788            
1789             Keyboards - agreement (5 scripts at end)
1790            
1791             ftp://ftp.cen.eu/CEN/Sectors/List/ICT/CWAs/CWA-16108-2010-MEEK.pdf
1792            
1793             Need for a keyboard, keyman examples; why "standard" keyboards are doomed
1794            
1795             http://unicode.org/mail-arch/unicode-ml/y2010-m01/0015.html
1796             http://unicode.org/mail-arch/unicode-ml/y2010-m01/0022.html
1797             http://unicode.org/mail-arch/unicode-ml/y2010-m01/0036.html
1798             http://unicode.org/mail-arch/unicode-ml/y2010-m01/0053.html
1799            
1800             =head2 History of Unicode
1801            
1802             Unicode in 1889
1803            
1804             http://www.archive.org/stream/unicodeuniversa00unkngoog#page/n3/mode/2up
1805            
1806             Structure of development of Unicode
1807            
1808             http://unicode.org/mail-arch/unicode-ml/y2006-m07/0056.html
1809             http://unicode.org/mail-arch/unicode-ml/y2005-m07/0099.html
1810             I don't have a problem with Unicode. It is what it is; it cannot
1811             possibly be all things to all people:
1812             http://unicode.org/mail-arch/unicode-ml/y2005-m07/0101.html
1813            
1814             Control characters’ names
1815            
1816             http://unicode.org/mail-arch/unicode-ml/y2014-m03/0036.html
1817            
1818             Compromises vs reality
1819            
1820             http://unicode.org/mail-arch/unicode-ml/y2010-m02/0106.html
1821             http://unicode.org/mail-arch/unicode-ml/y2010-m02/0117.html
1822            
1823             Stability of normalization
1824            
1825             http://unicode.org/mail-arch/unicode-ml/y2005-m07/0055.html
1826            
1827             Universality vs affordability
1828            
1829             http://unicode.org/mail-arch/unicode-ml/y2007-m07/0157.html
1830            
1831             Drachma
1832            
1833             http://unicode.org/mail-arch/unicode-ml/y2012-m05/0167.html
1834             http://std.dkuug.dk/jtc1/sc2/wg2/docs/n3866.pdf
1835            
1836             w-ring is a stowaway
1837            
1838             http://unicode.org/mail-arch/unicode-ml/y2012-m04/0043.html
1839            
1840             History of squared pH (and about what fits into ideographic square)
1841            
1842             http://unicode.org/mail-arch/unicode-ml/y2012-m02/0123.html
1843             http://unicode.org/mail-arch/unicode-ml/y2013-m09/0111.html
1844            
1845             Silly quotation marks: 201b, 201f
1846            
1847             http://en.wikipedia.org/wiki/Quotation_mark_glyphs
1848             http://unicode.org/mail-arch/unicode-ml/y2006-m06/0300.html
1849             http://unicode.org/mail-arch/unicode-ml/y2006-m06/0317.html
1850             http://en.wikipedia.org/wiki/Comma
1851             http://en.wikipedia.org/wiki/%CA%BBOkina
1852             http://en.wikipedia.org/wiki/Saltillo_%28linguistics%29
1853             http://unicode.org/mail-arch/unicode-ml/y2006-m06/0367.html
1854             http://unicode.org/unicode/reports/tr8/
1855             under "4.6 Apostrophe Semantics Errata"
1856            
1857             OHM: In modern usage, for new documents, this character should not be used
1858            
1859             http://unicode.org/mail-arch/unicode-ml/y2011-m08/0060.html
1860            
1861             Uppercase eszett ß ẞ
1862            
1863             http://unicode.org/mail-arch/unicode-ml/y2007-m05/0007.html
1864             http://unicode.org/mail-arch/unicode-ml/y2007-m05/0008.html
1865             http://unicode.org/mail-arch/unicode-ml/y2007-m05/0142.html
1866             http://unicode.org/mail-arch/unicode-ml/y2007-m05/0045.html
1867             http://unicode.org/mail-arch/unicode-ml/y2007-m05/0147.html
1868             http://unicode.org/mail-arch/unicode-ml/y2007-m05/0170.html
1869             http://unicode.org/mail-arch/unicode-ml/y2007-m05/0196.html
1870            
1871             Should not use (roman numerals)
1872            
1873             http://unicode.org/mail-arch/unicode-ml/y2007-m11/0253.html
1874            
1875             Colors in Unicode names
1876            
1877             http://unicode.org/mail-arch/unicode-ml/y2011-m03/0100.html
1878            
1879             Xerox and interrobang
1880            
1881             http://unicode.org/mail-arch/unicode-ml/y2005-m04/0035.html
1882            
1883             Tibetan (history of encoding, relative difficulty of handling compared to cousins)
1884            
1885             http://unicode.org/mail-arch/unicode-ml/y2013-m04/0036.html
1886             http://unicode.org/mail-arch/unicode-ml/y2013-m04/0040.html
1887            
1888             Translation of 8859 to 10646 for Latvian was MECHANICAL
1889            
1890             http://unicode.org/mail-arch/unicode-ml/y2013-m06/0057.html
1891            
1892             Hyphens:
1893            
1894             http://unicode.org/mail-arch/unicode-ml/y2009-m10/0038.html
1895            
1896             NOT and BROKEN BAR
1897            
1898             http://unicode.org/mail-arch/unicode-ml/y2007-m12/0207.html
1899             http://www.cs.tut.fi/~jkorpela/latin1/ascii-hist.html#5C
1900            
1901             Combining power of generative features - implementor's view
1902            
1903             http://unicode.org/mail-arch/unicode-ml/y2004-m09/0145.html
1904            
1905             =head2 Greek and about
1906            
1907             OXIA vs TONOS
1908            
1909             http://www.tlg.uci.edu/~opoudjis/unicode/unicode_gkbkgd.html#oxia
1910            
1911             Greek letters for non-Greek
1912            
1913             http://stephanus.tlg.uci.edu/~opoudjis/unicode/unicode_interloping.html#ipa
1914            
1915             Macron and breve in Greek dictionaries
1916            
1917             http://www.unicode.org/mail-arch/unicode-ml/y2013-m08/0011.html
1918            
1919             LAMBDA vs LAMDA
1920            
1921             http://unicode.org/mail-arch/unicode-ml/y2010-m06/0063.html
1922            
1923             COMBINING GREEK YPOGEGRAMMENI equilibristic (depends on a vowel?)
1924            
1925             http://unicode.org/mail-arch/unicode-ml/y2006-m06/0299.html
1926             http://unicode.org/mail-arch/unicode-ml/y2006-m06/0308.html
1927             http://www.tlg.uci.edu/~opoudjis/unicode/unicode_adscript.html
1928             http://unicode.org/mail-arch/unicode-ml/y2008-m05/0046.html
1929            
1930             =head2 Latin, Cyrillic, Hebrew, etc
1931            
1932             Book Spine reading direction
1933            
1934             http://www.artlebedev.com/mandership/122/
1935            
1936             What is a "Latin" char
1937            
1938             http://unicode.org/forum/viewtopic.php?f=23&t=102
1939            
1940             Federal vs regional aspects of Latinization (a lot of flak; cp1251)
1941            
1942             http://peoples.org.ru/stenogramma.html
1943            
1944             Yiddish digraphs
1945            
1946             http://unicode.org/mail-arch/unicode-ml/y2011-m10/0121.html
1947            
1948             Cyrillic Script, Unicode status (+combining)
1949            
1950             http://scriptsource.org/cms/scripts/page.php?item_id=entry_detail&uid=ngc339csy8
1951             http://scriptsource.org/cms/scripts/page.php?item_id=entry_detail&uid=ktxptbccph
1952            
1953             The IBM 1401 Hebrew Letter Key
1954            
1955             http://www.qsm.co.il/Hebrew/HebKey.htm
1956            
1957             GOST 10859
1958            
1959             http://unicode.org/mail-arch/unicode-ml/y2009-m09/0082.html
1960             http://www.mailcom.com/besm6/ACPU-128.jpg
1961            
1962             Hebrew char input
1963            
1964             http://rishida.net/scripts/pickers/hebrew/
1965             http://rishida.net/scripts/uniview/#title
1966            
1967             Cyrillic soup
1968            
1969             http://czyborra.com/charsets/cyrillic.html
1970            
1971             How to encode Latin-in-fraktur
1972            
1973             http://unicode.org/mail-arch/unicode-ml/y2007-m01/0279.html
1974             http://unicode.org/mail-arch/unicode-ml/y2007-m01/0263.html
1975            
1976             The presentation of the existing COMBINING CEDILLA which has three major forms [ȘșȚț and Latvian Ģģ]
1977            
1978             http://unicode.org/mail-arch/unicode-ml/y2013-m06/0045.html
1979             http://unicode.org/mail-arch/unicode-ml/y2013-m06/0066.html
1980            
1981             =head2 Math and technical texts
1982            
1983             Missing: .... skew-orthogonal complement
1984            
1985             Math Almost-Text encoding
1986            
1987             http://unicode.org/notes/tn28/UTN28-PlainTextMath-v3.pdf
1988             http://unicode.org/mail-arch/unicode-ml/y2011-m10/0018.html
1989             For me 1/2/3/4 means unambiguously ((1/2)/3)/4, i.e. 1/(2*3*4)
1990            
1991             Unicode mostly encodes characters that are in use or have been
1992             encoded in other standards. While not semantically agnostic, it is
1993             much less oriented towards semantic clarifications and
1994             distinctions than many people might hope for (and this includes
1995             me, some of the time at least).
1996            
1997             Horizontal/vertical line/arrow extensions
1998            
1999             http://unicode.org/charts/PDF/U2300.pdf
2000             http://unicode.org/mail-arch/unicode-ml/y2003-m07/0513.html
2001             http://std.dkuug.dk/JTC1/SC2/WG2/docs/n2508.htm
2002            
2003             Pretty-printing text math
2004            
2005             http://code.google.com/p/sympy/wiki/PrettyPrinting
2006            
2007             Sub/Super on a terminal
2008            
2009             http://unicode.org/mail-arch/unicode-ml/y2008-m07/0028.html
2010            
2011             CR symbols
2012            
2013             http://unicode.org/mail-arch/unicode-ml/y2006-m07/0163.html
2014            
2015             Math layout
2016            
2017             http://unicode.org/mail-arch/unicode-ml/y2007-m01/0303.html
2018            
2019             Attempts of classification
2020            
2021             http://std.dkuug.dk/jtc1/sc2/wg2/docs/n4384.pdf
2022             http://std.dkuug.dk/JTC1/SC2/WG2/
2023            
2024             Buttons Target Also=not-in-series-of-n4384
2025             square 1🞌 2⬝ 3🞍 4▪ 5◾ 6◼ 7■ s⬛ (solid=s⬛)
2026             box 1□ 2🞎 3🞏 4🞐 5🞑 6🞒 7🞓 o⬜ 1🞔 2▣ 3🞕 🞖 =white square (open=o⬜) also: ▫◽◻⌑⧈⬚⸋⊡
2027             black circle 1⋅ 2∙ 3🞄 4⦁ 5⦁ 6⚫ 7● also: ·
2028             ring 1○ 2⭘ 3🞆 4🞆 5🞇 6🞈 7🞉 1⊙ 2🞊 3⦿ 🞋 =white circle also: ⊚⌾◌⚪⚬⨀◦⦾
2029             black diamond 1🞗 2🞘 3⬩ 4🞙 5⬥ 6◆
2030             white diamond ◇ 1🞚 2◈ 3🞛 🞜 also: ⋄
2031             black lozenge 1🞝 2🞞 3⬪ 4🞟 5⬧ 6⧫
2032             white lozenge ◊ 🞠
2033             centered n-gon 3⯅ 4⯀ 5⬟ 6⬣ 8⯃
2034             cent on-corner 3⯆ 4⯁ 5⯂ 6⬢ 8⯄ (also ⯇ ⯈)
2035             cross 1🞡 2🞢 3🞣 4🞤 5🞥 6🞦 7🞧
2036             saltire 1🞨 2🞩 3🞪 4🞫 5🞬 6🞭 7🞮 ≈ times (rotated cross)
2037             5-asterisk 1🞯 2🞰 3🞱 4🞲 5🞳 6🞴
2038             6-asterisk 1🞵 2🞶 3🞷 4🞸 5🞹 6🞺
2039             8-asterisk 1🞻 2🞼 3🞽 4🞾 5🞿
2040             light star 3🟀 4🟄 5🟉 6✶ 8🟎 12🟒
2041             medium star 3🟁 4🟅 5★ 6🟋 8🟏 12🟓
2042             (heavy) star 3🟂 4🟆 5🟊 6🟌 8🟐 12✹
2043             pinwheel 3🟃 4🟇 5✯ 6🟍 8🟑 12🟔 lighter: ✵
2044            
2045             =head2 Unicode and linguists
2046            
2047             Linguists mailing lists
2048            
2049             http://unicode.org/mail-arch/unicode-ml/y2009-m06/0066.html
2050            
2051             Obsolete IPA
2052            
2053             http://unicode.org/mail-arch/unicode-ml/y2009-m01/0487.html
2054             http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[%3Asubhead%3D%2F%28%3Fi%29archaic%2F%3A]+&g=
2055            
2056             Teuthonista (vowel guide p11, kbd p13)
2057            
2058             http://www.sprachatlas.phil.uni-erlangen.de/materialien/Teuthonista_Handbuch.pdf
2059            
2060             Glottals
2061            
2062             http://unicode.org/mail-arch/unicode-ml/y2008-m05/0151.html
2063             http://unicode.org/mail-arch/unicode-ml/y2008-m05/0163.html
2064             http://unicode.org/mail-arch/unicode-ml/y2008-m05/0202.html
2065             http://unicode.org/mail-arch/unicode-ml/y2008-m05/0205.html
2066            
2067             =head2 Spaces, invisible characters, VS
2068            
2069             Substitute blank
2070            
2071             http://unicode.org/mail-arch/unicode-ml/y2011-m07/0101.html
2072            
2073             Representing invisible characters
2074            
2075             http://unicode.org/mail-arch/unicode-ml/y2011-m07/0094.html
2076            
2077             Ignorable glyphs
2078            
2079             http://unicode.org/mail-arch/unicode-ml/y2007-m08/0132.html
2080             http://unicode.org/mail-arch/unicode-ml/y2007-m08/0138.html
2081             http://unicode.org/mail-arch/unicode-ml/y2007-m08/0120.html
2082            
2083             HOWTO: (non)dummy VS in fonts
2084            
2085             http://unicode.org/mail-arch/unicode-ml/y2007-m08/0118.html
2086            
2087             ZWSP ZWNJ WJ SHY NON-BREAKING HYPHEN
2088            
2089             http://unicode.org/mail-arch/unicode-ml/y2007-m08/0123.html
2090             http://unicode.org/mail-arch/unicode-ml/y2007-m07/0188.html
2091             http://unicode.org/mail-arch/unicode-ml/y2007-m07/0199.html
2092             http://unicode.org/mail-arch/unicode-ml/y2007-m07/0201.html
2093             http://unicode.org/mail-arch/unicode-ml/y2007-m06/0122.html
2094             http://unicode.org/mail-arch/unicode-ml/y2007-m01/0297.html
2095            
2096             On which base to draw a "standalone" diacritics
2097            
2098             http://unicode.org/mail-arch/unicode-ml/y2007-m07/0075.html
2099            
2100             Variation sequences
2101            
2102             http://unicode.org/mail-arch/unicode-ml/y2004-m07/0246.html
2103            
2104             =head2 Typesetting
2105            
2106             Upside-down text in CSS (remove position?)
2107            
2108             http://unicode.org/mail-arch/unicode-ml/y2012-m01/0037.html
2109            
2110             Unicode to PostScript
2111            
2112             http://unicode.org/mail-arch/unicode-ml/y2009-m06/0056.html
2113             http://www.linuxfromscratch.org/blfs/view/svn/pst/enscript.html
2114             http://unicode.org/mail-arch/unicode-ml/y2009-m06/0062.html
2115            
2116             Spacing: English and French
2117            
2118             http://unicode.org/mail-arch/unicode-ml/y2006-m09/0167.html
2119             http://unicode.org/mail-arch/unicode-ml/y2008-m05/0103.html
2120             http://unicode.org/mail-arch/unicode-ml/y2007-m08/0138.html
2121            
2122             Chicago Manual of Style
2123            
2124             http://unicode.org/mail-arch/unicode-ml/y2006-m01/0127.html
2125            
2126             Coloring parts of ligatures
2127             Implementations:
2128            
2129             http://unicode.org/mail-arch/unicode-ml/y2005-m06/0195.html
2130             http://unicode.org/mail-arch/unicode-ml/y2005-m06/0233.html
2131             http://unicode.org/mail-arch/unicode-ml/y2005-m06/0208.html
2132             GPOS
2133             http://unicode.org/mail-arch/unicode-ml/y2005-m06/0167.html
2134            
2135             Chinese typesetting
2136            
2137             http://idsgn.org/posts/the-end-of-movable-type-in-china/
2138            
2139             @fonts and non-URL URIs
2140            
2141             http://unicode.org/mail-arch/unicode-ml/y2010-m01/0156.html
2142            
2143             =head2 Looking at the future
2144            
2145             Why and how to introduce innovative characters
2146            
2147             http://unicode.org/mail-arch/unicode-ml/y2012-m01/0045.html
2148            
2149             Unicode knows the concept of a provisional property
2150            
2151             http://unicode.org/mail-arch/unicode-ml/y2011-m11/0142.html
2152             http://unicode.org/reports/tr23/
2153             http://unicode.org/mail-arch/unicode-ml/y2011-m11/0161.html
2154             If you want to make analogies, however, the ISO ballots constitute
2155             the *provisional* publication for character code points and names.
2156             that needs to be available from day one for a character to be
2157             implementable at all (such as decomp mappings, bidi class,
2158             code point, name, etc.).
2159            
2160             ZERO-WIDTH UNDEFINED DECOMPOSITION MARK
2161             - to define decomposition, prepend it
2162            
2163             Exciting new letter forms for English
2164            
2165             http://www.theonion.com/articles/alphabet-updated-with-15-exciting-new-replacement,2869/
2166            
2167             Proposing new stuff, finding new stuff proposed
2168            
2169             http://unicode.org/mail-arch/unicode-ml/y2008-m01/0238.html
2170             http://www.unicode.org/mail-arch/unicode-ml/y2013-m09/0056.html
2171            
2172             A useful set of criteria for encoding symbols is found in
2173             Annex H of this document:
2174            
2175             http://std.dkuug.dk/jtc1/sc2/wg2/docs/n3002.pdf
2176            
2177             =head2 Unsorted
2178            
2179             Summary views into CLDR
2180            
2181             http://www.unicode.org/cldr/charts//by_type/patterns.characters.html
2182             http://www.unicode.org/cldr/charts//by_type/misc.exemplarCharacters.html
2183            
2184             Pound
2185            
2186             http://unicode.org/mail-arch/unicode-ml/y2012-m05/0242.html
2187            
2188             Classification of Dings (bats etc)
2189            
2190             std.dkuug.dk/jtc1/sc2/wg2/docs/n4115.pdf
2191            
2192             Escape: 2be9 2b9b
2193             ARROW SHAFT - various
2194            
2195             Locales
2196            
2197             http://blog.kyero.com/2011/11/14/what-is-the-common-locale-data-repository/
2198             http://blog.kyero.com/2010/12/02/lost-in-translation-locales-not-languages/
2199             http://unicode.org/mail-arch/unicode-ml/y2006-m06/0203.html
2200            
2201             General
2202            
2203             http://ebixio.com/online_docs/UnicodeDemystified.pdf
2204            
2205             Diacritics in fonts
2206            
2207             http://unicode.org/mail-arch/unicode-ml/y2011-m05/0047.html
2208             http://www.user.uni-hannover.de/nhtcapri/combining-marks.html#greek
2209            
2210             Licences (GPL etc) in TV sets
2211            
2212             http://unicode.org/mail-arch/unicode-ml/y2009-m12/0092.html
2213            
2214             Similar glyphs:
2215            
2216             http://unicode.org/reports/tr39/data/confusables.txt
2217            
2218             GeoLocation by IP
2219            
2220             http://unicode.org/mail-arch/unicode-ml/y2009-m04/0197.html
2221            
2222             Per language character repertoire:
2223            
2224             http://unicode.org/mail-arch/unicode-ml/y2009-m04/0253.html
2225             http://unicode.org/mail-arch/unicode-ml/y2009-m04/0255.html
2226            
2227             Dates/numbers in Unicode
2228            
2229             http://unicode.org/mail-arch/unicode-ml/y2010-m02/0122.html
2230            
2231             Normalization FAQ
2232            
2233             http://www.macchiato.com/unicode/nfc-faq
2234            
2235             Apostrophe
2236            
2237             http://unicode.org/mail-arch/unicode-ml/y2008-m05/0060.html
2238             http://unicode.org/mail-arch/unicode-ml/y2008-m05/0063.html
2239             http://unicode.org/mail-arch/unicode-ml/y2008-m05/0066.html
2240             http://unicode.org/mail-arch/unicode-ml/y2007-m07/0251.html
2241             http://unicode.org/mail-arch/unicode-ml/y2007-m05/0309.html
2242            
2243             Apostrophe as soft sign
2244            
2245             http://unicode.org/mail-arch/unicode-ml/y2010-m08/0123.html
2246            
2247             Questionnaire at start of Unicode proposal
2248            
2249             http://unicode.org/mail-arch/unicode-ml/y2007-m05/0087.html
2250            
2251             Rubi
2252            
2253             http://en.wikipedia.org/wiki/Ruby_character#Unicode
2254            
2255             Tamil/ISCII
2256            
2257             http://unicode.org/faq/indic.html
2258             http://unicode.org/versions/Unicode6.1.0/ch09.pdf
2259             http://www.brainsphere.co.in/keyboard/tm.pdf
2260            
2261             CGI and OpenType
2262            
2263             http://unicode.org/mail-arch/unicode-ml/y2008-m02/0097.html
2264            
2265             Numbers in scripts ;-)
2266            
2267             http://unicode.org/mail-arch/unicode-ml/y2008-m02/0120.html
2268            
2269             Indicating coverage of the font
2270            
2271             http://unicode.org/mail-arch/unicode-ml/y2008-m02/0152.html
2272             http://unicode.org/mail-arch/unicode-ml/y2008-m02/0167.html
2273            
2274             Accessing ligatures
2275            
2276             http://unicode.org/mail-arch/unicode-ml/y2007-m11/0210.html
2277            
2278             Folding characters
2279            
2280             http://unicode.org/reports/tr30/tr30-4.html
2281            
2282             Writing systems vs written languages
2283            
2284             http://unicode.org/mail-arch/unicode-ml/y2005-m07/0198.html
2285             http://unicode.org/mail-arch/unicode-ml/y2005-m07/0241.html
2286            
2287             MS Visual OpenType tables
2288            
2289             http://www.microsoft.com/typography/VOLT.mspx
2290             http://www.microsoft.com/typography
2291            
2292             "Same" character Oacute used for different "functions" in the same text
2293            
2294             http://unicode.org/mail-arch/unicode-ml/y2004-m08/0019.html
2295             etc:
2296             http://unicode.org/mail-arch/unicode-ml/y2004-m07/0227.html
2297            
2298             Diacritics
2299            
2300             http://www.sil.org/~gaultney/ProbsOfDiacDesignLowRes.pdf
2301             http://en.wikipedia.org/wiki/Sylfaen_%28typeface%29
2302             http://tiro.com/Articles/sylfaen_article.pdf
2303            
2304             Sign writing
2305            
2306             http://std.dkuug.dk/jtc1/sc2/wg2/docs/n4342.pdf
2307            
2308             Writing digits in non-decimal
2309            
2310             http://unicode.org/mail-arch/unicode-ml/y2011-m03/0050.html
2311             Which separator is less ambiguous? Breve ˘ ? ␣ ? Inverted ␣ ?
2312            
2313             Use to identify a letter:
2314            
2315             http://unicode.org/charts/collation/
2316            
2317             Perl has problems with unpaired surrogates (whole thread)
2318            
2319             http://unicode.org/mail-arch/unicode-ml/y2010-m11/0034.html
2320            
2321             Complex fonts (e.g., Indic)
2322            
2323             http://unicode.org/mail-arch/unicode-ml/y2010-m10/0049.html
2324            
2325             Complex glyphs in Symbola (pre-6.01) font may crash older versions of Windows
2326            
2327             http://unicode.org/mail-arch/unicode-ml/y2010-m10/0082.html
2328             http://unicode.org/mail-arch/unicode-ml/y2010-m10/0084.html
2329            
2330             Windows 7 SP1 improvements
2331            
2332             http://babelstone.blogspot.de/2010/05/prototyping-tangut-imes-or-why-windows.html
2333            
2334             Middle dot is ambiguous
2335            
2336             http://unicode.org/mail-arch/unicode-ml/y2010-m09/0023.html
2337             http://unicode.org/mail-arch/unicode-ml/y2013-m03/0151.html
2338            
2339             Superscript == modifiers
2340            
2341             http://unicode.org/mail-arch/unicode-ml/y2010-m03/0133.html
2342            
2343             Translation of Unicode names
2344            
2345             http://unicode.org/mail-arch/unicode-ml/y2012-m12/0066.html
2346             http://unicode.org/mail-arch/unicode-ml/y2012-m12/0076.html
2347            
2348             Transliteration on passports (see p.IV-48), UniDEcode
2349            
2350             http://www.icao.int/publications/Documents/9303_p1_v1_cons_en.pdf
2351             http://unicode.org/mail-arch/unicode-ml/y2013-m11/0025.html
2352            
2353             =head1 Keyboard input on Windows: interaction of applications and the kernel
2354            
2355             =head2 Keyboard input on Windows, Part I: what is the kernel doing?
2356            
2357             This is not documented. We try to provide a description which is
2358             both as simple as possible, and as complete as possible. (We ignore
2359             many important parts: the handling of hot keys [or C]), IME,
2360             handling of focus switch [C etc], the synchronization of keystate
2361             between different queues, waking up the system, the keyboard filters,
2362             widening of virtual keycodes, and LED lights.)
2363            
2364             We omit Step 0, when the hardware keyboard drivers (PS/2 or USB) deliver keydown/up(/repeat???) event for scan
2365             codes of corresponding keys. (This is a complicated topic, but well-documented.)
2366            
2367             =over
2368            
2369             =item 1
2370            
2371             The scan codes are massaged (see “Low level scancode mapping” in L<"SEE ALSO">).
2372            
2373             =item 2
2374            
2375             The keyboard layout tables map the translated scancode to a virtual keycode.
2376             (This may also depend on the “modification column”; see L<"Far Eastern keyboards on Windows">.)
2377             The “internal” key state table is updated.
2378            
2379             =item 3
2380            
2381             Mythology: the modification keys (C, C, C etc) are taken into account.
2382            
2383             What actually happens: any key may act as a modification key. The keyboard layout tables
2384             map keycodes to 8-bit masks. (The customary names for lower bits of the mask are C,
2385             C, C, C; two more bits are named C and C — after
2386             OYAYUBI 親指, meaning THUMB; two more
2387             bits are unnamed.) The keycodes of the currently pressed keys (from the “internal” table) are translated to masks, and
2388             these masks are ORed together. (For the purpose of translation to C/etc [done
2389             in ToUnicode()/ToUnicodeEx()], the bit C may be set
2390             also when key C was pressed odd number of times; this is
2391             controlled by C flag in a virtual key descriptor [of the key being currently processed]
2392             of the keyboard layout tables.)
2393            
2394             The keyboard layout tables translate the ORed mask to a number called “modification column”.
2395             (These two numbers are completely hidden from applications. The only glint the
2396             applications get is in the [useless, since there is no way to map it to anything “real”] result of
2397             L.])
2398            
2399             =item 4
2400            
2401             Depending on the current “modification column”, the virtual keycode of the current key event
2402             may be massaged further. (See L<"Far Eastern keyboards on Windows">.) Numpad keycodes
2403             depend also on the state of C — provided the keyboard layout table marks them with
2404             C flag. A few other scancodes may also produce different virtual keycodes in
2405             different situations (e.g., C).
2406            
2407             When C flag is present, fake presses/releases of left C are generated
2408             on presses(repeats)/releases of right C (exception: the press is not generated if any
2409             Ctrl key is down; likewise for when left C up when right C is released). With
2410             keypad presses/releases in presence of C and C, fake releases/presses of C
2411             are generated.
2412            
2413             =item 5
2414            
2415             If needed, asynchronous key state for the current key's non-left-non-right flavor is updated.
2416             (The rest is dropped if the key is consumed by a C hook.)
2417            
2418             Asynchronous key state for the current key is updated. Numpad-by-number flags are updated.
2419             (The rest is dropped if the key is a hotkey.)
2420            
2421             The message C is posted to the application. If C [usually
2422             called the C key] is
2423             down, but C is not, the event is of C flavor (this info is duplicated in
2424             lParam. Additionally, for C tapping, the UP event is also made C — although
2425             at this moment C is not down!).
2426             (The C flag [of the scancode] is also delivered to the application.)
2427            
2428             (When a C message is posted, the key state is updated. This key state
2429             may be used by TranslateMessage() as an argument to ToUnicode(), and is returned by GetKeyState() etc.)
2430            
2431             B
2432             with TranslateMessage()/DispatchMessage() or uses some equivalent code.>
2433            
2434             =item 6
2435            
2436             Before the application dispatches C to the message handler,
2437             TranslateMessage() calls L with C (unless a popup menu
2438             is active; then C — which disables character-by-number input via
2439             numeric KeyPad) and the buffer of 16 UTF-16 code units.
2440            
2441             =item 7
2442            
2443             The UTF-16 code units obtained from ToUnicode() are posted via PostMessage(). All the code units but
2444             the last one are marked by C flag in C. If the initial message
2445             was C, the C flavor is posted; if ToUnicode() returns a
2446             deadkey, the C flavor is posted.
2447            
2448             (The bit C is set/used only for the console handler.)
2449            
2450             =back
2451            
2452             =head2 Keyboard input on Windows, Part II: The semantic of ToUnicode()
2453            
2454             L,
2455             the semantic is not. Here we fix this.
2456            
2457             =over 4
2458            
2459             =item 1
2460            
2461             If the bit 0x01 in C is not set, the key event is checked for contributing to
2462             character-by-number input via numeric KeyPad (and numpad-by-number flags are updated).
2463             If so, the character is
2464             delivered only when C is released. (This is the only case when KEYUP
2465             delivers a character.) Unless the bit 0x02 in C is set, the KEYUP
2466             events are not processed any more.
2467            
2468             =item 2
2469            
2470             The flag C is acted upon, and C is processed.
2471            
2472             =item 3
2473            
2474             The keys which are currently down are mapped to the ORed bitmap (see above).
2475            
2476             =item 4
2477            
2478             If the key event does not contribute to input-by-number via numeric keypad,
2479             and C is set, and no other bits except C, C are set:
2480             then the bit C is removed from the ORed mask.
2481            
2482             =item 5
2483            
2484             If C is active, C state is flipped in the following cases: either at most
2485             C is set in the bitmap, and C is set in the descriptor,
2486             or both C and C are set in the bitmap, and C is set in the
2487             descriptor.
2488            
2489             Now the ORed bitmap is converted to the modification column (see above).
2490            
2491             =item 6
2492            
2493             The key descriptor for the current virtual keycode is consulted (the “row” of the table).
2494             If C flag is on, C is active, and no other bits but C are set in the bitmap,
2495             the row is replaced by the next row.
2496            
2497             =item 7
2498            
2499             The entry at
2500             the row/column is extracted; if defined, it is either a string (zero or more UTF-16 code units), or a
2501             dead key ID (one UTF-16 unit). (I: the ID is taken from the next row of the table.)
2502            
2503             (If the ORed mask corresponds to a valid modification column, but the row does not
2504             define the behaviour at this column, and the bit C is set, and no other bits but C, C
2505             are set, then an autogenerated character in the range 0x00..0x1f is emitted for virtual keycodes
2506             'A'..'Z' and widened virtual keycodes 0xFF61..0xFF91 [for latter, based on the low bits of translation-to-scancode]).
2507            
2508             =item 8
2509            
2510             The resulting units are fed to the finite automaton. When the automaton is in
2511             0-state, a fed character unit is passed through, and a fed deadkey ID sets the state
2512             of the automaton to this number. In non-0 state, the IDs behave the
2513             same as numerically equal character units; the behaviour is described by the keyboard layout
2514             tables. The automaton changes the state according to the input; it may also emit a character
2515             (= 1 code unit; then it is always reset to 0 state). When “unrecognized input” arrives, the automaton
2516             emits the ID I the input, and resets to 0 state.
2517            
2518             (On KEYUP event, the changes to the state of the finite-automaton are ignored. This is only
2519             relevant if C has bit 0x02 set.)
2520            
2521             =item 9
2522            
2523             After UTF-16 units are passed through the automaton, its output is returned by ToUnicode().
2524             If the automaton is in non-0 state, the state ID becomes the output.
2525            
2526             =back
2527            
2528             B MSKLC restricts the length of the string associated to the row/column cell to
2529             be at most 4 UTF-16 code units. There are 2 restrictions for keyboard layouts created with other tools:
2530             first, the maximal number of UTF-16 codepoints in all these strings is stored in a byte, hence there
2531             may be at most 255 UTF-16 codepoints. Second, the actual slot C where the string is allocated
2532             contains two shorts, then the UTF-16 data; its length is also stored in a byte. This results in
2533             the maximal string length of 125 code units — if it is stored in one slot.
2534            
2535             However, with creative allocations, one can use more than one slot for a string storage
2536             (theoretically, one may imagine specially crafted layout where this would break the
2537             layout; in practice, such situations should not arise — even if one stores long strings in
2538             I slots good for 4-char strings).
2539            
2540             B If the application uses the standard message pump
2541             with TranslateMessage()/DispatchMessage(), the caller of ToUnicode() is TranslateMessage().
2542             In this case, ToUnicode() is called with an output buffer consisting of 16 UTF-16 code units. For
2543             such applications, the strings associated to keypresses are truncated after 16 code units.
2544            
2545             B If the string is “long” (i.e., defined via LIGATURES), when it is fed through the
2546             finite automaton, the transitions to non-0 state do not generate deadkey IDs in the output
2547             string. (The LIGATURES may contain strings of one code unit! This may lead to non-obvious
2548             behaviour! If pressing such a key after a deadkey generates a chained deadkey, this
2549             would happen without delivering C message.)
2550            
2551             B How does the kernel recognize which key sequences contribute to
2552             character-by-number input via numeric KeyPad? First, the starter keydown must happen
2553             when the ORed mask contains C, and no other bits except C
2554             and C. (E.g., one can press C, then tap C, release C
2555             [with 1,2,3 on the numeric keypad].
2556             This would deliver C, then C<1> would start character-by-number input
2557             provided C and C together have ORed mask “in between” of C
2558             and C.)
2559            
2560             After the starter keydown (NumPad: 0..9, DOT, PLUS) is recognized as such, all the keydowns
2561             should be followed by the corresponding keyup (keydowns-due-to-repeat are ignored);
2562             more precisely, between two KEYDOWN events, the KEYUP for the first of them must be present.
2563             (In other words, KEYDOWN/KEYUP events must come in the expected order, maybe with some intermixed “extra” KEYUP events.)
2564             In the decimal mode (numeric starter) only the keys with scancodes of NumPad 0..9 are allowed.
2565             In the hex mode (starter is NumPad's DOT or PLUS) also the keys with virtual codes
2566             '0'..'9' and 'A'..'F' are allowed. The sequence is terminated by releasing C
2567             (=C) key.
2568            
2569             B In most cases, the resulting number is reduced mod 256. The exceptions are: the starter key is C,
2570             or the translate-to codepage is multibyte (then a number above 255 is interpreted as big-endian combination
2571             of bytes). In multibyte codepages, numbers 0x80..0xFF
2572             are considered in C codepage (unless the translate-to codepage is Japanese, and the number’s codepoint is Katakana).
2573            
2574             B If the starter key is C or C, the number is a codepoint in the default codepage of the keyboard layout;
2575             if it is another digit, it is in the OEM codepage.
2576             Enabling hex modes (C or C) requires extra tinkering; see L<"Hex input of unicode is not enabled">.
2577            
2578             B since keyboard layouts normally map C to the mask C, and do not define
2579             a modification column for the ORed mask C<=KBDALT>, and C is B stripped for
2580             key events in input-by-number, these key events usually do not generate spurious Cs.
2581            
2582             B if the bit 0x01 of C is intended to be set, then there is a way to query
2583             the kernel “what would happen if a particular key with a particular combination of modifiers
2584             were pressed now”. (Recall that a “usual” ToUnicode() call is “destructive”: it modifies the
2585             I of the keyboard stored in the kernel. The information about whether one is in the
2586             middle of entering-by-number and/or whether one is in the middle of a deadkey sequence is
2587             erased or modified by such calls.) In general, there is no way to preserve the state of
2588             entering-by-number; however, in presence of bit 0x01, this is of no concern, so a solution
2589             exists.
2590            
2591             Using C, and setting the high bit of C gives the same result as
2592             ToUnicode() with C and no high bit in C. Moreover, this preserves the state of
2593             the deadkey-finite-automaton. This way, one gets a “I” flavor of ToUnicode().
2594            
2595             =head2 Keyboard input on Windows, Part III: Customary “special” keybindings of typical keyboards
2596            
2597             Typically, keyboards define a few keypresses which deliver “control” characters
2598             (for benefits of console applications). As shown above, even if the keyboard does not
2599             define C combinations (but does define modification column for C
2600             which is associated to C — with maybe C, C intermixed), C
2601             with C<^letter> I be delivered to the application. The same will happen for combinations
2602             with modifiers which produce only C, C, C.
2603            
2604             Additionally, the typical keyboards also define the following bindings:
2605            
2606             Ctrl-Space ——→ 0x20
2607             Esc, Ctrl-[ ——→ 0x1b
2608             Ctrl-] ——→ 0x1d
2609             Ctrl-\ ——→ 0x1c
2610             BackSpace ——→ ^H
2611             Ctrl-BackSpace ——→ 0x7f
2612             Ctrl-Break ——→ ^C
2613             Tab ——→ ^I
2614             Enter ——→ ^M
2615             Ctrl-Enter ——→ ^J
2616            
2617             In addition to this, the standard US keyboard (and keyboards built by this Perl module) define
2618             the following bindings with C modifiers:
2619            
2620             @ ——→ 0x00
2621             ^ ——→ 0x1e
2622             _ ——→ 0x1f
2623            
2624             =head2 Can an application on Windows accept keyboard events? Part I: insert only
2625            
2626             The logic described above makes the kernel deliver more or less “correct” C messages
2627             to the application. The only bindings which may be defined in the keyboard layout, but will not be
2628             seen as C are those in modification columns which involve C, and do not
2629             involve any bits except C and C. (Due to the stripping of C described
2630             above, these modification columns are never accessed — I.)
2631            
2632             Try to design an application with an entry field; the application should insert B the
2633             characters ”delivered for insertion” by the keyboard layout and the kernel. The application
2634             should not do anything else for all the other keyboard events. First, ignore
2635             the C stripping.
2636            
2637             Then the only C which are NOT supposed to insert the contents to the editable UI fields are the
2638             L described above. They are easy to recognize and ignore: just
2639             ignore all the C carrying characters in the range C<0x00..0x1f>, C<0x7f>, and ignore C<0x20>
2640             delivered when one of C keys is down. So the application which inserts all the I
2641             Cs will follow I of the keyboard as close as possible.
2642            
2643             Now return to consideration of C stripping. If the application follows the policy above,
2644             pressing C would enter C — provided C is mapped to C, as done
2645             on standard keyboards. So the application should recognize which C carrying C
2646             are actually due to stripping of C, and should not insert the delivered characters.
2647            
2648             Here comes the major flaw of the Windows’ keyboard subsystem: the kernel translates
2649             SCANCODE —→ VK_CODE —→ ORED_MASK —→ MODIFICATION_COLUMN, then operates in terms of
2650             ORed masks and modification columns. The application can access only the first two levels
2651             of this translation; one cannot query the kernel for any information about the last
2652             two numbers. (Except for the API L,
2653             but it is unclear how this API may help: it translates “in wrong direction” and covers only BMP.)
2654             Therefore, there is no bullet-proof way to recognize when C arrived
2655             due to C stripping.
2656            
2657             B of course, if only C keys are associated to non-0 ORed mask bitmaps,
2658             and they are associated to the “expected” C bits, then the
2659             application would easily recognize this situation by checking whether C is down,
2660             but C is not. (Also observe that this is exactly the situation distinguishing
2661             C from C — no surprises here!)
2662            
2663             Assuming that the application uses this method, it would correctly recognize stripped
2664             events on the “primitive” keyboards. However, on a keyboard with an extra modifier
2665             key (call it C; assume its mask involves a non-SHIFT/ALT/CTRL/KANA bit),
2666             the C combination will not be stripped by the kernel, but the application
2667             would think that it was, and would not insert the character in C message. A bug!
2668            
2669             Moreover, if “supporting only the naive mapping” were a feasible
2670             restriction, there would be no reason for the kernel to go through the extra step of “the ORed mask”.
2671             Actually, to have a keyboard which is simultaneously backward compatible, easy for users, and
2672             covering a sufficiently wide range of possible characters, one B use more or
2673             less convoluted implementations (as in L bitmaps to modifier keys>).
2674            
2675             B the fact that the kernel and the applications speak different
2676             incompatible languages makes even the primitive task discussed here impossible
2677             to code in a bullet-proof way. A heuristic workaround exists, but it will not
2678             work with all keyboards and all combinations of modifiers.
2679            
2680             B some applications (e.g., Emacs) manage to distinguish
2681             C combination of modifier keys from the combination C produced by
2682             a typical C; these applications are able to use C-modified
2683             keys as bindable accelerator keys. We address this question in the L.
2684            
2685             =head2 Can an application on Windows accept keyboard events? Part II: special key events
2686            
2687             In the preceding section, we considered the most primitive application accepting
2688             the user inserting of characters, and nothing more. “Real applications” must
2689             support also keyboard actions different from “insertion”; so those KEYDOWN events
2690             which are not related to insertion may trigger some “special actions”. To model a full-featured
2691             keyboard input, consider the following specification:
2692            
2693             As above, the application has an entry field, and should insert B the
2694             characters ”delivered for insertion” by the keyboard layout and the kernel.
2695             For all the keyboard events I, the application
2696             should write to the log file which of C modifiers were down,
2697             and the virtual keycode of the KEYDOWN event. Again, at first, we ignore
2698             the C stripping.
2699            
2700             At first, the problem looks simple: with the standard message pump, when C
2701             message is processed, the corresponding C messages are already
2702             sent to the message queue. One can PeekMessage() for these messages; if present,
2703             and not “special”, they correspond to “insertion”, so nothing should be written to the log.
2704             Otherwise, one reports this C to the log.
2705            
2706             Unfortunately, this solution is wrong. Inspect again what the kernel is delivering
2707             during the input-by-number via numeric keyboard: the KEYDOWN for decimal/hex digits
2708             B a part of the “insertion”, but it does not generate any C.
2709             Essentially, the application may see C pressed during the processing of
2710             C, but even if C is supposed to format the paragraph,
2711             this action should not be triggered (but C should be eventually inserted).
2712            
2713             B Input-by-number is getting in the way of using the standard message
2714             pump. C: one should write a clone of TranslateMessage() which delivers
2715             suitable C messages for KEYDOWN/KEYUP involved in Input-by-number. Doing
2716             this, one can also remove silliness from the Windows’ handling of Input-by-number
2717             (such as taking C for numbers above 255).
2718            
2719             B: myTranslateMessage() should:
2720            
2721             =over 4
2722            
2723             =item *
2724            
2725             when not handling input-by-number, call ToUnicode(), but use C, so that ToUnicode() does not handle input-by-number.
2726            
2727             =item *
2728            
2729             Recognize input-by-number starters by the scancode/virtual-keycode, the presence of C down, and
2730             the fact that ToUnicode() produces nothing or C<'0'..'9','.',',','+'>.
2731            
2732             =item *
2733            
2734             After the starter, allow continuation by checking the scancode/virtual-keycode and the presence of C down.
2735             Do not call ToUnicode() for continuation keydown/up events.
2736            
2737             =item *
2738            
2739             After a chain of continuations followed by KEYUP for C, one should PostMessage() for C with
2740             accumulated input.
2741            
2742             =back
2743            
2744             Combining this with the heuristic recognition of stripped C, one gets an architecture
2745             with a naive approximation to handling of C (but still miles ahead of all the applications
2746             I saw!), and bullet-proof handling of other combinations of modifiers.
2747            
2748             B this implementation of MyTranslateMessage() loses one “feature” of the original one:
2749             that input-by-number is disabled in the presence of (popup) menu. However, since I never saw
2750             this “feature” in action (and never have heard of it described anywhere), this must be of
2751             negligible price.
2752            
2753             B I the applications I checked do this logic wrong. Most of them check B for
2754             “whether the key event looks like those which should trigger special actions”, then perform
2755             these special actions (and ignore the character payload).
2756            
2757             As shown above, the reasonable way is to do this in the opposite order, and check for
2758             special actions only I it is known that the key event does not carry a character payload.
2759             The impossibility of reversing the order of these checks is due to the same reason as the one discussed
2760             above: the
2761             kernel and application speaking different languages.
2762            
2763             Indeed, since the application knows nothing
2764             about ORed masks, it has no way to distinguish that, for example, C may be I to be
2765             distinct from C and C, and while the last two do not carry the character
2766             payload, the first one does. Checking I for the absence of C
2767             delegates such a discrimination to the kernel, which has enough information about the
2768             intent of the keyboard layout. (Likewise, the keyboard may define the pair of C
2769             and C to insert ᵃ. Then C alone will not carry any character payload,
2770             its combination with a deadkey may.)
2771            
2772             Why the applications are trying to grab the potential special-key messages as early
2773             as possible? I suspect that the developers are afraid that otherwise, a keyboard layout may
2774             “steal” important accelerators from the application. While this is technically possible,
2775             nowadays keyboard accelerators are rarely the I way to access features of the applications;
2776             and among hundreds of keyboard layouts I saw, all but 2 or 3 would not “steal” I from applications.
2777             (Or maybe the developers just have no clue that the correct solution is so simple?)
2778            
2779             B Among the applications I checked, the worst offender is Firefox. It follows L
2780             unfortunate advice by Mike Kaplan|http://blogs.msdn.com/b/michkap/archive/2005/01/19/355870.aspx>
2781             and tries to reconstruct the mentioned above row/columns table of the keyboard layout, then
2782             uses this (heuristically reconstructed) table as a substitute for the real thing. And
2783             due to the mismatch of languages spoken by kernel and applications, working via such an
2784             attempted reconstruction turns out to have very little relationship to the actually intended
2785             behaviour of the keyboard (the behaviour observed in less baroque applications). In particular, if
2786             a keyboard uses different modification columns for C and C=C
2787             modifiers, pressing C inputs wrong characters in Firefox.
2788            
2789             B Among notable applications which fail spectacularly is Emacs. The developers
2790             forget that for a generation, it is already XXI century; so they L
2791             ToUnicode()|http://fossies.org/linux/misc/emacs-24.3.tar.gz:a/emacs-24.3/src/w32fns.c>!
2792             (Even if ToUnicode() is available, its result is converted to the result of the
2793             corresponding ToAscii() code.)
2794            
2795             In addition to 8-bitness, Emacs also suffers from check-for-specials-first syndrome…
2796            
2797             =head2 Can an application on Windows accept keyboard events? Part III: better detection of C stripping
2798            
2799             We explained above that L
2800             handling the case when C might have been stripped by the kernel|"Can an application on Windows accept keyboard events? Part I: insert only">. The
2801             very naive heuristic algorithm described there will recognize the simplest
2802             cases, but will also have many false positives: for many combinations it will decide
2803             that C was stripped while it was not. The result will be that
2804             when the kernel reports that the character C is delivered, the
2805             application would interpret it as C, so C would not be inserted.
2806             It will not handle, for example,
2807             the C modifier combinations with L
2808             from that section|"A convenient assignment of C bitmaps to modifier keys">.
2809            
2810             Indeed, with this assignment, the only combination of modifiers for which the kernel will strip C
2811             is C (and C if one does not assign any bits to C).
2812             So C is not stripped, hence the
2813             correct C is delivered by the kernel. However, since this combination is
2814             still visible to the application as having C, and not having C,
2815             it is delivered as the C flavor.
2816            
2817             So the net result is: one designed a nice assignment of masks to the modifier
2818             keys. This assignment makes keypresses successfully navigate around the quirks
2819             of I’s calculations of the character to deliver. However, the naive
2820             algorithm used by I will force the application to ignore this
2821             correctly delivered character to insert.
2822            
2823             A very robust workaround for this problem is introduced in the
2824             L.
2825             What we discuss here is a simple heuristic to recognize the combinations involving
2826             C and an “unexpected modifier”, so that these combinations become
2827             exceptions to the rule “C flavor means ‘do not insert’”.
2828            
2829             B when C message arrives, inspect the virtual keycodes
2830             which are reported as pressed. Ignore the keycode for the current message.
2831             Ignore the keycodes for “usual modifiers” (C) which are
2832             expected to keep stripping. Ignore the keycode for the keys which may be
2833             kept “stuck down” by the keyboards (see L<"Far Eastern keyboards on Windows">).
2834             If some keycode remains, then consider it as an “extra” modifier, and ignore
2835             the fact that the message was of C flavor.
2836            
2837             So all one must do is to define one user message (for input-by-number-in-progress),
2838             code two very simple routines, MyTranslateMessage() and HasExtraModifiersHeuristical(), and perform two
2839             PeekMessage() on KEYDOWN event, and one gets a powerful almost-robust
2840             algorithm for keyboard input on Windows. (Recall that all the applications
2841             I saw provide close-to-abysmal support of keyboard input on Windows.)
2842            
2843             =head2 Can an application on Windows accept keyboard events? Part IV: application-specific modifiers
2844            
2845             Some applications handle certain keys as “extra modifiers for the purpose of
2846             application-specific accelerator keypresses”. For example, Emacs may treat
2847             the C in this way (as a C modifier for its bindable-keys
2848             framework). Usually, C does not
2849             contribute anything into the ORed mask; hence, C
2850             combination will deliver the same character as just C alone. When
2851             the application treats C as an accelerator, it must
2852             ignore the character delivered by this combination.
2853            
2854             Additionally, many keyboard layouts
2855             use the C flag (it makes the kernel fake pressing/releasing the
2856             left C key when the right C is pressed/released) with “standard”
2857             assignments of the ORed masks. On such keyboards, pressing right C (i.e.,
2858             C) delivers the same characters as pressing any C together with
2859             any C. On the other hand, an application may distinguish left-C combined
2860             with left-C from C pressed
2861             on such keyboards by inspecting which (virtual) keys are currently down. So the application
2862             may consider left-C combined with left-C
2863             as “intended to be an accelerator”; then the application would ignore the characters delivered by
2864             such a keypress.
2865            
2866             One can immediately see that such applications would inevitably enter into conflict
2867             with keyboards which B these key combinations. For example, on a keyboard
2868             which defines an ORed mask for C, pressing C
2869             I deliver a different character than pressing C. However, the
2870             application does not know this, and just ignores the character delivered by
2871             C.
2872            
2873             A similar situation arises when the keyboard defines C to
2874             deliver a different character than C. Again, the character will be ignored
2875             by the application. Since the fact that such a “unusual” keyboard is active
2876             implies user's intent, such behaviour is a bug of the application.
2877            
2878             B an application must interpret a keypress as “intended to be an accelerator”
2879             only if this keypress produces no character, or produces B character as
2880             the key without the “extra” modifier. (Correspondingly, if replacing C by
2881             C does not change the delivered character.)
2882            
2883             B to do this, the application must be able to query “what would happen
2884             if the user pressed different key combinations?”; such a query requires “non-destructive”
2885             calls of ToUnicode(). (These calls must be done I the “actual”, destructive,
2886             call of ToUnicode() corresponding to the currently pressed down modifiers.)
2887            
2888             Fortunately, with the framework described in the
2889             L stripping">,
2890             the call of ToUnicode() is performed with C being 0x01. As explained near the end of the section
2891             L<"Keyboard input on Windows, Part II: The semantic of ToUnicode()">, this call has a “non-destructive”
2892             flavor! Hence, for applications with such “enhanced” modifier keys, the logic of the
2893             L stripping">
2894             should be enhanced in the following ways:
2895            
2896             =over 4
2897            
2898             =item *
2899            
2900             Make a non-destructive call of ToUnicode(). Store the result. If no insertable character
2901             (or deadkey) is delivered, ignore the rest.
2902            
2903             =item *
2904            
2905             If both left C and left C are down (AND right C AND right C are up!)
2906             replace left C by the right C, and
2907             make another non-destructive call of ToUnicode(). If the result is identical to the first one,
2908             mark C as “special modifiers present for accelerators”.
2909            
2910             Remove left C and left C from the collection of keys which are down (argument to ToUnicode()),
2911             and continue with the previous step.
2912             (This may be generalized to other combinations of left/right C/C.)
2913            
2914             =item *
2915            
2916             For every other “special modifier” virtual key which is down,
2917             make another non-destructive call of ToUnicode() with this virtual key up.
2918             If the result is identical to the first one, mark this “special modifier” as “present for accelerators”.
2919            
2920             =item *
2921            
2922             Finally, if nothing suitable for accelerators is found, make a “usual” call of ToUnicode()
2923             (so that on future keypresses the deadkey finite automaton behaves as expected). Generate the
2924             corresponding messages.
2925            
2926             =back
2927            
2928             If no insertable character is delivered, or suitable “extra” accelerators are found, the
2929             process-the-accelerator logic should be triggered.
2930            
2931             For example, if the character Ω is delivered, and a special modifier C is down
2932             and marked as suitable as accelerator, then Ω will be ignored. The accelerator for C
2933             should be triggered. (Processing this as C may be also done. This may require an
2934             extra non-destructive call.)
2935            
2936             An alternative logic is possible: if this Ω was generated by modifiers C
2937             with the virtual key C, then the application may query what C generates standalone (for example,
2938             cyrillic ц), and trigger the accelerator for C. (This assumes that
2939             C with C generates the same Ω!)
2940            
2941             If no character is delivered, then this is a “trivial” situation, and the framework of accelerator keys
2942             should be called as if the complication considered here did not exist.
2943            
2944             B this logic handles the intended behaviour of C key as well! So, with this implementation,
2945             the application would
2946            
2947             =over 5
2948            
2949             =item *
2950            
2951             Handle C-NUMPAD input-by-number in an intuitive mostly compatible with Windows way
2952             (but not bug-for-bug compatible with the Windows' way);
2953            
2954             =item *
2955            
2956             Would recognize C modifier which does not change the delivered character as such. (So it may be processed
2957             as the menu accessor.)
2958            
2959             =item *
2960            
2961             Would recognize B the key combinations defined by the keyboard layout (and deliverable via ToUnicode());
2962            
2963             =item *
2964            
2965             Would recognize all the application-specific extra modifier keys which do not interfere with the
2966             key combinations defined by the keyboard layout.
2967            
2968             =back
2969            
2970             =head2 Far Eastern keyboards on Windows
2971            
2972             The syntax of defining these keyboards is documented in F of the toolkit.
2973             The semantic of the NLS table is undocumented. Here we fix this.
2974            
2975             The function returning the NLS table should be exported with ordinal 2.
2976             The offsets of both tables in the module should be below 0x10000.
2977             The keyboard layout should define a function with ordinal 3 or 5 returning 0, or
2978             be loaded through such a function returning non-0; the signature is
2979            
2980             BOOL ordinal5(HKL hkl, LPWSTR __OUT__ dllname , PCLIENTKEYBOARDTYPE type_if_remote_session, LPVOID dummy);
2981             BOOL ordinal3(LPWSTR __OUT__ dllname);
2982            
2983             if return is non-0, keyboard is reloaded from C.
2984            
2985             In short, these layouts have an extra table which may define the following enhancements:
2986            
2987             One 3-state (or 2-state) radio-button:
2988             on keys with VK codes DBE_ALPHANUMERIC/DBE_HIRAGANA/DBE_KATAKANA
2989             (the third state can be also toggled independently of the others).
2990             Three Toggling (like CAPSLOCK) button (pairs):
2991             toggling radio-button-like VK codes DBE_SBCSCHAR/DBE_DBCSCHAR, DBE_ROMAN/DBE_NOROMAN, DBE_CODEINPUT/DBE_NOCODEINPUT
2992             Make key produce different VK codes with different modifiers.
2993             Make a “reverse NUMPAD” translation.
2994             Manipulate a couple of bits of IME state.
2995             A few random hacks for key-deficient hardware layouts.
2996            
2997             (Via assigning ORed masks to radio-buttons, the radio-buttons and toggle-buttons above may affect the layout.
2998             Using this, it is easy to convert each toggling button to 2-state radiobuttons.
2999             The limitation is that the number of modification columns compatible with the
3000             extra table is at most 8 — counting one for C.)
3001            
3002             Every C may be associated to two tables of functions, the “normal” one, and the “alternative” one. For
3003             every modification column, each table
3004             assigns a filter id, and a parameter for the filter. (Recall that columns are associated
3005             to the ORed masks by the table in the C structure. One B define all the entries
3006             in the table — or at least the entries reachable by the
3007             modifier keys. B the limit on the number of states in the tables is 8; it is not clear what happens with the 
3008             states above this; some versions of Windows may buffer-overflow.)
3009            
3010             The input/output for the filters consists of: the C, C/C flag, the flags associated to the scancode in C<< KBDTABLES->ausVK >>
3011             (may be added to upstream), the
3012             parameter given in C structure (and an unused C read/write parameter). A filter may change these parameters,
3013             then pass the event forward, or it may ignore an event. Filters by ID:
3014            
3015             KBDNLS_NULL Ignore key (should not be called; only for unreachable slots in the tables).
3016             KBDNLS_NOEVENT Ignore key.
3017             KBDNLS_SEND_BASE_VK Pass through VK unchanged.
3018             KBDNLS_SEND_PARAM_VK Replace VK by the number specified as the parameter.
3019             KBDNLS_KANAMODE Ignore UP; on DOWN, toggle (=generate UP-or-DOWN for) DBE_KATAKANA
3020            
3021             These 3 generate UP for “other” key, then DOWN for the target (as needed!):
3022             KBDNLS_ALPHANUM Ignore UP; DBE_ALPHANUMERIC,DBE_HIRAGANA,DBE_KATAKANA → DBE_ALPHANUMERIC
3023             KBDNLS_HIRAGANA Ignore UP; DBE_ALPHANUMERIC,DBE_HIRAGANA,DBE_KATAKANA → DBE_HIRAGANA
3024             KBDNLS_KATAKANA Ignore UP; DBE_ALPHANUMERIC,DBE_HIRAGANA,DBE_KATAKANA → DBE_KATAKANA
3025            
3026             KBDNLS_SBCSDBCS Ignore UP; Toggle DBE_SBCSCHAR / DBE_DBCSCHAR
3027             KBDNLS_ROMAN Ignore UP; Toggle DBE_ROMAN / DBE_NOROMAN
3028             KBDNLS_CODEINPUT Ignore UP; Toggle DBE_CODEINPUT / DBE_NOCODEINPUT
3029             KBDNLS_HELP_OR_END Pass-through if NUMPAD flag ON (in ausVK); send-or-toggle HELP/END (see below)
3030             KBDNLS_HOME_OR_CLEAR Pass-through if NUMPAD flag ON (in ausVK); send HOME/CLEAR (see below)
3031             KBDNLS_NUMPAD If !NUMLOCK | SHIFT, replace NUMPADn/DECIMAL by no-numpad flavors
3032             KBDNLS_KANAEVENT Replace VK by the number specified as the parameter. On DOWN, see below
3033             KBDNLS_CONV_OR_NONCONV See below
3034            
3035             The startup values are C, C, C, C.
3036            
3037             Typical usages:
3038            
3039             KBDNLS_KANAMODE (VK_KANA (Special case))
3040             KBDNLS_ALPHANUM (VK_DBE_ALPHANUMERIC)
3041             KBDNLS_HIRAGANA (VK_DBE_HIRAGANA)
3042             KBDNLS_KATAKANA (VK_DBE_KATAKANA)
3043             KBDNLS_SBCSDBCS (VK_DBE_SBCSCHAR/VK_DBE_DBCSCHAR)
3044             KBDNLS_ROMAN (VK_DBE_ROMAN/VK_DBE_NOROMAN)
3045             KBDNLS_CODEINPUT (VK_DBE_CODEINPUT/VK_DBE_NOCODEINPUT)
3046             KBDNLS_HELP_OR_END (VK_HELP or VK_END) [NEC PC-9800 Only]
3047             KBDNLS_HOME_OR_CLEAR (VK_HOME or VK_CLEAR) [NEC PC-9800 Only]
3048             KBDNLS_NUMPAD (VK_xxx for Numpad) [NEC PC-9800 Only]
3049             KBDNLS_KANAEVENT (VK_KANA) [Fujitsu FMV oyayubi Only]
3050             KBDNLS_CONV_OR_NONCONV (VK_CONVERT and VK_NONCONVERT) [Fujitsu FMV oyayubi Only]
3051            
3052             Toggle (= 2-state) and 3-state radio-keys are switched by sending KEYUP for the currently
3053             “active” key, then KEYDOWN for the newly activated key. When switching 3-state, additional
3054             action happens depending on the new state:
3055            
3056             DBE_ALPHANUMERIC If IME is off, and KANA toggle is on, switch IME on in the KATAKANA mode
3057             DBE_HIRAGANA If IME is off, and KANA toggle is off, switch IME off in the ALPHANUMERIC mode
3058             DBE_KATAKANA SAME AS HIRAGANA
3059            
3060             Additionally, C of C switches IME to
3061            
3062             KANA toggle on: switch IME off in the ALPHANUMERIC mode
3063             KANA toggle off: switch IME on in the KATAKANA mode
3064            
3065             and C (on C and C) passes through, and does
3066            
3067             KANA toggle on, IME off: switch IME off in the ALPHANUMERIC mode
3068             otherwise: Do nothing
3069            
3070             (The semantic of IME being-in/switching-to OFF/ON mode is not clear (probably IME-specific).
3071             The switching happens by
3072             calling C for devices with a C
3073             and C, while putting the request into global memory — unless
3074             C flag is set on the foreground keyboard.)
3075            
3076             For C, the registry is checked at startup. For C, the registry is checked at startup, and:
3077            
3078             KANA_AWARE: flips END/HELP if KANA toggle is ON (on input, “HELP” means not-an-END)
3079             otherwise: sends END/HELP depending on what registry says.
3080            
3081             The checked values are C, C, C in the hive C.
3082            
3083             Which of two tables is chosen is controlled by the type (C/C/C) of the key's tables, and the (per key) history bit.
3084             The initial state of the bit is in C
3085             (L!).
3086             The tables of type C are ignored (the key descriptor passes all events
3087             through), the C key uses only the first table. The C key uses the first table on KEYDOWN, and
3088             uses the first or the second table on KEYUP. The choice depends on modifiers present in the preceding KEYDOWN;
3089             the bitmap C is indexed by the modification column of KEYDOWN event; the second table is
3090             used on the following KEYUP if the indexed bit is set. (The KEYREPEAT events are handled the same way as KEYUP.)
3091            
3092             The typical usage of C keys is to make the KEYUP event match B no matter what
3093             is the order of releasing the modifier keys and the main key.
3094             Having the history bit up “propagates” to KEYUP the information about which modifiers were active on KEYDOWN. This helps in ensuring
3095             consistency of some actions between the KEYDOWN event and the corresponding KEYUP event: remember that the state of modifiers
3096             on KEYUP is often different than the state on KEYDOWN: people can release modifiers in different orders:
3097            
3098             press-Shift, press-Enter, release-Shift, release-Enter ---> Shift-Enter pressed, Enter released
3099             press-Shift, press-Enter, release-Enter, release-Shift ---> Shift-Enter pressed and released
3100            
3101             If pressing C acts as if it were the C key (and only so with C!), to ensure consistency, one would need
3102             to make releasing C B also releasing C to act as if it were the C key. So one can make pressing
3103             C special (via the first table), set the history bit on C, and make I map C
3104             and C to be special too (send C) I.
3105            
3106             B the standard key processing has its own filters too. C processing adds fake C up/down events
3107             (provided the flag C is set);
3108             C processing ignores/fakes the C/C for C (=C)
3109             (provided the flag C is set); C becomes
3110             C (same for C); C become C/C; C may become C.
3111             OEM translations (NumPad→Cursor, except C; C<00> to double-press of C<0>) come first, then locale-specific (C,
3112             C), then those defined in the tables above.
3113            
3114             B As opposed to these translations, C and C are actually handled inside the
3115             event loop, by ToUnicode().
3116            
3117             B L (and references inside!)
3118             explains fine points of using Japanese keyboards. See also: L.
3119            
3120             =head2 A convenient assignment of C bitmaps to modifier keys
3121            
3122             In this section, we omit discussion of C modifier; so every
3123             bitmap may be further combined with C to produce two different bindings.
3124             Assign ORed masks to the modifier keys as follows:X
3125            
3126             lCtrl Win lAlt rAlt Menu rCtrl
3127             CTRL|LOYA CTRL|X1 ALT|KANA CTRL|ALT|LOYA|X1 CTRL|ALT|X2 CTRL|ALT|ROYA
3128            
3129             with suitable backward-compatible mapping of ORed masks to modification columns.
3130             This assignment allows using C flag (faking presses of C when
3131             C is pressed — this greatly increases compatibility of C with brain-damaged
3132             applications), all the combinations involving at most one of C, C or
3133             C give distinct ORed masks, it
3134             avoids stripping of C on C combined with other modifiers,
3135             makes C work with all relevant combinations, while completely preserving all
3136             application-visible properties of keyboard events [except those with C
3137             modifiers; this combination is equivalent to C].
3138            
3139             Note that ignoring the C and C bits, all combinations of
3140             C are possible, which gives at least 32 C-pairs.
3141             In fact, the only combination of C which may appear with
3142             different C bits is C; hence there are 33 possible combinations
3143             of C. Indeed, C is determined by C.
3144             If one of C is present, then C is set; so assume C are not present.
3145             But then, if C B set, then both C B be present; which gives the
3146             only duplication.
3147            
3148             Leaving out 5 combinations of C, C, C [8, minus the empty one, and
3149             C, which is avoided by most application due to its similarity to C,
3150             and C which is undistinguishable by the mask from C]
3151             to have bindable keypresses in applications, and having C as equivalent to
3152             C, this gives 27 C-pairs which may produce characters.
3153            
3154             B C being undistinguishable by the mask from C
3155             is not a big deal, since there are no standard keyboard shortcuts involving C.
3156            
3157             B Combinations of C with C L combination: multiple problems">;
3158             likewise for L with C |"C combination: many keys are not delivered to applications">.
3159            
3160             B Removing the binding for C key, only 21 useful C-pairs remain.
3161             (This is what C of L is
3162             using; out of 24 distinct combinations, C, C and C should be
3163             excluded.) B While this may look as a complete overkill, recall that characters
3164             outside BMP can be inserted on Windows I via one keypress, possibly with many
3165             modifiers. (This restriction relates only to the “classical” flavor of Windows keyboard layouts).
3166             Unicode L
3167             discourse|http://en.wikipedia.org/wiki/Mathematical_Alphanumeric_Symbols>. If a keyboard
3168             layout would want to support these letters, this would quickly exhaust the possible combinations
3169             of modifiers. (For 2-script layout, one could live with Latin/AltGr-Latin/Greek + 18 mathematical
3170             alphabets. But for layouts supporting more scripts, it looks like using C key is not
3171             avoidable.)
3172            
3173             B Applications may call ToUnicode() with I of modifiers:
3174             for example, they may "put" C down, but do not specify whether it is C or
3175             C. Likewise for C.
3176            
3177             To support that, one would need to define a mask for standalone C and C
3178             (i.e., C and C). Since these modifiers are present when the real “left-right-handed”
3179             keys are down, the masks should be “contained” in the masks of handed keys. B one
3180             can make the pseudo-key C to generate bit C, and the pseudo-key C to generate
3181             the bit C. Then for any combination of modifiers with unhanded C and/or C,
3182             either the corresponding combination of bits is supported by the layout (and then the
3183             application will access the corresponding modification column — which is probably not
3184             the “expected” column corresponding to some handed flavor), or the combination is not
3185             yet defined. In the latter case, one may actually decide I to resolve this: one can
3186             map this combination of modifiers to an arbitrary modification column!
3187            
3188             In particular, one can map such combination of modifiers to a certain choice of handedness
3189             of C and C. (An example of such a problematic application is L;
3190             look for “I”.)
3191            
3192             B Some applications may do a "reverse lookup" using
3193             L|https://msdn.microsoft.com/en-us/library/windows/desktop/ms646329%28v=vs.85%29.aspx>
3194             (this is B API which exposes the modifier masks). Most of these calls would not
3195             know anything about "higher bits", only S/C/A would be covered. In particular,
3196             it makes sense to add "fake" entries mapping combinations of bits 0x1/0x2/0x4 to the
3197             "corresponding" modification columns.
3198            
3199             For example, C above would produce modifier mask C;
3200             this mask would access a certain column in the table of bindings; make the
3201             mask C access the same column. Then an application making a lookup
3202             for a certain character via VkKeyScanW() would see C. Since this is
3203             the mask which is I produced by pressing C, the application
3204             would think (correctly! — but only thanks to this fake entry) that this character
3205             may be produced with C modifier.
3206            
3207             B The maximal number of “modification columns” supported by Windows is 126. A
3208             larger number would make the size of C to overflow the maximal number
3209             storable in the field C of type C = C.
3210            
3211             Given that the column 15 is ignored, this reduces the number of strings associated to
3212             a keypress (with different “modifiers”) to 125.
3213            
3214             =head1 WINDOWS GOTCHAS
3215            
3216             First of all, keyboard layouts on Windows are controlled by DLLs; the only function
3217             of these DLLs is to export a table of "actions" to perform. This table is passed
3218             to the kernel, and that's it - whatever is not supported by the format of this table
3219             cannot be implemented by native layouts. (The DLL performs no "actions" when
3220             actual keyboard events arrive.)
3221            
3222             Essentially, the logic is like that: there are primary "keypresses", and
3223             chained "keypresses" ("prefix keys" [= deadkeys] and keys pressed after them).
3224             Primary keypresses are distinguished by which physical key on keyboard is
3225             pressed, and which of "modifier keys" are also pressed at this moment (as well
3226             as the state of "latched keys" - usually C only, but may be also C). This combination
3227             determines which Unicode character is generated by the keypress, and whether
3228             this character starts a "chained sequence".
3229            
3230             On the other hand, the behaviour of chained keys is governed I by Unicode
3231             characters they generate: if there are several physical keypresses generating
3232             the same Unicode characters, these keypresses are completely interchangeable
3233             inside a chained sequence. (The only restriction is that the first keypress
3234             should be marked as "prefix key"; for example, there may be two keys producing
3235             B<-> so that one is producing a "real dash sign", and another is producing a
3236             "prefix" B<->.)
3237            
3238             The table allows: to map Cs to Cs; to associate a C to several
3239             (numbered) choices of characters to output, and mark some of these choices as prefixes
3240             (deadkeys). (These "base" choices may contain up to 4 16-bit characters (with 32-bit
3241             characters mapped to 2 16-bit surrogates); but only those with 1 16-bit character may
3242             be marked as deadkeys.) For each prefix character (not a prefix key!) one can
3243             associate a table mapping input 16-bit "base characters" to output 16-bit characters,
3244             and mark some of the output choices as prefix characters.
3245            
3246             The numbered choices above are determined by the state of "modifier keys" (such as
3247             C, C, C), but not directly. First of all, C may be
3248             associated to a certain combination of 6 "modifier bits" (called "logical" C,
3249             C, C, C, C and C, but the logical bits are not
3250             required to coincide with names of modifier keys). (Example: one can bind C
3251             to activate C and C bits.) The 64 possible combinations of modifier bits
3252             are mapped to the numbered choices above.
3253            
3254             Additionally, one can define two "separate
3255             numbered choices" in presence of CapsLock (but the only allowed modifier bit is C).
3256             Another way to determine what C is doing: one can mark that it
3257             flips the "logical C" bit (separately on no-modifiers state, C-only state,
3258             and C-only state [?!] - here "only" allows for the C bit to be C).
3259            
3260             C key is considered equivalent to C combination (if those
3261             are present, or always???), and one cannot bind C and C combinations.
3262             Additionally, binding bare C modifier on alphabetical keys (and
3263             C, C<[>, C<]>, C<\>) may confuse some applications.
3264            
3265             B there is some additional stuff allowed to be done (but only in presence
3266             of Far_East_Support installed???). FE-keyboards can define some sticky state (so
3267             may define some other "latching" keys in addition to C). However,
3268             I did not find a clear documentation yet (C in the DDK toolkit???).
3269            
3270             There is a tool to create/compile the required DLL: F of I
3271             Keyboard Layout Creator> (with a graphic frontend F). The tool does
3272             not support customization of modifier bits, and has numerous bugs concerning binding keys which
3273             usually do not generate characters. The graphic frontend does not support
3274             chained prefix keys, adds another batch of bugs, and has arbitrary limitations:
3275             refuses to work if the compiled version of keyboard is already installed;
3276             refuses to work if C is redefined in useful ways.
3277            
3278             B uninstall the keyboard, comment the definition of C,
3279             load in F and create an install package. Then uncomment the
3280             definition of C, and compile 4 architecture versions using F,
3281             moving the DLLs into suitable directories of the install package. Install
3282             the keyboard.
3283            
3284             For development cycle, one does not need to rebuild the install package
3285             while recompiling.
3286            
3287             The following sections classify GOTCHAS into 3 categories:
3288            
3289             L<"WINDOWS GOTCHAS for keyboard users">
3290            
3291             L<"WINDOWS GOTCHAS for keyboard developers using MSKLC">
3292            
3293             L<"WINDOWS GOTCHAS for keyboard developers (problems in kernel)">
3294            
3295             =head1 WINDOWS GOTCHAS for keyboard users
3296            
3297             =head2 MSKLC keyboards not working on Windows 8 without reboot
3298            
3299             The layout is shown as active, but "preview" is grayed out,
3300             and is not shown on the Win-Space list. See also:
3301            
3302             http://www.errordetails.com/125726/activate-custom-keyboard-layout-created-with-msklc-windows
3303            
3304             The workaround is to reboot. Compare with
3305            
3306             http://blogs.msdn.com/b/michkap/archive/2012/03/12/10281199.aspx
3307            
3308             =head2 Default keyboard of an application
3309            
3310             Apparently, there is no way to choose a default keyboard for a certain
3311             language. The configuration UI allows moving keyboards up and down in
3312             the list, but, apparently, this order is not related to which keyboard
3313             is selected when an application starts. (This may be fixed on Windows 8?)
3314            
3315             =head2 Hex input of unicode is not enabled
3316            
3317             One needs to explicitly tinker with the registry (see F)
3318             and then I to enable this.
3319            
3320             =head2 Standard fonts have some chars exchanged
3321            
3322             At least in Consolas and Lucida Sans Unicode φ and ϕ are exchanged.
3323             Compare with Courier and Times. (This may be due to the L
3324             Unicode's pre-v3.0 choice of representative glyphs|http://en.wikipedia.org/wiki/Phi#Computing>,
3325             or the L
3326             between French/English Apla=Didot/Porson's approaches|http://www.greekfontsociety.gr/pages/en_typefaces19th.html>.)
3327            
3328             =head2 The console font configuration
3329            
3330             According to L, it is controlled by Registry hive
3331            
3332             HKLM\SOFTWARE\Microsoft\Windows NT\CurrentVersion\Console\TrueTypeFont
3333            
3334             The key C<0> usually gives C, and the key C<00>
3335             gives C. Adding random numbers does not work; however,
3336             if one adds one more zero (at least when adding to a sequence of zeros),
3337             one can add more fonts.
3338             You need to export this hive (e.g., use
3339            
3340             reg export "HKLM\SOFTWARE\Microsoft\Windows NT\CurrentVersion\Console\TrueTypeFont" console-ttf.reg
3341            
3342             ), save a copy (so you can always restore if the love goes sour)
3343             then edit the resulting file.
3344            
3345             So if the maximal key with 0s is C<00>, add one extra row with an extra 0
3346             at end, and the family name of your font. The "family name" is what the Font
3347             list in C shows for I (a "stacked" icon is shown);
3348             for individual fonts the weight (Regular, Book, Bold etc) is appended. So I add a line
3349            
3350             "000"="DejaVu Sans Mono"
3351            
3352             the result is (omitting Far Eastern fonts)
3353            
3354             Windows Registry Editor Version 5.00
3355            
3356             [HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Windows NT\CurrentVersion\Console\TrueTypeFont]
3357             "949"="..."
3358             "0"="Lucida Console"
3359             "950"="..."
3360             "932"="..."
3361             "936"="..."
3362             "00"="Consolas"
3363             "000"="DejaVu Sans Mono"
3364            
3365             The full file is in F. After importing this
3366             file via F (or give it as parameter to F; both require administrative privileges)
3367             the font is immediately available in menu. (However, it does not work in "existing"
3368             console windows, only in newly created windows.)
3369            
3370             B<(Do not use the example file directly. First inspect the hive exported on your system,
3371             and find the number of 0s to use. Then add a new line with correct number of
3372             zeros - as a value, one can use the string above. This will I the defaults
3373             of your setup.)> Keep in mind that
3374             selection-by-fontfamily is buggy: if you have more than one version of the font
3375             in different weight, it is a Russian roulette which one of them will be taken
3376             (at least for DejaVu, which uses C as the default weight). First install
3377             the "normal" flavor of the font, then do as above (so the system has no way of picking
3378             the wrong flavor!), and only after this install the remaining
3379             flavors.
3380            
3381             B keep in mind that I distribute a good-for-console L<“merge” of two
3382             fonts|http://ilyaz.org/software/fonts/>: C; C brings
3383             in nicely shaped nicely-scalable
3384             glyphs, and C brings a scalable font with complete coverage of BMP (as of 2015, of Unicode C).
3385             (We omit Han/Hangul since it does not fit in a narrow box of a console font.)
3386             (As of 2015, it does not include U+30fb since apparently, this breaks display of
3387             "undefined" character in PUA in Windows' console.)
3388            
3389             B the string to put into C is the I of the font.
3390             The family name is what is shown in the C list of the C — but only
3391             for families with more than one font; otherwise the “metric name” of the font is appended.
3392            
3393             On Windows, it is tricky to find the family name using the default Windows' tools, without
3394             inspecting the font in a font editor. One workaround is to select the font in C
3395             application, then inspect C via:
3396            
3397             reg export HKCU\Software\Microsoft\CharMap character-map-font.reg
3398            
3399             Note: the Microsoft KB article mentioned above lists the wrong way to find the family name.
3400             What is visible in the C dialogue of the font, and in C is the
3401             I. Fortunately, quite often the full name and the family name coincide —
3402             this is what happened with C. To find the "Full name" of the font, one can look into the hive
3403            
3404             HKLM\SOFTWARE\Microsoft\Windows NT\CurrentVersion\Fonts
3405             reg export "HKLM\SOFTWARE\Microsoft\Windows NT\CurrentVersion\Fonts" fonts.reg
3406            
3407             For example, after installing C, I see
3408             C as a key in this hive.
3409            
3410             B for desktop icons coming from the “Public” user (“shared”
3411             icons) which start a console application, the default font is not directly editable.
3412             To reset it, one must:
3413            
3414             =over
3415            
3416             =item *
3417            
3418             copy the F<.lnk> icon file to “your” desktop directory;
3419            
3420             =item *
3421            
3422             start the application using the “new” icon;
3423            
3424             =item *
3425            
3426             change the font via “Properties” of the window's menu;
3427            
3428             =item *
3429            
3430             as administrator, copy the F<.lnk> file back to the F
3431             directory (usually in something like F). Manually refresh
3432             the desktop. Verify that the “old” icon works as expected.
3433             (Now you can remove the “new” icon created on the first step.)
3434            
3435             =back
3436            
3437             =head2 There is no way to show Unicode contents on Windows
3438            
3439             Until Firefox C, one could use FireFox to show arbitrary
3440             Unicode text (limited only by which fonts are installed on your
3441             system). If you upgraded to a newer version, there is no (AFAIK)
3442             Windows program (for general public consumption) which would visualize
3443             Unicode text. The applications are limited either (in the worst case) by
3444             the characters supported by the currently selected font, or (in the best
3445             case) they can show additional characters, but only those considered by the
3446             system as "important enough" (coming from a few of default fonts?).
3447            
3448             There is a workaround for this major problem in FireFox (present at least
3449             up to C). The problem is caused
3450             by L
3451             which blatantly saves a few seconds of load time for a tiny minority of
3452             users, the price being an inability to show Unicode I
3453             (compare with comments L<33|https://bugzilla.mozilla.org/show_bug.cgi?id=705594#c33>
3454             and L<75|https://bugzilla.mozilla.org/show_bug.cgi?id=705594#c75> on the bug report above).
3455            
3456             It is not documented, but this action is controlled by C
3457             setting C. To enable Unicode,
3458             make this setting into C (if you have it in the list as C, double-clicking it would
3459             do this — do search to determine this; otherwise you need to create a new
3460             C entry).
3461            
3462             There is an alternative/additional way to enable extra fonts; it makes
3463             sense if you know a few character-rich fonts present on your system. The (undocumented)
3464             settings C (apparently) control fallback fonts for situations
3465             when a suitable font cannot be found via more specific settings. For example, when
3466             you installed (free) L,
3467             L, L fonts on your system, you may set (these
3468             variables are not present by default; you need to create new C variables):
3469            
3470             font.name-list.sans-serif.x-unicode DejaVu Sans,Symbola,DejaVu Serif,DejaVu Sans Mono,Junicode,Unifont Smooth
3471             font.name-list.serif.x-unicode DejaVu Serif,Symbola,Junicode,DejaVu Sans,Symbola,DejaVu Sans Mono,Unifont Smooth
3472             font.name-list.cursive.x-unicode Junicode,Symbola,DejaVu Sans,DejaVu Serif,DejaVu Sans Mono,Unifont Smooth
3473             font.name-list.monospace.x-unicode DejaVu Sans Mono,DejaVu Sans,Symbola,DejaVu Serif,Junicode,Unifont Smooth
3474            
3475             And maybe also L
3476            
3477             font.name-list.fantasy.x-unicode Symbola,DejaVu Serif,Junicode,DejaVu Sans Mono,DejaVu Sans,Unifont Smooth
3478            
3479             (Above, we use the L||http://ilyaz.org/software/fonts/>
3480             as the font of last resort. Although the glyphs are very coarse, in this role
3481             it is very useful since it contains all the Unicode C characters in BMP.)
3482            
3483             B L of C
3484             contains “fake” glyphs for characters not supported by the font. Such a design error is inexcusable for a TrueType font; this gets
3485             in the way when an application tries to find the best way to show a character. Using
3486             (non-C variant of) my “C” re-build not only fixes this (and some other) problems,
3487             but also makes the font nicely scalable — the original works well only in the size 16px.
3488            
3489             If you set both: the C variables with rich enough fonts,
3490             B C,
3491             then you may have the best of both worlds: the situation when a character cannot
3492             be shown via C settings will be extremely rare, so the possibility of delay
3493             due to C is irrelevant.
3494            
3495             =head2 Firefox misinterprets keypresses
3496            
3497             =over 4
3498            
3499             =item *
3500            
3501             Multiple prefix keys are not supported.
3502            
3503             =item *
3504            
3505             C and C are recognized as a character-generating
3506             keypress (good!), but the character they produce bears little relationship
3507             to what keyboard produces. (In our examples, the character may be available
3508             only via multiple prefix keys!)
3509            
3510             =item *
3511            
3512             After a prefix key, C is not recognized as a
3513             character-generating key.
3514            
3515             =item *
3516            
3517             C is not recognized as a character-generating key.
3518            
3519             =item *
3520            
3521             C is not recognized as a character-generating key sequence (recall
3522             that C should be pressed all the time, and other keys C<+ HEXDIGITS> should be
3523             pressed+released sequentially).
3524            
3525             =item *
3526            
3527             When keyboard has an “extra” modifier key in addition to C (an
3528             analogue of C key), combining it with C or with C is interpreted
3529             by Firefox as if only C or C were pressed.
3530            
3531             =item *
3532            
3533             When keyboard generates different characters on C than on C
3534             (possible with assigning extra modifier bits to C), FireFox interprets any
3535             C as if it were C.
3536            
3537             C when C produces a character, this character is understood
3538             correctly by FF. Same for C (but again, while this works on numeric
3539             keypad, it is still buggy if C is on, or if the key is C.)
3540            
3541             =item *
3542            
3543             The keyboard may have C which produces the same characters as C, but
3544             which behaves differently when combined with other keys. FireFox ignores these
3545             differences.
3546            
3547             This is combinable with other remarks above: e.g., C is interpreted
3548             by FireFox as C.
3549            
3550             =item *
3551            
3552             In addition to this, Firefox replaces C and C modifiers by
3553             an I: Firefox pretends that I C is down. (Here
3554             C is a fake key C which Windows pretends is down when either one
3555             of C or C is down.) Since the situation when C
3556             is down, but neither C nor C are down is not possible, this
3557             may access parts of the keyboard layout not visible to other applications.
3558             (Same for C and C.)
3559            
3560             The net effect is that key combinations involving C or C keys
3561             may behave wrong in Firefox. For example, with version C<0.63> of
3562             L, C and C
3563             are ignored on character-producing keys.
3564            
3565             =item *
3566            
3567             If C produces C< — > (this is C), and
3568             C produces the “cedilla deadkey”, then pressing C
3569             acts as both: first C are inserted, then C<ç>.
3570            
3571             =item *
3572            
3573             A subtle variation of the previous failure mode: If C produces
3574             deadkey X, and C produces the deadkey Y, then combining C
3575             with C gives the expected Y*a combination. However, if combining with
3576             something more complicated (C or C), with which
3577             deadkey Y is not combinable, B the bugs strike:
3578            
3579             =over 4
3580            
3581             =item 1
3582            
3583             in the first case the deadkey behaves as X: it produces a pair of characters
3584             C; here C produces C<α>. (Keep in mind that inserting two
3585             characters is the expected behaviour outside of Firefox, but Firefox usually
3586             “eats” an undefined deadkey combination; and note that it is X, not the
3587             expected Y!).
3588            
3589             =item 2
3590            
3591             in the second case it produces only the character C<ф> generated by C. Here
3592             the behaviour is neither as outside Firefox (where it would produce C) nor as
3593             usual in Firefox (where it would eat the undefined sequence).
3594            
3595             =back
3596            
3597             =back
3598            
3599             Of these problems, C has only C one, but a very cursory inspection shows
3600             other problems: C are not recognized as character-generating keys. (And IE9 just
3601             crashes in most of these situations…)
3602            
3603             =head2 C-keypresses triggering some actions
3604            
3605             For example, newer versions of Windows have graphics driver reacting on Cs by
3606             rotating the screen. Usually, when you know which application is stealing your keypresses, one
3607             can find a way to disable or reconfigure this action.
3608            
3609             For screen rotation: Right-Click on desktop, “Graphics Options”, “Hot Keys”, disable. The way to
3610             reconfigure this is to use “Graphics Properties” instead of “Graphics Options” (but this may depend
3611             on your graphics subsystem).
3612            
3613             =head2 C-keypresses going nowhere
3614            
3615             Some C-keypresses do not result in the corresponding letter on
3616             keyboard being inserted. It looks like they are stolen by some system-wide
3617             hotkeys. See:
3618            
3619             http://www.kbdedit.com/manual/ex13_replacing_altgr_with_kana.html
3620            
3621             If these keypresses would perform some action, one might be able to deduce
3622             how to disable the hotkeys. So the real problem comes when the keypress
3623             is silently dropped.
3624            
3625             I found out one scenario how this might happen, and how to fix this particular
3626             situation. (Unfortunately, it did not fix what I see, when C [but not
3627             C] is stolen.) Installing a shortcut, one can associate a hotkey to
3628             the shortcut. Unfortunately, the UI allows (and encourages!) hotkeys of the
3629             form (which are equivalent to C) - instead
3630             of safe combinations like C or
3631             C (which — by convention — are ignored by keyboard drivers, and do not generate
3632             characters). If/when an application linked to by this shortcut is
3633             gone, the hotkey remains, but now it does nothing (no warning or dialogue comes).
3634            
3635             If the shortcut is installed in one of "standard places", one can find it.
3636             Save this to F (replace F by the suitable drive letter
3637             here and below)
3638            
3639             on error resume next
3640             set WshShell = WScript.CreateObject("WScript.Shell")
3641             Dim A
3642             Dim Ag
3643             Set Ag=Wscript.Arguments
3644             If Ag.Count > 0 then
3645             For x = 0 to Ag.Count -1
3646             A = A & Ag(x)
3647             Next
3648             End If
3649             Set FSO = CreateObject("Scripting.FileSystemObject")
3650             f=FSO.GetFile(A)
3651             set lnk = WshShell.CreateShortcut(A)
3652             If lnk.hotkey <> "" then
3653             msgbox A & vbcrlf & lnk.hotkey
3654             End If
3655            
3656             Save this to F
3657            
3658             set findhotkey=k:\findhotkey
3659             for /r %%A in (*.lnk) do %findhotkey%.vbs "%%A"
3660             for /r %%A in (*.pif) do %findhotkey%.vbs "%%A"
3661             for /r %%A in (*.url) do %findhotkey%.vbs "%%A"
3662             cd /d %UserProfile%\desktop
3663             for /r %%A in (*.lnk) do %findhotkey%.vbs "%%A"
3664             for /r %%A in (*.pif) do %findhotkey%.vbs "%%A"
3665             for /r %%A in (*.url) do %findhotkey%.vbs "%%A"
3666             cd /d %AllUsersProfile%\desktop
3667             for /r %%A in (*.lnk) do %findhotkey%.vbs "%%A"
3668             for /r %%A in (*.pif) do %findhotkey%.vbs "%%A"
3669             for /r %%A in (*.url) do %findhotkey%.vbs "%%A"
3670             cd /d %UserProfile%\Start Menu
3671             for /r %%A in (*.lnk) do %findhotkey%.vbs "%%A"
3672             for /r %%A in (*.pif) do %findhotkey%.vbs "%%A"
3673             for /r %%A in (*.url) do %findhotkey%.vbs "%%A"
3674             cd /d %AllUsersProfile%\Start Menu
3675             for /r %%A in (*.lnk) do %findhotkey%.vbs "%%A"
3676             for /r %%A in (*.pif) do %findhotkey%.vbs "%%A"
3677             for /r %%A in (*.url) do %findhotkey%.vbs "%%A"
3678             cd /d %APPDATA%
3679             for /r %%A in (*.lnk) do %findhotkey%.vbs "%%A"
3680             for /r %%A in (*.pif) do %findhotkey%.vbs "%%A"
3681             for /r %%A in (*.url) do %findhotkey%.vbs "%%A"
3682             cd /d %HOMEDRIVE%%HOMEPATH%
3683             for /r %%A in (*.lnk) do %findhotkey%.vbs "%%A"
3684             for /r %%A in (*.pif) do %findhotkey%.vbs "%%A"
3685             for /r %%A in (*.url) do %findhotkey%.vbs "%%A"
3686            
3687             (In most situations, only the section after the last C is important;
3688             in my configuration all the "interesting" stuff is in C<%APPDATA%>. Running
3689             this should find all shortcuts which define hot keys.)
3690            
3691             Run the cmd file. Repeat in the "All users"/"Public" directory. It should
3692             show a dialogue for every shortcut with a hotkey it finds. (But, as I said,
3693             it did not fix I problem: C works in F test window,
3694             and nowhere else I tried...)
3695            
3696             =head2 C-keypresses starting bloatware applications
3697            
3698             (Seen on IdeaPad.) Some pre-installed programs may steal C-keypresses;
3699             it may be hard to understand what is the name of the application even when
3700             the stealing results in user-visible changes.
3701            
3702             One way to deal with it is to start C in C (or
3703             C) panel, and click on CPU column until one gets decreasing-order
3704             of CPU percentage. Then one can try to detect which process is becoming
3705             active by watching top rows when the action happens (or when one manages to
3706             get back to the desktop from the full-screen bloatware); one may need to
3707             repeat triggering this action several times in a row. After you know
3708             the name of the executable, you can google to find out how to disable it, and/or
3709             whether it is safe to kill this process.
3710            
3711             B On IdeaPad, it was F (safe to kill). It was stealing
3712             C and C.
3713            
3714             B On MSI, a similar stealer was F (some claim it is used to show on-screen
3715             animation when special laptop keys are pressed; if you do not need them, it is safe
3716             to kill). It was stealing C. (But to find I one, I needed to
3717             kill all suspicious apps one by one…)
3718            
3719             =back
3720            
3721             =head1 WINDOWS GOTCHAS for keyboard developers using MSKLC
3722            
3723             =head2 Several similar F created keyboards may confuse the system
3724            
3725             Apparently, the system may get majorly confused when the C
3726             of the project gets changed without changing the DLL (=project) name.
3727            
3728             (Tested only with Win7 and the name in the DESCRIPTIONS section
3729             coinciding with the name on the KBD line - both in F<*.klc> file.)
3730            
3731             The symptoms: I know how one can get 5 different lists of keyboards:
3732            
3733             =over 4
3734            
3735             =item 1
3736            
3737             Click on the keyboard icon in the C - usually shown
3738             on the toolbar; positioned to the right of the language code EN/RU
3739             etc (keyboard icon is not shown if only one keyboard is associated
3740             to the current language).
3741            
3742             =item 2
3743            
3744             Go to the C settings (e.g., right-click on the
3745             Language bar, Settings, General).
3746            
3747             =item 3
3748            
3749             On this C page, press C button, go to the language
3750             in question.
3751            
3752             =item 4
3753            
3754             Check the F<.klc> files for recently installed Input Languages.
3755            
3756             =item 5
3757            
3758             In MS Keyboard Layout Creator, go to C
3759             list.
3760            
3761             =back
3762            
3763             It looks like the first 4 get in sync if one deletes all related keyboards,
3764             then installs the necessary subset. I do not know how to fix 5 - MSKLC
3765             continues to show the old name for this project.
3766            
3767             Another symptom: Current language indicator (like C) on the language
3768             bar disappears. (Reboot time?)
3769            
3770             Is it related to C<***\Local Settings\MuiCache\***> hive???
3771            
3772             Possible workaround: manually remove the entry in C
3773             (the last 4 digits match the codepage in the F<.klc> file).
3774            
3775             =head2 Too long description (or funny characters in description?)
3776            
3777             If the name in the C section is too long, the name shown in
3778             the list C<2> above may be empty.
3779            
3780             (Checked only on Win7 and when the name in the DESCRIPTIONS section
3781             coincides with the name on the C line - both in F<*.klc> file.
3782             Length=63 works fine, Length=64 triggers the bug.)
3783            
3784             (Fixed by shortening the name [but see
3785             L<"Several similar F created keyboards may confuse the system">
3786             above!], so maybe it was
3787             not the length but some particular character (C<+>?) which was confusing
3788             the system. (I saw a report on F bug when the description had an apostrophe
3789             character C<'>.))
3790            
3791             =head2 F ruins names of dead key when reading a F<.klc>
3792            
3793             When reading a F<.klc> file, MS Keyboard Layout Creator may ruin the names
3794             of dead keys. Symptom: open the dialogue for a dead key mapping
3795             (click the key, check that C has checkmark, click on the
3796             C<...> button near the C checkbox); then the name (the first
3797             entry field) contains some junk. (Looks like a long ASCII string
3798            
3799             U+0030 U+0030 U+0061 U+0039
3800            
3801             .)
3802            
3803             B if all one needs is to compile a F<.klc>, one can run
3804             F directly.
3805            
3806             B correct ALL these names manually in MSKLC. If the names are
3807             the Unicode name for the dead character, just click the C button
3808             near the entry field. Do this for ALL the dead keys in all the registers
3809             (including C!). If C is not made "semantically meaningful",
3810             there are 6 views of the keyboard (C
3811             AltGr, AltGr+Shift>) - check them all for grayed out keys (=deadkeys).
3812            
3813             Check for success: C, use a temporary name.
3814             Inspect near the end of the generated F<.klc> file. If OK, you can
3815             go to the Project/Build menu. (Likewise, this way lets you find which
3816             deadkey's names need to be fixed.)
3817            
3818             !!! This is time-consuming !!! Make sure that I things are OK
3819             before you do this (by C, C).
3820            
3821             BTW: It might be that this is cosmetic only. I do not know any bad
3822             effect - but I did not try to use any tool with visual feedback on
3823             the currently active sub-layout of keyboard.
3824            
3825             =head2 Double bug in F with dead characters above 0x0fff
3826            
3827             This line in F<.klc> file is treated correctly by F's builtin keyboard tester:
3828            
3829             39 SPACE 0 0020 00a0@ 0020 2009@ 200a@ // ,  , ,  ,   // SPACE, NO-BREAK SPACE, SPACE, THIN SPACE, HAIR SPACE
3830            
3831             However, via F it produces the following two bugs:
3832            
3833             static ALLOC_SECTION_LDATA MODIFIERS CharModifiers = {
3834             &aVkToBits[0],
3835             7,
3836             {
3837             // Modification# // Keys Pressed
3838             // ============= // =============
3839             0, //
3840             1, // Shift
3841             2, // Control
3842             SHFT_INVALID, // Shift + Control
3843             SHFT_INVALID, // Menu
3844             SHFT_INVALID, // Shift + Menu
3845             3, // Control + Menu
3846             4 // Shift + Control + Menu
3847             }
3848             };
3849             .....................................
3850             {VK_SPACE ,0 ,' ' ,WCH_DEAD ,' ' ,WCH_LGTR ,WCH_LGTR },
3851             {0xff ,0 ,WCH_NONE ,0x00a0 ,WCH_NONE ,WCH_NONE ,WCH_NONE },
3852             .....................................
3853             static ALLOC_SECTION_LDATA LIGATURE2 aLigature[] = {
3854             {VK_SPACE ,6 ,0x2009 ,0x2009 },
3855             {VK_SPACE ,7 ,0x200a ,0x200a },
3856            
3857             Essentially, C<2009@ 200a@> produce C (= multiple 16-bit chars)
3858             instead of deadkeys. Moreover, these ligatures are put on non-existing
3859             "modifications" 6, 7 (the maximal modification defined is 4; so the code uses
3860             the C flags instead of "modification number" in
3861             the ligatures table).
3862            
3863             =head2 F keyboards handle C, C , C and C differently than US keyboard
3864            
3865             The US keyboard produces (as the
3866             “string value”) the corresponding Control-letter when
3867             C is pressed. (In console applications,
3868             C<\x00> is not visible.) F does not reproduce this
3869             behaviour. This may break an application if
3870             it was not specifically tested with “complicated” keyboards.
3871            
3872             The only way to fix this from the “naive” keyboard
3873             layout DLL (i.e., the kind that F generates) which I found is to
3874             explicitly include C as a handled combination, and return
3875             C on such keypresses. (This is enabled in the
3876             keyboards generated by this module - not customizable in v0.12.)
3877            
3878             =head2 "There was a problem loading the file" from F
3879            
3880             Make line endings in F<.klc> DOSish.
3881            
3882             =head2 C do not work
3883            
3884             Make line endings in F<.klc> DOSish (when given as input to F -
3885             it gives no error messages, and deadkeys work [?!]).
3886            
3887             =head2 Error 2011 (ooo-us, line 33): There are not enough columns in the layout list.
3888            
3889             The maximal line end of F is exceeded (a line or two ahead). Try removing
3890             inline comments. If this helps, change the workflow to cut off long lines (250 bytes is OK).
3891            
3892             =head2 C
3893            
3894            
3895            
3896             from F. This means that the internal table of virtual keys
3897             mapped to non-C (sic!) scancodes is overloaded.
3898            
3899             Time to switch to direct generation of F<.c> file? Or you need to
3900             triage the “added” virtual keys, and decide which are less important
3901             so you can delete them from the F<.klc> file.
3902            
3903             =head2 Only the first 8 with-modifiers columns are processed by F
3904            
3905             Time to switch to direct generation of F<.c> file?
3906            
3907             =head2 Only the first digit of the which-modifier-column is output by F in C
3908            
3909             Time to switch to direct generation of F<.c> file?
3910            
3911             =head2 F produces C section with meaningless entries for prefix keys C<0x08>, C<0x0A>, C<0x0D>
3912            
3913             These entries do not stop keyboard from working. They look like C...
3914            
3915             Time to switch to direct generation of F<.c> file?
3916            
3917             =head2 It is not clear how to compile F<.C> files emitted by F
3918            
3919             This distribution includes a script F which can do this. It is
3920             inspired by
3921            
3922             http://stackoverflow.com/questions/3360746/how-can-i-compile-programmer-dvorak
3923             http://levicki.net/articles/tips/2006/09/29/HOWTO_Build_keyboard_layouts_for_Windows_x64.php
3924            
3925             It allows us to build using the cycle
3926            
3927             =over 4
3928            
3929             =item *
3930            
3931             Build skeleton F<.klc> file.
3932            
3933             =item *
3934            
3935             Convert to B using F.
3936            
3937             =item *
3938            
3939             Patch against bugs in F.
3940            
3941             =item *
3942            
3943             Patch in features not supported by F.
3944            
3945             =item *
3946            
3947             Compile and link DLLs.
3948            
3949             =back
3950            
3951             (This assumes that the installer was already built by F using a
3952             “simplified-to-nothing” F<.klc> file which does not trigger the F bugs).
3953            
3954             (See also L.)
3955            
3956             =head2 F cannot ignore column=15 of the keybinding definition table
3957            
3958             (Compare with L<"Windows ignores column=15 of the keybinding definition table">.)
3959            
3960             F requires that all the columns are associated to a modifier-bitmap.
3961             But column=15 should not be associated to any.
3962            
3963             The workaround is to associate it to the bitmap which should not be bound to any
3964             column (like C<4=KBDALT>). In the output C<.C> file, one would have 15 instead
3965             of C for the bitmap 4, but C is defined to be 15 anyway…
3966            
3967             =head2 F ignores bits above 0x20 in the modification columns descriptor
3968            
3969             Time to switch to direct generation of F<.C> files?
3970            
3971             =head2 F cannot assign more than one bitmask to a modification column
3972            
3973             Time to switch to direct generation of F<.C> files?
3974            
3975             (Quite often, one combination of modifiers should produce the same characters as
3976             another one. The format of keyboard layout tables allows them to share a
3977             modification column. The format of F<.klc> files does not allow sharing.)
3978            
3979             =head2 F forgets to emit C/6/8
3980            
3981             If the F<.klc> file has many modification columns, the emitted aVkToWcharTable
3982             contains only C/2.
3983            
3984             =head2 F confuses LIGATURES on unusual keys
3985            
3986             For example, C may be replaced by C in the LIGATURES table.
3987            
3988             Time to switch to direct generation of F<.C> files?
3989            
3990             =head2 F places C at end of the generated F<.c> file
3991            
3992             The offset of this structure should be no more than 0x10000. Thus keyboards
3993             with large tables of prefixed keys may fail to load. This may be related to
3994             the bug L<"If data in C takes too much space, keyboard is mis-installed, and “Language Bar” goes crazy">.
3995            
3996             Time to switch to direct generation of F<.C> files?
3997            
3998             =head2 Error "the required resource DATABASE is missing" from F
3999            
4000             The localized C in F<.klc> file contains a character outside of
4001             the repertoire of the codepage in question. Removing offending characters, or
4002             removing the C altogether should fix this. (But either way, the name of
4003             layout in the C of the Language Bar may become empty.) Having a
4004             different localized description has a side effect that the name of the layout
4005             shown in the Language Bar popups is localized.
4006            
4007             (The localized description is what is put into the C of the
4008             DLL file; it is this resource which is mentioned in the registry. (There
4009             will be no such resource when the localized C is missing.))
4010            
4011             (The failure of F is not reproducible after a reboot!)
4012            
4013             Apparently, this has nothing to do with the length, so the (older) conjectures
4014             below are wrong (although the F<.RC> file generated by MSKLC has the [non-localized] name
4015             truncated after 40 chars in the field C — but not in other fields):
4016            
4017             It looks like there is a buffer overflow in MSKLC, and sometimes the generated
4018             F in the install package would just exit with this error. The
4019             apparent reason is the length of the C-like fields.
4020            
4021             Workaround: it looks like the C field is not used in F.
4022             So generate an “extra dummied” F<.klc> file I (with shortened descriptions),
4023             make an install package from it, and mix the F from the “extra
4024             dummied” variant with the rest of the install package from a
4025             “less dummied” F<.klc> file.
4026            
4027             The alternative is to get rid of F completely, and ask users
4028             to run the appropriate F<.msi> file from the install package by hand
4029             (choosing based on 32-bit vs 64-bit architecture).
4030            
4031             =head2 Summary of the productive workflow with F<.klc>:
4032            
4033             If direct generation of F<.C> files is out of question, the following workflow
4034             may be used (some of these steps may be omitted depending on how complicated
4035             your F<.klc> layout is; for practical implementation, see
4036             L creation|http://cpansearch.perl.org/src/ILYAZ/UI-KeyboardLayout/examples/build-iz.pl>
4037             and L to F<.dll>
4038             processing|http://cpansearch.perl.org/src/ILYAZ/UI-KeyboardLayout/examples/build_here.cmd>):
4039            
4040             =over 4
4041            
4042             =item
4043            
4044             Make an “extra dummied” F<.klc> (short descriptions, short dummy C,
4045             C, C, C sections, no C section). Run
4046             it through GUI MSKLC (C, then C, C).
4047             Store the generated F, rename the directory.
4048            
4049             =item
4050            
4051             Make a “less dummied” F<.klc> file (as above, but with the correct description).
4052             Do as above, and mix in the F from the previous step.
4053            
4054             =item
4055            
4056             Run the “real” F<.klc> file through the F CLI. Fix errors in the
4057             generated F<.C> and F<.H> files (using scripts and patches if needed).
4058            
4059             (One may need to remove a few lines in the C section to avoid buffer overflows too.)
4060            
4061             =item
4062            
4063             Compile the fixed F<.C> files. (One may need to split them in two to
4064             decrease the offset of the static table in the DLL to the level
4065             Windows can handle: less than 64K.) Mix the generated F<.dll> files
4066             with the install package made above.
4067            
4068             =back
4069            
4070             =head1 WINDOWS GOTCHAS for application developers (problems in kernel)
4071            
4072             =head2 Many applications need to know the state of hidden flag C
4073            
4074             To decide what to do with a keypress, an application may need to know
4075             whether C is enabled in the keyboard (in other words, if
4076             C is faked when C is pressed). For example, when
4077             the kernel processes accelerators, it would not trigger C
4078             if C was pressed with C in the presence of this flag — even
4079             though C I visible as being pressed (one needs to press
4080             C).
4081            
4082             An application with configurable bindings may need to emulate this action
4083             of TranslateMessage(). One of the ways to do this may be to do (when
4084             C and C are down)
4085            
4086             =over 4
4087            
4088             =item *
4089            
4090             Set a global flag disabling processing of C in the application;
4091            
4092             =item *
4093            
4094             Call TranslateAccelerator() with an improbable virtual key (C or
4095             some such) and appropriate ad hoc translation table;
4096            
4097             =item *
4098            
4099             Check whether accelerator was recognized (if so, C is not enabled).
4100            
4101             =back
4102            
4103             Possible problems with this approach: the “improbable key” should better not
4104             trigger some system accelerator (this is why one should not use “ordinary”
4105             keys). Additionally, some system accelerators react to the Windows key as a
4106             modifier; so acceleration table may specify this as a certain flag. This
4107             would imply that the algorithm above may not work when C key is
4108             down. (Not tested.)
4109            
4110             (Or maybe these C bindings are not accelerators, and are
4111             processed in a different part of keyboard input events. — Then there is
4112             little to worry about.)
4113            
4114             =head1 WINDOWS GOTCHAS for keyboard developers (problems in kernel)
4115            
4116             =head2 It is hard to understand what a keyboard really does
4117            
4118             To inspect the output of the keyboard in the console mode (may be 8-bit,
4119             depending on how Perl is compiled), one can run
4120            
4121             perl -MWin32::Console -wle 0 || cpan install Win32::Console
4122             perl -we "sub mode2s($){my $in = shift; my @o; $in & (1<<$_) and push @o, (qw(rAlt lAlt rCtrl lCtrl Shft NumL ScrL CapL Enh ? ??))[$_] for 0..10; qq(@o)} use Win32::Console; my $c = Win32::Console->new( STD_INPUT_HANDLE); my @k = qw(T down rep vkey vscan ch ctrl); for (1..20) {my @in = $c->Input; print qq($k[$_]=), ($in[$_] < 0 ? $in[$_] + 256 : $in[$_]), q(; ) for 0..$#in; print(@in ? mode2s $in[-1] : q(empty)); print qq(\n)}"
4123            
4124             This installs Win32::Console module (if needed; included with ActiveState Perl)
4125             then reports 20 following console events (press and keep C key
4126             to exit by generating a “harmless” chain of events). B the reported
4127             input character is not processed (via ToUnicode(); hence chained keys and
4128             multiple chars per key are reported only as low-level), and is reported as
4129             a signed 8-bit integer (so the report for above-8bit characters is
4130             completely meaningless).
4131            
4132             T=1; down=1; rep=1; vkey=65; vscan=30; ch=240; ctrl=9; rAlt lCtrl
4133             T=1; down=0; rep=1; vkey=65; vscan=30; ch=240; ctrl=9; rAlt lCtrl
4134            
4135             This reports single (T=1) events for keypress/keyrelease (down=1/0) of
4136             C. One can see that C generates C modifiers
4137             (this is just a transcription of C),
4138             that C is on virtual key 65 (this is C) with virtual scancode
4139             30, and that the generated character (it was C<æ>) is C<240>.
4140            
4141             The character is approximated to the current codepage. For example, this is
4142             C entering C<β = U+03b2> in codepage C:
4143            
4144             T=1; down=1; rep=1; vkey=66; vscan=48; ch=223; ctrl=0;
4145             T=1; down=0; rep=1; vkey=66; vscan=48; ch=223; ctrl=0;
4146            
4147             Note that C<223 = 0xDF>, and C. So I is substituted by
4148             I.
4149            
4150             There is also a script F in this distribution
4151             which does a little
4152             bit more than this. One can also give this script the argument C (or C,
4153             where C is the 0-based number among the listed keyboard layouts) to report
4154             ToUnicode() results, or argument C to report what is produced by reading raw
4155             characters (as opposed to events) from the console.
4156            
4157             =head2 It is not documented how to make a with-prefix-key(s) combination produce 0-length string
4158            
4159             Use C<0000@> (in F<.klc>), or DEADKEY 0 in a F<.c> file. Explanation: what a prefix key
4160             is doing is making the kernel remember a word (the state of the finite automaton), and not
4161             producing any output character. Having no prefix key corresponds to the state being 0.
4162            
4163             Hence making prefix_key=0 is the same as switching the finite automaton to the initial
4164             state, and not producing any character — and this is exactly what is requested in the question.
4165            
4166             =head2 If data in C takes too much space, keyboard is mis-installed, and “Language Bar” goes crazy
4167            
4168             Installation reports success, the keyboard appears in the list in the Language Bar's "Settings".
4169             But the keyboard is not listed in the menu of the Language Bar itself. (This is not fixed
4170             by a reboot.)
4171            
4172             Deinstalling (by F's installer) in such a case removes one (apparently, the last) of the listed keyboards for the language;
4173             at least it is removed from the menu of the Language Bar itself. However, the list in the “Settings”
4174             does not change! One can't restore the (wrongly) removed (unrelated!) layout by manipulating the latter list.
4175             (I did not try to check what will happen if only one keyboard for the language is available — is it removed
4176             for good?) I condition is fixed by a reboot: the “missing” “unrelated” layout jumps to existence.
4177            
4178             I did not find a way to restore the deleted keyboard layout (without a reboot). Experimenting with these is kinda painful:
4179             with each failure,
4180             I add one extra keyboard to the list in the “Settings”; - so the list is growing and growing! [Better
4181             add useless-to-you keyboards, since until the reboot you will never be able to install them again.]
4182            
4183             B this condition reappeared in update from v0.61 to v0.63 of B layouts. Between
4184             these versions, there was
4185             a very small increment of the size: one modification column was added, and two deadkeys were added.
4186             Removing a bunch of (useless?) dead keys descriptions fixed this again; but now I have my doubts on
4187             whether it was due to I increasing the size of C… Maybe it is due to the total
4188             size of certain segments in the DLL.
4189            
4190             (This may be related to the bug L<"F places C at end of the generated F<.c> file">.)
4191            
4192             =head2 Windows ignores column=15 of the keybinding definition table
4193            
4194             Note that 15 is C; this column number is used to indicate that
4195             this particular combination of modifiers does not produce keys. In particular,
4196             the generator must avoid this column number.
4197            
4198             Workaround: put junk into this column, and use different columns for useful modifier
4199             combinations. The mapping from modifiers to columns need not be 1-to-1.
4200             (But see L<"F cannot ignore column=15 of the keybinding definition table">.)
4201            
4202             =head2 Windows combines modifier bitmaps for C, C and C on C
4203            
4204             (At least when C is special in the keyboard,) the modifier bitmap bound to this
4205             key is actually bit-or of bitmaps above. Essentially, this prohibits assigning
4206             interesting flag combinations to C.
4207            
4208             The (very limited) workaround is to ensure that the flags one puts on C contain
4209             all the flags assigned to the above VK codes. (This does not change anything, but
4210             at least makes the assignments less confusing for human inspection.)
4211            
4212             =head2 Windows ignores C if its modifier bitmaps is not standard
4213            
4214             Adding C to C disables console sending non-modified char on keydown.
4215             Together with the previous problem, this looks like essentially prohibiting
4216             putting interesting bitmaps on the left modifier keys.
4217            
4218             Workaround: one can add C on C. It looks like the combination
4219             C is compatible with Windows' handling of C (both in console,
4220             and for accessing/highlighting the menu entries). (However, since only C
4221             is going to be stripped for handling of C, the modification column for
4222             C should duplicate the modification column for no-C-flags. Same with
4223             C added.)
4224            
4225             =head2 When C produces C, problems in Notepad
4226            
4227             Going to the Save As dialogue in Notepad loses "speciality of AltGr" (it highlights Menu);
4228             one needs to switch layouts via LAlt+LShift to restore.
4229            
4230             I do not know any workaround.
4231            
4232             =head2 Console applications cannot detect when a keypress may be interpreted as a “command”
4233            
4234             The typical logic of an (advanced) application is that it interprets certain keypresses
4235             (combinations of keys with modifiers) as “commands”. To do this in presence of user-switchable
4236             keyboards, when it is not known in compile time which key sequences generate characters,
4237             the application must be able to find at runtime which keypresses are characters-generating,
4238             and which are not. The latter keypresses are candidates to be checked whether they should trigger commands
4239             of the application.
4240            
4241             For final keypresses of a character-generating key-sequence, an application gets a notification
4242             from the ReadConsoleEvent() API call that this keypress generates a character. However, for the
4243             keypresses of the sequence which are not the last one (“dead” keys), there is no such notification.
4244            
4245             Therefore, there is no way to avoid dead keys triggering actions in an application. What is the
4246             difference with non-console applications? First of all, they get such a notification (with the
4247             standard TranslateMessage()/DispatchMessage() sequence of API calls, on WM_KEYDOWN, one can
4248             PeekMessage() for WM_SYSDEADCHAR/WM_DEADCHAR and/or WM_SYSCHAR/WM_CHAR). Second, the windowed
4249             application may call ToUnicode(Ex)() to calculate this information itself.
4250            
4251             Well, why a console application cannot use the second method? First, the active keyboard layout
4252             of a console application is the default one. When user switches the keyboard layout of the console,
4253             the application gets no notification of this, and its keyboard layout does not change. This makes
4254             ToUnicode() useless. Moreover, due to
4255             security architecture, the console application cannot query the ID of the thread serving the message
4256             loop of the console, so cannot query GetKeyboardLayout() of this thread. Hence ToUnicodeEx() is
4257             useless too.
4258            
4259             (There may be a lousy workaround: run ToUnicodeEx() on B the installed keyboard layouts, and
4260             check which of them are excluded by comparing with results of ReadConsoleEvent(). Interpret
4261             contradictions as user changing the keyboard layout. Of course, on several keypresses following
4262             a change of keyboard layout one may get unexpected results. And if two similar
4263             keyboards are installed, one may also never get definite answer on which of them is currently active.)
4264            
4265             (To handle this workaround, one must have a way to call ToUnicode() in a way which does not change
4266             the internal state of the keyboard driver. Observe:
4267            
4268             =over 4
4269            
4270             =item *
4271            
4272             Such a way is not documented.
4273            
4274             =item *
4275            
4276             Watch the character reported by ReadConsoleEvent() on the C event for deadkeys. This is
4277             the character which a deadkey would produce if it is pressed twice (and is 0 if pressing it twice
4278             results in a deadkey again). The only explanation for this I can fathom is that the console's
4279             message queue thread calls such a non-disturbing-state version of ToUnicode().
4280            
4281             Why it should be “non-disturbing”? Otherwise it would reset the state “this deadkey was pressed”,
4282             and the following keypress would be interpreted as not preceded by a deadkey. And this is not
4283             what happens. (If one does it with usual ToUnicode() call, DOWN reports a deadkey, but UP reports
4284             “ignored”; to see this, run F with arguments C
4285             with a keyboard which produces ç on C. Here C is the number of the keyboard in the list
4286             of available keyboards reported by C).
4287            
4288             Well, when one I that some API calls are possible, it is just a SMP to find it out
4289             (see F). It turns out that given argument C achieves
4290             the behaviour of a console during KeyUp event. (As a side benefit, it also avoids another
4291             glitch in Windows' keyboard processing: it reports the character value in presence of C
4292             modifier — recall that ToUnicodeEx() ignores C unless C is present too. Well, I
4293             checked this so far only on KeyUp event, where console produces mysterious results.)
4294            
4295             =item *
4296            
4297             However, even without using undocumented flags, it is not hard to construct such a non-disturbing version of ToUnicode(). The only
4298             ingredient needed is a way to reset the state to “no deadkeys pressed” one. Then just store
4299             keypresses/releases from the time the last such state was found, call ToUnicode(), reset state,
4300             and call ToUnicode() again for all the stored keypresses/releases; then update the stored state
4301             appropriately.
4302            
4303             =item *
4304            
4305             But I strongly doubt that console's message loop does anything so advanced. My bet would be that
4306             it uses a non-documented call or non-documented flags. (Especially since the approach above does
4307             not handle C the same way as the console does.)
4308            
4309             =back
4310            
4311             =head2 In console, which combinations of keypresses may deliver characters?
4312            
4313             In addition to the problem outlined in the preceding section, a console application should
4314             better support input of character-by-numeric-code, and of copy-and-pasted strings. Actually,
4315             the second situation, although undocumented, is well-engineered, so let us document these two
4316             here. (These two should better be documented together, since pasting may fake input by
4317             repeated character-by-numeric-code.)
4318            
4319             Pasting happens character-by-character (more precisely, by UTF-16 codepoints), but C
4320             would group them together:
4321            
4322             =over 4
4323            
4324             =item *
4325            
4326             When pasting a character present in a keyboard layout with at most C modifier,
4327             a fully correct emulation of a sequence C
4328             is produced (without C if it is not needed). The character (as usual) is delivered
4329             on both C events.
4330            
4331             =item *
4332            
4333             When pasting a character present in a keyboard layout, but needing I modifiers (not
4334             only C), a partial emulation of a certain key tap is produced:
4335             C. The character (as usual) is delivered
4336             on both C events.
4337            
4338             Quirks: first, if C is needed, its press/release are not emulated, but the flags on
4339             the C events indicate presence of a C. Second (by this, the
4340             pasting may be distinguished from “real” keypress), C press/release are not emulated,
4341             but it is indicated as "present" in flags of all 4 events.
4342            
4343             =item *
4344            
4345             When pasting control-characters (available via the C-maps of the layout),
4346             the press/release of C is not emulated (but the flags indicate C downs); however,
4347             if C is needed, its press/release is emulated (and flags for I events do not
4348             have C is down).
4349            
4350             Pasting C delivers only U+000D (CR) — the typical maps have it on C and C<^M>,
4351             and C is delivered.
4352            
4353             =item *
4354            
4355             Otherwise, an emulation of C is sent, with the C delivering a character:
4356             C. The C
4357             are very unusual combinations of scancode/vkey for C<6> and C<3> on the numeric keyboard:
4358             they are delivered as if C (or C) is down, but the flags indicate that
4359             these modifiers are "not present".
4360            
4361             The “honest” C delivers U+003f, which is "C" (as above, it is delivered on release
4362             of C).
4363            
4364             =item *
4365            
4366             In general, entering characters-by-numeric-code (entering the decimal — or “KP+” then hex — while
4367             C is down) produces the resulting character when C is released. Processing this may create
4368             a significant problem for applications which interpret C as “commands” (e.g., if
4369             they interpret C as “word-left”).
4370            
4371             There may be several work-arounds. First, usually hex input is much more important than decimal,
4372             and usually, C is not bound to commands. Then the application may ignore characters
4373             delivered on C B the C was immediately followed by the press/release
4374             of C; additionally, it should disable the interpret-as-commands logic while C is down,
4375             and its press was followed by press/release of C.
4376            
4377             Second, it is not crucial to deliver Unicode characters numbered in single-digits. So one may
4378             require that commands are triggered by C only when pressed one-by-one (releasing
4379             C between them), and consider multi-digit presses as input-by-number only.
4380            
4381             Finally, Windows aborts entering character-by-numeric-code if any unexpected key press interferes.
4382             For example, C is “C”, but pressing-releasing C after pressing down C
4383             would not deliver anything. If an application follows the same logic (in reverse!) when recognizing
4384             keypressing resulting in “commands”, the users would have at least a “technical ability” to enter
4385             both commands, I enter characters-by-numeric-code.
4386            
4387             =back
4388            
4389             This is tested I in the situation when a layout has C present, and all the
4390             "with-extra-modifiers" characters are on bitmap entries with C bit marked. This is
4391             a situation discussed in the section L<"A convenient assignment of C bitmaps to modifier keys">.
4392            
4393             It is plausible that only C, C and C bits in a bitmap returned by C are
4394             acted upon (with C flag added based on C). Some popular keyboard layouts
4395             use C bit on the C key; under this assumption, the characters available via C key
4396             would be delivered with at most C modifier.
4397            
4398             All the emulated events do not have C indicated as "present" in their flags.
4399            
4400             =head2 Behaviour of C vs C
4401            
4402             When both combinations produce characters (say, X and Y), it is not clear
4403             how an application should decide whether it got C event (for menu
4404             entry starting with Y), or an C event.
4405            
4406             A partial workaround (if the semantic of the layout fits into the limited number
4407             of bits in the ORed mask): make all the keys which may be combined with
4408             C to have the C bit in the mask set; add some extra bit
4409             to C keys to be able to distinguish them. Then at least the
4410             kernel will produce the correct character on the ToUnicode() call (hence
4411             in TranslateMessage()). [The potential for an application to be confused
4412             is still large.]
4413            
4414             =head2 Customization of what C is doing is very limited
4415            
4416             (See the description of the semantic of C in L<"Keyboard input on Windows, Part II: The semantic of ToUnicode()">.)
4417            
4418             A partial workaround (if the semantic of the layout fits into the limited number
4419             of bits in the ORed mask): make all the modifier combinations (except for the
4420             base layer) to have C and C bits set; add some extra bits to
4421             C keys and C keys (apparently, only C will work with C)
4422             to be able to distinguish them. Then the C flag will affect all
4423             these combinations too.
4424            
4425             =head2 C combination: multiple problems
4426            
4427             First of all, sometimes C is ignored when used with this combination.
4428             (Fixed by reboot. When this happens, C does not work also with combinations
4429             with C and/or C). On the
4430             other hand, C works as intended. (I even got an impression that
4431             sometimes C works when C is active; cannot reproduce this,
4432             though.)
4433            
4434             I suspect this is related to the binding (usually not active) of C to switch between
4435             keyboards of a language. It may have suddenly jumped into existence (without my interaction).
4436             Simultaneously, this option disappeared from the UI to change keyboard options
4437             (L in Windows 7). It might be that
4438             press/release of C is filtered out in presence of C? (Looks
4439             like this for C now...)
4440            
4441             (I also saw what looks like C key being stuck in some rare situations — fixed
4442             by pressing it again. Do not know how to reproduce this. It is interesting to
4443             note that one of the bits in the mask of the C key is 0x80, and there is
4444             a define for this bit in F named C — but it is undocumented,
4445             and, judging by names, one might think that C would work in pair with the flag
4446             C of CAttributes>.)
4447            
4448             B Apparently, key up/down for many combinations of C are
4449             not delivered to applications.
4450             Key up/down for C<`/5/6/-/=/Z/X/C/V/M/,/./Enter/rShift> are not delivered here when used with C modifiers
4451             (at least in a console). Adding C does not change this. Same for C
4452             and C (but not for keypad ones!).
4453            
4454             Moreover, when used with C or C, this behaves as if both these
4455             keys were pressed. Same with the pair C and C (is it hardware-dependent???).
4456            
4457             (From time to time C do not work — neither with nor without C.)
4458            
4459             No workarounds are known. Although I could reproduce this on 3 physically different
4460             keyboards, this is, most probably, a design defect of hardware keyboards. Compare with
4461             L and
4462             L.
4463             Another related tidbit: apparently, L
4464             after pressing some modifier keys|http://ccm.net/forum/affich-24692-keyboard-mess-up-after-shift-key-held-too-lon?page=2>
4465            
4466             =head2 C combination: many keys are not delivered to applications
4467            
4468             Apparently, key up/down for many combinations of C are
4469             not delivered to applications.
4470             For example, C and C — neither with nor without C; same
4471             for C (at least in a console). Adding C
4472             does not change this. Same for C.
4473            
4474             No workarounds are known (except that C and C (without C)
4475             may be replaced by C and C).
4476            
4477             B in the bottom row of the keyboard, all the keys (except C) are
4478             either in the list above, or in the list for C modifiers. See also the
4479             references in the discussion of the previous problem (with C).
4480            
4481             =head2 Too long C of the layout is not shown in Language Bar Settings
4482            
4483             (the description is shown in the Language Bar itself). The examples are (behave the same)
4484            
4485             Greek-QWERTY (Pltn) Grn=⇑␣=^ˡⒶˡ-=Lat; Ripe=Ⓐʳ␣=Mnu-=Rus(Phon); Ripe²=Mnu-^ʳ-=Hbr; k.ilyaz.org
4486             US-Intl Grn=⇑␣=^ˡⒶˡ-=Grk; Ripe=Ⓐʳ␣=Mnu-=Rus(Phon); Ripe²=Mnu-^ʳ-=Hbr; k.ilyaz.org
4487            
4488             (Or maybe it is the semicolons in the names???). If this happens, one can still assign
4489             distinctive icons to the layout, and distinguish them via going to C.
4490            
4491             =head1 UNICODE TABLE GOTCHAS
4492            
4493             The position of Unicode consortium is, apparently, that the “name” of
4494             a Unicode character is “just an identifier”. In other words, its
4495             (primary) function is to identify a character uniquely: different
4496             characters should have different names, and that's it. Any other function
4497             is secondary, and “if it works, fine”; if it does not work, tough luck.
4498             If the name does not match how people use the character (and with the
4499             giant pool of defined characters, this has happened a few times), this is not
4500             a reason to abandon the name.
4501            
4502             This position makes the practice of maintaining backward compatibility
4503             easy. There is L.
4504            
4505             However, this module tries to extract a certain amount of I
4506             from the giant heap of characters defined in Unicode; the principal concept
4507             is “a mutator”. Most mutators are defined by programmatic inspection of names
4508             of characters and relations between names of different characters. (In other
4509             words, we base such mutators on names, not glyphs.) Here we
4510             sketch the irregularities uncovered during this process.
4511            
4512             APL symbols with C and C look reverted w.r.t. other
4513             C and C symbols.
4514            
4515             C, C, C, C C, C
4516             are defined with C or C at end, but (may) drop it when combined
4517             with modifiers via C. Likewise for C, C,
4518             C, C, C, C.
4519            
4520             Sometimes the opposite happens, and C appears out of the blue; compare:
4521            
4522             2A18 INTEGRAL WITH TIMES SIGN
4523             2A19 INTEGRAL WITH INTERSECTION
4524            
4525             C I a combination of C with C, but it is not marked as such
4526             in its name.
4527            
4528             Sometimes a name of diacritic (after C) acquires an C at end
4529             (see C).
4530            
4531             Oftentimes the part to the left of C is not resolvable: sometimes it
4532             is underspecified (e.g., just C), sometimes it is overspecified
4533             (e.g., in C), sometimes it should be understood
4534             as a glyph-of-written-word (e.g., in C). Sometimes it just
4535             does not exist (e.g., C -
4536             there is C, but not the reversed variant).
4537             Sometimes it is a defined synonym (C).
4538            
4539             Sometimes it has something appended (C).
4540            
4541             Sometimes C is just a clarification (C).
4542            
4543             1 AND
4544             1 ANTENNA
4545             1 ARABIC MATHEMATICAL OPERATOR HAH
4546             1 ARABIC MATHEMATICAL OPERATOR MEEM
4547             1 ARABIC ROUNDED HIGH STOP
4548             1 ARABIC SMALL HIGH LIGATURE ALEF
4549             1 ARABIC SMALL HIGH LIGATURE QAF
4550             1 ARABIC SMALL HIGH LIGATURE SAD
4551             1 BACK
4552             1 BLACK SUN
4553             1 BRIDE
4554             1 BROKEN CIRCLE
4555             1 CIRCLED HORIZONTAL BAR
4556             1 CIRCLED MULTIPLICATION SIGN
4557             1 CLOSED INTERSECTION
4558             1 CLOSED LOCK
4559             1 COMBINING LEFTWARDS HARPOON
4560             1 COMBINING RIGHTWARDS HARPOON
4561             1 CONGRUENT
4562             1 COUPLE
4563             1 DIAMOND SHAPE
4564             1 END
4565             1 EQUIVALENT
4566             1 FISH CAKE
4567             1 FROWNING FACE
4568             1 GLOBE
4569             1 GRINNING CAT FACE
4570             1 HEAVY OVAL
4571             1 HELMET
4572             1 HORIZONTAL MALE
4573             1 IDENTICAL
4574             1 INFINITY NEGATED
4575             1 INTEGRAL AVERAGE
4576             1 INTERSECTION BESIDE AND JOINED
4577             1 KISSING CAT FACE
4578             1 LATIN CAPITAL LETTER REVERSED C
4579             1 LATIN CAPITAL LETTER SMALL Q
4580             1 LATIN LETTER REVERSED GLOTTAL STOP
4581             1 LATIN LETTER TWO
4582             1 LATIN SMALL CAPITAL LETTER I
4583             1 LATIN SMALL CAPITAL LETTER U
4584             1 LATIN SMALL LETTER LAMBDA
4585             1 LATIN SMALL LETTER REVERSED R
4586             1 LATIN SMALL LETTER TC DIGRAPH
4587             1 LATIN SMALL LETTER TH
4588             1 LEFT VERTICAL BAR
4589             1 LOWER RIGHT CORNER
4590             1 MEASURED RIGHT ANGLE
4591             1 MONEY
4592             1 MUSICAL SYMBOL
4593             1 NIGHT
4594             1 NOTCHED LEFT SEMICIRCLE
4595             1 ON
4596             1 OR
4597             1 PAGE
4598             1 RIGHT ANGLE VARIANT
4599             1 RIGHT DOUBLE ARROW
4600             1 RIGHT VERTICAL BAR
4601             1 RUNNING SHIRT
4602             1 SEMIDIRECT PRODUCT
4603             1 SIX POINTED STAR
4604             1 SMALL VEE
4605             1 SOON
4606             1 SQUARED UP
4607             1 SUMMATION
4608             1 SUPERSET BESIDE AND JOINED BY DASH
4609             1 TOP
4610             1 TOP ARC CLOCKWISE ARROW
4611             1 TRIPLE VERTICAL BAR
4612             1 UNION BESIDE AND JOINED
4613             1 UPPER LEFT CORNER
4614             1 VERTICAL BAR
4615             1 VERTICAL MALE
4616             1 WHITE SUN
4617             2 CLOSED MAILBOX
4618             2 CLOSED UNION
4619             2 DENTISTRY SYMBOL LIGHT VERTICAL
4620             2 DOWN-POINTING TRIANGLE
4621             2 HEART
4622             2 LEFT ARROW
4623             2 LINE INTEGRATION
4624             2 N-ARY UNION OPERATOR
4625             2 OPEN MAILBOX
4626             2 PARALLEL
4627             2 RIGHT ARROW
4628             2 SMALL CONTAINS
4629             2 SMILING CAT FACE
4630             2 TIMES
4631             2 TRIPLE HORIZONTAL BAR
4632             2 UP-POINTING TRIANGLE
4633             2 VERTICAL KANA REPEAT
4634             3 CHART
4635             3 CONTAINS
4636             3 TRIANGLE
4637             4 BANKNOTE
4638             4 DIAMOND
4639             4 PERSON
4640             5 LEFTWARDS TWO-HEADED ARROW
4641             5 RIGHTWARDS TWO-HEADED ARROW
4642             8 DOWNWARDS HARPOON
4643             8 UPWARDS HARPOON
4644             9 SMILING FACE
4645             11 CIRCLE
4646             11 FACE
4647             11 LEFTWARDS HARPOON
4648             11 RIGHTWARDS HARPOON
4649             15 SQUARE
4650            
4651             perl -wlane "next unless /^Unresolved: <(.*?)>/; $s{$1}++; END{print qq($s{$_}\t$_) for keys %s}" oxx-us2 | sort -n > oxx-us2-sorted-kw
4652            
4653             C specify fill - not combining. C is not combining, same for Cs.
4654            
4655             Only C is combining. Triangle is combining only with underbar and dot above.
4656            
4657             C means C. C - C (so do many others.)
4658             C means C; but C means C - go figure!
4659             C is not a decomposition (it is "something circled").
4660            
4661             Another way of compositing is C (but not C!) and C. See also C, C
4662             - but only C. Avoid C after these.
4663            
4664             C should replace C. C means C, same for C.
4665             C means C - actually just a bug - http://www.reddit.com/r/programming/comments/fv8ao/unicode_600_standard_published/?
4666             C means C. C means C.
4667             C means C. C means C.
4668            
4669             C means C. C looks genuinely missing...
4670            
4671             C means one of two, left or right???
4672            
4673             This better be convertible by rounding/sharpening mutators, but see
4674             C
4675            
4676             2268 LESS-THAN BUT NOT EQUAL TO; 1.1
4677             2269 GREATER-THAN BUT NOT EQUAL TO; 1.1
4678             228A SUBSET OF WITH NOT EQUAL TO; 1.1
4679             228B SUPERSET OF WITH NOT EQUAL TO; 1.1
4680             @ Relations
4681             22E4 SQUARE IMAGE OF OR NOT EQUAL TO; 1.1
4682             22E5 SQUARE ORIGINAL OF OR NOT EQUAL TO; 1.1
4683             @@ 2A00 Supplemental Mathematical Operators 2AFF
4684             @ Relational operators
4685             2A87 LESS-THAN AND SINGLE-LINE NOT EQUAL TO; 3.2
4686             x (less-than but not equal to - 2268)
4687             2A88 GREATER-THAN AND SINGLE-LINE NOT EQUAL TO; 3.2
4688             x (greater-than but not equal to - 2269)
4689             2AB1 PRECEDES ABOVE SINGLE-LINE NOT EQUAL TO; 3.2
4690             2AB2 SUCCEEDS ABOVE SINGLE-LINE NOT EQUAL TO; 3.2
4691             2AB5 PRECEDES ABOVE NOT EQUAL TO; 3.2
4692             2AB6 SUCCEEDS ABOVE NOT EQUAL TO; 3.2
4693             @ Subset and superset relations
4694             2ACB SUBSET OF ABOVE NOT EQUAL TO; 3.2
4695             2ACC SUPERSET OF ABOVE NOT EQUAL TO; 3.2
4696            
4697             Looking into v6.1 reference PDFs, 2268,2269,2ab5,2ab6,2acb,2acc have two horizontal bars,
4698             228A,228B,22e4,22e5,2a87,2a88,2ab1,2ab2 have one horizontal bar. Hence C and C
4699             are equivalent; so are C, C, C
4700             and C. (Square variants come only with one horizontal line?)
4701            
4702            
4703             Set C<$ENV{UI_KEYBOARDLAYOUT_UNRESOLVED}> to enable warnings. Then do
4704            
4705             perl -wlane "next unless /^Unresolved: <(.*?)>/; $s{$1}++; END{print qq($s{$_}\t$_) for keys %s}" oxx | sort -n > oxx-sorted-kw
4706            
4707             =head1 SEE ALSO
4708            
4709             The keyboard(s) generated with this module: L, L
4710            
4711             On diacritics:
4712            
4713             http://www.phon.ucl.ac.uk/home/wells/dia/diacritics-revised.htm#two
4714             http://en.wikipedia.org/wiki/Tonos#Unicode
4715             http://en.wikipedia.org/wiki/Early_Cyrillic_alphabet#Numerals.2C_diacritics_and_punctuation
4716             http://en.wikipedia.org/wiki/Vietnamese_alphabet#Tone_marks
4717             http://diacritics.typo.cz/
4718            
4719             http://en.wikipedia.org/wiki/User:TEB728/temp (Chars of languages)
4720             http://www.evertype.com/alphabets/index.html
4721            
4722             Accents in different Languages:
4723             http://fonty.pl/porady,12,inne_diakrytyki.htm#07
4724             http://en.wikipedia.org/wiki/Latin-derived_alphabet
4725            
4726             On typography marks
4727            
4728             http://wiki.neo-layout.org/wiki/Striche
4729             http://www.matthias-kammerer.de/SonsTypo3.htm
4730             http://en.wikipedia.org/wiki/Soft_hyphen
4731             http://en.wikipedia.org/wiki/Dash
4732             http://en.wikipedia.org/wiki/Ditto_mark
4733            
4734             On keyboard layouts:
4735            
4736             http://en.wikipedia.org/wiki/Keyboard_layout
4737             http://en.wikipedia.org/wiki/Keyboard_layout#US-International
4738             http://en.wikipedia.org/wiki/ISO/IEC_9995
4739             http://www.pentzlin.com/info2-9995-3-V3.pdf (used almost nowhere - only half of keys in Canadian multilanguage match)
4740             http://en.wikipedia.org/wiki/QWERTY#Canadian_Multilingual_Standard
4741             http://en.wikipedia.org/wiki/Unicode_input
4742             Discussion of layout changes and position of €:
4743             https://www.libreoffice.org/bugzilla/show_bug.cgi?id=5981
4744            
4745             History of QWERTY
4746             http://kanji.zinbun.kyoto-u.ac.jp/~yasuoka/publications/PreQWERTY.html
4747             http://kanji.zinbun.kyoto-u.ac.jp/db-machine/~yasuoka/QWERTY/
4748            
4749             http://msdn.microsoft.com/en-us/goglobal/bb964651
4750             http://eurkey.steffen.bruentjen.eu/layout.html
4751             http://ru.wikipedia.org/wiki/%D0%A4%D0%B0%D0%B9%D0%BB:Birman%27s_keyboard_layout.svg
4752             http://bepo.fr/wiki/Accueil
4753             http://www.unibuc.ro/e/prof/paliga_v_s/soft-reso/ (Academic for Mac)
4754             http://cgit.freedesktop.org/xkeyboard-config/tree/symbols/ru
4755             http://cgit.freedesktop.org/xkeyboard-config/tree/symbols/keypad
4756             http://www.evertype.com/celtscript/type-keys.html (Old Irish mechanical typewriters)
4757             http://eklhad.net/linux/app/halfqwerty.xkb (One-handed layout)
4758             http://www.doink.ch/an-x11-keyboard-layout-for-scholars-of-old-germanic/ (and references there)
4759             http://www.neo-layout.org/
4760             https://commons.wikimedia.org/wiki/File:Neo2_keyboard_layout.svg
4761             Images in (download of)
4762             http://www.mzuther.de/en/contents/osd-neo2
4763             Neo2 sources:
4764             http://wiki.neo-layout.org/browser/windows/kbdneo2/Quelldateien
4765             Shift keys at center, nice graphic:
4766             http://www.tinkerwithabandon.com/twa/keyboarding.html
4767             Physical keyboard:
4768             http://www.konyin.com/?page=product.Multilingual%20Keyboard%20for%20UNITED%20STATES
4769             Polytonic Greek
4770             http://www.polytoniko.org/keyb.php?newlang=en
4771             Portable keyboard layout
4772             http://www.autohotkey.com/forum/viewtopic.php?t=28447
4773             One-handed
4774             http://www.autohotkey.com/forum/topic1326.html
4775             Typing on numeric keypad
4776             http://goron.de/~johns/one-hand/#documentation
4777             On screen keyboard indicator
4778             http://www.autohotkey.com/docs/scripts/KeyboardOnScreen.htm
4779             Keyboards of ЕС-1840/1/5
4780             http://aic-crimea.narod.ru/Study/Shen/PC/1/5-4-1.htm
4781             (http://www.aic-crimea.narod.ru/Study/Shen/PC/main.htm) Руководство пользователя ПЭВМ
4782             http://fdd5-25.net/fddforum/index.php?PHPSESSID=201bd45ab972f1ab4b440dcb6c7ca18f&topic=489.30
4783             Phonetic Hebrew layout(s) (1st has many duplicates, 2nd overweighted)
4784             http://bc.tech.coop/Hebrew-ZC.html
4785             http://help.keymanweb.com/keyboards/keyboard_galaxiehebrewkm6.php
4786             Greek (Galaxy) with a convenient mapping (except for Ψ) and BibleScript
4787             http://www.tavultesoft.com/keyboarddownloads/%7B4D179548-1215-4167-8EF7-7F42B9B0C2A6%7D/manual.pdf
4788             With 2-letter input of Unicode names:
4789             http://www.jlg-utilities.com
4790             Medievalist's
4791             http://www.personal.leeds.ac.uk/~ecl6tam/
4792             Yandex visual keyboards
4793             http://habrahabr.ru/company/yandex/blog/108255/
4794             Implementation in Firefox
4795             http://mxr.mozilla.org/mozilla-central/source/widget/windows/KeyboardLayout.cpp#1085
4796             Implementation in Emacs 24.3 (ToUnicode() in fns)
4797             http://fossies.org/linux/misc/emacs-24.3.tar.gz:a/emacs-24.3/src/w32inevt.c
4798             http://fossies.org/linux/misc/emacs-24.3.tar.gz:a/emacs-24.3/src/w32fns.c
4799             http://fossies.org/linux/misc/emacs-24.3.tar.gz:a/emacs-24.3/src/w32term.c
4800             Naive implementations:
4801             http://social.msdn.microsoft.com/forums/en-US/windowssdk/thread/07afec87-68c1-4a56-bf46-a38a9c2232e9/
4802             Quality of a keyboard
4803             http://www.tavultesoft.com/keymandev/quality/whitepaper1.1.pdf
4804            
4805             Manipulating keyboards on Windows and X11
4806            
4807             http://symbolcodes.tlt.psu.edu/keyboards/winkeyvista.html (using links there: up to Win7)
4808             http://windows.microsoft.com/en-us/windows-8/change-keyboard-layout
4809             http://www.howtoforge.com/changing-language-and-keyboard-layout-on-various-linux-distributions
4810            
4811             MSKLC parser
4812            
4813             http://pastebin.com/UXc1ub4V
4814            
4815             By author of MSKLC Michael S. Kaplan (do not forget to follow links)
4816            
4817             Input on Windows:
4818             http://seit.unsw.adfa.edu.au/staff/sites/hrp/personal/Sanskrit-External/Unicode-KbdsonWindows.pdf
4819            
4820             http://blogs.msdn.com/b/michkap/archive/2006/03/26/560595.aspx
4821             http://blogs.msdn.com/b/michkap/archive/2006/04/22/581107.aspx
4822             Chaining dead keys:
4823             http://blogs.msdn.com/b/michkap/archive/2011/04/16/10154700.aspx
4824             Mapping VK to VSC etc:
4825             http://blogs.msdn.com/b/michkap/archive/2006/08/29/729476.aspx
4826             [Link] Remapping CapsLock to mean Backspace in a keyboard layout
4827             (if repeat, every second Press counts ;-)
4828             http://colemak.com/forum/viewtopic.php?id=870
4829             Scancodes from kbd.h get in the way
4830             http://blogs.msdn.com/b/michkap/archive/2006/08/30/726087.aspx
4831             What happens if you start with .klc with other VK_ mappings:
4832             http://blogs.msdn.com/b/michkap/archive/2010/11/03/10085336.aspx
4833             Keyboards with Ctrl-Shift states:
4834             http://blogs.msdn.com/b/michkap/archive/2010/10/08/10073124.aspx
4835             On assigning Ctrl-values
4836             http://blogs.msdn.com/b/michkap/archive/2008/11/04/9037027.aspx
4837             On hotkeys for switching layouts:
4838             http://blogs.msdn.com/b/michkap/archive/2008/07/16/8736898.aspx
4839             Text services
4840             http://blogs.msdn.com/b/michkap/archive/2008/06/30/8669123.aspx
4841             Low-level access in MSKLC
4842             http://levicki.net/articles/tips/2006/09/29/HOWTO_Build_keyboard_layouts_for_Windows_x64.php
4843             http://blogs.msdn.com/b/michkap/archive/2011/04/09/10151666.aspx
4844             On font linking
4845             http://blogs.msdn.com/b/michkap/archive/2006/01/22/515864.aspx
4846             Unicode in console
4847             http://blogs.msdn.com/michkap/archive/2005/12/15/504092.aspx
4848             Adding formerly "invisible" keys to the keyboard
4849             http://blogs.msdn.com/b/michkap/archive/2006/09/26/771554.aspx
4850             Redefining NumKeypad keys
4851             http://blogs.msdn.com/b/michkap/archive/2007/07/04/3690200.aspx
4852             BUT!!!
4853             http://blogs.msdn.com/b/michkap/archive/2010/04/05/9988581.aspx
4854             And backspace/return/etc
4855             http://blogs.msdn.com/b/michkap/archive/2008/10/27/9018025.aspx
4856             kbdutool.exe, run with the /S ==> .c files
4857             Doing one's own WM_DEADKEY processing
4858             http://blogs.msdn.com/b/michkap/archive/2006/09/10/748775.aspx
4859             Dead keys do not work on SG-Caps
4860             http://blogs.msdn.com/b/michkap/archive/2008/02/09/7564967.aspx
4861             Dynamic keycaps keyboard
4862             http://blogs.msdn.com/b/michkap/archive/2005/07/20/441227.aspx
4863             Backslash/yen/won confusion
4864             http://blogs.msdn.com/b/michkap/archive/2005/09/17/469941.aspx
4865             Unicode output to console
4866             http://blogs.msdn.com/b/michkap/archive/2010/10/07/10072032.aspx
4867             Install/Load/Activate an input method/layout
4868             http://blogs.msdn.com/b/michkap/archive/2007/12/01/6631463.aspx
4869             http://blogs.msdn.com/b/michkap/archive/2008/05/23/8537281.aspx
4870             Reset to a TT font from an application:
4871             http://blogs.msdn.com/b/michkap/archive/2011/09/22/10215125.aspx
4872             How to (not) treat C-A-Q
4873             http://blogs.msdn.com/b/michkap/archive/2012/04/26/10297903.aspx
4874             Treating Brazilian ABNT c1 c2 keys
4875             http://blogs.msdn.com/b/michkap/archive/2006/10/07/799605.aspx
4876             And JIS ¥|-key
4877             (compare with http://www.scs.stanford.edu/11wi-cs140/pintos/specs/kbd/scancodes-7.html
4878             http://hp.vector.co.jp/authors/VA003720/lpproj/others/kbdjpn.htm )
4879             http://blogs.msdn.com/b/michkap/archive/2006/09/26/771554.aspx
4880             Suggest a topic:
4881             http://blogs.msdn.com/b/michkap/archive/2007/07/29/4120528.aspx#7119166
4882            
4883             Installable Keyboard Layouts - Apple Developer (“.keylayout” files; modifiers not editable; cache may create problems;
4884             to enable deadkeys in X11, one may need extra work)
4885            
4886             http://developer.apple.com/technotes/tn2002/tn2056.html
4887             http://wordherd.com/keyboards/
4888             http://stackoverflow.com/questions/999681/how-to-remap-context-menu-key-in-mac-os-x
4889             http://apple.stackexchange.com/questions/21691/ukelele-generated-custom-keyboard-layouts-not-working-in-lion
4890             http://wiki.openoffice.org/wiki/X11Keymaps
4891             http://www.tenshu.net/2012/11/using-caps-lock-as-new-modifier-key-in.html
4892             http://raw.github.com/lreddie/ukelele-steps/master/USExtended.keylayout
4893             http://scripts.sil.org/cms/scripts/page.php?item_id=keylayoutmaker
4894            
4895             ANSI/ISO/ABNT/JIS/Russian Apple’s keyboards
4896            
4897             https://discussions.apple.com/thread/1508293
4898             http://www.dtp-transit.jp/apple/mac/post_1137.html
4899             http://www.dtp-transit.jp/images/apple-keyboards-US-JIS.jpg
4900             http://m10lmac.blogspot.co.il/2007/02/fixing-brazilian-keyboard-layout.html
4901             http://www2d.biglobe.ne.jp/~msyk/keyboard/layout/mac-jiskbd.html
4902             http://commons.wikimedia.org/wiki/File:KB_Russian_Apple_Macintosh.svg
4903            
4904             JIS variations (OADG109 vs A)
4905            
4906             http://ja.wikipedia.org/wiki/JIS%E3%82%AD%E3%83%BC%E3%83%9C%E3%83%BC%E3%83%89
4907            
4908             Different ways to access chars on Mac (1ˢᵗ suggests adding a Discover via plists via Keycaps≠Strings)
4909            
4910             http://apple.stackexchange.com/questions/49565/how-can-i-expand-the-number-of-special-characters-i-can-type-using-my-keyboard
4911             http://developer.apple.com/library/mac/#documentation/cocoa/conceptual/eventoverview/TextDefaultsBindings/TextDefaultsBindings.html#//apple_ref/doc/uid/20000468-CJBDEADF
4912             http://www.hcs.harvard.edu/~jrus/Site/System%20Bindings.html Default keybindings
4913             http://www.hcs.harvard.edu/~jrus/Site/Cocoa%20Text%20System.html
4914             http://hints.macworld.com/article.php?story=2005051118320432 Mystery keys on Mac
4915             http://www.snark.de/index.cgi/0007 Patching ADB drivers
4916             http://www.snark.de/mac/usbkbpatch/index_en.html Patching USB drivers (gives LCtrl vs RCtrl etc???)
4917             http://www.lorax.com/FreeStuff/TextExtras.html (has no docs???)
4918             http://stevelosh.com/blog/2012/10/a-modern-space-cadet/ Combining different approaches
4919             http://brettterpstra.com/2012/12/08/a-useful-caps-lock-key/ (simplified version of ↖)
4920             http://david.rothlis.net/keyboards/microsoft_natural_osx/ Num Lock is claimed as not working
4921            
4922             Compose on Mac requires hacks:
4923            
4924             http://apple.stackexchange.com/questions/31487/add-compose-key-to-os-x
4925            
4926             Convert Apple to MSKLC
4927            
4928             http://typophile.com/node/90606
4929            
4930             Keyboards on Mac:
4931            
4932             http://homepage.mac.com/thgewecke/mlingos9.html
4933             http://web.archive.org/web/20080717203026/http://homepage.mac.com/thgewecke/mlingos9.html
4934            
4935             Tool to produce:
4936            
4937             http://wordherd.com/keyboards/
4938             http://developer.apple.com/library/mac/#technotes/tn2056/_index.html
4939            
4940             VK_OEM_8 Kana modifier - Using instead of AltGr
4941            
4942             http://www.kbdedit.com/manual/ex13_replacing_altgr_with_kana.html
4943            
4944             Limitations of using KANA toggle
4945            
4946             http://www.kbdedit.com/manual/ex12_trilang_ser_cyr_lat_gre.html
4947            
4948             FE (Far Eastern) keyboard source code example (NEC AT is 106 with SPECIAL MULTIVK flags changed on some scancodes, OEM_7/8 producing 0x1e 0x1f, and no OEM_102):
4949            
4950             http://read.pudn.com/downloads3/sourcecode/windows/248345/win2k/private/ntos/w32/ntuser/kbd/fe_kbds/jpn/ibm02/kbdibm02.c__.htm
4951             http://read.pudn.com/downloads3/sourcecode/windows/248345/win2k/private/ntos/w32/ntuser/kbd/fe_kbds/jpn/kbdnecat/kbdnecat.c__.htm
4952             http://read.pudn.com/downloads3/sourcecode/windows/248345/win2k/private/ntos/w32/ntuser/kbd/fe_kbds/jpn/106/kbd106.c__.htm
4953            
4954             Investigation on relation between VK_ assignments, KBDEXT, KBDNUMPAD etc:
4955             http://code.google.com/p/ergo-dvorak-for-developers/source/browse/trunk/kbddvp.c
4956            
4957             PowerShell vs ISE (and how to find them [On Win7: WinKey Accessories])
4958             http://blogs.msdn.com/b/powershell/archive/2009/04/17/differences-between-the-ise-and-powershell-console.aspx
4959             http://blogs.msdn.com/b/michkap/archive/2013/01/23/10387424.aspx
4960             http://blogs.msdn.com/b/michkap/archive/2013/02/15/10393862.aspx
4961             http://blogs.msdn.com/b/michkap/archive/2013/02/19/10395086.aspx
4962             http://blogs.msdn.com/b/michkap/archive/2013/02/20/10395416.aspx
4963            
4964             Google for "Get modification number for Shift key" for code to query the kbd DLL directly ("keylogger")
4965             http://web.archive.org/web/20120106074849/http://debtnews.net/index.php/article/debtor/2008-09-08/1088.html
4966             http://code.google.com/p/keymagic/source/browse/KeyMagicDll/kbdext.cpp?name=0419d8d626&r=d85498403fd59bca9efc04b4e5bb4406d39439a0
4967            
4968             How to read Unicode in an ANSI Window:
4969             http://social.msdn.microsoft.com/Forums/en-US/windowsgeneraldevelopmentissues/thread/d455e846-d18b-4086-98de-822658bcebf0/
4970             http://blog.tavultesoft.com/2011/06/accepting-unicode-input-in-your-windows-application.html
4971            
4972             HTML consolidated entity names and discussion, MES charsets:
4973            
4974             http://www.w3.org/TR/xml-entity-names
4975             http://www.w3.org/2003/entities/2007/w3centities-f.ent
4976             http://www.cl.cam.ac.uk/~mgk25/ucs/mes-2-rationale.html
4977             http://web.archive.org/web/20000815100817/http://www.egt.ie/standards/iso10646/pdf/cwa13873.pdf
4978            
4979             Ctrl2cap
4980            
4981             http://technet.microsoft.com/en-us/sysinternals/bb897578
4982            
4983             Low level scancode mapping
4984            
4985             http://www.annoyances.org/exec/forum/winxp/r1017256194
4986             http://web.archive.org/web/20030211001441/http://www.microsoft.com/hwdev/tech/input/w2kscan-map.asp
4987             http://msdn.microsoft.com/en-us/windows/hardware/gg463447
4988             http://www.annoyances.org/exec/forum/winxp/1034644655
4989             ???
4990             http://netj.org/2004/07/windows_keymap
4991             the free remapkey.exe utility that's in Microsoft NT / 2000 resource kit.
4992            
4993             perl -wlne "BEGIN{$t = {T => q(), qw( X e0 Y e1 )}} print qq( $t->{$1}$2\t$3) if /^#define\s+([TXY])([0-9a-f]{2})\s+(?:_EQ|_NE)\((?:(?:\s*\w+\s*,){3})?\s*([^\W_]\w*)\s*(?:(?:,\s*\w+\s*){2})?\)\s*(?:\/\/.*)?$/i" kbd.h >ll2
4994             then select stuff up to the first e1 key (but DECIMAL is not there T53 is DELETE??? take from MSKLC help/using/advanced/scancodes)
4995            
4996             CapsLock as on typewriter:
4997            
4998             http://web.archive.org/web/20120717083202/http://www.annoyances.org/exec/forum/winxp/1071197341
4999            
5000             Scancodes visible on the low level:
5001            
5002             http://openbsd.7691.n7.nabble.com/Patch-Support-F13-F24-on-PC-122-terminal-keyboard-td224992.html
5003             http://www.seasip.info/Misc/1227T.html
5004            
5005             Scancodes visible on Windows (with USB)
5006            
5007             http://download.microsoft.com/download/1/6/1/161ba512-40e2-4cc9-843a-923143f3456c/translate.pdf
5008            
5009             Problems on X11:
5010            
5011             http://www.x.org/releases/X11R7.7/doc/kbproto/xkbproto.html (definition of XKB???)
5012             http://www.x.org/releases/current/doc/kbproto/xkbproto.html
5013            
5014             http://wiki.linuxquestions.org/wiki/Configuring_keyboards (current???)
5015             http://wiki.linuxquestions.org/wiki/Accented_Characters (current???)
5016             http://wiki.linuxquestions.org/wiki/Altering_or_Creating_Keyboard_Maps (current???)
5017             https://help.ubuntu.com/community/ComposeKey (documents almost 1/2 of the needed stuff)
5018             http://www.gentoo.org/doc/en/utf-8.xml (2005++ ???)
5019             http://en.gentoo-wiki.com/wiki/X.Org/Input_drivers (2009++ HAS: How to make CapsLock change layouts)
5020             http://www.freebsd.org/cgi/man.cgi?query=setxkbmap&sektion=1&manpath=X11R7.4
5021             http://people.uleth.ca/~daniel.odonnell/Blog/custom-keyboard-in-linuxx11
5022             http://shtrom.ssji.net/skb/xorg-ligatures.html (of 2008???)
5023             http://tldp.org/HOWTO/Danish-HOWTO-2.html (of 2005???)
5024             http://www.tux.org/~balsa/linux/deadkeys/index.html (of 1999???)
5025             http://www.x.org/releases/X11R7.6/doc/libX11/Compose/en_US.UTF-8.html
5026             http://cgit.freedesktop.org/xorg/proto/xproto/plain/keysymdef.h
5027            
5028             EIGHT_LEVEL FOUR_LEVEL_ALPHABETIC FOUR_LEVEL_SEMIALPHABETIC PC_SYSRQ : see
5029             http://cafbit.com/resource/mackeyboard/mackeyboard.xkb
5030            
5031             ./xkb in /etc/X11 /usr/local/X11 /usr/share/local/X11 /usr/share/X11
5032             (maybe it is more productive to try
5033             ls -d /*/*/xkb /*/*/*/xkb
5034             ?)
5035             but what dead_diaeresis means is defined here:
5036             Apparently, may be in /usr/X11R6/lib/X11/locale/en_US.UTF-8/Compose /usr/share/X11/locale/en_US.UTF-8/Compose
5037             http://wiki.maemo.org/Remapping_keyboard
5038             http://www.x.org/releases/current/doc/man/man8/mkcomposecache.8.xhtml
5039            
5040             B<Note:> having the XIM input method in GTK disables the Control-Shift-u way of entering HEX Unicode.
5041            
5042             How to contribute:
5043             http://www.freedesktop.org/wiki/Software/XKeyboardConfig/Rules
5044            
5045             B<Note:> the problems with handling deadkeys via .Compose are that: .Compose is handled by
5046             applications, while keymaps by server (since they may be on different machines, things can
5047             easily get out of sync); .Compose knows nothing about the current "Keyboard group" or of
5048             the state of CapsLock etc (therefore emulating "group switch" via composing is impossible).
5049            
5050             JS code to add "insert these chars": google for editpage_specialchars_cyrilic, or
5051            
5052             http://en.wikipedia.org/wiki/User:TEB728/monobook.jsx
5053            
5054             Latin paleography
5055            
5056             http://en.wikipedia.org/wiki/Latin_alphabet
5057             http://tlt.its.psu.edu/suggestions/international/bylanguage/oenglish.html
5058             http://guindo.pntic.mec.es/~jmag0042/LATIN_PALEOGRAPHY.pdf
5059             http://www.evertype.com/standards/wynnyogh/ezhyogh.html
5060             http://www.wordorigins.org/downloads/OELetters.doc
5061             http://www.menota.uio.no/menota-entities.txt
5062             http://std.dkuug.dk/jtc1/sc2/wg2/docs/n2957.pdf (Incomplete???)
5063             http://skaldic.arts.usyd.edu.au/db.php?table=mufi_char&if=mufi (No prioritization...)
5064            
5065             Summary tables for Cyrillic
5066            
5067             http://ru.wikipedia.org/wiki/%D0%9A%D0%B8%D1%80%D0%B8%D0%BB%D0%BB%D0%B8%D1%86%D0%B0#.D0.A1.D0.BE.D0.B2.D1.80.D0.B5.D0.BC.D0.B5.D0.BD.D0.BD.D1.8B.D0.B5_.D0.BA.D0.B8.D1.80.D0.B8.D0.BB.D0.BB.D0.B8.D1.87.D0.B5.D1.81.D0.BA.D0.B8.D0.B5_.D0.B0.D0.BB.D1.84.D0.B0.D0.B2.D0.B8.D1.82.D1.8B_.D1.81.D0.BB.D0.B0.D0.B2.D1.8F.D0.BD.D1.81.D0.BA.D0.B8.D1.85_.D1.8F.D0.B7.D1.8B.D0.BA.D0.BE.D0.B2
5068             http://ru.wikipedia.org/wiki/%D0%9F%D0%BE%D0%B7%D0%B8%D1%86%D0%B8%D0%B8_%D0%B1%D1%83%D0%BA%D0%B2_%D0%BA%D0%B8%D1%80%D0%B8%D0%BB%D0%BB%D0%B8%D1%86%D1%8B_%D0%B2_%D0%B0%D0%BB%D1%84%D0%B0%D0%B2%D0%B8%D1%82%D0%B0%D1%85
5069             http://en.wikipedia.org/wiki/List_of_Cyrillic_letters - per language tables
5070             http://en.wikipedia.org/wiki/Cyrillic_alphabets#Summary_table
5071             http://en.wiktionary.org/wiki/Appendix:Cyrillic_script
5072            
5073             Extra chars (see also the ordering table on page 8)
5074             http://std.dkuug.dk/jtc1/sc2/wg2/docs/n3194.pdf
5075            
5076             Typesetting Old and Modern Church Slavonic
5077             http://www.sanu.ac.rs/Cirilica/Prilozi/Skup.pdf
5078             http://irmologion.ru/ucsenc/ucslay8.html
5079             http://irmologion.ru/csscript/csscript.html
5080             http://cslav.org/success.htm
5081             http://irmologion.ru/developer/fontdev.html#allocating
5082            
5083             Non-dialogue of Slavists and Unicode experts
5084             http://www.sanu.ac.rs/Cirilica/Prilozi/Standard.pdf
5085             http://kodeks.uni-bamberg.de/slavling/downloads/2008-07-26_white-paper.pdf
5086            
5087             Newer: (+ combining ф)
5088             http://tug.org/pipermail/xetex/2012-May/023007.html
5089             http://www.unicode.org/alloc/Pipeline.html As below, plus N-left-hook, ДЗЖ ДЧ, L-descender, modifier-Ь/Ъ
5090             http://www.synaxis.info/azbuka/ponomar/charset/charset_1.htm
5091             http://www.synaxis.info/azbuka/ponomar/charset/charset_2.htm
5092             http://www.synaxis.info/azbuka/ponomar/roadmap/roadmap.html
5093             http://www.ponomar.net/cu_support.html
5094             http://www.ponomar.net/files/out.pdf
5095             http://www.ponomar.net/files/variants.pdf (5 VS for Mark's chapter, 2 VS for t, 1 VS for the rest)
5096            
5097             http://std.dkuug.dk/jtc1/sc2/wg2/docs/n3772.pdf typikon (+[semi]circled), ε-form
5098             http://std.dkuug.dk/jtc1/sc2/wg2/docs/n3971.pdf inverted ε-typikon
5099             http://std.dkuug.dk/jtc1/sc2/wg2/docs/n3974.pdf two variants of o/O
5100             http://std.dkuug.dk/jtc1/sc2/wg2/docs/n3998.pdf Mark's chapter
5101             http://std.dkuug.dk/jtc1/sc2/wg2/docs/n3563.pdf Reversed tse
5102            
5103             IPA
5104            
5105             http://upload.wikimedia.org/wikipedia/commons/f/f5/IPA_chart_2005_png.svg
5106             http://en.wikipedia.org/wiki/Obsolete_and_nonstandard_symbols_in_the_International_Phonetic_Alphabet
5107             http://en.wikipedia.org/wiki/Case_variants_of_IPA_letters
5108             Table with Unicode points marked:
5109             http://www.staff.uni-marburg.de/~luedersb/IPA_CHART2005-UNICODE.pdf
5110             (except for "Lateral flap" and "Epiglottal" column/row).
5111             (Extended) IPA explained by consortium:
5112             http://unicode.org/charts/PDF/U0250.pdf
5113             IPA keyboard
5114             http://www.rejc2.co.uk/ipakeyboard/
5115            
5116             http://en.wikipedia.org/wiki/International_Phonetic_Alphabet_chart_for_English_dialects#cite_ref-r_11-0
5117            
5118            
5119             Is this discussing KBDNLS_TYPE_TOGGLE on VK_KANA???
5120            
5121             http://mychro.mydns.jp/~mychro/mt/2010/05/vk-f.html
5122            
5123             Windows: fonts substitution/fallback/replacement
5124            
5125             http://msdn.microsoft.com/en-us/goglobal/bb688134
5126            
5127             Problems on Windows:
5128            
5129             http://en.wikipedia.org/wiki/Help:Special_characters#Alt_keycodes_for_Windows_computers
5130             http://en.wikipedia.org/wiki/Template_talk:Unicode#Plane_One_fonts
5131            
5132             Console font: Lucida Console 14 is viewable, but has practically no Unicode support.
5133             Consolas (good at 16) has much better Unicode support (sometimes better sometimes worse than DejaVu)
5134             DejaVu is good at 14 (equal to a GUI font size 9 on 15in 1300px screen; 16px unifont is native at 12 here)
5135             http://cristianadam.blogspot.com/2009/11/windows-console-and-true-type-fonts.html
5136            
5137             Apparently, Windows picks up the flavor (Bold/Italic/Etc) of DejaVu at random; see
5138             http://jpsoft.com/forums/threads/strange-results-with-cp-1252.1129/
5139             - he got it in bold. I''m getting it in italic... Workaround: uninstall
5140             all flavors but one (the BOOK flavor), THEN enable it for the console... Then reinstall
5141             (preferably newer versions).
5142            
5143             Display (how WikiPedia does it):
5144            
5145             http://en.wikipedia.org/wiki/Help:Special_characters#Displaying_special_characters
5146             http://en.wikipedia.org/wiki/Template:Unicode
5147             http://en.wikipedia.org/wiki/Template:Unichar
5148             http://en.wikipedia.org/wiki/User:Ruud_Koot/Unicode_typefaces
5149             In CSS: .IPA, .Unicode { font-family: "Arial Unicode MS", "Lucida Sans Unicode"; }
5150             http://web.archive.org/web/20060913000000/http://en.wikipedia.org/wiki/Template:Unicode_fonts
5151            
5152             Inspect which font is used by Firefox:
5153            
5154             https://addons.mozilla.org/en-US/firefox/addon/fontinfo/
5155            
5156             Windows shortcuts:
5157            
5158             http://windows.microsoft.com/en-US/windows7/Keyboard-shortcuts
5159             http://www.redgage.com/blogs/pankajugale/all-keyboard-shortcuts--very-useful.html
5160             https://skydrive.live.com/?cid=2ee8d462a8f365a0&id=2EE8D462A8F365A0%21141
5161             http://windows.microsoft.com/en-us/windows-8/new-keyboard-shortcuts
5162            
5163             On meaning of Unicode math codepoints
5164            
5165             http://milde.users.sourceforge.net/LUCR/Math/unimathsymbols.pdf
5166             http://milde.users.sourceforge.net/LUCR/Math/data/unimathsymbols.txt
5167             http://www.ams.org/STIX/bnb/stix-tbl.ascii-2006-10-20
5168             http://www.ams.org/STIX/bnb/stix-tbl.layout-2006-05-15
5169             http://mirrors.ibiblio.org/CTAN/macros/latex/contrib/unicode-math/unimath-symbols.pdf
5170             http://mirrors.ibiblio.org/CTAN//biblio/biber/documentation/utf8-macro-map.html
5171             http://tex.stackexchange.com/questions/14/how-to-look-up-a-symbol-or-identify-a-math-symbol-or-character
5172             http://unicode.org/Public/math/revision-09/MathClass-9.txt
5173             http://www.w3.org/TR/MathML/
5174             http://www.w3.org/TR/xml-entity-names/
5175             http://www.w3.org/TR/xml-entity-names/bycodes.html
5176            
5177             Transliteration (via iconv [it is locale-dependent], example rules for Greek)
5178            
5179             http://sourceware.org/bugzilla/show_bug.cgi?id=12031
5180            
5181             Monospaced fonts with combining marks (!)
5182            
5183             https://bugs.freedesktop.org/show_bug.cgi?id=18614
5184             https://bugs.freedesktop.org/show_bug.cgi?id=26941
5185            
5186             Indic ISCII - any hope with it? (This is not representable...:)
5187            
5188             http://unicode.org/mail-arch/unicode-ml/y2012-m09/0053.html
5189            
5190             (Perceived) problems of Unicode (2001)
5191            
5192             http://www.ibm.com/developerworks/library/u-secret.html
5193            
5194             On a need to have input methods for unicode
5195            
5196             http://unicode.org/mail-arch/unicode-ml/y2012-m07/0226.html
5197            
5198             On info on Unicode chars
5199            
5200             http://unicode.org/mail-arch/unicode-ml/y2012-m07/0415.html
5201            
5202             Zapf dingbats encoding, and other fine points of AdobeGL:
5203            
5204             ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/ADOBE/zdingbat.txt
5205             http://web.archive.org/web/20001015040951/http://partners.adobe.com/asn/developer/typeforum/unicodegn.html
5206            
5207             Yet another (IMO, silly) way to handle '; fight: ' vs ` ´
5208            
5209             http://www.cl.cam.ac.uk/~mgk25/ucs/apostrophe.html
5210            
5211             Surrogate characters on IE
5212            
5213             HKEY_CURRENT_USER\Software\Microsoft\Internet Explorer\International\Scripts\42
5214             http://winvnkey.sourceforge.net/webhelp/surrogate_fonts.htm
5215             http://msdn.microsoft.com/en-us/library/aa918682.aspx Script IDs
5216            
5217             Quoting tchrist:
5218             I, C, and C from L if you like.>
5219            
5220             Tom's unicode scripts
5221            
5222             http://search.cpan.org/~bdfoy/Unicode-Tussle-1.03/lib/Unicode/Tussle.pm
5223            
5224             =head2 F<.XCompose>: on docs and examples
5225            
5226             Syntax of C<.XCompose> is (partially) documented in
5227            
5228             http://www.x.org/archive/current/doc/man/man5/Compose.5.xhtml
5229             http://cgit.freedesktop.org/xorg/lib/libX11/tree/man/Compose.man
5230            
5231             # Modifiers are not documented
5232             # (Shift, Alt, Lock, Ctrl with aliases Meta, Caps [Alt/Meta binds Mod1];
5233             # ! means that not mentioned supported modifiers must be off;
5234             # None means that all recognized modifiers are off.)
5235            
5236             Semantics (e.g., which of keybindings has a preference) is not documented.
5237             Experiments (see below) show that a longer binding wins; if same
5238             length, one which is loaded later wins (as far as they match exactly, both
5239             the keys, and the set of required modifiers and their states).
5240             Note that a given keypress may match several I lists of
5241             modifier; one defined earlier wins.
5242            
5243             For example, in
5244            
5245             ~Ctrl Shift : "a1"
5246             Shift ~Ctrl : "ab1"
5247             ~Meta Shift : "b1"
5248             ~Ctrl ~Meta Shift : "ba1"
5249             Shift ~Meta : "b2"
5250             Shift ~Meta ~Lock : "b3"
5251            
5252             there is no way to trigger the output C<"a1"> (since the next row captures
5253             essentially the same keypress into a longer binding). The only binding which
5254             is explicitly overwritten is one for C<"b1">. Hence pressing
5255             C would trigger the binding C<"b2">, and there is no way to trigger
5256             the bindings for C<"b3"> and C<"ba1">.
5257            
5258             # (the source of imLcPrs.c shows that the expansion of the
5259             # shorter sequence is stored too - but the presence of
5260             # ->succession means that the code to process the resulting
5261             # tree ignores the expansion).
5262            
5263             The interaction of C<.Compose> with
5264             L
5265             of passed-through C and C modifiers is not documented.
5266            
5267             Before the syntax was documented: For the best approximation,
5268             read the parser's code, e.g., google for
5269            
5270             inurl:compose.c XCompose
5271             site:cgit.freedesktop.org "XCompose"
5272             site:cgit.freedesktop.org "XCompose" filetype:c
5273             _XimParseStringFile
5274            
5275             http://cgit.freedesktop.org/xorg/lib/libX11/tree/modules/im/ximcp/imLcIm.c
5276             http://cgit.freedesktop.org/xorg/lib/libX11/tree/modules/im/ximcp/imLcPrs.c
5277             http://uim.googlecode.com/svn-history/r6111/trunk/gtk/compose.c
5278             http://uim.googlecode.com/svn/tags/uim-1.5.2/gtk/compose.c
5279            
5280             The actual use of the compiled compose table:
5281            
5282             http://cgit.freedesktop.org/xorg/lib/libX11/tree/modules/im/ximcp/imLcFlt.c
5283            
5284             Apparently, the first node (= defined last) in the tree which
5285             matches keysym and modifiers is chosen. So to override C<< >>,
5286             looks like (checked to work!) C<< ~Ctrl >> may be used...
5287             On the other hand, defining both C<< >> and (later) C<< ~Ctrl >>,
5288             one would expect that C<< >> should still trigger the
5289             expansion of C<< >> — but it does not... See also:
5290            
5291             http://cgit.freedesktop.org/xorg/lib/libX11/tree/modules/im/ximcp/imLcLkup.c
5292            
5293             The file F<.XCompose> is processed by X11 I on startup. The changes
5294             to this file should be seen immediately by all newly started clients
5295             (but GTK or QT applications may need extra config - see below)
5296             unless the directory F<~/.compose-cache> is present and has a cache
5297             file compatible with binary architecture (then until cache
5298             expires - one day after creation - changes are not seen). The
5299             name F<.XCompose> may be overridden by environment variable C<XCOMPOSEFILE>.
5300            
5301             To get (better?) examples, google for C<"multi_key" partial alpha "DOUBLE-STRUCK">.
5302            
5303             # include these first, so they may be overridden later
5304             include "%H/my-Compose/.XCompose-kragen"
5305             include "%H/my-Compose/.XCompose-ootync"
5306             include "%H/my-Compose/.XCompose-pSub"
5307            
5308             Check success: kragen: C<\ space> --> ␣; ootync: C --> ℉; pSub: C<0 0> --> ∞ ...
5309            
5310             Older versions of X11 do not understand %L %S. - but understand %H
5311            
5312             E.g. Debian Squeeze 6.0.6; according to
5313            
5314             http://packages.debian.org/search?keywords=x11-common
5315            
5316             it has C).
5317            
5318             include "/etc/X11/locale/en_US.UTF-8/Compose"
5319             include "/usr/share/X11/locale/en_US.UTF-8/Compose"
5320            
5321             Import default rules from the system Compose file:
5322             usually as above (but supported only on newer systems):
5323            
5324             include "%L"
5325            
5326             detect the success of the lines above: get C<#> by doing C ...
5327            
5328             The next file to include has been generated by
5329            
5330             perl -wlne 'next if /#\s+CIRCLED/; print if />\s+<.*>\s+<.*>\s+<.*/' /usr/share/X11/locale/en_US.UTF-8/Compose
5331             ### Std tables contain quadruple prefix for GREEK VOWELS and CIRCLED stuff
5332             ### only. But there is a lot of triple prefix...
5333             perl -wne 'next if /#\s+CIRCLED/; $s{$1}++ or print qq( $1) if />\s+<.*>\s+<.*>\s+<.*"(.*)"/' /usr/share/X11/locale/en_US.UTF-8/Compose
5334             ## – — ☭ ª º Ǖ ǖ Ǘ ǘ Ǚ ǚ Ǜ ǜ Ǟ ǟ Ǡ ǡ Ǭ ǭ Ǻ ǻ Ǿ ǿ Ȫ ȫ Ȭ ȭ Ȱ ȱ ʰ ʱ ʲ ʳ ʴ ʵ ʶ ʷ ʸ ˠ ˡ ˢ ˣ ˤ ΐ ΰ Ḉ ḉ Ḕ ḕ Ḗ ḗ Ḝ ḝ Ḯ ḯ Ḹ ḹ Ṍ ṍ Ṏ ṏ Ṑ ṑ Ṓ ṓ Ṝ ṝ Ṥ ṥ Ṧ ṧ Ṩ ṩ Ṹ ṹ Ṻ ṻ Ấ ấ Ầ ầ Ẩ ẩ Ẫ ẫ Ậ ậ Ắ ắ Ằ ằ Ẳ ẳ Ẵ ẵ Ặ ặ Ế ế Ề ề Ể ể Ễ ễ Ệ ệ Ố ố Ồ ồ Ổ ổ Ỗ ỗ Ộ ộ Ớ ớ Ờ ờ Ở ở Ỡ ỡ Ợ ợ Ứ ứ Ừ ừ Ử ử Ữ ữ Ự ự ἂ ἃ ἄ ἅ ἆ ἇ Ἂ Ἃ Ἄ Ἅ Ἆ Ἇ ἒ ἓ ἔ ἕ Ἒ Ἓ Ἔ Ἕ ἢ ἣ ἤ ἥ ἦ ἧ Ἢ Ἣ Ἤ Ἥ Ἦ Ἧ ἲ ἳ ἴ ἵ ἶ ἷ Ἲ Ἳ Ἴ Ἵ Ἶ Ἷ ὂ ὃ ὄ ὅ Ὂ Ὃ Ὄ Ὅ ὒ ὓ ὔ ὕ ὖ ὗ Ὓ Ὕ Ὗ ὢ ὣ ὤ ὥ ὦ ὧ Ὢ Ὣ Ὤ Ὥ Ὦ Ὧ ᾀ ᾁ ᾂ ᾃ ᾄ ᾅ ᾆ ᾇ ᾈ ᾉ ᾊ ᾋ ᾌ ᾍ ᾎ ᾏ ᾐ ᾑ ᾒ ᾓ ᾔ ᾕ ᾖ ᾗ ᾘ ᾙ ᾚ ᾛ ᾜ ᾝ ᾞ ᾟ ᾠ ᾡ ᾢ ᾣ ᾤ ᾥ ᾦ ᾧ ᾨ ᾩ ᾪ ᾫ ᾬ ᾭ ᾮ ᾯ ᾲ ᾴ ᾷ ῂ ῄ ῇ ῒ ῗ ῢ ῧ ῲ ῴ ῷ ⁱ ⁿ ℠ ™ שּׁ שּׂ а̏ А̏ е̏ Е̏ и̏ И̏ о̏ О̏ у̏ У̏ р̏ Р̏ 🙌
5335            
5336             The following excerpt from NEO compose tables may be good if you use
5337             keyboards which do not generate dead keys, but may generate Cyrillic keys;
5338             in other situations, edit filtering/naming on the following download
5339             command and on the C line below. (For my taste, most bindings are
5340             useless since they contain keysymbols which may be generated with NEO, but
5341             not with less intimidating keylayouts.)
5342            
5343             (Filtering may be important, since having a large file may
5344             significantly slow down client's startup (without F<~/.compose-cache>???).)
5345            
5346             # perl -wle 'foreach (qw(base cyrillic greek lang math)) {my @i=@ARGV; $i[-1] .= qq($_.module?format=txt); system @i}' wget -O - http://wiki.neo-layout.org/browser/Compose/src/ | perl -wlne 'print unless /<(U[\dA-F]{4,6}>|dead_|Greek_)/' > .XCompose-neo-no-Udigits-no-dead-no-Greek
5347             include "%H/.XCompose-neo-no-Udigits-no-dead-no-Greek"
5348             # detect the success of the line above: get ♫ by doing Compose Compose (but this binding is overwritten later!)
5349            
5350             ###################################### Neo's Math contains junk at line 312
5351            
5352             Print with something like (loading in a web browser after this):
5353            
5354             perl -l examples/filter-XCompose ~/.XCompose-neo-no-Udigits-no-dead-no-Greek > ! o-neo
5355             env LC_ALL=C sort -f o-neo | column -x -c 130 > ! /tmp/oo-neo-x
5356            
5357             =head2 “Systematic” parts of rules in a few F<.XCompose>
5358            
5359             ================== .XCompose b=bepo o=ootync k=kragen p=pSub s=std
5360             b Double-Struck b
5361             o circled ops b
5362             O big circled ops b
5363             r rotated b 8ACETUv ∞
5364            
5365             - sub p
5366             = double arrows po
5367             g greek po
5368             m math p |=Double-Struck rest haphazard...
5369             O circles p Oo
5370             S stars p Ss
5371             ^ sup p added: i -
5372             | daggers p
5373            
5374             Double mathop ok +*&|%8CNPQRZ AE
5375            
5376             # thick-black arrows o
5377             -,Num- arrows o
5378             N/N fractions o
5379             hH pointing hands o
5380             O circled ops o
5381             o degree o
5382             rR roman nums o
5383             \ UP upper modifiers o
5384             \ DN lower modifiers o
5385             { set theoretic o
5386             | arrows |-->flavors o
5387             UP / roots o
5388             LFT DN 6-quotes, bold delim o
5389             RT DN 9-quotes, bold delim o
5390             UP,DN super,sub o
5391            
5392             DOUBLE-separated-by-& op k ( )
5393             in-() circled k xx for tensor
5394             in-[] boxed, dice, play-cards k
5395             BKSP after revert k
5396             < after revert k
5397             ` after small-caps k
5398             ' after hook k
5399             , after hook below k
5400             h after phonetic k
5401            
5402             # musical k
5403             %0 ROMAN k %_0 for two-digit
5404             % roman k %_ for two-digit
5405             * stars k
5406             *. var-greek k
5407             * greek k
5408             ++, 3 triple k
5409             + double k
5410             , quotes k
5411             !, / negate k
5412             6,9 6,9-quotes k
5413             N N fractions k
5414             = double-arrows, RET k
5415             CMP x2 long names k
5416             f hand, pencils k
5417             \ combining??? k
5418             ^ super, up modifier k
5419             _ low modifiers k
5420             |B, |W chess, checkers, B&W k
5421             | double-struck k
5422             ARROWS ARROWS k
5423            
5424             ! dot below s
5425             " diaeresis s
5426             ' acute s
5427             trail < left delimiter s
5428             trail > right delimiter s
5429             trail \ sloped variant s
5430             ( ... ) circled s
5431             ( greek aspirations s
5432             ) greek aspirations s
5433             + horn s
5434             , cedilla s
5435             . dot above s
5436             - hor. bar s
5437             / diag, vert hor. bar s
5438             ; ogonek s
5439             = double hor.bar s
5440             trail = double hor.bar s
5441             ? hook above s
5442             b breve s
5443             c check above s
5444             iota iota below s
5445             trail 0338 negated s
5446             o ring above s
5447             U breve s
5448             SOME HEBREW
5449             ^ circumflex s
5450             ^ _ superscript s
5451             ^ undbr superscript s
5452             _ bar s
5453             _ subscript s
5454             underbr subscript s
5455             ` grave s
5456             ~ greek dieresis s
5457             ~ tilde s
5458             overbar bar s
5459             ´ acute s ´ is not '
5460             ¸ cedilla s ¸ is cedilla
5461            
5462             =head1 LIMITATIONS
5463            
5464             Currently only output for Windows keyboard layout drivers (via MSKLC) is available.
5465            
5466             Currently only the keyboards with US-mapping of hardware keys to "the etched
5467             symbols" are supported (think of German physical keyboards where Y/Z keycaps
5468             are swapped: Z is etched between T and U, and Y is to the left of X, or French
5469             which swaps A and Q, or French or Russian physical keyboards which have more
5470             alphabetical keys than 26).
5471            
5472             While the architecture of assembling a keyboard of small easy-to-describe
5473             pieces is (IMO) elegant and very powerful, and is proven to be useful, it
5474             still looks like a collection of independent hacks. Many of these hacks
5475             look quite similar; it would be great to find a way to unify them, and so
5476             reduce the repertoire of operations for assembly.
5477            
5478             The current documentation of the module’s functionality is not complete.
5479            
5480             The implementation of the module is crumbling under its weight. Its
5481             evolution was by bloating (even when some design features were simplified).
5482             Since initially I had very little clue to which level of abstraction and
5483             flexibility the keyboard description would evolve, bloating accumulated
5484             to incredible amounts.
5485            
5486             =head1 COPYRIGHT
5487            
5488             Copyright (c) 2011-2013 Ilya Zakharevich
5489            
5490             This library is free software; you can redistribute it and/or modify
5491             it under the same terms as Perl itself, either Perl version 5.8.0 or,
5492             at your option, any later version of Perl 5 you may have available.
5493            
5494             The distributed examples may have their own copyrights.
5495            
5496             =head1 TODO
5497            
5498             UniPolyK-MultiSymple
5499            
5500             Multiple linked faces (accessible as described in ChangeLog); designated
5501             Primary- and Secondary- switch keys (as Shift-Space and AltGr-Space now).
5502            
5503             C as a deadkey may be not a good idea: following it by a special key
5504             (such as C, or C) may insert the deadkey character???
5505             Hence the character should be highly visible... (Now the key is invisible,
5506             so this is irrelevant...)
5507            
5508             Currently linked layers must have exactly the same number of keys in VK-tables.
5509            
5510             VK tables for TAB, BACK were BS. Same (remains) for the rest of unusual keys... (See TAB-was.)
5511             But UTOOL cannot handle them anyway...
5512            
5513             Define an extra element in VK keys: linkable. Should be sorted first in the kbd map,
5514             and there should be the same number in linked lists. Non-linkable keys should not
5515             be linked together by deadkey access...
5516            
5517             Interaction of FromToFlipShift with SelectRX not intuitive. This works: Diacritic[](SelectRX[[0-9]](FlipShift(Latin)))
5518            
5519             DefinedTo cannot be put on Cyrillic 3a9 (yo to superscript disappears - due to duplication???).
5520            
5521             ... so we do it differently now, but: LinkLayer was not aggressively resolving all the occurrences of a character on a layer
5522             before we started to combine it with Diacritic_if_undef... - and Cyrillic 3a9 is not helped...
5523            
5524             via_parent() is broken - cannot replace for Diacritic_if_undef.
5525            
5526             Currently, we map epigraphic letters to capital letters - is it intuitive???
5527            
5528             dotted circle ◌ 25CC
5529            
5530             DeadKey_Map200A= FlipLayers
5531             #DeadKey_Map200A_0= Id(Russian-AltGr)
5532             #DeadKey_Map200A_1= Id(Russian)
5533             performs differently from the commented variant: it adds links to auto-filled keys...
5534            
5535             Why ¨ on THIN SPACE inserts OGONEK after making ¨ multifaceted???
5536            
5537             When splitting a name on OVER/BELOW/ABOVE, we need both sides as modifiers???
5538            
5539             Ỳ currently unreachable (appears only in Latin-8 Celtic, is not on Wikipedia)
5540            
5541             Somebody is putting an extra element at the end of arrays for layers??? - Probably SPACE...
5542            
5543             Need to treat upside-down as a pseudo-decomposition.
5544            
5545             We decompose reversed-smallcaps in one step - probably better add yet another two-steps variant...
5546            
5547             When creating a treat SYMBOL/SIGN/FINAL FORM/ISOLATED FORM/INITIAL FORM/MEDIAL FORM;
5548             note that SIGN may be stripped: LESS-THAN SIGN becomes LESS-THAN WITH DOT
5549            
5550             We do not do canonical-merging of diacritics; so one needs to specify VARIA in addition to GRAVE ACCENT.
5551            
5552             We use a smartish algorithm to assign multiple diacritics to the same deadkey. A REALLY smart algorithm
5553             would use information about when a particular precombined form was introduced in Unicode...
5554            
5555             Inspector tool for NamesList.txt:
5556            
5557             grep " WITH .* " ! | grep -E -v "(ACUTE|GRAVE|ABOVE|BELOW|TILDE|DIAERESIS|DOT|HOOK|LEG|MACRON|BREVE|CARON|STROKE|TAIL|TONOS|BAR|DOTS|ACCENT|HALF RING|VARIA|OXIA|PERISPOMENI|YPOGEGRAMMENI|PROSGEGRAMMENI|OVERLAY|(TIP|BARB|CORNER) ([A-Z]+WARDS|UP|DOWN|RIGHT|LEFT))$" | grep -E -v "((ISOLATED|MEDIAL|FINAL|INITIAL) FORM|SIGN|SYMBOL)$" |less
5558             grep " WITH " ! | grep -E -v "(ACUTE|GRAVE|ABOVE|BELOW|TILDE|DIAERESIS|CIRCUMFLEX|CEDILLA|OGONEK|DOT|HOOK|LEG|MACRON|BREVE|CARON|STROKE|TAIL|TONOS|BAR|CURL|BELT|HORN|DOTS|LOOP|ACCENT|RING|TICK|HALF RING|COMMA|FLOURISH|TITLO|UPTURN|DESCENDER|VRACHY|QUILL|BASE|ARC|CHECK|STRIKETHROUGH|NOTCH|CIRCLE|VARIA|OXIA|PSILI|DASIA|DIALYTIKA|PERISPOMENI|YPOGEGRAMMENI|PROSGEGRAMMENI|OVERLAY|(TIP|BARB|CORNER) ([A-Z]+WARDS|UP|DOWN|RIGHT|LEFT))$" | grep -E -v "((ISOLATED|MEDIAL|FINAL|INITIAL) FORM|SIGN|SYMBOL)$" |less
5559            
5560             AltGrMap should be made CapsLock aware (impossible: smart capslock works only on the first layer, so
5561             the dead char must be on the first layer). [May work for Shift-Space - but it has a bag of problems...]
5562            
5563             Alas, CapsLock'ing a composition cannot be made stepwise. Hence one must calculate it directly.
5564             (Oups, Windows CapsLock is not configurable on AltGr-layer. One may need to convert
5565             it to VK_KANA???)
5566            
5567             WarnConflicts[exceptions] and NoConflicts translation map parsing rules.
5568            
5569             Need a way to map to a different face, not a different layer.
5570            
5571             Vietnamese: to put second accent over ă, ơ (o/horn), put them over ae/oe; - including
5572             another ˘ which would "cancel the implied one", so will get o-horn itself. - Except
5573             for acute accent which should be replaced by ¨, and hook must be replaced by ˆ. (Over ae/oe
5574             there is only macron and diaeresis over ae.)
5575            
5576             Or: for the purpose of taking a second accent, AltGr-A behaves as Ă (or Â?), AltGr-O
5577             behaves as Ô (or O-horn Ơ?). Then Å and O/ behave as the other one... And ˚ puts the
5578             dot *below*, macron puts a hook. Exception: ¨ acts as ´ on the unaltered AE.
5579            
5580             While Å takes acute accent, one can always input it via putting ˚ on Á.
5581            
5582             If Ê is on the keyboard (and macron puts a hook), then the only problem is how to enter
5583             a hook alone (double circumflex is not precombined), dot below (???), and accents on u-horn ư.
5584            
5585             Mogrification rules for double accents: AE Å OE O/ Ù mogrify into hatted/horned versions; macron
5586             mogrifies into a hook; second hat modifies a hat into a horn. The only problem: one won't be
5587             able to enter double grave on U - use the OTHER combination of ¨ and `... And how to enter
5588             dot below on non-accented aue? Put ¨ on umlaut? What about Ë?
5589            
5590             To allow . or , on VK_DECIMAL: maybe make CapsLock-dependent?
5591            
5592             http://blogs.msdn.com/b/michkap/archive/2006/09/13/752377.aspx
5593            
5594             How to write this diacritic recipe: insert hacheck on AltGr-variant, but only if
5595             the breve on the base layer variant does not insert hacheck (so inserts breve)???
5596            
5597             Sorting diacritics by usefulness: we want to apply one of accents from the
5598             given list to a given key (with l layers of 2 shift states). For each accent,
5599             we have 2l possible variants for composition; assign to 2 variants differing
5600             by Shift the minimum penalty of the two. For each layer we get several possible
5601             combinations of different priority; and for each layer, we have a certain number
5602             of slots open. We can redistribute combinations from the primary layer to
5603             secondary one, but not between secondary layers.
5604            
5605             Work with slots one-by-one (so that the assignment is "monotonic" when the number
5606             of slots increases). Let m be the number of layers where slots are present.
5607             Take highest priority combinations; if the number of "extra" combinations
5608             in the primary layer is at least m, distribute the first m of them to
5609             secondary layers. If only n<m of them are available, fill the k layers which
5610             have no combinations of their own first, then other n-k layers. More precisely,
5611             if n<=k, use the first n of "free" layers; if n>k, fill all free layers, then
5612             the last n-k of non-free layers.
5613            
5614             Repeat as needed (on each step, at most one slot in each layer appears).
5615            
5616             But we do not need to separate case-differing keys! How to fix?
5617            
5618             All done, but this works only on the current face! To fix, need to pass
5619             to the translator all the face-characters present on the given key simultaneously.
5620            
5621             ===== Accent-key TAB accesses extra bindings (including NUM->numbered one)
5622             (may be problematic with some applications???
5623             -- so duplicate it on + and @ if they are not occupied
5624             -- there is nothing related to AT in Unicode)
5625            
5626             Diacritics_0218_0b56_0c34= May create such a thing...
5627             (0b56_0c34 invisible to the user).
5628            
5629             Hmm - how to combine penalized keys with reversion? It looks like
5630             the higher priority bindings would occupy the hottest slots in both
5631             direct and reverse bindings...
5632            
5633             Maybe additional forms Diacrtitics2S_* and Diacrtitics2E_* which fight
5634             for symbols of the same penalty from start and from end (with S winning
5635             on stuff exactly in the middle...). (The E-form would also strip the last |-group.)
5636            
5637             ' Shift-Space (from US face) should access the second level of Russian face.
5638             To avoid infinite cycles, face-switch keys to non-private faces should be
5639             marked in each face...
5640            
5641             "Acute makes sharper" is applicable to () too to get <>-parens...
5642            
5643             Other ways of combining: "OR EQUAL TO", "OR EQUIVALENT TO", "APL FUNCTIONAL
5644             SYMBOL QUAD", "APL FUNCTIONAL SYMBOL *** UNDERBAR", "APL FUNCTIONAL SYMBOL *** DIAERESIS".
5645            
5646             When recognizing symbols for GREEK, treat LUNATE (as NOP). Try adding HEBREW LETTER at start as well...
5647            
5648             Compare with: 8 basic accents: http://en.wikipedia.org/wiki/African_reference_alphabet (English 78)
5649            
5650             When a diacritic on a base letter expands to several variants, use them all
5651             (with penalty according to the flags).
5652            
5653             Problem: acute on acute makes double acute modifier...
5654            
5655             Penalized letters are temporarily completely ignored; need to attach them in the end...
5656             - but not 02dd which should be completely ignored...
5657            
5658             Report characters available on diacritic chains, but not accessible via such chains.
5659             Likewise for characters not accessible at all. Mark certain chains as "Hacks" so that
5660             they are not counted in these lists.
5661            
5662             Long s and "preceded by" are not handled since the table has its own (useless) compatibility decompositions.
5663            
5664             ╒╤╕
5665             ╞╪╡
5666             ╘╧╛
5667             ╓╥╖
5668             ╟╫╢
5669             ╙╨╜
5670             ╔╦╗
5671             ╠╬╣
5672             ╚╩╝
5673             ┌┬┐
5674             ├┼┤
5675             └┴┘
5676             ┎┰┒
5677             ┠╂┨
5678             ┖┸┚
5679             ┍┯┑
5680             ┝┿┥
5681             ┕┷┙
5682             ┏┳┓
5683             ┣╋┫
5684             ┗┻┛
5685             On top of a light-lines grid (3×2, 2×3, 2×2; H, V, V+H):
5686             ┲┱
5687             ╊╉
5688             ┺┹
5689             ┢╈┪
5690             ┡╇┩
5691             ╆╅
5692             ╄╇
5693             ╼†━†╾†╺†╸†╶†─†╴†╌†┄†┈† †╍†┅†┉†
5694             ╼━╾╺╸╶─╴╌┄┈ ╍┅┉
5695            
5696            
5697            
5698            
5699            
5700            
5701            
5702            
5703            
5704             ╎┆┊╏┇┋
5705            
5706             ╲ ╱
5707            
5708             ╭╮
5709             ╰╯
5710             ◤▲◥
5711             ◀■▶
5712             ◣▼◢
5713             ◜△◝
5714             ◁□▷
5715             ◟▽◞
5716             ◕◓◔
5717             ◐○◑
5718            
5719             ▗▄▖
5720             ▐█▌
5721             ▝▀▘
5722             ▛▀▜
5723             ▌ ▐
5724             ▙▄▟
5725            
5726             ░▒▓
5727            
5728            
5729             =head2 Implementation details
5730            
5731             Since the C accessor may have different effects at different moment of
5732             a face C synthesis, here is the order in which C changes:
5733            
5734             ini_layers: essentially, contains what is given in the key “layers” of the face recipe
5735             Later, a version of these layers with exportable keys marked is created as ini_layers_prefix.
5736             ini_filled_layers: adds extra (fake) keys containing control characters and created via-VK-keys
5737             (For these extended layers, the previous version can be inspected via ini_copy1.)
5738             (created when exportable keys are handled.)
5739            
5740             The next modification is done not by modifying the list of names of layers
5741             associated to the face, but by editing the corresponding layers in place.
5742             (The unmodified version of layer, one containing the exportable keys, is
5743             accessible via C.) On this step one adds the missing characters via
5744             from the face specified in the C key.
5745            
5746             =cut
5747            
5748             # '
5749             my (%Globals, $DEBUG); # %Globals: class-level store used by set__value()/get__value() when the invocant is not an object
5750            
5751             sub set__value ($$$) {
                 # set__value(INVOCANT, KEY, VALUE): store VALUE under KEY.
                 # If INVOCANT is a reference (an object), the value goes into the object's
                 # own hash; otherwise it goes into the file-level %Globals hash.
5752 0     0 0   my($class, $key) = (shift, shift);
5753 0 0         (ref $class ? $class->{$key} : $Globals{$key}) = shift;
5754             }
5755             sub get__value ($$) {
                 # get__value(INVOCANT, KEY): fetch the value stored under KEY.
                 # A defined per-object value (INVOCANT is a reference) takes precedence;
                 # otherwise (plain class name, or the object's value is undefined) the
                 # value from %Globals is returned.
5756 0     0 0   my($class, $key) = (shift, shift);
5757 0 0 0       if (ref $class and defined(my $v = $class->{$key})) {
5758 0           $v;
5759             } else {
5760 0           $Globals{$key};
5761             }
5762             }
5763             sub set_NamesList ($$;$) {
                 # set_NamesList(INVOCANT, NAMESLIST [, AGELIST]): record both values via
                 # set__value() under the keys 'NamesList' and 'AgeList'.
                 # 'AgeList' is always assigned, so omitting the last argument stores undef.
5764 0     0 0   my $class = shift;
5765 0           set__value($class, 'NamesList', shift);
5766 0           set__value($class, 'AgeList', shift);
5767             }
5768 0     0 0   sub get_NamesList ($) { get__value(shift, 'NamesList') } # Accessor: value stored under 'NamesList' (per-object if defined, else global)
5769 0     0 0   sub get_AgeList ($) { get__value(shift, 'AgeList') } # Accessor: value stored under 'AgeList' (per-object if defined, else global)
5770            
5771             sub new ($;$) {
                 # Constructor: new(CLASS_OR_OBJECT [, HASHREF]).
                 # Dies if more than one argument follows the invocant.
                 # The optional HASHREF is shallow-copied (the caller's hash is not blessed);
                 # without it the object starts as an empty hash.
                 # When called on an object, the new object is blessed into that object's class.
5772 0     0 0   my $class = shift;
5773 0 0         die "too many arguments to UI::KeyboardLayout->new" if @_ > 1;
5774 0 0         my $data = @_ ? {%{shift()}} : {};
  0            
5775 0   0       bless $data, (ref $class or $class);
5776             }
5777            
5778             sub put_deep($$$$@) {
                 # put_deep(SELF, HASHREF, VALUE, KEY1 [, KEY2 ...]): store VALUE in a nested
                 # hash at the path KEY1 -> KEY2 -> ...
                 # While more keys remain, recurse into $hash->{KEY} (created as an empty hash
                 # if it is missing or false); the last key receives VALUE.
                 # Returns the result of the final assignment.
5779 0     0 0   my($self, $hash, $v, $k) = (shift, shift, shift, shift);
5780 0 0 0       return $self->put_deep($hash->{$k} ||= {}, $v, @_) if @_;
5781 0           $hash->{$k} = $v;
5782             }
5783            
5784             # Sections [foo/bar] [visual -> foo/bar]; directives foo=bar or @foo=bar,baz
5785             sub parse_configfile ($$) { # Trailing whitespace is ignored, whitespace about "=" is not
                 # parse_configfile(SELF, TEXT): parse configuration text into a nested hash.
                 #   - The text is cut at section header lines "[a/b/c]" or "[visual -> a/b/c]";
                 #     anything before the first header is ignored with a warning.
                 #   - The section name, split on "/", is the path into the result hash.
                 #   - For "visual" sections the name is rewritten to "[unparsed]/..." and the
                 #     text from the first line that is neither a comment nor an assignment up
                 #     to the end of the section is stored as the directive "unparsed_data".
                 #   - Every other non-comment line must look like KEY=VALUE, @KEY=VALUE,
                 #     /KEY=VALUE or +KEY=VALUE; any other line is fatal.
                 #     "@" splits VALUE on commas, "/" splits it on slashes, "+" makes a
                 #     one-element array; array values are appended to an existing array.
                 #   - Redefinition of an existing entry produces a warning.
                 # Returns a hash reference; its '[keys]' entry lists the section names in the
                 # order they were processed (the iteration order of a hash).
5786 0     0 0   my ($self, $s, %v, @KEYS) = (shift, shift);
5787 0           $s =~ s/[^\S\n]+$//gm;
5788 0           $s =~ s/^\x{FEFF}//; # BOM are not stripped by Perl from UTF-8 files with -C31
5789 0           (my $pre, my %f) = split m(^\[((?:visual\s*->\s*)?[\w/]*)\]\s*$ \n?)mx, $s; # //x is needed to avoid $\
5790 0 0         warn "Part before the first section in configfile ignored: `$pre'" if length $pre;
5791 0           for my $k (keys %f) {
5792             # warn "Section `$k'";
5793 0           my($v, $V, @V) = $f{$k};
5794 0 0         if ($k =~ s{^visual\s*->\s*}{[unparsed]/}) { # Make sure that prefixes do not allow visual line to be confused with a config
5795 0           $v =~ s[(^(?!#|[/\@+]?\w+=).*)]//ms; # find non-comment non-assignment
5796 0           @V = "unparsed_data=$1";
5797             }
5798             # warn "xxx: @V";
5799 0           push @KEYS, $k;
5800 0           my @k = split m(/), $k;
5801 0 0         @k = () if "@k" eq ''; # root
5802 0           for my $l ((grep !/^#/, split(/\n/, $v)), @V) {
5803 0 0         die "unrecognized config file line: `$l' in `$s'"
5804             unless my($arr, $at, $slash, $kk, $vv) = ($l =~ m[^((?:(\@)|(/)|\+)?)(\w+)=(.*)]s);
                 # The pattern (?!) never matches, so with the "+" prefix the value is not split
5805 0 0         my $spl = $at ? qr/,/ : ( $slash ? qr[/] : qr[(?!)] );
    0          
5806 0 0         $vv = [ length $vv ? (split $spl, $vv, -1) : $vv ] if $arr; # create empty element if $vv is empty
    0          
5807 0           my $slot = $self->get_deep(\%v, @k);
5808 0 0 0       if ($slot and exists $slot->{$kk}) {
5809 0 0         if ($arr) {
5810 0 0 0       if (ref($slot->{$kk} || 0) eq 'ARRAY') {
5811 0           $vv = [@{$slot->{$kk}}, @$vv];
  0            
5812             } else {
5813 0           warn "Redefinition of non-array entry `$kk' in `$k' by array one, old value ignored"
5814             }
5815             } else {
5816 0           warn "Redefinition of entry `$kk' in `$k', old value ignored"
5817             }
5818             }
5819             # warn "Putting to the root->@k->`$kk'";
5820 0           $self->put_deep(\%v, $vv, @k, $kk);
5821             }
5822             }
5823 0           $v{'[keys]'} = \@KEYS;
5824             # warn "config parsed";
5825 0           \%v
5826             }
5827            
5828             sub process_key_chunk ($$$$$) {
                 # process_key_chunk(SELF, NAME, SKIP_FIRST, CHUNK, SEP2): expand one chunk of
                 # a key descriptor into the list of its Shift-states.
                 #   - CHUNK is first passed through $self->stringHEX2string() (defined outside
                 #     this part of the file), then split into single characters.
                 #   - If SEP2 is defined, the chunk has at least 3 characters and SEP2 matches,
                 #     the chunk is split on SEP2 instead (allowing multi-character entries).
                 #   - A lone one-character entry whose ucfirst() differs gets the ucfirst()
                 #     form appended as the second entry.
                 # Returns a list in which every defined entry is wrapped as
                 # [ENTRY, undef, undef, "VisLr=NAME"] (the last element is NAME unchanged when
                 # NAME is false); undefined entries are returned as they are.
5829 0     0 0   my $self = shift;
5830 0           my $name = shift;
5831 0           my $skip_first = shift;
5832 0           (my $k = shift) =~ s/\p{Blank}(?=\p{NonspacingMark})//g; # Allow combining marks to be on top of SPACE
5833 0           my $sep2 = shift;
5834 0           $k = $self->stringHEX2string($k);
5835 0           my @k = split //, $k;
5836 0 0 0       if (defined $sep2 and 3 <= @k and $k =~ /$sep2/) { # Allow separation by $sep2, but only if too long
      0        
5837 0           @k = split /$sep2/, $k;
5838 0 0 0       shift @k if not length $k[0] and @k == 2;
5839 0 0 0       warn "Zero length expansion in the key slot <$k>\n" if not @k or grep !length, @k;
5840             }
                 # process_key() marks a leading "--" by replacing it with "\0"
5841 0 0 0       undef $k[0] if ($k[0] || '') eq "\0" and $skip_first;
      0        
5842 0 0 0       push @k, ucfirst $k[0] if @k == 1 and defined $k[0] and 1==length $k[0] and $k[0] ne ucfirst $k[0];
      0        
      0        
5843 0 0         $name = "VisLr=$name" if $name;
5844             # warn "Multi-char key in <<@k>>" if grep $_ && 1
5845 0 0         warn "More that 2 Shift-states in <<@k>>" if @k > 2;
5846             #warn "Sep2 in $name, $skip_first, <$k> ==> <@k>\n" if defined $sep2 and $k =~ /$sep2/;
5847 0 0         map {defined() ? [$_, undef, undef, $name] : $_} @k;
  0            
5848             # @k
5849             } # -> list of chars
5850            
5851             sub process_key ($$$$$$;$) { # $sep may appear only in a beginning of the first key chunk
                 # process_key(SELF, KEY, LIMIT, SEP, LAYER_NAMES, LAYER_OFFSET [, SEP2]):
                 # split the key descriptor KEY on SEP (a SEP at the very start is not treated
                 # as a separator) into at most LIMIT chunks; more chunks are fatal.
                 # A chunk starting with "--" followed by something has the "--" replaced by
                 # "\0" and is flagged for process_key_chunk(); a first chunk that is exactly
                 # "--" becomes empty.
                 # Returns one array reference per chunk, holding what process_key_chunk()
                 # returned for it; chunk number i is labelled with LAYER_NAMES->[LAYER_OFFSET+i].
5852 0     0 0   my ($self, $k, $limit, $sep, $ln, $l_off, $sep2, @tr) = (shift, shift, shift, shift, shift, shift, shift);
5853 0           my @k = split m((?!^)\Q$sep), $k;
5854 0 0         die "Key descriptor `$k' separated by `$sep' has too many parts: expected $limit, got ", scalar @k
5855             if @k > $limit;
5856 0   0       defined $k[$_] and $k[$_] =~ s/^--(?=.)/\0/ and $tr[$_]++ for 0..$#k;
      0        
5857 0 0         $k[0] = '' if $k[0] eq '--'; # Allow a filler (multi)-chunk
5858 0 0         map [$self->process_key_chunk( $ln->[$l_off+$_], $tr[$_], (defined($k[$_]) ? $k[$_] : ''), $sep2)], 0..$#k;
5859             } # -> list of arrays of chars
5860            
5861             sub decode_kbd_layers ($@) {
                 # decode_kbd_layers(SELF, HASHREF...): decode a "visual" keyboard template
                 # (the option unparsed_data) into layers.
                 # Each option is taken from the first of the given hashes (then from SELF, if
                 # it is an object) in which the key exists.  Keys of %needed are mandatory
                 # (only the keys are used; the values are placeholders); keys of %extra are
                 # optional.
                 # The template consists of rows of visual_rowcount lines each; line number i
                 # of a row carries visual_per_row_counts->[i] layers per key.
                 # Returns a list of three elements: a hash reference mapping layer names to
                 # layers, an array reference of key counts, and the keyline_offsets option.
5862 0     0 0   my ($self, $lineN, $row, $line_in_row, $cur_layer, @out, $N, $l0) = (shift, 0, -1);
5863 0           my %needed = qw(unparsed_data x visual_rowcount 2 visual_per_row_counts [2;2] visual_prefixes * prefix_repeat 3 in_key_separator / layer_names ???);
5864 0           my %extra = (qw(keyline_offsets 1 in_key_separator2), undef);
5865 0           my $opt;
5866 0           for my $k (keys %needed, keys %extra) {
5867 0 0         my ($from) = grep exists $_->{$k}, @_, (ref $self ? $self : ());
5868 0 0 0       die "option `$k' not specified" unless $from or exists $extra{$k};
5869 0           $opt->{$k} = $from->{$k};
5870             }
5871             die "option `visual_rowcount' differs from length of `visual_per_row_counts': $opt->{visual_rowcount} vs. ",
5872 0 0         scalar @{$opt->{visual_per_row_counts}} unless $opt->{visual_rowcount} == @{$opt->{visual_per_row_counts}};
  0            
  0            
                 # Lines starting with "#" are comments; trailing whitespace is dropped by the split
5873 0           my @lines = grep !/^#/, split /\s*\n/, $opt->{unparsed_data};
5874 0           my ($C, $lc, $pref) = map $opt->{$_}, qw(visual_rowcount visual_per_row_counts visual_prefixes);
5875 0 0         die "Number of uncommented rows (" . scalar @lines . ") in a visual template not divisible by the rowcount $C: `$opt->{unparsed_data}'"
5876             if @lines % $C;
                 # One prefix character per line of a row (a space stands for any whitespace);
                 # the list is padded with spaces
5877 0 0         $pref = [map {$_ eq ' ' ? qr/\s/ : qr/\Q$_/ } split(//, $pref), (' ') x $C];
  0            
5878             # my $line_in_row = [];
5879 0           my @counts;
5880             my $sep2;
5881 0 0         $sep2 = qr/$opt->{in_key_separator2}/ if defined $opt->{in_key_separator2};
5882 0           while (@lines) {
5883             # push @out, $line_in_row = [] unless $C % $c;
                 # Every $C lines a new row starts, and the layer numbering restarts from 0
5884 0 0         $row++, $line_in_row = $cur_layer = 0 unless $lineN % $C;
5885 0           $lineN++;
5886 0           my $l1 = shift @lines;
5887 0           my $PREF = qr/(?:$pref->[$line_in_row]){$opt->{prefix_repeat}}/;
                 # A whitespace prefix is matched once, not prefix_repeat times
5888 0 0         $PREF = '\s' if $pref->[$line_in_row] eq qr/\s/;
5889 0 0         $l1 =~ s/\s*\x{202c}$// if $l1 =~ s/^[\x{202d}\x{202e}]//; # remove PDF if removed LRO, RLO
5890 0 0         die "line $lineN in visual layers has unexpected prefix:\n\tPREF=/$PREF/\n\tLINE=`$l1'" unless $l1 =~ s/^$PREF\s*(?<=\s)//;
                 # Keys are separated by whitespace; whitespace followed by a combining mark does not separate
5891 0           my @k1 = split /\s+(?!\p{NonspacingMark})/, $l1;
5892 0 0         $l0 = $l1, $N = @k1 if $line_in_row == 0;
5893             # warn "Got keys: ", scalar @k1;
5894 0 0         die sprintf "number of keys in lines differ: %s vs %s in:\n\t`%s'\n\t`%s'\n\t<%s>",
5895             scalar @k1, $N, $l0, $l1, join(">\t<", @k1) unless @k1 == $N; # One can always fill by --
5896 0           for my $key (@k1) {
5897 0           my @kk = $self->process_key($key, $lc->[$line_in_row], $opt->{in_key_separator}, $opt->{layer_names}, $cur_layer, $sep2);
5898 0           push @{$out[$cur_layer + $_]}, $kk[$_] || [] # (defined $kk[$_] ? [$kk[$_],undef,undef,$opt->{layer_names}[$cur_layer + $_]] : [])
5899 0   0       for 0..($lc->[$line_in_row]-1);
5900             }
5901 0           $cur_layer += $lc->[$line_in_row++];
                 # Record the key count once per row ($lineN is already incremented here).
                 # NOTE(review): with visual_rowcount == 1, `$lineN % $C' is always 0, so nothing
                 # would be recorded - confirm whether that case is meant to be supported
5902 0 0         push @counts, scalar @k1 if 1 == $lineN % $C;
5903             }
5904             # warn "layer[0] = ", join ', ', map "@$_", @{$out[0]};
5905 0           die "Got ", scalar @out, " layers, but ", scalar @{$opt->{layer_names}}, " layer names"
5906 0 0         unless @out == @{$opt->{layer_names}};
  0            
5907 0           my(%seen, %out);
5908 0   0       $seen{$_}++ and die "Duplicate layer name `$_'" for @{$opt->{layer_names}};
  0            
5909 0           @out{ @{$opt->{layer_names}} } = @out;
  0            
5910 0           \%out, \@counts, $opt->{keyline_offsets};
5911             }
5912            
5913             sub decode_rect_layers ($@) {
                 # decode_rect_layers(SELF, HASHREF...): decode a template (the option
                 # unparsed_data) made of rectangles of characters into layers, one layer per
                 # rectangle.
                 # Options are looked up as in decode_kbd_layers(): first hash (then SELF, if an
                 # object) in which the key exists; keys of %needed are mandatory, `empty' is
                 # optional.
                 # Layout: rect_horizontal_counts->[i] rectangles side by side in the i-th
                 # vertical band; every rectangle is rect_rows_cols->[0] lines of
                 # rect_rows_cols->[1] characters; rectangles are separated horizontally by
                 # COLgap whitespace characters, bands vertically by ROWgap comment lines.
                 # A character equal to the `empty' option becomes undef.
                 # Returns a list of two elements: a hash reference mapping layer names to
                 # layers (plus synthesized NAME² entries), and an array reference with the
                 # column count repeated once per row.
5914 0     0 0   my ($self, $cnt, %extra, $opt, @out) = (shift, 0, qw(empty N/A));
5915 0           my %needed = qw(unparsed_data x rect_rows_cols [4;4] rect_horizontal_counts [2;2] layer_names ??? COLgap 0 ROWgap 0);
5916 0           for my $k (keys %needed, keys %extra) {
5917 0 0         my ($from) = grep exists $_->{$k}, @_, (ref $self ? $self : ());
5918 0 0 0       die "option `$k' not specified" unless $from or exists $extra{$k};
5919 0           $opt->{$k} = $from->{$k};
5920             }
5921 0           $cnt += $_ for @{ $opt->{rect_horizontal_counts} };
  0            
5922             die "total of option `rect_horizontal_counts' differs from count of `layer_names': $cnt vs. ",
5923 0 0         scalar @{$opt->{layer_names}} unless $cnt == @{$opt->{layer_names}};
  0            
  0            
                 # From here on $cnt is the number of vertical bands
5924 0           $cnt = @{ $opt->{rect_horizontal_counts} };
  0            
                 # Strip the comment lines at the start and at the end of the template
5925 0           (my $D = $opt->{unparsed_data}) =~ s/^(#.*\n)+//;
5926 0           $D =~ s/^(#.*(\n|\z))+\z//m;
5927 0           my @lines = split /\s*\n/, $D;
                 # The first three keys below are not among those copied into $opt above, so
                 # $C, $lc and $pref are undefined; they are not used in the rest of this sub
5928 0           my ($C, $lc, $pref, $c0, $r0) = map $opt->{$_}, qw(visual_rowcount visual_per_row_counts visual_prefixes COLgap ROWgap);
5929             die "Number of uncommented rows (" . scalar @lines . ") in a visual rect template not matching rows(rect_rows_cols) x cnt(rect_horizontal_counts) = $opt->{rect_rows_cols}[0] x $cnt: `$opt->{unparsed_data}'"
5930 0 0         if @lines != $cnt * $opt->{rect_rows_cols}[0] + ($cnt-1)*$r0;
                 # $c is the index of the current vertical band
5931 0           my $c = 0;
5932 0           while (@lines) {
5933 0           die "Too many rect vertically: expect only ", scalar @{ $opt->{rect_horizontal_counts} }, " in `" . join("\n",'',@lines,'') . "'"
5934 0 0         if $c >= @{ $opt->{rect_horizontal_counts} };
  0            
5935 0           my @L = splice @lines, 0, $opt->{rect_rows_cols}[0];
5936 0           my ($cR, $L) = 0;
5937 0           while (++$cR <= $r0) { # Inter-row gap
5938 0 0         last unless @lines;
5939 0 0         ($L = shift @lines) =~ /^#/ or die "Line expected to be inter-row comment line No. $cR: <<<$L>>>"
5940             }
5941 0           my $l = length $L[0];
5942 0   0       $l == length or die "Lengths of lines encoding rect do not match: expect $l, got `" . join("\n",'',@L,'') . "'" for @L[1..$#L];
                 # The condition below does not depend on the loop variable; the check runs
                 # once per line after the first one (so not at all for a one-line band)
5943             $l == $opt->{rect_rows_cols}[1] * $opt->{rect_horizontal_counts}[$c] + ($opt->{rect_horizontal_counts}[$c] - 1)*$c0
5944             or die "Wrong line length in rect: expect $opt->{rect_rows_cols}[1] * $opt->{rect_horizontal_counts}[$c] gaps=$c0, got $l in `"
5945 0   0       . join("\n",'',@L,'') . "'" for @L[1..$#L];
                 # Chop rectangles off the left end of the band's lines until nothing remains
5946 0           while (length $L[0]) {
5947 0           my @c;
5948 0           push @c, split //, substr $_, 0, $opt->{rect_rows_cols}[1], '' for @L;
5949 0   0       $_ eq $opt->{empty} and $_ = undef for @c;
5950 0           push @out, [map [$_], @c];
5951 0 0 0       next unless $c0 and length $L[0]; # Inter-col gap
5952 0           for my $i (0..$#L) {
5953 0 0         next unless (my $gap = substr $L[$i], 0, $c0, '') =~ /\S/;
5954 0           die "Inter-column gap not whitespace: line No. $i (0-based), gap No. $#out: <<<$gap>>>"
5955             }
5956             }
5957 0           $c++;
5958             }
5959 0           die "Too few vertical rect: got $c, expect ", scalar @{ $opt->{rect_horizontal_counts} }, " in `" . join("\n",'',@lines,'') . "'"
5960 0 0         if $c != @{ $opt->{rect_horizontal_counts} };
  0            
5961 0           my(%seen, %out);
5962 0   0       $seen{$_}++ and die "Duplicate layer name `$_'" for @{$opt->{layer_names}};
  0            
5963 0           @out{ @{$opt->{layer_names}} } = @out;
  0            
                 # For each pair of consecutive layers, synthesize a two-column layer named
                 # NAME² (unless such an entry already exists), pairing position by position
                 # the entries of layer NAME with those of the following layer
5964 0           for my $i ( 0 .. ($#{ $opt->{layer_names} } - 1) ) {
  0            
5965 0           my($base,$shift) = ($out[$i], $out[$i+1]);
5966 0   0       $out{$opt->{layer_names}[$i] . '²'} ||= [ map [$base->[$_][0], $shift->[$_][0]], 0..$#$base ];
5967             }
5968 0           \%out, [($opt->{rect_rows_cols}[1]) x $opt->{rect_rows_cols}[0]];
5969             }
5970            
5971             sub get_deep ($$@) {
                 # Descend through nested hashes: starting from hash ref $h, follow the
                 # keys given in the remaining arguments, one level per key.
                 # Returns $h itself when no keys are given, nothing (empty list / undef)
                 # as soon as a key on the path does not exist, and otherwise the value
                 # stored at the end of the path.  Implemented by recursion on the tail
                 # of the key list.
5972 0     0 0   my($self, $h) = (shift, shift);
5973 0 0         return $h unless @_;
5974 0           my $k = shift @_;
5975 0 0         return unless exists $h->{$k};
5976 0           $self->get_deep($h->{$k}, @_);
5977             }
5978            
5979             sub get_deep_via_parents ($$$@) { # quadratic algorithm
                 # Look up the LAST key of the given path, falling back to enclosing
                 # ("parent") levels: first try PATH/key via get_deep(); if the key is not
                 # there, drop the last component of PATH and retry, until the key is found
                 # or PATH is exhausted (then nothing is returned).
                 # Arguments: $h   - root hash ref;
                 #            $idx - optional array index (may be undef);
                 #            rest - the path, with the wanted key as its last element.
                 # $IDX counts how many levels were dropped; it starts undefined, so it is
                 # false when the key was found at the full path.
                 # Returns the value found.  When that value is a reference, was found in
                 # a parent (not at the full path), and $idx is defined, the element
                 # $v->[$idx] is returned instead.
5980 0     0 0   my($self, $h, $idx, $IDX) = (shift, shift, shift);
5981             #warn "Deep: `@_'";
                 # Empty path: hand back $h if defined, nothing otherwise.
5982 0 0         ((defined $h) ? return $h : return) unless @_;
    0          
5983 0           my $k = pop @_;
                 # Bare block used as a loop: `redo' below restarts it with a shorter @_.
5984             {
5985             #warn "Deep::: `@_'";
5986 0           my $H = $self->get_deep($h, @_);
  0            
                 # Key missing at this level: give up if no parents remain; otherwise note
                 # the fallback, drop the last path component (bare `pop' acts on @_), retry.
5987             (@_ or return), $IDX++, # Start extraction from array
5988 0 0 0       pop, redo unless exists $H->{$k};
5989 0           my $v = $H->{$k};
5990             #warn "Deep -> `$v'";
                 # ref($v || 1) is false for any false $v, so false values are returned as is.
5991 0 0 0       return $v unless ref($v || 1) and $IDX and defined $idx;
      0        
      0        
5992 0           return $v->[$idx];
5993             }
5994 0           return;
5995             }
5996            
5997             sub fill_kbd_layers ($$) { # We do not do deep processing here...
                 # Decode every "visual" layer specification listed in $h->{'[keys]'} whose
                 # name starts with `[unparsed]/KBD' or `[unparsed]/RECT'.
                 # Returns three hash refs keyed by layer name: the decoded layers, the
                 # per-layer counts, and the per-layer offsets (the last one is filled only
                 # when the decoder returned offsets).
                 # Dies if two specifications define a layer with the same name.
5998 0     0 0   my($self, $h, %o, %c, %O) = (shift, shift);
5999 0           my @K = grep m(^\[unparsed]/(KBD|RECT)\b), @{$h->{'[keys]'}};
  0            
6000             # my $H = $h->{'[unparsed]'};
6001 0           for my $k (@K) {
6002 0           my (@parts, @h) = split m(/), $k;
                 # Collect candidate hashes along the path of the spec ({} where a level is
                 # missing): from the object itself (only if $self is a reference), from $h
                 # without the leading path component, and from $h with the full path.
                 # The decoder receives them in reverse order of collection.
6003 0   0       ref $self and push @h, $self->get_deep($self, @parts[1..$_]) || {} for 0..$#parts;
      0        
6004 0   0       push @h, $self->get_deep($h, @parts[1..$_]) || {} for 0..$#parts; # Drop [unparsed]/ prefix...
6005 0   0       push @h, $self->get_deep($h, @parts[0..$_]) || {} for -1..$#parts;
6006 0 0         my ($in, $counts, $offsets) = ($k =~ m(^\[unparsed]/KBD\b) ? $self->decode_kbd_layers( reverse @h )
6007             : $self->decode_rect_layers( reverse @h ) );
                 # Report the clashing LAYER name ($_), not the name of the spec, in the second slot.
6008 0   0       exists $o{$_} and die "Visual spec `$k' overwrites existing layer `$_'" for keys %$in;
                 # List assignment in scalar context gives the number of layers just stored.
6009 0           my $cnt = (@o{keys %$in} = values %$in);
6010 0           @c{keys %$in} = ($counts) x $cnt;
6011 0 0         @O{keys %$in} = ($offsets) x $cnt if $offsets;
6012             }
6013 0           \%o, \%c, \%O
6014             }
6015            
6016             sub key2hex ($$;$) {
                 # Format the code point of the first character of $k as lowercase hex,
                 # zero-padded to at least 4 digits.
                 # With a true $ignore, an undefined $k yields -1 instead.
6017 0     0 0   my ($self, $k, $ignore) = (shift, shift, shift);
6018 0 0 0       return -1 if $ignore and not defined $k;
6019 0           return sprintf '%04x', ord $k; # if ord $k <= 0xFFFF;
6020             # sprintf '%06x', ord $k;
6021             }
6022            
6023             sub keyORarray2hex ($$;$) {
                 # Same as key2hex(), but $k may also be a reference, in which case its
                 # element [0] is the value that gets converted.
                 # With a true $ignore, an undefined $k yields -1.
6024 0     0 0   my ($self, $k, $ignore) = (shift, shift, shift);
6025 0 0 0       return -1 if $ignore and not defined $k;
6026 0 0 0       $k = $k->[0] if $k and ref $k;
6027 0           $self->key2hex($k, $ignore);
6028             }
6029            
6030             sub keys2hex ($$;$) {
                 # Multi-character variant of key2hex(): every character of the string $k
                 # is formatted as lowercase hex (at least 4 digits), joined with ".".
                 # With a true $ignore, an undefined $k yields -1.
6031 0     0 0   my ($self, $k, $ignore) = (shift, shift, shift);
6032 0 0 0       return -1 if $ignore and not defined $k;
6033 0           return join '.', map {sprintf '%04x', ord} split //, $k; # if ord $k <= 0xFFFF;
  0            
6034             # sprintf '%06x', ord $k;
6035             }
6036            
6037             sub coverage_hex_sub($$$) { # Unfinished!!! XXXX UNUSED
                 # For every key entry of layer $layer, bump the counters in hash ref $to
                 # keyed by the key2hex() value of the entry's elements [0] and [1].
                 # Undefined elements are counted under the key -1 (key2hex is called in
                 # its "ignore undef" mode).
6038 0     0 0   my ($self, $layer, $to) = (shift, shift, shift);
6039             ++$to->{ $self->key2hex($_->[0], 'undef_ok') }, ++$to->{ $self->key2hex($_->[1], 'undef_ok') }
6040 0           for @{$self->{layers}{$layer}};
  0            
6041             }
6042            
6043             # my %MANUAL_MAP = qw( 0020 0020 00a0 00a0 2007 2007 ); # We insert entry for SPACE manually
6044             # my %MANUAL_MAP_ch = map chr hex, %MANUAL_MAP;
6045            
6046             sub coverage_hex($$) {
                 # Count, for face $face, how often each character occurs in the first two
                 # slots (indices 0 and 1) of every key of every layer of the face.
                 # The counters live in $self->{faces}{$face}{'[coverage_hex]'} (created if
                 # absent), keyed by keyORarray2hex(); undefined slots go under the key -1.
6047 0     0 0   my ($self, $face) = (shift, shift);
6048 0           my $layers = $self->{faces}{$face}{layers};
6049 0   0       my $to = ($self->{faces}{$face}{'[coverage_hex]'} ||= {}); # or die "Panic!"; # Synthetic faces may not have this...
                 # Resolve layer names to the layer arrays.
6050 0           my @Layers = map $self->{layers}{$_}, @$layers;
6051 0           for my $sub (@Layers) {
6052 0           ++$to->{ $self->keyORarray2hex($_, 'undef_ok') } for map +(@$_[0,1]), @$sub;
6053             }
6054             }
6055            
6056             sub deep_copy($$) {
                 # Recursively copy a structure built of plain scalars, arrays and hashes.
                 # Non-references are returned as is; arrays and hashes are rebuilt with
                 # every element (for hashes: keys and values alike) copied recursively.
                 # The kind of reference is detected from its stringification.
                 # NOTE(review): for any other kind of reference (CODE, SCALAR, or a blessed
                 # one that does not stringify as ARRAY(/HASH() control falls off the end
                 # with no explicit return, so the result is just the value of the failed
                 # match -- confirm that such values never reach this sub.
6057 0     0 0   my ($self, $o) = (shift, shift);
6058 0 0         return $o unless ref $o;
6059 0 0         return [map $self->deep_copy($_), @$o] if "$o" =~ /^ARRAY\(/; # We should not have overloaded elements
6060 0 0         return {map $self->deep_copy($_), %$o} if "$o" =~ /^HASH\(/;
6061             }
6062             sub DEEP_COPY($@) {
                 # List version of deep_copy(): returns the deep copies of all arguments.
6063 0     0 0   my ($self) = (shift);
6064 0           map $self->deep_copy($_), @_;
6065             }
6066            
6067             sub deep_undef_by_hash($$@) {
                 # Modify the arguments after $h IN PLACE: every defined non-reference value
                 # that has a true entry in hash ref $h is replaced by undef; array refs are
                 # processed recursively.  Dies on a reference that is not an ARRAY.
                 # This works because `for (@_)' aliases the caller's values (and the
                 # recursive call passes the array's own elements).
6068 0     0 0   my ($self, $h) = (shift, shift);
6069 0           for (@_) {
6070 0 0         next unless defined;
6071 0 0         if (ref $_) {
    0          
6072 0 0         die "a reference not an ARRAY in deep_undef_by_hash()" unless 'ARRAY' eq ref $_;
6073 0           $self->deep_undef_by_hash($h, @$_);
6074             } elsif ($h->{$_}) {
6075 0           undef $_
6076             }
6077             }
6078             }
6079            
6080             # Make symbols from the first half-face ($h1) to be accessible in the second face ($H1/$H2)
6081             sub pre_link_layers ($$$;$$) { # Un-obscure non-alphanum bindings from the first face; assign in the direction $hh ---> $HH
                 # Arguments: $hh, $HH  - names of the source face and the target face;
                 #            $skipfix  - if true, the target layers are left unmodified
                 #                        (no backup copy, no de-obscuring, no inheriting);
                 #            $skipwarn - silences the warnings below (see the uses).
                 # Effects: marks the two faces as linked to each other (`[linked]'); unless
                 #   $skipfix, saves a deep copy of every target layer under
                 #   $self->{layers}{'[ini_copy]'} (dies if one exists already), moves
                 #   symbol/prefix bindings of the source's layer 0 which are obscured in the
                 #   target's layer 0 into the target's layer 1, and fills every empty slot
                 #   of the target from the corresponding slot of the source.  On the first
                 #   link of this pair it also records slots present in one face only in
                 #   `need_extra_keys_to_access'.
                 # Dies if the faces have different numbers of layers.
6082 0     0 0   my ($self, $hh, $HH, $skipfix, $skipwarn) = (shift, shift, shift, shift, shift); # [Main, AltGr-Main,...], [Secondary, AltGr-Secondary,...]
6083 0           my ($hn,$Hn, %seen_deobsc) = map $self->{faces}{$_}{layers}, $hh, $HH;
6084             #warn "Link $hh --> $HH;\t(@$hn) -> (@$Hn)" if "$hh $HH" =~ /00a9/i;
6085 0 0         die "Can't link sets of layers `$hh' `$HH' of different sizes: ", scalar @$hn, " != ", scalar @$Hn if @$hn != @$Hn;
6086            
                 # Post-increment: false only on the first link of this pair.
6087 0           my $already_linked = $self->{faces}{$hh}{'[linked]'}{$HH}++;
6088 0           $self->{faces}{$HH}{'[linked]'}{$hh}++;
6089 0           for my $L (@$Hn) {
6090 0 0         next if $skipfix;
6091             die "Layer `$L' of face `$HH' is being relinked via `$HH' -> `$hh'???"
6092 0 0         if $self->{layers}{'[ini_copy]'}{$L};
6093             #warn "ini_copy: `$L'";
6094 0           $self->{layers}{'[ini_copy]'}{$L} = $self->deep_copy($self->{layers}{$L});
6095             }
6096 0           for my $K (0..$#{$self->{layers}{$hn->[0]}}) { # key number
  0            
6097             #warn "One key data, FROM: K=$K, layer=<", join( '> <', map $self->{layers}{$_}[$K], @$Hn), '>' if "$hh $HH" =~ /00a9/i;
6098 0           my @h = map $self->{layers}{$_}[$K], @$hn; # arrays of [lowercase,uppercase]
6099             #warn "One key data, TO: K=$K, layer=<", join( '> <', map $self->{layers}{$_}[$K], @$Hn), '>' if "$hh $HH" =~ /00a9/i;
6100 0           my @H = map $self->{layers}{$_}[$K], @$Hn;
                 # @p: element [2] of array-ref bindings (the prefix-key mark);
                 # @c/@C: bare characters of the source/target bindings.
6101 0 0 0       my @p = map [map {$_ and ref and $_->[2]} @$_], @h; # Prefix
  0            
6102 0 0 0       my @c = map [map {($_ and ref) ? $_->[0] : $_} @$_], @h; # deep copy, remove extra info
  0            
6103 0 0 0       my @C = map [map {($_ and ref) ? $_->[0] : $_} @$_], @H;
  0            
6104             # Find which of keys on $H[0] obscure symbol keys from $h[0]
6105 0 0 0       my @symb0 = grep {$p[0][$_] or ($c[0][$_] || '') =~ /[\W_]/} 0, 1; # not(wordchar but not _): prefix/symbols on $h[0]
  0            
6106             defined $H[0][$_] or not defined $C[0][$_] or $skipwarn
6107             or warn "Symbol char `$c[0][$_]' not copied to the second face while the slot is empty"
6108 0   0       for @symb0;
      0        
      0        
6109 0 0         my @obsc = grep { defined $C[0][$_] and $c[0][$_] ne $C[0][$_]} @symb0; # undefined positions will be copied later
  0            
6110             #warn "K=$K,\tobs=@obsc;\tsymb0=@symb0";
6111             # If @obsc == 1, put on non-shifted location; may overwrite only ?-binding if it exists
6112             #return unless @obsc;
6113 0           my %map;
                 # Candidate slots (0 = unshifted, 1 = shifted) on the target's layer 1:
                 # the empty ones first, then the occupied ones.
6114 0           my @free_first = ((grep {not defined $C[1][$_]} 0, 1), grep defined $C[1][$_], 0, 1);
  0            
                 # NOTE(review): `$free_first[0] = 0' is an ASSIGNMENT; its value 0 is false,
                 # so `@free_first = (1,0)' never happens, and $free_first[0] is forced to 0
                 # whenever the first obscured position is the shifted one.  `== 0' looks
                 # intended, but that would change where symbols land in generated layouts,
                 # so the statement is deliberately left as is -- confirm with the author.
6115 0 0 0       @free_first = (1,0) if 1 == ($obsc[0] || 0) and $free_first[0] = 0 and not defined $C[1][1]; # un-Shift ONLY if needed
      0        
      0        
                 # %map (obscured position => target slot) stays empty under $skipfix, so
                 # the loop below does nothing in that case.
6116 0 0         @map{@obsc} = @free_first[0 .. $#obsc] unless $skipfix;
6117             # %map = map +($_, $free_first[$map{$_}]), keys %map;
6118 0           for my $k (keys %map) {
6119 0 0 0       if ($skipfix) {
    0          
6120 0 0         my $s = $k ? ' (shifted)' : '';
6121             warn "Key `$C[0][$k]'$s in layer $Hn->[0] does not match symbol $c[0][$k] in layer $hn->[0], and skipfix is requested...\n"
6122 0 0 0       unless ref($skipwarn || '') ? $skipwarn->{$c[0][$k]} : $skipwarn;
    0          
6123             } elsif (defined $C[1][$map{$k}] and $p[0][$k]) {
6124 0           warn "Prefix `$c[0][$k]' in layer $hn->[0] obscured on a key with `$C[1][$map{$k}]' in layer=1: $Hn->[0]"
6125             } else {
6126 0 0         if (defined $C[1][$map{$k}]) {
6127 0 0         next if $seen_deobsc{$c[0][$k]}; # See ъЪ + palochkas obscuring \| on the secondary \|-key in RussianPhonetic
6128             # So far, the only "obscuring" with useful de-obscuring is when the obscuring symbol is a letter
6129 0 0 0       die "existing secondary AltGr-binding `$C[1][$map{$k}]' blocks de-obscuring `$c[0][$k]';\n symbols to de-obscure are at positions [@symb0] in [@{$c[0]}]"
  0            
6130             unless ($C[0][$k] || '.') =~ /[\W\d_]/;
6131             next
6132 0           }
6133 0           $H[1][$map{$k}] = $h[0][$k]; # !!!! Modify in place
6134 0           $seen_deobsc{$c[0][$k]}++;
6135             }
6136             }
6137             # Inherit keys from $h
6138 0 0         for my $L (0..($skipfix? -1 : $#H)) {
6139 0           for my $shift (0,1) {
6140 0 0         next if defined $H[$L][$shift];
6141 0           $H[$L][$shift] = $h[$L][$shift];
6142             }
6143             }
6144 0 0         next if $already_linked;
                 # Fixed bound: the last layer index is $#$hn; `0..@$hn' ran one index past
                 # it (never recording anything there, only autovivifying scratch arrays).
6145 0           for my $i (0..$#$hn) { # layer type
6146 0           for my $j (0,1) { # case
6147             #??? ++$seen_hex[$_]{ key2hex(($_ ? $key2 : $key1)->[$i][$j], 'undef') } for 0,1;
6148 0 0 0       push @{$self->{faces}{$hh}{need_extra_keys_to_access}{$HH}}, $H[$i][$j] if defined $C[$i][$j] and not defined $h[$i][$j];
  0            
6149 0 0 0       push @{$self->{faces}{$HH}{need_extra_keys_to_access}{$hh}}, $h[$i][$j] if defined $c[$i][$j] and not defined $H[$i][$j];
  0            
6150            
6151             }
6152             }
6153             }
6154             }
6155            
6156             # Make symbols from the first half-face ($h1) to be accessible in the second face ($H1/$H2)
6157             sub link_layers ($$$;$$) { # Un-obscure non-alphanum bindings from the first keyboard
                 # Link face $hh (base) with face $HH (satellite); arguments are the same as
                 # for pre_link_layers().  Steps, in this order:
                 #   1. pre_link_layers() (may modify the layers of $HH);
                 #   2. build the char -> positions index of $HH;
                 #   3. if $hh defines `[Flip_AltGr_Key]' and $HH defines `[invAltGr_Accessor]',
                 #      put the accessor, as a prefix-key entry, into the slot of $HH which
                 #      corresponds to the position of the flip key found in `face_back' of $hh;
                 #   4. build (or reuse) the index of $hh, then the link map $hh -> $HH.
6158 0     0 0   my ($self, $hh, $HH, $skipfix, $skipwarn) = (shift, shift, shift, shift, shift); # [Main, AltGr-Main,...], [Secondary, AltGr-Secondary,...]
6159 0           $self->pre_link_layers ($hh, $HH, $skipfix, $skipwarn);
6160             #warn "Linking with FIX: $hh, $HH" unless $skipfix;
6161             # We expect that $hh is base-face, and $HH is a satellite.
6162 0           $self->face_make_backlinks($HH, $self->{faces}{$HH}{'[char2key_prefer_first]'}, $self->{faces}{$HH}{'[char2key_prefer_last]'}, $skipfix, 'skipwarn');
6163             # To insert Flip_AltGr_Key into a face, we need to know where it is on the base face, and put it into the corresponding
6164             # slot of the satellite face. After face_make_backlinks(), we can find it in the base face.
6165             # Moreover, we must do it BEFORE calling faces_link_via_backlinks().
                 # The doubled braces give an inner bare block, so that `last' can leave the `if'.
6166 0 0         if (defined (my $flip = $self->{faces}{$hh}{'[Flip_AltGr_Key]'})) {{
6167 0 0         defined ( my $flipped = $self->{faces}{$HH}{'[invAltGr_Accessor]'} ) or last;
  0            
6168             # warn "adding AltGr-inv for $hh, accessor=", $self->key2hex($flipped);
6169 0           $flip = $self->charhex2key($flip);
6170             # warn "face_back on $hh: ", join ' ', keys %{$self->{face_back}{$hh} || {}};
6171 0 0         if (my $where = $self->{face_back}{$hh}{$flip}) {
                 # Use the first recorded position [layer, key, shift] of the flip key.
6172 0           my($l, $k, $shift) = @{ $where->[0] };
  0            
6173             # warn "Hex face_back l=$l, k=$k, shift-$shift on $hh";
6174 0           my($L, $expl, $dead) = ($self->{faces}{$HH}{layers}, '???');
6175 0           $L = $self->{layers}{$L->[$l]};
6176 0           my $C = my $c = $L->[$k][$shift];
                 # For an array-ref binding: [0] is the character, [2] the prefix mark, [3] an explanation.
6177 0 0 0       $c = $c->[0], $dead = $C->[2], $expl = $C->[3] || '???' if $c and ref $c;
      0        
6178 0   0       my $DEAD = $dead || '';
                 # Warn when the slot already holds something other than the same accessor marked as a prefix key.
6179 0 0 0       warn "adding Flip_AltGr => <<$flipped>> to $hh\'s satellite $HH: already occuplied by <<<$c>>> (via $expl), dead=$DEAD"
      0        
6180             if defined $c and ($c ne $flipped or not $dead);
6181 0           $L->[$k][$shift] = [$flipped, undef, 1, 'Prefix for AltGr inversion'];
                 # Drop the cached link map $hh -> $HH, so that it is rebuilt below.
6182 0           delete $self->{faces}{$hh}{'Face_link_map'}{$HH}; # Reuse old copy
6183             # warn "Added to $HH; k=$k\[$l, $shift]";
6184             } else {
6185 0           warn "failed: adding AltGr-inv for $hh, flip=$flip, accessor=", $self->key2hex($flipped);
6186             }
6187             }}
                 # The true 5th argument makes face_make_backlinks() reuse an existing index of $hh.
6188 0           $self->face_make_backlinks($hh, $self->{faces}{$hh}{'[char2key_prefer_first]'}, $self->{faces}{$hh}{'[char2key_prefer_last]'}, 'skip');
6189 0           $self->faces_link_via_backlinks($hh, $HH);
6190             # $self->faces_link_via_backlinks($HH, $hh);
6191             }
6192            
6193             sub face_make_backlinks($$$$;$$) { # It is crucial to proceed layers in
6194             # parallel: otherwise the semantic of char2key_prefer_first suffers
                 # Build $self->{face_back}{$F}: a map from each character bound in face $F
                 # to the list of positions [layer index, key index, shift] where it occurs.
                 # Arguments: $F            - face name;
                 #            $prefer_first,
                 #            $prefer_last  - hash refs (may be undef) of characters with a
                 #                            special ordering of their positions;
                 #            $skipfix      - if true and the index exists already, reuse it
                 #                            silently (otherwise an existing index is fatal);
                 #            $skipwarn     - suppress the "appear several times" warning.
                 # Ordering of a list: by default every new occurrence is put at the FRONT;
                 # for characters in $prefer_first it is appended at the END.
6195 0   0 0 0   my ($self, $F, $prefer_first, $prefer_last, $skipfix, $skipwarn) = (shift, shift, shift || {}, shift || {}, shift, shift);
      0        
6196             #warn "Making backlinks for `$F'";
6197 0           my $LL = $self->{faces}{$F}{layers};
6198 0 0         if ($self->{face_back}{$F}) { # reuse old copy
6199 0 0         return if $skipfix; # reuse old copy
6200 0           die "An obsolete copy of `$F' is cashed";
6201             }
6202 0   0       my $seen = ($self->{face_back}{$F} ||= {}); # maps char to array of positions it appears in, each [layer, key, shift]
6203             # Since prefer_first should better operate in terms of keys, not layers; so the loop in $k should be the external one
                 # The number of keys is taken from the first layer of the face.
6204 0           my $last = $#{ $self->{layers}{$LL->[0]} };
  0            
6205 0           my %warn;
6206 0           for my $k (0..$last) {
6207 0           for my $Lc (0..$#$LL) {
6208 0           my $L = $LL->[$Lc];
6209             # $self->layer_make_backlinks($_, $prefer_first) for @$L;
6210 0           my $a = $self->{layers}{$L};
6211 0 0         unless ($#$a == $last) { # Detect typos if we can (i.e., if no overflow into special ranges)
                 # $fst: the smallest start index (element [0]) among the entries of %start_SEC;
                 # a length mismatch is fatal only when both layers end below it.
6212 0           my $fst = 1e100; # infinity
6213 0   0       $fst > $_->[0] and $fst = $_->[0] for values %start_SEC;
6214 0 0 0       die "Layer `$L' has lastchar $#$a, expected $last" unless $last >= $fst or $#$a >= $fst;
6215             }
6216             ##########
6217 0           for my $shift (0..$#{$a->[$k]}) {
  0            
6218 0 0         next unless defined (my $c = $a->[$k][$shift]);
6219 0 0         $c = $c->[0] if 'ARRAY' eq ref $c; # Treat prefix keys as usual chars
6220 0 0         if ($prefer_first->{$c}) {
6221             #warn "Layer `$L' char `$c': prefer first";
6222 0 0 0       @{ $seen->{$c} } = reverse @{ $seen->{$c} } if $seen->{$c} and $prefer_last->{$c}; # prefer 2nd of 3 (2nd from the end)
  0            
  0            
6223 0           push @{ $seen->{$c} }, [$Lc, $k, $shift];
  0            
6224             } else {
                 # A repeated character which is in neither hash (and is not a space) is remembered for the warning.
6225 0 0 0       $warn{$c}++ if @{ $seen->{$c} || [] } and not $prefer_last->{$c} and $c ne ' '; # XXXX Special-case ' ' ????
  0 0 0        
6226 0           unshift @{ $seen->{$c} }, [$Lc, $k, $shift];
  0            
6227             }
6228             }
6229             }
6230             }
6231 0 0 0       warn "The following chars appear several times in face `$F', but are not clarified\n\t (by `char2key_prefer_first', `char2key_prefer_last'):\n\t<",
6232             join('> <', sort keys %warn), '>' if %warn and not $skipwarn;
6233             }
6234            
6235             sub flip_layer_N ($$$) { # Increases layer number if number of layers is >2 (good for order Plain/AltGr/S-Ctrl)
                 # Cyclic successor of the layer index: returns $N + 1, wrapping around to 0
                 # when $N equals $max (the index of the last layer).
6236 0     0 0   my ($self, $N, $max) = (shift, shift, shift);
6237 0 0         return 0 if $N == $max;
6238 0           $N + 1
6239             }
6240            
6241             sub faces_link_via_backlinks($$$;$) { # It is crucial to proceed layers in
6242             # parallel: otherwise the semantic of char2key_prefer_first suffers
                 # Build the translation map from face $F1 to face $F2: each character of $F1
                 # is mapped to what $F2 has at the same position.  Requires the index made
                 # by face_make_backlinks($F1) (dies with "Panic!" if it is missing).
                 # Results: $self->{faces}{$F1}{Face_link_map}{$F2}     - same layer;
                 #          $self->{faces}{$F1}{Face_link_map_INV}{$F2} - the layer given by
                 #                                                        flip_layer_N().
                 # Does nothing if Face_link_map for this pair exists already.
                 # With a true $no_inic the current layers of $F2 are used instead of their
                 # saved initial copies.
6243 0     0 0   my ($self, $F1, $F2, $no_inic) = (shift, shift, shift, shift);
6244 0 0         return if $self->{faces}{$F1}{'Face_link_map'}{$F2}; # Reuse old copy
6245             #warn "Making links for `$F1' -> `$F2'";
6246 0 0         my $seen = $self->{face_back}{$F1} or die "Panic!"; # maps char to array of positions it appears in, each [layer, key, shift]
6247 0           my $LL = $self->{faces}{$F2}{layers};
6248             #!$no_inic and $self->{layers}{'[ini_copy1]'}{$_} and warn "ini_copy1 of `$_' exists" for @$LL;
6249             #!$no_inic and $self->{layers}{'[ini_copy]'}{$_} and warn "ini_copy of `$_' exists" for @$LL;
                 # Layer arrays of $F2: prefer `[ini_copy1]', then `[ini_copy]', then the current layer.
6250 0   0       my @LL = map $self->{layers}{'[ini_copy1]'}{$_} || $self->{layers}{'[ini_copy]'}{$_} || $self->{layers}{$_}, @$LL;
6251 0 0         @LL = map $self->{layers}{$_}, @$LL if $no_inic;
6252 0           my($maxL, %r, %altR) = $#LL;
6253             # XXXX Must use $self->{layers}{'[ini_copy]'}{$L} for the target
6254 0           for my $c (sort keys %$seen) {
6255 0           my $arr = $seen->{$c};
6256 0 0         warn "Empty back-mapping array for `$c' in face `$F1'" unless @$arr;
6257             # if (@$arr > 1) {
6258             # }
                 # Take the first position (in the order of the index) where $F2 has a defined binding.
6259             my ($to) = grep defined, (map {
6260             #warn "Check `$c': <@$_> ==> <", (defined $LL[$_->[0]][$_->[1]][$_->[2]] ? $LL[$_->[0]][$_->[1]][$_->[2]] : 'undef'), '>';
6261 0           $LL[$_->[0]][$_->[1]][$_->[2]]
  0            
6262             } @$arr);
                 # Likewise, but looking in the "flipped" layer of $F2.
6263 0           my ($To) = grep defined, (map { $LL[$self->flip_layer_N($_->[0], $maxL)][$_->[1]][$_->[2]] } @$arr);
  0            
6264 0           $r{$c} = $to; # Keep prefix keys as array refs
6265 0           $altR{$c} = $To; # Ditto
6266             }
6267 0           $self->{faces}{$F1}{'Face_link_map'}{$F2} = \%r;
6268 0           $self->{faces}{$F1}{'Face_link_map_INV'}{$F2} = \%altR;
6269             }
6270            
6271             sub charhex2key ($$) {
                 # If $c consists of 4 or more hex digits (in any case), return the character
                 # with this code point; any other string is returned unchanged.
6272 0     0 0   my ($self, $c) = (shift, shift);
6273 0 0         return chr hex $c if $c =~ /^[0-9a-f]{4,}$/i;
6274 0           $c
6275             }
6276            
6277             sub __manyHEX($$) { # for internal use only
                 # Return $s with every word-boundary-delimited run of 4 or more hex digits,
                 # together with one optional "." on either side, replaced by the character
                 # with this code point.
6278 0     0     my ($self, $s) = (shift, shift);
6279 0           $s =~ s/\.?(\b[0-9a-f]{4,}\b)\.?/ chr hex $1 /ieg;
  0            
6280 0           $s
6281             }
6282            
6283             sub stringHEX2string ($$) { # One may surround HEX by ".", but only if needed. If not needed, "." is preserved...
                 # Return $s with embedded hex code points converted to characters.  A match
                 # is a sequence of hex groups (4+ digits each), each optionally followed by
                 # a "." which abuts the next word character; a "." which directly follows a
                 # word character before the sequence is consumed as well.  The sequence
                 # itself is converted by __manyHEX().
6284 0     0 0   my ($self, $s) = (shift, shift);
6285 0           $s =~ s/(?:\b\.)?((?:\b[0-9a-f]{4,}\b(?:\.\b)?)+)/ $self->__manyHEX("$1") /ieg;
  0            
6286 0           $s
6287             }
6288            
6289             sub layer_recipe ($$) {
                 # Return the recipe registered for layer $l in $self->{layer_recipes},
                 # passed through recipe2str(); return nothing if no recipe is registered.
6290 0     0 0   my ($self, $l) = (shift, shift);
6291 0 0         return unless exists $self->{layer_recipes}{$l};
6292 0           $self->recipe2str($self->{layer_recipes}{$l})
6293             }
6294            
6295             sub massage_faces ($) {
6296 0     0 0   my $self = shift;
6297             # warn "Massaging faces...";
6298 0           for my $f (keys %{$self->{faces}}) { # Needed for (pre_)link_layers...
  0            
6299 0 0 0       next if 'HASH' ne ref $self->{faces}{$f} or $f =~ m(\bVK$); # "parent" taking keys for a child
6300             #warn "Massaging face `$f'...";
6301 0           for my $key ( qw( Flip_AltGr_Key Diacritic_if_undef DeadChar_DefaultTranslation DeadChar_32bitTranslation extra_report_DeadChar
6302             PrefixChains ctrl_after_modcol create_alpha_ctrl keep_missing_ctrl output_layers layers_modifiers
6303             layers_mods_keys mods_keys_KBD
6304             ComposeKey_Show AltGr_Invert_Show Apple_Override Apple_Duplicate Apple_HexInput
6305             ComposeKey Explicit_AltGr_Invert Auto_Diacritic_Start ) ) {
6306 0           $self->{faces}{$f}{"[$key]"} = $self->get_deep_via_parents($self, undef, 'faces', (split m(/), $f), $key);
6307             }
6308             $self->{faces}{$f}{'[char2key_prefer_first]'}{$_}++ # Make a hash
6309 0 0         for @{ $self->{faces}{$f}{char2key_prefer_first} || [] } ;
  0            
6310             $self->{faces}{$f}{'[char2key_prefer_last]'}{$_}++ # Make a hash
6311 0 0         for @{ $self->{faces}{$f}{char2key_prefer_last} || [] } ;
  0            
6312            
6313 0           my %R = qw(ComposeKey_Show ⎄ AltGr_Invert_Show ⤨); # On Apple only
6314 0   0       defined $self->{faces}{$f}{"[$_]"} or $self->{faces}{$f}{"[$_]"} = $R{$_} for keys %R;
6315             $self->{faces}{$f}{"[ComposeKey_Show]"}[0] = '⎄' # Make a safe default
6316 0 0 0       if ref $self->{faces}{$f}{"[ComposeKey_Show]"} and not length $self->{faces}{$f}{"[ComposeKey_Show]"}[0];
6317            
6318 0           my ($compK, %compK) = $self->{faces}{$f}{'[ComposeKey]'};
6319 0 0 0       if ($compK and ref $compK) {
    0          
6320 0           for my $cK (@$compK) {
6321 0           my @kkk = split /,/, $cK;
6322 0 0 0       $compK{ $self->key2hex($self->charhex2key($kkk[3])) }++ if defined $kkk[3] and length $kkk[3];
6323             }
6324             } elsif (defined $compK) {
6325 0           $compK{ $self->key2hex($self->charhex2key($compK)) }++;
6326             }
6327 0           $self->{faces}{$f}{'[ComposeKeys]'} = \%compK;
6328 0 0         unless ($self->{faces}{$f}{layers}) {
6329 0 0         next unless $self->{face_recipes}{$f};
6330 0           $self->face_by_face_recipe($f, $f);
6331             }
6332 0 0         for my $ln ( 0..$#{$self->{faces}{$f}{layers} || []} ) {
  0            
6333 0           my $ll = my $l = $self->{faces}{$f}{layers}[$ln];
6334 0 0         next if $self->{layers}{$l}; # Else, auto-vivify
6335             #warn "Creating layer `$l' for face `$f'...";
6336 0           my @r = $self->layer_recipe($l);
6337 0 0         $ll = $r[0] if @r;
6338 0           warn "Massaging: Using layout_recipe `$ll' for layer '$l'\n" if debug_face_layout_recipes and exists $self->{layer_recipes}{$l};
6339 0           $ll = $self->make_translated_layers($ll, $f, [$ln], '0000');
6340             #warn "... Result `@$ll' --> $self->{layers}{$ll->[0]}";
6341 0 0         $self->{layers}{$l} = $self->{layers}{$ll->[0]} unless $self->{layers}{$l}; # Could autovivify in between???
6342             }
6343 0           (my ($seen, $seen_dead), $self->{faces}{$f}{'[dead_in_VK]'}) = $self->massage_VK($f);
6344 0           $self->{faces}{$f}{'[dead_in_VK_array]'} = $seen_dead;
6345 0           $self->{faces}{$f}{'[coverage_hex]'}{$self->key2hex($_)}++ for @$seen;
6346 0 0         for my $S (@{ $self->{faces}{$f}{AltGrCharSubstitutions} || []}) {
  0            
6347 0           my $s = $self->stringHEX2string($S);
6348 0           $s =~ s/\p{Blank}(?=\p{NonspacingMark})//g;
6349 0 0         die "Expect 2 chars in AltGr-char substitution rule; I see <$s> (from <$S>)" unless 2 == (my @s = split //, $s);
6350 0           push @{ $self->{faces}{$f}{'[AltSubstitutions]'}{$s[0]} }, [$s[1], 'manual'];
  0            
6351 0 0 0       push @{ $self->{faces}{$f}{'[AltSubstitutions]'}{lc $s[0]} }, [lc $s[1], 'manual']
  0            
6352             if lc $s[0] ne $s[0] and lc $s[1] ne $s[1];
6353 0 0 0       push @{ $self->{faces}{$f}{'[AltSubstitutions]'}{uc $s[0]} }, [uc $s[1], 'manual']
  0            
6354             if uc $s[0] ne $s[0] and uc $s[1] ne $s[1];
6355             }
6356 0 0         s/^\s+//, s/\s+$//, $_ = $self->stringHEX2string($_) for @{ $self->{faces}{$f}{Import_Prefix_Keys} || []};
  0            
6357 0 0         my %h = @{ $self->{faces}{$f}{Import_Prefix_Keys} || []};
  0            
6358 0 0         $self->{faces}{$f}{'[imported2key]'} = \%h if %h;
6359 0           my ($l0, $c);
6360 0 0         unless ($c = $self->{layer_counts}{$l0 = $self->{faces}{$f}{layers}[0]}) {
6361 0           $l0 = $self->get_deep_via_parents($self, undef, 'faces', (split m(/), $f), 'geometry_via_layer');
6362 0 0         $c = $self->{layer_counts}{$l0} if defined $l0;
6363             }
6364 0 0         my $o = $self->{layer_offsets}{$l0} if defined $l0;
6365 0 0         $self->{faces}{$f}{'[geometry]'} = $c if $c;
6366 0 0         $self->{faces}{$f}{'[g_offsets]'} = $o if $o;
6367             }
6368 0           for my $f (keys %{$self->{faces}}) { # Needed for face_make_backlinks: must know which keys in faces will be finally present
  0            
6369 0 0 0       next if 'HASH' ne ref $self->{faces}{$f} or $f =~ m(\bVK$); # "parent" taking keys for a child
6370 0 0         for my $F (@{ $self->{faces}{$f}{AltGrCharSubstitutionFaces} || []}) { # Now has a chance to have real layers
  0            
6371 0           for my $L (0..$#{$self->{faces}{$f}{layers}}) {
  0            
6372 0           my $from = $self->{faces}{$f}{layers}[$L];
6373 0 0         next unless my $to = $self->{faces}{$F}{layers}[$L];
6374 0           $_ = $self->{layers}{$_} for $from, $to;
6375 0           for my $k (0..$#$from) {
6376 0 0 0       next unless $from->[$k] and $to->[$k];
6377 0           for my $shift (0..1) {
6378 0 0 0       next unless defined (my $s = $from->[$k][$shift]) and defined (my $ss = $to->[$k][$shift]);
6379 0   0       $_ and ref and $_ = $_->[0] for $s, $ss;
      0        
6380 0           push @{ $self->{faces}{$f}{'[AltSubstitutions]'}{$s} }, [$ss, "F=$F"];
  0            
6381             }
6382             }
6383             }
6384             }
6385             } # ^^^ This is not used yet???
6386 0           for my $f (keys %{$self->{faces}}) { # Needed for face_make_backlinks: must know which keys in faces will be finally present
  0            
6387 0 0 0       next if 'HASH' ne ref $self->{faces}{$f} or $f =~ m(\bVK$); # "parent" taking keys for a child
6388 0 0         for my $N (0..$#{ $self->{faces}{$f}{AltGrCharSubstitutionLayers} || []}) { # Now has a chance to have real layers
  0            
6389 0           my $TO = my $to = $self->{faces}{$f}{AltGrCharSubstitutionLayers}[$N];
6390 0 0         my $from = $self->{faces}{$f}{layers}[$N] or next;
6391 0           $_ = $self->{layers}{$_} for $from, $to;
6392 0           for my $k (0..$#$from) {
6393 0 0 0       next unless $from->[$k] and $to->[$k];
6394 0           for my $shift (0..1) {
6395 0 0 0       next unless defined (my $s = $from->[$k][$shift]) and defined (my $ss = $to->[$k][$shift]);
6396 0   0       $_ and ref and $_ = $_->[0] for $s, $ss;
      0        
6397 0           push @{ $self->{faces}{$f}{'[AltSubstitutions]'}{$s} }, [$ss, "L=$TO"];
  0            
6398             }
6399             }
6400             }
6401             }
6402 0           for my $f (keys %{$self->{faces}}) { # Linking uses the number of slots in layer 0 as the limit; fill to make into max
  0            
6403 0 0 0       next if 'HASH' ne ref $self->{faces}{$f} or $f =~ m(\bVK$); # "parent" taking keys for a child
6404 0           my $L = $self->{faces}{$f}{layers};
6405 0           my @last = map $#{$self->{layers}{$_}}, @$L;
  0            
6406 0           my $last = $last[0];
6407 0   0       $last < $_ and $last = $_ for @last;
6408 0           push @{$self->{layers}{$L->[0]}}, [] for 1..($last-$last[0]);
  0            
6409             }
6410 0           for my $f (keys %{$self->{faces}}) { # Needed for face_make_backlinks: must know which keys in faces will be finally present
  0            
6411 0 0 0       next if 'HASH' ne ref $self->{faces}{$f} or $f =~ m(\bVK$); # "parent" taking keys for a child
6412 0           my $o = $self->{faces}{$f}{LinkFace};
6413 0 0         $self->pre_link_layers($o, $f) if defined $o; # May add keys to $f
6414             # warn("pre_link <$o> <$f>\n") if defined $o;
6415             }
6416 0           for my $f (keys %{$self->{faces}}) {
  0            
6417 0 0 0       next if 'HASH' ne ref $self->{faces}{$f} or $f =~ m(\bVK$); # "parent" taking keys for a child
6418 0           $self->face_make_backlinks($f, $self->{faces}{$f}{'[char2key_prefer_first]'}, $self->{faces}{$f}{'[char2key_prefer_last]'});
6419             }
6420 0           for my $f (keys %{$self->{faces}}) {
  0            
6421 0 0 0       next if 'HASH' ne ref $self->{faces}{$f} or $f =~ m(\bVK$); # "parent" taking keys for a child
6422 0           my $o = $self->{faces}{$f}{LinkFace};
6423 0 0         next unless defined $o;
6424 0           $self->faces_link_via_backlinks($f, $o);
6425 0           $self->faces_link_via_backlinks($o, $f);
6426             }
6427 0           for my $f (keys %{$self->{faces}}) {
  0            
6428 0 0 0       next if 'HASH' ne ref $self->{faces}{$f} or $f =~ m(\bVK$); # "parent" taking keys for a child
6429 0   0       my ($DDD, $export, $vk) = map $self->{faces}{$f}{"[$_]"} ||= {}, qw(DEAD export dead_in_VK);
6430 0   0       my ($ddd) = map $self->{faces}{$f}{"[$_]"} ||= [], qw(dead);
6431 0           $self->coverage_hex($f);
6432 0           my $S = $self->{faces}{$f}{layers};
6433 0           my ($c,%s,@d) = 0;
6434 0 0         for my $D (@{$self->{faces}{$f}{layerDeadKeys} || []}) { # deprecated...
  0            
6435 0 0         $c++, next unless length $D; # or $D ~= /^\s*--+$/ ; # XXX How to put empty elements in an array???
6436 0           $D =~ s/^\s+//;
6437 0           (my $name, my @k) = split /\s+/, $D;
6438 0           @k = map $self->charhex2key($_), @k;
6439             die "name of layerDeadKeys' element in face `$f' does not match:\n\tin `$D'\n\t`$name' vs `$self->{faces}{$f}{layers}[$c]'"
6440 0 0         unless $self->{faces}{$f}{layers}[$c] =~ /^\Q$name\E(<.*>)?$/; # Name might have changed in VK processing
6441 0   0       1 < length and die "not a character as a deadkey: `$_'" for @k;
6442 0           $ddd->[$c] = {map +($_,1), @k};
6443 0   0       ($s{$_}++ or push @d, $_), $DDD->{$_} = 1 for @k;
6444 0           $c++;
6445             }
6446 0 0         for my $k (split /\p{Blank}+(?:\|{3}\p{Blank}+)?/,
6447             (defined $self->{faces}{$f}{faceDeadKeys} ? $self->{faces}{$f}{faceDeadKeys} : '')) {
6448 0 0         next unless length $k;
6449 0           $k = $self->charhex2key($k);
6450 0 0         1 < length $k and die "not a character as a deadkey: `$k'";
6451 0           $ddd->[$_]{$k} = 1 for 0..$#{ $self->{faces}{$f}{layers} }; # still used...
  0            
6452 0           $DDD->{$k} = 1;
6453 0 0         $s{$k}++ or push @d, $k;
6454             }
6455 0 0         for my $k (split /\p{Blank}+/, (defined $self->{faces}{$f}{ExportDeadKeys} ? $self->{faces}{$f}{ExportDeadKeys} : '')) {
6456 0 0         next unless length $k;
6457 0           $k = $self->charhex2key($k);
6458 0 0         1 < length $k and die "not a character as an exported deadkey: `$k'";
6459 0           $export->{$k} = 1;
6460             }
6461 0 0         if (my $LL = $self->{faces}{$f}{'[ini_layers]'}) {
6462 0           my @out;
6463 0           for my $L ( @$LL ) {
6464 0           push @out, "$L++prefix+";
6465 0           my $l = $self->{layers}{$out[-1]} = $self->deep_copy($self->{layers}{$L});
6466 0           for my $n (0 .. $#$l) {
6467 0           my $K = $l->[$n];
6468 0           for my $k (@$K) {
6469             #warn "face `$f' layer `$L' ini_layers_prefix: key `$k' marked as a deadkey" if defined $k and $DDD->{$k};
6470 0 0 0       $k = [$k] if defined $k and not ref $k; # Allow addition of doc strings
6471 0 0 0       if (defined $k and ($DDD->{$k->[0]} or $vk->{$k->[0]})) {
      0        
6472 0   0       @$k[1,2] = ($f, $k->[2] || ($export->{$k->[0]} ? 2 : 1)); # Is exportable?
6473             }
6474             }
6475             }
6476             }
6477 0           $self->{faces}{$f}{'[ini_layers_prefix]'} = \@out;
6478 0           $LL = $self->{faces}{$f}{'[ini_filled_layers]'} = [ @{ $self->{faces}{$f}{layers} } ]; # Deep copy
  0            
6479 0           my @OUT;
6480 0           for my $L ( @$LL ) {
6481 0           push @OUT, "$L++PREFIX+";
6482 0           my $l = $self->{layers}{$OUT[-1]} = $self->deep_copy($self->{layers}{$L});
6483 0           for my $n (0 .. $#$l) {
6484 0           my $K = $l->[$n];
6485 0           for my $k (@$K) {
6486             #warn "face `$f' layer `$L' layers_prefix: key `$k' marked as a deadkey" if defined $k and $DDD->{$k};
6487 0 0 0       $k = [$k] if defined $k and not ref $k; # Allow addition of doc strings
6488 0 0 0       if (defined $k and ($DDD->{$k->[0]} or $vk->{$k->[0]})) {
      0        
6489 0   0       @$k[1,2] = ($f, $k->[2] || ($export->{$k->[0]} ? 2 : 1)); # Is exportable?
6490             }
6491             }
6492             }
6493             }
6494 0           $self->{faces}{$f}{layers} = \@OUT;
6495             } else {
6496 0           warn "Face `$f' has no ini_layers";
6497             }
6498 0           $self->{faces}{$f}{'[dead_array]'} = \@d;
6499 0 0 0       for my $D (@{$self->{faces}{$f}{faceDeadKeys2} || $self->{faces}{$f}{layerDeadKeys2} || []}) { # layerDeadKeys2 obsolete
  0            
6500 0           $D =~ s/^\s+//; $D =~ s/\s+$//;
  0            
6501 0           my @k = split //, $self->stringHEX2string($D);
6502 0 0         2 != @k and die "not two characters as a chained deadkey: `@k'";
6503             #warn "dead2 for <@k>";
6504 0           $self->{faces}{$f}{'[dead2]'}{$k[0]}{$k[1]}++;
6505             # $k[1] is "untranslated"; it is not good for [DEAD]:
6506             #$self->{faces}{"$f###" . $self->key2hex($k[0])}{'[DEAD]'}{$k[1]}++;
6507             }
6508             }
6509             $self
6510 0           }
6511            
# Convert, in place, the entries of the `char2key_prefer_first' and
# `char2key_prefer_last' lists from their char/hex notation to keys.
#
# Every element of $self->{'[keys]'} is a `/'-separated path; get_deep()
# resolves it (starting from $self) to the hash holding the two lists.
# A missing list is treated as empty.
sub massage_hash_values($) {
    my $self = shift;
    for my $path ( @{ $self->{'[keys]'} } ) {
        my $node = $self->get_deep($self, split m(/), $path);
        for my $slot (qw(char2key_prefer_first char2key_prefer_last)) {
            my $list = $node->{$slot} || [];
            # $_ aliases the list element, so the assignment rewrites it in place
            $_ = $self->charhex2key($_) for @$list;
        }
    }
}
6520             #use Dumpvalue;
6521            
# Print one tab-separated report line for the string $k:
#   PREFIX + dot-joined hex codes, the string itself in <...>, and the
#   ` + '-joined verbose Unicode names of its characters.
# A string matching $rxCombining is shown with a leading space inside <...>.
# $prefix is optional and defaults to the empty string.
sub print_codepoint ($$;$) {
    my $self   = shift;
    my $k      = shift;
    my $prefix = shift;

    my $shown = $k;
    $shown = " $k" if $k =~ /$rxCombining/;
    $prefix = '' unless defined $prefix;

    my @chars = split //, $k;
    my $hex   = join '.',   map { $self->key2hex($_) } @chars;
    my $names = join ' + ', map { $self->UName($_, 'verbose', 'vbell') } @chars;

    printf "%s%s\t<%s>\t%s\n", $prefix, $hex, $shown, $names;
}
6530            
# Make sure the Unicode tables are loaded:
#   - compositions are (re)loaded whenever get_NamesList() returns a defined value;
#   - the age table is loaded from get_AgeList() only if $self->{Age} is not set yet.
# Returns $self.
sub require_unidata_age ($) {
    my ($self) = @_;

    if (defined(my $names = $self->get_NamesList)) {
        $self->load_compositions($names);
    }

    my $ages = $self->get_AgeList;
    $self->load_uniage($ages) unless !defined $ages or $self->{Age};

    return $self;
}
6540            
# Print one print_codepoint() line for every distinct character of the
# string $s, in sorted order.
#
# The Unicode name/age tables are loaded first through require_unidata_age(),
# instead of repeating its four statements here, so the loading rules live in
# one place only.
sub print_coverage_string ($$) {
    my ($self, $s) = (shift, shift);

    my %seen;
    $seen{$_}++ for split //, $s;

    $self->require_unidata_age;

    require Unicode::UCD;

    $self->print_codepoint($_) for sort keys %seen;
}
6555            
# Print the plain-text coverage report for face $F to the currently selected
# filehandle, and return whatever print_table_coverage($F) returns.
#
# Sections, in order: the header and two summary lines with counts; characters
# directly on keys; base multi-char strings; (multi-)chars via single and via
# repeated prefix keys; characters reachable only via Compose; characters
# which lost the competition for a prefixed position; the per-key table; and
# the uncovered characters of a fixed list of math/Latin-D ranges.
#
# The Unicode tables are loaded through require_unidata_age() instead of a
# private copy of its statements.
sub print_coverage ($$) {
    my ($self, $F) = (shift, shift);

    $self->require_unidata_age;

    my $origin = defined $self->{'[file]'} ? "file $self->{'[file]'}" : 'string descriptor';
    $origin .= " version $self->{VERSION}" if defined $self->{VERSION};
    $origin .= " Unicode tables version $self->{uniVersion}" if defined $self->{uniVersion};

    print "############# Generated with UI::KeyboardLayout v$UI::KeyboardLayout::VERSION for $origin, face=$F\n#\n";

    my $face = ($self->{faces}{$F} ||= {});

    my $c1   = @{ $face->{'[coverage1only]'} };
    my $c2   = @{ $face->{'[coverage1]'} } - $c1;
    my $more = '';        # placeholder: the count of the '[32-bit]' characters is currently not reported

    # Distinct multi-character strings of '[coverage0]' and '[coverage1]'
    my @multi;
    for my $n (0, 1) {
        $multi[$n]{$_}++ for grep { 1 < length } @{ $face->{"[coverage$n]"} };
    }
    my @multi_c = map { scalar keys %{ $multi[$_] } } 0, 1;

    # Single BMP characters seen in Compose only, not in either coverage list
    my %comp = %{ $face->{'[inCompose]'} || {} };
    delete $comp{$_} for @{ $face->{'[coverage0]'} }, @{ $face->{'[coverage1]'} };
    my @comp = grep { 2 > length and 0x10000 > ord } sort keys %comp;

    my $cov0  = @{ $face->{'[coverage0]'} };
    my $chars = $cov0 + $c1 + $c2 - $multi_c[0] - $multi_c[1];
    printf "######### %i = %i + %i + %i + %i bindings [1-char + base multi-char-strings (MCS) + “extra layers” MCS + only via Compose key]\n",
        $cov0 + $c1 + $c2 + @comp, $chars, $multi_c[0], $multi_c[1], scalar @comp;
    printf "######### %i = %i + %i + %i%s [direct + via single prefix keys and “extra layers” (both=%i) + via repeated prefix key] chars\n",
        $chars, $cov0 - $multi_c[0], $c1 - $multi_c[1], $c2, $more,
        @{ $face->{'[coverage00+]'} } + $c1 - $multi_c[0] - $multi_c[1];

    for my $k (@{ $face->{'[coverage00+]'} }) {
        $self->print_codepoint($k);
    }
    print "############# Base multi-char strings:\n";
    for my $k (@{ $face->{'[coverage00++]'} }) {
        $self->print_codepoint($k);
    }
    print "############# Via single prefix keys:\n";
    for my $k (@{ $face->{'[coverage1only]'} }) {
        $self->print_codepoint($k) if 2 > length $k;
    }
    print "############# Multi-char via single prefix keys:\n";
    for my $k (@{ $face->{'[coverage1only]'} }) {
        $self->print_codepoint($k) if 1 < length $k;
    }

    # Entries already listed as "single prefix" are not repeated below
    my $h1 = $face->{'[coverage1only_hash]'};
    print "############# Via repeated prefix keys:\n";
    for my $k (@{ $face->{'[coverage1]'} }) {
        next unless 2 > length $k;
        $self->print_codepoint($k) unless $h1->{$k};
    }
    print "############# Multi-char via repeated prefix keys:\n";
    for my $k (@{ $face->{'[coverage1]'} }) {
        next unless 1 < length $k;
        $self->print_codepoint($k) unless $h1->{$k};
    }
    print "############# Only via Compose key:\n";
    for my $k (@comp) {
        $self->print_codepoint($k, '= ');
    }

    print "############# Have lost the competition (for prefixed position), but available elsewhere:\n";
    for my $k (sort keys %{ $face->{'[in_dia_chains]'} }) {
        next unless $face->{'[coverage_hash]'}{$k} and not $face->{'[from_dia_chains]'}{$k};
        $self->print_codepoint($k, '+ ');        # May be in from_dia_chains, but be obscured later...
    }
    print "############# Have lost the competition (not counting those explicitly prohibited by \\\\):\n";
    for my $k (sort keys %{ $face->{'[in_dia_chains]'} }) {
        next if $face->{'[coverage_hash]'}{$k};
        $self->print_codepoint($k, '- ');
    }

    my @diac      = keys %{ $self->{'[map2diac]'} };
    my $tot_diac  = @diac;
    my $lost_diac = grep { !$face->{'[coverage_hash]'}{$_} } @diac;
    print "############# Lost among known classified modifiers/standalone/combining ($lost_diac/$tot_diac):\n";
    for my $k (sort @diac) {
        next if $face->{'[coverage_hash]'}{$k};
        $self->print_codepoint($k, '?- ');
    }

    print "############# Per key list:\n";
    my $OOut = $self->print_table_coverage($F);

    # For a list of [start, length] ranges build one "-==-\t..." line per range
    # with the characters which are not covered (and not in %$skip, if given).
    # Below a720 every codepoint counts ("the tables are filled up"); later
    # only characters with a known name do.  Returns the text and the two
    # counts (ranges starting below a720, and the rest).
    my $scan = sub {
        my ($ranges, $skip) = @_;
        my ($out, $low, $high) = ('', 0, 0);
        for my $r (@$ranges) {
            my ($start, $len) = @$r;
            my $missing = join '', grep {
                (0xa720 >= ord $_ or $self->{UNames}{$_})
                    and !$face->{'[coverage_hash]'}{$_}
                    and !($skip and $skip->{$_})
            } map chr($_), $start .. ($start + $len - 1);
            if ($start < 0xa720) { $low  += length $missing }
            else                 { $high += length $missing }
            $out .= "-==-\t$missing\n";
        }
        return ($out, $low, $high);
    };

    my ($OUT, $CC, $CC1) = $scan->(
        [ [0x2200, 0x40], [0x2240, 0x40], [0x2280, 0x40], [0x22c0, 0x40],
          [0x27c0, 0x30], [0x2980, 0x40], [0x29c0, 0x40],
          [0x2a00, 0x40], [0x2a40, 0x40], [0x2a80, 0x40], [0x2ac0, 0x40], [0xa720, 0x80-0x20], [0xa780, 0x80] ]);
    print "############# Not covered in the math+latin-D ranges ($CC+$CC1):\n$OUT";

    ($OUT, $CC, $CC1) = $scan->(
        [ [0x2200, 0x80], [0x2280, 0x80],
          [0x27c0, 0x30], [0x2980, 0x80],
          [0x2a00, 0x80], [0x2a80, 0x80], [0xa720, 0x100-0x20] ],
        $face->{'[in_dia_chains]'});
    print "############# Not competing, in the math+latin-D ranges ($CC+$CC1):\n$OUT";

    $OOut
}
6664            
# Replacement text for the characters which are special in HTML content.
# (Mapping each of them to itself would make the escaping substitution a no-op.)
my %html_esc = qw( & &amp; < &lt; > &gt; );
6666             my %ctrl_special = qw( \r Enter \n Control-Enter \b BackSpace \x7f Control-Backspace \t Tab
6667             \x1b Esc; Control-[ \x1d Control-] \x1c Control-\ ^C Control-Break \x1e Control-^ \x1f Control-_ \x00 Control-@);
6668             my %alt_symb;
6669 1     1   65399 { no warnings 'qw';
  1         3  
  1         255  
6670             # ZWS ZWNJ ZWJ LRM RLM WJ=ZWNBSP Func Times Sep Plus
6671             my %a = (qw(200b ∅ 200c ‸ 200d & 200e → 200f ← 2060 ⊕ 2061 () 2062 × 2063 | 2064 +),
6672             # SPC NBSP obs-N obs-M n m m/3 m/4 m/6 figure=digit punctuation thin hair Soft-hyphen
6673             qw(0020 ␣ 00a0 ⍽ 2000 N 2001 M 2002 n 2003 m 2004 ᵐ⁄₃ 2005 ᵐ⁄₄ 2006 ᵐ⁄₆ 2007 ᵈ 2008 , 2009 ᵐ⁄₅ 200a ᵐ⁄₈ 00ad -),
6674             # LineSep ParSep LRE RLE PopDirForm LRO RLO narrowNBSP
6675             qw(2028 ⏎ 2029 ¶ 202a ⇒ 202b ⇐ 202c ↺ 202d ⇉ 202e ⇇ 202f ⁿ));
6676             @alt_symb{map chr hex, keys %a} = values %a;
6677             }
6678            
6679             # Make: span for control, soft-hyphen, white-space; include in with popup; include in span with special highlight
6680             sub char_2_html_span ($$$$$$;@) {
6681 0     0 0   my ($self, $base_c, $C, $c, $F, $opts, @types, $expl, $title, $vbell) = @_;
6682 0           my $aInv = $self->charhex2key($self->{faces}{$F}{'[Flip_AltGr_Key]'});
6683 0 0 0       $expl = $C->[3] if 'ARRAY' eq ref $C and $C->[3];
6684 0 0         $expl =~ s/(?=\p{NonspacingMark})/ /g if $expl;
6685 0   0       my $prefix = (ref $C and $C->[2]);
6686 0           my $cc = $c;
6687 0   0       $aInv = ($base_c || 'N/A') eq $aInv;
6688 0   0       my $docs = ($prefix and $self->{faces}{$F}{'[prefixDocs]'}{$self->key2hex($cc)}); # or $pre and warn "No docs: face=`$F', c=`$cc'\n";
6689 0 0         $docs =~ s/([''&])/sprintf '&#x%02x;', ord $1/ge if defined $docs;
  0            
6690             # warn "... is_D2: ", $self->array2string([$c, $baseK[$L][$shift]]);
6691 0           $c =~ s/(?=$rxCombining)/\x{25cc}/go; # dotted circle ◌ 25CC
6692 0           $c =~ s/([&<>])/$html_esc{$1}/g;
6693 0           my $create_a_c = $self->{faces}{$F}{'[create_alpha_ctrl]'};
6694 0 0         $create_a_c = $create_alpha_ctrl unless defined $create_a_c;
6695 0   0       my $alpha_ctrl = ($create_a_c and $cc =~ /[\cA-\cZ]/);
6696 0 0 0       my $with_shift = (($create_a_c > 1 and $alpha_ctrl) ? '(Shift-)' : '');
6697 0   0       $c =~ s{([\x00-\x1F\x7F])}{ my $C = $self->control2prt("$1"); my $S = $ctrl_special{$C} || '';
  0            
  0            
6698 0 0 0       ($S and $S .= ", "), $S .= "Control-$with_shift".chr(0x40+ord $1) if $alpha_ctrl;
6699 0 0         $C = "$C" if $S; $C }ge;
  0            
6700 0   0       my $type = ($cc =~ /[^\P{Blank}\x00-\x1f]/ && 'WS'); # Blank and not control char
6701 0           my ($fill, $prefill, $zw) = ('', '');
6702 0 0 0       if ($type or $c =~ /($rxZW)$/o) {
6703 0 0         my $alt = ($alt_symb{$cc} ? qq( convention="$alt_symb{$cc}") : '');
6704 0           $fill = ""; # Soft hyphen etc
6705             }
6706 0 0         if ($type) { # Putting WS inside l makes gaps between adjacent WS blocks
6707 0           $prefill = '';
6708 0           $fill .= '';
6709             }
6710 0 0         push @types, 'no-mirror-rtl' if "\x{34f}" eq $cc; # CGJ
6711 0   0       $zw = !!$fill || $cc eq "\x{034f}";
6712 0           $vbell = !defined $C;
6713 0 0         unless (defined $title) {
6714 0   0       $title = ((ord $cc >= 0x80 or $cc eq ' ') && sprintf '%04X %s', ord $cc, $self->UName($cc, 'verbose', $vbell));
6715 0 0 0       if ($title and $docs) {
6716 0           $title = "$docs (on $title)";
6717             }
6718 0   0       $title ||= ($docs || '');
      0        
6719 0 0 0       if (defined $expl and length $expl and (1 or 0x7f <= ord $cc)) {
      0        
6720 0 0         $title .= ' ' if length $title;
6721 0           $title .= " {via $expl}";
6722             }
6723 0 0 0       $title .= ' (visual bell indicates unassigned keypress)' if $title and !$expl and $vbell;
      0        
6724 0 0         $title = 'This prefix key accesses this column with AltGr-invertion' if $aInv;
6725 0 0         $title =~ s/([''&])/sprintf '&#x%02x;', ord $1/ge if $title;
  0            
6726 0 0         $title = qq( title='$title') if $title;
6727             }
6728 0 0 0       if ($type) { # Already covered
    0 0        
    0 0        
    0 0        
    0 0        
    0          
    0          
    0          
    0          
    0          
6729             } elsif ($zw) {
6730 0           push @types,'ZW';
6731             } elsif (not defined $C) {
6732 0           push @types,'vbell';
6733             } elsif ($title =~ /(\b(N-ARY|BIG(?!\s+YUS\b)|GREEK\s+PROSGEGRAMMENI|KORONIS|SOF\s+PASUQ|PUNCTUATION\s+(?:GERESH|GERSHAYIM)|PALOCHKA|CYRILLIC\s.*\s(DZE|JE|QA|WE|A\s+IE)|ANO\s+TELEIA|GREEK\s+QUESTION\s+MARK)|"\w+\s+(?:BIG|LARGE))\b.*\s+\[/) { # "0134 BIG GUY#"
6734 0           push @types,'nAry';
6735             } elsif ($title =~ /\b(OPERATOR|SIGN|SYMBOL|PROOF|EXISTS|FOR\s+ALL|(DIVISION|LOGICAL)\b.*)\s+\[/) {
6736 0           push @types,'operator';
6737             } elsif ($title =~ /\b(RELATION|PERPENDICULAR|PARALLEL\s*TO|DIVIDES|FRACTION\s+SLASH)\s+\[/) {
6738 0           push @types,'relation';
6739             } elsif ($title =~ /\[.*\b(IPA)\b|\bCLICK\b/) {
6740 0           push @types,'ipa';
6741             } elsif ($title =~ /\bLETTER\s+[AEUIYO]\b/ and
6742             $title =~ /\b(WITH|AND)\s+(HOOK\s+ABOVE|HORN)|(\s+(WITH|AND)\s+(CIRCUMFLEX|BREVE|ACUTE|GRAVE|TILDE|DOT\s+BELOW)\b){2}/) {
6743 0           push @types,'viet';
6744             } elsif (0 <= index(lc '⁊ǷꝥƕǶᵹ', lc $cc) or 0xa730 <= ord $cc and 0xa78b > ord $cc or 0xa7fb <= ord $cc and 0xa7ff >= ord $cc) {
6745 0           push @types,'paleo';
6746             } elsif ($title =~ /(\s+(WITH|AND)\s+((DOUBLE\s+)?\w+(\s+(BELOW|ABOVE))?)\b){2}/) {
6747 0           push @types,'doubleaccent';
6748             }
6749 0 0 0       push @types, ($1 ? 'withSubst' : 'isSubst') if ($expl || '') =~ /\sSubst\{(\S*\}\s+\S)?/;
    0          
6750 0 0         push @types, 'altGrInv' if $aInv;
6751 0 0         my $q = ("@types" =~ /\s/ ? "'" : '');
6752             # ($prefill, $fill) = ("$prefill", "$fill");
6753 0 0         @types = " class=$q@types$q" if @types;
6754 0 0 0       my($T,$OPT) = ($opts && $opts->{ltr} ? ('bdo', ' dir=ltr') : ('span', '')); # Just `span´ does not work in FF15
6755 0 0 0       $c = '†' if $aInv and $cc ne ($base_c || 'N/A'); #  
      0        
6756 0           "<$T$OPT@types$title>$prefill$c$fill"
6757             }
6758            
6759             sub print_table_coverage ($$;$$) {
6760 0   0 0 0   my ($self, $F, $html, $extra_headers) = (shift, shift, shift, shift || '');
6761 0           my $f = $self->{'[file]'};
6762 0 0         $f = (defined $f) ? "file $f" : 'string descriptor';
6763 0           my $v = $self->{VERSION};
6764 0 0         $f .= " version $v" if defined $v;
6765 0 0         $f .= " Unicode tables version $self->{uniVersion}" if defined $self->{uniVersion};
6766 0 0         print <
6767            
6768             "http://www.w3.org/TR/html4/loose.dtd">
6769            
6770            
6771            
6772             $extra_headers
6815            
6816            
6817             [1] />"; $COLS ), next unless $dFace; "; $header\n \n" " if $html; # Do not make RTL chars mix up the order
6818             EOP
6819 0           my($LL, $INV, %s, @d, %access, %docs) = ($self->{faces}{$F}{layers}, $self->{faces}{$F}{'[Flip_AltGr_Key]'});
6820 0 0         $s{$self->charhex2key($INV)}++ if defined $INV; # Skip in reports '
6821 0           my @LL = map $self->{layers}{$_}, @$LL;
6822 0 0 0       $s{$_}++ or push @d, $_ for map @{ $self->{faces}{$F}{"[$_]"} || [] }, qw(dead_array dead_in_VK_array extra_report_DeadChar);
  0            
6823 0           my (@A, %isD2, @Dface, @DfaceKey, %d_seen) = [];
6824 0           my $compK = $self->{faces}{$F}{'[ComposeKeys]'};
6825             #warn 'prefix keys to report: <', join('> <', @d), '>';
6826 0           for my $ddK (@d) {
6827 0           (my $dK = $ddK) =~ s/^\s+//;
6828 0           my $c = $self->key2hex($self->charhex2key($dK));
6829 0 0         next if $d_seen{$c}++;
6830             ($compK->{$c} or warn("??? Skip non-array prefix key `$c' for face `$F', k=`$dK'")), next
6831 0 0 0       unless defined (my $FF = $self->{faces}{$F}{'[deadkeyFace]'}{$c});
6832 0           $access{$FF} = [$self->charhex2key($dK)];
6833 0           push @Dface, $FF;
6834 0           push @DfaceKey, $c;
6835 0           $docs{$FF} = $self->{faces}{$F}{'[prefixDocs]'}{$c}; # and warn "Found docs: face=`$F', c=`$c'\n";
6836 0           push @A, [$self->charhex2key($dK)];
6837             }
6838            
6839 0           my ($lastDface, $prevCol, $COLS, @colOrn, %S, @joinedPairs) = ($#Dface, -1, '', [qw(0 column1)]);
6840 0 0         for my $kk (split /\p{Blank}+\|{3}\p{Blank}+/,
6841             (defined $self->{faces}{$F}{faceDeadKeys} ? $self->{faces}{$F}{faceDeadKeys} : ''), -1) {
6842 0           my $cnt = 0;
6843 0   0       length and $cnt++ for split /\p{Blank}+/, $kk;
6844 0           push @joinedPairs, $cnt;
6845             }
6846 0           pop @joinedPairs;
6847 0           my $done = 0;
6848 0           push @colOrn, [$done += $_, 'endPair'] for @joinedPairs;
6849 0           my @skip_sections;
6850 0           for my $s (values %start_SEC) {
6851 0           $skip_sections[$_]++ for $s->[0]..($s->[0]+$s->[1]-1)
6852             }
6853            
6854 0           for my $reported (1, 0) {
6855 0 0         for my $DD (@{ $self->{faces}{$F}{$reported ? 'LayoutTable_add_double_prefix_keys' : 'faceDeadKeys2'} }) {
  0            
6856 0           (my $dd = $DD) =~ s/^\s+//;
6857             # XXXX BUG in PERL??? This gives 3: DB<4> x scalar (my ($x, $y) = split //, 'ab')
6858 0 0         2 == (my (@D) = split //, $self->stringHEX2string($dd)) or die "Not a double character in LayoutTable_add_double_prefix_keys for `$F': `$DD' -> `", $self->stringHEX2string($dd), "'";
6859 0 0         my $map1 = $self->{faces}{$F}{'[deadkeyFaceHexMap]'}{$self->key2hex($D[0])}
    0          
6860             or ($reported ? die "Can't find prefix key face for `$D[0]' in `$F'" : next); # inverted faces bring havoc
6861 0 0         defined (my $Dead2 = $map1->{$self->key2hex($D[1])}) or die "Can't map `$D[1]' in `$F'+prefix `$D[0]'"; # in hex already
6862 0 0         $Dead2 = $Dead2->[0] if 'ARRAY' eq ref $Dead2;
6863 0 0         defined (my $ddd = $self->{faces}{$F}{'[deadkeyFace]'}{$Dead2}) or die "Can't find prefix key face for `$D[1]' -> `$Dead2' in `$F'+prefix `$D[0]'";
6864 0 0         next if $S{"@D"}++;
6865 0 0         push(@Dface, $ddd), push @DfaceKey, $Dead2 if $reported;
6866 0   0       $access{$ddd} ||= \@D;
6867 0           $docs{$ddd} = $self->{faces}{$F}{'[prefixDocs]'}{$Dead2};
6868 0 0         push @A, \@D if $reported;
6869             # warn "set is_D2: @D";
6870 0           $isD2{$D[0]}{$D[1]}++;
6871             }
6872             }
6873 0 0         push @colOrn, [$lastDface+1, 'pre_ExtraCols'] if $#Dface != $lastDface;
6874 0           for my $orn (@colOrn) {
6875 0           my $skip = $orn->[0] - $prevCol - 1;
6876 0 0         warn("Multiple classes on columns of report unsupported: face=$F, col [@$orn]"), next if $skip < 0;
6877 0           $prevCol = $orn->[0];
6878 0 0         my $many = $skip > 1 ? " span=$skip" : '';
6879 0 0         $skip = $skip > 0 ? "\n " : '';
6880 0           $COLS .= "$skip\n
6881             }
6882 0 0         print <
6883            
6884            
6885             EOP
6886 0   0       my ($k, $first_ctrl, $post_ctrl, @last_in_row) = (-1, map $self->{faces}{$F}{"[$_]"} || 0, qw(start_ctrl end_ctrl));
6887 0 0         $last_in_row[ $k += $_ ]++ for @{ $self->{faces}{$F}{'[geometry]'} || [] };
  0            
6888             #warn 'prefix key faces to report: <', join('> <', @Dface), '>';
6889 0           my @maps = (undef, map $self->{faces}{$F}{'[deadkeyFaceHexMap]'}{$_}, @DfaceKey); # element of Dface may be false if this is non-autonamed AltGr-inverted face
6890 0 0         my $dead = $html ? "\x{2620}" : "\x{2620}";
6891 0 0         my $dead_i = $html ? "\x{2620}" : "\x{2620}";
6892 0           my $header = '';
6893 0           for my $dFace ('', @Dface) { # '' is no-dead
6894 0           my $base_t = 'Characters immediately on keys (without prefix keys); the first two are without/with Shift, two others same, but with added AltGr (excluding the special-key zone)';
6895 0           my $prefix_t = 'After tapping a prefix key, the base keys are replaced by what is in the column of the prefix key';
6896 0 0         $header .= qq( ↓Base Prefix→
6897 0           my @a = map {(my $a = $_) =~ s/^(?=$rxCombining)/\x{25cc}/o; $a } @{ $access{$dFace} };
  0            
  0            
  0            
6898 0           my $docs = $docs{$dFace};
6899 0 0         $docs =~ s/([''&])/sprintf '&#x%02x;', ord $1/ge if $docs;
  0            
6900 0 0         my $withDocs = (defined $docs ? "@a" : "@a");
6901 0           $header .= " $withDocs
6902             }
6903 0 0         print "
6904             if $html;
6905 0           my $vbell = '♪';
6906 0           my $OOut = '';
6907 0           for my $n ( 0 .. $#{ $LL[0] } ) {
  0            
6908 0           my ($out, $out_c, $prev, @KKK, $base_c) = ('', 0, '');
6909 0           my @baseK;
6910 0 0 0       next if $n >= $first_ctrl and $n < $post_ctrl or $skip_sections[$n];
      0        
6911 0           for my $dn (0..@Dface) { # 0 is no-dead
6912 0 0 0       next if $dn and not $maps[$dn];
6913 0 0         $out .= $html ? '' : ($prev =~ /\X{7}/ ? ' ' : "\t") if length $out;
    0          
    0          
6914 0 0         my $is_D2 = $isD2{ @{$A[$dn]} == 1 ? $A[$dn][0] : 'n/a' };
  0            
6915             # warn "is_D2: ", $self->array2string([$dn, $is_D2, $A[$dn], $A[$dn][0]]);
6916 0           my $o = '';
6917 0           for my $L (0..$#$LL) {
6918 0           for my $shift (0..1) {
6919 0           my $c = $LL[$L][$n][$shift];
6920 0           my ($pre, $expl, $C, $expl1, $invert_dead) = ('', '', $c);
6921 0 0         $o .= ' ', next unless defined $c;
6922 0           $out_c++;
6923 0 0 0       $pre = $dead if not $dn and 'ARRAY' eq ref $c and $c->[2];
      0        
6924 0 0         $c = $c->[0] if 'ARRAY' eq ref $c;
6925 0 0         $KKK[$L][$shift] = $c unless $dn;
6926 0           $base_c = $KKK[$L][$shift];
6927             # warn "int_struct -> dead; face `$F', KeyPos=$n, Mods=$L, shift=$shift, ch=$c\n" if $pre;
6928 0 0         if ($dn) {
6929 0           $C = $c = $maps[$dn]{$self->key2hex($c)};
6930 0 0         $c = $vbell unless defined $c;
6931 0 0 0       $invert_dead = (3 == ($c->[2] || 0) || (3 << 3) == ($c->[2] || 0)) if ref $c;
6932 0 0 0       $pre = $invert_dead ? $dead_i : $dead if 'ARRAY' eq ref $c and $c->[2];
    0          
6933 0 0         $c = $c->[0] if 'ARRAY' eq ref $c;
6934 0           $c = $self->charhex2key($c);
6935             } else {
6936             # warn "coverage0_prefix -> dead; face `$F', KeyPos=$n, Mods=$L, shift=$shift, ch=$c\n" if $self->{faces}{$F}{'[coverage0_prefix]'}{$c};
6937 0 0 0       $invert_dead = (3 == ($c->[2] || 0) || (3 << 3) == ($c->[2] || 0)) if ref $c;
6938 0 0 0       $pre = $invert_dead ? $dead_i : $dead if $pre or $self->{faces}{$F}{'[coverage0_prefix]'}{$c};
    0          
6939             }
6940 0 0         $baseK[$L][$shift] = $c unless $dn;
6941 0 0 0       $pre ||= $dead if $dn and $is_D2->{$baseK[$L][$shift]};
      0        
6942            
6943 0 0         if ($html) {
6944 0           $c = $self->char_2_html_span($base_c, $C, $c, $F, {ltr => 1}, 'l');
6945             } else {
6946 0           $c =~ s/(?=$rxCombining)/\x{25cc}/go; # dotted circle ◌ 25CC
6947 0           $c =~ s{([\x00-\x1F\x7F])}{ $self->control2prt("$1") }ge;
  0            
6948             }
6949 0           $c = "$pre$c";
6950 0           $o .= $c;
6951             }
6952             }
6953 0           $o =~ s/ +$//;
6954 0           $prev = $o;
6955 0           $out .= $o;
6956             }
6957 0 0         my $class = $last_in_row[$n] ? ' class=lastKeyInKRow' : '';
6958 0 0         $out = " $out
6959 0 0         $OOut .= "$out\n", print "$out\n" if $out_c;
6960             }
6961 0 0         my @extra = map {(my $s = $_) =~ s/^\s+//; "\n\n

$s"} @{ $self->{faces}{$F}{TableSummaryAddHTML} || [] };

  0            
  0            
  0            
6962 0           my $create_a_c = $self->{faces}{$F}{'[create_alpha_ctrl]'};
6963 0 0         $create_a_c = $create_alpha_ctrl unless defined $create_a_c;
6964 0   0       my $extra_ctrl = ($create_a_c >= 1) && '/[/]/\\';
6965 0   0       $extra_ctrl .= ($create_a_c >= 2) && '/^/_';
6966 0   0       my $more .= ($create_a_c >= 1) && ' Most of Ctrl-letters are omitted from the table; deduce them from reports for C/H/I/J/M/Z.';
6967 0 0         print <
6968            
6969            
6970            
6971             @extra

Highlights (homographs and special needs): zero-width or SOFT HYPHEN: , whitespace: , Vietnamese; other double-accent; paleo-Latin;

6972             or IPA.
6973             Or name having RELATION, PERPENDICULAR,
6974             PARALLEL, DIVIDES, FRACTION SLASH; or BIG, LARGE, N-ARY, CYRILLIC PALOCHKA/DZE/JE/QA/WE/A-IE,
6975             ANO TELEIA, KORONIS, PROSGEGRAMMENI, GREEK QUESTION MARK, SOF PASUQ, PUNCTUATION GERESH/GERSHAYIM; or OPERATOR, SIGN,
6976             SYMBOL, PROOF, EXISTS, FOR ALL, DIVISION, LOGICAL; or AltGr-inverter prefix;
6977             or via a rule involving/exposing a “BlueKey” substitution rule.
6978             (Some browsers fail to show highlights for whitespace/zero-width.)
6979            

Vertical lines separate: the column of the base face, paired

6980             prefix keys with “inverted bindings”, and explicitly selected multi-key prefixes. Horizontal lines separate key rows of
6981             the keyboard (including a fake row with the “left extra key” [one with <> or \\| - it is missing on many keyboards]
6982             and the KP_Decimal key [often marked as . Del on numeric keypad]); the last group is for semi-fake keys for
6983             Enter/C-Enter/Backspace/C-Backspace/Tab and C-Break$extra_ctrl (make sense after prefix keys) and special keys explicitly added
6984             in .kbdd files (usually SPACE).$more
6985            

Hover mouse over any appropriate place to get more information.

6986             In popups: brackets enclose Script, Range, “1st Unicode version with this character”;
6987             braces enclose “the reason why this position was assigned to this character” (VisLr means that a visual table was
6988             used; in Subst{HOW}, L=Layer and F=Face mean that a “BlueKey” substitution rule was defined
6989             via a special layer/face).
6990            
6991            
6992             EOP
6993 0           $OOut
6994             }
6995            
6996             sub coverage_face0 ($$;$) {
                 # coverage_face0($self, $F [, $after_import]): collect which characters the face named $F
                 # produces, and cache the results in the face's hash $self->{faces}{$F}.
                 # Returns the array ref stored under '[coverage0]' (the cached one if it is already present).
                 # An entry of a key slot is either a plain string or an array ref; for array refs the code
                 # below reads [0] (the character), [1] (joined with [0] as "char:name" for '[imported]'),
                 # [2] (true marks a prefix key; the value 2 marks an imported one) and [4] (filled in by
                 # the import translation, see massage_imported2).
                 # $after gets no initial value on purpose: it only counts the layers already processed.
6997 0     0 0   my ($self, $F, $after_import, $after) = (shift, shift, shift);
6998 0           my $H = $self->{faces}{$F};
6999 0           my $LL = $H->{layers};
7000 0 0         return $H->{'[coverage0]'} if exists $H->{'[coverage0]'};
7001 0           my (%seen, %seen_prefix, %imported);
                 # Characters listed in '[DEAD]' or '[dead_in_VK]' are counted as prefix keys, not as output
7002 0 0         my $d = { %{ $H->{'[DEAD]'} || {} }, %{ $H->{'[dead_in_VK]'} || {} } };
  0 0          
  0            
7003             # warn "coverage0 for `$F'" if $after_import;
7004 0           for my $l (@$LL) {
7005 0           my $L = $self->{layers}{$l};
7006 0           for my $k (@$L) {
                 # An entry with [4] set means the import translation has already run on this layer
7007             warn "Face `$F', layer `$l': coverage check is run too late: after the importation translation is performed"
7008 0 0 0       if not $after_import and $F !~ /^(.*)##Inv#([a-f0-9]{4,})$/is and grep {defined and ref and $_->[4]} @$k;
  0 0 0        
      0        
                 # %seen: ordinary characters (not flagged via [2], not in $d)
7009 0 0 0       $seen{ref() ? $_->[0] : $_}++ for grep {defined and !(ref and $_->[2]) and !$d->{ref() ? $_->[0] : $_}} @$k;
  0 0 0        
    0          
                 # %seen_prefix: the complement among defined entries (flagged via [2], or listed in $d)
7010 0 0 0       $seen_prefix{ref() ? $_->[0] : $_}++ for grep {defined and (ref and $_->[2] or $d->{ref() ? $_->[0] : $_})} @$k;
  0 0 0        
    0          
7011 0 0 0       $imported{"$_->[0]:$_->[1]"}++ for grep {defined and ref and 2 == ($_->[2] || 0)} @$k; # exportable
  0   0        
7012             }
                 # True only on the first pass: snapshot of what the first layer alone covers
7013 0 0         unless ($after++) {
7014 0           $H->{'[layer0coverage0]'} = [sort keys %seen];
7015             }
7016             }
7017 0           $H->{'[coverage0_prefix]'} = \%seen_prefix;
7018 0           $H->{'[coverage0]'} = [sort keys %seen];
                 # '[coverage00]': single characters below U+10000; '[coverage0+]': everything else
7019 0 0         $H->{'[coverage00]'} = [grep { 2>length and 0x10000 > ord } @{$H->{'[coverage0]'}}];
  0            
  0            
7020 0   0       $H->{'[coverage0+]'} = [grep {!(2>length and 0x10000 > ord)} @{$H->{'[coverage0]'}}];
  0            
  0            
                 # '[coverage00+]': entries shorter than 2 characters; '[coverage00++]': entries longer than 1
7021 0           $H->{'[coverage00+]'} = [grep { 2>length } @{$H->{'[coverage0]'}}];
  0            
  0            
7022 0           $H->{'[coverage00++]'} = [grep { 1<length } @{$H->{'[coverage0]'}}];
  0            
  0            
7023 0           $H->{'[imported]'} = [sort keys %imported];
                 # Set-like hash for quick membership tests against '[coverage00]'
7024 0           $H->{'[coverage00hash]'} = { map { ($_, 1) } @{ $H->{'[coverage00]'} } };
  0            
  0            
7025 0           $H->{'[coverage0]'};
7026             }
7027            
7028             # %imported is analysed: if a manual deadkey is specified, this value is used, otherwise a new value is generated and remembered.
7029             # (but is not put in the keymap???)
7030             sub massage_imported ($$) {
                 # massage_imported($self, $f): acts only on faces named "BASE###HEX" (HEX is 4 or more hex digits);
                 # for any other name it returns at once.  For every "char:face" string in the '[imported]'
                 # list of face $f it chooses the prefix key $K to use in BASE, copies the layers of that
                 # deadkey from the exporting face, and registers the result in BASE as a new deadkey face.
7031 0     0 0   my ($self, $f) = (shift, shift);
                 # $KKK is captured here but not used in the rest of this sub
7032 0 0         return unless my ($F, $KKK) = $f =~ /^(.*)###([a-f0-9]{4,})$/is;
7033 0           my $H = $self->{faces}{$F};
7034 0 0         for my $i ( @{ $self->{faces}{$f}{'[imported]'} || [] } ) {
  0            
                 # $k: the (single-character) prefix key in the exporting face; $face: name of that face
7035 0 0         my($k,$face) = $i =~ /^(.):(.*)/s or die "Unrecognized imported: `$i'";
7036 0           my $K;
                 # A manual assignment ('[imported2key]') takes precedence over an earlier automatic one
7037 0 0 0       if (exists $H->{'[imported2key]'}{$i} or exists $H->{'[imported2key_auto]'}{$i}) {
    0 0        
7038 0 0         $K = exists $H->{'[imported2key]'}{$i} ? $H->{'[imported2key]'}{$i} : $H->{'[imported2key_auto]'}{$i};
7039             } elsif ($H->{'[coverage0_prefix]'}{$k} or $H->{'[auto_dead]'}{$k}) { # it is already used
7040             # Assign a fake prefix key to imported map
                 # NB: this `return' leaves the sub, so the remaining imports of $f are not processed either
7041             warn("Imported prefix keys exist, but Auto_Diacritic_Start is not defined in face `$F'"), return
7042 0 0         unless defined $H->{'[first_auto_dead]'};
7043 0           $K = $H->{'[imported2key_auto]'}{$i} = $self->next_auto_dead($H);
7044             } else { # preserve the prefix key
7045 0           $K = $H->{'[imported2key_auto]'}{$i} = $k;
7046 0           $H->{'[auto_dead]'}{$k}++;
7047             }
7048 0 0         my $LL = $self->{faces}{$face}{'[deadkeyLayers]'}{$self->key2hex($k)}
7049             or die "Cannot import a deadkey `$k' from `$face'";
                 # [@$LL] duplicates the list itself; the elements are shared with the exporting face
7050 0           $LL = [@$LL]; # Deep copy, so may override
7051 0           my $KK = $self->key2hex($K);
7052 0 0         if (my $over = $H->{'[AdddeadkeyLayers]'}{$KK}) {
7053             #warn "face `$F': additional bindings for deadkey $KK exist.\n";
7054 0           $LL = [$self->make_translated_layers_stack($over, $LL)];
7055             }
                 # Remember the choice ("char:face" => key in BASE); massage_imported2 looks it up later
7056 0           $H->{'[imported2key_all]'}{"$k:$face"} = $self->charhex2key($KK);
7057 0           $H->{'[deadkeyLayers]'}{$KK} = $LL;
                 # The imported map becomes a face of its own, named after BASE and the import string
7058 0           my $new_facename = "$F#\@#\@#\@$i";
7059 0           $self->{faces}{$new_facename}{layers} = $LL;
7060 0           $H->{'[deadkeyFace]'}{$KK} = $new_facename;
7061 0           $self->link_layers($F, $new_facename, 'skipfix', 'no-slot-warn');
7062            
7063 0           $self->coverage_face0($new_facename);
7064             }
7065             }
7066            
7067             sub massage_imported2 ($$) {
                 # massage_imported2($self, $f): for a face named "BASE###HEX", rewrite the prefix-key entries
                 # (array refs with a true [2]) in all its layers.  Entries with [2] == 2 get the key which
                 # BASE's '[imported2key_all]' table assigns to "char:face"; the former character is kept
                 # in [4].  All other flagged entries get [2] shifted right by 3 bits.
                 # Afterwards the cached coverage of $f is dropped and recomputed.
7068 0     0 0   my ($self, $f) = (shift, shift);
7069 0           warn "... Importing into face=`$f" if debug_import;
7070 0 0         return unless my ($F, $KKK) = ($f =~ /^(.*)###([a-f0-9]{4,})$/is); # what about multiple prefixes???
7071 0 0         return unless my $HH = $self->{faces}{$F}{'[imported2key_all]'};
7072 0           my $H = $self->{faces}{$f};
7073 0           warn "Importing into face=`$F' prefix=$KKK" if debug_import;
7074 0           my $LL = $H->{layers};
                 # @unresolved is only referenced by the commented-out code below
7075 0           my @unresolved;
7076 0           for my $l (@$LL) {
7077 0           my $L = $self->{layers}{$l};
7078 0           for my $k (@$L) {
7079 0 0 0       for my $kk (grep {defined and ref and $_->[2]} @$k) { # exportable
  0            
                 # NOTE(review): $kk is a loop alias, so this assignment presumably replaces the entry
                 # stored in @$k by a private copy before it is modified - confirm
7080 0           $kk = [@$kk]; # deep copy
7081 0 0         if (2 == $kk->[2]) { # exportable
                 # If the entry was translated before, [4] holds the character from the exporting face
7082 0 0         my $v = (defined $kk->[4] ? $kk->[4] : $kk->[0]);
7083 0           my $j = $HH->{"$v:$kk->[1]"};
7084             # push(@unresolved, "$v:$kk->[1]"),
                 # An unresolved import only triggers a warning; the assignment below then stores undef in [0]
7085 0 0         warn "Can't resolve `$v:$kk->[1]' to an imported dead key, face=`$F' prefix=$KKK; layer=$l"
7086             unless defined $j;
7087 0           warn "Importing `$v:$kk->[1]' as `$j', face=`$F' prefix=$KKK; layer=$l" if debug_import;
7088 0           @$kk[0,4] = ($j, $v);
7089             } else {
7090             #warn "massage_imported2: shift $kk->[2] <<= 3 key `$kk->[0]' face `$f' layer `$l'\n" if $kk->[2] >> 3;
7091 0           $kk->[2] >>= 3; # ByPairs makes <<= 3 !
7092             }
7093             }
7094             }
7095             }
                 # coverage_face0 returns its cached value when '[coverage0]' exists, so drop it first;
                 # the second argument suppresses its "run too late" warning
7096 0           delete $self->{faces}{$f}{'[coverage0]'};
7097 0           $self->coverage_face0($f, 'after_import'); # recalculate
7098             # $H->{'[unresolved_imported]'} = \@unresolved if @unresolved;
7099             }
7100            
7101             sub massage_char_substitutions($$) { # Read $self->{Substitutions}
                 # massage_char_substitutions($self, $data): turn the rules found in $data->{Substitutions}
                 # (a hash of arrays of strings) into entries of $self->{'[Substitutions]'}{""}.
                 # Each rule must decode to exactly 2 characters: the source and its replacement.
                 # Dies if $self->{Compositions} is already set, or if a rule does not have 2 characters.
7102 0     0 0   my($self, $data) = (shift, shift);
7103 0 0         die "Too late to load char substitutions" if $self->{Compositions};
7104 0 0         for my $K (keys %{ $data->{Substitutions} || {}}) {
  0            
7105 0           my $arr = $data->{Substitutions}{$K};
7106 0           for my $S (@$arr) {
7107 0           my $s = $self->stringHEX2string($S);
                 # Drop a blank which stands immediately before a nonspacing (combining) mark
7108 0           $s =~ s/\p{Blank}(?=\p{NonspacingMark})//g;
7109 0 0         die "Expect 2 chars in substitution rule; I see <$s> (from <$S>)" unless 2 == (my @s = split //, $s);
7110 0           $self->{'[Substitutions]'}{""}{$s[0]} = [[0, $s[1]]]; # Format as in Compositions
                 # Add the lowercase/uppercase counterparts of the rule too, but only when case mapping
                 # changes both the source and the replacement
7111 0 0 0       $self->{'[Substitutions]'}{""}{lc $s[0]} = [[0, lc $s[1]]]
7112             if lc $s[0] ne $s[0] and lc $s[1] ne $s[1];
7113 0 0 0       $self->{'[Substitutions]'}{""}{uc $s[0]} = [[0, uc $s[1]]]
7114             if uc $s[0] ne $s[0] and uc $s[1] ne $s[1];
7115             }
7116             }
7117             }
7118            
7119             sub new_from_configfile ($$) {
                 # new_from_configfile($class, $F): read the whole file $F through the :utf8 layer, build
                 # the object via new_from_configfile_string(), and record the file name under '[file]'.
                 # Dies if the file cannot be opened or closed.  Returns the new object.
7120 0     0 0   my ($class, $F) = (shift, shift);
7121 0 0         open my $f, '< :utf8', $F or die "Can't open `$F' for read: $!";
                 # Undefined $/ (localized to the do-block) makes <$f> return the complete contents
7122 0           my $s = do {local $/; <$f>};
  0            
  0            
7123 0 0         close $f or die "Can't close `$F' for read: $!";
7124             #warn "Got `$s'";
7125 0           my $self = $class->new_from_configfile_string($s);
7126 0           $self->{'[file]'} = $F;
7127 0           $self;
7128             }
7129            
7130             sub new_from_configfile_string ($$) {
                 # new_from_configfile_string($class, $ss): parse the configuration text $ss, bless the
                 # resulting hash into the class, and run the sequence of massage_*/create_* passes below
                 # (the order matters; see the trailing comments of the individual calls).
                 # Dies if extra arguments are given.  Returns the blessed object.
7131 0     0 0   my ($class, $ss) = (shift, shift);
7132 0 0         die "too many arguments to UI::KeyboardLayout->new_from_configfile" if @_;
7133 0           my $data = $class->parse_configfile($ss);
7134             # Dumpvalue->new()->dumpValue($data);
7135 0           my ($layers, $counts, $offsets) = $class->fill_kbd_layers($data);
                 # Merge the three returned hashes into $data by hash slices (existing keys are overwritten)
7136 0           @{$data->{layers}}{keys %$layers} = values %$layers;
  0            
7137 0           @{$data->{layer_counts} }{keys %$counts} = values %$counts;
  0            
7138 0           @{$data->{layer_offsets}}{keys %$offsets} = values %$offsets;
  0            
                 # $class may be a class name or an existing object
7139 0   0       $data = bless $data, (ref $class or $class);
7140 0           $data->massage_hash_values;
7141 0           $data->massage_diacritics; # Read $self->{Diacritics}
7142 0           $data->massage_char_substitutions($data); # Read $self->{Substitutions}
7143 0           $data->massage_faces;
7144            
7145 0           $data->massage_deadkeys_win($data); # Process (embedded) MSKLC-style deadkey maps
7146 0           $data->scan_for_DeadKey_Maps(); # Makes a direct-access synonym, scan for DeadKey_Maps* keys
7147 0           $data->create_DeadKey_Maps();
7148 0           $data->create_composite_layers; # Needs to be after simple deadkey maps are known
7149            
                 # Three separate passes over all the faces: each pass is completed for every face before
                 # the next one starts
7150 0           for my $F (keys %{ $data->{faces} }) {
  0            
7151 0 0 0       next if 'HASH' ne ref $data->{faces}{$F} or $F =~ /\bVK$/; # "parent" taking keys for a child
7152 0           $data->coverage_face0($F); # creates coverage0, imported array (c0 excludes diacritics), coverage0_prefix hash
7153             }
7154 0           for my $F (keys %{ $data->{faces} }) {
  0            
7155 0 0 0       next if 'HASH' ne ref $data->{faces}{$F} or $F =~ /\bVK$/; # "parent" taking keys for a child
7156 0           $data->massage_imported($F); # calc new values for imported prefix keys, augments imported maps with Add-maps
7157             }
7158 0           for my $F (keys %{ $data->{faces} }) {
  0            
7159 0 0 0       next if 'HASH' ne ref $data->{faces}{$F} or $F =~ /\bVK$/; # "parent" taking keys for a child
7160 0           $data->massage_imported2($F); # changes imported prefix keys to appropriate values for the target personality
7161             }
7162 0           $data->create_prefix_chains;
7163 0           $data->create_inverted_faces;
7164 0           $data->link_composite_layers; # Needs to be after imported keys are reassigned...
7165 0           for my $F (keys %{ $data->{faces} }) { # Fine-tune inverted-AltGr faces
  0            
7166 0 0 0       next if 'HASH' ne ref $data->{faces}{$F} or $F =~ /\bVK$/; # "parent" taking keys for a child
7167 0 0         next if $F =~ /#\@?#\@?(Inv)?#\@?/; # Face-on-a-deadkey
7168            
7169 0           my $D = $data->{faces}{$F}{'[deadkeyFace]'};
7170 0           my $Ex = $data->{faces}{$F}{'[AltGr_Invert_Show]'};
7171 0           for my $d (keys %$D) {
7172 0           $data->{faces}{$F}{'[deadkeyFaceHexMap]'}{$d} = $data->linked_faces_2_hex_map($F, $D->{$d});
                 # The rest of the loop body applies only to deadkeys with an automatic AltGr-inverted twin
7173 0 0         defined (my $auto_inv_AltGr = $data->{faces}{$F}{'[deadkeyInvAltGrKey]'}{$d}) or next;
7174 0           my $b1 = $data->{faces}{$F}{'[deadkeyFaceInvAltGr]'}{my $a = $data->charhex2key($auto_inv_AltGr)};
7175 0 0         $data->{faces}{$F}{'[deadkeyFaceHexMapInv]'}{$d} = $data->linked_faces_2_hex_map($F, $b1) if $b1;
                 # This inner $D (a doc string) hides the outer $D (the '[deadkeyFace]' hash) up to the
                 # end of the loop body
7176 0           my $D = $data->{faces}{$F}{'[prefixDocs]'}{$d};
7177 0 0         $data->{faces}{$F}{'[prefixDocs]'}{$data->key2hex($a)} = 'AltGr-inverted: ' . (defined $D ? $D : "[[$d]]");
7178 0           my $S = $data->{faces}{$F}{'[Show]'}{$d};
7179 0 0         $data->{faces}{$F}{'[Show]'}{$data->key2hex($a)} = (defined $S ? $S : $data->charhex2key($d)) . $Ex;
7180             }
7181            
                 # @protect_chr starts empty; it collects (as hex strings) the keys added to '[auto_dead]' below
7182 0           my($flip_AltGr, @protect_chr) = $data->{faces}{$F}{'[Flip_AltGr_Key]'}; # Who put it into deadkeyFace???
7183 0 0         if (defined $flip_AltGr) {
7184 0           $flip_AltGr = $data->key2hex($data->charhex2key($flip_AltGr));
7185 0           push @protect_chr, $flip_AltGr;
7186             $data->{faces}{$F}{'[prefixDocs]'}{$flip_AltGr} = 'AltGr-inverted base face'
7187 0 0         unless defined $data->{faces}{$F}{'[prefixDocs]'}{$flip_AltGr};
7188 0 0         $data->{faces}{$F}{'[Show]'}{$flip_AltGr} = $Ex unless defined $data->{faces}{$F}{'[Show]'}{$flip_AltGr};
7189             }
                 # '[Explicit_AltGr_Invert]' is read as a flat list of pairs; @C holds the $i-th pair as hex
7190 0   0       my $expl = $data->{faces}{$F}{'[Explicit_AltGr_Invert]'} || [];
7191 0           for my $i (1..(@$expl/2)) {
7192 0           my @C = map $data->key2hex($expl->[2*$i + $_]), -2, -1;
7193 0           push @protect_chr, $C[1];
7194 0           my $D = $data->{faces}{$F}{'[prefixDocs]'}{$C[0]};
7195 0 0         $data->{faces}{$F}{'[prefixDocs]'}{$C[1]} = 'AltGr-inverted: ' . (defined $D ? $D : "[[$C[0]]]");
7196 0           my $S = $data->{faces}{$F}{'[Show]'}{$C[0]};
7197 0 0         $data->{faces}{$F}{'[Show]'}{$C[1]} = (defined $S ? $S : $data->charhex2key($C[0])) . $Ex;
7198             }
                 # NOTE(review): the key used here is a code point number (ord), while massage_imported
                 # stores the character itself in '[auto_dead]' - confirm that both forms are intended
7199 0           $data->{faces}{$F}{'[auto_dead]'}{ord $data->charhex2key($_)}++ for @protect_chr;
7200             # warn " Keys HexMap: ", join ', ', sort keys %{$data->{faces}{$F}{'[deadkeyFaceHexMap]'}};
7201             }
7202            
7203 0           for my $F (keys %{ $data->{faces} }) { # Finally, collect the stats
  0            
7204 0 0 0       next if 'HASH' ne ref $data->{faces}{$F} or $F =~ /\bVK$/; # "parent" taking keys for a child
7205 0 0         next if $F =~ /#\@?#\@?(Inv)?#\@?/; # Face-on-a-deadkey
7206 0           my %seenExtra;
                 # Prefix keys named as prefix...=XXXX (hex, or one character) in the '[output_layers]' strings
7207 0 0         my @extras = ( "@{ $data->{faces}{$F}{'[output_layers]'} || [''] }" =~ /\bprefix(?:\w*)=([0-9a-fA-F]{4,6}\b|.(?![^ ]))/g );
  0            
7208 0           my %is_extra = map { ($data->charhex2key($_), 1) } @extras; # extra layers (on bizarre modifiers)
  0            
7209 0           for my $deadKEY ( sort keys %{ $data->{faces}{$F}{'[deadkeyFace]'}} ) {
  0            
7210 0           my $deadKey = $data->charhex2key($deadKEY);
7211 0 0         next unless $is_extra{$deadKey};
7212 0           my $FFF = $data->{faces}{$F}{'[deadkeyFace]'}{$deadKEY};
7213 0 0         my $cov1 = $data->{faces}{$FFF}{'[coverage0]'} # XXXX not layer0coverage0 - may slide down to layer0
7214             or warn("Deadkey `$deadKey' on face `$F' -> unmassaged face"), next;
7215             $seenExtra{$_}++
7216 0 0 0       for map {ref() ? $_->[0] : $_} grep !(ref and $_->[2]), @$cov1; # Skip 2nd level deadkeys
  0            
7217             }
7218 0           $data->{faces}{$F}{'[coverageExtra]'} = \%seenExtra;
7219            
                 # The Compose key is created only for faces which define '[ComposeKey]'
7220 0 0         next unless my $prefix = $data->{faces}{$F}{'[ComposeKey]'};
7221 0           $data->auto_dead_can_wrap($F); # All manual deadkeys are set, so auto may be flexible
7222 0           $data->create_composekey($F, $prefix);
7223             }
7224            
                 # Second statistics pass; for faces with '[deadkeyFace]' it stores '[coverageExtra]' again
7225 0           for my $F (keys %{ $data->{faces} }) { # Finally, collect the stats
  0            
7226 0 0 0       next if 'HASH' ne ref $data->{faces}{$F} or $F =~ /\bVK$/; # "parent" taking keys for a child
7227 0 0         next if $F =~ /#\@?#\@?(Inv)?#\@?/; # Face-on-a-deadkey
                 # Only $seen_prefix receives a value here; the hashes declared with it start empty
7228 0           my($seen_prefix, %seen0, %seen00, %seen1, %seen1only, %seenExtra) = $data->{faces}{$F}{'[coverage0_prefix]'};
7229             # warn("Face `$F' has no [deadkeyFace]"),
7230 0 0         next unless $data->{faces}{$F}{'[deadkeyFace]'};
7231             # next;
7232 0           my (%check_later, %coverage1_prefix);
7233             # warn "...... face `$F',\tprefixes0 ", keys %$seen_prefix;
7234             # $seen_prefix = {%$seen_prefix}; # Deep copy
7235             # $seen_prefix->{$_}++ for @{ $data->{faces}{$F}{'[dead_in_VK_array]'} || [] };
7236 0 0         my @extras = ( "@{ $data->{faces}{$F}{'[output_layers]'} || [''] }" =~ /\bprefix(?:\w*)=([0-9a-fA-F]{4,6}\b|.(?![^ ]))/g );
  0            
7237 0           my %is_extra = map { ($data->charhex2key($_), 1) } @extras; # extra layers (on bizarre modifiers)
  0            
7238 0           for my $deadKEY ( sort keys %{ $data->{faces}{$F}{'[deadkeyFace]'}} ) {
  0            
                 # Seed %seen0/%seen00 from the base face's '[coverage00]' (repeated while %seen0 is empty)
7239 0 0         unless (%seen0) { # Do not calculate if $F has no deadkeys...
7240 0           $seen0{$_}++ for @{ $data->{faces}{$F}{'[coverage00]'} };
  0            
7241 0           %seen00 = %seen0;
7242             }
7243             ### XXXXX Directly linked faces may have some chars unreachable via the switch-prefixKey
7244 0           my ($deadKey, $not_in_0) = $data->charhex2key($deadKEY);
7245             # It does not make sense to not include it into the summary: 0483 on US is such...
7246 0 0         $not_in_0++, $check_later{$deadKey}++ unless $seen_prefix->{$deadKey}; # For multi-prefix maps, and extra layers
7247 0           my ($FFF, @dd2) = $data->{faces}{$F}{'[deadkeyFace]'}{$deadKEY};
7248 0 0         my $cov1 = $data->{faces}{$FFF}{$is_extra{$deadKey} ? '[coverage0]' : '[coverage00]'} # XXXX not layer0coverage0 - may slide down to layer0
    0          
7249             or warn("Deadkey `$deadKey' on face `$F' -> unmassaged face"), next;
                 # One comma-joined statement, run for every character of $cov1:
                 #   %seen1 counts characters which were already present in %seen0;
                 #   %seen1only counts those absent from %seen00, but only for deadkeys which are
                 #   found in $seen_prefix or are "extra"; %seenExtra counts those of "extra" deadkeys
7250             ($seen0{$_}++ or $seen1{$_}++),
7251             ($not_in_0 and not $is_extra{$deadKey}) || $seen00{$_} || $seen1only{$_}++, # Only for multi-prefix maps
7252             $is_extra{$deadKey} && $seenExtra{$_}++ # Only for extra modifiers maps
7253 0 0 0       for map {ref() ? $_->[0] : $_} grep !(ref and $_->[2]), @$cov1; # Skip 2nd level deadkeys
  0   0        
      0        
      0        
7254 0 0         if (my $d2 = $data->{faces}{$F}{'[dead2]'}{$deadKey}) {
7255 0           my $map = $data->linked_faces_2_hex_map($F, $FFF);
7256             # warn "linked map (face=$F) = ", keys %$d2;
                 # Translate the keys of '[dead2]' through the hex map into the keys they become in $FFF
7257 0 0 0       @dd2 = map $data->charhex2key($_), map {($_ and ref $_) ? $_->[0] : $_} map $map->{$data->key2hex($_)}, keys %$d2;
  0            
7258             # warn "sub-D2 (face=$F) = ", @dd2;
7259             }
7260             #warn "2nd level prefixes for `$deadKey': ", keys %{$data->{faces}{$FFF}{'[coverage0_prefix]'} || {}};
7261             #warn "2nd level prefixes for `$deadKey': <@dd2> ", keys %{$data->{faces}{$F}{'[dead2]'}{$deadKey} || {}};
7262 0 0         unless ($not_in_0) {
7263             # warn "sub-cov0 (face=$F) = ", keys %{ $data->{faces}{$FFF}{'[coverage0_prefix]'} || {} };
7264 0 0         $coverage1_prefix{$_}++ for keys %{ $data->{faces}{$FFF}{'[coverage0_prefix]'} || {} };
  0            
7265             # warn "sub-D2 (face=$F) = ", @dd2;
7266 0           $coverage1_prefix{$_}++ for @dd2;
7267             }
7268             # warn "...... deadkey `$deadKey' reached0 in face `$F'" unless $not_in_0;
7269             }
7270            
                 # Deadkeys found neither among the prefixes of the base face nor among the second-level
                 # prefixes: those on extra layers are recorded, the others are reported
7271 0   0       my @check = grep { !$coverage1_prefix{$_} and !$is_extra{$_} } keys %check_later;
  0            
7272 0 0         my @only_extra = grep { !$coverage1_prefix{$_} and $is_extra{$_} } keys %check_later;
  0            
7273 0           $data->{faces}{$F}{'[only_extra]'} = { map {($_, 1)} @only_extra };
  0            
7274            
7275 0 0         my $_s = (@check > 1 ? 's' : '');
                 # The two `keys' lists are passed to warn as lists, so they are printed without separators
7276 0 0         warn("Prefix key$_s <@check> not reached (without double prefix keys?) in face `$F'; later=", keys %check_later, " ; cov1=", keys %coverage1_prefix) if @check;
7277 0           $data->{faces}{$F}{'[coverage1]'} = [sort keys %seen1];
7278 0           $data->{faces}{$F}{'[coverage1only]'} = [sort keys %seen1only];
7279 0           $data->{faces}{$F}{'[coverage1only_hash]'} = \%seen1only;
7280 0           $data->{faces}{$F}{'[coverage_hash]'} = \%seen0;
7281 0           $data->{faces}{$F}{'[coverageExtra]'} = \%seenExtra;
7282             }
7283             $data
7284 0           }
7285            
7286             sub massage_deadkeys_win ($$) {
                 # massage_deadkeys_win($self, $h): gather deadkey descriptions from two sources - the
                 # 'unparsed_data' texts of the "[unparsed]/DEADKEYS..." sections, and the files named by
                 # 'klc_filename' in the "DEADKEYS..." sections - parse each with read_deadkeys_win(), and
                 # store the resulting per-deadkey 'map' and 'name' values back into $h.  Returns $self.
                 # @process (texts to parse) and @to (section paths to store into) are filled in parallel.
7287 0     0 0   my($self, $h, @process, @to) = (shift, shift);
7288 0           my @K = grep m(^\[unparsed]/DEADKEYS\b), @{$h->{'[keys]'}};
  0            
7289             # warn "Found deadkey sections `@K'";
7290             # my $H = $h->{'[unparsed]'};
7291 0           for my $k (@K) {
7292 0           push @process, $self->get_deep($h, (split m(/), $k), 'unparsed_data');
                 # The results go to the section of the same name without the "[unparsed]/" prefix
7293 0           (my $k1 = $k) =~ s(^\[unparsed]/)();
7294 0           push @to, $k1
7295             }
7296 0           @K = grep m(^DEADKEYS\b), @{$h->{'[keys]'}};
  0            
7297 0           for my $k (@K) {
7298 0           my $slot = $self->get_deep($h, split m(/), $k);
7299 0 0         next unless exists $slot->{klc_filename};
                 # The file is decoded as UTF-16 and slurped whole ($/ is undefined for this iteration)
7300             open my $fh, '< :encoding(UTF-16)', $slot->{klc_filename}
7301 0 0         or die "open of =`$slot->{klc_filename}' failed: $!";
7302 0           local $/;
7303 0           my $in = <$fh>;
7304 0           push @process, $in;
7305 0           push @to, $k;
7306             }
7307 0           for my $k1 (@to) {
7308             #warn "DK sec `$k' -> `$v', <", join('> <', keys %{$h->{'[unparsed]'}{DEADKEYS}{la_ru}}), ">";
7309             #warn "DK sec `$k' -> `$v', <$h->{'[unparsed]'}{DEADKEYS}{la_ru}{unparsed_data}>";
7310 0           my $v = shift @process;
                 # $t (the rest of the input) is not used below
7311 0           my($o,$d,$t) = $self->read_deadkeys_win($v); # Translation tables, names, rest of input
                 # @h is declared here but not used
7312 0           my (@parts, @h) = split m(/), $k1;
                 # %seen serves only as the union of the keys of %$o and %$d
7313 0           my %seen = (%$o, %$d);
7314 0           for my $kk (keys %seen) {
7315             #warn "DK sec `$k1', deadkey `$kk'. Map: ", $self->array2string( [%{$o->{$kk} || {}}] );
7316 0           my $slot = $self->get_deep($h, @parts, $kk);
                 # A conflict is only reported; the new definition is stored anyway
7317             warn "Deadkey `$kk' defined for `$k1' conflicts with previous definition"
7318 0 0 0       if $slot and grep exists $slot->{$_}, qw(map name);
7319 0 0         $self->put_deep($h, $o->{$kk}, @parts, $kk, 'map') if exists $o->{$kk};
7320 0 0         $self->put_deep($h, $d->{$kk}, @parts, $kk, 'name') if exists $d->{$kk};
7321             }
7322             }
7323             $self
7324 0           }
7325            
7326             # http://bepo.fr/wiki/Pilote_Windows
7327             # http://www.phon.ucl.ac.uk/home/wells/dia/diacritics-revised.htm#two
7328             # http://msdn.microsoft.com/en-us/library/windows/desktop/ms646280%28v=vs.85%29.aspx
7329            
                 # %oem_keys: the list below is written as "VK-name character" pairs; `reverse' turns it
                 # into a hash from the character (or a character followed by `#') to the VK name.
                 # Since `#' is used as data inside qw(), the `qw' warnings are switched off for this block.
7330 1     1   9918 my %oem_keys = do {{ no warnings 'qw' ; reverse (qw(
  1         3  
  1         15208  
7331             OEM_MINUS -
7332             OEM_PLUS =
7333             OEM_4 [
7334             OEM_6 ]
7335             OEM_1 ;
7336             OEM_7 '
7337             OEM_3 `
7338             OEM_5 \
7339             OEM_COMMA ,
7340             OEM_PERIOD .
7341             OEM_2 /
7342             OEM_102 \#
7343             SPACE #
7344             DECIMAL .#
7345             DECIMAL ,#
7346             ABNT_C1 /#
7347             ABNT_C1 ¥
7348             ABNT_C1 ¦
7349             )) }}; #'# Here # marks "second occurrence" of keys...
7350             # Extra bindings: see http://www.fysh.org/~zefram/keyboard/xt_scancodes.txt (after “===”)
7351             # e005 Messenger (or Files); e007 Redo; e008 undo; e009 ApplicationLeft; e00a Paste;
7352             # e00b,e011,e012,e01f ScrollWheel-to-key-emulation
7353             # e013 Word; e014 Excel; e015 Calendar; e016 Log Off; e017 Cut; e018 Copy; e01e ApplicationRight
7354             # e03b -- e044 (Microsoft/Logitech Fkeys_without_Flock, F1...F10)
7355             # e063 Wake; e064 My Pictures [or Keypad-) ]
7356             # For type 4 of keyboard (same as types 1,3, except OEM_AX, (NON)CONVERT, ABNT_C1)
7357             # except KANA,(NON)CONVERT,; scancode of YEN,| for OEM_8 is our invention; after OEM_8 all is junk (non-scancodes???)...
                 # %scan_codes: the list is written as "hex-code VK-name" pairs; `reverse' turns it into a
                 # hash from the VK name to the code.  Many names are listed more than once: since the list
                 # is reversed before the assignment, the pair written FIRST below is the one which is
                 # kept (e.g. TAB gives 0F, KANA gives 70, and OEM_MINUS gives the lowercase 0c).
7358             my %scan_codes = (reverse qw(
7359             02 1
7360             03 2
7361             04 3
7362             05 4
7363             06 5
7364             07 6
7365             08 7
7366             09 8
7367             0a 9
7368             0b 0
7369             0c OEM_MINUS
7370             0d OEM_PLUS
7371             10 Q
7372             11 W
7373             12 E
7374             13 R
7375             14 T
7376             15 Y
7377             16 U
7378             17 I
7379             18 O
7380             19 P
7381             1a OEM_4
7382             1b OEM_6
7383             1e A
7384             1f S
7385             20 D
7386             21 F
7387             22 G
7388             23 H
7389             24 J
7390             25 K
7391             26 L
7392             27 OEM_1
7393             28 OEM_7
7394             29 OEM_3
7395             2b OEM_5
7396             2c Z
7397             2d X
7398             2e C
7399             2f V
7400             30 B
7401             31 N
7402             32 M
7403             33 OEM_COMMA
7404             34 OEM_PERIOD
7405             35 OEM_2
7406             39 SPACE
7407             56 OEM_102
7408             53 DECIMAL
7409            
7410             01 ESCAPE
7411             0C OEM_MINUS
7412             0D OEM_PLUS
7413             0E BACK
7414             0F TAB
7415             1A OEM_4
7416             1B OEM_6
7417             1C RETURN
7418             1D LCONTROL
7419             27 OEM_1
7420             28 OEM_7
7421             29 OEM_3
7422             2A LSHIFT
7423             2B OEM_5
7424             33 OEM_COMMA
7425             34 OEM_PERIOD
7426             35 OEM_2
7427             36 RSHIFT
7428             37 MULTIPLY
7429             38 LMENU
7430             3A CAPITAL
7431             3B F1
7432             3C F2
7433             3D F3
7434             3E F4
7435             3F F5
7436             40 F6
7437             41 F7
7438             42 F8
7439             43 F9
7440             44 F10
7441             45 NUMLOCK
7442             46 SCROLL
7443             47 HOME
7444             48 UP
7445             49 PRIOR
7446             4A SUBTRACT
7447             4B LEFT
7448             4C CLEAR
7449             4D RIGHT
7450             4E ADD
7451             4F END
7452             50 DOWN
7453             51 NEXT
7454             52 INSERT
7455             e053 DELETE
7456             54 SNAPSHOT
7457             56 OEM_102
7458             57 F11
7459             58 F12
7460             59 CLEAR
7461             5A OEM_WSCTRL
7462             5B OEM_FINISH
7463             5C OEM_JUMP
7464             5C OEM_AX
7465             5D EREOF
7466             5E OEM_BACKTAB
7467             5F OEM_AUTO
7468             62 ZOOM
7469             63 HELP
7470             64 F13
7471             65 F14
7472             66 F15
7473             67 F16
7474             68 F17
7475             69 F18
7476             6A F19
7477             6B F20
7478             6C F21
7479             6D F22
7480             6E F23
7481             6F OEM_PA3
7482             70 KANA
7483             71 OEM_RESET
7484             73 ABNT_C1
7485             76 F24
7486             79 CONVERT
7487             7B NONCONVERT
7488             7B OEM_PA1
7489             7C TAB
7490             7E ABNT_C2
7491             7F OEM_PA2
7492             e010 MEDIA_PREV_TRACK
7493             e019 MEDIA_NEXT_TRACK
7494             e01C RETURN
7495             e01D RCONTROL
7496             e020 VOLUME_MUTE
7497             e021 LAUNCH_APP2
7498             e022 MEDIA_PLAY_PAUSE
7499             e024 MEDIA_STOP
7500             e02E VOLUME_DOWN
7501             e030 VOLUME_UP
7502             e032 BROWSER_HOME
7503             e035 DIVIDE
7504             e037 SNAPSHOT
7505             e038 RMENU
7506             e046 CANCEL
7507             e047 HOME
7508             e048 UP
7509             e049 PRIOR
7510             e04B LEFT
7511             e04D RIGHT
7512             e04F END
7513             e050 DOWN
7514             e051 NEXT
7515             e052 INSERT
7516             e053 DELETE
7517             e05B LWIN
7518             e05C RWIN
7519             e05D APPS
7520             e05E POWER
7521             e05F SLEEP
7522             e065 BROWSER_SEARCH
7523             e066 BROWSER_FAVORITES
7524             e067 BROWSER_REFRESH
7525             e068 BROWSER_STOP
7526             e069 BROWSER_FORWARD
7527             e06A BROWSER_BACK
7528             e06B LAUNCH_APP1
7529             e06C LAUNCH_MAIL
7530             e06D LAUNCH_MEDIA_SELECT
7531             e11D PAUSE
7532            
7533             7D OEM_8
7534            
7535             10 SHIFT
7536             11 CONTROL
7537             12 MENU
7538             15 KANA
7539             15 HANGUL
7540             17 JUNJA
7541             18 FINAL
7542             19 HANJA
7543             19 KANJI
7544             1C CONVERT
7545             1D NONCONVERT
7546             1E ACCEPT
7547             1F MODECHANGE
7548             29 SELECT
7549             2A PRINT
7550             2B EXECUTE
7551            
7552             60 NUMPAD0
7553             61 NUMPAD1
7554             62 NUMPAD2
7555             63 NUMPAD3
7556             64 NUMPAD4
7557             65 NUMPAD5
7558             66 NUMPAD6
7559             67 NUMPAD7
7560             68 NUMPAD8
7561             69 NUMPAD9
7562             6C SEPARATOR
7563             B4 MEDIA_LAUNCH_MAIL
7564             B5 MEDIA_LAUNCH_MEDIA_SELECT
7565             B6 MEDIA_LAUNCH_APP1
7566             B7 MEDIA_LAUNCH_APP2
7567            
7568             E5 PROCESSKEY
7569             E7 PACKET
7570             F6 ATTN
7571             F7 CRSEL
7572             F8 EXSEL
7573             FA PLAY
7574             FC NONAME
7575             FD PA1
7576             FE OEM_CLEAR
7577            
7578             )); # http://www.opensource.apple.com/source/WebCore/WebCore-1C25/platform/gdk/KeyboardCodes.h
7579             # the part after PAUSE is junk...
7580            
7581             # [ ] \ space
                 # %oem_control: VK name => a string made of the character of the key followed by 4 hex
                 # digits (for SPACE the string holds the 4 hex digits only).
7582             my %oem_control = (qw(
7583             OEM_4 [001b
7584             OEM_6 ]001d
7585             OEM_5 \001c
7586             SPACE 0020
7587             OEM_102 \001c
7588             )); # In ru layouts, only entries which match the char are present
                 # %do_control: the first character of each value above => the rest of the value
                 # ('[' => '001b', ...).  The SPACE value has no leading character, so the regex splits it
                 # into '0' => '020'; that bogus entry is deleted and replaced by the entry for ' '.
7589             my %do_control = map /^(.)(.+)/, values %oem_control;
7590             $do_control{' '} = '0020';
7591             delete $do_control{0};
7592            
7593             my %default_bind = ( (map {( "NUMPAD$_" => [[$_]] )} 0..9 ),
                 # %default_bind: VK name => array of arrays of characters (NUMPAD0..NUMPAD9 map to their digit).
                 # NOTE(review): presumably one inner array per layer, holding the unshifted and the
                 # shifted character - confirm against the code using this table.
7594             TAB => [["\t", "\t"]],
7595             ADD => [["+", "+"]],
7596             SUBTRACT => [["-", "-"]],
7597             MULTIPLY => [["*", "*"]],
7598             DIVIDE => [["/", "/"]],
7599             RETURN => [["\r", "\r"], ["\n"]],
7600             BACK => [["\b", "\b"], ["\x7f"]],
7601             ESCAPE => [["\e", "\e"], ["\e"]],
7602             CANCEL => [["\cC", "\cC"], ["\cC"]],
7603             );
7604            
7605             sub get_VK ($$) {
                 # get_VK($self, $f): look up the 'VK' entry for the face $f (a "/"-separated path below
                 # 'faces') via get_deep_via_parents(); returns an empty hash ref when the lookup gives
                 # a false value.
7606 0     0 0   my ($self, $f) = (shift, shift);
7607 0 0         $self->get_deep_via_parents($self, undef, 'faces', (split m(/), $f), 'VK') || {}
7608             # $self->{faces}{$f}{VK} || {}
7609             }
7610            
7611             sub massage_VK ($$) {
                 # Appends the control characters and the VK-defined keys of face $f to copies
                 # of its layers.  Every layer NAME gets a copy stored as NAME<$f>, with the
                 # extra entries placed from the length of the first layer onwards, and the
                 # layer list of the face is replaced by the new names.
                 # Stored in the face along the way: [non_VK], [VK_off], [ini_layers],
                 # [start_ctrl0], [start_ctrl], [end_ctrl].  The arrays of the VK table are
                 # rewritten in place (scancode filled in, entries normalized).
                 # Returns three refs: an array of the keys seen in VK entries (an empty
                 # string stands for -1 entries), an array of the dead keys in order of first
                 # appearance, and a hash counting the dead keys.
7612 0     0 0   my ($self, $f, %seen, %seen_dead, @dead, @ctrl) = (shift, shift);
7613 0           my $l0 = $self->{faces}{$f}{layers}[0];
7614 0           $self->{faces}{$f}{'[non_VK]'} = @{ $self->{layers}{$l0} };
  0            
7615 0           my $create_a_c = $self->{faces}{$f}{'[create_alpha_ctrl]'};
7616 0 0         $create_a_c = $create_alpha_ctrl unless defined $create_a_c;
7617 0 0         my $EXTR = [ ["\r","\n"], ["\b","\x7F"], ["\t","\cC"], ["\x1b","\x1d"], # Enter/C-Enter/Bsp/C-Bsp/Tab/Cancel/Esc=C-[/C-]
    0          
7618             ["\x1c", ($create_a_c ? "\cZ" : ())], ($create_a_c>1 ? (["\x1e", "\x1f"], ["\x00"]) : ())]; # C-\ C-z, C-^ C-_
                 # @ctrl records the size of $EXTR before and after the characters ^A..^Z not
                 # listed yet are appended to it, two per entry.
7619 0 0         if ($create_a_c) {
7620 0           my %s;
7621 0           push @ctrl, scalar @$EXTR;
7622 0           $s{$_}++ for $self->flatten_arrays($EXTR);
7623 0           my @ctrl_l = grep !$s{$_}, map chr($_), 1..26;
7624 0           push @$EXTR, [shift @ctrl_l, shift @ctrl_l] while @ctrl_l > 1;
7625 0 0         push @$EXTR, [@ctrl_l] if @ctrl_l;
7626 0           push @ctrl, scalar @$EXTR;
7627             }
                 # One list of extra entries per layer; the lists for the layers after the
                 # first start out as empty placeholders of the same length as $EXTR.
7628 0           my @extra = ( $EXTR, map [([]) x @$EXTR], 1..$#{ $self->{faces}{$f}{layers} } );
  0            
7629 0           my $VK = $self->get_VK($f);
7630 0           $self->{faces}{$f}{'[VK_off]'} = \ my %VK_off;
7631 0           for my $K (sort keys %$VK) {
7632 0           my ($v, @C) = $VK->{$K};
7633 0 0 0       $v->[0] = $scan_codes{$K} or die("Can't find the scancode for the VK key `$K'")
7634             unless length $v->[0];
7635             # warn 'Key: <', join('> <', @$v), '>';
7636 0           my $c = 0;
7637 0           $VK_off{$K} = @{ $extra[0] }; # Where in the layouts is the VK key
  0            
                 # The entries after the scancode come in pairs, one pair per layer
                 # (entry number $c goes to layer int($c/2)).  A trailing @ marks a dead key
                 # and -1 stands for an absent key.
7638 0           for my $k (@$v[1..$#$v]) {
7639 0 0         ($k, my $dead) = ($k =~ /^(.+?)(\@?)$/) or die "Empty key in VK list";
7640 0 0         $seen{$k eq '-1' ? '' : ($k = $self->charhex2key($k))}++;
7641 0 0 0       $seen_dead{$k}++ or push @dead, $k if $dead and $k ne '-1';
      0        
7642 0 0         my $kk = ($k eq '-1' ? undef : $k);
7643 0 0         push @{ $extra[int($c/2)] }, [] unless $c % 2;
  0            
7644 0 0         push @{ $extra[int($c/2)][-1] }, ($dead ? [$kk, undef, 1] : $kk); # $extra[$N] is [[$k0, $k1] ...]
  0            
7645 0 0         $kk .= $dead if defined $kk;
7646 0           push @C, $kk;
7647 0           $c++;
7648             }
7649             # warn 'Key: <', join('> <', @C), '>';
7650 0           @$v = ($v->[0], @C);
7651             }
                 # NOTE(review): the statement below copies the list of layer names only,
                 # although its trailing comment calls it a deep copy.
7652 0           $self->{faces}{$f}{'[ini_layers]'} = [ @{ $self->{faces}{$f}{layers} } ]; # Deep copy
  0            
7653 0 0         if (@extra) {
7654 0           my($start_append, @Ln);
7655 0           for my $l (0 .. $#{ $self->{faces}{$f}{layers} } ) { # Assume that in every layer a few positions after end of the
  0            
7656 0           my $oLn = my $Ln = $self->{faces}{$f}{layers}[$l]; # first layer are empty
7657 0           my $L = $self->{layers}{$Ln};
                 # The first layer fixes the insertion point and the stored offsets.
7658 0 0         unless ($l) {
7659 0           $start_append = @$L;
7660 0           $self->{faces}{$f}{'[start_ctrl0]'} = $start_append;
7661 0   0       $self->{faces}{$f}{'[start_ctrl]'} = $start_append + ($ctrl[0]||0);
7662 0   0       $self->{faces}{$f}{'[end_ctrl]'} = $start_append + ($ctrl[1]||0);
7663 0           $_ += $start_append for values %VK_off;
7664             }
7665 0           my @L = map [$_->[0], $_->[1]], @$L; # Each element is []; 1-level deep copy
7666 0           my $add = $start_append + @{ $extra[$l] } - @L;
  0            
7667 0           $L[$start_append+$_] = [] for 0..$add-1; # Avoid splicing after the end of array
7668 0           splice @L, $start_append, @{ $extra[$l] }, @{ $extra[$l] };
  0            
  0            
7669 0           push @Ln, ($Ln .= "<$f>");
7670 0           $self->{layers}{$Ln} = \@L;
7671             # At this moment ini_copy should not exist yet
7672 0 0         warn "ini_copy of `$oLn' exists; --> `$Ln'" if $self->{layers}{'[ini_copy]'}{$oLn};
7673             # $self->{layers}{'[ini_copy]'}{$Ln} = $self->{layers}{'[ini_copy]'}{$oLn} if $self->{layers}{'[ini_copy]'}{$oLn};
7674             #??? Why does not this works???
7675             #warn "ini_copy1: `$Ln' --> `$oLn'";
7676 0           $self->{layers}{'[ini_copy1]'}{$Ln} = $self->deep_copy($self->{layers}{$oLn});
7677             }
7678 0           $self->{faces}{$f}{layers} = \@Ln;
7679             }
7680 0           ([keys %seen], \@dead, \%seen_dead)
7681             }
7682            
7683             sub format_key ($$$$) {
                 # Formats one cell of a layout row.  Returns -1 for an undefined $k; the
                 # character itself for a single ASCII letter or digit; %% when $k is not
                 # exactly one character or lies above U+FFFF; otherwise the result of
                 # key2hex.  For a dead key ($dead true) the letter/digit and hex forms get
                 # @ appended, and the key is counted in the hash $used (this happens before
                 # the %% check, so keys reported as %% are counted too).
7684 0     0 0   my ($self, $k, $dead, $used) = (shift, shift, shift, shift);
7685 0 0         return -1 unless defined $k;
7686 0 0         my $mod = ($dead ? '@' : '') and $used->{$k}++;
    0          
7687 0 0         return "$k$mod" if $k =~ /^[A-Z0-9]$/i;
7688 0 0 0       return '%%' if 1 != length $k or ord $k > 0xFFFF;
7689 0           $self->key2hex($k) . $mod;
7690             }
7691            
7692             sub auto_capslock($$) {
                 # $u is a pair [unshifted, shifted].  Returns 1 when both are defined, differ,
                 # and the shifted one equals the uppercase form of the unshifted one, where
                 # the uppercase form is the %fix entry if there is one and otherwise uc (first
                 # test) or ucfirst (second test).  Returns 0 in every other case.
7693 0     0 0   my ($self, $u) = (shift, shift);
7694 0           my %fix = qw( ӏ Ӏ ); # Perl 5.8.8 uc is wrong
7695 0 0 0       return 0 unless defined $u->[0] and defined $u->[1] and $u->[0] ne $u->[1];
      0        
7696 0 0 0       return 1 if ($fix{$u->[0]} || uc($u->[0])) eq $u->[1];
7697 0 0 0       return 1 if ($fix{$u->[0]} || ucfirst($u->[0])) eq $u->[1];
7698 0           return 0;
7699             }
7700            
7701             my %double_scan_VK = ('56 OEM_102' => '7D OEM_8', # ISO vs JIS (right) keyboard
7702             # '73 ABNT_C1' => '7E ABNT_C2', # ABNT (right) = JIS (left) keyboard vs ABNT (numpad)
7703             # '53 DECIMAL' => '7E ABNT_C2', # NUMPAD-period vs ABNT (numpad) [Does not work??? DECIMAL too late?]
7704             '34 OEM_PERIOD' => '7E ABNT_C2', # period vs ABNT (numpad)
7705             '7B NONCONVERT' => '79 CONVERT'); # JIS keyboard: left of SPACE, right of SPACE
                 # Keys and values above are SCANCODE VK-name pairs: output_unit0 looks a
                 # finished row up by its pair and queues the value as a secondary row, which
                 # output_added_units emits unless that scancode or VK name was already used.
                 # The table below gives, per key name, the character put into the second
                 # control column by output_unit00 when create_alpha_ctrl is above 1.
7706             my %shift_control_extra = (2 => "\x00", 6 => "\x1e", OEM_MINUS => "\x1f");
7707            
7708             { my(%seen, %seen_scan, %seen_VK, @add_scan_VK, @ligatures, @decimal);
7709 0     0 0   sub reset_units ($) { @decimal = @ligatures = @add_scan_VK = %seen_scan = %seen_VK = %seen = () } # empties all six variables shared by the subs of this scope
7710            
7711             sub output_unit00 ($$$$$$$;$$) {
                 # Builds the row of cells for key $k of face $face and returns whatever
                 # output_unit_KK returns for it.  Returns nothing when no scancode is known
                 # (after a warning), when $skippable is set and nothing was found for the
                 # key, and for the DECIMAL key, whose row is only stored in @decimal.
                 # $U holds one [unshifted, shifted] pair per layer; an entry may itself be an
                 # array whose element 0 is the character and element 2 a dead-key flag.
                 # $N is the position of the key in the layers (undef when called for a VK
                 # key); $known_scancode is the scancode when called for a VK key; $Used is
                 # passed through to output_unit_KK.
7712 0     0 0   my ($self, $face, $k, $U, $N, $deadkeys, $Used, $known_scancode, $skippable) = (shift, shift, shift, shift, shift, shift, shift, shift, shift);
7713 0 0 0       my $sc = ($known_scancode or $scan_codes{$k}) or warn("Can't find the scancode for the key `$k'"), return;
7714 0           my(@cntrl, %s, $cnt); # Set Control-KEY if is [ or ] or \
7715 0 0 0       my $u = [map { defined() ? [map {($_ and ref $_) ? $_->[0] : $_} @$_] : $_ } @$U]; # deep copy with $_->[0] on a key-array
  0 0          
  0            
                 # @cntrl is chosen by the following four statements; a later one that
                 # applies overrides an earlier one (the second only fills an empty @cntrl).
7716 0 0 0       @cntrl = chr hex $do_control{$u->[0][0]} if $do_control{$u->[0][0] || 'N/A'}; # \ ---> ^\
7717 0 0 0       @cntrl = @{ $default_bind{$k}[1] } if !@cntrl and $default_bind{$k}[1];
  0            
7718 0           my $create_a_c = $self->{faces}{$face}{'[create_alpha_ctrl]'};
7719 0 0         $create_a_c = $create_alpha_ctrl unless defined $create_a_c;
7720 0 0 0       @cntrl = (chr(0x1F & ord $k)) x $create_a_c if $k =~ /^[A-Z]$/ and $create_a_c;
7721 0 0 0       @cntrl = (undef, $shift_control_extra{$k}) if $create_a_c > 1 and $shift_control_extra{$k};
7722 0   0       $deadkeys ||= []; # known_scancode is true when we start from VK, and $deadkeys is (arr of arrays) vs (hash per layer)
7723             my @KK = map [$_->[2], $_->[0], # 0:layer#, 1:shift#, 2:char, 3:deadkeys(layer), 4:char_array ==> 0:char, 1:layer, 2:dead
7724 0 0         (ref $_->[4] ? $_->[4][2] : ($known_scancode ? $_->[3][$_->[1]] : $_->[3]{defined $_->[2] ? $_->[2] : 'n/a'}))],
    0          
    0          
7725             map [@$_[0,1], $u->[$_->[0]][$_->[1]], $deadkeys->[$_->[0]], $U->[$_->[0]][$_->[1]]],
7726             map +([$_, 0], [$_, 1]), 0..$#$u;
                 # %s collects the characters already present in the row.
7727 0   0       defined and $s{$_}++ for map $_->[0], @KK;
7728 0   0       ($_->[2] || 0) >= 3 and $_->[0] = $self->dead_with_inversion(!'hex', $_, $face, $self->{faces}{$face}) for @KK;
      0        
7729             #($KK[$_][2] || 0) >= 3 and warn "face=$face N=$N c=$_ <$KK[$_][0]> --> $KK[$_][2]\n" for 0..$#KK;
7730             #my @UUU = map $U->[$_->[0]][$_->[1]], map +([$_, 0], [$_, 1]), 0..$#$u;
7731             #ref $UUU[$_] and ($UUU[$_][2] || 0) >= 3 and warn "face=$face N=$N cc=$_ <$UUU[$_][0]> --> $UUU[$_][2]\n" for 0..$#UUU;
7732            
7733 0   0       $cnt = keys %s || @cntrl;
                 # Extra output layers: each entry of [output_layers] past the real layers
                 # must read prefix=..., prefixNOTSAME=... or prefixNOTSAMEcase=... and names a
                 # dead key; its cells are taken from the layers of that dead key at
                 # position $N, preferring characters not yet present in the row.
                 # NOTE(review): $case is captured below but not used in the visible code.
7734 0 0 0       if (my $extra = $self->{faces}{$face}{'[output_layers]'} and defined $N) { # $N not supported on VK...
7735 0           my $b = @{ $self->{faces}{$face}{layers} };
  0            
7736 0           for my $f ($b..$#$extra) {
7737             # warn "Extra layer number $f, base=$b requested while the character N=$N has " . (scalar @$u) . " layers" if $f+$b <= $#$u;
7738 0 0         (my $lll = $extra->[$f]) =~ s/^prefix(NOTSAME(case)?)?=// or die "Extra layer: expected `prefix=PREFIX', see: `$extra->[$f]'";
7739 0           my($notsame, $case) = ($1,$2);
7740 0           my $c = $self->key2hex($self->charhex2key($lll));
7741 0 0         my $L = $self->{faces}{$face}{'[deadkeyLayers]'}{$c} or die "Unknown prefix character `$c´ in extra layers";
7742 0           my @L = map $self->{layers}{$_}[$N], @$L;
7743 0           my(@CC, @pp, @OK);
7744 0 0         for my $l (@L[0 .. ($notsame ? $b-1 : 0)]) {
7745 0           my(%s1, @was);
7746 0           for my $sh (0..$#$l) {
7747 0 0         my @C = map {defined() ? (ref() ? $self->dead_with_inversion(!'hex', $_, $face, $self->{faces}{$face}) : $_) : $_} $l->[$sh];
  0 0          
7748 0 0         my @p = map {defined() ? (ref() ? $_->[2] : 0 ) : 0 } $l->[$sh];
  0 0          
7749 0 0 0       ($CC[$sh], $pp[$sh]) = ($C[0], $p[0]) if not defined $CC[$sh] and defined $C[0];
7750 0 0 0       ($CC[$sh], $pp[$sh], $OK[$sh], $s1{$C[0]}) = ($C[0], $p[0], 1,1) if !$OK[$sh] and defined $C[0] and not $s{$C[0]};
      0        
7751             ($CC[$sh], $pp[$sh], $OK[$sh], $s1{$was[0]}) = (@was, 1,1) # use unshifted if needed
7752 0 0 0       if $sh and !$OK[$sh] and defined $C[0] and defined $was[0] and not $s{$was[0]} and not $s1{$was[0]};
      0        
      0        
      0        
      0        
7753 0 0         @was = ($C[0], $p[0]) unless $sh; # may omit `unless´
7754 0 0         $cnt++ if defined $CC[$sh];
7755             }
7756             }
7757             # Avoid read-only values (can get via $#KK) which cannot be autovivified
7758 0 0         push @KK, ([]) x (2*$f - @KK) if @KK < 2*$f; # splice can't do with a gap after the end of array
7759 0           splice @KK, 2*$f, 0, map [$CC[$_], $f-$b, $pp[$_]], 0..$#CC;
7760             }
7761             }
                 # A skippable key with nothing found is dropped; one with both cells of
                 # the first layer undefined gets them from %default_bind where defined.
7762 0 0 0       return if $skippable and not $cnt;
7763 0 0 0       if ($skippable and not defined $KK[0][0] and not defined $KK[1][0]) {
      0        
7764 0           for my $shft (0,1) {
7765 0 0         $KK[$shft] = [$default_bind{$k}[0][$shft], 0] if defined $default_bind{$k}[0][$shft];
7766             ### $KK[$shft] = [$decimal[$shft], 0] if $k eq 'DECIMAL' and @decimal;
7767             }
7768             }
                 # The control cells are spliced in at index $pre_ctrl (face setting
                 # [ctrl_after_modcol], default 2*$ctrl_after); @cntrl is first resized to
                 # create_alpha_ctrl entries when $pre_ctrl lies inside the layer columns or
                 # [keep_missing_ctrl] is set.
7769 0           my $pre_ctrl = $self->{faces}{$face}{'[ctrl_after_modcol]'};
7770 0 0         $pre_ctrl = 2*$ctrl_after unless defined $pre_ctrl;
7771 0 0 0       $#cntrl = $create_a_c - 1 if $pre_ctrl < 2*@$u or $self->{faces}{$face}{'[keep_missing_ctrl]'};
7772 0 0         warn "cac=$create_a_c #cntrl=$#cntrl pre=$pre_ctrl \@u=", scalar @$u if $#cntrl < 2*$ctrl_after - 1;
7773 0           splice @KK, $pre_ctrl, 0, map [$_, 0], @cntrl;
7774 0 0         splice @KK, 15, 0, [undef, 0] if @KK >= 16; # col=15 is the fake one
7775            
7776 0 0         if ($k eq 'DECIMAL') { # may be described both via visual maps and NUMPAD
7777 0 0         my @d = @{ $decimal[1] || [] };
  0            
7778 0   0       defined $KK[$_][0] or $KK[$_] = $d[$_] for 0..$#d; # fill on the second round
7779 0           @decimal = ([$k, $u, $sc, $Used], [@KK]);
7780 0           return;
7781             }
7782 0           $self->output_unit_KK($k, $u, $sc, $Used, @KK);
7783             }
7784            
7785             sub output_unit_KK($$@) {
                 # Formats the row for key $k.  Every element of @KK is [char, layer, dead]
                 # and is rendered by format_key, with $Used->[layer] (layer 0 if unset) as
                 # the hash counting dead keys.  Cells rendered as %% are queued in
                 # @ligatures as [$k, column index, char].
                 # $u holds the per-layer [unshifted, shifted] pairs; the caps-lock value has
                 # bit 0 set from auto_capslock of layer 0 and bit 2 from layer 1.
                 # Marks $sc and $k as seen and returns a list starting with $sc, $k and
                 # $fill, where $fill is a tab for key names shorter than 8 characters.
7786 0     0 0   my ($self, $k, $u, $sc, $Used, @KK) = @_;
7787 0   0       my @K = map $self->format_key($_->[0], $_->[2], $Used->[$_->[1] || 0]), @KK;
7788             #warn "keys with ligatures: <@K>" if grep $K[$_] eq '%%', 0..$#K;
7789 0           push @ligatures, map [$k, $_, $KK[$_][0]], grep $K[$_] eq '%%', 0..$#K;
7790 0           my $keys = join "\t", @K;
7791 0           my @kk = map $_->[0], @KK;
7792 0 0         my $fill = ((8 <= length $k) ? '' : "\t");
                 # $expl shows control characters below 0x20 in caret form and undefined
                 # cells as a space; $expl1 adds the UName of every cell when UNames exists.
7793 0 0         my $expl = join ", ", map +(defined() ? (0x20 > ord() ? '^'.chr(0x40+ord) : $_) : ' '), @kk;
    0          
7794 0 0         my $expl1 = exists $self->{UNames} ? "\t// " . join ", ", map +((defined $_) ? $self->UName($_) : ' '), @kk : '';
    0          
7795 0           my $capslock = ($self->auto_capslock($u->[0])) | (($self->auto_capslock($u->[1])) << 2);
7796 0           $seen_scan{$sc}++;
7797 0           $seen_VK{$k}++;
                 # NOTE(review): the return expression below ends in a bare < followed by a
                 # text line and an EOP line; presumably a here-document whose introducer was
                 # lost in this listing - confirm against the module source.
7798 0           ($sc, $k, $fill, <
7799             $capslock\t$keys\t// $expl$expl1
7800             EOP
7801             }
7802            
7803             sub output_unit0 ($$$$$$$;$$) {
                 # Calls output_unit00 with the unchanged argument list (the & call form
                 # passes the current @_) and returns nothing when that yields nothing.
                 # Otherwise queues in @add_scan_VK the secondary scancode/VK pairs that
                 # %double_scan_VK lists for this row (the value is split on /), each with
                 # the fill and text of the row, and returns the row as one tab-joined string.
7804 0 0   0 0   my @i = &output_unit00 or return;
7805 0   0       my @add = split '/', ($double_scan_VK{uc "$i[0] $i[1]"} || '');
7806             #warn "<<<<< Secondary key <$add> for <$i[0] $i[1]>" if $add;
7807 0           push @add_scan_VK, map [split(/ /, $_), @i[2,3]], grep $_, @add;
7808 0           "$i[0]\t$i[1]$i[2]\t$i[3]"
7809             }
7810            
7811             sub output_added_units ($) {
                 # Returns the rows that were postponed while the regular rows were produced:
                 # first the queued secondary scancode/VK rows whose scancode and VK name
                 # were both not used yet, then the DECIMAL row if one was stored.  Every row
                 # is one string in the same tab-joined form that output_unit0 returns.
7812 0     0 0   my ($self, @i, @o, @dec) = shift;
7813 0           for my $i (@add_scan_VK) {
7814 0 0 0       next if $seen_scan{$i->[0]} or $seen_VK{$i->[1]}; # Cannot duplicate either one...
7815 0           push @i, $i;
7816             }
                 # $decimal[0] holds the saved arguments and $decimal[1] the saved cells.
7817 0 0         if ($decimal[0]) {
7818             # @decimal = ([$self->output_unit_KK($k, $u, $sc, $Used, @KK)], [@KK]);
7819 0           my ($k, $u, $sc, $Used) = @{$decimal[0]};
  0            
7820 0           push @dec, [$self->output_unit_KK($k, $u, $sc, $Used, @{$decimal[1]})];
  0            
7821             }
7822 0           for my $i (@i, @dec) {
7823 0           push @o, "$i->[0]\t$i->[1]$i->[2]\t$i->[3]";
7824             }
7825             @o
7826 0           }
7827            
7828             my $enc_UTF16LE; # encoding object for UTF-16LE, looked up on first need
7829             sub to_UTF16LE_units ($) {
                 # Plain function (no $self).  Returns $k unchanged when every character is
                 # at most U+FFFF.  Otherwise $k is encoded as UTF-16LE and each two-byte
                 # unit is turned into one character, so the result has one character per
                 # 16-bit code unit.  Dies if the encoding cannot be found.
7830 0     0 0   my $k = shift;
7831 0 0         unless ($k =~ /^[\x00-\x{FFFF}]*$/) {
7832 0 0         (require Encode), $enc_UTF16LE = Encode::find_encoding('UTF-16LE') unless $enc_UTF16LE;
7833 0 0         die "Can't arrange encoding to UTF-16LE" unless $enc_UTF16LE;
7834 0           $k = $enc_UTF16LE->encode($k);
7835             # warn join '> <', ($k =~ /(..)/sg); # Can't use decode() on surrogates...
7836             # warn join '> <', map {unpack 'v', $_} ($k =~ /(..)/sg); # Can't use decode() on surrogates...
7837 0           $k = join '', map chr(unpack 'v', $_), ($k =~ /(..)/sg); # Can't use decode() on surrogates...
7838             }
7839 0           $k;
7840             }
7841            
7842             sub output_ligatures ($) {
                 # Returns one newline-terminated line per entry of @ligatures.  An entry is
                 # [key name, column index, string]; a repeated key name/column pair is
                 # skipped with a warning.  The line holds the key name (plus a tab when it
                 # is shorter than 8 characters), the column index, then exactly four
                 # fields with the key2hex form of the UTF-16 units of the string (padded
                 # with empty fields) and, when UNames exists, a // comment built from the
                 # UName of every character.
7843 0     0 0   my ($self, @o, %s) = shift;
7844 0           for my $l (@ligatures) {
7845 0 0         warn("Repeated LIGATURE $l->[0] $l->[1]"), next if $s{"$l->[0] $l->[1]"}++;
7846 0           my $k = to_UTF16LE_units $l->[2];
7847 0           my @k = ((map $self->key2hex($_), split //, $k), ('') x 4);
7848 0 0         my @expl = exists $self->{UNames} ? "// " . join " + ", map $self->UName($_), split //, $l->[2] : ();
7849 0 0         my $add = ((8 <= length $l->[0]) ? '' : "\t");
7850 0           push @o, (join "\t", "$l->[0]$add", $l->[1], @k[0..3], @expl) . "\n";
7851             }
7852             @o
7853 0           }
7854            
# Computes the name under which the key at position $u is emitted.
#   $basesub - array ref of layer names consulted for the base character
#   $u       - position of the key in the layers
#   $ingroup - false for an ordinary key, true for a key of the VK groups
# For an ordinary key returns [0, NAME, CHAR]: NAME is the uppercased first
# defined unshifted character among the layers of $basesub, with # appended
# when that name was handed out before (tracked in %seen) and # standing in
# for a space; CHAR is the character taken out of an array-valued entry, and
# undef for a plain entry.
# For a VK group key returns [1, NAME], where NAME comes from the naming sub of
# the %start_SEC section whose range contains $u (undef if no section does).
sub base_unit ($$$$) {
    my ($self, $basesub, $u, $ingroup, $k) = (shift, shift, shift, shift);
    if (!$ingroup) {
        my @c = map $self->{layers}{$_}[$u][0], @$basesub;
        my($c) = grep defined, @c;
        # Declared on its own: a my() under a statement modifier has undefined
        # behaviour (perlsyn), so the conditional assignment is kept separate.
        my $c0;
        $c0 = $c = $c->[0] if 'ARRAY' eq ref $c;
        $c .= '#' if $seen{uc $c}++;
        $c = '#' if $c eq ' ';
        $c = uc $c;
        return [0, $c, $c0]
    }                                   # Now do the VK groups
    for my $v (values %start_SEC) {
        # A section is [first position, count, naming sub]
        $k = $v->[2]($self, $u, $v), last if $v->[0] <= $u and $v->[0] + $v->[1] > $u;
    }
    [1, $k]
}
7871            
7872             sub output_unit ($$$$$$$$) {
                 # Emits the row for position $u of the given layers.  The key name is
                 # $baseK->[$u]; nothing is returned when that is undefined.  Otherwise the
                 # entries at position $u of all layers are handed to output_unit0, with no
                 # known scancode and with $canskip as the skippable flag.
7873 0     0 0   my ($self, $face, $layers, $u, $deadkeys, $Used, $canskip, $baseK, $k) = (shift, shift, shift, shift, shift, shift, shift, shift);
7874 0           my $U = [map $self->{layers}{$_}[$u], @$layers];
7875 0 0         defined ($k = $baseK->[$u]) or return;
7876 0           $self->output_unit0($face, $k, $U, $u, $deadkeys, $Used, undef, $canskip);
7877             }
7878             }
7879            
7880             sub output_layout_win ($$$$$$$) {
                 # Returns the rows that output_unit produces for every position
                 # 0..$#$baseK; positions at or beyond $cnt are passed as skippable.
7881 0     0 0   my ($self, $face, $layers, $deadkeys, $Used, $cnt, $baseK) = (shift, shift, shift, shift, shift, shift, shift);
7882             # die "Count of non-VK entries mismatched: $cnt vs ", scalar @{$self->{layers}{$layers->[0]}}
7883             # unless $cnt <= scalar @{$self->{layers}{$layers->[0]}};
7884 0           map $self->output_unit($face, $layers, $_, $deadkeys, $Used, $_ >= $cnt, $baseK), 0..$#$baseK;
7885             }
7886            
7887             sub output_VK_win ($$$) {
                 # Returns the rows for the keys of the VK table of $face.  For every key,
                 # element 0 of its array is passed on as the known scancode; the remaining
                 # entries are separated into characters and dead-key flags (a trailing @
                 # sets the flag), both are grouped into pairs, and the pairs go to
                 # output_unit0 as the layers and the dead-key lists.  The position argument
                 # is undef and no skippable flag is given.
7888 0     0 0   my ($self, $face, $Used, @O) = (shift, shift, shift);
7889 0           my $VK = $self->get_VK($face);
7890 0           for my $k (keys %$VK) {
7891 0           my $v = $VK->{$k};
7892             # warn 'Key: <', join('> <', @$v), '>';
7893 0 0         my (@dead) = map +(/^(.+)\@$/ ? [$1, 1] : [$_]), @$v[1..$#$v];
7894 0           my (@k, @o, @oo, $x, $y) = map $_->[0], @dead;
7895 0           @dead = map $_->[1], @dead;
7896 0   0       push @o, [$x, $y] while @dead and ($x, $y) = splice @dead, 0, 2;
7897 0   0       push @oo, [$x, $y] while @k and ($x, $y) = splice @k, 0, 2;
7898 0           push @O, $self->output_unit0($face, $k, \@oo, undef, \@o, $Used, $v->[0]);
7899             }
7900             @O
7901 0           }
7902            
7903             sub read_deadkeys_win ($$) {
                 # Extracts the dead-key tables and dead-key names from $t, the text of a map
                 # file (the warnings below call it an MSKLC map file).  // comments and
                 # trailing whitespace are removed first.  The run of DEADKEY sections is
                 # replaced in $t by the line DEADKEYS and the KEYNAME_DEAD section by the
                 # line KEYNAMES_DEAD.
                 # Returns three values: a ref to %o, which maps every lowercased DEADKEY
                 # argument to a hash built from the two hex fields of its lines (first
                 # field => second field); a ref to %d; and what is left of $t.
                 # When a KEYNAME_DEAD section is found, %d maps its codes (as written and
                 # lowercased) to the quoted names, and the names are also stored in
                 # $self->{[seen_knames]} under chr hex of the code.
                 # NOTE(review): when no KEYNAME_DEAD section matches, %d still holds the
                 # result of the DEADKEY split (argument => section text) - confirm that
                 # callers expect this.
7904 0     0 0   my ($self, $t, $dead, $next, @p, %o) = (shift, shift, '', '');
7905            
7906 0           $t =~ s(\s*//.*)()g; # remove comments
7907 0           $t =~ s([^\S\n]+$)()gm; # remove trailing whitespace (including \r!)
7908             # deadkey lines, empty lines, HEX HEX keymap lines
7909 0 0         $t =~ s/(^(?=DEADKEY)(?:(?:(?:DEADKEY|\s*[0-9a-f]{4,})\s+[0-9a-f]{4,})?(?:\n|\Z))*)(?=(.*))/DEADKEYS\n\n/mi
7910             and ($dead, $next) = ($1, $2);
7911 0 0 0       warn "Unknown keyword follows deadkey descriptions in MSKLC map file: `$next'; dead=<$dead>"
7912             if length $next and not $next =~ /^(KEYNAME|LIGATURE|COPYRIGHT|COMPANY|LOCALENAME|LOCALEID|VERSION|SHIFTSTATE|LAYOUT|ATTRIBUTES|KEYNAME_EXT|KEYNAME_DEAD|DESCRIPTIONS|LANGUAGENAMES|ENDKBD)$/i;
7913             # $dead =~ /\S/ or warn "EMPTY DEADKEY section";
7914             #warn "got `$dead' from `$t'";
7915            
7916             # when a pattern has parens, split does not remove the leading empty fields (?!!!)
7917 0           (undef, my %d) = split /^DEADKEY\s+([0-9a-f]+)\s*\n/im, $dead;
                 # Every line of a section must consist of exactly two hex fields.
7918 0           for my $d (keys %d) {
7919             #warn "split `$d' from `$d{$d}'";
7920 0           @p = split /\n+/, $d{$d};
7921 0           my @bad;
7922 0 0         die "unrecognized part in deadkey map for $d: `@bad'"
7923             if @bad = grep !/^\s*([0-9a-f]+)\s+([0-9a-f]+)$/i, @p;
7924 0           %{$o{lc $d}} = map /^\s*([0-9a-f]+)\s+([0-9a-f]+)/i, @p;
  0            
7925             }
7926            
7927             # empty lines, HEX "NAME" lines
7928 0 0         if ($t =~ s/^KEYNAME_DEAD\n((?:(?:\s*[0-9a-f]{4,}\s+".*")?(?:\n|\Z))*)(?=(.*))/KEYNAMES_DEAD\n\n/mi) {
    0          
7929 0           ($dead, $next) = ($1,$2);
7930 0 0 0       warn "Unknown keyword follows deadkey names descriptions in MSKLC map file: `$next'"
7931             if length $next and not $next =~ /^(DEADKEY|KEYNAME|LIGATURE|COPYRIGHT|COMPANY|LOCALENAME|LOCALEID|VERSION|SHIFTSTATE|LAYOUT|ATTRIBUTES|KEYNAME_EXT|KEYNAME_DEAD|DESCRIPTIONS|LANGUAGENAMES|ENDKBD)$/i;
7932 0 0         $dead =~ /\S/ or warn "EMPTY KEYNAME_DEAD section";
7933 0           %d = map /^([0-9a-f]+)\s+"(.*)"\s*$/i, split /\n\s*/, $dead;
7934 0           $d{lc $_} = $d{$_} for keys %d;
7935 0   0       $self->{'[seen_knames]'} ||= {};
7936 0           @{$self->{'[seen_knames]'}}{map {chr hex $_} keys %d} = values %d; # XXXX Overwrites older values
  0            
  0            
7937             } elsif ($dead =~ /\S/) {
7938 0           warn "no KEYNAME_DEAD section found" if 0;
7939             }
7940 0           \%o, \%d, $t; # %o - translation tables; %d - names; $t is what is left of input
7941             }
7942            
7943             sub massage_template ($$$) {
                 # Returns the template text $t with every occurrence of a key of the hash
                 # $r replaced by the value stored for it.  Longer keys are tried first.
                 # Warns with the list of keys that never occurred in the template.
                 # NOTE(review): the keys are joined into the pattern as they are (no
                 # quotemeta), so a key with regex metacharacters would not match literally.
7944 0     0 0   my ($self, $t, $r, %seen, %miss) = (shift, shift, shift);
7945 0 0         my $keys = join '|', sort {length $b <=> length $a or $a cmp $b} keys %$r; # Prefer matching a longer key
  0            
7946 0           $t =~ s/($keys)/ # warn "Plugging in `$1'";
7947 0           $seen{$1}++, $r->{$1} /ge; # Can't use \b: see SORT_ORDER_ID_ LOCALE_ID
7948 0   0       $seen{$_} or $miss{$_}++ for keys %$r;
7949 0 0         warn "The following parts missing in the template: ", join ' ', sort keys %miss if %miss;
7950 0           $t
7951             }
7952            
7953             # http://msdn.microsoft.com/en-us/library/dd373763
7954             # http://msdn.microsoft.com/en-us/library/dd374060
7955             my $template_win = <<'EO_TEMPLATE';
7956             KBD DLLNAME "LAYOUTNAME"
7957            
7958             COPYRIGHT "(c) COPYR_YEARS COMPANYNAME"
7959            
7960             COMPANY "COMPANYNAME"
7961            
7962             LOCALENAME "LOCALE_NAME"
7963            
7964             LOCALEID "SORT_ORDER_ID_LOCALE_ID"
7965            
7966             VERSION 1.0
7967            
7968             SHIFTSTATE
7969            
7970             BITS_TEMPLATE
7971            
7972             LAYOUT ;an extra '@' at the end is a dead key
7973            
7974             //SC VK_ Cap COL_HEADERS
7975             //-- ---- ---- COL_EXPL
7976             LAYOUT_KEYS
7977             DO_LIGA
7978             DEADKEYS
7979            
7980             KEYNAME
7981            
7982             01 Esc
7983             0e Backspace
7984             0f Tab
7985             1c Enter
7986             1d Ctrl
7987             2a Shift
7988             36 "Right Shift"
7989             37 "Num *"
7990             38 Alt
7991             39 Space
7992             3a "Caps Lock"
7993             3b F1
7994             3c F2
7995             3d F3
7996             3e F4
7997             3f F5
7998             40 F6
7999             41 F7
8000             42 F8
8001             43 F9
8002             44 F10
8003             45 Pause
8004             46 "Scroll Lock"
8005             47 "Num 7"
8006             48 "Num 8"
8007             49 "Num 9"
8008             4a "Num -"
8009             4b "Num 4"
8010             4c "Num 5"
8011             4d "Num 6"
8012             4e "Num +"
8013             4f "Num 1"
8014             50 "Num 2"
8015             51 "Num 3"
8016             52 "Num 0"
8017             53 "Num Del"
8018             54 "Sys Req"
8019             57 F11
8020             58 F12
8021             5C AX
8022             70 KANA
8023             73 "ABNT C1"
8024             79 CONVERT
8025             7c F13
8026             7d F14
8027             7e F15
8028             7f F16
8029             80 F17
8030             81 F18
8031             82 F19
8032             83 F20
8033             84 F21
8034             85 F22
8035             86 F23
8036             87 F24
8037            
8038             KEYNAME_EXT
8039            
8040             1c "Num Enter"
8041             1d "Right Ctrl"
8042             35 "Num /"
8043             37 "Prnt Scrn"
8044             38 "Right Alt"
8045             45 "Num Lock"
8046             46 Break
8047             47 Home
8048             48 Up
8049             49 "Page Up"
8050             4b Left
8051             4d Right
8052             4f End
8053             50 Down
8054             51 "Page Down"
8055             52 Insert
8056             53 Delete
8057             54 <00>
8058             56 Help
8059             5b "Left Windows"
8060             5c "Right Windows"
8061             5d Application
8062            
8063             KEYNAMES_DEAD
8064            
8065             DESCRIPTIONS
8066            
8067             LOCALE_ID LAYOUTNAME
8068            
8069             LANGUAGENAMES
8070            
8071             LOCALE_ID LANGUAGE_NAME
8072            
8073             ENDKBD
8074            
8075             EO_TEMPLATE
8076             # "
8077            
8078             my $template_osx = <<'EO_TEMPLATE';
8079            
8080            
8081            
8082            
8083            
8084            
8085            
8086            
8087            
8088            
8089            
8090            
8091            
8092            
8093            
8094            
8095            
8096            
8097            
8098            
8099            
8100            
8101            
8102            
8103            
8104            
8105            
8106            
8107            
8108            
8109            
8110            
8111            
8112            
8113            
8114            
8115            
8116            
8117            
8118            
8119            
8120            
8121            
8122            
8123            
8124            
8125            
8126            
8127             OSX_KEYMAP_0_AND_COMMAND
8128            
8129            
8130            
8131             OSX_KEYMAP_SHIFT
8132            
8133            
8134            
8135             OSX_KEYMAP_CAPS
8136            
8137            
8138            
8139             OSX_KEYMAP_OPTION
8140            
8141            
8142            
8143             OSX_KEYMAP_OPTION_SHIFT
8144            
8145            
8146            
8147             OSX_KEYMAP_OPTION_CAPS
8148            
8149            
8150            
8151             OSX_KEYMAP_OPTION_COMMAND
8152            
8153            
8154            
8155            
8156            
8157            
8158            
8159            
8160            
8161            
8162            
8163            
8164            
8165            
8166            
8167            
8168            
8169            
8170            
8171            
8172            
8173            
8174            
8175            
8176            
8177            
8178            
8179            
8180            
8181            
8182            
8183            
8184            
8185            
8186            
8187            
8188            
8189            
8190            
8191            
8192            
8193            
8194            
8195            
8196            
8197            
8198            
8199            
8200            
8201            
8202            
8203            
8204            
8205            
8206            
8207            
8208            
8209            
8210            
8211            
8212            
8213            
8214            
8215            
8216            
8217            
8218            
8219            
8220            
8221            
8222            
8223            
8224            
8225            
8226            
8227            
8228            
8229            
8230            
8231            
8232            
8233            
8234            
8235            
8236            
8237            
8238            
8239            
8240            
8241            
8242            
8243            
8244            
8245            
8246            
8247            
8248            
8249            
8250            
8251            
8252            
8253            
8254            
8255            
8256            
8257            
8258            
8259            
8260            
8261            
8262            
8263            
8264            
8265            
8266            
8267            
8268            
8269            
8270            
8271            
8272            
8273            
8274            
8275            
8276             OSX_ACTIONS_BASE
8277            
8278             OSX_ACTIONS
8279            
8280            
8281            
8282             OSX_TERMINATORS_BASE
8283            
8286             OSX_TERMINATORS2
8287            
8288            
8289             EO_TEMPLATE
8290             # "
8291            
8292             sub KEY2hex ($$) {
                 # Like key2hex, but also accepts an array ref: in that case returns a copy of
                 # the array with element 0 replaced by its key2hex form and the other
                 # elements kept; the array passed in is left unmodified.
8293 0     0 0   my ($self, $k) = (shift, shift);
8294 0 0         return $self->key2hex($k) unless 'ARRAY' eq ref $k;
8295             #warn "see a deadkey `@$k'";
8296 0           $k = [@$k]; # deeper copy
8297 0           $k->[0] = $self->key2hex($k->[0]);
8298 0           $k;
8299             }
8300            
8301             sub linked_faces_2_hex_map ($$$$) {
                 # Returns a hash ref built from the link map of face $name to face $b
                 # (Face_link_map_INV when $inv is true, Face_link_map otherwise): every key
                 # is converted by key2hex and every defined value by KEY2hex; undefined
                 # values stay undef.
                 # Dies when $b is a different face and no such map exists, and when face
                 # $name has no [coverage_hex] entry (taken here as not preprocessed).
8302 0     0 0   my ($self, $name, $b, $inv) = (shift, shift, shift, shift);
8303 0           my $L = $self->{faces}{$name};
8304 0 0         my $remap = $L->{$inv ? 'Face_link_map_INV' : 'Face_link_map'}{$b};
8305 0           die "Face `$b' not linked to face `$name'; HAVE: <", join('> <', keys %{$L->{Face_link_map}}), '>'
8306 0 0 0       if $self->{faces}{$b} != $L and not $remap;
                 # $cover is only tested for presence; the visible code does not read it.
8307 0 0         my $cover = $L->{'[coverage_hex]'} or die "Face $name not preprocessed";
8308             # warn "Keys of the Map `$name' -> '$b': <", join('> <', keys %$remap), '>';
8309             # $remap ||= {map +(chr hex $_, chr hex $remap->{$_}), keys %$cover}; # This one in terms of chars, not hex
8310 0           my @k = keys %$remap;
8311             # warn "Map `$name' -> '$b': <", join('> <', map +($self->key2hex($_), $self->key2hex($remap->{$_})), @k), '>';
8312 0 0         return { map +($self->key2hex($_), (defined $remap->{$_} ? $self->KEY2hex($remap->{$_}) : undef)), @k }
8313             }
8314            
8315             my $dead_descr;
8316             #my %control = split / /, "\n \\n \r \\r \t \\t \b \\b \cC \\x03 \x7f \\x7f \x1b \\x1b \x1c \\x1c \x1d \\x1d";
                 # %control maps a control character to its printable form: backslash escapes
                 # for newline, return, tab and backspace, and caret notation (^A .. ^Z) for
                 # the remaining characters with codes 1..26.
8317             my %control = split / /, "\n \\n \r \\r \t \\t \b \\b";
8318             $control{$_->[0]} ||= $_->[1] for map [chr($_), '^'.chr(0x40+$_)], 1..26;
8319             sub control2prt ($$) {
                 # Returns $c unchanged unless its code is below 0x20 or equals 0x7f; for such
                 # a character returns the %control entry, or a \xNN escape if there is none.
8320 0     0 0   my($self, $c) = (shift, shift);
8321 0 0 0       return $c unless ord $c < 0x20 or ord $c == 0x7f;
8322 0 0         $control{$c} or sprintf '\\x%02x', ord $c;
8323             }
8324            
# Resolves the character of a key entry, applying the AltGr inversion of
# prefix keys when the entry asks for it.
#   $is_hex - true when the character in $to (and the result) is in hex form
#   $to     - entry [char, ?, flags]; inversion is requested when flags is 3
#             or when flags shifted right by 3 bits is 3
#   $nameF  - face name, used in the error message only
#   $H      - face hash; its [deadkeyInvAltGrKey] table is keyed by hex form
# Returns the character of the entry, or its inverted counterpart.
# Dies when inversion is requested but the table has no entry for the key.
sub dead_with_inversion ($$$$$) {
    my($self, $is_hex, $to, $nameF, $H) = (shift, shift, shift, shift, shift);
    my $invert_dead = (3 == ($to->[2] || 0) or 3 == (($to->[2] || 0) >> 3));
    $to = $to->[0];
    if ($invert_dead) {
        $to = $self->key2hex($to) unless $is_hex;
        # Look up into a separate variable: assigning the result to $to before
        # the check would leave the error message without the offending key.
        my $inverted = $H->{'[deadkeyInvAltGrKey]'}{$to};
        defined $inverted or die "Cannot invert prefix key `$to' in face `$nameF'";
        $to = $inverted;
        # warn "invert $to in face=$nameF, inv=$invertAlt0 --> $inv\n";
        $to = $self->key2hex($to) if $is_hex;
    }
    $to;
}
8337            
8338             sub output_deadkeys ($$$$$$;$) {
8339 0     0 0   my ($self, $nameF, $d, $Dead2, $flip_AltGr_hex, $prefix_flippedMap_hex, $OUT_Apple) = (shift, shift, shift, shift, shift, shift, shift);
8340 0           my $H = $self->{faces}{$nameF};
8341             # warn "emit `$nameF' d=`$d' f=$H->{'[deadkeyFace]'}{$d}";
8342             # if (my $unres = $H->{'[unresolved_imported]'}) {
8343             # warn "Can't resolve `@$unres' to an imported dead key; face=`$nameF'" unless $H->{'[unresolved_imported_warned]'}++;
8344             # }
8345             #warn "See dead2 in <$nameF> for <$d>" if $dead2;
8346 0   0       my $dead2 = ($Dead2 || {})->{$self->charhex2key($d)} || {};
8347 0 0         my(@sp, %sp) = map {(my $in = $_) =~ s/(?<=.)\@$//s; $in} @{ ($self->get_VK($nameF))->{SPACE} || [] };
  0            
  0            
  0            
8348 0           @sp = map $self->charhex2key($_), @sp;
8349 0           @sp{@sp[1..$#sp]} = (0..$#sp); # The leading elt is the scancode
8350            
8351 0           my @maps = map $H->{"[deadkeyFaceHexMap$_]"}{$d}, '', 'Inv';
8352 0 0         pop @maps unless defined $maps[-1];
8353 0           my($D, @DD) = ($d, $d, $prefix_flippedMap_hex);
8354 0           my ($OUT, $keys) = '';
8355             # There are 3 situations:
8356             # 0) process one map without AltGr-inversion; 1) Process one map which is the AltGr-inversion of the principal one;
8357             # 2) process one map with AltGr-inversion (in 1-2 the inversion may have a customization put over it).
8358             # The problem is to recognize when deadkeys in the inversion come from non-inverted one, or from customization
8359             # And, in case (1), we must consider flip_AltGr specially... (the case (2) is now treated during face preparation)
8360 0   0       my($is_invAltGr_Base_with_chain, $AMap, $default) = ($D eq ($flip_AltGr_hex || 'n/a') and $H->{'[have_AltGr_chain]'});
8361 0           $default = $self->default_char($nameF);
8362 0 0         $default = $self->key2hex($default) if defined $default;
8363 0 0 0       if ($#maps or $is_invAltGr_Base_with_chain) { # One of the maps we will process is AltGr-inverted; calculate AltGr-inversion
8364 0           $self->faces_link_via_backlinks($nameF, $nameF, 'no_ini'); # Create AltGr-invert self-mapping
8365 0           $AMap = $self->linked_faces_2_hex_map($nameF, $nameF, 1);
8366             #warn "deadkey=$D flip=$flip_AltGr_hex" if defined $default;;
8367             }
8368 0           my($docs, $map_AltGr_over, $over_dead2) = ($H->{'[prefixDocs]'}{$D}, {}, {});
8369 0 0         if ($is_invAltGr_Base_with_chain) {
8370 0 0         if (my $override_InvAltGr = $H->{'[InvAltGrFace]'}{''}) { # NOW: needed only for invAltGr
8371 0           $map_AltGr_over = $self->linked_faces_2_hex_map($nameF, $override_InvAltGr);
8372             }
8373 0 0 0       $over_dead2 = $Dead2->{$self->charhex2key($flip_AltGr_hex)} || {} if defined $flip_AltGr_hex; # used in CyrPhonetic v0.04
8374 0           $dead2 = { %{ $H->{'[DEAD]'} }, %{ $H->{'[dead_in_VK]'} } };
  0            
  0            
8375             # $docs ||= 'AltGr-inverted base face';
8376             }
8377            
8378             # warn "output map for `$D' invert=", !!$is_invAltGr_Base_with_chain, ' <',join('> <', sort keys %$dead2),'>';
8379 0           for my $invertAlt0 (0..$#maps) {
8380 0   0       my $invertAlt = $invertAlt0 || $is_invAltGr_Base_with_chain;
8381 0           my $map = $maps[$invertAlt0];
8382 0           $d = $DD[$invertAlt0];
8383 0 0         my $docs1 = (defined $docs ? sprintf("\t// %s%s", ($invertAlt0 ? 'AltGr inverted: ' : ''), $docs) : '');
    0          
8384 0           $OUT .= "DEADKEY\t$d$docs1\n\n";
8385 0           my $OUT_Apple_map = $d;
8386             # Good order: first alphanum, then punctuation, then space
8387 0           my @keys = sort keys %$map; # Sorting not OK for 6-byte keys - but can't have them on Win
8388             @keys = (grep(( lc(chr hex $_) ne uc(chr hex $_)and not $sp{chr hex $_} ), @keys),
8389             grep(((lc(chr hex $_) eq uc chr hex $_ and (chr hex $_) !~ /\p{Blank}/) and not $sp{chr hex $_}), @keys),
8390 0   0       grep((((lc(chr hex $_) eq uc chr hex $_ and (chr hex $_) =~ /\p{Blank}/) or $sp{chr hex $_}) and $_ ne '0020'), @keys),
      0        
      0        
8391             grep( $_ eq '0020', @keys)); # make SPACE last
8392 0           for my $n (@keys) { # Not OK for 6-byte keys (impossible on Win)
8393             # warn "doing $n\n";
8394 0           my ($to, $import_dead, $EXPL) = $map->{$n};
8395 0 0 0       if ($to and 'ARRAY' eq ref $to) {
8396 0           $EXPL = $to->[3];
8397 0 0         $EXPL =~ s/(?=\p{NonspacingMark})/ /g if $EXPL;
8398 0   0       $import_dead = (1 <= ($to->[2] || 0)); # was: exportable; now: any dead
8399 0           $to = $self->dead_with_inversion('hex', $to, $nameF, $H);
8400             }
8401 0 0 0       warn "0000: face `$nameF' d=`$d': $n --> $to" if $to and $to eq '0000';
8402 0           my $map_n = $map->{$n};
8403 0 0 0       $map_n = $map_n->[0] if $map_n and ref $map_n;
8404 0 0 0       $H->{'[32-bit]'}{chr hex $map_n}++, next if hex $n > 0xFFFF and $map_n; # Cannot be put in a map...
8405 0 0 0       if ($to and hex $to > 0xFFFF) { # Value cannot be put in a map...
8406             # warn "32-bit: n=$n map{n}=$map_n to=$to";
8407 0           $H->{'[32-bit]'}{chr hex $map_n}++;
8408 0 0         next unless defined ($to = $H->{'[DeadChar_32bitTranslation]'});
8409 0           $to =~ s/^\s+//; $to =~ s/\s+$//;
  0            
8410 0           $to = $self->key2hex($to);
8411             }
8412 0           my $was_to = $to;
8413 0 0 0       $to ||= $default or next;
8414             # Tricky: dead keys may come from the override map (which is indexed by NOT-INVERTED KEYS!); it is already merged into
8415             # the map - unless for inverted base face
8416             my ($alt_n, $use_dead2) = (($is_invAltGr_Base_with_chain and defined $map_AltGr_over->{$n})
8417             ? ($n, $over_dead2)
8418 0 0 0       : (($invertAlt ? $AMap->{$n} : $n), $dead2));
    0          
8419 0 0 0       $alt_n = $alt_n->[0] if $alt_n and ref $alt_n; # AMap may have "complex" values
8420             #warn "$D --> $d, `$n', `$alt_n', `$AMap->{$n}'; `$map_AltGr_over->{$n}' i=$invertAlt i0=$invertAlt0 d=$use_dead2->{chr hex $alt_n}";
8421             #warn "... n=`$n', alt=`$alt_n' Amap=`$AMap->{$n}'\n" if $AMap->{$n};
8422 0 0 0       my $DEAD = ( (defined $alt_n and $use_dead2->{chr hex $alt_n}) ? '@' : '' );
8423             #warn "AltGr flip: $nameF:$D: $n --> $H->{'[dead2_AltGr_chain]'}{$D}" if $n eq ($flip_AltGr_hex || 'n/a');
8424 0           my $from = $self->control2prt(chr hex $n);
8425             # This is now done inside the map:
8426 0           if (0 and (hex $n) == hex ($flip_AltGr_hex || 'ffffff') and @maps == 2 and !$invertAlt) {
8427             if (defined $was_to or $DEAD) {
8428             warn "AltGr_Flip key=", hex $n, " overwrites '$was_to', DEAD=", $DEAD||$import_dead||0, " on face=$nameF\[$d]";
8429             }
8430             ($DEAD, $to) = ('@', $DD[1]); # Join Inv to not-Inv on $flip_AltGr_hex; Do not overwrite existing binding... Warn???
8431             }
8432 0 0 0       $to = $default
      0        
      0        
      0        
      0        
      0        
8433             if !($DEAD or $import_dead)
8434             and defined $default and (0x7f == hex $to or 0x20 > hex $to) and (0x7f == hex $n or 0x20 > hex $n);
8435 0 0 0       if (($DEAD or $import_dead) and $d eq $to) {
      0        
8436 0 0 0       if (($flip_AltGr_hex or 'n/a') eq $d) { # This is what routinely happens in Flip_AltGr face
8437 0           $import_dead = $DEAD = '';
8438 0   0       $to = $H->{'[DeadChar_32bitTranslation]'} || '003f'; # ? = U+003f
8439 0           $to =~ s/^\s+//; $to =~ s/\s+$//;
  0            
8440 0           $to = $self->key2hex($to);
8441 0           $EXPL = 'removal of immediate deadkey loop';
8442             } else {
8443 0           warn "Immediate deadkey loop: face `$nameF' d=`$d': $n --> $to";
8444             }
8445             }
8446 0 0         my $expl = exists $self->{UNames} ? "\t// " . join "\t-> ", # map $self->UName($_),
8447             # chr hex $n, chr hex $map->{$n} : '';
8448             $self->UName(chr hex $n), $self->UName(chr hex $to, 'verbose', 'vbell') : '';
8449 0 0 0       $expl .= " (via $EXPL)" if $expl and $EXPL;
8450 0           my $to1 = $self->control2prt(chr hex $to);
8451             # warn "Both import_dead and DEAD properties hold for `$from' --> '$to1' via deadkey $d face=$nameF" if $DEAD and $import_dead;
8452 0 0         $DEAD = '@' if $import_dead;
8453 0           $OUT .= sprintf "%s\t%s%s\t// %s -> %s%s\n", $n, $to, $DEAD, $from, $to1, $expl;
8454 0 0 0       $OUT_Apple->{$n}{$OUT_Apple_map} = [$to, undef, $DEAD && 1] if $OUT_Apple and 0x20 <= hex $n and 0x7f != hex $n;
      0        
      0        
8455             }
8456 0           $OUT .= "\n";
8457 0   0       $keys ||= @keys;
8458             }
8459 0 0         warn "DEADKEY $d for face `$nameF' empty" unless $keys;
8460 0           (!!$keys, $OUT, $OUT_Apple)
8461             }
8462            
8463             sub massage_diacritics ($) { # "
8464 0     0 0   my ($self) = (shift);
8465 0           my %char2dia;
8466 0           for my $dia (sort keys %{$self->{Diacritics}}) { # Make order deterministic
  0            
8467 0           my @v = map { s/\p{Blank}//g; $_ } @{ $self->{Diacritics}{$dia} };
  0            
  0            
  0            
8468             # $self->{'[map2diac]'}{$_} = $dia for split //, join '', @v; # XXXX No check for duplicates???
8469 0           for my $cc ( [ split //, join '', @v[0..3] ], [ split //, join '', @v[4..$#v] ] ) { # modifiers, combining
8470 0           $char2dia{$cc->[$_]}{$_} = $dia for 0..$#$cc; # XXXX No check for duplicates???
8471             }
8472 0           my @vv = map [ split // ], @v;
8473 0           $self->{'[diacritics]'}{$dia} = \@vv;
8474             }
8475 0           for my $c (keys %char2dia) {
8476 0           my @pos = sort {$a <=> $b} keys %{ $char2dia{$c} };
  0            
  0            
8477             # warn("map2diac( $c ): @pos; ", join '; ', values %{ $char2dia{$c} });
8478 0           $self->{'[map2diac]'}{$c} = $char2dia{$c}{$pos[0]}; # prefer the earliest possible occurence
8479             }
8480             }
8481            
8482             sub extract_diacritic ($$$$$$@) {
8483 0     0 0   my ($self, $dia, $idx, $which, $need, $skip2, @elt0) = (shift, shift, shift, shift, shift, shift);
8484 0           my @v = map @$_, my $elt0 = shift; # first one full
8485 0 0         push @v, map @$_[($skip2 ? 2 : 0)..$#$_], @_; # join the rest, omitting the first 2 (assumed: accessible in other ways)
8486 0 0 0       @elt0 = $elt0 if $skip2 and $skip2 eq 'skip2-include0';
8487 0 0         push @v, grep defined, map @$_[0..1], @elt0, @_ if $skip2;
8488             # @v = grep +((ord $_) >= 128 and $_ ne $dia), @v;
8489 0           @v = grep +(ord $_) >= 0x80, @v;
8490 0 0 0       die "diacritic ` $dia ' has no $which no.$idx (0-based) assigned"
8491             unless $idx >= $need or defined $v[$idx];
8492             # warn "Translating for dia=<$dia>: idx=$idx <$which> -> <$v[$idx]> of <@v>" if defined $v[$idx];
8493 0           return $v[$idx];
8494             }
8495            
8496             sub diacritic2self ($$$$$$$$$) {
8497 0     0 0   my ($self, $dia, $c, $face, $N, $space, $c_base, $c_noalt, $seen_before) = (shift, shift, shift, shift, shift, shift, shift, shift, shift);
8498             # warn("Translating for dia=<$dia>: got undef"),
8499 0 0         return $c unless defined $c;
8500             # $c = $c->[0] if 'ARRAY' eq ref $c; # Prefix keys behave as usual keys
8501             # return undef if
8502 0   0       my $prefix = (ref $c and $c->[2]); # Ignore deadkeys (unless we act on $c_base or $c_noalt - UNIMPLEMENTED);
8503 0   0       $_ and 'ARRAY' eq ref $_ and $_ = $_->[0] for $c, $c_base, $c_noalt; # Prefix keys behave as usual keys
      0        
8504             #warn " Translating for dia=<$dia>: got <$c>";
8505 0 0         die "` $dia ' not a known diacritic" unless my $name = $self->{'[map2diac]'}{$dia};
8506 0 0         my $v = $self->{'[diacritics]'}{$name} or die "Panic!";
8507 0 0         my ($first) = grep 0x80 <= ord, @{$v->[0]} or die "diacritic ` $dia ' does not define any non-7bit modifier";
  0            
8508 0 0         return $first if $c eq ' ';
8509 0           my $spaces = keys %$space;
8510 0           my $flip_AltGr = $self->{faces}{$face}{'[Flip_AltGr_Key]'};
8511 0 0         $flip_AltGr = $self->charhex2key($flip_AltGr) if defined $flip_AltGr;
8512 0 0         $flip_AltGr = 'n/a' unless defined $flip_AltGr;
8513 0   0       my $is_flip_AltGr = (defined $flip_AltGr and $prefix and $c eq $flip_AltGr);
8514 0 0 0       if ($c eq $dia and $prefix) {
8515             #warn "Translating2combining dia=<$dia>: got <$c> --> <$v->[4][0]>";
8516             # This happens with caron which reaches breve as the first:
8517             # warn "The diacritic ` $dia ' differs from the first non-7bit entry ` $first ' in its list" unless $dia eq $first;
8518 0 0         die "diacritic ` $dia ' has no default combining char assigned" unless defined $v->[4][0];
8519 0           return $v->[4][0];
8520             }
8521 0   0       my $limits = $self->{Diacritics_Limits}{ALL} || [(0) x 7];
8522 0 0 0       if ($space->{$c}) { # SPACE is handled above (we assume it is on index 0)...
    0 0        
    0 0        
8523             # ~ and ^ have only 3 spacing variants; one of them must be on ' ' - and we omit the first 2 of non-principal block...
8524 0           return $self->extract_diacritic($dia, $space->{$c}, 'spacing variant', $limits->[0], 'skip2', @$v[0..3]);
8525             } elsif (0 <= (my $off = index "\r\t\n\x1b\x1d\x1c\b\x7f\x1e\x1f\x00", $c)
8526             and not $prefix) { # Enter, Tab, C-Enter, C-[, C-], C-\, Bspc, C-Bspc, C-^, C-_, C-@
8527             # ~ and ^ have only 3 spacing variants; one of them must be on ' '
8528 0           return $self->extract_diacritic($dia, $spaces + $off, 'spacing variant', $limits->[0], 'skip2', @$v[0..3]);
8529             } elsif (!$spaces and $c =~ /^\p{Blank}$/ and not $prefix) { # NBSP and, (eg) Thin space 2007 -> second/third modifier
8530             # ~ and ^ have only 3 spacing variants; one of them must be on ' '
8531 0           my @pre = grep /^\p{Blank}$/, keys %$seen_before; # no prefix keys in $seen_before
8532 0 0         push @pre, 'something' unless $seen_before->{' '}; # there is no sense to address slot number 0
8533 0           return $self->extract_diacritic($dia, scalar @pre, 'spacing variant', $limits->[0], 'skip2', @$v[0..3]);
8534             }
8535 0 0 0       if ($c eq "|" or $c eq "\\" and not $prefix) {
      0        
8536             #warn "Translating2vertical dia=<$dia>: got <$c> --> <$v->[4][0]>"; # Skip2 would hurt, since macron+\ is defined:
8537 0           return $self->extract_diacritic($dia, ($c eq "|"), 'vertical+etc spacing variant', $limits->[2], !'skip2', @$v[2..3]);
8538             }
8539 0 0 0       if ($N == 1 and $c_noalt and ($c_noalt eq "|" or $c_noalt eq "\\")) {
      0        
      0        
8540             #warn "Translating2vertical dia=<$dia>: got <$c> --> <$v->[4][0]>"; # Skip2 would hurt, since macron+\ is defined:
8541 0           return $self->extract_diacritic($dia, ($c_noalt eq "|"), 'vertical+dotlike combining', $limits->[6], 'skip2', @$v[6,7,4,5]);
8542             }
8543 0 0 0       if ($c eq "/" or $c eq "?" and not $prefix) {
      0        
8544 0           return $self->extract_diacritic($dia, ($c eq "?"), 'prime-like+etc spacing variant', $limits->[3], 'skip2', @$v[3]);
8545             }
8546 0 0 0       if ($c_noalt and ($c_noalt eq "'" or $c_noalt eq '"')) {
      0        
8547 0           return $self->extract_diacritic($dia, 1 + ($c_noalt eq '"') + 2*$N, 'combining', $limits->[4], 'skip2', @$v[4..7]); # 1 for double-prefix
8548             }
8549 0 0 0       if ($c eq "_" or $c eq "-" and not $prefix) {
      0        
8550 0           return $self->extract_diacritic($dia, ($c eq "_"), 'lowered+etc spacing variant', $limits->[1], 'skip2', @$v[1..3]);
8551             }
8552 0 0 0       if ($N == 1 and $c_noalt and ($c_noalt eq "_" or $c_noalt eq "-")) {
      0        
      0        
8553 0           return $self->extract_diacritic($dia, ($c_noalt eq "_"), 'lowered combining', $limits->[5], 'skip2', @$v[5..7,4]);
8554             }
8555 0 0 0       if ($N == 1 and $c_noalt and ($c_noalt eq ";" or $c_noalt eq ":")) {
      0        
      0        
8556 0           return $self->extract_diacritic($dia, ($c_noalt eq ":"), 'combining for symbols', $limits->[7], 'skip2', @$v[7,4..6]);
8557             }
8558 0 0 0       if ($N == 1 and defined $c_base and 0 <= (my $ind = index "`1234567890=[],.'", $c_base)) {
      0        
8559 0           return $self->extract_diacritic($dia, 2 + $ind, 'combining', $limits->[4], 'skip2-include0', @$v[4..7]); # -1 for `, 1+2 for double-prefix and AltGr-/?
8560             }
8561 0 0 0       if ($N == 0 and 0 <= (my $ind = index "[{]}", $c) and not $prefix) {
      0        
8562 0           return $self->extract_diacritic($dia, 2 + $ind, 'combining for symbols', $limits->[7], 'skip2-include0', @$v[7,4..6]);
8563             }
8564 0 0 0       if ($N == 1 and $c_noalt and ($c_noalt eq "/" or $c_noalt eq "?")) {
      0        
      0        
8565 0           return $self->extract_diacritic($dia, 6 + ($c_noalt eq "?"), 'combining for symbols', $limits->[7], 'skip2-include0', @$v[7,4..6]);
8566             }
8567 0           return undef;
8568             }
8569            
8570             sub diacritic2self_2 ($$$$$$) { # Takes a key: array of arrays [lc,uc]
8571 0     0 0   my ($self, $dia, $c, $face, $space, @out, %seen) = (shift, shift, shift, shift, shift);
8572 0           my $c0 = $c->[0][0]; # Base character
8573 0           for my $N (0..$#$c) {
8574 0           my($c1, @res) = $c->[$N];
8575 0           for my $shift (0..$#$c1) {
8576 0           my($c2, $pref) = $c1->[$shift];
8577 0           push @res, $self->diacritic2self($dia, $c2, $face, $N, $space, $c0, $c->[0][$shift], \%seen);
8578 0 0         $pref = $c2->[2], $c2 = $c2->[0] if ref $c2;
8579 0 0 0       $seen{$c2}++ if defined $c2 and not $pref;
8580             }
8581 0           push @out, \@res;
8582             }
8583             @out
8584 0           }
8585            
8586             # Combining stuff:
8587             # perl -C31 -MUnicode::UCD=charinfo -le 'sub n($) {(charinfo(ord shift) || {})->{name}} for (0x20..0x10ffff) {next unless (my $c = chr) =~ /\p{NonspacingMark}/; (my $n = n($c)) =~ /^COMBINING\b/ or next; printf qq(%04x\t%s\t%s\n), $_, $c, $n}' >cc
8588             # perl -C31 -MUnicode::UCD=charinfo -le 'sub n($) {(charinfo(ord shift) || {})->{name}} for (0x20..0x10ffff) {next unless (my $c = chr) =~ /\p{NonspacingMark}/; (my $n = n($c)) =~ /^COMBINING\b/ and next; printf qq(%04x\t%s\t%s\n), $_, $c, $n}' >cc
8589            
8590             sub cache_dialist ($@) { # downstream, it is crucial that a case pair comes from "one conversion"
8591 0     0 0   my ($self, %seen, %caseseen, @out) = (shift);
8592 0           warn("caching dia: [@_]") if warnCACHECOMP;
8593 0           for my $d (@_) {
8594 0 0         next unless my $h = $self->{Compositions}{$d};
8595 0           $seen{$_}++ for keys %$h;
8596             }
8597 0           for my $c (keys %seen) {
8598 0 0         next if $caseseen{$c};
8599             # uc may include a wrong guy: uc(ſ) is S, and this may break the pair s/S if ſ comes before s, and S gets a separate binding;
8600             # so be very conservative with which case pair we include...
8601 0 0 0       my @case = grep { $_ ne $c and $seen{$_} and lc $_ eq lc $c } lc $c, uc $c or next;
  0 0          
8602 0           push @case, $c;
8603 0           $caseseen{$_} = \@case, delete $seen{$_} for @case;
8604             } # Currently (?), downstream does not distinguish case pairs from Shift-pairs...
8605 0           for my $cases ( values %caseseen, map [$_], keys %seen ) { # To avoid pairing symbols, keep them in separate slots too
8606 0           my (@dia, $to);
8607 0           for my $dia (@_) {
8608 0 0         push @dia, $dia if grep $self->{Compositions}{$dia}{$_}, @$cases;
8609             }
8610 0           for my $diaN (0..$#dia) {
8611             $to = $self->{Compositions}{$dia[$diaN]}{$_} and
8612             (warnCACHECOMP and warn("cache dia; c=`$_' of `@$cases'; dia=[$dia[$diaN]]")),
8613 0   0       $out[$diaN]{$_} = $to for @$cases;
8614             }
8615             }
8616             #warn("caching dia --> ", scalar @out);
8617             @out
8618 0           }
8619            
8620             my %cached_aggregate_Compositions;
8621             sub dia2list ($$) {
8622 0     0 0   my ($self, $dia, @dia) = (shift, shift);
8623             #warn "Split dia `$dia'";
8624 0 0         if ((my ($pre, $mid, $post) = split /(\+|--)/, $dia, 2) > 1) { # $mid is not counted in that "2"
8625 0           for my $p ($self->dia2list($pre)) {
8626 0           push @dia, map "$p$mid$_", $self->dia2list($post);
8627             }
8628             # warn "Split dia to `@dia'";
8629 0           return @dia;
8630             }
8631 0 0         return $dia if $dia =~ /^!?\\/; # (De)Penalization lists
8632 0           $dia = $self->charhex2key($dia);
8633 0 0         unless ($dia =~ /^-?(\p{NonspacingMark}|<(?:font=)?[-\w!]+>|[ul]c(first)?|dectrl)$/) {
8634 0 0         die "` $dia ' not a known diacritic" unless my $name = $self->{'[map2diac]'}{$dia};
8635 0 0         my $v = $self->{'[diacritics]'}{$name} or die "A spacing character <$dia> was requested to be treated as a composition one, but we do not know translation";
8636 0 0         die "Panic!" unless defined ($dia = $v->[4][0]);
8637             }
8638 0 0         if ($dia =~ /^(-)?<(reverse-)?any(1)?-(other-)?\b([-\w]+?)\b((?:-![-\w]+\b)*)>$/) {
8639 0   0       my($neg, $rev, $one, $other, $match, $rx, $except, @except)
8640             = ($1||'', $2, $3, $4, $5, "(?:(?
8641 0           my $cached;
8642 0           (my $dia_raw = $dia) =~ s/^-//;
8643 0 0         $cached = $cached_aggregate_Compositions{$dia_raw} and return map "$neg$_", @$cached;
8644            
8645 0           @except = map { s/^(?=\w)/\\b/; s/(?<=\w)$/\\b/; $_} @except;
  0            
  0            
  0            
8646 0 0         $except = join('|', @except[1..$#except]), $except = qr($except) if @except;
8647             #warn "Exceptions: $except" if @except;
8648 0           $rx =~ s/-/\\b\\W+\\b/g;
8649 0           my ($A, $B, $AA, $BB);
8650 0           my @out = keys %{$self->{Compositions}};
  0            
8651 0           @out = grep !/^Cached\d+=
8652 0 0 0       @out = grep {length > 1 ? /$rx/ : (lc $self->UName($_) || '') =~ /$rx/ } @out;
  0            
8653 0 0 0       @out = grep {length > 1 ? !/$except/ : (lc $self->UName($_) || '') !~ /$except/ } @out;
  0            
8654             # make before ; penalize those with and/over inside
8655 0           @out = sort {($A=$a) =~ s/>/\cA/g, ($B=$b) =~ s/>/\cA/g; ($AA=$a) =~ s/\w+\W*/a/g, ($BB=$b) =~ s/\w+\W*/a/g; # Number of words
  0            
  0            
8656 0 0 0       /.\b(and|over)\b./ and s/^/~/ for $A,$B; $AA cmp $BB or $A cmp $B or $a cmp $b} @out;
  0   0        
8657 0 0         @out = grep length($match) != length, @out if $other;
8658 0 0         @out = grep !/\bAND\s/, @out if $one;
8659 0 0         @out = reverse @out if $rev; # xor $reverse;
8660 0 0 0       if (!dontCOMPOSE_CACHE and @out > 1 and not $neg) { # Optional caching; will modify composition tables
8661 0           my @cached = $self->cache_dialist(@out); # but not decomposition ones, hence `not $neg'
8662 0           @out = map "Cached$_=$dia_raw", 0..$#cached;
8663 0           $self->{Compositions}{$out[$_]} = $cached[$_] for 0..$#cached;
8664 0           $cached_aggregate_Compositions{$dia} = \@out;
8665             }
8666 0 0         @out = map "-$_", @out if $neg;
8667 0           return @out;
8668             } else { # etc
8669             #warn "Dia=`$dia'";
8670 0           return $dia;
8671             }
8672             }
8673            
8674             sub flatten_arrays ($$) {
8675 0     0 0   my ($self, $a) = (shift, shift);
8676 0 0         warn "method flatten_arrays() takes one argument" if @_;
8677 0 0 0       return $a unless ref($a || '') eq 'ARRAY';
8678 0           map $self->flatten_arrays($_), @$a;
8679             }
8680            
8681             sub array2string ($$) {
8682 0     0 0   my ($self, $a) = (shift, shift);
8683 0 0         warn "method array2string() takes one argument" if @_;
8684 0 0         return '(undef)' unless defined $a;
8685 0 0 0       return "<$a>" unless ref($a || '') eq 'ARRAY';
8686 0           '[ ' . join(', ', map $self->array2string($_), @$a) . ' ]';
8687             }
8688            
8689             sub dialist2lists ($$) {
8690 0     0 0   my ($self, $Dia, @groups) = (shift, shift);
8691 0           for my $group (split /\|/, $Dia, -1) {
8692 0           my @dia;
8693 0           for my $dia (split /,/, $group) {
8694 0           push @dia, $self->dia2list($dia);
8695             }
8696 0           push @groups, \@dia; # Do not omit empty groups
8697             } # Now get all the chars, and precompile results for them
8698             @groups
8699 0           }
8700            
8701             sub document_char ($$$;$) {
8702 0     0 0   my ($self, $c, $doc, $old) = (shift, shift, shift, shift);
8703 0 0 0       return $c if not defined $c or not defined $doc;
8704 0 0 0       $doc = "$old->[3] ⇒ $doc" if $old and ref $old and defined $old->[3];
      0        
8705 0 0         $c = [$c] unless ref $c;
8706 0 0         $c->[3] = $doc if defined $doc;
8707 0           $c
8708             }
8709            
8710             sub document_chars_on_key ($$$;$) { # Usable with all_layers
8711 0     0 0   my ($self, $c, $doc, $old, @o) = (shift, shift, shift, shift);
8712 0           for my $layer (@$c) {
8713 0           push @o, [ map {$self->document_char($_, $doc, $old)} @$layer ];
  0            
8714             }
8715             @o
8716 0           }
8717            
8718             #use Dumpvalue;
8719             my %translators = ( Id => sub ($) {shift}, Empty => sub ($) { return undef },
8720             dectrl => sub ($) {defined (my $c = shift) or return undef; $c = $c->[0] if 'ARRAY' eq ref $c;
8721             return undef if 0x20 <= ord $c; chr(0x40 + ord $c)},
8722             maybe_ucfirst => sub ($) {defined (my $c = shift) or return undef; $c = $c->[0] if 'ARRAY' eq ref $c; ucfirst $c},
8723             maybe_lc => sub ($) {defined (my $c = shift) or return undef; $c = $c->[0] if 'ARRAY' eq ref $c; lc $c},
8724             maybe_uc => sub ($) {defined (my $c = shift) or return undef; $c = $c->[0] if 'ARRAY' eq ref $c; uc $c},
8725             ucfirst => sub ($) {defined (my $c = shift) or return undef; $c = $c->[0] if 'ARRAY' eq ref $c;
8726             my $c1 = ucfirst $c; return undef if $c1 eq $c; $c1},
8727             lc => sub ($) {defined (my $c = shift) or return undef; $c = $c->[0] if 'ARRAY' eq ref $c;
8728             my $c1 = lc $c; return undef if $c1 eq $c; $c1},
8729             uc => sub ($) {defined (my $c = shift) or return undef; $c = $c->[0] if 'ARRAY' eq ref $c;
8730             my $c1 = uc $c; return undef if $c1 eq $c; $c1} );
8731             sub make_translator ($$$$$) { # translator may take some values from "environment"
8732             # (such as which deadkey is processed), so caching is tricky: if does -> $used_deadkey reflects this
8733             # The translator should return exactly one value (possibly undef) so that map TRANSLATOR, list works intuitively.
8734 0   0 0 0   my ($self, $name, $deadkey, $face, $N, $used_deadkey) = (shift, shift, shift || 0, shift, shift, ''); # $deadkey used eg for diagnostics
8735 0 0         die "Undefined recipe in a translator for face `$face', layer $N on deadkey `$deadkey'" unless defined $name;
8736 0 0         if ($name =~ /^Imported\[([\/\w]+)(?:,([\da-fA-F]{4,}))?\]$/) {
8737 0 0         my($d, @sec) = (($2 ? "$2" : undef), split m(/), "$1");
8738 0 0         $d = $deadkey, $used_deadkey ="/$deadkey" unless defined $d;
8739 0 0         my $fromKBDD = $self->get_deep($self, 'DEADKEYS', @sec, lc $d, 'map') # DEADKEYS/bepo with 00A4 ---> DEADKEYS/bepo/00a4
8740             or die "DEADKEYS section for `$d' with parts `@sec' not found";
8741             # indexed by lc hex
8742 0 0   0     return sub { my $cc=my $c=shift; return $c unless defined $c; $c = $c->[0] if 'ARRAY' eq ref $c; defined($c = $fromKBDD->{$self->key2hex($c)}) or return $c; $self->document_char(chr hex $c, $name, $cc) }, '';
  0 0          
  0 0          
  0            
  0            
  0            
8743             }
8744 0 0         die "unrecognized Imported argument: `$1'" if $name =~ /^Imported(\[.*)/s;
8745 0 0         return $translators{$name}, '' if $translators{$name};
8746 0 0         if ($name =~ /^PrefixDocs\[(.+)\]$/) {
8747 0           $self->{faces}{$face}{'[prefixDocs]'}{$deadkey} = $1;
8748 0           return $translators{Empty}, '';
8749             }
8750 0 0         if ($name =~ /^Show\[(.+)\]$/) {
8751 0           $self->{faces}{$face}{'[Show]'}{$deadkey} = $self->stringHEX2string($1);
8752 0           return $translators{Empty}, '';
8753             }
8754 0 0         if ($name =~ /^HTML_classes\[(.+)\]$/) {
8755 0 0         (my @c = split /,/, "$1") % 3 and die "HTML_classes[] for key `$deadkey' not come in triples";
8756 0   0       my $C = ( $self->{faces}{$face}{'[HTML_classes]'}{$deadkey || ''} ||= {} ); # Above, deadkey is ||= 0
      0        
8757             # warn "I create HTML_classes for face=$face, prefix=`$deadkey'";
8758 0           while (@c) {
8759 0           my ($where, $class, $chars) = splice @c, 0, 3;
8760 0           ( $chars = $self->stringHEX2string($chars) ) =~ s/\p{Blank}(?=\p{NonspacingMark})//g;
8761 0           push @{ $C->{$where}{$_} }, $class for split //, $chars;
  0            
8762             }
8763 0           return $translators{Empty}, '';
8764             }
8765 0 0         if ($name =~ /^Space(Self)?2Id(?:\[(.+)\])?$/) {
8766 0 0         my $dia = $self->charhex2key((defined $2) ? $2 : do {$used_deadkey = "/$deadkey"; $deadkey}); # XXXX `do' is needed, comma does not work
  0            
  0            
8767 0 0         my $self_OK = $1 ? $dia : 'n/a';
8768 0 0 0 0     return sub ($) { my $c = (shift() || '[none]'); $c = $c->[0] if 'ARRAY' eq ref $c; # Prefix key as usual letter
  0            
8769 0 0 0       ($c eq ' ' or $c eq $self_OK and defined $dia) ? $self->document_char($dia, $name) : undef }, $used_deadkey;
  0            
8770             }
8771 0 0         if ($name =~ /^ShiftFromTo\[(.+)\]$/) {
8772 0           my ($f,$t) = split /,/, "$1";
8773 0           $_ = hex $self->key2hex($self->charhex2key($_)) for $f, $t;
8774 0           $t -= $f; # Treat prefix keys as usual keys:
8775 0 0   0     return sub ($) { my $cc=my $c=shift; return $c unless defined $c; $c = $c->[0] if 'ARRAY' eq ref $c; $self->document_char(chr($t + ord $c), $name, $cc) }, '';
  0 0          
  0            
  0            
  0            
8776             }
8777 0 0         if ($name =~ /^SelectRX\[(.+)\]$/) {
8778 0           my ($rx) = qr/$1/; # Treat prefix keys as usual keys:
8779 0 0   0     return sub ($) { my $cc = my $c=shift; defined $c or return $c; $c = $c->[0] if 'ARRAY' eq ref $c; return undef unless $c =~ $rx; $cc }, '';
  0 0          
  0 0          
  0            
  0            
  0            
8780             }
8781 0 0         if ($name =~ /^FlipShift$/) {
8782 0 0   0     return sub ($) { my $c = shift; defined $c or return $c; map [@$_[1,0]], @$c }, '', 'all_layers';
  0            
  0            
  0            
8783             }
8784 0 0         if ($name =~ /^AssignTo\[(\w+),(\d+)\]$/) {
8785 0           my ($sec, $cnt) = ($1, $2);
8786 0 0         $cnt = 0, warn "Unrecognized section `$sec' in AssignTo" unless my $S = $start_SEC{$sec};
8787 0 0         warn("Too many keys ($cnt) put into section `$sec', max=$S->[1]"), $cnt = $S->[1] if $cnt > $S->[1];
8788 0 0   0     my $toTarget = sub { my $slot = shift; return unless $slot < $cnt; $slot + $S->[0] };
  0            
  0            
  0            
8789 0     0     return sub ($) { @{shift()} }, '', ['all_layers', $toTarget];
  0            
  0            
8790             }
8791 0 0         if ($name =~ /^FromTo(FlipShift)?\[(.+)\]$/) {
8792 0           my $flip = $1;
8793 0           my ($f,$t) = split /,/, "$2", 2;
8794             exists $self->{layers}{$_} or $_ = ($self->make_translated_layers($_, $face, [$N], $deadkey))->[0]
8795 0   0       for $f, $t; # Be conservative for caching...
8796 0           my $B = "~~~{$f>>>$t}";
8797 0           $_ = $self->{layers}{$_} for $f, $t;
8798 0           my (%h, $kk);
8799 0           for my $k (0..$#$f) {
8800 0 0 0       my @fr = map {($_ and ref) ? $_->[0] : $_} @{$f->[$k]};
  0            
  0            
8801 0 0 0       my @to = map {($_ and ref) ? $_->[0] : $_} @{$t->[$k]};
  0            
  0            
8802 0 0         if ($flip) {
8803 0 0         $h{defined($kk = $fr[$_]) ? $kk : ''} = $to[1-$_] for 0,1;
8804             } else {
8805 0 0         $h{defined($kk = $fr[$_]) ? $kk : ''} = $to[$_] for 0,1;
8806             }#
8807             } # Treat prefix keys as usual keys:
8808 0 0   0     return sub ($) { my $cc = my $c = shift; defined $c or return $c; $c = $c->[0] if 'ARRAY' eq ref $c; $self->document_char($h{$c}, $name, $cc) }, $B;
  0 0          
  0            
  0            
  0            
8809             }
8810 0 0         if ($name =~ /^InheritPrefixKeys\[(.+)\]$/) {
8811 0           my $base = $1;
8812             exists $self->{layers}{$_} or $_= ($self->make_translated_layers($_, $face, [$N], $deadkey))->[0]
8813 0   0       for $base;
8814 0           my $baseL = $self->{layers}{$base};
8815 0           my (%h);
8816 0           for my $k (0..$#$baseL) {
8817 0           for my $shift (0..1) {
8818 0 0         my $C = $baseL->[$k][$shift] or next;
8819 0 0 0       next unless ref $C and $C->[2]; # prefix
8820 0           $h{"$N $k $shift $C->[0]"} = $C;
8821             }
8822             } # Treat prefix keys as usual keys:
8823 0 0 0 0     return sub ($) { my $c = shift; defined $c or return $c; return $c if 'ARRAY' eq ref $c and $c->[2]; $h{"@_ $c"} or $c }, $base;
  0 0          
  0 0          
  0            
  0            
8824             }
8825 0 0         if ($name =~ /^ByColumns\[(.+)\]$/) {
8826 0 0         my @chars = map {length() ? $self->charhex2key($_) : undef} split /,/, "$1";
  0            
8827 0 0         my $g = $self->{faces}{$face}{'[geometry]'}
8828             or die "Face `$face' has no associated layer with geometry info; did you set geometry_via_layer?";
8829 0   0       my $o = ($self->{faces}{$face}{'[g_offsets]'} or [(0) x @$g]);
8830 0           $o = [@$o]; # deep copy
8831 0           my ($tot, %c) = 0;
8832             # warn "geometry: [@$g] [@$o]";
8833 0           for my $r (@$g) {
8834 0           my $off = shift @$o;
8835 0           $c{$tot + $_} = $_ + $off for 0..($r-1);
8836 0           $tot += $r;
8837             }
8838 0 0 0 0     return sub ($$$$) { (undef, my ($L, $k, $shift)) = @_; return undef if $L or $shift or $k >= $tot; $self->document_char($chars[$c{$k}], "ByColumn[$c{$k}]") }, '';
  0   0        
  0            
  0            
8839             }
8840 0 0         if ($name =~ /^ByRows\[(.+)\]$/) {
8841 0           s(^\s+(?!\s|///\s+))(), s((?
8842 0           my (@recipes, @subs) = split m(\s+///\s+), $recipes;
8843 0           my $LL = $#{ $self->{faces}{$face}{layers} }; # Since all_layers, we are called only for layer 0; subrecipes may need more
  0            
8844 0           for my $rec (@recipes) {
8845 0 0   0     push(@subs, sub {return undef}), next unless length $rec;
  0            
8846             #warn "recipe=`$rec'; face=`$face'; N=$N; deadkey=`$deadkey'; last_layer=$LL";
8847 0           my ($tr) = $self->make_translator_for_layers( $rec, $deadkey, $face, [0..$LL] );
8848             #warn " done";
8849 0           push @subs, $tr;
8850             }
8851 0 0         my $g = $self->{faces}{$face}{'[geometry]'}
8852             or die "Face `$face' has no associated layer with geometry info; did you set geometry_via_layer?";
8853 0           my ($tot, $row, %r) = (0, 0);
8854             # warn "geometry: [@$g] [@$o]";
8855 0           for my $r (@$g) {
8856 0           $r{$tot + $_} = $row for 0..($r-1);
8857 0           $tot += $r;
8858 0           $row++;
8859             }
8860             # return sub ($$$$) { (undef, undef, my $k) = @_; return undef if $k >= $tot; return undef if $#recipes < (my $r = $r{$k});
8861             # die "Undefined recipe: row=$row; face=`$face'; N=$N; deadkey=`$deadkey'; ARGV=(@_)" unless $subs[$r];
8862             # goto &{$subs[$r]} }, '';
8863 0 0 0 0     return sub ($$) { (undef, my $k) = @_; return [] if $k >= $tot or $#recipes < (my $r = $r{$k});
  0            
8864 0 0         die "Undefined recipe: row=$row; face=`$face'; N=$N; deadkey=`$deadkey'; ARGV=(@_)" unless $subs[$r];
8865 0           goto &{$subs[$r]} }, '', 'all_layers';
  0            
  0            
8866             }
8867 0 0         if ($name =~ /^(?:Diacritic|Mutate)(SpaceOK)?(Hack)?(2Self)?(DupsOK)?(32OK)?(?:\[(.+)\])?$/) {
8868 0           my ($spaceOK, $hack, $toSelf, $dupsOK, $w32OK) = ($1, $2, $3, $4, $5);
8869 0 0         my $Dia = ((defined $6) ? $6 : do {$used_deadkey ="/$deadkey"; $deadkey}); # XXXX `do' is needed, comma does not work
  0            
  0            
8870 0 0         if ($toSelf) {
8871 0 0         die "Mutate2Self does not make sense with SpaceOK/Hack/DupsOK/32OK" if grep $_, $hack, $spaceOK, $dupsOK, $w32OK;
8872 0           $Dia = $self->charhex2key($Dia);
8873 0 0         my(@sp, %sp) = map {(my $in = $_) =~ s/(?<=.)\@$//s; $in} @{ ($self->get_VK($face))->{SPACE} || [] };
  0            
  0            
  0            
8874 0           @sp = map $self->charhex2key($_), @sp;
8875 0           my $flip_AltGr = $self->{faces}{$face}{'[Flip_AltGr_Key]'};
8876 0 0         $flip_AltGr = $self->charhex2key($flip_AltGr) if defined $flip_AltGr;
8877 0 0         @sp = grep $flip_AltGr ne $_, @sp if defined $flip_AltGr; # It has a different function...
8878 0           @sp{@sp[1..$#sp]} = (0..$#sp); # The leading elt is the scancode
8879             # warn "SPACE on $Dia: <", join('> <', %sp), '>';
8880             return sub ($) {
8881 0     0     $self->document_chars_on_key([$self->diacritic2self_2($Dia, shift, $face, \%sp)], $name)
8882 0           }, $used_deadkey, 'all_layers';
8883             }
8884            
8885 0           my $isPrimary;
8886 0 0         $Dia =~ s/^\+// and $isPrimary++; # Wait until are expanded
8887            
8888 0           my $f = $self->get_NamesList;
8889 0 0         $self->load_compositions($f) if defined $f;
8890            
8891 0           $f = $self->get_AgeList;
8892 0 0 0       $self->load_uniage($f) if defined $f and not $self->{Age};
8893             # New processing: - = strip 1 from end; -3/ = strip 1 from the last 3
8894             #warn "Doing `$Dia'";
8895             #print "Doing `$Dia'\n";
8896             #warn "Age of <à> is <$self->{Age}{à}>";
8897 0           $Dia =~ s(){ (my $R = $1) =~ s/-/_/g;
  0            
8898 0 0         die "Named recipe `$1' unknown" unless exists $self->{faces}{$face}{"Named_DIA_Recipe__$R"};
8899             # (my $r = $self->{faces}{$face}{"Named_DIA_Recipe__$R"}) =~ s/^\s+//;
8900 0           $self->recipe2str($self->{faces}{$face}{"Named_DIA_Recipe__$R"}) }ge;
8901 0 0         $Dia =~ s/\|{3,4}/|/g if $isPrimary;
8902 0           my($skip, $limit, @groups, @groups2, @groups3) = (0);
8903 0           my($have4, @Dia) = (1, split /\|\|\|\|/, $Dia, -1);
8904 0 0         $have4 = 0, @Dia = split /\|\|\|/, $Dia, -1 if 1 == @Dia;
8905 0 0         if (1 < @Dia) {
8906 0 0         die "Too many |||- or ||||-sections in <$Dia>" if @Dia > 3;
8907 0           my @Dia2 = split /\|\|\|/, $Dia[1], -1;
8908 0 0         die "Too many |||-sections in the second ||||-section in <$Dia>" if @Dia2 > 2;
8909             # splice @Dia, 1, 1, @Dia2;
8910 0 0         @Dia2 = @Dia, shift @Dia2 unless $have4;
8911 0 0         $skip = (@Dia2 > 1 ? 1 + ($Dia2[0] =~ tr/|/|/) : 0);
8912 0 0 0       $Dia[1] .= "|$Dia[2]", pop @Dia if not $have4 and @Dia == 3;
8913             # $limit = 1 + ($Dia[-1] =~ tr/|/|/) + $skip;
8914 0           $limit = 0; # Not needed with the current logic...
8915 0           my @G = map [$self->dialist2lists($_)], @Dia; # will reverse when merging many into one cached...
8916 0           @groups = @{shift @G};
  0            
8917 0 0         @groups2 = @{shift @G} if @G;
  0            
8918 0 0         @groups3 = @{shift @G} if @G;
  0            
8919             } else {
8920 0           @groups = $self->dialist2lists($Dia);
8921             }
8922             #warn "Dia `$Dia' -> ", $self->array2string([$limit, $skip, @groups]);
8923 0           my $L = $self->{faces}{$face}{layers};
8924 0           my @L = map $self->{layers}{$_}, @$L;
8925 0   0       my $Sub = $self->{faces}{$face}{'[AltSubstitutions]'} || {};
8926             # warn "got AltSubstitutions: <",join('> <', %$Sub),'>' if $Sub;
8927             return sub {
8928 0     0     my $K = shift; # bindings of the key
8929 0 0         return ([]) x @$K unless grep defined, $self->flatten_arrays($K); # E.g, ByPairs and SelectRX produce many empty entries...
8930             #warn "Undefined base key for diacritic <$Dia>: <", join('> <', map {defined() ? $_ : '[undef]'} $self->flatten_arrays($K)), '>' unless defined $K->[0][0];
8931             #warn "Input for <$Dia>: <", join('> <', map {defined() ? $_ : '[undef]'} $self->flatten_arrays($K)), '>';
8932 0           my $base = $K->[0][0];
8933 0 0         $base = '' unless defined $base;
8934 0 0         $base = $base->[0] if ref $base;
8935 0 0 0       return ([]) x @$K if not $spaceOK and $base eq ' '; # Ignore possiblity that SPACE is a deadKey
8936 0           my $sorted = $self->sort_compositions(\@groups, $K, $Sub, $dupsOK, $w32OK);
8937 0           my ($sorted2, $sorted3, @idx_sorted3);
8938 0 0         $sorted2 = $self->sort_compositions(\@groups2, $K, $Sub, $dupsOK, $w32OK) if @groups2;
8939 0 0         $sorted3 = $self->sort_compositions(\@groups3, $K, $Sub, $dupsOK, $w32OK) if @groups3;
8940 0 0         @idx_sorted3 = @$sorted + (@groups2 ? @$sorted2 : 0) if @groups3; # used for warnings only
    0          
8941             $self->{faces}{$face}{'[in_dia_chains]'}{$_}++
8942 0 0 0       for grep defined, ($hack ? () : map {($_ and ref) ? $_->[0] : $_}
  0 0          
8943             # index as $res->[group][penalty_N][double_occ][layer][NN][shift]
8944 0 0         map {$_ ? @$_ : ()} map {$_ ? @$_ : ()} map {$_ ? @$_ : ()} map {$_ ? @$_ : ()} map {$_ ? @$_ : ()}
  0 0          
  0 0          
  0 0          
  0 0          
8945 0 0         @$sorted, @{$sorted2 || []}, @{$sorted3 || []});
  0 0          
8946             # map {($_ and ref) ? $_->[0] : $_} map @{$_||[]}, @out
8947 0           require Dumpvalue if printSORTEDLISTS;
8948 0           Dumpvalue->new()->dumpValue(["Key $base", $sorted]) if printSORTEDLISTS;
8949 0           warn $self->report_sorted_l($base, [@$sorted, @{$sorted2 || []}, @{$sorted3 || []}], [scalar @$sorted, $skip + scalar @{$sorted || []}, @idx_sorted3])
8950             if warnSORTEDLISTS;
8951 0           my $LLL = '';
8952 0 0         if ($sorted2) {
8953 0           my (@slots, @LL);
8954 0           for my $l (0..$#L) {
8955 0           push @slots, $self->shift_pop_compositions($sorted2, $l, !'from end', !'omit', $limit, $skip, my $ll = []);
8956 0           push @LL, $ll;
8957 0           print 'From Layers <', join('> <', map {defined() ? $_ : 'undef'} @$ll), ">\n" if printSORTEDLISTS;
8958 0           $LLL .= ' | ' . join(' ', map {defined() ? $_ : 'undef'} @$ll) if warnSORTEDLISTS;
8959             }
8960 0           print 'TMP Extracted ', $self->array2string($slots[0]), "\n" if printSORTEDLISTS;
8961 0           print 'TMP Extracted ', $self->array2string([@slots[1..$#slots]]), " deadKey=$deadkey\n" if printSORTEDLISTS;
8962 0   0       my $appended = $self->append_keys($sorted3 || $sorted2, \@slots, \@LL, !$sorted3 && 'prepend');
      0        
8963 0           Dumpvalue->new()->dumpValue(["Key $base; II", $sorted2]) if printSORTEDLISTS;
8964 0           if (warnSORTEDLISTS) {
8965             $LLL =~ s/^[ |]+//;
8966             $_++ for @idx_sorted3; # empty or 1 elt
8967             warn "TMP Extracted: ", $self->array2string(\@slots), " from layers $LLL\n"; # 1 is for what is prepended by append_keys()
8968             warn $self->report_sorted_l($base, [@$sorted, @$sorted2, @{$sorted3 || []}], # Where to put bold/dotted-bold separators:
8969             [scalar @$sorted, !!$appended + $skip + scalar @$sorted, @idx_sorted3], ($appended ? [1 + scalar @$sorted] : ()));
8970             }
8971             }
8972 0           my(@out, %seen);
8973 0           for my $Ln (0..$#L) {
8974 0           $out[$Ln] = $self->shift_pop_compositions($sorted, $Ln);
8975 0 0 0       $seen{$_}++ for grep defined, map {($_ and ref) ? $_->[0] : $_} @{$out[$Ln]};
  0            
  0            
8976             }
8977 0           for my $L (@out) { # $L is an array indexed by shift state
8978 0 0 0       $L = [map {(not $_ or ref $_) ? $_ : [$_,undef,undef,'Diacritic operator']} @$L];
  0            
8979             }
8980             # Insert non-yet-inserted characters from $sorted2, $sorted3
8981 0           for my $extra (['from end', $sorted2, 2], [0, $sorted3, 3]) {
8982 0 0         next unless $extra->[1];
8983 0           $self->deep_undef_by_hash(\%seen, $extra->[1]);
8984 0           for my $Ln (0..$#L) {
8985 0           my $o = $out[$Ln];
8986 0 0 0       unless (defined $o->[0] and defined $o->[1]) {
8987 0           my $o2 = $self->shift_pop_compositions($extra->[1], $Ln, $extra->[0], !'omit', !'limit', 0, undef, defined $o->[0], defined $o->[1]);
8988 0 0 0       $o2 = [map {(!defined $_ or ref) ? $_ : [$_,undef,undef,"Diacritic operator (choice $extra->[2])"]} @$o2];
  0            
8989 0   0       defined $o->[$_] or $o->[$_] = $o2->[$_] for 0,1;
8990 0 0 0       $seen{$_}++ for grep defined, map {($_ and ref) ? $_->[0] : $_} @$o;
  0            
8991             }
8992             }
8993             }
8994 0           print 'Extracted ', $self->array2string(\@out), " deadKey=$deadkey\n" if printSORTEDLISTS;
8995 0           warn 'Extracted ', $self->array2string(\@out), " deadKey=$deadkey\n" if warnSORTEDLISTS;
8996             $self->{faces}{$face}{'[from_dia_chains]'}{$_}++
8997 0 0 0       for grep defined, ($hack ? () : map {($_ and ref) ? $_->[0] : $_} map @{$_||[]}, @out);
  0 0          
  0 0          
8998             #warn "Age of <à> is <$self->{Age}{à}>";
8999             #warn "Output: <", join('> <', map {defined() ? $_ : '[undef]'} $self->flatten_arrays(\@out)), '>';
9000 0           return @out;
9001 0           }, $used_deadkey, 'all_layers';
9002             }
9003 0 0         if ($name =~ /^DefinedTo\[(.+)\]$/) {
9004 0           my $to = $self->charhex2key($1);
9005 0 0   0     return sub ($) { my $c = shift; defined $c or return $c; $self->document_char($to, 'DefinedTo', $c) }, '';
  0            
  0            
  0            
9006             }
9007 0 0         if ($name =~ /^ByPairs((Inv)?Prefix)?(Apple)?\[(.+)\]$/) {
9008 0           my ($prefix, $invert, $Apple, $in, @Pairs, %Map) = ($1, $2, $3, $4);
9009 0           $in =~ s/^\s+//;
9010 0           @Pairs = split /\s+(?!\p{NonspacingMark})/, $in;
9011 0           for my $p (@Pairs) {
9012 0           while (length $p) {
9013 0 0         die "Odd number of characters in a ByPairs map <$in>"
9014             unless $p =~ s/^((?:\p{Blank}\p{NonspacingMark}|(?:\b\.)?[0-9a-f]{4,}\b(?:\.\b)?|.){2})//i;
9015 0           (my $Pair = $1) =~ s/\p{Blank}//g;
9016             #warn "Pair = <$Pair>";
9017             # Cannot do it earlier, since HEX can introduce new blanks
9018 0           $Pair =~ s/(?<=[0-9a-f]{4})\.$//i; # Remove . which was on \b before extracting substring
9019 0           $Pair = $self->stringHEX2string($Pair);
9020             #warn " --> <$Pair>";
9021 0 0         die "Can't split ByPairs rule into a pair: I see <$Pair>" unless 2 == scalar (my @c = split //, $Pair);
9022             die qq("From" character <$c[0] duplicated in a ByPairs map <$in>)
9023 0 0         if exists $Map{$c[0]};
9024 0 0         $Map{$c[0]} = ($prefix ? [$c[1], undef, ($invert ? 3 : 1)<<3] : $c[1]); # massage_imported2 makes >> 3
    0          
9025             }
9026             }
9027 0 0         die "Empty ByPairs map <$in>" unless %Map; # Treat prefix keys as usual keys:
9028 0 0         if ($Apple) {
9029 0           $self->{faces}{$face}{'[AppleMap]'}[$N]{$_} = $Map{$_} for keys %Map;
9030 0           %Map = ();
9031             }
9032 0 0   0     return sub ($) { my $c = shift; defined $c or return $c; $c = $c->[0] if 'ARRAY' eq ref $c; $self->document_char($Map{$c}, 'explicit tuneup') }, '';
  0 0          
  0            
  0            
  0            
9033             }
9034 0           my $map = $self->get_deep($self, 'DEADKEYS', split m(/), $name);
9035 0 0         die "Can't resolve character map `$name'" unless defined $map;
9036 0 0         unless (exists $map->{map}) {{
9037 0           my($k1) = keys %$map;
  0            
9038 0 0 0       die "Character map `$name' does not contain HEX: `$k1'" if %$map and not $k1 =~ /^[0-9a-f]{4,}$/;
9039 0 0         die "Character map is a parent-type map, but no deadkey to use specified" unless defined $deadkey;
9040 0           my $Map = { map +(chr hex $_, $map->{$_}), keys %$map };
9041             die "Character map `$name' does not contain `$deadkey', contains <", (join '> <', keys %$map), ">"
9042 0 0         unless exists $Map->{chr hex $deadkey};
9043 0 0         $map = $Map->{chr hex $deadkey}, $used_deadkey = "/$deadkey" if %$Map;
9044 0 0         $map = {map => {}}, warn "Character map for `$name' empty" unless %$map;
9045             }}
9046 0 0         die "Can't resolve character map `$name' `map': <", (join '> <', %$map), ">" unless defined $map->{map};
9047 0           $map = $map->{map};
9048 0           my $Map = { map +(chr hex $_, chr hex($map->{$_})), keys %$map }; # hex form is not unique
9049             ( sub ($) { # Treat prefix keys as usual keys:
9050 0 0   0     my $c = shift; defined $c or return $c; $c = $c->[0] if 'ARRAY' eq ref $c; $self->document_char($Map->{$c}, "DEADKEYS=$name")
  0 0          
  0            
  0            
9051 0           }, $used_deadkey )
9052             }
9053            
9054             sub depth1_A_translator($$) { # takes a ref to an array of chars
9055 0     0 0   my ($self, $tr) = (shift, shift);
9056             return sub ($) {
9057 0     0     my $in = shift;
9058 0           [map $tr->($_), @$in]
9059             }
9060 0           }
9061            
9062             sub depth2_translator($$) { # takes a ref to an array of arrays of chars
9063 0     0 0   my ($self, $tr) = (shift, shift);
9064             return sub ($$) {
9065 0     0     my ($in, $k, @out) = (shift, shift);
9066 0           for my $L (0..$#$in) {
9067 0           my $Tr = $tr->[$L];
9068 0 0         die "Undefined translator for layer=$L; total=", scalar @$tr unless defined $Tr;
9069 0           push @out, [map $Tr->($in->[$L][$_], $L, $k, $_), 0..$#{$in->[$L]}]
  0            
9070             }
9071             @out
9072 0           }
9073 0           }
9074            
9075             sub make_translator_for_layers ($$$$$) { # translator may take some values from "environment"
9076             # (such as which deadkey is processed), so caching is tricky: if does -> $used_deadkey reflects this
9077             # The translator should return exactly one value (possibly undef) so that map TRANSLATOR, list works intuitively.
9078 0   0 0 0   my ($self, $name, $deadkey, $face, $NN) = (shift, shift, shift || 0, shift, shift); # $deadkey used eg for diagnostics
9079 0           my ($Tr, $used, $for_layers) = $self->make_translator( $name, $deadkey, $face, $NN->[0] );
9080 0 0         ($for_layers, my $cvt) = (ref $for_layers ? @$for_layers : $for_layers);
9081 0 0         return $Tr, [map "$used![$_]", @$NN], $cvt if $for_layers;
9082 0           my @Tr = map [$self->make_translator($name, $deadkey, $face, $_)], @$NN;
9083 0           $self->depth2_translator([map $_->[0], @Tr]), [map $_->[1], @Tr], $cvt;
9084             }
9085            
9086             sub make_translated_layers_tr ($$$$$$$) { # Apply translation map
9087 0     0 0   my ($self, $layers, $tr, $append, $deadkey, $face, $NN) = (shift, shift, shift, shift, shift, shift, shift);
9088 0           my ($Tr, $used, $cvt) = $self->make_translator_for_layers($tr, $deadkey, $face, $NN);
9089             #warn " tr=<$tr>, key=<$deadkey>, used=<$used>";
9090 0   0       my @new_names = map "$tr$used->[$_]($layers->[$_])$append" . ($append and $NN->[$_]), 0..$#$NN;
9091 0 0         return @new_names unless grep {not exists $self->{layers}{$_}} @new_names;
  0            
9092             # warn "Translating via `$tr' from layer [$layer]: <", join('> <', map "@$_", @{$self->{layers}{$layer}}), '>';
9093 0           my (@L, @LL) = map $self->{layers}{$_}, @$layers;
9094 0           for my $n (0..$#{$L[0]}) { # key number
  0            
9095 0           my @C = $Tr->( [ map $L[$_][$n], 0..$#L ], $n ); # rearrange one key into $X[$Layer][$shift]
9096 0 0         if ($cvt) {
9097 0   0       defined $cvt->($n) and $LL[$_][$cvt->($n)] = $C[$_] for 0..$#L;
9098             } else {
9099 0           push @{$LL[$_]}, $C[$_] for 0..$#L;
  0            
9100             }
9101             }
9102 0           $self->{layers}{$new_names[$_]} = $LL[$_] for 0..$#L;
9103             @new_names
9104 0           }
9105            
9106             sub key2string ($$) {
9107 0     0 0   my ($self, $key, @o) = (shift, shift);
9108 0 0         return '<>' unless defined $key;
9109 0 0         return '[]' unless grep defined, @$key;
9110 0           for my $k (@$key) {
9111 0 0         push(@o, 'undef'), next unless defined $k;
9112 0 0         push @o, ((ref $k) ? (defined $k->[0] ? $k->[0] : '') : $k);
    0          
9113             }
9114             "[@o]"
9115 0           }
9116            
9117             sub layer2string ($$) {
9118 0     0 0   my ($self, $layer, $last, $rest) = (shift, shift, -1, '');
9119 0           my @o = map $self->key2string($_), @$layer;
9120 0   0       2 < length $o[$_] and $last = $_ for 0..$#o;
9121 0 0         $rest = '...' if $last != $#o;
9122 0           (join ' ', @o[0..$last]) . $rest
9123             }
9124            
9125             sub make_translated_layers_stack ($$@) { # Stacking
9126 0     0 0   my ($self, @out, $ref) = (shift);
9127 0           my $c = @{$_[0]};
  0            
9128             @$_ == $c or die "Stacking: number of layers ", scalar(@$_), " != number of layers $c of the first elt"
9129 0   0       for @_;
9130 0           for my $lN (0..$c-1) { # layer Number
9131 0           my @layers = map $_->[$lN], @_;
9132 0           push @out, "@layers";
9133 0           if (debug_stacking) {
9134             warn "Stack in-layer $lN `$_': ", $self->layer2string($self->{layers}{$_}), "\n" for @layers;
9135             }
9136 0 0         next if exists $self->{layers}{"@layers"};
9137 0           my (@L, @keys) = map $self->{layers}{$_}, @layers;
9138 0           for my $lI (0..$#L) {
9139 0           my $l = $L[$lI];
9140             # warn "... Layer$lN: `$layers[$lI]'..." if debug_stacking;
9141 0           for my $k (0..$#$l) {
9142 0           for my $kk (0..$#{$l->[$k]}) {
  0            
9143 0           if (debug_STACKING and defined( my $cc = $l->[$k][$kk] )) {
9144             $cc = $cc->[0] if ref $cc;
9145             warn "...... On $k/$kk (${lI}th lN=$lN): I see `$cc': ", !defined $keys[$k][$kk], "\n" ;
9146             }
9147 0 0 0       $keys[$k][$kk] = $l->[$k][$kk] if defined $l->[$k][$kk] and not defined $keys[$k][$kk]; # Shallow copy
9148             }
9149 0   0       $keys[$k] ||= [];
9150             }
9151             }
9152 0           $self->{layers}{"@layers"} = \@keys;
9153 0           warn "Stack out-layer $lN `@layers':\n\t", $self->layer2string(\@keys), "\n" if debug_stacking;
9154             }
9155 0           warn 'Stack out-layers:', (join "\n\t", '', @out), "\n" if debug_stacking;
9156 0           @out;
9157             }
9158            
9159             sub make_translated_layers_noid ($$$@) { # Stacking
9160 0     0 0   my ($self, $whole, $refr, @out, $ref, @seen) = (shift, shift, shift);
9161 0           my $c = @$refr;
9162             #warn "noid: join ", scalar @_, " faces of $c layers; ref=[@$refr] first=[@{$_[0]}]";
9163             @$_ == $c or die "Stacking: number of layers ", scalar(@$_), " != number of layers $c of the reference face"
9164 0   0       for @_;
9165 0           my @R = map $self->{layers}{$_}, @$refr;
9166 0 0         if ($whole) {
9167 0           my $last = $#{$R[0]};
  0            
9168 0           for my $key (0..$last) {
9169 0           for my $l (@R) {
9170 0 0         $seen[$key]{$_}++ for map {ref() ? $_->[0] : $_} grep defined, @{ $l->[$key] };
  0            
  0            
9171             #warn "$key of $last: keys=", join(',',keys %{$seen[$key]});
9172             }
9173             }
9174             }
9175 0           my $name = 'NOID([' . join('], [', map {join ' +++ ', @$_} @_) . '])';
  0            
9176 0           for my $l (0..$c-1) {
9177 0           my (@layers) = map $_->[$l], @_;
9178 0 0         if ($whole) {
9179 0           $name .= "'" # Keep names of layers distinct, but since they are all interdependent, do not construct basing on layer names
9180             } else {
9181 0           $name = "NOID[$refr->[$l]](" . (join ' +++ ', @layers) . ')'
9182             }
9183 0           push @out, $name;
9184             #warn ". Doing layer number $l, name=`$name'...";
9185 0 0         next if exists $self->{layers}{$name};
9186 0           my ($Refr, @L, @keys) = map $self->{layers}{$_}, $refr->[$l], @layers;
9187 0           for my $ll (@L) {
9188             #warn "... Another layer for $l...";
9189 0           for my $k (0..$#$ll) {
9190 0           for my $kk (0..$#{$ll->[$k]}) {
  0            
9191             #warn "...... On $k/$kk: I see `$ll->[$k][$kk]'; seen=`$seen[$k]{$ll->[$k][$kk]}'; keys=", join(',',keys %{$seen[$k]}) if defined $ll->[$k][$kk];
9192 0           my $ch = $ll->[$k][$kk];
9193 0           my $rch = $R[$l][$k][$kk];
9194 0 0 0       $ch = $ch->[0] if $ch and ref $ch;
9195 0 0 0       $rch = $rch->[0] if $rch and ref $rch;
9196             $keys[$k][$kk] = $ll->[$k][$kk] # Deep copy
9197             if defined $ch and not defined $keys[$k][$kk]
9198 0 0 0       and ($whole ? !$seen[$k]{$ch} : $ch ne ( defined $rch ? $rch : '' ));
    0 0        
    0          
9199             }
9200 0   0       $keys[$k] ||= [];
9201             }
9202             }
9203 0           $self->{layers}{$name} = \@keys;
9204             }
9205 0           warn "NOID --> <@out>\n" if debug_noid;
9206 0           @out;
9207             }
9208            
9209             sub paren_match_q ($$) {
9210 0     0 0   my ($self, $s) = (shift, shift);
9211 0           ($s =~ tr/(/(/) == ($s =~ tr/)/)/)
9212             }
9213            
9214             sub brackets_match_q ($$) {
9215 0     0 0   my ($self, $s) = (shift, shift);
9216 0           ($s =~ tr/[/[/) == ($s =~ tr/]/]/)
9217             }
9218            
9219             sub join_min_paren_brackets_matched ($$@) {
9220 0     0 0   my ($self, $join, @out) = (shift, shift, shift);
9221             #warn 'joining <', join('> <', @out, @_),'>';
9222 0           while (@_) {
9223 0   0       while (@_ and not ($self->paren_match_q($out[-1]) and $self->brackets_match_q($out[-1]))) {
      0        
9224 0           $out[-1] .= $join . shift;
9225             }
9226 0 0         push @out, shift if @_;
9227             }
9228             @out
9229 0           }
9230            
9231             sub face_by_face_recipe ($$$) {
9232 0     0 0   my($self, $f, $base) = (shift, shift, shift);
9233 0 0         return if $self->{faces}{$f}{layers};
9234 0 0         return unless $self->{face_recipes}{$f};
9235             die "Can't determine number of layers in face `$f': face_recipe exists, but not numLayers"
9236 0 0         unless defined (my $n = $self->{faces}{$base}{numLayers});
9237 0           warn "Massaging face `$f': use face_recipes...\n" if debug_face_layout_recipes;
9238 0           $self->{faces}{$f}{layers} = [('Empty') x $n]; # Preliminary (so know the length???)
9239 0           $self->{faces}{$f}{layers} = $self->layers_by_face_recipe($f, $base);
9240             }
9241            
9242             sub layers_by_face_recipe ($$$) {
9243 0     0 0   my ($self, $face, $base) = (shift, shift, shift);
9244 0 0         die "No face recipe for `$face' found" unless my $r = $self->{face_recipes}{$face};
9245 0           $r = $self->recipe2str($r);
9246             #print "face recipe `$face'\n";
9247 0           my $LL = $self->{faces}{$base}{layers};
9248 0           warn "Using face_recipes for `$face', base=$base ==> `$r'\n" if debug_face_layout_recipes;
9249 0           my $L = $self->{faces}{$face}{layers} = $self->make_translated_layers($r, $base, [0..$#$LL]);
9250             #print "face recipe `$face' -> ", $self->array2string($L), "\n";
9251             # warn "Using face_recipes `$face' -> ", $self->array2string($L) if debug_face_layout_recipes;
9252 0           warn "Massaged face `$face' ->", (join "\n\t", '', @$L), "\n" if debug_face_layout_recipes;
9253             #warn "face recipe `$face' --> ", $self->array2string([map $self->{layers}{$_}, @$L]);
9254 0           $L;
9255             }
9256            
9257             sub export_layers ($$$;$) {
9258 0     0 0   my ($self, $face, $base, $full) = (shift, shift, shift, shift);
9259             # warn "Doing FullFace on <$face>, base=<$base>\n" if $full;
9260             ($full ? undef : $self->{faces}{$face}{'[ini_layers_prefix]'} || $self->{faces}{$face}{'[ini_layers]'}) ||
9261             $self->{faces}{$face}{layers}
9262 0 0 0       || $self->layers_by_face_recipe($face, $base)
    0 0        
9263             }
9264            
9265             sub pseudo_layer ($$$$) {
9266 0     0 0   my ($self, $recipe, $face, $N) = (shift, shift, shift, shift);
9267 0           my $ll = my $l = $self->pseudo_layer0($recipe, $face, $N);
9268             # warn "Pseudo-layer recipe `$recipe', face=`$face', N=$N ->\n\t$l\n" if $recipe =~ /Greek__/;
9269             #warn("layer recipe: `$l'"),
9270 0 0         ($l = $self->layer_recipe($l)) =~ s/^\s+// if exists $self->{layer_recipes}{$ll};
9271 0           warn "pseudo_layer(`$recipe'): Using layout_recipe `$l' for layer '$ll'\n" if debug_face_layout_recipes and exists $self->{layer_recipes}{$ll};
9272 0 0         return $l if $self->{layers}{$l};
9273 0           ($self->make_translated_layers($l, $face, [$N]))->[0]
9274             # die "Component `$l' of a pseudo-layer cannot be resolved"
9275             }
9276            
9277             sub pseudo_layer0 ($$$$) {
9278 0     0 0   my ($self, $recipe, $face, $N) = (shift, shift, shift, shift);
9279 0 0         if ($recipe eq 'LinkFace') {
9280 0 0         my $L = $self->{faces}{$face}{LinkFace} or die "Face `$face' has no LinkFace";
9281 0           return ($self->export_layers($L, $face))->[$N];
9282             }
9283 0 0         return ($self->export_layers($face, $face))->[$N] if $recipe eq 'Self';
9284 0 0         if ($recipe =~ /^Layers\((.*\+.*)\)$/) {
9285 0           my @L = split /\+/, "$1";
9286 0           return $L[$N];
9287             }
9288 0           my $N1 = $self->flip_layer_N($N, $#{ $self->{faces}{$face}{layers} });
  0            
9289 0 0         if ($recipe eq 'FlipLayersLinkFace') {
9290 0 0         my $L = $self->{faces}{$face}{LinkFace} or die "Face `$face' has no LinkFace";
9291 0           return ($self->export_layers($L, $face))->[$N1];
9292             }
9293             #warn "Doing flip/face via `$recipe', N=$N, N1=$N1, face=`$face'";
9294 0 0         return ($self->export_layers($face, $face))->[$N1] if $recipe eq 'FlipLayers';
9295             # my $gr_debug = ($recipe =~ /Greek__/);
9296 0           if (debug_PERL_dollar1_scoping) {
9297             return ($self->export_layers("$3", $face, !!$2))->[$1 ? $N : $N1]
9298             if $recipe =~ /^(?:((Full)?Face)|FlipLayers)\((.*)\)$/;
9299             } else {
9300 0           my $m1; # Apparently, in perl5.10, if replace $m1 by $1 below, $1 loses its TRUE value between match and evaluation of $1
9301             # ($gr_debug and warn "Pseudo-layer `$recipe', face=`$face', N=$N, N1=$N1\n"),
9302 0 0 0       return ($self->export_layers("$3", $face, !!$2))->[$m1 ? $N : $N1]
    0          
9303             if $recipe =~ /^(?:((Full)?Face)|FlipLayers)\((.*)\)$/ and ($m1 = $1, 1);
9304             }
9305 0 0         if ($recipe =~ /^prefix(NOTSAME(case)?)?=(.+)$/) { # `case´ unsupported
9306             # Analogue of NOID with the principal layers as reference, and layers of DeadKey as sources
9307 0           my($notsame, $case) = ($1,$2);
9308 0           my $hexPrefix = $self->key2hex($self->charhex2key($3));
9309 0           $self->ensure_DeadKey_Map($face, $hexPrefix);
9310 0 0         my $layers = $self->{faces}{$face}{'[deadkeyLayers]'}{$hexPrefix} or die "Unknown prefix character `$hexPrefix´ in layers-from-prefix-key";
9311 0 0 0       return $layers->[$N] if $N or not $notsame;
9312 0           my $name = "NOTSAME[$face]$layers->[$N]";
9313 0 0         return $self->{layers}{$name} if $self->{layers}{$name};
9314 0           my @LL = map $self->{layers}{$_}, @$layers;
9315 0           my $L0 = $self->{faces}{$face}{layers};
9316 0           my @L0 = map $self->{layers}{$_}, @$L0;
9317 0           my @OUT;
9318 0           for my $charN (0..$face->{'[non_VK]'}-1) {
9319 0           my (@L, %s) = map $_->[$charN], @LL;
9320 0           for my $layers0 (map $_->[$charN], @$L0) {
9321 0           for my $sh (@$layers0) {
9322 0 0         $s{ref($sh) ? $sh->[0] : $sh}++ if defined $sh;
    0          
9323             }
9324             }
9325 0           my(@CC, @pp, @OK);
9326 0 0 0       for my $l (@L[0 .. (($notsame && !$N) ? @{ $self->{faces}{$face}{layers} } - 1 : 0)]) {
  0            
9327 0           my(%s1, @was, @out);
9328 0           for my $sh (0..$#$l) { # $self->dead_with_inversion(!'hex', $_, $face, $self->{faces}{$face})
9329 0 0         my @C = map {defined() ? (ref() ? $_->[0] : $_) : $_} $l->[$sh];
  0 0          
9330 0 0         my @p = map {defined() ? (ref() ? $_->[2] : 0 ) : 0 } $l->[$sh];
  0 0          
9331 0 0 0       ($CC[$sh], $pp[$sh]) = ($C[0], $p[0]) if not defined $CC[$sh] and defined $C[0];
9332 0 0 0       ($CC[$sh], $pp[$sh], $OK[$sh], $s1{$C[0]}) = ($C[0], $p[0], 1,1) if !$OK[$sh] and defined $C[0] and not $s{$C[0]};
      0        
9333             ($CC[$sh], $pp[$sh], $OK[$sh], $s1{$was[0]}) = (@was, 1,1) # use unshifted if needed
9334 0 0 0       if $sh and !$OK[$sh] and defined $C[0] and defined $was[0] and not $s{$was[0]} and not $s1{$was[0]};
      0        
      0        
      0        
      0        
9335 0 0         @was = ($C[0], $p[0]) unless $sh; # may omit `unless´
9336             # $cnt++ if defined $CC[$sh];
9337             }
9338             }
9339 0           push @OUT, \@CC;
9340             }
9341 0           $self->{layers}{$name} = \@OUT;
9342 0           return $name;
9343             }
9344 0           die "Unrecognized Face recipe `$recipe'"
9345             }
9346            
9347             # my @LL = map $self->{layers}{'[ini_copy1]'}{$_} || $self->{layers}{'[ini_copy]'}{$_} || $self->{layers}{$_}, @$LL;
9348            
9349             # A stand-alone word is either LinkFace, or is interpreted as a name of
9350             # translation function applied to the current face.
9351             # A name which is an argument to a function is allowed to be a layer name
9352             # (but note that then both layers of the face will be mapped to that same
9353             # layer - unless one restricts the recipe to a particular layer 0/1 of the
9354             # face).
9355             # In particular: to specify a layer, use Id(LayerName).
9356             #use Dumpvalue;
9357             sub make_translated_layers ($$$$;$$) { # support Self/FlipLayers/LinkFace/FlipShift, stacking and maps
9358 0     0 0   my ($self, $recipe, $face, $NN, $deadkey, $noid, $append, $ARG) = (shift, shift, shift, shift, shift, shift, '');
9359             # XXX We can't cache created layer by name, since it depends on $recipe and $N too???
9360             # return $recipe if exists $self->{layers}{$recipe};
9361             # my $FACE = $recipe . join '===', '', @$NN, '';
9362             # return $self->{faces}{$FACE}{layers} if exists $self->{faces}{$FACE}{layers};
9363 0 0         return [map $self->pseudo_layer($recipe, $face, $_), @$NN]
9364             if $recipe =~ /^(prefix(?:NOTSAME(?:case)?)?=.*|(FlipLayers)?LinkFace|FlipLayers|Self|((Full)?Face|FlipLayers|Layers)\([^()]+\))$/;
9365 0           $recipe =~ s/^(FlipShift)$/$1(Self)/;
9366 0 0         my @parts = grep /\S/, $self->join_min_paren_brackets_matched('', split /(\s+)/, $recipe)
9367             or die "Whitespace face recipe `$recipe'?!";
9368 0 0         if (@parts > 1) {
9369             #warn "parts of the translation spec: <", join('> <', @parts), '>';
9370 0           my @layers = map $self->make_translated_layers($_, $face, $NN, $deadkey), @parts;
9371 0           warn "Stacking/NOID for layers `@parts'", (join "\n\t", '', map {join ' &&& ', @$_} @layers), "\n" if debug_noid or debug_stacking;
9372             #print "Stacking for `$recipe'\n" if $DEBUG;
9373             #Dumpvalue->new()->dumpValue(\@layers) if $DEBUG;
9374 0 0         return [$self->make_translated_layers_noid($noid eq 'NotSameKey', @layers)]
9375             if $noid;
9376 0           return [$self->make_translated_layers_stack(@layers)];
9377             }
9378 0 0         if ( $recipe =~ /\)$/ ) {
9379 0 0         if ( $recipe =~ /^[^(]*\[/ ) { # Tricky: allow () inside Func[](args)
9380 0           my $pos;
9381 0           while ( $recipe =~ /(?=\]\()/g ) {
9382 0 0         $pos = 1 + pos $recipe, last if $self->brackets_match_q(substr $recipe, 0, 1 + pos $recipe)
9383             }
9384 0 0         die "Can't parse `$recipe' as Func[Arg1](Arg2)" unless $pos;
9385 0           $ARG = substr $recipe, $pos + 1, length($recipe) - $pos - 2;
9386 0           $recipe = substr $recipe, 0, $pos;
9387             } else {
9388 0           my $o = $recipe;
9389 0 0         ($recipe, $ARG) = ($recipe =~ /^(.*?)\((.*)\)$/s) or warn "Can't parse recipe `$o'";
9390             }
9391             } else {
9392 0           $ARG = '';
9393             }
9394             #warn "Translation sub-spec: recipe = <$recipe>, ARG=<$ARG>";
9395 0 0         if (length $ARG) {
9396 0 0         if (exists $self->{layers}{$ARG}) {
9397 0           $ARG = [($ARG) x @$NN];
9398             } else {
9399 0 0         ($ARG = $self->layer_recipe($ARG)) =~ s/^\s+// if exists $self->{layer_recipes}{my $a = $ARG};
9400 0           warn "make_translated_layers: Using layout_recipe `$ARG' for layer '$a'\n" if debug_face_layout_recipes and exists $self->{layer_recipes}{$a};
9401 0           ($noid) = ($recipe =~ /^(NotId|NotSameKey)$/);
9402 0           $ARG = $self->make_translated_layers($ARG, $face, $NN, $deadkey, $noid);
9403 0 0         return $ARG if $noid;
9404             }
9405             } else {
9406 0           $ARG = [map $self->{faces}{$face}{layers}[$_], @$NN];
9407 0           $append = "#$face#";
9408             }
9409 0           [$self->make_translated_layers_tr($ARG, $recipe, $append, $deadkey, $face, $NN)]; # Either we saw (), or $recipe is not a face recipe!
9410             }
9411            
9412             sub massage_translated_layers ($$$$;$) {
9413 0     0 0   my ($self, $in, $face, $NN, $deadkey) = (shift, shift, shift, shift, shift, '');
9414             #warn "Massaging `$deadkey' for `$face':$N";
9415 0 0         return $in unless my $r = $self->get_deep($self, 'faces', (my @p = split m(/), $face), '[Diacritic_if_undef]');
9416 0           $r =~ s/^\s+//;
9417             #warn " -> end recipe `$r'";
9418 0           my $post = $self->make_translated_layers($r, $face, $NN, $deadkey);
9419 0           return [$self->make_translated_layers_stack($in, $post)];
9420             }
9421            
9422             sub default_char ($$) {
9423 0     0 0   my ($self, $F) = (shift, shift);
9424 0           my $default = $self->get_deep($self, 'faces', $F, '[DeadChar_DefaultTranslation]');
9425 0 0         $default =~ s/^\s+//, $default = $self->charhex2key($default) if defined $default;
9426 0           $default;
9427             }
9428            
9429             sub create_inverted_face ($$$$$) {
9430 0     0 0   my ($self, $F, $KK, $chain, $flip_AltGr) = (shift, shift, shift, shift, shift);
9431 0           my $H = $self->{faces}{$F};
9432 0           my $auto_chr = $H->{'[deadkeyInvAltGrKey]'}{$KK};
9433 0           my $new_facename = $H->{'[deadkeyFaceInvAltGr]'}{$auto_chr};
9434 0           my ($LL, %Map) = $H->{'[deadkeyLayers]'}{$KK};
9435 0 0         $LL = $H->{layers} if $KK eq '';
9436             %Map = ($flip_AltGr, [$chain->{$KK and $self->charhex2key($KK)}, undef, 1, 'AltGrInv-faces-chain'])
9437 0 0 0       if defined $flip_AltGr and defined $chain->{$KK and $self->charhex2key($KK)};
      0        
      0        
9438 0           $self->patch_face($LL, $new_facename, $H->{"[InvdeadkeyLayers]"}{$KK}, $KK, \%Map, $F, 'invert');
9439            
9440             # warn "Joining <$F>, <$new_facename>";
9441 0           $self->link_layers($F, $new_facename, 'skipfix', 'no-slot-warn');
9442 0 0 0       if ($KK eq '' and defined $flip_AltGr) {
9443 0           $H->{'[deadkeyFace]'}{$self->key2hex($flip_AltGr)} = $H->{'[deadkeyFaceInvAltGr]'}{$auto_chr};
9444             }
9445 0 0         if ($H->{"[InvdeadkeyLayers]"}{$KK}) { # There are overrides for the inverted face. Make a map for them...
9446             #warn "Overriding face for inverted `$KK' in face $F; new_facename=$new_facename";
9447 0           $H->{'[InvAltGrFace]'}{$KK} = "$new_facename\@override";
9448 0           $self->{faces}{"$new_facename\@override"}{layers} = $H->{"[InvdeadkeyLayers]"}{$KK};
9449 0           $self->link_layers($F, "$new_facename\@override", 'skipfix', 'no-slot-warn');
9450             }
9451 0           $new_facename;
9452             }
9453            
9454             sub auto_dead_can_wrap ($$) { # Call after all the manually set prefix key are already established, so one can avoid them
9455 0     0 0   my ($self, $F) = (shift, shift);
9456 0           $self->{faces}{$F}{'[ad_can_wrap]'}++
9457             }
9458            
9459             sub next_auto_dead ($$) {
9460 0     0 0   my ($self, $H, $o) = (shift, shift);
9461 0 0         unless ($H->{'[autodead_wrapped]'}) {
9462 0   0       1 while $H->{'[auto_dead]'}{ $o = $H->{'[first_auto_dead]'}++ }++ and ($o < 0x1000 or not $H->{'[ad_can_wrap]'}); # Bug in kbdutool
      0        
9463 0 0 0       $H->{'[first_auto_dead]'} = 0xa0 if $o >= 0x1000 and $H->{'[ad_can_wrap]'} and not $H->{'[autodead_wrapped]'}++;
      0        
9464             }
9465 0 0         if ($H->{'[autodead_wrapped]'}) { # This does not deal with manual assignment of inverted prefixes??? Inv_AltGr???
9466 0   0       1 while $H->{'[auto_dead]'}{ $o = $H->{'[first_auto_dead]'}++ }++ or $H->{'[deadkeyFaceHexMap]'}{$self->key2hex(chr $o)};
9467             # if ($o == 0x00a3) {
9468             # warn "$o: Keys HexMap: ", join ', ', sort keys %{$H->{'[deadkeyFaceHexMap]'}};
9469             # }
9470             }
9471 0           chr $o;
9472             }
9473            
9474             sub recipe2str ($$) {
9475 0     0 0   (undef, my $recipe) = (shift, shift);
9476 0 0         if ('ARRAY' eq ref $recipe) {
9477 0           $recipe = [@$recipe]; # deep copy
9478 0           s/\s+$//, s/^\s+// for @$recipe;
9479 0           s/(?
9480 0           $recipe = join '', @$recipe;
9481             }
9482 0           $recipe =~ s/^\s+//;
9483 0           $recipe
9484             }
9485            
9486             sub scan_for_DeadKey_Maps ($) { # Makes a direct-access synonym, scan for DeadKey_Maps* keys
9487 0     0 0   my ($self, %h, $expl) = (shift);
9488             #Dumpvalue->new()->dumpValue($self);
9489 0           my @F = grep m(^faces(/.*)?$), @{$self->{'[keys]'}};
  0            
9490 0           for my $FF (@F) {
9491 0           (my $F = $FF) =~ s(^faces/?)();
9492 0           my(@FF, @HH) = split m(/), $FF;
9493 0 0 0       next if @FF == 1 or $FF[-1] eq 'VK';
9494 0           my @FF1 = @FF;
9495 0           push(@HH, $self->get_deep($self, @FF1)), pop @FF1 while @FF1; # All the parents
9496 0           my $H = $HH[0];
9497 0 0         next if $H->{PartialFace};
9498 0 0 0       $self->{faces}{$F} = $H if $F =~ m(/) and exists $H->{layers}; # Make a direct-access copy
9499             #warn "Face section `${FF}'s parents: ", scalar @HH;
9500             #warn "Mismatch of hashes for `$FF'" unless $self->{faces}{$F} == $H;
9501            
9502             # warn "compositing: faces `$F'; -> <", (join '> <', %$H), ">";
9503 0           for my $HH (@HH) {
9504 0           for my $k ( keys %$HH ) {
9505             # warn "\t`$k' -> `$HH->{$k}'";
9506 0 0         next unless $k =~ m(^DeadKey_(Inv|Add)?Map([0-9a-f]{4,})?(?:_(\d+))?$)i;
9507             #warn "\t`$k' -> `$HH->{$k}'";
9508 0   0       my($inv, $key, $layers) = ($1 || '', $2, $3);
9509 0 0         $key = $self->key2hex($self->charhex2key($key)) if defined $key; # get rid of uc/lc hex problem
9510             # XXXX The problem is that the parent may define layers in different ways (_0,_1 or no); ignore it for now...
9511 0 0 0       $H->{'[DeadKey__Maps]'}{$key || ''}{$inv}{(defined $layers) ? $layers : 'All'} ||= $HH->{$k};
      0        
9512             }
9513             }
9514             }
9515             }
9516            
9517             #use Dumpvalue;
9518             sub ensure_DeadKey_Map_by_recipe ($$$$;$$) {
9519 0   0 0 0   my ($self, $F, $hexPrefix, $recipe, $layers, $inv) = (shift, shift, shift, shift, shift, shift || '');
9520 0           my $H = $self->{faces}{$F};
9521 0 0 0       return if $H->{"[${inv}deadkeyLayersCreated]"}{$hexPrefix}{$layers and "@$layers"}++;
9522             #Dumpvalue->new()->dumpValue($self);
9523 0           my $massage = !($recipe =~ s/\s+NoDefaultTranslation$//);
9524 0   0       $layers ||= [ 0 .. $#{$self->{faces}{$F}{layers}} ];
  0            
9525             #warn "Doing key `$hexPrefix' inv=`$inv' face=`$F', recipe=`$recipe'";
9526 0           my $new = $self->make_translated_layers($recipe, $F, $layers, $hexPrefix);
9527 0 0 0       $new = $self->massage_translated_layers($new, $F, $layers, $hexPrefix) if $massage and not $inv;
9528 0           for my $NN (0..$#$layers) { # Create a layer according to the spec
9529             #warn "DeadKey Layer for face=$F; layer=$layer, k=$k:\n\t$HH->{$k}, key=`", ($hexPrefix||''),"'\n\t\t";
9530             #$DEBUG = $hexPrefix eq '0192';
9531             #print "Doing key `$hexPrefix' face=$F layer=`$layer' recipe=`$recipe'\n" if $DEBUG;
9532             #Dumpvalue->new()->dumpValue($self->{layers}{$new}) if $DEBUG;
9533             #warn "new=<<<", join('>>> <<<', @$new),'>>>';
9534 0           $H->{"[${inv}deadkeyLayers]"}{$hexPrefix}[$layers->[$NN]] = $new->[$NN];
9535             #warn "Face `$F', layer=$layer key=$hexPrefix\t=> `$new'" if $H->{layers}[$layer] =~ /00a9/i;
9536             #Dumpvalue->new()->dumpValue($self->{layers}{$new}) if $self->charhex2key($hexPrefix) eq chr 0x00a9;
9537             }
9538             }
9539            
9540             sub ensure_DeadKey_Map ($$$;$) {
9541 0     0 0   my ($self, $F, $hexPrefix, $hexPrefixWas, %h, $expl) = (shift, shift, shift, shift);
9542 0 0         $hexPrefixWas = $hexPrefix unless defined $hexPrefixWas;
9543 0           my $H = $self->{faces}{$F};
9544 0           my $v0 = $H->{'[DeadKey__Maps]'}{$hexPrefixWas};
9545 0           for my $inv (keys %$v0) {
9546 0           my $v1 = $v0->{$inv};
9547 0 0 0       my $K = (($inv and "$inv $hexPrefix" eq "Inv 0000") ? '' : $hexPrefix);
9548 0           for my $layers (keys %$v1) {
9549 0           my $recipe = $self->recipe2str($v1->{$layers});
9550 0 0         $layers = ($layers eq 'All' ? '' : [$layers]);
9551 0           $self->ensure_DeadKey_Map_by_recipe($F, $K, $recipe, $layers, $inv);
9552             }
9553             }
9554             }
9555            
9556             sub create_DeadKey_Maps ($) {
9557 0     0 0   my ($self, %h, $expl) = (shift);
9558             #Dumpvalue->new()->dumpValue($self);
9559 0           for my $F (keys %{ $self->{faces} }) {
  0            
9560 0 0 0       next if 'HASH' ne ref $self->{faces}{$F} or $F =~ /\bVK$/; # "parent" taking keys for a child
9561 0           my $H = $self->{faces}{$F};
9562 0           my $flip_AltGr = $H->{'[Flip_AltGr_Key]'};
9563 0 0         $flip_AltGr = (defined $flip_AltGr) ? $self->charhex2key($flip_AltGr) : 'N/A';
9564             # Treat first the specific maps (for one deadkey) then the deadkeys which were not seen via the universal map
9565 0           for my $key (keys %{$H->{'[DeadKey__Maps]'}}) {
  0            
9566 0           my $v0 = $H->{'[DeadKey__Maps]'}{$key};
9567             my @keys = (($key ne '')
9568             ? $key
9569 0   0       : (grep {not $H->{'[DeadKey__Maps]'}{$_} and not $H->{'[ComposeKeys]'}{$_}}
9570 0 0         map $self->key2hex($_), grep $_ ne $flip_AltGr, keys %{ $H->{'[DEAD]'} }));
  0            
9571 0           $self->ensure_DeadKey_Map($F, $_, $key) for @keys;
9572             }
9573             }
9574             }
9575            
9576             #use Dumpvalue;
9577             sub create_composite_layers ($) {
9578 0     0 0   my ($self, %h, $expl) = (shift);
9579             #Dumpvalue->new()->dumpValue($self);
9580 0           for my $F (keys %{ $self->{faces} }) {
  0            
9581 0 0 0       next if 'HASH' ne ref $self->{faces}{$F} or $F =~ /\bVK$/; # "parent" taking keys for a child
9582 0           my $H = $self->{faces}{$F};
9583 0 0         next if $H->{PartialFace};
9584 0 0         next unless $H->{'[deadkeyLayers]'}; # Are we in a no-nonsense Face-hash with defined deadkeys?
9585             #warn "Face: <", join( '> <', %$H), ">";
9586 0           my $layerL = @{ $self->{layers}{ $H->{layers}[0] } }; # number of keys in the face (in the principal layer)
  0            
9587 0           my $first_auto_dead = $H->{'[Auto_Diacritic_Start]'};
9588 0 0         $H->{'[first_auto_dead]'} = ord $self->charhex2key($first_auto_dead) if defined $first_auto_dead;
9589 0           for my $KK (sort keys %{$H->{'[deadkeyLayers]'}}) { # Given a deadkey: join layers into a face, and link to it
  0            
9590 0           for my $layer ( 0 .. $#{ $H->{layers} } ) {
  0            
9591             #warn "Checking for empty layers, Face `$face', layer=$layer key=$KK";
9592             $self->{layers}{"[empty$layerL]"} ||= [map[], 1..$layerL], $H->{'[deadkeyLayers]'}{$KK}[$layer] = "[empty$layerL]"
9593 0 0 0       unless defined $H->{'[deadkeyLayers]'}{$KK}[$layer]
9594             }
9595             # Join the syntetic layers (now well-formed) into a new synthetic face:
9596 0           my $new_facename = "$F###$KK";
9597 0           $self->{faces}{$new_facename}{layers} = $H->{'[deadkeyLayers]'}{$KK};
9598 0           $H->{'[deadkeyFace]'}{$KK} = $new_facename;
9599             #warn "Joining <$F>, <$new_facename>";
9600             # $self->link_layers($F, $new_facename, 'skipfix', 'no-slot-warn'); # Now moved to link_composite_layers
9601             }
9602             }
9603             $self
9604 0           }
9605            
9606             sub create_prefix_chains ($) {
9607 0     0 0   my ($self, %h, $expl) = (shift);
9608 0           my @F = grep m(^faces(/.*)?$), @{$self->{'[keys]'}};
  0            
9609 0           for my $FF (@F) {
9610 0           (my $F = $FF) =~ s(^faces/?)();
9611 0           my(@FF, @HH) = split m(/), $FF;
9612 0 0 0       next if @FF == 1 or $FF[-1] eq 'VK';
9613 0           push(@HH, $self->get_deep($self, @FF)), pop @FF while @FF;
9614 0           my($H, %KK) = $HH[0];
9615 0 0         for my $chain ( @{ $H->{'[PrefixChains]'} || [] } ) {
  0            
9616 0           (my $c = $chain) =~ s/^\s+//;
9617 0 0         my @prefix = map { $_ and $self->charhex2key($_) } split /,/, $c, -1; # trailing empty means all are prefixes
  0            
9618 0 0         length(my $trail_nonprefix = $prefix[-1]) or pop @prefix;
9619 0           my $start = shift @prefix;
9620 0 0         warn "PrefixChain for `$start' in font `$F' is empty" unless @prefix > 1;
9621 0           for my $Kn (1..$#prefix) {
9622 0           my($from, $to) = @prefix[$Kn-1, $Kn];
9623 0   0       $KK{$from}{$start} = [$to, undef, $Kn != $#prefix || !$trail_nonprefix, 'PrefixChains'];
9624             }
9625             }
9626 0           for my $K (keys %KK) {
9627 0           my $KK = $self->key2hex($K);
9628 0 0         die "Key `$KK=$K' in PrefixChain for font=`$F' is not a prefix" unless my $KF = $H->{'[deadkeyFace]'}{$KK};
9629 0           my $new_facename = "$F*==>*Chain*$KK";
9630 0           my $LL = $H->{'[deadkeyLayers]'}{$KK};
9631 0           $self->patch_face($LL, $new_facename, undef, "chain-in-$KK", $KK{$K}, $F, !'invert');
9632 0           $H->{'[deadkeyFace]'}{$KK} = $new_facename;
9633 0           $H->{'[deadkeyLayers]'}{$KK} = $self->{faces}{$new_facename}{layers};
9634 0           $self->coverage_face0($new_facename, 'after import');
9635             }
9636             }
9637             $self
9638 0           }
9639            
9640             sub link_composite_layers ($) { # as above, but finish
9641 0     0 0   my ($self, %h, $expl) = (shift);
9642 0           my @F = grep m(^faces(/.*)?$), @{$self->{'[keys]'}};
  0            
9643 0           for my $FF (@F) {
9644 0           (my $F = $FF) =~ s(^faces/?)();
9645 0           my(@FF, @HH) = split m(/), $FF;
9646 0 0 0       next if @FF == 1 or $FF[-1] eq 'VK';
9647 0           push(@HH, $self->get_deep($self, @FF)), pop @FF while @FF;
9648 0           my $H = $HH[0];
9649 0           for my $new_facename (values %{$H->{'[deadkeyFace]'}}) {
  0            
9650             #warn "Joining <$F>, <$new_facename>";
9651 0           $self->link_layers($F, $new_facename, 'skipfix', 'no-slot-warn');
9652             }
9653             }
9654             $self
9655 0           }
9656            
9657             sub create_inverted_faces ($) {
9658 0     0 0   my ($self) = (shift);
9659             #Dumpvalue->new()->dumpValue($self);
9660 0           for my $F (keys %{$self->{faces} }) {
  0            
9661 0 0 0       next if 'HASH' ne ref $self->{faces}{$F} or $F =~ /\bVK$/; # "parent" taking keys for a child
9662 0           my $H = $self->{faces}{$F};
9663 0 0         next unless $H->{'[deadkeyLayers]'}; # Are we in a no-nonsense Face-hash with defined deadkeys?
9664 0   0       my $expl = $H->{'[Explicit_AltGr_Invert]'} || [];
9665 0 0         $expl = [], warn "Odd number of elements of Explicit_AltGr_Invert in face $F, ignore" if @$expl % 2;
9666 0           $expl = {map $self->charhex2key($_), @$expl};
9667            
9668             #warn "Face: <", join( '> <', %$H), ">";
9669 0           my $layerL = @{ $self->{layers}{ $H->{layers}[0] } }; # number of keys in the face (in the principal layer)
  0            
9670 0           for my $KK (sort keys %{$H->{'[deadkeyLayers]'}}) { # Create AltGr-inverted face if there is at least one key in the AltGr face:
  0            
9671 0           my $LL = $H->{'[deadkeyLayers]'}{$KK};
9672             # To check that a key is defined, we do not care about whether a shift-state is encoded as a string, or as an array:
9673 0 0 0       next unless defined $H->{'[first_auto_dead]'} and grep defined, map $self->flatten_arrays($_), map $self->{layers}{$_}, @$LL[1..$#$LL];
9674 0 0         $H->{'[deadkeyInvAltGrKey]'}{''} = $self->next_auto_dead($H) unless exists $H->{'[deadkeyInvAltGrKey]'}{''}; # Prefix key for principal invertred face
9675             my $auto_chr = $H->{'[deadkeyInvAltGrKey]'}{$KK} =
9676 0 0         ((exists $expl->{$self->charhex2key($KK)}) ? $expl->{$self->charhex2key($KK)} : $self->next_auto_dead($H));
9677 0           $H->{'[deadkeyFaceInvAltGr]'}{$auto_chr} = "$F##Inv#$KK";
9678 0           $self->{faces}{ $H->{'[deadkeyFace]'}{$KK} }{'[invAltGr_Accessor]'} = $auto_chr;
9679             }
9680 0 0         next unless defined (my $flip_AltGr = $H->{'[Flip_AltGr_Key]'});
9681 0           $flip_AltGr = $self->charhex2key($flip_AltGr);
9682 0 0         $H->{'[deadkeyFaceInvAltGr]'}{ $H->{'[deadkeyInvAltGrKey]'}{''} } = "$F##Inv#" if exists $H->{'[deadkeyInvAltGrKey]'}{''};
9683 0           my ($prev, %chain) = '';
9684 0 0         for my $k ( @{ $H->{chainAltGr} || [] }) {
  0            
9685 0           my $K = $self->charhex2key($k);
9686 0           my $KK = $self->key2hex($K);
9687             warn("Deadkey ` $K ' of face $F has no associated AltGr-inverted face"), next
9688 0 0         unless exists $H->{'[deadkeyInvAltGrKey]'}{$KK};
9689 0           $chain{$prev} = $H->{'[deadkeyInvAltGrKey]'}{$KK};
9690             #warn "chain `$prev' --> `$K' => $H->{'[deadkeyInvAltGrKey]'}{$KK}";
9691             # $H->{'[dead2_AltGr_chain]'}{(length $prev) ? $self->key2hex($prev) : ''}++;
9692 0           $prev = $K;
9693             }
9694 0 0         $H->{'[have_AltGr_chain]'} = 1 if length $prev;
9695 0           for my $KK (keys %{$H->{'[deadkeyInvAltGrKey]'}}) { # Now know which deadkeys take inversion, and via what prefix
  0            
9696 0           my $new = $self->create_inverted_face($F, $KK, \%chain, $flip_AltGr);
9697 0           $self->coverage_face0($new);
9698             }
9699             # We do not link the AltGr-inverted faces to the "parent" faces here. Currently, it should be done when
9700             # outputting a kbd description...
9701             }
9702             $self
9703 0           }
9704            
9705             #use Dumpvalue;
9706             sub patch_face ($$$$$$$;$) { # flip layers paying attention to linked AltGr-inverted faces, and overrides
9707 0     0 0   my ($self, $LL, $newname, $prefix, $mapId, $Map, $face, $inv, @K) = (shift, shift, shift, shift, shift, shift, shift, shift);
9708 0 0         if (%$Map) { # Borrow from make_translated_layer_tr()
9709 0 0   0     my $Tr = sub ($) { my $c = shift; defined $c or return $c; $c = $c->[0] if ref $c; my $o = $Map->{$c} ;
  0 0          
  0            
  0            
9710             #warn "Tr: `$c' --> `$o'" if defined $o;
9711             #$o
9712 0           };
9713 0           $Tr = $self->depth1_A_translator($Tr);
9714 0           my $LLL = $self->{faces}{$face}{layers};
9715 0 0         my $mod_name = ($inv ? 'AltGr' : '');
9716 0           for my $n (0..$#$LL) { # Layer number
9717 0           my $new_Name = "$face##Chain$mod_name#$n.." . $mapId;
9718             #warn "AltGr-chaining: name=$new_Name; `$chainKey' => `$nextL'";
9719 0   0       $self->{layers}{$new_Name} ||= [ map $Tr->($_), @{ $self->{layers}{ $LLL->[$n] } }];
  0            
9720 0           push @K, $new_Name;
9721             }
9722             }
9723 0 0         my @prefix = $prefix ? $prefix : ();
9724 0           my @n1 = (0..$#$LL);
9725 0 0         @n1 = map $self->flip_layer_N($_, $#$LL), @n1 if $inv;
9726 0           my @invLL = @$LL[@n1];
9727 0 0         push @prefix, \@K if @K;
9728 0           $self->{faces}{$newname}{layers} = [$self->make_translated_layers_stack(@prefix, \@invLL)];
9729             }
9730            
9731             # use Dumpvalue;
9732             my %subst_Shift = qw( -- - -S S t- t tS T ); # There is no space for 8 MODs, so we contract tS into T
9733             sub fmt_bitmap_mods ($$$;$) {
9734 0     0 0   my ($self, $b, $col, $short, @b) = (shift, shift, shift, shift, qw(Shift Ctrl Alt Kana Roya Loya Z t));
9735 0 0         my ($j, $empty, @ind) = ($short ? ('', '-', 1..$#b, 0) : ("\t", '', 0..$#b)); # better have Shift at end (Ctrl-Alt-Shift)...
9736 0 0         my $O = join $j, map {($b & (1<<$_)) ? ($short ? substr $b[$_], 0, 1 : $b[$_]) : $empty} @ind;
  0 0          
9737 0 0         $O =~ s/(..)$/$subst_Shift{$1}/ if $short;
9738 0           $O =~ s/\t+$//;
9739 0 0         $O = 'Invalid' if $col == 15;
9740 0           $O
9741             }
9742            
9743             sub BaseKeys ($$) {
9744 0     0 0   my($self, $K) = (shift, shift);
9745 0           my $F = $self->get_deep($self, @$K); # Presumably a face hash, as in $K = [qw(faces US)]
9746 0 0         return $F->{baseKeysWin} if $F->{baseKeysWin};
9747 0           my $cnt = $F->{'[non_VK]'};
9748 0           my $b = $F->{BaseLayer};
9749 0           my $layers = $F->{layers};
9750 0 0 0       $b = $self->make_translated_layers($b, $K->[-1], [0])->[0] if defined $b and not $self->{layers}{$b};
9751 0 0         my $basesub = [((defined $b) ? $b : ()), $F->{layers}[0]];
9752 0           my $max = -1;
9753 0   0       $max < $#{$self->{layers}{$_}} and $max = $#{$self->{layers}{$_}} for @$basesub;
  0            
  0            
9754 0   0       $max < $_->[0] + $_->[1] and $max = $_->[0] + $_->[1] for values %start_SEC;
9755             # warn "Basekeys: max=$max; cnt=$cnt";
9756 0           my(@o, @oo);
9757 0           for my $u (0..$max) {
9758 0           my $c = $self->base_unit($basesub, $u, $u >= $cnt); # [0 || 1, VK]
9759 0           my($k, $kk) = ($c->[1], $c->[2]);
9760 0 0         if (!$c->[0]) {
9761 0 0 0       $k = $oem_keys{$k} or warn("Can't find a key with VKEY `$c', unit=$u, lim=$cnt"), return
9762             unless $k =~ /^[A-Z0-9]$/;
9763             } else {
9764 0           my $U = [map $self->{layers}{$_}[$u], @$layers];
9765 0           my $keys = grep defined, map $self->flatten_arrays($_->[$u]), @$U;
9766 0 0 0       $keys and warn "Can't find the range of keys to which unit `$u' belongs (max=$max; cnt=$cnt)" unless defined $k;
9767 0           $kk = $k;
9768             }
9769 0           push @o, $k;
9770 0           push @oo, $kk;
9771             }
9772             # warn "BaseKeys: @o";
9773 0           $F->{baseKeysRaw} = \@oo;
9774 0           $F->{baseKeysWin} = \@o;
9775             }
9776            
9777            
# fill_win_template($self, $t, $k [, $dummy [, $dummyDscr]])
#
# Collects the substitution values for the Windows template ($template_win) into %h
# and returns the result of $self->massage_template($template_win, \%h).
#
#   $self      - the layout object; get_deep, BaseKeys, output_* etc. are called on it
#   $t         - not referenced anywhere in this body
#   $k         - array ref: path inside $self to the face being emitted
#   $dummy     - true: fill the key/column entries with fixed placeholders
#   $dummyDscr - true: replace LAYOUTNAME by "KBD Layout <DLLNAME>"
#
# Side effects visible here: calls reset_units, resets $F->{'[dead-used]'},
# and may call load_uniage when get_AgeList returns a value and $self->{Age} is unset.
# NOTE(review): in this rendering the here-doc openers of the `$dummy' branch and of the
# DO_LIGA prefix are truncated; only the here-doc bodies survive.
9778             sub fill_win_template ($$$;$$) {
9779 0     0 0   my @K = qw( COMPANYNAME LAYOUTNAME COPYR_YEARS LOCALE_NAME LOCALE_ID DLLNAME SORT_ORDER_ID_ LANGUAGE_NAME );
                 # %h starts empty: the right-hand list supplies only the five scalars
9780 0           my ($self, $t, $k, $dummy, $dummyDscr, %h) = (shift, shift, shift, shift, shift);
9781 0           $self->reset_units;
9782 0           my $B = $self->BaseKeys($k);
9783             # Dumpvalue->new()->dumpValue($self);
9784 0           my $idx = $self->get_deep($self, @$k, 'MetaData_Index');
                 # One metadata lookup per template key listed in @K
9785 0           $h{$_} = $self->get_deep_via_parents($self, $idx, @$k, $_) for @K;
9786 0 0         $h{LAYOUTNAME} = "KBD Layout $h{DLLNAME}" if $dummyDscr; # error "the required resource DATABASE is missing" from setup.exe
                 # Length with every character at or above U+10000 counted twice
                 # (presumably the length in UTF-16 code units -- TODO confirm)
9787 0           my $LLL = length($h{LAYOUTNAME}) + grep ord >= 0x10000, split //, $h{LAYOUTNAME};
9788 0 0         warn "The DESCRIPTION of the layout [@$k] is longer than 63 chars;\n the name shown in LanguageBar/Settings may be empty"
9789             if $LLL > 63;
9790 0           $h{LAYOUTNAME} =~ s/([\\""])/\\$1/g; # C-like syntax (directly copied to resource files???)
9791             # warn "Translate: ", %h;
9792 0           my $F = $self->get_deep($self, @$k); # Presumably a face hash, as in $k = [qw(faces US)]
9793 0           $F->{'[dead-used]'} = [map {}, @{$F->{layers}}]; # Which of deadkeys are reachable on the keyboard
  0            
9794 0           my $cnt = $F->{'[non_VK]'};
                 # Placeholder branch: five entries are set to '' and two more come from here-docs
9795 0 0         if ($dummy) {
9796 0           @h{qw(DO_LIGA COL_HEADERS COL_EXPL KEYNAMES_DEAD DEADKEYS)} = ('') x 5;
9797 0           @h{qw(LAYOUT_KEYS BITS_TEMPLATE)} = (<
9798             10 Q 0 q -1 -1 // LATIN SMALL LETTER Q, ,
9799             EOT
9800             0 // Column 4 :
9801             1 // Column 5 : Shift
9802             2 // Column 6 : Ctrl
9803             3 // Column 7 : Shift Ctrl
9804             6 // Column 12 : Ctrl Alt t
9805             7 // Column 13 : Shift Ctrl Alt t
9806             EOT
9807             } else {
                 # LAYOUT_KEYS is the concatenation of three generated parts
9808 0           $h{LAYOUT_KEYS} = join '', $self->output_layout_win($k->[-1], $F->{layers}, $F->{'[dead]'}, $F->{'[dead-used]'}, $cnt, $B);
9809 0           $h{LAYOUT_KEYS} .= join '', $self->output_VK_win($k->[-1], $F->{'[dead-used]'});
9810 0           $h{LAYOUT_KEYS} .= join '', $self->output_added_units();
9811            
9812 0           $h{DO_LIGA} = join '', $self->output_ligatures();
9813 0 0         $h{DO_LIGA} = <
9814            
9815             LIGATURE
9816            
9817             // VK_ ModCol# Char0 Char1 Char2 Char3
9818             // --------- ------- ----- ----- ----- -----
9819            
9820            
9821             EOPREF
9822            
9823             ### Deadkeys??? need_extra_keys_to_access???
9824 0           my ($OUT, $OUT_NAMES) = ('', "KEYNAME_DEAD\n\n");
9825            
9826 0           my $f = $self->get_AgeList;
9827 0 0 0       $self->load_uniage($f) if defined $f and not $self->{Age};
9828            
                 # %nn (hex key => quoted-escaped name) starts empty; only $flip_AltGr_hex is assigned here
9829 0           my($flip_AltGr_hex, %nn) = $F->{'[Flip_AltGr_Key]'};
9830 0 0         $flip_AltGr_hex = $self->key2hex($self->charhex2key($flip_AltGr_hex)) if defined $flip_AltGr_hex;
9831 0           for my $deadKey ( sort keys %{ $F->{'[deadkeyFaceHexMap]'} } ) {
  0            
9832 0 0         next if $F->{'[only_extra]'}{$self->charhex2key($deadKey)};
9833 0           my $auto_inv_AltGr = $F->{'[deadkeyInvAltGrKey]'}{$deadKey};
9834 0 0         $auto_inv_AltGr = $self->key2hex($auto_inv_AltGr) if defined $auto_inv_AltGr;
9835             #warn "flipkey=$flip_AltGr_hex, dead=$deadKey" if defined $flip_AltGr_hex;
                 # $nonempty is not used afterwards; only the map text is appended to $OUT
9836 0           (my $nonempty, my $MAP) = $self->output_deadkeys($k->[-1], $deadKey, $F->{'[dead2]'}, $flip_AltGr_hex, $auto_inv_AltGr);
9837 0           $OUT .= "$MAP\n";
9838 0 0         my @K = ($deadKey, ($auto_inv_AltGr ? $auto_inv_AltGr : ()));
                 # Name lookup falls through four sources; the first true value wins
9839 0   0       my @N = map $self->{DEADKEYS}{$_} || $self->{'[seen_knames]'}{chr hex $_} || $F->{'[prefixDocs]'}{$_} || $self->UName($_), @K;
                 # Put a backslash in front of every double quote and backslash
9840 0           s/(?=[""\\])/\\/g for @N;
9841             # if (defined $N and length $N) {
9842 0           $nn{$K[$_]} = $N[$_] for 0..$#K;
9843             # }# else { warn "DeadKey `$deadKey' for face `@$k' has no name associated" }
9844             }
9845             # Apparently, if the name table is too long, the keyboard is not activatable (installs OK on Win7_64,
9846             # is in Settings' list, but is not in the panel's list). Omit the multiple-Compose entries as a workaround...
9847 0   0       $nn{$_} =~ /\bCompose\s+(Compose\b|(?!key)\S+)/ or $OUT_NAMES .= qq($_\t"$nn{$_}"\n) for sort keys %nn;
9848             #warn "Translate: ", %h;
9849 0           $h{DEADKEYS} = $OUT;
9850 0           $h{KEYNAMES_DEAD} = $OUT_NAMES;
                 # Modifier letter => bit value; R and L carry the same values as X and Y
9851 0           my %mods = qw( S 1 C 2 A 4 K 8 X 16 Y 32 Z 64 T 128 R 16 L 32);
9852 0           $_ += 0 for values %mods; # Convert to numbers, so | works as expected
                 # @cols collects one modifier bitmask per output column
9853 0           my @cols;
9854 0 0         my %tr_mods_keys = ( @{ $F->{'[mods_keys_KBD]'} || [qw(rA CA)] } );
  0            
9855 0   0       my $mods_keys = $F->{'[layers_mods_keys]'} || ['', 'rA'];
9856 0   0       my $mods = $F->{'[layers_modifiers]'} || []; # || ['', 'CA']; # Plain, and Control-Alt
                 # Extend @$mods (with undef entries) so that it is at least as long as @$mods_keys
9857 0 0         $#$mods = $#$mods_keys if $#$mods < $#$mods_keys;
9858 0           for my $MOD ( @$mods ) {
9859 0           my $mask = 0;
9860 0 0         my $mod = ((defined $MOD) ? $MOD : ''); # Copy
                 # No explicit modifier string: translate the layer's key names through %tr_mods_keys
9861 0 0         unless ($mod =~ /\S/) {
9862 0           my @K = grep /./, split /(?<=[A-Z])(?=[rl]?[A-Z])/, $mods_keys->[scalar @cols];
9863             #warn "cols=(@cols), K=(@K)\n";
9864 0           $mod = join '', map $tr_mods_keys{$_}, @K;
9865             }
9866 0           $mask |= $mods{$_} for split //, $mod;
9867 0           push @cols, $mask;
9868             }
9869 0           @cols = map {($_, $_ | $mods{S})} @cols; # Add shift
  0            
9870            
                 # $ctrl_F is computed but only used by the commented-out line below
9871 0   0       my($ctrl_f,$ctrl_F) = ($mods{C}, $tr_mods_keys{lC} || $tr_mods_keys{C} || $tr_mods_keys{rC} || 'C'); # Prefer left-Ctrl
9872             # $ctrl_f |= $mods{$_} for split //, $ctrl_F; # kbdutool complains if there is no column for 'C'
9873            
                 # Per-face settings override the file-level defaults $ctrl_after / $create_alpha_ctrl
9874 0           my $pre_ctrl = $self->get_deep($self, @$k, '[ctrl_after_modcol]');
9875 0 0         $pre_ctrl = 2*$ctrl_after unless defined $pre_ctrl;
9876 0           my $create_a_c = $self->get_deep($self, @$k, '[create_alpha_ctrl]');
9877 0 0         $create_a_c = $create_alpha_ctrl unless defined $create_a_c;
9878 0 0         splice @cols, $pre_ctrl, 0, $ctrl_f, ($create_a_c>1 ? $ctrl_f|$mods{S} : ()); # Control (and maybe Control-Shift)
9879 0 0         splice @cols, 15, 0, $mods{A} if @cols >= 16; # col=15 is the fake one; assigning it to Alt is the best palliative to fixing MSKLC
9880 0           $h{COL_HEADERS} = join "\t", map sprintf('%-3d[%d]', $cols[$_], $_), 0..$#cols;
9881 0           $h{COL_EXPL} = join "\t", map $self->fmt_bitmap_mods($cols[$_], $_, 'short'), 0..$#cols;
9882 0           $h{BITS_TEMPLATE} = join "\n", map { "$cols[$_]\t// Column " . (4+$_) . " :\t" . $self->fmt_bitmap_mods($cols[$_], $_) } 0..$#cols;
  0            
9883             # $h{BITS_TEMPLATE} =~ s(^(?=.*\bInvalid$))(#)m; # XXX Actually, MSKLC is not ignoring the leading #
9884             }
                 # The value of this call is the return value of the sub
9885 0           $self->massage_template($template_win, \%h);
9886             }
9887            
# Forward declarations (with prototypes) of two subs that are called as methods further down;
# their definitions are not in this part of the file.
9888             sub AppleMap_i_j ($$$$$;$$$);
9889             sub AppleMap_prefix ($$;$$$$$$);
9890            
# fill_osx_template($self, $k)
#
# Collects the substitution values for the OS X template ($template_osx) into %h
# and returns the result of $self->massage_template($template_osx, \%h).
#
#   $self - the layout object; get_deep, output_deadkeys, AppleMap_* etc. are called on it
#   $k    - array ref: path inside $self to the face being emitted
#
# NOTE(review): the substitution in the `OSX_ADD_VERSION < -1' branch is truncated in this
# rendering, so its exact effect cannot be read from here.
9891             # https://developer.apple.com/library/mac/technotes/tn2056/_index.html
9892             sub fill_osx_template ($$) {
9893 0     0 0   my @K = qw( OSX_LAYOUTNAME LAYOUTNAME OSX_ID OSX_ADD_VERSION OSX_DUP_KEYS COPYR_YEARS COMPANYNAME );
                 # %h and %ids start empty: the right-hand list supplies only the two scalars
9894 0           my ($self, $k, %h, %ids) = (shift, shift);
9895 0           $self->reset_units;
                 # $B is not referenced again in this body
9896 0           my $B = $self->BaseKeys($k);
9897             # Dumpvalue->new()->dumpValue($self);
9898 0           my $idx = $self->get_deep($self, @$k, 'MetaData_Index');
9899 0           $h{$_} = $self->get_deep_via_parents($self, $idx, @$k, $_) for @K;
9900            
                 # LAYOUTNAME only serves as the fallback for OSX_LAYOUTNAME and is then dropped
9901 0   0       $h{OSX_LAYOUTNAME} ||= $h{LAYOUTNAME};
9902 0           delete $h{LAYOUTNAME};
9903 0 0         $h{OSX_ID} = -17 unless defined $h{OSX_ID}; # (Arbitrary) Negative number
9904 0           my $v = $self->{VERSION};
                 # OSX_ADD_VERSION selects where " v<VERSION>" goes in the layout name:
                 #   > 0  : after that many whitespace-separated words (if the name has that many)
                 #   < -1 : (substitution not fully visible here)
                 #   == -1: appended at the end
                 #   else : prepended as "v<VERSION> "
9905 0 0 0       if (defined $v and defined $h{OSX_ADD_VERSION}) {
9906 0 0         if ($h{OSX_ADD_VERSION} > 0) {
    0          
    0          
9907 0           my $c = $h{OSX_ADD_VERSION} - 1;
9908 0           $h{OSX_LAYOUTNAME} =~ s/^(\s*(\S+($|\s+)){$c}\S+)(?!\S)/$1 v$v/;
9909             } elsif ($h{OSX_ADD_VERSION} < -1) {
9910 0           my $c = -$h{OSX_ADD_VERSION} - 2;
9911 0           $h{OSX_LAYOUTNAME} =~ s/((?
9912             } elsif ($h{OSX_ADD_VERSION} == -1) {
9913 0           $h{OSX_LAYOUTNAME} =~ s/\z/ v$v/;
9914             } else {
9915 0           $h{OSX_LAYOUTNAME} =~ s/^/v$v /;
9916             }
9917             }
9918 0           delete $h{OSX_ADD_VERSION};
                 # The flat list is turned into a hash ref; $dupk is not referenced again in this body
9919 0           my $dupk = delete $h{OSX_DUP_KEYS};
9920 0 0         $dupk = {@$dupk} if $dupk;
9921            
9922             # OSX_CREATOR version OSX_CREATOR_VERSION on OSX_EDIT_DATE
9923 0           my $file = $self->{'[file]'};
9924 0 0         $file = (defined $file) ? "keyboard layout file $file" : 'string descriptor';
9925 0 0         $file .= " version $v" if defined $v;
9926 0 0         $file .= " Unicode tables version $self->{uniVersion}" if defined $self->{uniVersion};
9927 0           $h{OSX_CREATOR} = "UI::KeyboardLayout";
9928 0           $h{OSX_CREATOR_VERSION} = "$UI::KeyboardLayout::VERSION with $file";
                 # gmtime fields reordered to (year, month, day, hour, min, sec), then year/month made human-readable
9929 0           my @t = (gmtime)[5,4,3,2,1,0];
9930 0           $t[0] += 1900; $t[1]++;
  0            
9931 0           $h{OSX_EDIT_DATE} = sprintf '%d-%02d-%02d at %d:%02d:%02d GMT', @t;
9932            
9933 0           my $F = $self->get_deep($self, @$k);
9934 0           my($flip_AltGr_hex, %nn) = $F->{'[Flip_AltGr_Key]'};
9935 0 0         $flip_AltGr_hex = $self->key2hex($self->charhex2key($flip_AltGr_hex)) if defined $flip_AltGr_hex;
9936 0           my %map; # Indexed by hex (??? What about UTF-16???)
                 # Here output_deadkeys gets \%map as an extra argument and its return value is ignored
9937 0           for my $deadKey ( sort keys %{ $F->{'[deadkeyFaceHexMap]'} } ) {
  0            
9938 0 0         next if $F->{'[only_extra]'}{$self->charhex2key($deadKey)};
9939 0           my $auto_inv_AltGr = $F->{'[deadkeyInvAltGrKey]'}{$deadKey};
9940 0 0         $auto_inv_AltGr = $self->key2hex($auto_inv_AltGr) if defined $auto_inv_AltGr;
9941             #warn "flipkey=$flip_AltGr_hex, dead=$deadKey" if defined $flip_AltGr_hex;
9942 0           $self->output_deadkeys($k->[-1], $deadKey, $F->{'[dead2]'}, $flip_AltGr_hex, $auto_inv_AltGr, \%map);
9943             }
9944            
                 # Template key => "a;b;c", split below into the ($l, $shift, $capsl) arguments of AppleMap_i_j
9945 0           my %how = qw( OSX_KEYMAP_0_AND_COMMAND 0;0;0
9946             OSX_KEYMAP_SHIFT 0;1;0
9947             OSX_KEYMAP_CAPS 0;0;1
9948             OSX_KEYMAP_OPTION 1;0;0
9949             OSX_KEYMAP_OPTION_SHIFT 1;1;0
9950             OSX_KEYMAP_OPTION_CAPS 1;0;1
9951             OSX_KEYMAP_OPTION_COMMAND 1;0;0
9952             ); # In US Extended, OPT-CMD is the same as OPT
                 # $ov starts undefined and is autovivified as a hash by the assignments below
9953 0   0       my($OVERR, $ov) = $F->{'[Apple_Override]'} || [];
                 # Each override is "K,dead,out"; `out' is either the literal hex4/hex6 or a string for stringHEX2string
9954 0           for my $o (@$OVERR) {
9955 0           my($K, $dead, $out) = split /,/, $o, 3;
9956 0 0         if ($out =~ /^hex[46]\z/) {
9957 0           $out = ['lit', $out]
9958             } else {
9959 0           $out = [0, $self->stringHEX2string($out)]
9960             }
9961 0           $ov->{$K} = [$out->[1], undef, $dead, $out->[0]];
9962             }
9963 0   0       my $DUP = $F->{'[Apple_Duplicate]'} || [0x6e, 10, 0x47, 10, 0x66, 49, 0x68, 49]; # Mnu => ISO, KP_Clear => ISO, L/R-SPace => Space
9964 0           $ov->{dup} = {@$DUP};
9965 0           $ov->{extra_actions} = {};
9966 0           for my $m (keys %how) {
9967 0           my($l, $shift, $capsl) = split /;/, $how{$m};
9968 0           $h{$m} = $self->AppleMap_i_j ($k, $l, $shift, $capsl, \%ids, \%map, $ov);
9969             }
9970             # my $vbell = $self->get_deep_via_parents($self, undef, @$k, '[DeadChar_DefaultTranslation]');
9971             # $vbell =~ s/^\s+(?=.)//, $vbell = $self->charhex2key($vbell) if defined $vbell;
9972             # undef $vbell; # Terminators are used as visual feedback when prefix is pressed!
9973 0           my($S, %act) = $F->{'[Show]'};
                 # Two AppleMap_prefix calls (initiating, then not) for each of '' and 'term': four values in slice order
9974 0           @h{qw(OSX_ACTIONS_BASE OSX_ACTIONS OSX_TERMINATORS_BASE OSX_TERMINATORS2)}
9975             = map +($self->AppleMap_prefix(\%ids, 'do_initiating', $_, \%map, $S, $ov, \%act),
9976             $self->AppleMap_prefix(\%ids, !'do_initiating', $_, \%map, $S, $ov, \%act)), '', 'term';
9977            
                 # The value of this call is the return value of the sub
9978 0           $self->massage_template($template_osx, \%h);
9979             }
9980            
9981             my $unused = <<'EOR';
9982             # extract compositions, add to char downgrades; -> composition, => compatibility composition
9983             perl -wlne "$k=$1, next if /^([\da-f]+)/i; undef $a; $a = qq($k -> $1) if /^\s+:\s*([0-9A-F]+(?:\s+[0-9A-F]+)*)/; $a = qq($k => $2 $1) if /^\s+#\s*((?:<.*?>\s+)?)([0-9A-F]+(?:\s+[0-9A-F]+)*)/; next unless $a; $a =~ s/\s*$/ / unless $a =~ />\s+\S.*\s\S/; print $a" NamesList.txt >compose2b-NamesList.txt
9984             # expand recursively
9985             perl -wlne "/^(.+?)\s+([-=])>\s+(.+?)\s*$/ or die; $t{$1} = $3; $h{$1}=$2; sub t($); sub t($) {my $i=shift; return $n{$i} if exists $n{$i}; return $i unless $t{$i}; $t{$i} =~ /^(\S+)(.*)/ or die $i; return t($1).$2} END{print qq($_\t:$h{$_} ), join q( ), sort split /\s+/, t($_) for sort {hex $a <=> hex $b} keys %t}" compose2b-NamesList.txt >compose3c-NamesList.txt
9986            
9987             #### perl -wlne "($k,$r)=/^(\S+)\s+:[-=]\s+(.*?)\s*$/ or die; $k{$r} = $k; $r{$k}=$r; END { for my $k (sort {hex $a <=> hex $b} keys %r) { my @r = split /\s+/, $r{$k}; for my $o (1..$#r) {my @rr = @r; splice @rr, $o, 1; my ($rr,$kk) = join q( ), @rr; print qq($k\t<= $kk ), $r[$o] if $kk = $k{$rr}} } }" compose3c-NamesList.txt >compose4-NamesList.txt
9988             perl -wlne "($k,$h,$r)=/^(\S+)\s+:([-=])\s+(.*?)\s*$/ or die; $k{$r} = $k; $r{$k}=$r; $hk{$k}=$hr{$r}= ($h eq q(=)); END { for my $k (sort {hex $a <=> hex $b} keys %r) { my $h = $hk{$k}; my @r = split /\s+/, $r{$k}; print qq($k\t:$h $r{$k}) and next if @r == 2; for my $o (1..$#r) {my @rr = @r; splice @rr, $o, 1; my ($rr,$kk) = join q( ), @rr; print qq($k\t<= $kk ), $r[$o] if $kk = $k{$rr}} } }" compose3c-NamesList.txt >compose4-NamesList.txt
9989            
9990            
9991             # Recursively decompose; :- composition, := compatibility composition
9992             perl -wlne "/^(.+?)\s+([-=])>\s+(.+?)\s*$/ or die; $t{$1} = $3; $h{$1}=$2 if $2 eq q(=); sub t($); sub t($) {my $i=shift; return $n{$i} if exists $n{$i}; return $i unless $t{$i}; $t{$i} =~ /^(\S+)(.*)/ or die $i; my @rr = t($1); return $rr[0].$2, $h{$i} || $rr[1]} END{my(@rr, $h); @rr=t($_), $h = $rr[1] || q(-), (@i = split /\s+/, $rr[0]), print qq($_\t:$h ), join q( ), $i[0], sort @i[1..$#i] for sort {hex $a <=> hex $b} keys %t}" compose2b-NamesList.txt >compose3e-NamesList.txt
9993             # Recompose parts to get "merge 2" decompositions; <- and <= if involve composition, :- and := otherwise
9994             perl -wlne "($k,$h,$r)=/^(\S+)\s+:([-=])\s+(.*?)\s*$/ or die; $k{$r} = $k; $r{$k}=$r; $hk{$k}=$hr{$r}= ($h eq q(=) ? q(=) : undef); END { for my $k (sort {hex $a <=> hex $b} keys %r) { my $h = $hk{$k} || q(-); my @r = split /\s+/, $r{$k}; print qq($k\t:$h $r{$k}) and next if @r == 2; my %s; for my $o (1..$#r) {my @rr = @r; next if $s{$rr[$o]}++; splice @rr, $o, 1; my ($rr,$kk) = join q( ), @rr; print qq($k\t<), $hk{$k} || $hr{$kk} || q(-), qq( $kk ), $r[$o] if $kk = $k{$rr}} } }" compose3e-NamesList.txt >compose4b-NamesList.txt
9995             # List of possible modifiers for each char, introduced by -->, separated by //
9996             perl -C31 -wlne "sub f($) {my $i=shift; return $i unless $i=~/^\w/; qq($i ).chr hex $i} sub ff($) {join q( ), map f($_), split /\s+/, shift} my($c,$B,$m) = /^(\S+)\s+[:<][-=]\s+(\S+)\s+(\S+)\s*$/ or die; push @{$c{$B}}, ff qq($m $c); END { for my $k (sort {hex $a <=> hex $b} keys %c) { print f($k), qq(\t--> ), join q( // ), sort @{$c{$k}} } }" compose4b-NamesList.txt >compose5d-NamesList.txt
9997             # Find what appears as modifiers:
9998             perl -F"\s+//\s+|\s+-->\s+" -wlane "s/\s+[0-9A-F]{4,}(\s\S+)?\s*$//, print for @F[1..$#F]" ! | sort -u >!-words
9999            
10000             Duplicate: 0296 <== [ 003F ] ==> <1 0295> (prefered)
10001             <ʖ> LATIN LETTER INVERTED GLOTTAL STOP
10002             <ʕ> LATIN LETTER PHARYNGEAL VOICED FRICATIVE at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
10003             Duplicate: 0384 <== [ 0020 0301 ] ==> <1 00B4> (prefered)
10004             <΄> GREEK TONOS
10005             <´> ACUTE ACCENT at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
10006             Duplicate: 1D43 <== [ 0061 ] ==> <1 00AA> (prefered)
10007             <ᵃ> MODIFIER LETTER SMALL A
10008             <ª> FEMININE ORDINAL INDICATOR at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
10009             Duplicate: 1D52 <== [ 006F ] ==> <1 00BA> (prefered)
10010             <ᵒ> MODIFIER LETTER SMALL O
10011             <º> MASCULINE ORDINAL INDICATOR at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
10012             Duplicate: 1D9F <== [ 0065 ] ==> <1 1D4C> (prefered)
10013             <ᶟ> MODIFIER LETTER SMALL REVERSED OPEN E
10014             <ᵌ> MODIFIER LETTER SMALL TURNED OPEN E at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
10015             Duplicate: 1E7A <== [ 0055 0304 0308 ] ==> <0 01D5> (prefered)
10016             <Ṻ> LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS
10017             <Ǖ> LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
10018             Duplicate: 1E7B <== [ 0075 0304 0308 ] ==> <0 01D6> (prefered)
10019             <ṻ> LATIN SMALL LETTER U WITH MACRON AND DIAERESIS
10020             <ǖ> LATIN SMALL LETTER U WITH DIAERESIS AND MACRON at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
10021             Duplicate: 1FBF <== [ 0020 0313 ] ==> <1 1FBD> (prefered)
10022             <᾿> GREEK PSILI
10023             <᾽> GREEK KORONIS at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
10024             Duplicate: 2007 <== [ 0020 ] ==> <1 00A0> (prefered)
10025             < > FIGURE SPACE
10026             < > NO-BREAK SPACE at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
10027             Duplicate: 202F <== [ 0020 ] ==> <1 00A0> (prefered)
10028             < > NARROW NO-BREAK SPACE
10029             < > NO-BREAK SPACE at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
10030             Duplicate: 2113 <== [ 006C ] ==> <1 1D4C1> (prefered)
10031             <ℓ> SCRIPT SMALL L
10032             <퓁> MATHEMATICAL SCRIPT SMALL L at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
10033             Duplicate: 24B8 <== [ 0043 ] ==> <1 1F12B> (prefered)
10034             <Ⓒ> CIRCLED LATIN CAPITAL LETTER C
10035             <> CIRCLED ITALIC LATIN CAPITAL LETTER C at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
10036             Duplicate: 24C7 <== [ 0052 ] ==> <1 1F12C> (prefered)
10037             <Ⓡ> CIRCLED LATIN CAPITAL LETTER R
10038             <> CIRCLED ITALIC LATIN CAPITAL LETTER R at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
10039             Duplicate: 2E1E <== [ 007E ] ==> <1 2A6A> (prefered)
10040             <⸞> TILDE WITH DOT ABOVE
10041             <⩪> TILDE OPERATOR WITH DOT ABOVE at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
10042             Duplicate: 33B9 <== [ 004D 0056 ] ==> <1 1F14B> (prefered)
10043             <㎹> SQUARE MV MEGA
10044             <> SQUARED MV at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
10045             Duplicate: FC03 <== [ 064A 0649 0654 ] ==> <1 FBF9> (prefered)
10046             <ﰃ> ARABIC LIGATURE YEH WITH HAMZA ABOVE WITH ALEF MAKSURA ISOLATED FORM
10047             <ﯹ> ARABIC LIGATURE UIGHUR KIRGHIZ YEH WITH HAMZA ABOVE WITH ALEF MAKSURA ISOLATED FORM at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
10048             Duplicate: FC68 <== [ 064A 0649 0654 ] ==> <1 FBFA> (prefered)
10049             <ﱨ> ARABIC LIGATURE YEH WITH HAMZA ABOVE WITH ALEF MAKSURA FINAL FORM
10050             <ﯺ> ARABIC LIGATURE UIGHUR KIRGHIZ YEH WITH HAMZA ABOVE WITH ALEF MAKSURA FINAL FORM at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
10051             Duplicate: FD55 <== [ 062A 062C 0645 ] ==> <1 FD50> (prefered)
10052             <ﵕ> ARABIC LIGATURE TEH WITH MEEM WITH JEEM INITIAL FORM
10053             <ﵐ> ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
10054             Duplicate: FD56 <== [ 062A 062D 0645 ] ==> <1 FD53> (prefered)
10055             <ﵖ> ARABIC LIGATURE TEH WITH MEEM WITH HAH INITIAL FORM
10056             <ﵓ> ARABIC LIGATURE TEH WITH HAH WITH MEEM INITIAL FORM at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
10057             Duplicate: FD57 <== [ 062A 062E 0645 ] ==> <1 FD54> (prefered)
10058             <ﵗ> ARABIC LIGATURE TEH WITH MEEM WITH KHAH INITIAL FORM
10059             <ﵔ> ARABIC LIGATURE TEH WITH KHAH WITH MEEM INITIAL FORM at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
10060             Duplicate: FD5D <== [ 0633 062C 062D ] ==> <1 FD5C> (prefered)
10061             <ﵝ> ARABIC LIGATURE SEEN WITH JEEM WITH HAH INITIAL FORM
10062             <ﵜ> ARABIC LIGATURE SEEN WITH HAH WITH JEEM INITIAL FORM at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
10063             Duplicate: FD87 <== [ 0644 062D 0645 ] ==> <1 FD80> (prefered)
10064             <ﶇ> ARABIC LIGATURE LAM WITH MEEM WITH HAH FINAL FORM
10065             <ﶀ> ARABIC LIGATURE LAM WITH HAH WITH MEEM FINAL FORM at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
10066             Duplicate: FD8C <== [ 0645 062C 062D ] ==> <1 FD89> (prefered)
10067             <ﶌ> ARABIC LIGATURE MEEM WITH JEEM WITH HAH INITIAL FORM
10068             <ﶉ> ARABIC LIGATURE MEEM WITH HAH WITH JEEM INITIAL FORM at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
10069             Duplicate: FD92 <== [ 0645 062C 062E ] ==> <1 FD8E> (prefered)
10070             <ﶒ> ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM
10071             <ﶎ> ARABIC LIGATURE MEEM WITH KHAH WITH JEEM INITIAL FORM at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
10072             Duplicate: FDB5 <== [ 0644 062D 0645 ] ==> <1 FD88> (prefered)
10073             <ﶵ> ARABIC LIGATURE LAM WITH HAH WITH MEEM INITIAL FORM
10074             <ﶈ> ARABIC LIGATURE LAM WITH MEEM WITH HAH INITIAL FORM at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
10075             Duplicate: FE34 <== [ 005F ] ==> <1 FE33> (prefered)
10076             <︴> PRESENTATION FORM FOR VERTICAL WAVY LOW LINE
10077             <︳> PRESENTATION FORM FOR VERTICAL LOW LINE at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 4224, <$f> line 38879.
10078            
10079             Duplicate: 0273 <== [ 006E ] ==> <1 014B> (prefered)
10080             <ɳ> LATIN SMALL LETTER N WITH RETROFLEX HOOK
10081             <ŋ> LATIN SMALL LETTER ENG at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10082             Duplicate: 1DAF <== [ 006E ] ==> <1 1D51> (prefered)
10083             <ᶯ> MODIFIER LETTER SMALL N WITH RETROFLEX HOOK
10084             <ᵑ> MODIFIER LETTER SMALL ENG at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10085             Duplicate: 2040 <== [ 007E ] ==> <1 203F> (prefered)
10086             <⁀> CHARACTER TIE
10087             <‿> UNDERTIE at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10088             Duplicate: 207F <== [ 004E ] ==> <1 014A> (prefered)
10089             <ⁿ> SUPERSCRIPT LATIN SMALL LETTER N
10090             <Ŋ> LATIN CAPITAL LETTER ENG at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10091             Duplicate: 224B <== [ 007E ] ==> <1 2248> (prefered)
10092             <≋> TRIPLE TILDE
10093             <≈> ALMOST EQUAL TO at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10094             Duplicate: 2256 <== [ 003D ] ==> <1 224D> (prefered)
10095             <≖> RING IN EQUAL TO
10096             <≍> EQUIVALENT TO at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10097             Duplicate: 2257 <== [ 003D ] ==> <1 224D> (prefered)
10098             <≗> RING EQUAL TO
10099             <≍> EQUIVALENT TO at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10100             Duplicate: 225E <== [ 225F ] ==> <1 225C> (prefered)
10101             <≞> MEASURED BY
10102             <≜> DELTA EQUAL TO at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10103             Duplicate: 2263 <== [ 003D ] ==> <1 2261> (prefered)
10104             <≣> STRICTLY EQUIVALENT TO
10105             <≡> IDENTICAL TO at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10106             Duplicate: 2277 <== [ 003D 0338 ] ==> <1 2276> (prefered)
10107             <≷> GREATER-THAN OR LESS-THAN
10108             <≶> LESS-THAN OR GREATER-THAN at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10109             Duplicate: 2279 <== [ 003D ] ==> <1 2278> (prefered)
10110             <≹> NEITHER GREATER-THAN NOR LESS-THAN
10111             <≸> NEITHER LESS-THAN NOR GREATER-THAN at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10112             Duplicate: 2279 <== [ 003D 0338 0338 ] ==> <1 2278> (prefered)
10113             <≹> NEITHER GREATER-THAN NOR LESS-THAN
10114             <≸> NEITHER LESS-THAN NOR GREATER-THAN at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10115             Duplicate: 2982 <== [ 003A ] ==> <1 2236> (prefered)
10116             <⦂> Z NOTATION TYPE COLON
10117             <∶> RATIO at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10118             Duplicate: 2993 <== [ 0028 ] ==> <1 2985> (prefered)
10119             <⦓> LEFT ARC LESS-THAN BRACKET
10120             <⦅> LEFT WHITE PARENTHESIS at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10121             Duplicate: 2994 <== [ 0029 ] ==> <1 2986> (prefered)
10122             <⦔> RIGHT ARC GREATER-THAN BRACKET
10123             <⦆> RIGHT WHITE PARENTHESIS at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10124             Duplicate: 2995 <== [ 0029 ] ==> <1 2986> (prefered)
10125             <⦕> DOUBLE LEFT ARC GREATER-THAN BRACKET
10126             <⦆> RIGHT WHITE PARENTHESIS at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10127             Duplicate: 2996 <== [ 0028 ] ==> <1 2985> (prefered)
10128             <⦖> DOUBLE RIGHT ARC LESS-THAN BRACKET
10129             <⦅> LEFT WHITE PARENTHESIS at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10130             Duplicate: 29BC <== [ 0025 ] ==> <1 2030> (prefered)
10131             <⦼> CIRCLED ANTICLOCKWISE-ROTATED DIVISION SIGN
10132             <‰> PER MILLE SIGN at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10133             Duplicate: 2A17 <== [ 222B ] ==> <1 2A10> (prefered)
10134             <⨗> INTEGRAL WITH LEFTWARDS ARROW WITH HOOK
10135             <⨐> CIRCULATION FUNCTION at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10136             Duplicate: 2A34 <== [ 00D7 ] ==> <1 22C9> (prefered)
10137             <⨴> MULTIPLICATION SIGN IN LEFT HALF CIRCLE
10138             <⋉> LEFT NORMAL FACTOR SEMIDIRECT PRODUCT at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10139             Duplicate: 2A35 <== [ 00D7 ] ==> <1 22CA> (prefered)
10140             <⨵> MULTIPLICATION SIGN IN RIGHT HALF CIRCLE
10141             <⋊> RIGHT NORMAL FACTOR SEMIDIRECT PRODUCT at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10142             Duplicate: 2A36 <== [ 00D7 ] ==> <1 2A2F> (prefered)
10143             <⨶> CIRCLED MULTIPLICATION SIGN WITH CIRCUMFLEX ACCENT
10144             <⨯> VECTOR OR CROSS PRODUCT at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10145             Duplicate: 2A50 <== [ 00D7 ] ==> <1 2A33> (prefered)
10146             <⩐> CLOSED UNION WITH SERIFS AND SMASH PRODUCT
10147             <⨳> SMASH PRODUCT at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10148             Duplicate: 2ACF <== [ 25C1 ] ==> <1 2A1E> (prefered)
10149             <⫏> CLOSED SUBSET
10150             <⨞> LARGE LEFT TRIANGLE OPERATOR at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10151             Duplicate: 2AFB <== [ 2223 ] ==> <1 2AF4> (prefered)
10152             <⫻> TRIPLE SOLIDUS BINARY RELATION
10153             <⫴> TRIPLE VERTICAL BAR BINARY RELATION at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10154             Duplicate: 2AFB <== [ 007C ] ==> <1 2AF4> (prefered)
10155             <⫻> TRIPLE SOLIDUS BINARY RELATION
10156             <⫴> TRIPLE VERTICAL BAR BINARY RELATION at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10157             Duplicate: 2AFD <== [ 002F ] ==> <1 2215> (prefered)
10158             <⫽> DOUBLE SOLIDUS OPERATOR
10159             <∕> DIVISION SLASH at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10160             Duplicate: 2AFF <== [ 007C ] ==> <1 2AFE> (prefered)
10161             <⫿> N-ARY WHITE VERTICAL BAR
10162             <⫾> WHITE VERTICAL BAR at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10163             Duplicate: 3018 <== [ 0028 ] ==> <1 27EE> (prefered)
10164             <〘> LEFT WHITE TORTOISE SHELL BRACKET
10165             <⟮> MATHEMATICAL LEFT FLATTENED PARENTHESIS at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10166             Duplicate: 3019 <== [ 0029 ] ==> <1 27EF> (prefered)
10167             <〙> RIGHT WHITE TORTOISE SHELL BRACKET
10168             <⟯> MATHEMATICAL RIGHT FLATTENED PARENTHESIS at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10169             Duplicate: A760 <== [ 0059 ] ==> <1 A73C> (prefered)
10170             <Ꝡ> LATIN CAPITAL LETTER VY
10171             <Ꜽ> LATIN CAPITAL LETTER AY at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10172             Duplicate: A761 <== [ 0079 ] ==> <1 A73D> (prefered)
10173             <ꝡ> LATIN SMALL LETTER VY
10174             <ꜽ> LATIN SMALL LETTER AY at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10175             Duplicate: 1D4C1 <== [ 006C ] ==> <1 2113> (prefered)
10176             <𝓁> MATHEMATICAL SCRIPT SMALL L
10177             <ℓ> SCRIPT SMALL L at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10178             Duplicate: 1F12B <== [ 0043 ] ==> <1 24B8> (prefered)
10179             <🄫> CIRCLED ITALIC LATIN CAPITAL LETTER C
10180             <Ⓒ> CIRCLED LATIN CAPITAL LETTER C at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10181             Duplicate: 1F12C <== [ 0052 ] ==> <1 24C7> (prefered)
10182             <🄬> CIRCLED ITALIC LATIN CAPITAL LETTER R
10183             <Ⓡ> CIRCLED LATIN CAPITAL LETTER R at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10184             Duplicate: 1F14B <== [ 004D 0056 ] ==> <1 33B9> (prefered)
10185             <🅋> SQUARED MV
10186             <㎹> SQUARE MV MEGA at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 5263, <$f> line 38876.
10187             Duplicate: A789 <== [ 003A ] ==> <1 02F8> (prefered)
10188             <꞉> MODIFIER LETTER COLON
10189             <˸> MODIFIER LETTER RAISED COLON at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 8032, <$f> line 39278.
10190             Duplicate: 02EF <== [ 0020 0306 ] ==> <1 02EC> (prefered)
10191             <˯> 02EF MODIFIER LETTER LOW DOWN ARROWHEAD
10192             <ˬ> 02EC MODIFIER LETTER VOICING at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 8634, <$f> line 39278.
10193             Duplicate: 2B95 <== [ 2192 ] ==> <1 27A1> (prefered)
10194             <⮕> 2B95 RIGHTWARDS BLACK ARROW
10195             <➡> 27A1 BLACK RIGHTWARDS ARROW at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 9828, <$f> line 43944.
10196             Duplicate: 1F7C6 <== [ 2727 ] ==> <1 2726> (prefered)
10197             <🟆> 1F7C6 FOUR POINTED BLACK STAR
10198             <✦> 2726 BLACK FOUR POINTED STAR at UI-KeyboardLayout/lib/UI/KeyboardLayout.pm line 9828, <$f> line 43944.
10199             EOR
10200            
10201             my (%known_dups) = map +($_,1), qw(0296 0384 1D43 1D52 1D9F 1E7A 1E7B 1FBF 2007
10202             202F 2113 24B8 24C7 2E1E 33B9 FC03 FC68 FD55 FD56 FD57 FD5D FD87 FD8C
10203             FD92 FDB5 FE34 2B95 1F7C6
10204             0273 1DAF 2040 207F 224B 2256 2257 225E 2263 2277 2279 2982 2993 2994 2995 2996 29BC
10205             2A17 2A34 2A35 2A36 2A50 2ACF 2AFB 2AFD 2AFF 3018 3019 A760 A761 1D4C1 1F12B 1F12C 1F14B A789 02EF); # As of Unicode 6.2 (questionable: 2982 2ACF)
                  # %known_dups (the statement above) is a set: hex code point => 1.
                  # parse_NameList() tests $known_dups{$c} and skips its "Duplicate: ..." warning
                  # for the characters listed here.
10206            
10207             sub decompose_r($$$$); # recursive
                  # decompose_r($self, $t, $i, $cache): fully expand the decomposition of key $i.
                  #   $t     - hash ref: key => array ref of entries [$compat, $next_key, @rest]
                  #   $i     - key to expand (parse_NameList() passes hex code points)
                  #   $cache - hash ref used for memoization; the result is stored as $cache->{$i}
                  # Returns an array ref whose elements are [$compat, @expand].  A key with no entry
                  # in $t expands trivially to [[0, $i]].  For every entry of $t->{$i} the expansion
                  # of its second element is computed recursively, the compat flags are OR-ed
                  # together, and the entry's remaining elements (index 2 and up) are appended.
                  # NOTE(review): $cache->{$i} is filled only after the recursion returns, so nothing
                  # here guards against a cyclic $t - presumably the table is acyclic; confirm.
10208             sub decompose_r($$$$) { # returns array ref, elts are [$compat, @expand]
10209 0     0 0   my ($self, $t, $i, $cache, @expand) = (shift, shift, shift, shift);
10210 0 0         return $cache->{$i} if $cache->{$i};
10211 0 0         return $cache->{$i} = [[0, $i]] unless my $In = $t->{$i};
10212 0           for my $in (@$In) {
10213 0           my $compat = $in->[0];
10214             #warn "i=<$i>, compat=<$compat>, rest=<$in->[1]>";
10215 0           my $expand_in = $self->decompose_r($t, $in->[1], $cache);
                  # Copy before modifying: the shift() below would otherwise alter the cached value.
10216 0           $expand_in = $self->deep_copy($expand_in);
10217             #warn "Got: $in->[1] -> <@$expand> from $i = <@$in>";
10218 0           for my $expand (@$expand_in) {
10219 0 0 0       warn "Expansion funny: <@$expand>" if @$expand < 2 or $expand->[0] !~ /^[01]$/;
                  # Strip the leading compat flag of the sub-expansion and fold it into ours.
10220 0           $compat = ( shift(@$expand) | $compat);
10221 0 0         warn "!Malformed: $i -> $compat <@$expand>" if $expand->[0] =~ /^[01]$/;
10222 0           push @expand, [ $compat, @$expand, @$in[2..$#$in] ];
10223             }
10224             }
10225 0           return $cache->{$i} = \@expand;
10226             }
10227            
10228 0 0   0 0   sub fromHEX ($) { my $i = shift; $i =~ /^\w/ and hex $i}
  0            
                  # fromHEX($i): hex() of $i when $i starts with a word character; otherwise the
                  # false value of the failed match is returned.  It supplies the numeric key in the
                  # "fromHEX $a <=> fromHEX $b or $a cmp $b" sort comparators used in this file.
10229            
10230             my %operators = (DOT => ['MIDDLE DOT', 'FULL STOP'], RING => ['DEGREE SIGN'], DIAMOND => ['WHITE DIAMOND'],
10231             'DOUBLE SOLIDUS' => ['PARALLEL TO'], MINUS => ['HYPHEN-MINUS']);
                  # %operators (above): stripped operator name => names of further characters that
                  # parse_NameList() also tries as bases.  It is looked up with a name from which a
                  # trailing " OPERATOR" (or " SIGN") has been removed.
10232            
10233             # THIS IS A MULTIMAP (later entry for a TARGET wins)! ■□ ◼◻ ◾◽ ◇◆◈⟐⟡⟢⟣⌺ △▲▵▴▽▼▿▾⟁⧊⧋
                  # %uni_manual: transformation name => flat list of (from, to) character pairs.
                  # parse_NameList() walks each list from its end, two elements at a time, and records
                  # for the "to" character a candidate [name of "from", "manual-<transformation>"]; it
                  # also tries the uc() of the pair when uc() changes both characters.
                  # NOTE(review): parse_NameList() consumes these arrays with pop(), so they are empty
                  # after its first call in a process - confirm that this is intended.
10234             my %uni_manual = (phonetized => [qw( 0 ə s ʃ z ʒ j ɟ v ⱱ n ɳ N ⁿ n ŋ V ɤ ! ǃ ? ʔ ¿ ʕ | ǀ f ʄ F ǂ x ʘ X ǁ
10235             g ʛ m ɰ h ɧ d ᶑ C ʗ)], # z ɮ (C ʗ is "extras")
10236             phonetize2 => [qw( e ɘ E ɞ i ɻ I ɺ)], # Use some capitalized sources (no uc variants)...
10237             phonetize3 => [qw( a ɒ A Ɒ e ɜ E ɝ)], # Use some capitalized sources (no uc variants)...
10238             phonetize0 => [qw( e ə)],
10239             paleo => [qw( & ⁊ W Ƿ w ƿ h ƕ H Ƕ G Ȝ g ȝ )],
10240             # cut&paste from http://en.wikipedia.org/wiki/Coptic_alphabet
10241             # perl -C31 -wne "chomp; ($uc,$lc,undef,undef,$gr) = split /\t/;($ug,$lg)=split /,\s+/, $gr; print qq( $lg $lc $ug $uc)" coptic2 >coptic-tr
10242             # Fix stigma, koppa; p/P are actually 900; a/A are for AKHMIMIC KHEI (variant of KHEI on h/H);
10243             # 2e17 ⸗ double hyphen; sampi's are duplicated in both places
10244             greek2coptic => [qw(
10245             α ⲁ Α Ⲁ β ⲃ Β Ⲃ γ ⲅ Γ Ⲅ δ ⲇ Δ Ⲇ ε ⲉ Ε Ⲉ ϛ ⲋ Ϛ Ⲋ ζ ⲍ Ζ Ⲍ η ⲏ Η Ⲏ ϙ ϭ Ϙ Ϭ ϡ ⳁ Ϡ Ⳁ
10246             θ ⲑ Θ Ⲑ ι ⲓ Ι Ⲓ κ ⲕ Κ Ⲕ λ ⲗ Λ Ⲗ μ ⲙ Μ Ⲙ ν ⲛ Ν Ⲛ ξ ⲝ Ξ Ⲝ ο ⲟ Ο Ⲟ
10247             π ⲡ Π Ⲡ ρ ⲣ Ρ Ⲣ σ ⲥ Σ Ⲥ τ ⲧ Τ Ⲧ υ ⲩ Υ Ⲩ φ ⲫ Φ Ⲫ χ ⲭ Χ Ⲭ ψ ⲯ Ψ Ⲯ ω ⲱ Ω Ⲱ )],
10248             latin2extracoptic => [qw( - ⸗
10249             s ϣ S Ϣ f ϥ F Ϥ x ϧ X Ϧ h ϩ H Ϩ j ϫ J Ϫ t ϯ T Ϯ p ⳁ P Ⳁ a ⳉ A Ⳉ )],
10250             addline => [qw( 0 ∅ ∅ ⦱ + ∦ ∫ ⨏ • ⊝ / ⫽ ⫽ ⫻ ∮ ⨔ × ⨳ × ⩐ )], # ∮ ⨔ a cheat
10251             addhline => [qw( = ≣ = ≡ ≡ ≣ † ‡ + ∦ / ∠ | ∟ . ∸ ∨ ⊻ ∧ ⊼ ◁ ⩤ * ⩮
10252             ⊨ ⫢ ⊦ ⊧ ⊤ ⫧ ⊥ ⫨ ⊣ ⫤ ⊳ ⩥ ⊲ ⩤ ⋄ ⟠ ∫ ⨍ ⨍ ⨎ • ⦵ ( ∈ ) ∋
10253             ∪ ⩌ ∩ ⩍ ≃ ≅ ⨯ ⨲ )], # conflict with modifiers: qw( _ ‗ ); ( ∈ ) ∋ not very useful - but logical - with ∈∋ as bluekeys... 2 ƻ destructive
10254             addvline => [qw( ⊢ ⊩ ⊣ ⫣ ⊤ ⫪ ⊥ ⫫ □ ⎅ | ‖ ‖ ⦀ ∫ ⨒ ≢ ⩨ ⩨ ⩩ • ⦶
10255             \ ⫮ ° ⫯ . ⫰ ⫲ ⫵ ∞ ⧞ = ⧧ ⧺ ⧻ + ⧺ ∩ ⨙ ∪ ⨚ 0 ⦽ _ ⟂ _ ∟ )], # + ⫲
10256             addtilde => [qw( 0 ∝ / ∡ \ ∢ ∫ ∱ ∮ ⨑ : ∻ - ≂ ≠ ≆ ~ ≋ ~ ≈ ∼ ≈ ≃ ≊ ≈ ≋ = ≌
10257             ≐ ≏ ( ⟅ ) ⟆ ∧ ⩄ ∨ ⩅ ∩ ⩆ ∪ ⩇ )], # not on 2A**
10258             adddot => [qw( : ⫶ " ∵ ∫ ⨓ ∮ ⨕ □ ⊡ ◇ ⟐ ( ⦑ ) ⦒ ≟ ≗ ≐ ≑
10259             - ┄ — ┄ ─ ┈ ━ ┅ ═ ┉ | ┆ │ ┊ ┃ ┇ ║ ┋ )], # ⫶ is tricolon, not vert. … "; (m-)dash/bar, (b)[h/v]draw, bold/dbl
10260             adddottop => [qw( + ∔ )],
10261             addleft => [qw( = ≔ × ⨴ × ⋉ \ ⋋ + ⨭ → ⧴ ∫ ⨐ ∫ ⨗ ∮ ∳ ⊂ ⟈ ⊃ ⫐ ⊳ ⧐ ⊢ ⊩ ⊩ ⊪ ⊣ ⟞
10262             ◇ ⟢ ▽ ⧨ ≡ ⫢ • ⥀ ⋈ ⧑ ≟ ⩻ ≐ ≓ | ⩘ ≔ ⩴ ⊲ ⫷)], # × ⨴ is hidden
10263             addright => [qw( = ≕ × ⨵ × ⋊ / ⋌ + ⨮ - ∹ ∫ ⨔ ∮ ∲ ⊂ ⫏ ⊃ ⟉ ⊲ ⧏ ⊢ ⟝ ⊣ ⫣
10264             ◇ ⟣ △ ⧩ • ⥁ ⋈ ⧒ ≟ ⩼ ≐ ≒ | ⩗ ⊳ ⫸ : ⧴)], # × ⨵ is hidden
10265             sharpen => [qw( < ≺ > ≻ { ⊰ } ⊱ ( ⟨ ) ⟩ ∧ ⋏ ∨ ⋎ . ⋄ ⟨ ⧼ ⟩ ⧽ ∫ ⨘
10266             ⊤ ⩚ ⊥ ⩛ ◇ ⟡ ▽ ⧍ • ⏣ ≟ ≙ + ⧾ - ⧿)], # ⋆
10267             unsharpen => [qw( < ⊏ > ⊐ ( ⟮ ) ⟯ ∩ ⊓ ∪ ⊔ ∧ ⊓ ∨ ⊔ . ∷ ∫ ⨒ ∮ ⨖ { ⦉ } ⦊
10268             / ⧄ \ ⧅ ° ⧇ ◇ ⌺ • ⌼ ≟ ≚ ≐ ∺ ( 〘 ) 〙 )], # + ⊞ - ⊟ * ⊠ . ⊡ × ⊠, ( ⦗ ) ⦘ ( 〔 ) 〕
10269             whiten => [qw( [ ⟦ ] ⟧ ( ⟬ ) ⟭ { ⦃ } ⦄ ⊤ ⫪ ⊥ ⫫ ; ⨟ ⊢ ⊫ ⊣ ⫥ ⊔ ⩏ ⊓ ⩎ ∧ ⩓ ∨ ⩔ _ ‗ = ≣
10270             : ⦂ | ⫾ | ⫿ • ○ < ⪡ > ⪢ ⊓ ⩎ ⊔ ⩏ )], # or blacken □ ■ ◻ ◼ ◽ ◾ ◇ ◆ △ ▲ ▵ ▴ ▽ ▼ ▿ ▾
10271             quasisynon => [qw( ∈ ∊ ∋ ∍ ≠ ≶ ≠ ≷ = ≸ = ≹ ≼ ⊁ ≽ ⊀ ≺ ⋡ ≻ ⋠ < ≨ > ≩ Δ ∆
10272             ≤ ⪕ ≥ ⪖ ⊆ ⊅ ⊇ ⊄ ⊂ ⊉ ⊃ ⊈ ⊏ ⋣ ⊐ ⋢ ⊳ ⋬ ⊲ ⋭ … ⋯ / ⟋ \ ⟍
10273             ( ⦇ ) ⦈ [ ⨽ ] ⨼ ∅ ⌀
10274             ⊤ ⫟ ⊥ ⫠ ⟂ ⫛ □ ∎ ▽ ∀ ‖ ∥ ≟ ≞ ≟ ≜ ~ ‿ ~ ⁀ ■ ▬ )], # ( ⟬ ) ⟭ < ≱ > ≰ ≤ ≯ ≥ ≮ * ⋆
10275             amplify => [qw( < ≪ > ≫ ≪ ⋘ ≫ ⋙ ∩ ⋒ ∪ ⋓ ⊂ ⋐ ⊃ ⋑ ( ⟪ ) ⟫ ∼ ∿ = ≝ ∣ ∥ . ⋮
10276             ∈ ∊ ∋ ∍ - − / ∕ \ ∖ √ ∛ ∛ ∜ ∫ ∬ ∬ ∭ ∭ ⨌ ∮ ∯ ∯ ∰ : ⦂ ` ⎖
10277             : ∶ ≈ ≋ ≏ ≎ ≡ ≣ × ⨯ + ∑ Π ∏ Σ ∑ ρ ∐ ∐ ⨿ ⊥ ⟘ ⊤ ⟙ ⟂ ⫡ ; ⨾ □ ⧈ ◇ ◈
10278             ⊲ ⨞ ⊢ ⊦ △ ⟁ ∥ ⫴ ⫴ ⫼ / ⫽ ⫽ ⫻ • ● ⊔ ⩏ ⊓ ⩎ ∧ ⩕ ∨ ⩖ ▷ ⊳ ◁ ⊲
10279             ⋉ ⧔ ⋊ ⧕ ⋈ ⧓ ⪡ ⫷ ⪢ ⫸ ≟ ≛ ≐ ≎ ⊳ ⫐ ⊲ ⫏ { ❴ } ❵ × ⨶ )], # ⋆ ☆ ⋆ ★ ; ˆ ∧ conflicts with combining-ˆ; * ∏ stops propagation *->×->⋈, : ⦂ hidden; ∥ ⫴; × ⋈ not needed; ∰ ⨌ - ???; ≃ ≌ not useful
10280             turnaround => [qw( ∧ ∨ ∩ ∪ ∕ ∖ ⋏ ⋎ ∼ ≀ ⋯ ⋮ … ⋮ ⋰ ⋱ _ ‾
10281             8 ∞ ∆ ∇ Α ∀ Ε ∃ ∴ ∵ ≃ ≂
10282             ∈ ⫛ ∈ ∋ ∋ ⫙ ∉ ∌ ∊ ∍ ∏ ∐ ± ∓ ⊓ ⊔ ≶ ≷ ≸ ≹ ⋀ ⋁ ⋂ ⋃ ⋉ ⋊ ⋋ ⋌ ⋚ ⋛ ≤ ⋜ ≥ ⋝ ≼ ⋞ ≽ ⋟ )], # XXXX Can't do both directions
10283             superize => [qw( h ʱ ' ʹ < ˂ > ˃ ^ ˑ ( ˓ ) ˒ ⊢ ˫ 0 ᵊ * ˟ × ˟ ~ ﹋ ≈ ﹌ ─ ‾ □ ⸋ . ⸳)], # Additions to !
10284             subize => [qw( < ˱ > ˲ _ ˍ ' ˏ " ˶ ˵ ˵ . ˳ ° ˳ ˘ ˯ ˘ ˬ ( ˓ ) ˒ 0 ₔ ~ ﹏ ═ ‗ , ¸)], # "
10285             subize2 => [qw( < ˂ > ˃ )], # these are in older Unicode, so would override if in subize
10286             # Most of these are for I/O on very ancient systems:
10287             aplbox => [qw( | ⌷ = ⌸ ÷ ⌹ ◇ ⌺ ∘ ⌻ ○ ⌼ / ⍁ \ ⍂ < ⍃ > ⍄ ← ⍇ → ⍈ ∨ ⍌ Δ ⍍ ↑ ⍐ ∧ ⍓ ∇ ⍔ ↓ ⍗ ' ⍞ : ⍠ ≠ ⍯ ? ⍰ ∅ ⎕ )],
10288             round => [qw( < ⊂ > ⊃ = ≖ = ≗ = ≍ ∫ ∮ ∬ ∯ ∭ ∰ ∼ ∾ - ⊸ □ ▢ ∥ ≬ ‖ ≬ • ⦁
10289             … ∴ ≡ ≋ ⊂ ⟃ ⊃ ⟄ ⊤ ⫙ ⊥ ⟒ ( ⦖ ) ⦕ ( ⦓ ) ⦔ ( ⦅ ) ⦆ ⊳ ⪧ ⊲ ⪦ ≟ ≘ ≐ ≖ . ∘
10290             [ ⟬ ] ⟭ { ⧼ } ⧽ % ⦼ % ‰ × ⦻ ⨯ ⨷ ∧ ∩ ∨ ∪ )]); # = ≈
10291            
10292             sub parse_NameList ($$) {
                  # parse_NameList($self, $f): read NamesList.txt-style text from the filehandle $f and
                  # build (de)composition tables from the official decompositions plus name-based guesses.
                  # Returns a 6-element list:
                  #   \%into2  - charhex2key(composed hex) => list of [flags, base char, combining char]
                  #   \%comp2  - $comp2{combining hex}{base hex} = list of [flags, composed hex],
                  #              sorted by flags, then by charhex2key() of the composed hex
                  #   \%NM     - charhex2key(hex) => character name
                  #   \%BL     - charhex2key(hex) => charhex2key() of the first hex of the last "@@" line
                  #   \%NS     - chars whose name contains COMBINING but which fail \p{NonSpacingMark}
                  #   $version - the text after "@@@ The Unicode Standard", when such a line was seen
                  # where flags is (($compat | $contr->[0]) << 1) | $calculated.
10293 0     0 0   my ($self, $f, $k, $kk, $name, $_c, %basic, %cached_full, %compose, $version,
10294             %into2, %ordered, %candidates, %N, %comp2, %NM, %BL, $BL, %G, %NS) = (shift, shift);
10295 0           binmode $f; # NameList.txt is in Latin-1, not unicode
                  # Pass 1: scan the file line by line, filling %N, %NM, %BL, %basic and %candidates.
10296 0           while (my $s = <$f>) { # extract compositions, add to char downgrades; -> composition, => compatibility composition
10297 0 0         if ($s =~ /^\@\@\@\s+The\s+Unicode\s+Standard\s+(.*?)\s*$/i) {
10298 0           $version = $1;
10299             }
                  # A character entry ("HEX NAME").  The guesswork below is applied to the PREVIOUS
                  # character ($k/$name/$_c), and only while $kk is still defined for it.
10300 0 0         if ($s =~ /^([\da-f]+)\b\s*(.*?)\s*$/i) {
10301 0           my ($K, $Name, $C, $t) = ($1, $2, $self->charhex2key("$1"));
10302 0           $N{$Name} = $K;
10303 0           $NM{$C} = $Name; # Not needed for compositions, but handy for user-visible output
10304 0           $BL{$C} = $self->charhex2key($BL); # Used for sorting
10305             # Finish processing of preceding text
10306 0 0         if (defined $kk) { # Did not see (official) decomposition
10307             # warn("see combining: $K $C $Name"),
10308 0 0 0       $NS{$_c}++ if $name =~ /\bCOMBINING\b/ and not ($_c =~ /\p{NonSpacingMark}/);
10309 0 0         if ($name =~ /^(.*?)\s+(?:(WITH)\s+|(?=(?:OVER|ABOVE|PRECEDED\s+BY|BELOW(?=\s+LONG\s+DASH))\s+\b(?!WITH\b|AND\b)))(.*?)\s*$/) {
10310 0           push @{$candidates{$k}}, [$1, $3];
  0            
10311 0           my ($b, $with, $ext) = ($1, $2, $3);
10312 0           my @ext = split /\s+AND\s+/, $ext;
10313 0 0 0       if ($with and @ext > 1) {
10314 0           for my $i (0..$#ext) {
10315 0           my @ext1 = @ext;
10316 0           splice @ext1, $i, 1;
10317 0           push @{$candidates{$k}}, ["$b WITH ". (join ' AND ', @ext1), $ext[$i]];
  0            
10318             }
10319             }
10320             }
10321 0 0         if ($name =~ /^(.*)\s+(?=OR\s)(.*?)\s*$/) { # Find the latest possible...
10322 0           push @{$candidates{$k}}, [$1, $2];
  0            
10323             }
10324 0 0         if (($t = $name) =~ s/\b(COMBINING(?=\s+CYRILLIC\s+LETTER)|BARRED|SLANTED|APPROXIMATELY|ASYMPTOTICALLY|(?
10325 0           push @{$candidates{$k}}, [$t, "calculated-$+"];
  0            
10326 0 0         $candidates{$k}[-1][1] .= '-epigraphic' if $t =~ /\bEPIGRAPHIC\b/; # will be massaged away from $t later
10327             $candidates{$k}[-1][0] =~ s/\s+SYMBOL$// and $candidates{$k}[-1][1] .= '-symbol'
10328 0 0 0       if $candidates{$k}[-1][1] =~ /\bLUNATE\b/;
10329             # warn("smallcapital $name"),
10330 0 0         $candidates{$k}[-1][1] .= '-smallcaps' if $t =~ /\bSMALL\s+CAPITAL\b/; # will be massaged away from $t later
10331             # warn "Candidates: <$candidates{$k}[0]>; <$candidates{$k}[1]>";
10332             }
10333 0 0         if (($t = $name) =~ s/\b(WHITE|BLACK|CIRCLED)\s+//) {
10334 0           push @{$candidates{$k}}, [$t, "fake-$1"];
  0            
10335             }
10336 0 0         if (($t = $name) =~ s/\bBLACK\b/WHITE/) {
10337 0           push @{$candidates{$k}}, [$t, "fake-black"];
  0            
10338             }
10339 0 0         if (($t = $name) =~ s/^(?:RAISED|MODIFIER\s+LETTER(?:\s+RAISED)?(\s+LOW)?)\s+//) {
10340 0 0         push @{$candidates{$k}}, [$t, $1 ? "fake-sub" : "fake-super"];
  0            
10341             }
10342 0 0         if (($t = $name) =~ s/\bBUT\s+NOT\b/OR/) {
10343 0           push @{$candidates{$k}}, [$t, "fake-but-not"];
  0            
10344             }
10345 0 0         if (($t = $name) =~ s/(^LATIN\b.*\b\w)UM((?:\s+ROTUNDA)?)$/$1$2/) { # Paleo-latin
10346 0           push @{$candidates{$k}}, [$t, "fake-umify"];
  0            
10347             }
10348 0 0 0       if ((0xa7 == ((hex $k)>>8)) and ($t = $name) =~ s/\b(\w|CO|VEN)(?!\1)(\w)$/$2/) { # Paleo-latin (CON/VEND + digraph)
10349 0           push @{$candidates{$k}}, [$t, "fake-paleocontraction-by-last"];
  0            
10350             }
10351 0 0         if (($t = $name) =~ s/(?:(\bMIDDLE-WELSH)\s+)?\b(\w)(?=\2$)//) {
10352 0 0         push @{$candidates{$k}}, [$t, "fake-doubleletter" . ($1 ? "-$1" : '')];
  0            
10353             }
10354 0 0         if (($t = $name) =~ s/\b(APL\s+FUNCTIONAL\s+SYMBOL)\s+\b(.*?)\b\s*\b(QUAD(?!$)|UNDERBAR|TILDE|DIAERESIS|VANE|STILE|JOT|OVERBAR|BAR)\b\s*/$2/) {
10355             #warn "APL: $k ($name) --> <$t>; <$1> <$3>";
10356 0           push @{$candidates{$k}}, [$t, "calculated-$1-$3apl"];
  0            
10357 0           my %s = qw(UP DOWN DOWN UP); # misprint in the official name???
10358 0           $candidates{$k}[-1][0] =~ s/\b(UP|DOWN)(?=\s+TACK\b)/$s{$1}/;
10359             }
10360 0 0         if (($t = $name) =~ s/\b(LETTER\s+SMALL\s+CAPITAL)/CAPITAL LETTER/) {
10361 0           push @{$candidates{$k}}, [$t, "smallcaps"];
  0            
10362             }
10363 0 0 0       if (($t = $name) =~ s/\b(LETTER\s+)E([SZN])[HG]$/$1$2/ # esh/eng/ezh
      0        
      0        
      0        
10364             # next two not triggered since this is actually decomposed:
10365             or ($t = $name) =~ s/(?<=\bLETTER\sV\s)WITH\s+RIGHT\s+HOOK$//
10366             or ($t = $name) =~ s/\bDOTLESS\s+J\s+WITH\s+STROKE$/J/
10367             or $name eq 'LATIN SMALL LETTER SCHWA' and $t = 'DIGIT ZERO') {
10368 0           push @{$candidates{$k}}, [$t, "phonetized"] if 0;
10369             }
10370             }
                  # The entry just read becomes the "previous" character for the next iteration.
10371 0           ($k, $name, $_c) = ($K, $Name, $C);
10372 0 0         $G{$k} = $name if $name =~ /^GREEK\s/; # Indexed by hex
10373 0           $kk = $k;
10374 0           next;
10375             }
                  # "@@ HEX ... HEX" line: remember its first hex value in $BL.
10376 0 0         if ($s =~ /^\@\@\s+([\da-f]+)\b/i) {
10377 0 0         die unless $s =~ /^\@\@\s+([\da-f]+)\s.*\s([\da-f]+)\s*$/i;
10378 0           $BL = $1;
10379             }
10380 0           my $a; # compatibility_p, composed, decomposition string
10381 0 0         $a = [0, split /\s+/, "$1"] if $s =~ /^\s+:\s*([0-9A-F]+(?:\s+[0-9A-F]+)*)/;
10382 0 0 0       $a = [1, split /\s+/, "$2"], ($1 and push @$a, $1)
10383             if $s =~ /^\s+#\s*(?:(<.*?>)\s+)?([0-9A-F]+(?:\s+[0-9A-F]+)*)/; # Put at end
10384 0 0         next unless $a;
10385 0 0         if ($a->[-1] eq '') {{ # Clarify
10386 0           my ($math, $type) = ('', '');
  0            
10387             # warn("Unexpected name with : <$name>"), unless $name =~ s/^MATHEMATICAL\s+// and $math = "math-";
10388 0 0 0       warn("Unexpected name with : $k <$name>"), last # In BMP, MATHEMATICAL is omitted
      0        
      0        
10389             unless $name =~ /^(?:MATHEMATICAL\s+)?((?:(?:BLACK-LETTER|FRAKTUR|BOLD|ITALIC|SANS-SERIF|DOUBLE-STRUCK|MONOSPACE|SCRIPT)\b\s*?)+)(?=\s+(?:SMALL|CAPITAL|DIGIT|NABLA|PARTIAL|N-ARY|\w+\s+SYMBOL)\b)/
10390             or $name =~ /^HEBREW\s+LETTER\s+(WIDE|ALTERNATIVE)\b/
10391             or $name =~ /^(ARABIC\s+MATHEMATICAL(?:\s+(?:INITIAL|DOTLESS|STRETCHED|LOOPED|TAILED|DOUBLE-STRUCK))?)\b/
10392             or $name =~ /^(PLANCK|INFORMATION)/; # information source
10393 0 0         $type = $1 if $1;
10394 0           $type =~ s/BLACK-LETTER/FRAKTUR/; # http://en.wikipedia.org/wiki/Black-letter#Unicode
10395 0           $type =~ s/INFORMATION/Letterlike/; # http://en.wikipedia.org/wiki/Letterlike_Symbols_%28Unicode_block%29
10396 0 0         $type = '=' . join '-', map lc($_), split /\s+/, $type if $type;
10397 0           $a->[-1] = "";
10398             }}
10399 0 0         push @$a, '' unless @$a > 2;
10400 0           push @{$basic{$k}}, $a; # 1 2044 --\
  0            
10401 0 0 0       undef $kk unless $a->[-1] eq '' # Disable guesswork processing
      0        
      0        
      0        
10402             or @$a == 3 and (chr hex $a->[-2]) =~ /\W|\p{Lm}/ and $a->[-1] !~ /^[-1]) =~ /\w/;
10403             # print "@$a";
10404             }
10405             # $candidates{'014A'} = ['LATIN CAPITAL LETTER N', 'faked-HOOK']; # Pretend on ENG...
10406             # $candidates{'014B'} = ['LATIN SMALL LETTER N', 'faked-HOOK']; # Pretend on ENG...
10407             # XXXX Better have this together with pseudo-upgrade???
10408 0           push @{$candidates{'00b5'}}, ['GREEK SMALL LETTER MU', 'faked-calculated-SYMBOL']; # Pretend on MICRO SIGN...
  0            
10409             # $candidates{'00b5'} = ['GREEK SMALL LETTER MU', 'calculated-SYMBOL']; # Pretend on MICRO SIGN...
                  # Official decompositions whose base is named "... OPERATOR": add candidates for the
                  # stripped name and for the alternatives listed in %operators.
10410 0           for my $k (keys %basic) { # hex
10411 0           for my $exp (@{$basic{$k}}) {
  0            
10412 0           my $base = $exp->[1]; # hex
10413 0           my $name = $NM{$self->charhex2key($base)};
10414 0 0 0       next if not $name and ($k =~ /^[12]?F[89A]..$/ or hex $base >= 0x4E00 and hex $base <= 0x9FCC); # ideographs; there is also 3400 region...
      0        
10415 0 0         warn "Basic: `$k' --> `@$exp', base=`$base' --> `",$self->charhex2key($base),"'" unless $name;
10416 0 0         if ((my $NN = $name) =~ s/\s+OPERATOR$//) {
10417             #warn "operator: `$k' --> <$NN>, `@$exp', base=`$base' --> `",$self->charhex2key($base),"'";
10418 0 0         push @{$candidates{$k}}, [$_, @$exp[2..$#$exp]] for $NN, @{ $operators{$NN} || []};
  0            
  0            
10419             }
10420             }
10421             }
                  # Hand-written (from, to) character pairs of %uni_manual become "manual-<how>" candidates.
                  # NOTE(review): pop() below empties the arrays stored in the file-level %uni_manual,
                  # so a later call of this sub would find nothing to process here - confirm intent.
10422 0           for my $how (keys %uni_manual) { # Some stuff is easier to describe in terms of char, not names
10423 0           my $map = $uni_manual{$how};
10424 0 0         die "manual translation map for $how has an odd number of entries" if @$map % 2;
10425             # for my $from (keys %$map) {
10426 0           while (@$map) {
10427 0           my $to = pop @$map; # Give precedence to later entries
10428 0           my $from = pop @$map;
10429 0           for my $shift (0,1) {
10430 0 0         if ($shift) {
10431 0           my ($F, $T) = (uc $from, uc $to);
10432 0 0 0       next unless $F ne $from and $T ne $to;
10433 0           ($from, $to) = ($F, $T);
10434             }
10435 0           push @{$candidates{uc $self->key2hex($to)}}, [$NM{$from}, "manual-$how"];
  0            
10436             }
10437             }
10438             }
                  # Names starting with GREEK: when the same name with LATIN instead exists, record
                  # "faked-latinize" / "faked-greekize" candidates on the LATIN character.
10439 0           for my $g (keys %G) {
10440 0 0         (my $l = my $name = $G{$g}) =~ s/^GREEK\b/LATIN/ or die "Panic";
10441 0 0         next unless my $L = $N{$l}; # is HEX
10442             #warn "latinize: $L\t$l";
10443 0           push @{$candidates{$L}}, [$name, 'faked-latinize'];
  0            
10444 0 0         next unless my ($lat, $first, $rest, $add) = ($l =~ /^(LATIN\s+(?:SMALL|CAPITAL)\s+LETTER\s+(\w))(\w+)(?:\s+(\S.*))?$/);
10445 0 0         $lat =~ s/P$/F/, $first = 'F' if "$first$rest" eq 'PHI';
10446 0 0         die unless my $LL = $N{$lat};
10447 0 0         $add = (defined $add ? "-$add" : ''); # None of 6.1; only iIuUaAgGdf present of 6.1
10448 0           push @{$candidates{$L}}, [$lat, "faked-greekize$add"];
  0            
10449             #warn "latinize++: $L\t$l;\t`$add'\t$lat";
10450             }
10451 0           my %iu_TR = qw(INTERSECTION CAP UNION CUP);
                  # %_TR: the same word list with "_" replaced by " ", taken pairwise as old name => new name.
10452 0           my %_TR = map { (my $in = $_) =~ s/_/ /g; $in } qw(SMALL_VEE LOGICAL_OR
  0            
  0            
10453             UNION_OPERATOR_WITH_DOT MULTISET_MULTIPLICATION
10454             UNION_OPERATOR_WITH_PLUS MULTISET_UNION);
                  # NOTE(review): map() is evaluated in scalar context in the next statement, so $_TR_rx
                  # receives the number of generated elements (1) rather than the compiled pattern;
                  # "my ($_TR_rx) = map ..." would capture the qr// object.  Fix in the upstream module.
10455 0           my $_TR_rx = map qr/$_/, join '|', keys %_TR;
                  # Pass 2: resolve every candidate [base name, modifier text] to the hex of an existing
                  # character, trying a fixed list of prefixes/suffixes and name rewrites; each success
                  # is prepended to $basic{$c} as a compatibility ([1, ...]) decomposition.
10456 0           for my $c (keys %candidates) { # Done after all the names are known
10457 0           my ($CAND, $app, $t, $base, $b) = ($candidates{$c}, '');
10458 0           for my $Cand (@$CAND) { # (all keys in hex)
10459             #warn "candidates: $c <$Cand->[0]>, <@$Cand[1..$#$Cand]>";
10460             # An experiment shows that the FORMS are properly marked as non-canonical decompositions; so they are not needed here
10461 0 0         (my $with = my $raw = $Cand->[1]) =~ s/\s+(SIGN|SYMBOL|(?:FINAL|ISOLATED|INITIAL|MEDIAL)\s+FORM)$//
10462             and $app = " $1";
10463 0           for my $Mod ( (map ['', $_], $app, '', ' SIGN', ' SYMBOL', ' OF', ' AS MEMBER', ' TO'), # `SUBSET OF', `CONTAINS AS MEMBER', `PARALLEL TO'
10464             (map [$_, ''], 'WHITE ', 'WHITE UP-POINTING ', 'N-ARY '), ['WHITE ', ' SUIT'] ) {
10465 0           my ($prepend, $append) = @$Mod;
10466 0 0 0       next if $raw =~ /-SYMBOL$/ and 0 <= index($append, "SYMBOL"); #
10467 0           warn "raw=`$raw', prepend=<$prepend>, append=<$append>, base=$Cand->[0]\n" if debug_GUESS_MASSAGE;
10468 0           $t++;
10469 0           $b = "$prepend$Cand->[0]$append";
                  # Each rewrite below is attempted only while $b is still not a known character name.
10470 0 0         $b =~ s/\bTWO-HEADED\b/TWO HEADED/ unless $N{$b};
10471 0 0         $b =~ s/\bTIMES\b/MULTIPLICATION SIGN/ unless $N{$b};
10472 0 0         $b =~ s/(?:(?<=\bLEFT)|(?<=RIGHT))(?=\s+ARROW\b)/WARDS/ unless $N{$b};
10473 0 0         $b =~ s/\bLINE\s+INTEGRATION\b/CONTOUR INTEGRAL/ unless $N{$b};
10474 0 0         $b =~ s/\bINTEGRAL\s+AVERAGE\b/INTEGRAL/ unless $N{$b};
10475 0 0         $b =~ s/\s+(?:SHAPE|OPERATOR|NEGATED)$// unless $N{$b};
10476 0 0         $b =~ s/\bCIRCLED\s+MULTIPLICATION\s+SIGN\b/CIRCLED TIMES/ unless $N{$b};
10477 0 0         $b =~ s/^(CAPITAL|SMALL)\b/LATIN $1 LETTER/ unless $N{$b}; # TURNED SMALL F
10478 0 0         $b =~ s/\b(CAPITAL\s+LETTER)\s+SMALL\b/$1/ unless $N{$b}; # Q WITH HOOK TAIL
10479 0 0         $b =~ s/\bEPIGRAPHIC\b/CAPITAL/ unless $N{$b}; # XXXX is it actually capital?
10480             $b =~ s/^LATIN\s+LETTER\s+SMALL\s+CAPITAL\b/LATIN CAPITAL LETTER/ # and warn "smallcapital -> <$b>"
10481 0 0 0       if not $N{$b} or $with=~ /smallcaps/; # XXXX is it actually capital?
10482 0 0         $b =~ s/^GREEK\s+CAPITAL\b(?!=\s+LETTER)/GREEK CAPITAL LETTER/ unless $N{$b};
10483 0 0         $b =~ s/^GREEK\b(?!\s+(?:CAPITAL|SMALL)\s+LETTER)/GREEK SMALL LETTER/ unless $N{$b};
10484 0 0         $b =~ s/^CYRILLIC\b(?!\s+(?:CAPITAL|SMALL)\s+LETTER)(?=\s+LETTER\b)/CYRILLIC SMALL/ unless $N{$b};
10485 0 0         $b =~ s/\bEQUAL\s+TO\s+SIGN\b/EQUALS SIGN/ unless $N{$b};
10486 0 0         $b =~ s/\bMINUS\b/HYPHEN-MINUS/ unless $N{$b};
10487 0 0         $b =~ s/\b(SQUARE\s+)(INTERSECTION|UNION)(?:\s+OPERATOR)?\b/$1$iu_TR{$2}/ unless $N{$b};
10488 0 0         $b =~ s/(?<=WARDS)$/ ARROW/ unless $N{$b}; # APL VANE
10489 0 0         $b =~ s/\b($_TR_rx)\b/$_TR{$1}/ unless $N{$b};
10490             # $b =~ s/\bDOT\b/FULL STOP/ unless $N{$b};
10491             # $b =~ s/^MICRO$/GREEK SMALL LETTER MU/ unless $N{$b};
10492            
10493 0           warn " b =`$b', prepend=<$prepend>, append=<$append>, base=$Cand->[0]\n" if debug_GUESS_MASSAGE;
10494 0 0         if (defined ($base = $N{$b})) {
10495 0 0         undef $base, next if $base eq $c;
                  # NOTE(review): $t was incremented above and is never reset, so "if $t" always holds here.
10496 0 0         $with = $raw if $t;
10497 0           warn "<$Cand->[0]> WITH <$Cand->[1]> resolved via SIGN/SYMBOL/.* FORM: strip=<$app> add=<$prepend/$append>\n"
10498             if debug_GUESS_MASSAGE and ($append or $app or $prepend);
10499             last
10500 0           }
10501             }
10502 0 0         if (defined $base) {
    0          
10503 0           $base = [$base];
10504             } elsif ($raw =~ /\bOPERATOR$/) {
10505 0 0         $base = [map $N{$_}, @{ $operators{$Cand->[0]} }] if exists $operators{$Cand->[0]};
  0            
10506             }
10507 0 0         (warnUNRES and warn("Unresolved: <$Cand->[0]> WITH <$Cand->[1]>")), next unless defined $base;
10508 0           my @modifiers = split /\s+AND\s+/, $with;
                  # NOTE(review): as printed here, every modifier that is not a 4+ digit hex string is
                  # replaced by ''.  This listing may have lost text in angle brackets - compare with
                  # the module source before relying on this line.
10509 0 0         @modifiers = map { s/\s+/-/g; /^[\da-f]{4,}$/i ? $_ : "" } @modifiers;
  0            
  0            
10510             #warn " $c --> <@$base>; <@modifiers>...\t$b <- $NM{chr hex $c}" ;
10511 0           unshift @{$basic{$c}}, [1, $_, @modifiers] for @$base;
  0            
10512 0 0         if ($b =~ s/\s+(OPERATOR|SIGN)$//) { # ASTERISK (note that RING is a valid name, but has no relation to RING OPERATOR
10513 0 0         unshift @{$basic{$c}}, [1, $base, @modifiers] if defined ($base = $N{$b}); # ASTERISK
  0            
10514             #$base = '[undef]' unless defined $base;
10515             #warn("operator via <$b>, <$c> => `$base'");
10516             (debug_OPERATOR and warn "operator: `$c' ==> `$_', <@modifiers> via <$b>\n"),
10517 0 0         unshift @{$basic{$c}}, [1, $_, @modifiers] for map $N{$_}, @{ $operators{$b} || [] }; # ASTERISK
  0            
  0            
10518             }
10519             # push @{$candidates{$k}}, [$_, @$exp[2..$#$exp]] for $NN, @{ $operators{$NN} || []};
10520             # $basic{$c} = [ [1, $base, @modifiers ] ]
10521             }
10522             }
                  # Pass 3: expand every decomposition recursively; results accumulate in %cached_full.
10523 0           $self->decompose_r(\%basic, $_, \%cached_full) for keys %basic; # Now %cached_full is fully expanded - has trivial expansions too
                  # Pass 4: from the non-trivial expansions build %ordered (per composed char) and
                  # %compose (keyed by the joined expansion), warning about unexpected duplicates.
10524 0 0         for my $c (sort {fromHEX $a <=> fromHEX $b or $a cmp $b} keys %cached_full) { # order of chars in Unicode matters (all keys in hex)
  0            
10525 0           my %seen_compose;
10526 0           for my $exp (@{ $cached_full{$c} }) {
  0            
10527 0           my @exp = @$exp; # deep copy
10528 0 0         die "Expansion too short: <@exp>" if @exp < 2;
10529 0 0         next if @exp < 3; # Skip trivial decompositions
10530 0           my $compat = shift @exp;
10531 0           my @PRE = @exp;
10532 0           my $base = shift @exp;
10533 0 0         @exp = ($base, sort {fromHEX $a <=> fromHEX $b or $a cmp $b} @exp); # Any order will do; do not care about Unicode rules
  0            
10534             #warn "Malformed: [@exp]" if "@exp" =~ /^
10535 0 0         next if $seen_compose{"$compat; @exp"}++; # E.g., WHITE may be added in several ways...
10536 0 0         push @{$ordered{$c}}, [$compat, @exp > 3 ? @exp : @PRE]; # with 2 modifiers order does not matter for the algo below, but we catch U"¯ vs U¯".
  0            
10537 0           warn qq(Duplicate: $c <== [ @exp ] ==> <@{$compose{"@exp"}[0]}> (prefered)\n\t<), chr hex $c,
10538             qq(>\t$c\t$NM{chr hex $c}\n\t<), chr hex $compose{"@exp"}[0][1], qq(>\t$compose{"@exp"}[0][1]\t$NM{chr hex $compose{"@exp"}[0][1]})
10539 0 0 0       if $compose{"@exp"} and "@exp" !~ /<(font|pseudo-upgrade)>/ and $c ne $compose{"@exp"}[0][1] and not $known_dups{$c};
      0        
      0        
10540             #warn "Compose rule: `@exp' ==> $compat, `$c'";
10541 0           push @{$compose{"@exp"}}, [$compat, $c];
  0            
10542             }
10543             } # compose mapping done
                  # Pass 5: for each expansion remove one trailing element at a time and look the rest up
                  # in %compose; every hit is a 2-step rule "contracted char + removed element => $c",
                  # stored in both %into2 and %comp2.
10544 0 0         for my $c (sort {fromHEX $a <=> fromHEX $b or $a cmp $b} keys %ordered) { # all nontrivial! Order of chars in Unicode matters...
  0            
10545 0           my(%seen_compose, %seen_contract) = ();
10546 0           for my $v (@{ $ordered{$c} }) { ## When (FOO and FOO OPERATOR) + tilde are both remapped to X: X+operator == X
  0            
10547 0           my %seen;
10548 0           for my $off (reverse(2..$#$v)) {
10549             # next if $seen{$v->[$off]}++; # chain of compat, or 2A76 -> ?2A75 003D < = = = >
10550 0           my @r = @$v; # deep copy
10551 0           splice @r, $off, 1;
10552 0           my $compat = shift @r;
10553             #warn "comp: $compat, $c; $off [@$v] -> $v->[$off] + [@r]";
10554 0 0         next if $seen_compose{"$compat; $v->[$off]; @r"}++;
10555             # next unless my $contracted = $compose{"@r"}; # This omits trivial compositions
10556 0 0         my $contracted = [@{$compose{"@r"} || []}]; # Deep copy
  0            
10557             # warn "Panic $c" if @$contracted and @r == 1;
10558 0 0         push @$contracted, [0, @r] if @r == 1; # Not in %compose
10559             # QUAD-INT: may be INT INT INT INT, may be INT amp INT INT etc; may lead to same compositions...
10560             #warn "contraction: $_->[0]; $compat; $c; $v->[$off]; $_->[1]" for @$contracted;
10561 0 0         @$contracted = grep {$_->[1] ne $c and not $seen_contract{"$_->[0]; $compat; $v->[$off]; $_->[1]"}++} @$contracted;
  0            
10562             #warn " contraction: $_->[0]; $compat; $c; $v->[$off]; $_->[1]" for @$contracted;
10563 0           for my $contr (@$contracted) { # May be empty: Eg, fractions decompose into 2 3 and cannot be composed in 2 steps
                  # $calculated is true when the contraction is itself a compat one, or when the
                  # removed element was not the last element of the expansion.
10564 0   0       my $calculated = $contr->[0] || $off != $#$v;
10565 0           push @{ $into2{$self->charhex2key($c)} }, [(($compat | $contr->[0])<<1)|$calculated, $self->charhex2key($contr->[1]), $self->charhex2key($v->[$off])]; # each: compat, char, combine
  0            
10566 0           push @{ $comp2{$v->[$off]}{$contr->[1]} }, [ (($compat | $contr->[0])<<1)|$calculated, $c]; # each: compat, char
  0            
10567             }
10568             }
10569             }
10570             } # (de)compose-into-2 mapping done
                  # Sort every %comp2 result list in place; $h aliases the hash value, so the
                  # assignment below replaces the stored array ref.
10571 0           for my $h2 (values %comp2) { # Massage into the natural order - prefer canonical (de)compositions
10572 0           for my $h (values %$h2) { # RValues!!! [compat, charHEX] each
10573             # my @a = sort { "@$a" cmp "@$b" } @$h;
10574 0 0         my @a = sort { $a->[0] <=> $b->[0] or $self->charhex2key($a->[1]) cmp $self->charhex2key($b->[1]) } @$h;
  0            
10575 0           $h = \@a;
10576             }
10577             }
10578 0           \%into2, \%comp2, \%NM, \%BL, \%NS, $version
10579             }
10580            
10581             sub print_decompositions($;$) {
                  # print_decompositions($self [, $dec]): print a decomposition table to the currently
                  # selected output handle, one line per key in plain string sort order.
                  # Without $dec, the table is $self->{Decompositions}; load_compositions() is called
                  # first when $self->get_NamesList returns a defined value.
                  # Each entry is printed as "? " (first element true) or "= " (false), followed by
                  # its elements 1 and 2.
10582 0     0 0   my $self = shift;
10583 0 0         my $dec = @_ ? shift : do { my $f = $self->get_NamesList;
  0            
10584 0 0         $self->load_compositions($f) if defined $f;
10585 0           $self->{Decompositions}} ;
10586 0           for my $c (sort keys %$dec) {
10587 0           my $arr = $dec->{$c};
10588 0 0         my @out = map +($_->[0] ? '? ' : '= ') . "@$_[1,2]", @$arr;
10589 0           print "$c\t->\t", join(",\t", @out), "\n";
10590             }
10591             }
10592            
10593             sub print_compositions($$) {
                  # print_compositions($self, $comp): print the two-level table
                  # $comp->{composing}{base} = [[flag, char], ...].  Keys of both levels are ordered
                  # numerically via fromHEX(), with string comparison as the tie-breaker.
                  # When called with $self only, control is passed to print_compositions_ch() with
                  # the same @_ (goto &sub).
10594 0 0   0 0   goto &print_compositions_ch if @_ == 1;
10595 0           my ($self, $comp) = (shift, shift);
10596 0 0         for my $c (sort {fromHEX $a <=> fromHEX $b or $a cmp $b} keys %$comp) { # composing char
  0            
10597 0           print "$c\n";
10598 0 0         for my $b (sort {fromHEX $a <=> fromHEX $b or $a cmp $b} keys %{$comp->{$c}}) { # base char
  0            
  0            
10599 0           my $arr = $comp->{$c}{$b};
                  # '?' marks an entry whose first element is true, '=' one whose first element is false.
10600 0 0         my @out = map +($_->[0] ? '?' : '=') . $_->[1], @$arr;
10601 0           print "\t$b\t->\t", join(",\t\t", @out), "\n";
10602             }
10603             }
10604             }
10605            
10606             sub print_compositions_ch($$) {
                  # print_compositions_ch($self [, $comp]): like print_compositions(), but the keys of
                  # both levels are ordered with the default string sort, and entries are prefixed
                  # with "? " / "= " (with a trailing space).
                  # Without $comp, the table is $self->{Compositions}; load_compositions() is called
                  # first when $self->get_NamesList returns a defined value.
10607 0     0 0   my $self = shift;
10608 0 0         my $comp = @_ ? shift : do { my $f = $self->get_NamesList;
  0            
10609 0 0         $self->load_compositions($f) if defined $f;
10610 0           $self->{Compositions}} ;
10611 0           for my $c (sort keys %$comp) { # composing char
10612 0           print "$c\n";
10613 0           for my $b (sort keys %{$comp->{$c}}) { # base char
  0            
10614 0           my $arr = $comp->{$c}{$b};
10615 0 0         my @out = map +($_->[0] ? '? ' : '= ') . $_->[1], @$arr;
10616 0           print "\t$b\t->\t", join(",\t\t", @out), "\n";
10617             }
10618             }
10619             }
10620            
10621             sub load_compositions($$) {
                  # load_compositions($self, $comp): open the file named $comp, run parse_NameList() on
                  # it and store the results in $self: {Decompositions}, {UNames}, {UBlock}, {exComb},
                  # {uniVersion} and {Compositions}.  Does nothing when $self->{Compositions} is
                  # already set.  Returns $self.
                  # {Compositions} starts as a copy of $self->{'[Substitutions]'} (if any); the parsed
                  # table is added with its keys and entries passed through charhex2key().
10622 0     0 0   my ($self, $comp, @comb) = (shift, shift);
10623 0 0         return $self if $self->{Compositions};
10624 0 0         my %comp = %{ $self->{'[Substitutions]'} || {} };
  0            
                  # NOTE(review): unlike load_uniage(), the die messages here do not include $!.
10625 0 0         open my $f, '<', $comp or die "Can't open $comp for read";
                  # From here on $comp no longer holds the file name: it is overwritten with the second
                  # value returned by parse_NameList() (the hex-keyed composition table).
10626 0           ($self->{Decompositions}, $comp, $self->{UNames}, $self->{UBlock}, $self->{exComb}, $self->{uniVersion}) = $self->parse_NameList($f);
                  # NOTE(review): because of the reassignment above, a failure message below would
                  # interpolate a hash reference instead of the file name.
10627 0 0         close $f or die "Can't close $comp for read";
10628             #warn "(De)Compositions and UNames loaded";
10629             # Having hex as index is tricky: is it 4-digits or more? Is it in uppercase?
10630 0 0         for my $c (sort {fromHEX $a <=> fromHEX $b or $a cmp $b} keys %$comp) { # composing char
  0            
10631 0 0         for my $b (sort {fromHEX $a <=> fromHEX $b or $a cmp $b} keys %{$comp->{$c}}) { # base char
  0            
  0            
10632 0           my $arr = $comp->{$c}{$b};
10633 0           my @out = map [$self->charhex2key($_->[0]), $self->charhex2key($_->[1])], @$arr;
10634 0           $comp{$self->charhex2key($c)}{$self->charhex2key($b)} = \@out;
10635             }
10636             }
10637 0           $self->{Compositions} = \%comp;
                  # Extend $rxCombining (declared outside this block) with the keys of {exComb},
                  # interpolated unescaped into a character class.
10638 0           my $comb = join '', keys %{$self->{exComb}}; # should not have metachars here...
  0            
10639 0 0         $rxCombining = qr/\p{nonSpacingMark}|[$comb]/ if $comb;
10640 0           $self
10641             }
10642            
# Read the whole file $fn (expected to be in the DerivedAge format), parse it
# with parse_derivedAge() and store the result in $self->{Age}.
# Returns $self; dies if the file cannot be opened or closed.
sub load_uniage($$) {
  my $self = shift;
  my $fn   = shift;
  # get_AgeList
  open my $f, '<', $fn or die "Can't open `$fn' for read: $!";
  my $s = do { local $/; <$f> };	# slurp
  close $f or die "Can't close `$fn' for read: $!";
  $self->{Age} = $self->parse_derivedAge($s);
  return $self;
}
10653            
# Convenience loader: always loads the compositions/names file; when one more
# argument is given, it is passed to load_uniage() as well.
# Returns $self when no age file is given, otherwise whatever load_uniage() returns.
sub load_unidata($$) {
  my $self       = shift;
  my $names_file = shift;
  $self->load_compositions($names_file);
  return @_ ? $self->load_uniage(shift) : $self;
}
10660            
my(%charinfo, %UName_v);	# Unicode::UCD::charinfo extremely slow
# Return a printable name for the character $c (any form accepted by
# charhex2key()).
#
#   $verbose  true: append " [script; block; age]" when known, and cache the
#             answer in %UName_v;
#   $vbell    true: U+266A is being used as the "visual bell"; it then gets a
#             fixed annotation (without $vbell, U+266A is never cached).
#
# When $self->{UNames} is not loaded (or $verbose is set), the name comes from
# $self->{UNames}, then Unicode::UCD::charinfo(), then "<$c>".  Otherwise the
# loaded name is used, with "[$c]" (or "[hex]" for controls, surrogates and
# U+007F..U+00AF) as the fallback.
sub UName($$$;$) {
  my ($self, $c, $verbose, $vbell) = (shift, shift, shift, shift);
  $c = $self->charhex2key($c);
  my $cacheable = ($vbell or 0x266a != ord $c);
  return $UName_v{$c} if $verbose and $cacheable and exists $UName_v{$c};
  # Look things up through read-only views.  A bare $self->{UNames}{$c} would
  # autovivify $self->{UNames}, and the "not loaded" test below would then
  # fail on every later call, silently switching the answers to "[$c]".
  my $names = $self->{UNames} || {};
  my $ages  = $self->{Age}    || {};
  if (not exists $self->{UNames} or $verbose) {
    require Unicode::UCD;
    my $i = ($charinfo{$c} ||= Unicode::UCD::charinfo(ord $c) || {});
    my $A = $ages->{$c};
    my $n = $names->{$c} || ($i->{name}) || "<$c>";
    my $app = '';
    if ($verbose and (%$i or $A)) {
      my $scr = join '; ', grep defined, $i->{script}, $i->{block}, $A;
      $scr = "Com/MiscSym1.1" if $vbell and 0x266a == ord $c;	# EIGHT NOTE: we use as "visual bell"
      $app = " [$scr]" if length $scr;
    }
    return($UName_v{$c} = "$n$app") if $verbose and $cacheable;
    return "$n$app"
  }
  $names->{$c} || ($c =~ /[\x{d800}-\x{dfff}\x00-\x1f\x7f-\xAF]/ ? '['.$self->key2hex($c).']' : "[$c]")
}
10683            
# Parse the text $s of a DerivedAge-style file into a hash reference mapping
# every listed character to its age string (e.g. "1.1", "10.0").
#
# Blank lines and `#' comment lines are skipped.  Every other line must look
# like "XXXX ; V.v" or "XXXX..YYYY ; V.v" (hex code points); anything else
# is fatal.  $self is not used.
#
# NOTE(review): sort_compositions() compares ages as strings; ages with a
# two-digit major version ("10.0") sort before "2.0" there - confirm whether
# this matters for the penalties.
sub parse_derivedAge ($$) {
  my ($self, $s) = (shift, shift);
  my %age_of;
  for my $line (split /\n/, $s) {
    next if $line =~ /^\s*(#|$)/;
    # \d+\.\d+ (not \d\.\d): Unicode versions have multi-digit components since 10.0
    die "Unexpected line in DerivedAge: `$line'"
      unless $line =~ /^([0-9a-f]{4,})(?:\.\.([0-9a-f]{4,}))?\s*;\s*(\d+\.\d+)\b/i;
    my ($first, $last, $age) = ($1, $2, $3);
    $last = $first unless defined $last;	# single code point
    $age_of{chr $_} = $age for hex($first) .. hex($last);
  }
  \%age_of;
}
10694            
10695             # use Dumpvalue;
10696             # my $first_time_dump;
10697             sub get_compositions ($$$$;$) { # Now only the undo-branch is used...
                  # Arguments: $m is an array ref of composition (map) names; $C is the
                  # character to process (for an array ref its first element is used);
                  # $undo selects decomposition; $unAltGr, if given, is a hash ref which
                  # maps $C to another key.  Returns one character, or undef.
                  #   * $undo: scan $self->{Decompositions}{$C} for entries whose third
                  #     element is one of the names in @$m (in the order of @$m) and
                  #     return the second element of the first entry found;
                  #   * $unAltGr: look up both $unAltGr->{$C} and $C in
                  #     $self->{Compositions}{name} for the names in @$m;
                  #   * otherwise: take the first defined compound_composition(name, $C)
                  #     over @$m and return its first translation.
10698 0     0 0   my ($self, $m, $C, $undo, $unAltGr, @out) = (shift, shift, shift, shift, shift);
10699             # return unless defined $C and defined (my $r = $self->{Compositions}{$m}{$C});
10700             # Dumpvalue->new()->dumpValue($self->{Compositions}) unless $first_time_dump++;
10701 0 0         return undef unless defined $C;
10702 0 0         $C = $C->[0] if 'ARRAY' eq ref $C; # Treat prefix keys as usual keys
10703 0           warn "doing <$C> <@$m>: undo=$undo C=", $self->key2hex($C), ", maps=", join ' ', map $self->key2hex($_), @$m if warnDO_COMPOSE; # if $m eq 'A';
10704 0 0         if ($undo) {
10705 0 0         return undef unless my $dec = $self->{Decompositions}{$C};
10706             # order in @$m matters; so does one in Decompositions - but less so
10707             # Hence the external loop should be in @$m
10708 0           for my $M (@$m) {
10709 0           push @out, $_ for grep $M eq $_->[2], @$dec;
10710 0 0         if (@out) { # We took the first guy from $m which allows such decomposition
10711 0 0         warn "Decomposing <$C> <$M>: multiple answers: <", (join '> <', map "@$_", @out), ">" unless @out == 1;
10712 0           warn "done undo <$C> <@$m>: -> ", $self->array2string(\@out) if warnDO_COMPOSE; # if $m eq 'A';
10713 0           return $out[0][1]
10714             }
10715             }
10716 0           return undef;
10717             }
                  # The doubled braces make a bare block, so that `last' below leaves
                  # this branch and falls through to the generic lookup.
10718 0 0         if ($unAltGr) {{
10719 0 0         last unless $unAltGr = $unAltGr->{$C};
  0            
10720 0           my(@seen, %seen);
10721 0           for my $comp ( @$m ) {
10722 0           my $a1 = $self->{Compositions}{$comp}{$unAltGr};;
                  # @seen collects bindings of the un-AltGr-ed key with distinct results
10723 0 0 0       push @seen, $a1 if $a1 and not $seen{$a1->[0][1]}++;
10724             #warn "Second binding `$a1->[0][1]' for `$unAltGr' (on `$C') - after $seen[0][0][1]" if @seen == 2;
10725 0 0 0       next unless defined (my $a2 = $self->{Compositions}{$comp}{$C}) or @seen == 2;
10726             #warn " --> AltGr-binding `$a2->[0][1]' (on `$C')" if @seen == 2 and defined $a2;
10727 0 0 0       warn "Conflict between the second binding `$a1->[0][1]' for `$unAltGr' and AltGr-binding `$a2->[0][1]' (on `$C')"
      0        
      0        
10728             if $a2 and $a1 and @seen == 2 and $a1->[0][1] ne $a2->[0][1];
10729 0   0       return ((@seen == 2 and $a1) or $a2)->[0][1];
10730             }
10731             }}
10732 0 0         return undef unless my ($r) = grep defined, map $self->compound_composition($_,$C), @$m;
10733 0 0 0       warn "Composing <$C> <@$m>: multiple answers: <", (join '> <', map "@$_", @$r), ">" unless @$r == 1 or $C eq ' ';
10734             # warn("done <$C> <$m>: <$r->[0][1]>"); # if $m eq 'A';
10735 0           $r->[0][1]
10736             }
10737            
# Apply the compound recipe $M to the base $C.
#
# $M is a chain of steps joined by `+' (a `-' followed by another `-' also
# separates steps); the steps are applied right-to-left.  A step is one of
#   -NAME                    undo (decompose) via get_compositions(..., 'undo');
#   lc, uc, lcfirst, ucfirst case conversion; a base which is unchanged, or
#                            whose conversion is not one character, is dropped;
#   dectrl                   control char (below 0x20) -> the char 0x40 above it;
#   NAME                     lookup in $self->{Compositions}{NAME}{base}.
# $C is a character, or an array ref [char, ?, ?, doc] whose 4th element (if
# defined) is prepended (followed by " ⇒ ") to the documentation of results.
#
# Returns an array ref of [flag, char, doc], or undef when $C is undefined or
# some step gives no result.
sub compound_composition ($$$) {
  my ($self, $M, $C, $doc, $doc1) = (shift, shift, shift, '', '');
  return undef unless defined $C;
  if ('ARRAY' eq ref $C) {			# may be used via
    $doc1 = $C->[3] if defined $C->[3];
    $C = $C->[0];
  }
  $doc = "$doc1 ⇒ " if length $doc1;
  warn "composing `$M' with base <$C>" if warnDO_COMPOSE;
  $C = [[0, $C, $doc1]];	# Emulate element of return of Compositions ("one translation, explicit")
  # Every name accepted by the regular expression below must have a handler
  # here (or be `dectrl'); `lcfirst' used to fall through to the "Panic".
  my %recase = (lc      => sub { lc $_[0] },      uc      => sub { uc $_[0] },
		lcfirst => sub { lcfirst $_[0] }, ucfirst => sub { ucfirst $_[0] });
  for my $m (reverse split /\+|-(?=-)/, $M) {
    my @res;
    if ($m =~ /^(?:-|(?:[ul]c(?:first)?|dectrl)$)/) {
      if ($m =~ s/^-//) {
        @res = map $self->get_compositions([$m], $_->[1], 'undo'), @$C;
        @res = map [[0,$_]], grep defined, @res;
      } elsif (my $convert = $recase{$m}) {
        @res = map { my $to = $convert->($_->[1]);
		     ($_->[1] eq $to or 1 != length $to) ? () : [[0, $to]] } @$C;
      } elsif ($m eq 'dectrl') {
        @res = map {(0x20 <= ord($_->[1])) ? () : [[0, chr(0x40 + ord $_->[1])]]} @$C;
      } else {
        die "Panic"
      }
    } else {
      #warn "compose `$m' with bases <", join('> <', map $_->[1], @$C), '>';
      # Do not autovivify $self->{Compositions} (load_compositions() treats
      # its presence as "already loaded") nor an entry for an unknown $m.
      my $map = ($self->{Compositions} || {})->{$m};
      @res = map $map->{$_->[1]}, @$C if $map;
    }
    @res = map @$_, grep defined, @res;
    return undef unless @res;
    $C = [map [$_->[0], $_->[1], "$doc$M"], @res];
  }
  $C
}
10773            
# Same as compound_composition(), but the bases come from $CC, an array ref
# of [char, docs] pairs (undefined elements are skipped).  When $ini is a
# reference with a defined 4th element, the documentation of every base is
# wrapped as "<that element> ⇒ Subst{<docs>}".
# Returns an array ref with all the translations found, or undef if there
# are none (or $CC is false).
sub compound_composition_many ($$$$) {		# As above, but takes an array of [char, docs]
  my $self = shift;
  my ($M, $CC, $ini) = (shift, shift, shift);
  return undef unless $CC;
  my ($open, $close) = ('', '');
  ($open, $close) = ("$ini->[3] ⇒ Subst{", '}')
    if $ini and ref $ini and defined $ini->[3];
  my @found;
  for my $pair (grep defined, @$CC) {
    my $base = [$pair->[0], undef, undef, "$open$pair->[1]$close"];
    my $translated = $self->compound_composition($M, $base);
    next unless defined $translated;
    push @found, @$translated;
  }
  return @found ? \@found : undef;
}
10788            
10789             # Design goals: we assign several diacritics to a prefix key (possibly with
10790             # AltGr on the "Base key" and/or other "multiplexers" in between). We want:
10791             # *) a lc/uc paired result to sit on Shift-paired keypresses;
10792             # *) avoid duplication among multiplexers (a secondary goal);
10793             # *) allow some diacritics in the list to be preferred ("groups" below);
10794             # *) when there is a choice, prefer non-bizarre (read: with smaller Unicode
10795             # "Age" version) binding to be non-multiplexed.
10796             # We allow something which was not on AltGr to acquire AltGr when it gets a
10797             # diacritic.
10798            
10799             # It MAY happen that an earlier binding has empty slots,
10800             # but a later binding exists (to preserve lc/uc pairing, and shift-state)
10801            
10802             ### XXXX Unclear: how to catenate something in front of such a map...
10803             # we do $composition->[0][1], which means we ignore additional compositions! And we ignore HOW, instead of putting it into penalty
10804            
10805             sub sort_compositions ($$$$$;$) {
                  # Arguments:
                  #   $m       array ref of "|-groups"; each group is an array ref of recipes
                  #            (handed to compound_composition()) and of penalty directives
                  #            (the entries starting with `\' or `!\', see below);
                  #   $C       array ref of layers; a layer lists the bindings per shift state
                  #            (an array-ref binding is represented by its first element in @C);
                  #   $Sub     hash ref: char => array handed to compound_composition_many();
                  #   $dupsOK  if false, a result character is used only the first time it is seen;
                  #   $w32OK   if false, results with code points 0x10000 and above are dropped.
                  # Returns an array ref with one element per group; the indexing is described
                  # in the comment after the closing brace of this sub.
                  # NOTE(review): the statement on listing line 10859 is cut in the middle of a
                  # regular expression in this listing; consult the real source for it.
10806 0     0 0   my ($self, $m, $C, $Sub, $dupsOK, $w32OK, @res, %seen, %Penalize, %penalize, %OK, %ok, @C) = (shift, shift, shift, shift, shift, shift);
10807 0           warn "compounding ", $self->array2string($C) if warnSORTCOMPOSE;
10808 0           for my $c (@$C) {
10809 0 0 0       push @C, [map {($_ and 'ARRAY' eq ref $_) ? $_->[0] : $_} @$c]
  0            
10810             }
10811 0           my $char = $C[0][0];
10812 0 0         $char = 'N/A' unless defined $char;
10813 0           for my $MM (@$m) { # |-groups
10814 0           my(%byPenalty, @byLayers);
10815 0           for my $M (@$MM) { # diacritic in a group; may flatten each layer, but do not flatten separately each shift state: need to pair uc/lc
                  # Penalty directive: `\CHARS' penalizes CHARS, `!\CHARS' marks them as OK;
                  # a doubled backslash selects the strong variant (%Penalize/%OK).  An
                  # optional trailing `[...]' restricts the directive to the case when
                  # $char is one of the listed characters.
10816 0 0         if ((my $P = $M) =~ s/^(!)?\\(\\)?//) {
10817 0           my($neg, $strong) = ($1, $2);
10818             # warn "Penalize: <$P>"; # Actually, it is not enough to penalize; one should better put it in a different group...
10819 0 0         if ($P =~ s/\[(.*)\]$//) {
10820             #$P = $self->stringHEX2string($P);
10821 0           my $match;
10822 0   0       $char eq $_ and $match++ for split //, $self->stringHEX2string("$1");
10823 0 0         next unless $match;
10824             }
10825             #$P = $self->stringHEX2string($P);
10826 0 0         if ($neg) {
10827 0 0         $strong ? $OK{$_}++ : $ok{$_}++ for split //, $P;
10828             } else {
10829 0 0         $strong ? $Penalize{$_}++ : $penalize{$_}++ for split //, $P;
10830             }
10831             next
10832 0           }
10833 0           for my $L (0..$#C) { # Layer number; indexes a shift-pair
10834             # my @res2 = map {defined($_) ? $self->{Compositions}{$M}{$_} : undef } @{ $C[$L] };
10835 0           my @Res2 = map $self->compound_composition($M, $_), @{ $C->[$L] }; # elt: [$synth, $char]
  0            
10836 0           my @working_with = grep defined, @{ $C[$L] }; # ., KP_Decimal gives [. undef]
  0            
10837 0           warn "compound `$M' of [@working_with] -> ", $self->array2string(\@Res2) if warnSORTCOMPOSE;
10838 0           (my $MMM = $M) =~ s/(^|\+)$//; # Hack: the rule always fails if present, empty always succeeds
10839             my @Res3 = map $self->compound_composition_many($MMM, (defined() ? $Sub->{($_ and ref) ? $_->[0] : $_} : $_), $_),
10840 0 0 0       @{ $C->[$L] };
  0 0          
10841 0           warn "compound+ `$M' of [@working_with] -> ", $self->array2string(\@Res3) if warnSORTCOMPOSE;
                  # Merge the translations found via substitutions into the direct ones
10842 0           for my $shift (0..$#Res3) {
10843 0 0         if (defined $Res2[$shift]) {
10844 0 0         push @{ $Res2[$shift]}, @{$Res3[$shift]} if $Res3[$shift]
  0            
  0            
10845             } else {
10846 0           $Res2[$shift] = $Res3[$shift]
10847             }
10848             }
10849             # defined $Res2[$_] ? ($Res3[$_] and push @{$Res2[$_]}, @{$Res2[$_]}) : ($Res2[$_] = $Res3[$_]) for 0..$#Res3;
10850 0           @Res2 = $self->DEEP_COPY(@Res2);
10851 0           my ($ok, @ini_compat);
                  # Each pass takes the first not-yet-processed translation of every shift
                  # state; the loop repeats while the pass had anything to take.
10852 0           do {{ # Run over found translations
10853 0 0         my @res2 = map {defined() ? $_->[0] : undef} @Res2; # process next unprocessed translations
  0            
  0            
10854 0   0       defined and (shift(@$_), (@$_ or undef $_)) for @Res2; # remove what is being processed
      0        
10855 0           $ok = grep $_, @res2;
10856 0 0 0       @res2 = map {(not defined() or (!$dupsOK and $seen{$_->[1]}++)) ? undef : $_} @res2; # remove duplicates
  0            
10857 0 0         my @compat = map {defined() ? $_->[0] : undef} @res2;
  0            
10858 0 0         my @_from_ = map {defined() ? $_->[2] : undef} @res2;
  0            
10859 0   0       defined and s/((?
10860 0 0         @res2 = map {defined() ? $_->[1] : undef} @res2;
  0            
10861 0 0 0       @res2 = map {0x10000 > ord($_ || 0) ? $_ : undef} @res2 unless $w32OK; # remove those needing surrogates
  0 0          
10862 0   0       defined $ini_compat[$_] or $ini_compat[$_] = $compat[$_] for 0..$#compat;
10863 0 0         my @extra_penalty = map {!!$compat[$_] and $ini_compat[$_] < $compat[$_]} 0..$#compat;
  0            
10864 0 0         next unless my $cnt = grep defined, @res2;
10865 0           my($penalty, $p) = [('zzz') x @res2]; # above any "5.1", "undef" ("unassigned"???)
10866             # Take into account the "compatibility", but give it lower precedence than the layer:
10867             # for no-compatibility: do not store the level;
10868             defined $res2[$_] and $penalty->[$_] gt ( $p = ($OK{$res2[$_]} ? '+' : '-') . ($self->{Age}{$res2[$_]} || 'undef') .
10869             ($ok{$res2[$_]} ? '+' : '-') . "#$extra_penalty[$_]#" . ($self->{UBlock}{$res2[$_]} || '') )
10870 0 0 0       and $penalty->[$_] = $p for 0..$#res2;
    0 0        
      0        
      0        
10871 0   0       my $have1 = not (defined $res2[0] and defined $res2[1]); # Prefer those with both entries
10872             # Break a non-lc/uc paired translations into separate groups
10873 0   0       my $double_occupancy = ($cnt == 2 and $res2[0] ne $res2[1] and lc $res2[0] eq lc $res2[1]); # Case fold
10874 0           warn " seeing random-double, penalties <$penalty->[0]>, <$penalty->[1]>\n" if warnSORTCOMPOSE;
10875 0 0 0       next if $double_occupancy and grep {defined and $Penalize{$_}} @res2;
  0 0          
10876 0 0 0       if ($double_occupancy and grep {defined and $penalize{$_}} @res2) {
  0 0          
10877 0   0       defined $res2[$_] and $penalty->[$_] = "zzz$penalty->[$_]" for 0..$#res2;
10878             } else {
10879 0   0       defined and $Penalize{$_} and $cnt--, $have1=1, undef $_ for @res2;
      0        
10880 0   0       defined $res2[$_] and $penalize{$res2[$_]} and $penalty->[$_] = "zzz$penalty->[$_]" for 0..$#res2;
      0        
10881             }
10882 0 0         next unless $cnt;
10883 0 0 0       if (not $double_occupancy and $cnt == 2 and (1 or $penalty->[0] ne $penalty->[1])) { # Break (penalty here is not a good idea???)
      0        
10884 0           warn " breaking random-double, penalties <$penalty->[0]>, <$penalty->[1]>\n" if warnSORTCOMPOSE;
10885 0           push @{ $byPenalty{"$penalty->[0]1"}[0][$L] }, [ [$res2[0],undef,undef,$_from_[0]]];
  0            
10886 0           push @{ $byPenalty{"$penalty->[1]1"}[0][$L] }, [undef, [$res2[1],undef,undef,$_from_[1]]];
  0            
10887 0           next; # Now: $double_occupancy or $cnt == 1 or $penalty->[0] eq $penalty->[1]
10888             }
10889 0 0         $p = (defined $res2[0] ? $penalty->[0] : 'zzz'); # may have been undef()ed due to Penalty...
10890 0 0 0       $p = $penalty->[1] if @$penalty > 1 and defined $res2[1] and $p gt $penalty->[1];
      0        
10891 0           push @{ $byPenalty{"$p$have1"}[$double_occupancy][$L] },
10892             # [map {defined $res2[$_] ? $res2[$_] : undef} 0..$#res2];
10893 0 0         [map {defined $res2[$_] ? [$res2[$_],undef,undef,$_from_[$_]] : undef} 0..$#res2];
  0            
10894             }} while $ok;
10895 0           warn " --> combined of [@working_with] -> ", $self->array2string([\@res, %byPenalty]) if warnSORTCOMPOSE;
10896             }
10897             } # sorted bindings, per Layer
10898 0           push @res, [ @byPenalty{ sort keys %byPenalty } ]; # each elt is an array ref indexed by layer number; elt of this is [lc uc]
10899             }
10900             #warn 'Compositions: ', $self->array2string(\@res);
10901             \@res
10902 0           } # index as $res->[group][penalty_N][double_occ][layer][NN][shift]
10903            
# Pad (in place) all the strings given after $extra with trailing spaces, so
# that each becomes $extra characters longer than the longest of them was.
# $self is not used; a false $extra counts as 0.
sub equalize_lengths ($$@) {
  my $self  = shift;
  my $extra = shift || 0;
  my $width = 0;
  for (@_) {				# the elements of @_ alias the caller's strings
    $width = length($_) if $width <= length($_);
  }
  $width += $extra;
  for (@_) {
    $_ .= ' ' x ($width - length($_)) if $width > length($_);
  }
}
10910            
# Format the structure $sorted (6 levels: |-group, priority, double-occupancy,
# layer, count, shift) as text, one row per (layer, shift) pair.
#
#   $k      label put at the start of the first row (for an array ref its
#           first element is used); a label consisting only of separators
#           and/or whitespace (or empty) is put into <...>;
#   $bold   optional array ref of group numbers introduced by `┃';
#   $bold1  optional array ref of group numbers introduced by `┋'
#           (the other groups are introduced by `║').
# Priorities inside a group are separated by `│', double-occupancy classes
# by `┆'.  Returns the rows joined by (and ending with) a newline; trailing
# rows holding only separators are dropped.
sub report_sorted_l ($$$;$$) {	# 6 levels: |-group, priority, double-occupancy, layer, count, shift
  my $self = shift;
  my ($k, $sorted, $bold, $bold1) = (shift, shift, shift, shift);
  $k = $k->[0] if 'ARRAY' eq ref($k || 0);
  $k = '' unless defined $k;
  $k = "<$k>" unless $k =~ /[^┃┋║│┆\s]/;
  my @rows = ($k, '');	# Up to 100 layers - an overkill, of course???  One extra level to store separators...
  my %group_bar;
  $group_bar{$_} = '┋' for @{$bold1 || []};
  $group_bar{$_} = '┃' for @{$bold || []};
  # Align all the rows, then append the same separator to each of them
  my $separate = sub {
    my $bar = shift;
    $self->equalize_lengths(0, @rows);
    $_ .= $bar for @rows;
  };
  for my $group (0..$#$sorted) {				# Top level
    $separate->(' ' . ($group_bar{$group} || '║'));
    my $prio_seen = 0;
    for my $prio (@{ $sorted->[$group] }) {
      $separate->(' │') if $prio_seen++;
      my $double_seen = 0;
      for my $double (reverse @$prio) {
        $separate->(' ┆') if $double_seen++;
        for my $layer (0..$#$double) {
          for my $set (@{$double->[$layer]}) {
            for my $shift (0,1) {
              my $cell = $set->[$shift];
              next unless defined $cell;
              $cell = $cell->[0] if ref $cell;
              $cell = " $cell" if $cell =~ /$rxCombining/;
              my $row = 2*$layer + $shift;
              # Keep last layer pristine for correct separators...
              push @rows, ($rows[-1]) x ($row - $#rows + 1) if $row >= $#rows;
              $rows[$row] .= " $cell";
            }
          }
        }
      }
    }
  }
  pop @rows while @rows and $rows[-1] !~ /[^┃┋║│┆\s]/;
  join "\n", @rows, '';
}
10954            
10955             sub append_keys ($$$$;$) { # $KK is [[lc,uc], ...]; modifies $C in place
                  # Wraps the bindings of $KK (indexed by layer) into one more group - with one
                  # level of penalty - and adds this group to the end of @$C, or, if $prepend
                  # is true, to the start of @$C.  Inside the group the bindings are filed as
                  # $KKK[$paired][layer], where $paired tells whether the two slots hold
                  # different characters with the first one being the lc() of the second.
                  # With $LL given: the slots of a layer $L > 0 for which $LL->[$L][slot] is 0
                  # are filed under layer 0 instead of $L.
                  # Returns 1 if at least one layer had a defined slot, otherwise returns
                  # nothing (and leaves $C unchanged).
10956 0     0 0   my ($self, $C, $KK, $LL, $prepend, @KKK, $cnt) = (shift, shift, shift, shift, shift);
10957 0           for my $L (0..$#$KK) { # $LL contains info about from which layer the given binding was stolen
10958 0           my $k = $KK->[$L];
10959 0 0 0       next unless defined $k and (defined $k->[0] or defined $k->[1]);
      0        
10960 0           $cnt++;
10961 0 0         my @kk = map {$_ and ref $_ ? $_->[0] : $_} @$k;
  0 0          
10962 0   0       my $paired = (@$k == 2 and defined $k->[0] and defined $k->[1] and $kk[0] ne $kk[1] and $kk[0] eq lc $kk[1]);
10963 0 0 0       my @need_special = map { $LL and $L and defined $k->[$_] and defined $LL->[$L][$_] and 0 == $LL->[$L][$_]} 0..$#$k;
  0   0        
      0        
10964 0 0         if (my $special = grep $_, @need_special) { # count
                  # All the defined slots are special: file the whole binding under layer 0
10965 0 0         ($prepend ? push(@{ $KKK[$paired][0] }, $k) : unshift(@{ $KKK[$paired][0] }, $k)),
  0 0          
  0            
10966             next if $special == grep defined, @$k;
                  # Otherwise split: the special slots go to layer 0, the rest stays on layer $L
10967 0           $paired = 0;
10968 0 0         my $to_level0 = [map { $need_special[$_] ? $k->[$_] : undef} 0..$#$k];
  0            
10969 0 0         $k = [map {!$need_special[$_] ? $k->[$_] : undef} 0..$#$k];
  0            
10970 0 0         $prepend ? push @{ $KKK[$paired][0] }, $to_level0 : unshift @{ $KKK[$paired][0] }, $to_level0;
  0            
  0            
10971             }
10972 0 0         $prepend ? push @{ $KKK[$paired][$L] }, $k : unshift @{ $KKK[$paired][$L] }, $k; # 0: layer has only one slot
  0            
  0            
10973             }
10974             #print "cnt=$cnt\n";
10975 0 0         return unless $cnt;
10976 0 0         push @$C, [[@KKK]] unless $prepend; # one group of one level of penalty
10977 0 0         unshift @$C, [[@KKK]] if $prepend; # one group of one level of penalty
10978 0           1
10979             }
10980            
10981             sub shift_pop_compositions ($$$;$$$$) { # Limit is how many groups to process
                  # Removes one binding (a [lc, uc] pair, or its parts) for the layer $L from
                  # the structure $C (group / penalty group / double-occupancy / layer / set)
                  # and returns it as an array ref.
                  # NOTE(review): $C presumably has the shape returned by sort_compositions() -
                  # confirm against the callers.
                  #   $backwards      process the groups in the reverse order, and take the
                  #                   elements from the end of a set;
                  #   $omit           number of the groups skipped at the start (default 0);
                  #   $limit          maximal number of groups to process (default 1e100);
                  #   $ignore_groups  what is found in the first $ignore_groups processed groups
                  #                   is removed from $C, but not reported (default 0);
                  #   $store_level    optional array ref; it is filled with the layer (0 or $L)
                  #                   from which the returned slots were taken;
                  #   $skip_lc, $skip_uc  do not look for the corresponding slot.
                  # For $L > 0, the layer 0 of a penalty group is inspected before the layer $L.
                  # When nothing (or only one of two wanted slots) is found, the partially
                  # filled - possibly empty - array is returned.
10982 0   0 0 0   my($self, $C, $L, $backwards, $omit, $limit, $ignore_groups, $store_level, $skip_lc, $skip_uc)
      0        
      0        
10983             = (shift, shift, shift, shift, shift || 0, shift || 1e100, shift || 0, shift, shift, shift);
10984 0           my($do_lc, $do_uc) = (!$skip_lc, !$skip_uc);
10985 0   0       my($both, $first, $out_lc, $out_uc, @out, @out_levels, $have_out, $groupN) = ($do_lc and $do_uc);
10986 0 0         my @G = $backwards ? reverse @$C : @$C;
10987 0           for my $group (@G[$omit..$#G]) {
10988 0 0         last if --$limit < 0;
10989 0           $groupN++;
10990 0           for my $penalty_group (@$group) { # each $penalty_group is indexed by double_occupancy and layer
10991             # each layer in sorted; if $both, we prefer to extract a paired translation; so it is enough to check the first elt on each layer
10992 0           my $group_both = $both;
10993 0 0         if ($both) {
10994 0 0 0       $group_both = 0 unless $penalty_group->[1] and @{ $penalty_group->[1][$L] || [] } or @{ $penalty_group->[1][0] || [] };
  0 0 0        
  0 0          
10995             } # if $group_both == 0, and $both: double-group is empty, so we can look only in single/unrelated one.
10996             # if $both = $group_both == 0: may not look in double group, so can look only in single/unrelated one
10997             # if $both = $group_both == 1: must look in double-group only.
                  # $Set is [layer, set]: the layer 0 (when $L is not 0) is followed by the layer $L
10998 0 0         for my $Set (($L ? [0, $penalty_group->[$group_both][0]] : ()), [$L, $penalty_group->[$group_both][$L]]) {
10999 0           my $set = $Set->[1];
11000 0 0 0       next unless $set and @$set; # @$set consists of [unshifted, shifted] pairs
11001 0 0         if ($group_both) { # we know we meet a double element at start of the group
11002 0 0         my $OUT = $backwards ? pop @$set : shift @$set; # we know we meet a double element at start of the group
11003 0 0         return [] if $groupN <= $ignore_groups;
11004 0 0         @$store_level = ($Set->[0]) x 2 if $store_level;
11005 0           return $OUT;
11006             }
11007             ## or ($both and defined $elt->[0] and defined $elt->[1]);
                  # $spliced compensates the indices for the elements already removed from
                  # @$set during the forward scan
11008 0           my $spliced = 0;
11009 0 0         for my $eltA ($backwards ? map($#$set - $_, 0..$#$set) : 0..$#$set) {
11010 0           my $elt = $eltA - $spliced;
11011 0   0       my $lc_ok = ($do_lc and defined $set->[$elt][0]);
11012 0   0       my $uc_ok = ($do_uc and defined $set->[$elt][1]);
11013 0 0 0       next if not ($lc_ok or $uc_ok);
11014 0   0       my $have_both = (defined $set->[$elt][0] and defined $set->[$elt][1]);
11015 0   0       my $found_both = ($lc_ok and $uc_ok); # If defined $have_out, cannot have $found_both; moreover $have_out ne $uc_ok
11016 0 0 0       die "Panic!" if defined $have_out and ($found_both or $have_out eq $uc_ok);
      0        
11017             # next if not $found_both and defined $have_out and $have_out eq $uc_ok;
11018 0 0         my $can_splice = $have_both ? $both : 1;
11019 0 0         my $can_return = $both ? $have_both : 1;
11020 0           my $OUT = my $out = $set->[$elt]; # Can't return yet: @out may contain a part of info...
11021 0 0 0       unless ($groupN <= $ignore_groups or defined $have_out and $have_out eq $uc_ok) { # In case !$do_return or $have_out
      0        
                  # $uc_ok doubles as the index of the slot: false selects lc, true selects uc
11022 0           $out[$uc_ok] = $out->[$uc_ok]; # In case !$do_return or $have_out
11023 0           $out_levels[$uc_ok] = $Set->[0];
11024             }
11025             #warn 'Doing <', join('> <', map {defined() ? $_ : 'undef'} @{ $set->[$elt] }), "> L=$L; splice=$can_splice; return=$can_return; lc=$lc_ok uc=$uc_ok";
11026 0 0         if ($can_splice) { # Now: $both and not $have_both; must edit in place
11027 0           splice @$set, $elt, 1;
11028 0 0         $spliced++ unless $backwards;
11029             } else { # Must edit in place
11030 0           $OUT = [@$out]; # Deep copy
11031 0           undef $out->[$uc_ok]; # only one matched...
11032             }
11033 0 0         $OUT = [] if $groupN <= $ignore_groups;
11034 0 0         if ($can_return) {
11035 0 0         if ($found_both) {
11036 0 0         @$store_level = map {$_ and $Set->[0]} @$OUT if $store_level;
  0 0          
11037 0           return $OUT;
11038             } else {
11039 0 0         @$store_level = @out_levels if $store_level;
11040 0           return \@out;
11041             }
11042             # return($found_both ? $OUT : \@out);
11043             } # Now: had $both and !$had_both; must continue
                  # One slot is found; from now on look only for the other one
11044 0           $have_out = $uc_ok;
11045 0           $both = 0; # $group_both is already FALSE
11046 0 0         ($lc_ok ? $do_lc : $do_uc) = 0;
11047             #warn "lc/uc: $do_lc/$do_uc";
11048             }
11049             }
11050             }
11051             }
11052 0 0         @$store_level = @out_levels if $store_level;
11053             return \@out
11054 0           }
11055            
11056             my ($rebuild_fake, $rebuild_style) = ("\n\t\t\t/* To be auto-generated */\n", <<'EOR');
11057            
11058             .klayout span, .klayout-wrapper .over-shift {
11059             font-size: 29pt ;
11060             font-weight: bolder;
11061             text-wrap: none;
11062             white-space: nowrap;
11063             }
11064             .klayout kbd, .asSpan { display: inline-block; }
11065             .asSpan2 { display: inline-table; }
11066            
11067             /* Not used; allows /-diagonals to be highlighted with nth-last-of-type() */
11068             .klayout kbd.hidden-align { display: none; }
11069            
11070             kbd span.lc, kbd span.uc { display: inline; }
11071            
11072             /* Hide lc only if in .uc or hovering over -uc and not inside; similarly for uc */
11073             /* States: .klayout-wrapper:not(:hover) | .klayout.uclc:hover NORMAL = UCLC
11074             .klayout-uc:hover .klayout:not(:hover) UC
11075             .klayout-wrapper:hover .klayout-uc:not(:hover) LC */
11076             .klayout.lc kbd span.uc, .klayout.uc kbd span.lc,
11077             .klayout-uc:hover:not(:active) .klayout:not(.lc):not(:hover) kbd span.lc,
11078             .klayout-uc:hover:active .klayout:not(.uc):not(:hover) kbd span.uc,
11079             .klayout-wrapper:hover:not(:active) .klayout-uc:not(:hover) .klayout:not(.uc) kbd span.uc,
11080             .klayout-wrapper:hover:active .klayout-uc:not(:hover) .klayout:not(.lc) kbd span.lc { display: none; }
11081            
11082             /* These should be active unless hovering over wrapper, and not internal .klayout */
11083             .klayout.uclc:hover kbd span.uc, .klayout.uclc:hover kbd span.lc,
11084             .klayout.uclc.force kbd span.uc, .klayout.uclc.force kbd span.lc,
11085             .klayout-wrapper:not(:hover) .klayout-uc .klayout.uclc:not(.do-alt) kbd span.uc,
11086             .klayout-wrapper:not(:hover) .klayout-uc .klayout.uclc:not(.do-alt) kbd span.lc {
11087             font-size: 70%;
11088             }
11089             .klayout.uclc:hover kbd span.uc, .klayout.uclc:hover kbd span.lc,
11090             .klayout.uclc:not(.in-wrapper) kbd span.uc, .klayout.uclc:not(.in-wrapper) kbd span.lc,
11091             .klayout.uclc.force kbd span.uc, .klayout.uclc.force kbd span.lc,
11092             .klayout-wrapper:not(:hover) .klayout-uc .klayout.uclc.do-alt kbd span.uc,
11093             .klayout.uclc.do-alt:hover kbd span.uc,
11094             .klayout-wrapper:not(:hover) .klayout-uc .klayout.uclc.do-alt kbd span.lc,
11095             .klayout.uclc.do-alt:hover kbd span.lc,
11096             .klayout-wrapper:not(:hover) .klayout-uc .klayout.uclc:not(.do-alt) kbd span.uc,
11097             .klayout-wrapper:not(:hover) .klayout-uc .klayout.uclc:not(.do-alt) kbd span.lc {
11098             position: absolute;
11099             z-index: 10;
11100             border: 1px dotted green;
11101             line-height: 0.8em; /* decreasing this moves up; should be changed with padding-bottom */
11102             }
11103             .klayout-wrapper:not(:hover) .klayout-uc .klayout.uclc kbd span.uc,
11104             .klayout-wrapper .klayout-uc .klayout.uclc:hover kbd span.uc,
11105             .klayout.uclc kbd span.uc {
11106             right: 0.2em;
11107             top: -0.05em;
11108             padding-bottom: 0.15em; /* Less makes _ not fit inside border... */
11109             }
11110             .klayout-wrapper:not(:hover) .klayout-uc .klayout.uclc kbd span.lc,
11111             .klayout-wrapper .klayout-uc .klayout.uclc:hover kbd span.lc,
11112             .klayout.uclc kbd span.lc {
11113             left: 0.2em;
11114             bottom: 0em;
11115             }
11116             /* Same for left/right placement */
11117             .klayout-wrapper:not(:hover) .klayout-uc .klayout.uclc kbd span.uc.on-left,
11118             .klayout-wrapper .klayout-uc .klayout.uclc:hover kbd span.uc.on-left,
11119             .klayout.uclc:not(.in-wrapper) kbd span.uc.uc.on-left { /* repeat is needed to protect against :not(.base) about 25lines below */
11120             left: 0.35em;
11121             right: auto;
11122             }
11123             .klayout-wrapper:not(:hover) .klayout-uc .klayout.uclc kbd span.lc.on-left,
11124             .klayout-wrapper .klayout-uc .klayout.uclc:hover kbd span.lc.on-left,
11125             .klayout.uclc:not(.in-wrapper) kbd span.lc.lc.on-left {
11126             left: 0.0em;
11127             }
11128             .klayout-wrapper:not(:hover) .klayout-uc .klayout.uclc kbd span.uc.on-right,
11129             .klayout-wrapper .klayout-uc .klayout.uclc:hover kbd span.uc.on-right,
11130             .klayout.uclc:not(.in-wrapper) kbd span.uc.uc.on-right {
11131             right: 0.0em;
11132             }
11133             .klayout-wrapper:not(:hover) .klayout-uc .klayout.uclc kbd span.lc.on-right,
11134             .klayout-wrapper .klayout-uc .klayout.uclc:hover kbd span.lc.on-right,
11135             .klayout.uclc:not(.in-wrapper) kbd span.lc.lc.on-right {
11136             left: auto;
11137             right: 0.35em;
11138             }
11139             .klayout kbd span:not(.base):not(.base-uc):not(.base-lc).on-right
11140             { left: auto; right: 0.0em; position: absolute; }
11141             .klayout kbd span:not(.base):not(.base-uc):not(.base-lc).on-left
11142             { left: 0.0em; right: auto; position: absolute; }
11143             .klayout kbd .on-right:not(.prefix), .on-right-ex { color: firebrick; }
11144             .klayout kbd .on-right:not(.prefix).vbell { color: Coral; }
11145             .klayout kbd .on-left { z-index: 10; }
11146             .klayout kbd .on-right { z-index: 9; }
11147            
11148             .klayout-wrapper:hover .klayout.uclc:not(:hover) kbd.shift {outline: 6px dotted green;}
11149            
11150             kbd span, kbd div { vertical-align: bottom; } /* no effect ???!!! */
11151            
11152             kbd {
11153             color: #444;
11154             /* line-height: 1.6em; */
11155             width: 1.4em; /* +0.24em border +0.08em margin; total 1.72em */
11156            
11157             /* +0.3em border; */
11158             min-height: 0.83em; /* These two should be changed together to get uc letters centered... */
11159             line-height: 0.75em; /* Increasing by the same amount works fine??? */
11160             /* One also needs to change the vertical offsets of arrows from_*, and System-key icon */
11161            
11162             text-align: center;
11163             cursor: pointer;
11164             padding: 0.0em 0.0em 0.0em 0.0em;
11165             margin: 0.04em;
11166             white-space: nowrap;
11167             vertical-align: top;
11168             position: relative;
11169            
11170             background-color: #FFFFFF;
11171            
11172             background-image: -moz-linear-gradient(left, rgba(0,0,0,0.2), rgba(64,64,64,0.2), rgba(64,64,64,0.2), rgba(128,128,128,0.2));
11173             background-image: -webkit-gradient(linear, left top, right top, color-stop(0%,rgba(0,0,0,0.2)), color-stop(33%,rgba(64,64,64,0.2)), color-stop(66%,rgba(64,64,64,0.2)), color-stop(100%,rgba(128,128,128,0.2)));
11174             background-image: -webkit-linear-gradient(left, rgba(0,0,0,0.2) 0%, rgba(64,64,64,0.2) 33%, rgba(64,64,64,0.2) 66%, rgba(128,128,128,0.2) 100%);
11175             background-image: -o-linear-gradient(left, rgba(0,0,0,0.2) 0%, rgba(64,64,64,0.2) 33%, rgba(64,64,64,0.2) 66%, rgba(128,128,128,0.2) 100%);
11176             background-image: -ms-linear-gradient(left, rgba(0,0,0,0.2) 0%, rgba(64,64,64,0.2) 33%, rgba(64,64,64,0.2) 66%, rgba(128,128,128,0.2) 100%);
11177             background-image: linear-gradient(0deg, rgba(0,0,0,0.2) 0%, rgba(64,64,64,0.2) 33%, rgba(64,64,64,0.2) 66%, rgba(128,128,128,0.2) 100%);
11178             filter: progid:DXImageTransform.Microsoft.gradient( startColorstr='#dddddd', endColorstr='#e5e5e5',GradientType=1 );
11179            
11180             border-top: solid 0.1em #CCC;
11181             border-right: solid 0.12em #AAA;
11182             border-bottom: solid 0.2em #999;
11183             border-left: solid 0.12em #BBB;
11184             -webkit-border-radius: 0.22em;
11185             -moz-border-radius: 0.22em;
11186             border-radius: 0.22em;
11187             z-index: 0;
11188            
11189             -webkit-box-shadow:
11190             0.03em 0.1em 0.1em 0.06em #888,
11191             0.05em 0.1em 0.06em 0.06em #aaa;
11192             -moz-box-shadow:
11193             0.03em 0.1em 0.1em 0.06em #888,
11194             0.05em 0.1em 0.06em 0.06em #aaa;
11195             box-shadow:
11196             0.03em 0.1em 0.1em 0.00em #888 ,
11197             0.05em 0.1em 0.06em 0.0em #aaa ;
11198             }
11199            
11200             kbd:hover, .klayout-wrapper:hover .klayout:not(:hover) kbd.shift {
11201             color: #222;
11202             background-image: -moz-linear-gradient(left, rgba(128,128,128,0.2), rgba(192,192,192,0.2), rgba(192,192,192,0.2), rgba(255,255,255,0.2));
11203             background-image: -webkit-gradient(linear, left top, right top, color-stop(0%,rgba(128,128,128,0.2)), color-stop(33%,rgba(192,192,192,0.2)), color-stop(66%,rgba(192,192,192,0.2)), color-stop(100%,rgba(255,255,255,0.2)));
11204             background-image: -webkit-linear-gradient(left, rgba(128,128,128,0.2) 0%, rgba(192,192,192,0.2) 33%, rgba(192,192,192,0.2) 66%, rgba(255,255,255,0.2) 100%);
11205             background-image: -o-linear-gradient(left, rgba(128,128,128,0.2) 0%, rgba(192,192,192,0.2) 33%, rgba(192,192,192,0.2) 66%, rgba(255,255,255,0.2) 100%);
11206             background-image: -ms-linear-gradient(left, rgba(128,128,128,0.2) 0%, rgba(192,192,192,0.2) 33%, rgba(192,192,192,0.2) 66%, rgba(255,255,255,0.2) 100%);
11207             background-image: linear-gradient(0deg, rgba(128,128,128,0.2) 0%, rgba(192,192,192,0.2) 33%, rgba(192,192,192,0.2) 66%, rgba(255,255,255,0.2) 100%);
11208             filter: progid:DXImageTransform.Microsoft.gradient( startColorstr='#e5e5e5', endColorstr='#ffffff',GradientType=1 );
11209             }
11210             kbd:active, kbd.selected, .klayout-uc:hover:not(:active) .klayout:not(:hover) kbd.shift, .klayout-wrapper:active .klayout-uc:not(:hover) kbd.shift {
11211             margin-top: 0.14em; /* This variant is with "solid" buttons, the commented one is with "rubber" ones */
11212             border-top: solid 0.10em #CCC;
11213             border-right: solid 0.12em #9a9a9a; /* Make right/bottom a tiny way darker */
11214             border-bottom: solid 0.1em #8a8a8a;
11215             border-left: solid 0.12em #BBB;
11216             /* margin-top: 0.11em;
11217             border-top: solid 0.13em #999;
11218             border-right: solid 0.12em #BBB;
11219             border-bottom: solid 0.1em #CCC;
11220             border-left: solid 0.12em #AAA; */
11221             padding: 0.0em 0.0em 0.0em 0.0em;
11222            
11223             -webkit-box-shadow:
11224             0.05em 0.03em 0.1em 0.1em #aaa;
11225             -moz-box-shadow:
11226             0.05em 0.03em 0.1em 0.1em #aaa;
11227             box-shadow:
11228             0.05em 0.03em 0.1em 0em #aaa;
11229            
11230             }
11231             kbd img {
11232             padding-left: 0.25em;
11233             vertical-align: middle;
11234             height: 22px; width: 22px;
11235             opacity: 0.8;
11236             }
11237             kbd:hover img {
11238             opacity: 1;
11239             }
11240             kbd span.shrink {
11241             font-size: 85%;
11242             }
11243             .klayout.do-altgr kbd span.shrink.altgr {
11244             font-size: 72%;
11245             }
11246             kbd .small {
11247             font-size: 62%;
11248             }
11249             kbd .vsmall {
11250             font-size: 39%;
11251             }
11252            
11253             kbd .base, kbd .base-lc, kbd .base-uc {
11254             -webkit-touch-callout: none;
11255             -webkit-user-select: none;
11256             -khtml-user-select: none;
11257             -moz-user-select: none;
11258             -ms-user-select: none;
11259             -o-user-select: none;
11260             user-select: none;
11261             }
11262            
11263             /* Special rules for do-alt-display. Without alt2, places the base on left and right;
11264             with alt2, places base on the left (unless base-right is present) */
11265            
11266             /* .klayout.do-alt.uclc kbd span.lc, .klayout.do-alt.uclc kbd span.uc { */
11267             .klayout.do-alt.uclc:not(.in-wrapper) kbd span.uc, .klayout.do-alt.uclc:not(.in-wrapper) kbd span.lc,
11268             .klayout.do-alt.uclc:hover kbd span.uc, .klayout.do-alt.uclc:hover kbd span.lc,
11269             .klayout.do-alt.uclc.force kbd span.uc, .klayout.do-alt.uclc.force kbd span.lc,
11270             .klayout-wrapper:not(:hover) .klayout-uc .klayout.do-alt.uclc kbd span.uc,
11271             .klayout-wrapper:not(:hover) .klayout-uc .klayout.do-alt.uclc kbd span.lc {
11272             font-size: 85%;
11273             }
11274            
11275             .klayout.do-alt.sz125 kbd span.uc, .klayout.do-alt.sz125 kbd span.lc, /* exclude below: too specific otherwise */
11276             .klayout.do-alt.sz125 kbd span:not(.lc):not(.uc):not(.base):not(.base-uc):not(.base-lc):not(.shrink):not(.small):not(.vsmall) {
11277             font-size: 125%;
11278             line-height: 0.98em; /* decreasing this moves up; should be changed with padding-bottom */
11279             /* padding-bottom: 0.1em; */ /* Less makes _ not fit inside border... */
11280             }
11281             .klayout.do-alt.sz120 kbd span.uc, .klayout.do-alt.sz120 kbd span.lc, /* exclude below: too specific otherwise */
11282             .klayout.do-alt.sz120 kbd span:not(.lc):not(.uc):not(.base):not(.base-uc):not(.base-lc):not(.shrink):not(.small):not(.vsmall) {
11283             font-size: 120%;
11284             line-height: 1.02em; /* decreasing this moves up; should be changed with padding-bottom */
11285             /* padding-bottom: 0.1em; */ /* Less makes _ not fit inside border... */
11286             }
11287             .klayout.do-alt kbd span.uc, .klayout.do-alt kbd span.lc, /* exclude below: too specific otherwise */
11288             .klayout.do-alt.sz115 kbd span.uc, .klayout.do-alt.sz115 kbd span.lc,
11289             .klayout.do-alt kbd span:not(.lc):not(.uc):not(.base):not(.base-uc):not(.base-lc):not(.shrink):not(.small):not(.vsmall),
11290             .klayout.do-alt.sz115 kbd span:not(.lc):not(.uc):not(.base):not(.base-uc):not(.base-lc):not(.shrink):not(.small):not(.vsmall) {
11291             font-size: 115%;
11292             line-height: 1.05em; /* decreasing this moves up; should be changed with padding-bottom */
11293             /* padding-bottom: 0.1em; */ /* Less makes _ not fit inside border... */
11294             }
11295             .klayout.do-alt.sz110 kbd span.uc, .klayout.do-alt.sz110 kbd span.lc, /* exclude below: too specific otherwise */
11296             .klayout.do-alt.sz110 kbd span:not(.lc):not(.uc):not(.base):not(.base-uc):not(.base-lc):not(.shrink):not(.small):not(.vsmall) {
11297             font-size: 110%;
11298             line-height: 1.12em; /* decreasing this moves up; should be changed with padding-bottom */
11299             /* padding-bottom: 0.1em; */ /* Less makes _ not fit inside border... */
11300             }
11301             .klayout.do-alt.sz100 kbd span.uc, .klayout.do-alt.sz100 kbd span.lc, /* exclude below: too specific otherwise */
11302             .klayout.do-alt.sz100 kbd span:not(.lc):not(.uc):not(.base):not(.base-uc):not(.base-lc):not(.shrink):not(.small):not(.vsmall) {
11303             line-height: 1.2em; /* decreasing this moves up; should be changed with padding-bottom */
11304             /* padding-bottom: 0.1em; */ /* Less makes _ not fit inside border... */
11305             }
11306            
11307             .klayout.do-alt kbd span.base-lc, .klayout.do-alt kbd span.base-uc {
11308             font-size: 90%;
11309             }
11310             .klayout.do-alt.alt2 kbd span.base-lc, .klayout.do-alt.alt2 kbd span.base-uc {
11311             font-size: 80%;
11312             }
11313            
11314             .klayout.do-alt kbd span.base-uc {
11315             right: 15%;
11316             top: 35%; /* Combine rel-parent and rel-us offsets : */
11317             }
11318             .klayout.do-alt kbd span.base-lc {
11319             left: 15%;
11320             bottom: 25%; /* Combine rel-parent and rel-us offsets : */
11321             }
11322             .klayout.do-alt.alt2 kbd span.base-uc {
11323             left: 35%;
11324             top: 30%; /* Combine rel-parent and rel-us offsets : */
11325             }
11326             .klayout.do-alt.alt2 kbd span.base-lc {
11327             left: 15%;
11328             bottom: 25%; /* Combine rel-parent and rel-us offsets : */
11329             }
11330             .klayout.do-alt.alt2.base-right kbd span.base-uc {
11331             right: 15%;
11332             left: auto; /* Combine rel-parent and rel-us offsets : */
11333             }
11334             .klayout.do-alt.alt2.base-right kbd span.base-lc {
11335             right: 35%;
11336             left: auto; /* Combine rel-parent and rel-us offsets : */
11337             }
11338             .klayout.do-alt.alt2.base-center kbd span.base-uc {
11339             left: 60%; /* Combine rel-parent and rel-us offsets : */
11340             }
11341             .klayout.do-alt.alt2.base-center kbd span.base-lc {
11342             left: 40%; /* Combine rel-parent and rel-us offsets : */
11343             }
11344            
11345             .klayout.do-alt kbd span.base {
11346             font-size: 120%;
11347             left: 25%;
11348             top: 65%; /* Combine rel-parent and rel-us offsets : */
11349             }
11350             .klayout.do-alt.large-base.large-base kbd span.base { /* Make .large-base override .alt2 */
11351             font-size: 200%;
11352             left: 50%;
11353             top: 50%; /* Combine rel-parent and rel-us offsets : */
11354             }
11355             .klayout.do-alt.alt2 kbd span.base {
11356             font-size: 110%;
11357             left: 25%;
11358             top: 75%; /* Combine rel-parent and rel-us offsets : */
11359             }
11360             .klayout.do-alt.alt2.base-right kbd span.base {
11361             right: 25%;
11362             left: auto; /* Combine rel-parent and rel-us offsets : */
11363             }
11364             .klayout.do-alt.alt2.base-center kbd span.base {
11365             left: 50%; /* Combine rel-parent and rel-us offsets : */
11366             }
11367             .klayout.do-alt kbd span.base, .klayout.do-alt kbd span.base-lc, .klayout.do-alt kbd span.base-uc {
11368             position: absolute;
11369             z-index: -1;
11370            
11371             opacity: 0.25;
11372             filter: alpha(opacity=25); /* IE6-IE8 */
11373            
11374             color: blue;
11375             line-height: 1em; /* Tight-fitting box */
11376             height: 1em;
11377             width: 1em;
11378             margin: -0.5em -0.5em -0.5em -0.5em; /* -0.5em is the geometric center */
11379             }
11380             .klayout.do-alt kbd {
11381             min-height: 1.2em; /* Should be changed together to get uc letters centered... */
11382             line-height: 1.2em; /* Increasing by the same amount works fine??? */
11383             }
11384             .klayout.do-altgr span.altgr {outline: 9px dotted green;}
11385            
11386             kbd.with_x-NONONO:before {
11387             position: absolute;
11388             z-index: -10;
11389            
11390             opacity: 0.25;
11391             filter: alpha(opacity=25); /* IE6-IE8 */
11392            
11393             content: "✖";
11394             color: red;
11395             font-size: 120%;
11396            
11397             line-height: 1em; /* Tight-fitting box */
11398             height: 1em;
11399             width: 1em;
11400            
11401             top: 50%; /* Combine rel-parent and rel-us offsets : */
11402             left: 50%;
11403             margin: -0.43em 0 0 -0.5em; /* -0.5em is the geometric center; but it is not in the center of ✖...*/
11404             }
11405             kbd.from_sw:after, kbd.from_ne:after, kbd.from_nw:after, kbd.to_ne:after, kbd.to_nw:before, kbd.to_w:after, kbd.from_w:after {
11406             position: absolute;
11407             z-index: 1;
11408             font-size: 80%;
11409             color: red;
11410             text-shadow: 1px 1px #ffff88, -1px -1px #ffff88, -1px 1px #ffff88, 1px -1px #ffff88;
11411             text-shadow: 1px 1px rgba(255,255,0,0.3), -1px -1px rgba(255,255,0,0.3), -1px 1px rgba(255,255,0,0.3), 1px -1px rgba(255,255,0,0.3);
11412             }
11413             kbd.from_sw.grn:after, kbd.from_ne.grn:after, kbd.from_nw.grn:after, kbd.to_ne.grn:after, kbd.to_nw.grn:before, kbd.to_w.grn:after, kbd.from_w.grn:after {
11414             color: green;
11415             }
11416             kbd.from_sw.blu:after, kbd.from_ne.blu:after, kbd.from_nw.blu:after, kbd.to_ne.blu:after, kbd.to_nw.blu:before, kbd.to_w.blu:after, kbd.from_w.blu:after {
11417             color: blue;
11418             }
11419             kbd.from_sw.ylw:after, kbd.from_ne.ylw:after, kbd.from_nw.ylw:after, kbd.to_ne.ylw:after, kbd.to_nw.ylw:before, kbd.to_w.ylw:after, kbd.from_w.ylw:after {
11420             color: #FFB400;
11421             }
11422             kbd.from_sw:not(.pure), kbd.xfrom_sw, kbd.from_ne:not(.pure), kbd.from_nw:not(.pure), kbd.to_ne:not(.pure), kbd.to_nw:not(.pure) {
11423             text-shadow: 1px 1px yellow, -1px -1px yellow, -1px 1px yellow, 1px -1px yellow;
11424             }
11425             kbd.from_sw:after {
11426             left: -0.0em;
11427             bottom: -0.65em;
11428             }
11429             kbd.from_sw:after, kbd.to_ne:after {
11430             content: "⇗";
11431             }
11432             kbd.from_se:after, kbd.to_nw:before {
11433             content: "⇖";
11434             }
11435             kbd.from_ne:after, kbd.from_nw:after {
11436             top: -0.55em;
11437             }
11438             kbd.to_ne:after, kbd.to_nw:before { top: -0.85em;}
11439             kbd.to_nw:before { left: 0.01em;}
11440             kbd.from_ne:after { content: "⇙"; }
11441             kbd.from_ne:after, kbd.to_ne:after { right: -0.0em; }
11442             kbd.from_nw:after { content: "⇘"; left: -0.0em; }
11443             kbd.to_w:after, kbd.from_w:after {
11444             top: 45%;
11445             left: -0.7em;
11446             }
11447             kbd.to_w.high:after, kbd.from_w.high:after {
11448             top: -15%;
11449             left: -0.5em;
11450             }
11451             kbd.to_w:after { content: "⇐"; }
11452             kbd.from_w:after { content: "⇒"; }
11453            
11454             /* Compensate for higher keys */
11455             .klayout.do-alt kbd.from_sw:after {
11456             bottom: -0.90em;
11457             }
11458             .klayout.do-alt kbd.from_ne:after, .klayout.do-alt kbd.from_nw:after {
11459             top: -0.85em;
11460             }
11461            
11462             span.prefix {
11463             color: yellow;
11464             text-shadow: 1px 1px black, -1px -1px black, -1px 1px black, 1px -1px black;
11465             }
11466             span.prefix.prefix2 {
11467             text-shadow: 1px 1px black, -1px -1px black, -1px 1px black, 1px -1px black,
11468             3px 0px firebrick, -3px 0px firebrick, 0px 3px firebrick, 0px -3px firebrick;
11469             }
11470             span.very-special {
11471             text-shadow: 1px 1px lime, -1px -1px lime, -1px 1px lime, 1px -1px lime;
11472             }
11473             span.special {
11474             text-shadow: 2px 2px dodgerblue, -2px -2px dodgerblue, -2px 2px dodgerblue, 2px -2px dodgerblue;
11475             }
11476             .thinspecial span.special {
11477             text-shadow: 1px 1px dodgerblue, -1px -1px dodgerblue, -1px 1px dodgerblue, 1px -1px dodgerblue;
11478             }
11479             span.not-surr:not(.prefix) {
11480             text-shadow: 2px 2px white, -2px -2px white, -2px 2px white, 2px -2px white;
11481             }
11482             span.need-learn {
11483             text-shadow: 1px 1px coral, -1px -1px coral, -1px 1px coral, 1px -1px coral;
11484             }
11485             span.need-learn.on-right {
11486             text-shadow: 1px 1px black, -1px -1px black, -1px 1px black, 1px -1px black,
11487             2px 2px coral, -2px -2px coral, -2px 2px coral, 2px -2px coral;
11488             }
11489             span.may-guess {
11490             text-shadow: 1px 1px yellow, -1px -1px yellow, -1px 1px yellow, 1px -1px yellow;
11491             }
11492            
11493             kbd.win_logo.ubuntu:before {
11494             content: url(http://linux.bihlman.com/wp-content/plugins/wp-useragent/img/24/os/ubuntu-2.png);
11495             }
11496             kbd.win_logo:before {
11497             position: absolute;
11498             z-index: -10;
11499            
11500             content: url(40px-computer_glenn_rolla_01.svg.med.png);
11501             height: 100%;
11502             width: 100%;
11503            
11504             top: 0%; /* Combine rel-parent and rel-us offsets : */
11505             left: 0%;
11506             /* margin: -0.5em -0.5em -0.5em -0.5em; */ /* -0.5em is the geometric center */
11507             }
11508             .do-alt kbd.win_logo:before { /* How to vcenter automatically??? */
11509             top: 20%;
11510             }
11511            
11512             /* Mark vowel's diagonals (for layout of diacritics) */
11513             .ddiag .arow > kbd:nth-of-type(2), .ddiag .arow > kbd:nth-last-of-type(7),
11514             .diag .arow > kbd:nth-of-type(2), .diag .arow > kbd:nth-of-type(7),
11515             .diag .drow > kbd:nth-of-type(2), .diag .drow > kbd:nth-of-type(7),
11516             .diag .arow > kbd:nth-of-type(10), .diag .drow > kbd:nth-of-type(10), kbd.red-bg
11517             { background-color: #ffcccc; }
11518             .ddiag .arow > kbd:nth-last-of-type(6), .ddiag .arow > kbd:nth-of-type(4),
11519             .diag .arow > kbd:nth-of-type(8), .diag .arow > kbd:nth-of-type(3),
11520             .diag .drow > kbd:nth-of-type(8), .diag .drow > kbd:nth-of-type(3), kbd.green-bg
11521             { background-color: #ccffcc; }
11522             .ddiag .arow > kbd:nth-last-of-type(8), .ddiag .arow > kbd:nth-last-of-type(5),
11523             .diag .arow > kbd:nth-of-type(9), .diag .arow > kbd:nth-of-type(4),
11524             .diag .drow > kbd:nth-of-type(9), .diag .drow > kbd:nth-of-type(4), kbd.blue-bg
11525             { background-color: #ccccff; }
11526            
11527             /* Mark non-vowel's diagonals (for layout of diacritics) */
11528             .hide45end .arow > kbd:nth-of-type(5), .hide45end .arow > kbd:nth-of-type(6),
11529             .hide45end .arow > kbd:nth-of-type(11),
11530             .hide45end .drow > kbd:nth-of-type(5), .hide45end .drow > kbd:nth-of-type(6),
11531             .hide45end .drow > kbd:nth-of-type(11), kbd.semi-hidden
11532             { opacity: 0.45; }
11533            
11534             span.vbell { color: SandyBrown; }
11535             span.three-cases { outline: 3px dotted yellow; }
11536             span.three-cases-long { outline: 3px dotted MediumSpringGreen; }
11537            
11538             span.withSubst { outline: 1px dotted blue; outline-offset: -1px; }
11539             span.isSubst { outline: 1px solid blue; outline-offset: -1px; }
11540            
11541             .use-operator span.operator { background-color: rgb(255,192,203) /*pink*/; }
11542             span.relation { background-color: rgb(255,160,122) /*lightsalmon*/; }
11543             span.ipa { background-color: rgb(173,255,47) /*greenyellow*/; }
11544             span.nAry { background-color: rgb(144,238,144) /*lightgreen*/; }
11545             span.paleo { background-color: rgb(240,230,140) /*Khaki*/; }
11546             .use-viet span.viet { background-color: rgb(220,220,220) /*Gainsboro*/; }
11547             div:not(.no-doubleaccent) span.doubleaccent { background-color: rgb(255,228,196) /*Bisque*/; }
11548             span.ZW { background-color: rgb(220,20,60) /*crimson*/; }
11549             span.WS { background-color: rgb(128,0,0) /*maroon*/; }
11550            
11551             .use-operator span.operator { background-color: rgba(255,192,203,0.5) /*pink*/; }
11552             span.relation { background-color: rgba(255,160,122,0.5) /*lightsalmon*/; }
11553             span.ipa { background-color: rgba(173,255,47,0.5) /*greenyellow*/; }
11554             span.nAry { background-color: rgba(144,238,144,0.5) /*lightgreen*/; }
11555             span.paleo { background-color: rgba(240,230,140,0.5) /*Khaki*/; }
11556             .use-viet span.viet { background-color: rgba(220,220,220,0.5) /*Gainsboro*/; }
11557             div:not(.no-doubleaccent) span.doubleaccent { background-color: rgba(255,228,196,0.5) /*Bisque*/; }
11558             span.ZW { background-color: rgba(220,20,60,0.5) /*crimson*/; }
11559             span.WS { background-color: rgba(128,0,0,0.5) /*maroon*/; }
11560            
11561             span.lFILL[convention]:before { content: attr(convention);
11562             color: white;
11563             font-size: 50%; }
11564            
11565             span.lFILL:not([convention]) { margin: 0ex 0.35ex; }
11566             span.l-NONONO { margin: 0ex 0.06ex; }
11567             span.yyy { padding: 0px !important; }
11568            
11569             div.rtl-hover:hover div:not(:hover) kbd span:not(.no-mirror-rtl):not(.base):not(.base-uc):not(.base-lc) { direction: rtl; }
11570            
11571             div.zero { position: relative;}
11572             div.zero div.over-shift { position: absolute; height: 1.13em; z-order: 999;}
11573             /* div.zero div.over-shift { outline: 3px dotted yellow;} */
11574             .do-alt + div.zero div.over-shift { height: 1.5em; }
11575             div.zero.l div.over-shift { left: 0.04pt; width: 4.24em;}
11576             div.zero.r div.over-shift { left: 21.12em; width: 3.56em;} /* (1.72em - 0.04em) × 10 + 4.24em + 0.08 */
11577             div.zero.tp div.over-shift { top: 7.8em;}
11578             .over-shift-outline div.zero.btm div.over-shift { outline: 3px dotted blue;}
11579             div.zero.btm div.over-shift { bottom: 1.13em;}
11580             .do-alt + div.zero.btm div.over-shift { bottom: 1.5em;}
11581             /* div.zero:hover { outline: 6px dotted yellow;} */
11582            
11583             EOR
11584            
# apply_filter_div($self, $txt [, \%opt]) -- returns a copy of $txt in which the
# first region matched by the pattern below is regenerated.
#   * The pattern looks for a line-anchored opening element carrying a
#     kbd_rebuild="..." attribute; the attribute value is captured as $2.
#   * The replacement keeps the captured opening ($1) and closing ($3) parts and
#     puts between them either $rebuild_fake (when $opt->{fake} is true) or the
#     result of $self->html_keyboard_diagram($2, $opt).
#   * No /g flag: only the first match is rewritten.  $opt defaults to {}.
# NOTE(review): the s(...) pattern as shown here is incomplete (unbalanced
# parentheses, no visible third capture group) -- presumably markup was lost
# when this listing was produced; restore it from the pristine module before
# touching this sub.
11585             sub apply_filter_div ($$;$) {
11586 0   0 0 0   my($self, $txt, $opt) = (shift, shift, shift || {});
11587 0           $txt =~ s(^(]*\skbd_rebuild="([^""]*?)"[^'">]*>).*?^(
11588 0 0         ( $1 . ($opt->{fake} ? $rebuild_fake : $self->html_keyboard_diagram("$2", $opt)) . $3 )msge;
11589 0           $txt;
11590             }
# apply_filter_style($self, $txt [, \%opt])
#
# Returns a copy of $txt in which the text between the marker comments
#     /* START auto-generated style */   and   /* END auto-generated style */
# is replaced.  The START marker must begin a line (leading whitespace is
# allowed); both markers are kept.  The inserted text is $rebuild_fake when
# $opt->{fake} is true and $rebuild_style otherwise.  Only the first marked
# region is rewritten; text without markers is returned unchanged.
sub apply_filter_style ($$;$) {
    my ($self, $txt, $opt) = @_;
    $opt ||= {};

    # Choose the payload up front; it does not depend on the match.
    my $payload = $opt->{fake} ? $rebuild_fake : $rebuild_style;

    $txt =~ s{^(\s*/\*\s*START\s+auto-generated\s+style\s*\*/).*?(/\*\s*END\s+auto-generated\s+style\s*\*/)}
             {$1$payload$2}ms;
    return $txt;
}
11597            
# @HTML_KBD_FIXED: six literal text fragments.  The visible captions are the
# fixed (non-character) keys: Backspace, Tab, CapsLock, Enter, Shift, Ctrl, Alt,
# AltGr and Menu.
# NOTE(review): presumably these are the constant pieces of markup placed
# around/between the generated rows of the keyboard diagram, and each caption
# was originally wrapped in tags that are not visible in this listing --
# confirm against the pristine module; do not edit the literals from here.
11598             my @HTML_KBD_FIXED = ('
11599            
11600             ',
11601             'Backspace
11602            
11603            
Tab',
11604             '
11605            
11606            
CapsLock',
11607             'Enter
11608            
11609            
Shift',
11610             'Shift
11611            
11612            
CtrlAlt',
11613             'AltGrMenuCtrl
11614            
11615             ');
11616            
# classes_by_chars($self, $h_classes, $opt, $layer,
#                  $lc0, $uc0, $lc, $uc, $k_base, $k)
#
# Collects the class names registered in $h_classes for the characters of a key.
#
#   $h_classes    hash of hashes of array refs: {table name}{character} => [class names]
#   $opt          array ref of option names
#   $layer        layer suffix appended to the table name; every lookup is done
#                 both without it and with it
#   $lc0, $uc0    "background" characters, looked up in tables named after $k_base
#   $lc,  $uc     "foreground" characters, looked up in tables named after $k
#
# For every defined character the tables "$prefix$layer" and, for each option
# $o, "$prefix$layer=$o" are consulted.  Returns the distinct class names found
# (in list context; their count in scalar context).  $self is not used.
#
# The lookups are read-only: missing tables or characters are skipped instead
# of being created as empty entries in the caller's $h_classes.
sub classes_by_chars ($$$$$$$$$$) {
    my ($self, $h_classes, $opt, $layer, $lc0, $uc0, $lc, $uc, $k_base, $k) = @_;
    my %cl;

    # An undefined layer would only repeat the layer-less lookups (and warn),
    # so it is left out.
    for my $L ('', (defined $layer ? $layer : ())) {
        # Background characters go with $k_base, foreground ones with $k.
        for my $group ([$k_base, $lc0, $uc0], [$k, $lc, $uc]) {
            my ($prefix, @chars) = @$group;
            for my $c (grep { defined } @chars) {
                for my $table ("$prefix$L", map { "$prefix$L=$_" } @$opt) {
                    # Step down one level at a time so nothing is autovivified.
                    my $by_char = $h_classes->{$table} or next;
                    my $names   = $by_char->{$c}       or next;
                    $cl{$_}++ for @$names;
                }
            }
        }
    }
    return keys %cl;
}
11636            
# apply_kmap($self, $kmap, $c)
#
# Translates a key entry through a character map.
#
#   $kmap   hash ref keyed by $self->key2hex(character); a false $kmap means
#           "no translation" and $c is handed back untouched
#   $c      a character, or an array ref whose first element is the character
#
# Returns undef when the character has no entry in $kmap.  When the entry is a
# plain hex string, the character with that code is returned.  When the entry
# is an array ref, a new array ref is returned whose first element is the hex
# string converted to a character and whose other elements are copied as is.
sub apply_kmap($$$) {
    my ($self, $kmap, $c) = @_;
    return $c unless $kmap;

    my $char   = ref $c ? $c->[0] : $c;
    my $mapped = $kmap->{ $self->key2hex($char) };
    return $mapped unless defined $mapped;      # no entry: hand back the undef
    return chr hex $mapped unless ref $mapped;

    # One-level copy, so the entry stored in $kmap is not modified.
    my @entry = @$mapped;
    $entry[0] = chr hex $entry[0];
    return \@entry;
}
11647            
# do_keys($self, $opt, @entries) -- every element of @entries is an array ref
#     [ lc, uc, f, kmap, layerN, h_classes, name, classes... ]
# First pass (the for loop):
#   * lc/uc are translated with apply_kmap() when the entry has a kmap, then
#     reduced to their first element if they are array refs;
#   * the pair from the first entry is remembered as the background characters
#     ($lc0, $uc0) -- $base is the "still on the first entry" flag;
#   * whole-key classes from classes_by_chars() (prefixes 'k'/'K') are counted
#     in %c_classes.
# Result: the concatenation of a_pair() called once per entry, with lc/uc passed
# through apply_kmap() again (not reduced to the first element here) and the
# trailing classes passed as a fresh array ref.
# NOTE(review): $q and $cl (the class attribute text) are computed but never
# used, and the join contains empty "" / '' literals -- presumably the opening
# and closing tag markup was lost when this listing was produced; confirm
# against the pristine module before editing.
11648             sub do_keys ($$$@) { # calculate classes related to the “whole key”, and emit the “content” of the key
11649 0     0 0   my ($self, $opt, $base, $out, $lc0, $uc0, %c_classes) = (shift, shift, 1, '');
11650 0           for my $in (@_) {
11651 0           my ($lc, $uc, $f, $kmap, $layerN, $h_classes, $name, @classes) = @$in;
11652 0   0       $kmap and $_ = $self->apply_kmap($kmap, $_) for ($lc, $uc);
11653 0   0       ref and $_ = $_->[0] for $lc, $uc;
11654 0 0         ($lc0, $uc0) = ($lc, $uc), $base = 0 if $base;
11655             # k/K for key-based-on-(background/foreground) char; k=opt/K=opt likewise
11656 0           $c_classes{$_}++ for $self->classes_by_chars($h_classes, $opt, $layerN, $lc0, $uc0, $lc, $uc, 'k', 'K');
11657             }
11658 0           my @extra = sort keys %c_classes;
11659 0 0         my $q = ("@extra" =~ /\s/ ? '"' : '');
11660 0 0         my $cl = @extra ? " class=$q@extra$q" : '';
11661             # push @extra, 'from_se' if $k[0][0] =~ /---/i; # lc, uc, $h_classes, name, classes:
11662 0           join '', $out, "", (map $self->a_pair($opt, $lc0, $uc0, $self->apply_kmap($_->[3], $_->[0]),
11663             $self->apply_kmap($_->[3], $_->[1]),
11664             $_->[2], $_->[4], $_->[5], $_->[6], [@$_[7..$#$_]]), @_), ''
11665             }
11666            
# h($text) -- returns a copy of $text in which every '&', '<' and '>' is
# replaced by its entry in %html_esc.  The argument itself is not modified.
sub h($) {
    my ($text) = @_;
    $text =~ s/([&<>])/$html_esc{$1}/g;
    return $text;
}
# tags_by_rx($string, $tag1 => $rx1, $tag2 => $rx2, ...)
#
# Returns, in the order given, every tag whose pattern matches $string
# (their count in scalar context).  Dies when the tag/pattern list is not
# made of complete pairs, i.e. when the total argument count is even.
sub tags_by_rx {
    my ($subject, @pairs) = @_;
    die "Need odd number of arguments" if @pairs & 1;

    my @tags;
    while (my ($tag, $rx) = splice @pairs, 0, 2) {
        push @tags, $tag if $subject =~ $rx;
    }
    return @tags;
}
11677            
11678             sub a_pair ($$$$$$$$$$;@) {
                  # Format the unshifted/shifted pair of one key position; returns a string.
                  # Positional args: $self, $opts (array ref of option names), $lc0, $uc0, $LC, $UC, $F,
                  # $layerN, $h_classes, $name, $extra (array ref of class names; defaults to []).
                  # $LC/$UC are either plain values or array refs; for a ref, element [0] is taken as the
                  # character and a true element [2] is treated as a "prefix" marker.
                  # Result: ONE item when the pair is handled together (first branch below), otherwise the
                  # concatenation of a separate lc item and uc item (second branch).
11679 0   0 0 0   my($self, $opts, $lc0, $uc0, $LC, $UC, $F, $layerN, $h_classes, $name, $extra) =
11680             (shift, shift, shift, shift, shift, shift, shift, shift, shift, shift, shift || []);
11681             # warn "See lc prefix $LC->[0] " if ref $LC and $LC->[2];
11682 0 0 0       my ($lc1, $uc1) = map {(defined and ref()) ? $_->[0] : $_} $LC, $UC;
  0            
11683            
                  # Work on a private copy so that the caller's array is not modified by unshift/push/sort below.
11684 0           $extra = [@$extra];
11685 0           my $e = @$extra;
11686            
                  # '♪' stands in for an undefined character.
11687 0 0         my ($lc, $uc) = map {defined() ? $_ : '♪'} $lc1, $uc1;
  0            
11688             # return join '', map {defined() ? $_ : ''} $lc, $uc;
11689            
11690 0           my $opt = { map {($_, 1)} @$opts };
  0            
11691 0   0       my $base = (($name || '') eq 'base');
                  # True when both entries are prefix-marked refs carrying the same character.
11692 0   0       my $prefix2 = (ref($LC) and ref($UC) and $LC->[2] and $UC->[2] && $uc eq $lc);
                  # Single-item case: either $prefix2, or $uc is the plain case-pair of $lc (ß excluded)
                  # and the two are both defined or both undefined.
11693 0 0 0       if ($prefix2 or ($uc eq ucfirst $lc and $lc eq lc $uc and $lc ne 'ß' and defined($lc1) == defined($uc1))) {
      0        
      0        
      0        
11694 0 0         if ($uc ne $lc) {
11695 0   0       ref and $_->[2] and die "Do not expect a character `$_->[0]' to be a deadkey..." for $LC, $UC;
      0        
11696             }
                  # Per entry: is it a ref whose [2] equals 3 or 3<<3?  (Meaning of these values is defined
                  # elsewhere -- TODO confirm.)
11697 0 0 0       my @pref_i = map { ref $_ and (3 == ($_->[2] || 0) or (3 << 3) == ($_->[2] || 0)) } $LC, $UC;
  0   0        
      0        
11698 0 0 0       $prefix2 and $pref_i[1] and not $pref_i[0] and unshift @$extra, 'prefix2';
      0        
11699 0 0 0       $LC and ref $LC and $LC->[2] and unshift @$extra, 'prefix';
      0        
11700 0           push @$extra, $self->classes_by_chars($h_classes, $opts, $layerN, $lc0, undef, $lc1, undef, 'c', 'C');
11701             # unshift @$extra, tags_by_rx $lc, 'need-learn' => ($opt->{cyr} ? qr/N-A/i : qr/[ϝϙϲͻϿϾͲ℧ϗ]N-A/i);
11702             # push @$extra, 'vbell' unless defined $lc1;
                  # Flag characters for which uc() and ucfirst() disagree.
11703 0 0 0       push @$extra, (1 < length uc $lc1 ? 'three-cases-long' : 'three-cases')
    0          
11704             if defined $lc1 and uc $lc1 ne ucfirst $lc1;
11705 0 0         push @$extra, $name if $name;
                  # NOTE(review): $q and $cl (and $e, $opt above) are assigned but not read anywhere in the
                  # code visible in this listing.
11706 0 0         my $q = ("@$extra" =~ /\s/ ? '"' : '');
11707 0           @$extra = sort @$extra;
11708 0 0         my $cl = @$extra ? " class=$q@$extra$q" : '';
                  # NOTE(review): the two "" operands around h($uc) are empty strings as shown here; they look
                  # like literals whose markup was lost in this listing -- confirm against the module source.
11709 0 0         $base ? "" . h($uc) . "" : $self->char_2_html_span(undef, $UC, $uc, $F, {}, @$extra)
11710             # "" . $out . "";
11711             } else {
11712 0           my (@e_lc, @e_uc);
                  # Two-item case.  One record per case:
                  # [shown char, class list, 'lc'|'uc', original entry, $lc0/$uc0, resolved char (may be undef)].
11713 0           my @do = ([$lc, [], 'lc', $LC, $lc0, $lc1], [$uc, [], 'uc', $UC, $uc0, $uc1]);
11714             # warn "See lc prefix $LC->[0] " if ref $LC and $LC->[2];
11715 0   0       $_->[3] and ref $_->[3] and $_->[3][2] and push @{$_->[1]}, 'prefix' for @do;
  0   0        
      0        
11716 0   0       $_->[3] and ref $_->[3] and (3 == ($_->[3][2] || 0) or (3 << 3) == ($_->[3][2] || 0)) and push @{$_->[1]}, 'prefix2' for @do;
  0   0        
      0        
      0        
11717 0           push @{$_->[1]}, $self->classes_by_chars($h_classes, $opts, $layerN, $_->[4], undef, $_->[5], undef, 'c', 'C'),
11718             tags_by_rx $_->[0], 'not-surr' => qr/[„‚“‘”’«‹»›‐–—―‒‑‵‶‷′″‴⁗〃´]/i # white
11719 0           for @do;
                  # 'vbell' marks a case for which no character is defined.
11720 0           push @{$_->[1]}, 'vbell' for grep !defined $_->[5], @do;
  0            
11721             join '', map {
11722 0 0         push @{$_->[1]}, ($name ? "$name-$_->[2]" : $_->[2]);
  0            
  0            
11723 0           my $ee = [sort @$extra, @{$_->[1]}];
  0            
11724 0 0         my $q = ("@$ee" =~ /\s/) ? '"' : '';
                  # The value of this last statement (the assignment to $o) is what map collects.
11725 0 0         my $o = ($base ? "" . h($_->[0]) . ""
11726             : $self->char_2_html_span(undef, $_->[3], $_->[0], $F, {}, @$ee));
11727             # "[2]$q>$o";
11728             } @do;
11729             }
11730             }
11731            
11732             my $kbdrow = 0;
                  # ^ File-scoped counter: bumped on every keys2html_diagram() call made with the `oneRow' option.
11733             sub keys2html_diagram ($$$$@) {
                  # Build the diagram string for a run of key positions.
                  # Args: $self, $opts (array ref; items are either FLAG or NAME=VALUE), $cnt (number of key
                  # positions), $new_row (hash ref: key ordinal => true where the next @fixed fragment is
                  # inserted), followed by one array ref per layer: [layer, @rest] (see `rest =' below).
                  # Recognized options in the code below: oneRow, startKey, cntKeys.
11734 0     0 0   my ($self, $opts, $cnt, $new_row) = (shift, shift, shift, shift);
11735 0 0         my %opts = map { /^\w+=/ ? split /=/, $_, 2 : ($_, 1)} @$opts;
  0            
                  # With oneRow, successive calls get an offset cycling 0,1,2 (the first call sees $kbdrow == 0),
                  # rendered as 2*offset \xA0 characters.
11736 0   0       my $off = (($opts{oneRow} && $kbdrow++) || 0) % 3;
11737 0           $off = "\xA0" x (2*$off);
11738 0 0         my @fixed = ($opts{oneRow} ? ("$off") : @HTML_KBD_FIXED);
11739 0           my $out = shift @fixed;
11740             # $cnt = $#{$layers_info->[0]} if $cnt > $#{$layers_info->[0]};
11741 0           my @keys = (0..($cnt-1));
11742 0   0       my $start = ($opts{startKey} || 0) % $cnt;
11743 0   0       my $CNT = $opts{cntKeys} || $cnt;
                  # Repeat the ordinal list often enough that a window of $CNT keys starting at $start can wrap around.
11744 0           @keys = (@keys) x ( 1 + int( ($start+$CNT-1)/$cnt ) );
11745 0           @keys = @keys[$start .. ($start + $CNT - 1)];
11746             KEY:
11747 0           for my $kn (@keys) { # Ordinal of keyboard's key
11748 0 0 0       $out .= (shift(@fixed) || '') if $new_row->{$kn};
                  # This inner @keys shadows the outer list; it collects one entry per layer for this key.
                  # NOTE(review): $symb and $last are not used in the code visible here.
11749 0           my ($symb, @keys, $last) = 0;
11750 0           for my $KK (@_) { # Layers
11751 0           my($layer, @rest) = @$KK; # rest = face, kmap, layerN, class_hash, name, classes
11752 0           push @keys, [@{$layer->[$kn]}[0,1], @rest];
  0            
11753             }
11754 0           $out .= $self->do_keys($opts, @keys);
11755             }
11756 0           $out .= join '', @fixed;
                  # NOTE(review): as shown, this appends an empty string; the literal may have lost its content
                  # in this listing -- confirm against the module source.
11757 0 0         $out .= "" if $opts{oneRow};
11758 0           $out
11759             }
11760            
11761             sub html_keyboard_diagram ($$$) {
                  # Parse the whitespace-separated spec $OPT and return the diagram string produced by
                  # keys2html_diagram(), wrapped in $pre/$post.
                  # $OPT must contain exactly one `/opt=A,B,...' item; every other item must have the form
                  #   [CLASS[,CLASS...]]+[NAME]=FACE,PREFIX,WHICH
                  # (see the regex below and the examples in the trailing comment of the `die' line).
                  # NOTE(review): $global_opt, %tr and $is_layer are not used in the code visible here.
11762 0     0 0   my($self, $OPT, $global_opt, @opt, @layers, $face0, $is_layer) = (shift, shift, shift);
11763 0           my %tr = qw(l 0 c 1 h 2);
11764 0           for my $arg (split /\s+/, $OPT) {
11765 0 0         push(@opt, $arg), next if $arg =~ s(^/opt=)(); # BELOW: `base' becomes NAME, `on-right' becomes CLASSES
11766 0 0         die "unrecognized `rebuild' option: `$arg'" # +=l,0,0 +base=l,0,0 +=l,0,1 +=l,ƒ,0 on-right+=c,0,1
11767             unless my($classes, $name, $f, $prefix, $which) = ( $arg =~ m{^((?:[-\w]+(?:,[-\w]+)*)?)\+([-\w]*)=(\w+),([\da-f]{4}|[^\x20-\x7e][^,]*|[02]?),(\d+|-)$}i );
                  # FACE may be given via an entry of $self->{face_shortcuts}.
11768 0 0         $f = $self->{face_shortcuts}{$f} if exists $self->{face_shortcuts}{$f};
                  # The first item whose WHICH is not `-' supplies the face used for geometry below.
11769 0 0 0       $face0 ||= $f unless $which eq '-';
11770 0           $prefix =~ s/◌(?=\p{NonspacingMark})//g;
11771 0           $prefix = $self->charhex2key($prefix);
                  # WHICH of `-': $L becomes [$f] (FACE is then used as a layer name) and $which is reset to 0;
                  # otherwise $L is false here and is filled in from the face's layers below.
11772 0   0       my $L = ($which eq '-' and $which = 0, [$f]);
11773 0 0 0       warn "unknown layer $L->[0]" if $L and not $self->{layers}{$L->[$which]};
11774             die "html_keyboard_diagram(): unknown face `$f'"
11775 0 0 0       unless $L ||= ($self->{faces}{$f}{layers} or $self->export_layers($f, $f));
      0        
                  # An empty PREFIX is allowed and leaves $kmap false.
                  # NOTE(review): this message names output_html_keyboard_diagram(), unlike the one above.
11776 0 0 0       my $kmap = $self->{faces}{$f}{'[deadkeyFaceHexMap]'}{$self->key2hex($prefix)}
11777             or not length $prefix or die "output_html_keyboard_diagram(): Unknown prefix key `$prefix' for face $f";
11778             # create_composite_layers() translates 0000 key to ''
11779             # warn "I see HTML_classes for face=$f, prefix=`$prefix'" if $self->{faces}{$f}{'[HTML_classes]'}{length $prefix ? $self->key2hex($prefix) : ''};
11780 0   0       my $h_classes = $self->{faces}{$f}{'[HTML_classes]'}{length $prefix ? $self->key2hex($prefix) : ''} || {};
11781 0           push(@layers, [$self->{layers}{$L->[$which]}, $f, $kmap, $which, $h_classes, $name, split /,/, $classes]);
11782             }
11783 0 0         die "there must be exactly one /opt= argument in <<$OPT>>" unless @opt == 1;
11784 0           my $opt = [split /,/, $opt[0], -1];
                  # @g: the '[geometry]' list of $face0.  After dropping its last element, the running sums of
                  # the remaining elements become the keys of %new_row; the final sum (+1) is the key count.
11785 0 0         my ($cnt, @g, %new_row) = (0, @{ $self->{faces}{$face0}{'[geometry]'} || [] }); # keep only 1 from the last row
  0            
11786 0 0         @g or die "Face `$face0' has no associated layer with geometry info; did you set geometry_via_layer?";
11787 0           pop @g;
11788 0           $new_row{ $cnt += $_ }++ for @g;
11789 0           my ($pre, $post) = ('', '');
                  # NOTE(review): the two string literals in the next statement span several rows of this
                  # listing and appear to have lost markup; confirm their content against the module source.
11790 0 0         ($pre, $post) = ("\n
", "
\nHover mouse here to see how characters look in RTL context.\n")
11791             if grep /^rtl-hover(-Trivia)?$/, @$opt;
11792 0 0         $post .= " Trivia: note mirroring of <{[()]}>." if grep /^rtl-hover-Trivia$/, @$opt;
11793 0           $pre . $self->keys2html_diagram($opt, $cnt+1, \%new_row, @layers) . $post;
11794             }
11795            
11796            
11797             # wget -O - http://cgit.freedesktop.org/xorg/proto/xproto/plain/keysymdef.h | perl -C31 -wlne 'next unless /\bXK_(\w+)\s+0x00([a-fA-F\d]+)/; print chr hex $2, qq(\t$1)' > ! oooo1
11798             # wget -O - http://cgit.freedesktop.org/xorg/proto/xproto/plain/keysymdef.h | perl -C31 -wlne "next unless /\bXK_(\w+)\s+0x([a-fA-F\d]+)\s+\/\*(?:\(?|\s+)U\+([a-fA-F\d]+)/; print chr hex $3, qq(\t$1)" > oooo3
11799             my @enc_dotcompose; # Have many-to-1, inverting hash would lose info; Do not distinguish Left/leftarrow etc.
11800 1     1   70179 { no warnings 'qw';
  1         2  
  1         3675  
11801             @enc_dotcompose = (qw#
11802             ` grave
11803             ' apostrophe
11804             " quotedbl
11805             ~ asciitilde
11806             ! exclam
11807             ? question
11808             @ at
11809             #, qw!
11810             # numbersign
11811             $ dollar
11812             % percent
11813             ^ asciicircum
11814             & ampersand
11815             * asterisk
11816             ( parenleft
11817             ) parenright
11818             [ bracketleft
11819             ] bracketright
11820             { braceleft
11821             } braceright
11822             - minus
11823             + plus
11824             = equal
11825             _ underscore
11826             < less
11827             > greater
11828             \ backslash
11829             / slash
11830             | bar
11831             , comma
11832             . period
11833             : colon
11834             ; semicolon
11835             _bar underbar
11836            
11837            
11838             ¡ exclamdown
11839             ¢ cent
11840             £ sterling
11841             ¤ currency
11842             ¥ yen
11843             ¦ brokenbar
11844             § section
11845             ¨ diaeresis
11846             © copyright
11847             ª ordfeminine
11848             « guillemotleft
11849             ¬ notsign
11850             ­ hyphen
11851             ® registered
11852             ¯ macron
11853             ° degree
11854             ± plusminus
11855             ² twosuperior
11856             ³ threesuperior
11857             ´ acute
11858             µ mu
11859             ¶ paragraph
11860             · periodcentered
11861             ¸ cedilla
11862             ¹ onesuperior
11863             º masculine
11864             » guillemotright
11865             ¼ onequarter
11866             ½ onehalf
11867             ¾ threequarters
11868             ¿ questiondown
11869             À Agrave
11870             Á Aacute
11871             Â Acircumflex
11872             Ã Atilde
11873             Ä Adiaeresis
11874             Å Aring
11875             Æ AE
11876             Ç Ccedilla
11877             È Egrave
11878             É Eacute
11879             Ê Ecircumflex
11880             Ë Ediaeresis
11881             Ì Igrave
11882             Í Iacute
11883             Î Icircumflex
11884             Ï Idiaeresis
11885             Ð ETH
11886             Ð Eth
11887             Ñ Ntilde
11888             Ò Ograve
11889             Ó Oacute
11890             Ô Ocircumflex
11891             Õ Otilde
11892             Ö Odiaeresis
11893             × multiply
11894             Ø Oslash
11895             Ø Ooblique
11896             Ù Ugrave
11897             Ú Uacute
11898             Û Ucircumflex
11899             Ü Udiaeresis
11900             Ý Yacute
11901             Þ THORN
11902             Þ Thorn
11903             ß ssharp
11904             à agrave
11905             á aacute
11906             â acircumflex
11907             ã atilde
11908             ä adiaeresis
11909             å aring
11910             æ ae
11911             ç ccedilla
11912             è egrave
11913             é eacute
11914             ê ecircumflex
11915             ë ediaeresis
11916             ì igrave
11917             í iacute
11918             î icircumflex
11919             ï idiaeresis
11920             ð eth
11921             ñ ntilde
11922             ò ograve
11923             ó oacute
11924             ô ocircumflex
11925             õ otilde
11926             ö odiaeresis
11927             ÷ division
11928             ø oslash
11929             ø ooblique
11930             ù ugrave
11931             ú uacute
11932             û ucircumflex
11933             ü udiaeresis
11934             ý yacute
11935             þ thorn
11936             ÿ ydiaeresis
11937            
11938             Cyr_ђ Serbian_dje
11939             ѓ Macedonia_gje
11940             є Ukrainian_ie
11941             Cyr_ѕ Macedonia_dse
11942             Cyr_і Ukrainian_i
11943             Cyr_ї Ukrainian_yi
11944             Cyr_ћ Serbian_tshe
11945             Cyr_ќ Macedonia_kje
11946             ґ Ukrainian_ghe_with_upturn
11947             Cyr_ў Byelorussian_shortu
11948             № numerosign
11949             Cyr_Ђ Serbian_DJE
11950             Ѓ Macedonia_GJE
11951             Є Ukrainian_IE
11952             Cyr_Ѕ Macedonia_DSE
11953             Cyr_І Ukrainian_I
11954             Cyr_Ї Ukrainian_YI
11955             Cyr_Ћ Serbian_TSHE
11956             Cyr_Ќ Macedonia_KJE
11957             Ґ Ukrainian_GHE_WITH_UPTURN
11958             Cyr_Ў Byelorussian_SHORTU
11959            
11960             ’sq rightsinglequotemark
11961             ‘sq leftsinglequotemark
11962             • enfilledcircbullet
11963             ♀ femalesymbol
11964             ♂ malesymbol
11965             NBSP nobreakspace
11966             … ellipsis
11967             ∩# intersection
11968             ∫ integral
11969             ≤ lessthanequal
11970             ≥ greaterthanequal
11971            
11972             d` dead_grave
11973             d' dead_acute
11974             d^ dead_circumflex
11975             d~ dead_tilde
11976             d¯ dead_macron
11977             dd# dead_breve----
11978             d^. dead_abovedot
11979             d" dead_diaeresis
11980             d^° dead_abovering
11981             d'' dead_doubleacute
11982             d^v dead_caron
11983             d, dead_cedilla
11984             dd# dead_ogonek---
11985             d_ι dead_iota
11986             d_voiced dead_voiced_sound
11987             d_½voiced dead_semivoiced_sound
11988             d. dead_belowdot
11989             dd# dead_hook---
11990             dd# dead_horn---
11991             d/ dead_stroke
11992             d^, dead_abovecomma
11993             dd# dead_abovereversedcomma---
11994             d`` dead_doublegrave
11995             d``# dead_double_grave
11996             d_° dead_belowring
11997             d__ dead_belowmacron
11998             dd# dead_belowcircumflex---
11999             d_~ dead_belowtilde
12000             dd# dead_belowbreve---
12001             d_" dead_belowdiaeresis
12002             d_invbrev dead_invertedbreve
12003             d_inv_brev dead_inverted_breve
12004             d_, dead_belowcomma
12005             dd# dead_currency
12006            
12007             d^( dead_dasia
12008             d^) dead_psili
12009            
12010             Ś Sacute
12011             Š Scaron
12012             Ş Scedilla
12013             Ť Tcaron
12014             Ź Zacute
12015             Ž Zcaron
12016             Ż Zabovedot
12017             ą aogonek
12018             ˛ ogonek
12019             ł lstroke
12020             ľ lcaron
12021             ś sacute
12022             ˇ caron
12023             š scaron
12024             ş scedilla
12025             ť tcaron
12026             ź zacute
12027             ˝ doubleacute
12028             ž zcaron
12029             ż zabovedot
12030             Ŕ Racute
12031             Ă Abreve
12032             Ĺ Lacute
12033             Ć Cacute
12034             Č Ccaron
12035             Ę Eogonek
12036             Ě Ecaron
12037             Ď Dcaron
12038             Đ Dstroke
12039             Ń Nacute
12040             Ň Ncaron
12041             Ő Odoubleacute
12042             Ř Rcaron
12043             Ů Uring
12044             Ű Udoubleacute
12045             Ţ Tcedilla
12046             ŕ racute
12047             ă abreve
12048             ĺ lacute
12049             ć cacute
12050             č ccaron
12051             ę eogonek
12052             ě ecaron
12053             ď dcaron
12054             đ dstroke
12055             ń nacute
12056             ň ncaron
12057             ő odoubleacute
12058             ř rcaron
12059             ů uring
12060             ű udoubleacute
12061             ţ tcedilla
12062             ˙ abovedot
12063            
12064             Ŗ Rcedilla
12065             Ĩ Itilde
12066             Ļ Lcedilla
12067             Ē Emacron
12068             Ģ Gcedilla
12069             Ŧ Tslash
12070             ŗ rcedilla
12071             ĩ itilde
12072             ļ lcedilla
12073             ē emacron
12074             ģ gcedilla
12075             ŧ tslash
12076             Ŋ ENG
12077             ŋ eng
12078             Ā Amacron
12079             Į Iogonek
12080             Ė Eabovedot
12081             Ī Imacron
12082             Ņ Ncedilla
12083             Ō Omacron
12084             Ķ Kcedilla
12085             Ų Uogonek
12086             Ũ Utilde
12087             Ū Umacron
12088             ā amacron
12089             į iogonek
12090             ė eabovedot
12091             ī imacron
12092             ņ ncedilla
12093             ō omacron
12094             ķ kcedilla
12095             ų uogonek
12096             ũ utilde
12097             ū umacron
12098            
12099             Ơ Ohorn
12100             ơ ohorn
12101             Ư Uhorn
12102             ư uhorn
12103            
12104             < leftcaret
12105             > rightcaret
12106             ∨ downcaret
12107             ∧ upcaret
12108             ¯ overbar
12109             ⊤ downtack
12110             ∩ upshoe
12111             ⌊ downstile
12112             _ underbar
12113             ∘ jot
12114             ⎕ quad
12115             ⊥ uptack
12116             ○ circle
12117             ⌈ upstile
12118             ∪ downshoe
12119             ⊃ rightshoe
12120             ⊂ leftshoe
12121             ⊣ lefttack
12122             ⊢ righttack
12123            
12124             ≤ lessthanequal
12125             ≠ notequal
12126             ≥ greaterthanequal
12127             ∫ integral
12128             ∴ therefore
12129             ∝ variation
12130             ∞ infinity
12131             ∇ nabla
12132             ∼ approximate
12133             ≃ similarequal
12134             ⇔ ifonlyif
12135             ⇒ implies
12136             ≡ identical
12137             √ radical
12138             ⊂ includedin
12139             ⊃ includes
12140             ∩ intersection
12141             ∪ union
12142             ∧ logicaland
12143             ∨ logicalor
12144             ∂ partialderivative
12145             ƒ function
12146             ← leftarrow
12147             ↑ uparrow
12148             → rightarrow
12149             ↓ downarrow
12150             ◆ soliddiamond
12151             ▒ checkerboard
12152            
12153             CP Multi_key
12154            
12155             +# KP_Add
12156             -# KP_Subtract
12157             *# KP_Multiply
12158             /# KP_Divide
12159             .# KP_Decimal
12160             =# KP_Equal
12161             SPC# KP_Space
12162            
12163             ← Left → Right ↑ Up ↓ Down
12164             !, map {("$_#", "KP_$_")} 0..9);
12165             }
12166            
12167             my %dec_dotcompose = reverse @enc_dotcompose;
                  # ^ Maps NAME => SHORT by reversing the flat (SHORT, NAME, ...) list above.  Because the list
                  # is reversed before the hash assignment, for a NAME listed more than once the entry that
                  # comes FIRST in @enc_dotcompose is the one that survives.
12168             # perl -C31 -wne "/^(.)\tCyrillic_(\w+)/ and print qq($2 $1 )" oooo3 >oooo-cyr
12169             # perl -C31 -wne "/^(.)\thebrew_(\w+)/ and print qq($2 $1 )" oooo3 >oooo-heb
12170             my %cyr = qw( GHE_bar Ғ ghe_bar ғ ZHE_descender Җ zhe_descender җ KA_descender Қ ka_descender қ KA_vertstroke Ҝ ka_vertstroke ҝ
12171             EN_descender Ң en_descender ң U_straight Ү u_straight ү U_straight_bar Ұ u_straight_bar ұ HA_descender Ҳ
12172             ha_descender ҳ CHE_descender Ҷ che_descender ҷ CHE_vertstroke Ҹ che_vertstroke ҹ SHHA Һ shha һ SCHWA Ә schwa ә
12173             I_macron Ӣ i_macron ӣ O_bar Ө o_bar ө U_macron Ӯ u_macron ӯ io ё je ј lje љ nje њ dzhe џ IO Ё JE Ј LJE Љ NJE Њ
12174             DZHE Џ yu ю a а be б tse ц de д ie е ef ф ghe г ha х i и shorti й ka к el л em м en н o о pe п ya я er р es с te т
12175             u у zhe ж ve в softsign ь yeru ы ze з sha ш e э shcha щ che ч hardsign ъ YU Ю A А BE Б TSE Ц DE Д IE Е EF Ф GHE Г
12176             HA Х I И SHORTI Й KA К EL Л EM М EN Н O О PE П YA Я ER Р ES С TE Т U У ZHE Ж VE В SOFTSIGN Ь YERU Ы ZE З SHA Ш E Э
12177             SHCHA Щ CHE Ч HARDSIGN Ъ );
12178             my %heb = qw( doublelowline ‗ aleph א bet ב gimel ג dalet ד he ה waw ו zain ז chet ח tet ט yod י finalkaph ך kaph כ lamed ל
12179             finalmem ם mem מ finalnun ן nun נ samech ס ayin ע finalpe ף pe פ finalzade ץ zade צ qoph ק resh ר shin ש taw ת
12180             beth ב gimmel ג daleth ד samekh ס zayin ז het ח teth ט zadi צ kuf ק taf ת );
12181             my %grk = qw( ALPHAaccent Ά EPSILONaccent Έ ETAaccent Ή IOTAaccent Ί IOTAdieresis Ϊ OMICRONaccent Ό UPSILONaccent Ύ
12182             UPSILONdieresis Ϋ OMEGAaccent Ώ accentdieresis ΅ horizbar ― alphaaccent ά epsilonaccent έ etaaccent ή iotaaccent ί
12183             iotadieresis ϊ iotaaccentdieresis ΐ omicronaccent ό upsilonaccent ύ upsilondieresis ϋ upsilonaccentdieresis ΰ
12184             omegaaccent ώ ALPHA Α BETA Β GAMMA Γ DELTA Δ EPSILON Ε ZETA Ζ ETA Η THETA Θ IOTA Ι KAPPA Κ LAMDA Λ LAMBDA Λ MU Μ
12185             NU Ν XI Ξ OMICRON Ο PI Π RHO Ρ SIGMA Σ TAU Τ UPSILON Υ PHI Φ CHI Χ PSI Ψ OMEGA Ω alpha α beta β gamma γ delta δ
12186             epsilon ε zeta ζ eta η theta θ iota ι kappa κ lamda λ lambda λ mu μ nu ν xi ξ omicron ο pi π rho ρ sigma σ
12187             finalsmallsigma ς tau τ upsilon υ phi φ chi χ psi ψ omega ω );
12188             $dec_dotcompose{"Cyrillic_$_"} = "Cyr_$cyr{$_}" for keys %cyr;
                  # ^ (and the two lines below) add "Cyrillic_NAME", "hebrew_NAME" and "Greek_NAME" entries
                  # whose values are the character from %cyr/%heb/%grk prefixed by "Cyr_", "heb_" or "Gr_".
12189             $dec_dotcompose{"hebrew_$_"} = "heb_$heb{$_}" for keys %heb;
12190             $dec_dotcompose{"Greek_$_"} = "Gr_$grk{$_}" for keys %grk;
12191            
# shorten_dotcompose($self, $name [, $decode_U])
# Shorten a key name but leave it readable and unambiguous (for a more concise printout):
#   - a script prefix Cyr…/Ukr…/Gr…/heb…/Ar… followed by `_' is cut down to those letters;
#   - a leading word `dead' followed by `_' becomes `d';
#   - `Gr_…dieresis' and `d_diaeresis' lose their final `esis';
#   - with a true $decode_U, a name of the form U<4-6 hex digits> becomes `uni_' . the character.
# $self is accepted but not used.  Returns the shortened name.
sub shorten_dotcompose ($$;$) {
    my (undef, $name, $decode_U) = @_;
    $name =~ s/\b(Cyr|Ukr|Gr|heb|Ar)[a-z]+(?=_)/$1/;
    $name =~ s/\b(dead)(?=_)/d/;
    $name =~ s/\b(Gr_\w+dier|d_diaer)esis/$1/;
    if ($decode_U) {
        $name =~ s/^U([a-fA-F\d]{4,6})$/ 'uni_' . chr hex $1 /e;
    }
    return $name;
}
12200            
# dec_dotcompose($self, $line [, $dec_U])
# Decode one line of the form  <key1> <key2> ... : "result" ...
# Returns (short names of the keys..., result string).  Each key name is looked up in the
# file-level %dec_dotcompose; names missing there go through shorten_dotcompose($name, $dec_U).
# On a line which cannot be decoded, warns and returns nothing.
sub dec_dotcompose ($$;$) {
    my ($self, $in, $dec_U) = @_;
    my ($lhs, $rhs) = split /:/, $in, 2;
    unless ($rhs) {
        warn("Can't parse <<$in>>");
        return;
    }
    my @names = ($lhs =~ /<(\w+)>/g);
    unless (@names) {
        warn("Unknown format of IN in <<$in>>");
        return;
    }
    unless ($rhs =~ /"(.+?)"/) {
        warn("Unknown format of OUT in <<$in>>");
        return;
    }
    my $result = $1;        # save before any other regex can run
    my @short;
    for my $name (@names) {
        push @short, (exists $dec_dotcompose{$name}
                      ? $dec_dotcompose{$name}
                      : $self->shorten_dotcompose($name, $dec_U));
    }
    return (@short, $result);
}
12211             # Stats: about 250 in: egrep "CP.*d_|d_.*CP" o-std
# process_dotcompose($self, $fh, $sub [, $dec_U])
# Read the filehandle $fh line by line.  Lines which are blank, start with `#', or start
# with the word `include' (each after optional whitespace) are skipped, as are lines for
# which $self->dec_dotcompose($line, $dec_U) returns nothing.  For every other line call
#     $sub->($self, RESULT, KEY1, KEY2, ...)
# where (KEY1, KEY2, ..., RESULT) is the list returned by dec_dotcompose().
#
# The line is kept in a lexical variable: a bare `while (<$fh>)' assigns to the global $_
# without localizing it, which would overwrite the current element of any for/map/grep
# the caller happens to be executing.
# Stats: about 250 in: egrep "CP.*d_|d_.*CP" o-std
sub process_dotcompose ($$$;$) {
    my ($self, $fh, $sub, $dec_U) = @_;
    while (defined(my $line = <$fh>)) {
        next if $line =~ /^\s*(#|include\b)/;
        next unless $line =~ /\S/;
        next unless my @in = $self->dec_dotcompose($line, $dec_U);
        $sub->($self, $in[-1], @in[0..$#in-1]);
    }
}
12221            
12222             sub filter_dotcompose ($;$) {
                  # Run process_dotcompose() over $fh (default: the ARGV filehandle) and print one line per
                  # decoded entry: the key names joined by spaces, then the result string.
                  # NOTE(review): the trailing comment on the print line mentions two spaces, but the literal
                  # as shown has one; whitespace may have been collapsed in this listing -- confirm.
12223 0   0 0 0   my ($self, $fh) = (shift, shift || \*ARGV);
12224             $self->process_dotcompose($fh, sub ($$@) {
12225 0     0     my($self, $out) = (shift, shift);
12226 0           print "@_ $out\n"; # Two spaces to allow for combining marks
12227 0           });
12228             }
12229            
# put_val_deep($self, $h, $term, $val, KEY1, KEY2, ...)
# Store $val in the nested hash $h under the path KEY1 -> KEY2 -> ..., creating the
# intermediate hashes as needed.  At least two keys are required (dies otherwise).
# If an intermediate node already holds a non-hash value, it is replaced by the hash
# { $term => OLD_VALUE } when $term is true; with a false $term this is fatal.
# $self is not used.  Returns $val (the value of the final assignment).
sub put_val_deep ($$$$@) {
    my ($self, $h, $term, $val, $k, @rest) = @_;
    die "No key(s) in put_val_deep()" unless @rest;
    for my $next_key (@rest) {
        $h->{$k} = {} unless defined $h->{$k};
        my $parent = $h;
        $h = $parent->{$k};
        if ('HASH' ne ref $h) {
            die "Encountered non-HASH in put_val_deep(): <$k>" unless $term;
            $h = $parent->{$k} = { $term => $h };   # keep the old leaf under the terminator key
        }
        $k = $next_key;
    }
    $h->{$k} = $val;
}
12246            
# compose_array_2_hash($self, $list, $h)
# $list is an array ref of [OUT, TERM, KEY1, KEY2, ...] records.  Each record is stored
# into the nested hash $h via put_val_deep(), with OUT and all the keys first converted
# by key2hex(); TERM is passed through as the terminator key.
sub compose_array_2_hash ($$$) {
    my ($self, $list, $h) = @_;
    foreach my $record (@$list) {
        my ($out, $term, @in) = @$record;
        my @hexed = ($self->key2hex($out), map { $self->key2hex($_) } @in);
        $self->put_val_deep($h, $term, @hexed);
    }
}
12254            
# compose_line_2_array($self, $list, $out, $massage, $term, KEY1, KEY2, ...)
# Append the record [$out, $term, KEYS...] to the array @$list, unless the binding is rejected.
# With a true $massage the keys (a private copy) are first normalized:
#   - a prefix uni_/Gr_/Cyr_/heb_ is removed when exactly one non-ASCII character follows it;
#   - the name `space' becomes a literal space;
#   - the sequence must start with `CP', which is then dropped (otherwise: return).
# A binding is rejected when $out or any key is not exactly one character below U+10000,
# or when $out is the same as one of the keys; the rejection is reported via warn() when
# the debugging flag printSkippedComposeKey is on.
# Filter warnings via: egrep -v " d[^ ]|#" 00b | egrep -- "^---CP:" >00b2
sub compose_line_2_array ($$$$$@) {
    my ($self, $list, $out, $massage, $term, @in) = @_;
    if ($massage) {
        for my $key (@in) {             # @in is already a copy of the arguments
            $key =~ s/^(uni|Gr|Cyr|heb)_(?![\x00-\x7e])(?=.$)//;
            $key =~ s/^space$/ /;
        }
        #warn "compose: @in $out";
        return unless $in[0] eq 'CP';
        shift @in;
    }
    # Allow for one char only
    my $reject = (1 != length $out or 0x10000 <= ord $out
                  or grep({ 1 != length($_) or 0x10000 <= ord($_) } @in)
                  or grep($out eq $_, @in));
    if ($reject) {
        # The last make sense only in the context of keysymbol operations???
        warn("---CP: @in $out") if printSkippedComposeKey;
        return;
    }
    #warn "CP: @in $out";
    push @$list, [$out, $term, @in];
}
12270            
12271             sub compose_2_array ($$$$@) {
                  # Parse compose definitions from the filehandle $fh with the parser named by $method
                  # ('dotcompose', 'entity' or 'rfc1345'; anything else is fatal) and append the accepted
                  # bindings to the array @$a through compose_line_2_array().
                  # NOTE(review): in this listing the `next unless $line =~ /^\s*' row of the 'entity' branch
                  # and the second operand of the `..' range in the 'rfc1345' branch are cut off in the
                  # middle of a regex; the missing text must be restored from the module source.
                  # NOTE(review): in the 'entity' branch the sort block uses $a while this sub has a lexical
                  # `my $a' in scope (declared on the next row), so that $a is the lexical (the output array
                  # ref), not sort's package variable -- the ordering by length is therefore suspect.
12272 0     0 0   my($self, $method, $fh, $a) = (shift, shift, shift, shift);
12273            
12274 0 0         if ($method eq 'dotcompose') {
    0          
    0          
12275             $self->process_dotcompose($fh, sub ($$@) {
12276 0     0     my($self, $out) = (shift, shift);
12277 0           $self->compose_line_2_array($a, $out, 'massage', !!'terminate', @_);
12278 0           }, 'decode U');
12279             } elsif ($method eq 'entity') {
12280 0           while (my $line = <$fh>) {
12281 0 0         next unless $line =~ /^\s*
12282 0           my($out, @in) = (chr hex "$1", split /\s*,\s*/, "$2");
12283 0           $in[0] =~ s/\s+$//;
12284 0           @in = split /\s*,\s*/, $in[0];
12285 0           @in = sort {length($a) <=> length($b)} @in;
  0            
12286 0           for my $in (@in) { # Avoid entries more than 2x longer than the shortest possible
12287 0 0 0       next if length($in) > $avoid_overlong_synonims_Entity*length $in[0] or length($in) > $maxEntityLen;
12288 0           my @IN = split //, $in;
12289 0           $self->compose_line_2_array($a, $out, !'massage', $self->key2hex(' '), @IN);
12290             }
12291             }
12292             } elsif ($method eq 'rfc1345') { # http://tools.ietf.org/html/rfc1345
12293 0           my %cvt = qw(gt > lt < amp &);
12294 0           while (my $line = <$fh>) {
12295 0 0         next unless ($line =~ /^\s+SP\s+0020\s+SPACE\s*$/) .. ($line =~ /^
12296 0 0         next unless $line =~ /^\s+(\S+)\s+([a-fA-F\d]{4})\s/;
12297 0           my($out, $in) = (chr hex "$2", "$1");
12298 0 0         next if "$2" =~ /^e0/i; # Skip private parts
12299 0           $in =~ s/&([lg]t|amp);/$cvt{$1}/g;
12300 0 0         next if 1 == length $in;
12301 0           my @IN = split //, $in;
12302 0           $self->compose_line_2_array($a, $out, !'massage', $self->key2hex(' '), @IN);
12303             }
12304 0           $self->compose_line_2_array($a, '€', !'massage', $self->key2hex(' '), 'E', 'u'); # http://en.wikipedia.org/wiki/Unicode_input#Character_mnemonics
12305             } else {
12306 0           die "Unknown compose parser: $method";
12307             }
12308             }
12309            
# composefile_2_array($self, $method, $fn, $list)
# Open the file $fn for reading (decoded as utf8), feed it to compose_2_array() with the
# given parser $method and output array $list, and close it.  Failure to open or to close
# the file is fatal.
sub composefile_2_array ($$$$@) {
    my ($self, $method, $fn, $list) = @_;
    open(my $fh, '< :encoding(utf8)', $fn) or die "Can't open `$fn' for read: $!";
    $self->compose_2_array($method, $fh, $list);
    close($fh) or die "Can't close `$fn' for read: $!";
}
12316            
# merge_hash_to($self, $from, $to)
# Merge the nested hash $from into the nested hash $to (in place).  We do NOT do deep copy:
# values missing from $to are stored by reference.  For a key present on both sides:
#   - an existing non-hash value (a terminator) in $to wins over a terminator or a longer
#     binding coming from $from;
#   - an existing hash in $to wins over a new terminator coming from $from;
#   - two hashes are merged recursively.
# The second rule is enforced explicitly: recursing with a terminator (a plain string) as
# $from would dereference it as a hash, which is fatal under `strict refs'.
sub merge_hash_to ($$$) {
    my ($self, $from, $to) = @_;
    for my $k (keys %$from) {
        unless (exists $to->{$k}) {                     # nothing there yet: take the new value
            $to->{$k} = $from->{$k};
            next;
        }
        next if 'HASH' ne ref($to->{$k} || {});         # existing terminator wins
        next if 'HASH' ne ref $from->{$k};              # existing hash wins over new terminator
        $self->merge_hash_to($from->{$k}, $to->{$k});
    }
}
12325            
# create_composeArray($self, $key, $method)
# Fetch the list of file names stored under the configuration key $key (via get__value());
# returns nothing if there is none.  Otherwise parse each file with composefile_2_array()
# using the parser $method, and return a reference to the array holding one array of
# bindings per file, in the order of the file names.
sub create_composeArray ($$$) {
    my ($self, $key, $method) = @_;
    my $names = $self->get__value($key);
    return unless $names;
    my @per_file;
    foreach my $file (@$names) {
        my $bindings = [];
        $self->composefile_2_array($method, $file, $bindings);
        push @per_file, $bindings;
    }
    return \@per_file;
}
12340            
# compose_Array_2_hash($self, $A)
# $A is an array ref of binding lists (as produced by create_composeArray()).  Each list
# is converted to its own nested hash by compose_array_2_hash(), and these hashes are
# merged, in order, into one hash (indexed by HEX) with merge_hash_to().
# Returns a reference to the merged hash.
sub compose_Array_2_hash ($$) {
    my ($self, $A) = @_;
    my %merged;
    foreach my $bindings (@$A) {
        my %tree;
        $self->compose_array_2_hash($bindings, \%tree);
        $self->merge_hash_to(\%tree, \%merged);
    }
    return \%merged;
}
12352            
12353             sub composehash_2_prefix ($$$$$$$$) {
                  # Convert the nested compose hash $h into a prefix-key map of face $F, recursively.
                  # Args: $self, $F (face name), $prefix (key under which the map of this level is stored),
                  # $h (hash for this level), $n (array ref: name components accumulated so far),
                  # $prefixCompose, $show and $comp_show (used to build the displayed prefix names).
                  # Side effects on $self->{faces}{$F}: fills '[deadkeyFaceHexMap]'{$prefix}, and for each
                  # sub-hash also '[prefixDocs]' and '[Show]'; counts leaf values in '[inCompose]'.
12354 0     0 0   my($self, $F, $prefix, $h, $n, $prefixCompose, $show, $comp_show) = (shift, shift, shift, shift, shift, shift, shift, shift);
12355 0   0       my($H, $added) = ($self->{faces}{$F}, $h->{'[Added]'} || {});
                  # NOTE(review): %orig, being the first hash in the list, receives the whole map() result;
                  # %map and %seen start empty.  %orig is not read in the code visible here.
12356 0 0         my(%orig, %map, %seen) = map { ( $_, exists($added->{$_}) ? $added->{$_} : $_ ) } keys %$h;
  0            
12357 0 0 0       for my $c (sort {($added->{$a} || '') cmp ($added->{$b} || '') or $a cmp $b} keys %$h) { # order affects the order of auto-prefixes
  0   0        
                  # Skip the bookkeeping keys [Prefix], [GPrefix], [Prefix_Show], [GPrefix_Show] and [Added].
12358 0 0         next if $c =~ /^\[(G?Prefix(_Show)?|Added)\]$/;
12359 0           my $v = $h->{$c};
                  # %seen is keyed by the stringified reference, so a sub-hash reachable under several keys
                  # is processed once and its entry is reused.
12360 0 0 0       if (ref $v and $seen{"$v"}) {
    0          
12361 0           $v = $seen{"$v"};
12362             } elsif (ref $v) {
                  # A sub-hash: its prefix is its own '[Prefix]' or else the result of next_auto_dead().
12363 0   0       my $p = $v->{'[Prefix]'} || $self->key2hex($self->next_auto_dead($H));
12364 0           my $cc = $c; # Name should not reflect linking
12365             # warn(" [@$n] $cc => $added->{$c}"),
12366 0 0         $cc = $added->{$c} if exists $added->{$c};
12367 0           my $name_append = my $name_show = chr hex $cc;
12368 0 0         $name_append = 'Compose' if $name_append eq $self->charhex2key($prefixCompose);
12369 0 0         $name_show = '⎄' if $name_show eq $self->charhex2key($prefixCompose);
12370 0 0         $name_append = $self->key2hex($name_append) if $name_append =~ /\s/;
12371             # $name_show = $self->key2hex($name_show) if $name_show =~ /\s/ and $name_show ne ' ';
                  # This `my $c' is a scratch variable which shadows the loop variable until the end of
                  # this elsif block; `$map{$c}' after the block uses the loop variable again.
12372 0           my $c;
                  # Collapse a leading run of 2 or more repetitions of the compose marker into one marker
                  # followed by the repetition count written in superscript digits.
12373 0           ($name_show = "$show$name_show")
12374 0           =~ s[^((⎄[₁₂₃₄₅₆₇₈₉]?|\Q$comp_show\E){2,})][ $2 . (($c = length($1)/length($2)) =~ tr/0-9/⁰¹²³⁴⁵⁶⁷⁸⁹/, $c) ]e;
12375 0 0         $name_show = $v->{'[Prefix_Show]'} if defined $v->{'[Prefix_Show]'};
12376 0           $self->composehash_2_prefix($F, $p, $v, my $nn = [@$n, $name_append], $prefixCompose, $name_show, $comp_show);
12377 0           $self->{faces}{$F}{'[prefixDocs]'}{$p} = "@$nn";
12378 0           $self->{faces}{$F}{'[Show]'}{$p} = $name_show;
                  # "$v" is stringified before the assignment replaces $v, so the entry is stored under
                  # the address of the sub-hash.
12379 0           $v = $seen{"$v"} = [$p, undef, 1];
12380             } else {
                  # A leaf: count it in '[inCompose]' and wrap it into a one-element array.
12381 0           $H->{'[inCompose]'}{$self->charhex2key($v)}++;
12382 0           $v = [$v];
12383             }
12384 0           $map{$c} = $v;
12385             }
12386 0           $H->{'[deadkeyFaceHexMap]'}{$prefix} = \%map;
12387             }
12388            
12389             sub composehash_add_linked ($$$$) {
12390 0     0 0   my($self, $hexH, $charH, $prefCharH, $delay, %add) = (shift, shift, shift, shift, {});
12391 0           for my $h (keys %$hexH) {
12392 0 0         $self->composehash_add_linked($hexH->{$h}, $charH, $prefCharH) if ref $hexH->{$h};
12393 0 0         next unless defined (my $to = $charH->{my $c = chr hex $h});
12394 0 0         $to = $to->[0] if ref $to;
12395 0           my $toC = $self->charhex2key($to);
12396 0           my $back = $prefCharH->{$toC};
12397 0 0         $back = $back->[0] if ref $back;
12398 0           my $now = $h eq $self->key2hex($back);
12399 0 0         next if exists $hexH->{$to = $self->key2hex($to)};
12400             # warn " ... link $c to $toC (now=$now, back = $prefCharH->{$toC}) @{$prefCharH->{$toC}||[]})";
12401             # warn " ... link $c to $toC (now=$now, back = $back)";
12402 0           $add{$to} = $h;
12403 0 0         ($now ? $hexH : $delay)->{$to} = $hexH->{$h};
12404             }
12405 0 0         $hexH->{'[Added]'} = \%add if %add;
12406             # warn " ... almost done";
12407 0 0         %$hexH = (%$delay, %$hexH) if keys %$delay;
12408             }
12409            
12410             sub create_composekey ($$$) {
12411 0     0 0   my($self, $F, $prefix, @PREFIX) = (shift, shift, shift);
12412 0           my $linkedF = $self->{faces}{$F}{LinkFace};
12413 0   0       my $linked = $linkedF && $self->{faces}{$linkedF}{Face_link_map}{$F};
12414 0 0 0       $linked &&= {map {ref($_ || 0) ? $_->[0] : $_} %$linked};
  0   0        
12415 0   0       my $rlinked = $linked && $self->{faces}{$F}{Face_link_map}{$linkedF};
12416             # $linked ||= {};
12417             # warn " Compose: $F: F linked to $linked->{F}" if $linked and $linked->{F};
12418             # $F eq 'Latin' and
12419             # warn " Compose: $F: ", join ', ', sort keys %{$self->{faces}{$linkedF}{Face_link_map}{$F}}
12420             # if $self->{faces}{$linkedF}{Face_link_map}{$F};
12421 0 0 0       if ($prefix and ref $prefix) {
12422 0           @PREFIX = map { my @a = split /,/;
  0            
12423 0   0       defined $a[$_] and length $a[$_] and $a[$_] = $self->key2hex($self->charhex2key($a[$_])) for 3,4;
      0        
12424 0           [@a]} @$prefix;
12425             } else {
12426 0           $prefix = $self->key2hex($self->charhex2key($prefix));
12427 0           @PREFIX = ( ['ComposeFiles', 'dotcompose', 'warn', $prefix, ''],
12428             ['EntityFiles', 'entity', 'warn', '', $prefix],
12429             ['rfc1345Files', 'rfc1345', 'warn', '', $prefix]);
12430             }
12431 0           my $p0 = my $first_prefix = $PREFIX[0][3]; # use for first found map
12432 0           my @Hashes;
12433 0 0         my @Arrays = @{ $self->{'[ComposeArrays]'} || [] };
  0            
12434 0 0         unless (@Arrays) { # Shared between faces
12435 0           my @Show;
12436 0           for my $i (0..$#PREFIX) { # FileList, type, OK_to_miss, prefix, prefix-in-last ... prefix-in-pre-last ...
12437 0           my $pref = $PREFIX[$i];
12438 0           my $arr;
12439 0 0 0       unless ($arr = $self->create_composeArray($pref->[0], $pref->[1]) and @$arr) {
12440 0 0         warn "Compose list of type $pref->[1] could not be created from FileList variable $pref->[0]" if $pref->[2];
12441 0           next;
12442             }
12443 0           push @Arrays, [$arr, $pref];
12444 0           push @Show, $i;
12445             }
12446 0           $self->{'[ComposeArrays]'} = \@Arrays;
12447 0           $self->{'[ComposeShowIdx]'} = \@Show;
12448             }
12449 0           my($v, $vv) = map $self->{faces}{$F}{$_}, qw( [coverage00hash] [coverageExtra] );
12450             # warn "Filter hashes $F ", scalar keys %$v, ' ', scalar keys %$vv, ' ', scalar @{$self->{faces}{$F}{'[coverage00]'}};
12451 0           for my $A (@Arrays) { # one per type
12452 0           my($arr, $pref) = @$A;
12453 0           my @NN;
12454 0           for my $a (@$arr) { # $a one per input file
12455 0           my @N;
12456 0           for my $l (@$a) {
12457 0           my($out, $term, @in) = @$l;
12458 0 0         next unless grep {$v->{$_} or $vv->{$_}} @in;
  0 0          
12459 0           push @N, $l;
12460             }
12461 0           push @NN, \@N;
12462             }
12463             # warn "Compose face=$F: keys <@$arr> @$pref";
12464             # warn "Compose face=$F: keys ", join ' ', map scalar @$_, @$arr;
12465 0           push @Hashes, [$self->compose_Array_2_hash(\@NN), $pref];
12466             }
12467 0           my @hashes;
12468 0           my $Comp_show = $self->{faces}{$F}{'[ComposeKey_Show]'};
12469 0           my $IDX = $self->{'[ComposeShowIdx]'};
12470 0           for my $i (0..$#Hashes) { # Now process separately for every face --- NOT YET
12471 0           my $H = $Hashes[$i];
12472 0           my($chained, $hash, $pref) = ('G', @$H); # Global
12473 0           $hash = $self->deep_copy($hash);
12474 0 0         $self->composehash_add_linked($hash, $linked, $rlinked) if $linked;
12475 0           my $pref0 = $pref->[3];
12476 0           my $prefix_repeat;
12477 0 0 0       if (@hashes and defined $pref->[4] and length $pref->[4]) {
    0 0        
12478 0           die "Chain-ComposeKey $pref->[4] already bound in the previous ComposeHash, keys = ", join ', ', keys %{$hashes[-1]{$pref->[4]}}
12479 0 0         if $hashes[-1]{$pref->[4]};
12480 0           $hashes[-1]{$pref->[4]} = $hash; # Bind to double/etc press
12481 0           $chained = '';
12482             } elsif ($first_prefix) { # The previous type could be not found; use the first defined accessor
12483 0           $pref0 = $first_prefix;
12484 0           undef $first_prefix;
12485             } else {
12486 0           warn "Hanging ComposeHash (no access prefix key) for ", join('///', @$pref);
12487             }
12488 0           push @hashes, $hash;
12489 0 0         $hash->{"[${chained}Prefix]"} = $pref0 if length $pref0;
12490 0 0 0       $hash->{"[Prefix_Show]"} = $Comp_show->[$IDX->[$i]] if ref $Comp_show and length $Comp_show->[$IDX->[$i]];
12491             }
12492 0 0         return unless @hashes;
12493 0           my @idx = split //, '₁₂₃₄₅₆₇₈₉';
12494 0           my $c = 0;
12495 0           for my $i ( 0..$#hashes ) {
12496 0           my $h = $hashes[$i];
12497 0           my $I = $IDX->[$i];
12498 0 0         next unless my $p = $h->{'[GPrefix]'}; # Not chained (chained are processed as subhashes by composehash_2_prefix()
12499 0 0         my $post = ($c ? "[$c]" : '');
12500 0           my $comp_show = $h->{'[Prefix_Show]'};
12501 0 0         unless (defined $comp_show) {
12502 0           my $c1;
12503 0 0         my $spost = ($c ? (($c1 = $c) =~ tr/0-9/₀₁₂₃₄₅₆₇₈₉/, $c1) : '');
12504 0 0         if (ref $Comp_show) { # Elt0 has a sane default
12505 0           $comp_show = "$Comp_show->[0]$spost";
12506             } else {
12507 0           $comp_show = "$Comp_show$spost";
12508             }
12509             }
12510 0           $self->{faces}{$F}{'[Show]'}{$p} = $comp_show;
12511             # push @Show, (ref $comp_show ? $comp_show->[$i] : $comp_show);
12512 0           $self->composehash_2_prefix($F, $p, $h, ["Compose$post"], $p0, $comp_show, $comp_show);
12513 0           $self->{faces}{$F}{'[prefixDocs]'}{$p} = "Compose$post key";
12514 0           ++$c;
12515             }
12516             }
12517            
12518             my(@AppleSym, %AppleSym);
12519             sub _AppleMap () { # http://forums.macrumors.com/archive/index.php/t-780577.html
12520             # https://github.com/tekezo/Karabiner/blob/version_10.7.0/src/bridge/generator/keycode/data/KeyCode.data
12521             # It has a defineition of 0x34; moreover, it also defines some keys above 0x80 (including ≤ 0x80 on some German keyboard???)
12522 0     0     chomp(my $lst = <<'EOF'); # 0..50; 65..92; 93..95 ↱KEYPAD; · = special ↱JIS
12523             asdfhgzxcv§bqweryt123465=97-80]ou[ip·lj'k;\,/nm.· `··············.·*·+·····/··-··=01234567·89¥_,
12524             EOF
12525             # '
12526 0           my @lst = split //, $lst;
12527 0           my $last = $#lst;
12528             # in addition to US Extended, we defined 64, 73 (BR), 102, 104 (hex 40 49 66 68) and 93-95 from JIS
12529 0           my @kVK_ = split /\n/, <
12530             24 Return 0d
12531             30 Tab 09
12532             ####31 Space
12533             33 Delete 08
12534             34 Enter_PowerBook 03 # Same as KeypadEnter
12535             35 Escape 1b
12536             37 Command
12537             38 Shift
12538             39 CapsLock
12539             3A Option
12540             3B Control
12541             3C RightShift
12542             3D RightOption
12543             3E RightControl
12544             3F Function
12545             40 F17 +
12546             42 ????????????? 1d # Same as RightArrow
12547             46 ?????????????? 1c # Same as LeftArrow
12548             47 ANSI_KeypadClear 1b # ??? Same as Escape
12549             48 VolumeUp 1f # ??? Same as DownArrow
12550             49 VolumeDown + # C1 of ABNT: /
12551             4A Mute
12552             ###4B ANSI_KeypadDivide /
12553             4C ANSI_KeypadEnter 03
12554             4D ??????? 1e # Same as UpArrow
12555             4F F18 +
12556             50 F19 +
12557             5A F20
12558             60 F5 +
12559             61 F6 +
12560             62 F7 +
12561             63 F3 +
12562             64 F8 +
12563             65 F9 +
12564             67 F11 +
12565             69 F13 +
12566             6A F16 +
12567             6B F14 +
12568             6D F10 +
12569             6E __PC__Menu +
12570             6F F12 +
12571             71 F15 +
12572             72 Help 05
12573             73 Home 01
12574             74 PageUp 0b
12575             75 ForwardDelete 7f
12576             76 F4 +
12577             77 End 04
12578             78 F2 +
12579             79 PageDown 0c
12580             7A F1 +
12581             7B LeftArrow 1c
12582             7C RightArrow 1d
12583             7D DownArrow 1f
12584             7E UpArrow 1e
12585             # ISO keyboards only
12586             ####0A ISO_Section §
12587             # JIS keyboards only
12588             ####5D JIS_Yen ¥
12589             ####5E JIS_Underscore _
12590             ####5F JIS_KeypadComma ,
12591             66 JIS_Eisu SPACE # Left of space (On CapsLock on Windows; compare http://commons.wikimedia.org/wiki/File:MacBookProJISKeyboard-1.jpg with http://en.wikipedia.org/wiki/Keyboard_layout#Japanese)
12592             68 JIS_Kana SPACE # Right of space (as on Windows, but without intervening key)
12593             # Defined in US Extended:
12594             6C ?????? +
12595             70 ?????? +
12596             EOF
12597 0           my %seen;
12598 0           for my $i (0..$#lst) {
12599 0 0         if ($lst[$i] eq '·') {
12600 0           undef $lst[$i];
12601             } else {
12602 0   0       my $pref = (defined $AppleSym{$lst[$i]} and '#');
12603 0           $AppleSym{"$pref$lst[$i]"} = $i;
12604             }
12605             }
12606             # $AppleSym{'#'} = $AppleSym{' '}; # Space is in a table as #
12607 0           my %map = ('+' => "\x10", 'SPACE' => ' ');
12608 0           for my $kVK (@kVK_) {
12609 0 0         warn ("unexpected OSX scan: <<$kVK>>"), next unless $kVK =~ /^\s*(#)|([A-F\d]{2})\s+(\?+|\w+)\s*(.*)/i;
12610 0 0         next if $1;
12611 0           my($hex, $name, $rest, $comment) = ($2, $3, $4);
12612 0           $AppleSym[hex $hex] = $name;
12613 0           $AppleSym{$name} = hex $hex;
12614 0 0         if(length $rest) {
12615 0 0         warn ("unexpected OSX scan expansion in $hex/$name: <<$rest>>"), next
12616             unless ( my($HEX,$lit,$sp), $comment) = ( $rest =~ /^(?:(?:([A-F\d]{2})|([^\w\s+])|(SPACE|\+))\s*)?(?:#\s*(.*))?$/i );
12617 0 0         if ($sp) {
    0          
12618 0 0         $rest = $map{$sp} or warn "Bad map in OSX basemap"
12619             } elsif ($HEX) {
12620 0           $rest = chr hex $HEX;
12621             } else {
12622 0           $rest = $lit;
12623             }
12624 0           my $idx = hex $hex;
12625 0 0 0       $idx > $last or not defined $lst[$idx] or warn "Non-special <<$lst[$idx]>> when overriding offset=$idx=hex($hex) in OSX basemap";
12626 0           $lst[$idx] = $rest;
12627             }
12628             }
12629             @lst
12630 0           }
12631            
12632             my @AppleMap;
12633            
12634             # Extra keys on Windows side: INSERT, and duplication-by-NumLock of the keypad.
12635             # Extra keys on Apple side: CLEAR on the KP, and KP-Equal.
12636            
12637             # Current solution: merge win-KP_Clear with apple-KP_CLear (1st in the center, 2nd in the ul-corner!)
12638             # merge INSERT with KP=
12639            
12640             # How to work with NumLock-modifications? There are 3 states: NumLock-, Base-, Shift.
12641            
12642             # Not in Apple maps:
12643             # F21-F24 HOME UP PRIOR DIVIDE LEFT CLEAR RIGHT MULTIPLY END DOWN NEXT SUBTRACT INSERT DELETE RETURN ADD NUMPAD0-NUMPAD9
12644             my %Apple_recode = (qw(
12645             DIVIDE #/ MULTIPLY * SUBTRACT #- ADD + DECIMAL #.
12646             RETURN ANSI_KeypadEnter DELETE ForwardDelete #\ § OEM_102 §
12647             PRIOR PageUp CLEAR ANSI_KeypadClear NEXT PageDown INSERT #=
12648             ABNT_C1 VolumeDown
12649             ), SPACE => ' ', map +("NUMPAD$_", "#$_"), 0..9);
12650             my %Apple_skip = map +($_, 1), (map "F$_", 21..24); #, (map "NUMPAD$_", 0..9);
12651             # ==> HOME UP PRIOR LEFT CLEAR RIGHT END DOWN NEXT INSERT DELETE RETURN
12652             # ==> PRIOR CLEAR NEXT INSERT
12653            
12654             sub AppleMap_Base ($$) {
12655 0     0 0   my($self, $K) = (shift, shift);
12656 0           my $F = $self->get_deep($self, @$K); # Presumably a face hash, as in $K = [qw(faces US)]
12657 0 0         return $F->{Apple2layout} if $F->{Apple2layout};
12658 0 0         @AppleMap = _AppleMap unless @AppleMap;
12659 0 0         warn 'AppleMap too long' if $#AppleMap >= 127;
12660 0           $self->reset_units;
12661 0           my $BB = $self->BaseKeys($K); # VK per position (except via-VK keys)
12662 0           my $B = $F->{baseKeysRaw}; # chars on key (if the first occurence???) OR VK
12663 0           my(@o, @A, @AA); # A: kbdd --> Apple; AA: Apple --> kbdd
12664 0           $_ = [@$_] for $B, $BB; # 1-level deep copy
12665 0           my $o = $F->{'[VK_off]'};
12666 0           for my $b ($B, $BB) { # Explicitly add via-VK keys
12667 0           for my $vk (keys %$o) {
12668 0 0         warn "[@$K]: $vk defined on \@$o->{$vk} as $b->[$o->{$vk}]" if defined $b->[$o->{$vk}];
12669 0 0         $b->[$o->{$vk}] = $vk unless defined $b->[$o->{$vk}];
12670             # warn "[@$K]: $vk \@$o->{$vk}"; # SPACE @ 116 (on izKeys)
12671             }
12672             }
12673             # warn "[[@$K]] @$B\n\t@$BB\n";
12674             # warn "\t", !(grep $_ eq ' ', @$B), "\t", !(grep $_ eq ' ', @$BB), "\n";
12675 0           for my $i (0..$#$B) { # Primary mappings
12676 0           my $k = $B->[$i];
12677 0           my $kk = $BB->[$i];
12678 0 0         next unless defined $k;
12679 0 0         $A[$i] = $AppleSym{$kk}, next if exists $AppleSym{$kk};
12680 0 0 0       $A[$i] = $AppleSym{$Apple_recode{$kk}}, next if exists $AppleSym{$Apple_recode{$kk} || 123};
12681 0 0         $A[$i] = $AppleSym{$k}, next if exists $AppleSym{$k};
12682 0 0 0       $A[$i] = $AppleSym{$Apple_recode{$k}}, next if exists $AppleSym{$Apple_recode{$k} || 123};
12683 0 0         $A[$i] = "\u\L$k" . 'Arrow', next if exists $AppleSym{"\u\L$k" . 'Arrow'};
12684 0 0         $A[$i] = "\u\L$k", next if exists $AppleSym{"\u\L$k"};
12685 0 0         next if $Apple_skip{$k};
12686 0           push @o, $k;
12687             }
12688 0           for my $i (0..126) { # Primary backwards mappings
12689 0 0         next unless defined $A[$i];
12690 0 0         warn "Duplicate backward Apple mapping: old=$AA[$A[$i]] --> $A[$i] <-- $i=new" if defined $AA[$A[$i]];
12691 0           $AA[$A[$i]] = $i;
12692             }
12693 0           for my $i (0..126) { # Secondary backwards mappings
12694 0 0 0       next if defined $AA[$i] or ($AppleSym[$i] || '') !~ /^#(.)$/ or not defined $AA[$AppleSym{$1}];
      0        
      0        
12695 0           $AA[$i] = $AA[$AppleSym{$1}]
12696             }
12697 0 0         warn "Not in Apple maps: @o" if @o;
12698 0           $F->{layout2Apple} = \@A;
12699 0           $F->{Apple2layout} = \@AA;
12700             }
12701            
12702             # fake is needed (apparently, the compiler does not allocate the named states smartly???)
12703             my @state_cnt = qw( 4of4 4096 3of4 256 2of4 16 1of4 0 0of4 0
12704             1of6 0 2of6 2 3of6 16 4of6 256 0of6 0
12705             );
12706             my @state_cnt_a = (@state_cnt, qw(
12707             5of6 4 6of6 64
12708             )); # At end, so may be skipped via merge_states_6_and_4
12709             my @state_cnt_b = (@state_cnt, qw(
12710             5of6 64 6of6 64
12711             ));
12712             my $in_group_4of6_plan_c = 2;
12713             my @state_cnt_c = (@state_cnt, '5of6' => 16 * $in_group_4of6_plan_c, '6of6' => 64);
12714             my $use_plan_b; # unimplemented
12715             my $use_plan_c = 1; # untested
12716            
12717             sub alloc_slots ($$) {
12718 0     0 0   my($tot, $a, %start) = (shift, shift);
12719 0           my @a = @$a; # deep copy
12720 0           while (@a) {
12721 0           my($how, $c) = splice @a, 0, 2;
12722 0           $start{$how} = [$tot, $tot+$c-1];
12723 0           $tot += $c;
12724             }
12725 0           \%start;
12726             }
12727            
12728             sub output_state_range ($$$$$$) { # Apparently, only ranges up to 256 states are supported.
12729 0     0 0   my($self, $from, $to, $mult, $next, $out, $o) = (shift, shift, shift, shift, shift, shift, ''); # $out is the ord(OUTPUT)
12730 0 0         $o .= "\t\t\t\n" if $to - $from > 255;
12731 0           while ($to - $from > 255) {
12732 0           $o .= $self->output_state_range($from, $from+255, $mult, $next, $out);
12733 0           $from += 256;
12734 0 0         $out += 256*$mult if defined $out;
12735 0 0         $next += 256*$mult if defined $next;
12736             }
12737 0 0         XML_format($out = chr $out) if defined $out;
12738 0           my @out;
12739 0 0         push @out, qq(next="$next") if defined $next;
12740 0 0         push @out, qq(output="$out") if defined $out ;
12741 0           $o .= <
12742            
12743             EOS
12744 0           $o
12745             }
12746            
12747             my $merge_states_6_and_4 = 1;
12748             my $do_hex5 = 0; # Won’t install with this… (Even with $merge_states_6_and_4)
12749            
12750             sub output_hex_input ($$$) { # only 4-hex-digits input supported now. First state in $states{'1of4'}[0].
12751 0     0 0   my($self, $states, $HEX, $o) = (shift, shift, shift, '');
12752 0 0         unless ($HEX =~ /[0-9a-f]/i) {
12753 0 0         return $do_hex5 ? <
12754            
12755            
12756            
12757             EOS
12758            
12759            
12760             EOS
12761             }
12762 0           my $i = hex $HEX;
12763 0           my @O = map { [$states->{($_+1).'of4'}[0] + $i] } 0..3;
  0            
12764 0           $O[4] = [undef, $i];
12765             # $O[4] = qq(output="$HEX;");
12766             # $O[4] = qq(next="5000");
12767 0           $o .= <
12768            
12769            
12770             EOS
12771             #
12772             #
12773             #
12774             #
12775             $o .= <output_state_range($states->{"${_}of4"}[0], $states->{"${_}of4"}[1], 16, $O[$_][0], $O[$_][1])
12776            
12777             EOS
12778 0           for 2..4; # ($HEX eq '9' ? 4 : 3); # 2..4; bisect installation problems here
12779            
12780             # return $o unless 15 >= hex $HEX; # debugging only
12781            
12782 0           @O = map { [$states->{($_+1).'of6'}[0] + $i] } 0..5;
  0            
12783 0           $O[2][0]--; # We start with U+01..., not U+00....
12784 0           $O[6] = [undef, 0xDC00 + $i];
12785 0 0         $o .= $do_hex5 ? <
12786            
12787             EOS
12788            
12789             EOS
12790             # $states->{"2of6"}[0] is U+0xxxxx=hex5 hex5 and hex6 differs only in treatment of 0, and of 1 0
12791             # $states->{"2of6"}[1] is U+1xxxxx hex5: 1 0 —→ U+010xxx
12792             # $states->{"3of6"}[0] is U+01xxxx hex6: 1 0 —→ U+10xxxx
12793             # $states->{"3of6"}[1] is U+10xxxx hex5: 0 —→ hex4, 1 —→ U+01xxxx, rest X —→ U+0Xxxx
12794             # hex6: 0 —→ hex5, 1 —→ U+1xxxxx, rest X —→ U+0Xxxx
12795 0 0         $o .= <
12796            
12797             EOS
12798             # What follows is a complete mess, since with $do_hex5 the resulting layout won’t install
12799 0 0 0       $o .= <
12800            
12801            
12802            
12803             EOS
12804 0 0 0       $o .= <
12805            
12806             EOS
12807 0 0 0       $o .= <
12808            
12809             EOS
12810 0 0         $o .= <
12811            
12812            
12813            
12814            
12815             EOS
12816 0 0         $o .= <
12817            
12818            
12819             EOS
12820 0 0 0       $o .= <
12821            
12822             EOS
12823             $o .= <output_state_range($states->{"${_}of6"}[0], $states->{"${_}of6"}[1], 16, $O[$_][0], $O[$_][1])
12824            
12825             EOS
12826 0           for 3; # ($HEX eq '9' ? 4 : 3); # 2..4; bisect installation problems here
12827             # VARIANT (A): for every one of 256 states, individually emit a surrogate (with multiplier 4), and set the next state (in B..B+3)
12828             # VARIANT (C): for every $in_group of 256 states, emit its surrogate (with multiplier 4).
12829             # This creates a spread of "next states" of size M-3, with M = 4*$in_group.
12830             # Create next state in ranges (B .. B+M-3) (B+M .. B+2M-3) (B+2M .. B+3M-3) (B+3M .. B+4M-3)
12831             # depending on ($i & 3). [Later, we should process every range with multiplier=0.]
12832 0 0 0       my $next_base = ($merge_states_6_and_4 and not $use_plan_c) ? $states->{"3of4"}[0] + 0xDC : $states->{"5of6"}[0];
12833 0 0         my $in_group = $use_plan_c ? $in_group_4of6_plan_c : 1;
12834 0 0         my $spread_next = $use_plan_c ? 4*$in_group_4of6_plan_c - 3 : 1;
12835 0 0         $o .= $use_plan_c ? <
12836            
12840             EOS
12841            
12842             EOS
12843 0           for my $j (0 .. ((0x100/$in_group)-1)) {
12844 0           my($J, $n, $O) = ($states->{"4of6"}[0] + $j*$in_group, $next_base + ($i & 0x3)*$spread_next, 0xD800 + 4*$j*$in_group + ($i>>2));
12845 0           XML_format($O = chr $O);
12846 0 0         if ($use_plan_c) {
12847 0           my $T = $J + $in_group_4of6_plan_c -1;
12848 0           $o .= <
12849            
12850             EOS
12851             } else {
12852 0           $o .= <
12853            
12854             EOS
12855             # ($HEX eq '9' ? 4 : 3); # 2..4; bisect installation problems here
12856             }
12857             }
12858 0 0         if ($use_plan_c) {
12859 0 0         my $doc = $merge_states_6_and_4 ? '; redirect to low surrogates' : '';
12860 0           $o .= <
12861            
12864             EOS
12865 0           for my $k (1 .. $in_group_4of6_plan_c) {
12866             # for my $j (0 .. 3) {
12867 0           my $n = $next_base + ($k-1)*4;
12868 0           my $T = $n + 3;
12869 0 0         my $next = ($merge_states_6_and_4 ? $states->{"4of4"}[0] + 0xDC0 + $i: $O[5][0]);
12870 0           $o .= <
12871            
12872             EOS
12873             }
12874             }
12875            
12876 0 0         unless ($merge_states_6_and_4) {
12877             $o .= $self->output_state_range($states->{"${_}of6"}[0], $states->{"${_}of6"}[1], 16, $O[$_][0], $O[$_][1])
12878 0 0         for ($use_plan_c ? 6 : 5) .. 6; # ($HEX eq '9' ? 4 : 3); # 2..4; bisect installation problems here
12879             }
12880             $o
12881 0           }
12882            
12883             sub output_hex_term ($$) { # only 4-hex-digits input supported now. First state in $states{'1of4'}[0].
12884 0     0 0   my($self, $states) = (shift, shift);
12885 0           my $o = <
12886            
12887            
12888             EOS
12889 0           my @hd = (0..9, 'A'..'F');
12890 0           for my $n (1 .. 3) {
12891 0           for my $i (0 .. ((16**$n)-1)) {
12892 0           my $N = $n + 1;
12893 0           my $I = $states->{"${N}of4"}[0] + $i;
12894 0           my $hex = sprintf "%0${n}X", $i;
12895 0           $o .= <
12896            
12897             EOS
12898             }
12899             }
12900 0 0         $o .= $do_hex5 ? <
12901            
12902            
12903            
12904             EOS
12905            
12906            
12907             EOS
12908            
12909 0           return $o; # the rest creates problems: see iz-Latin-hex6-vis3a.keylayout
12910            
12911 0           $o .= <
12912            
12913            
12914             EOS
12915 0           for my $n (2 .. 3) {
12916 0           for my $i (0 .. ((16**($n-1))-1)) {
12917 0           my $N = $n + 1;
12918 0           my $I = $states->{"${N}of6"}[0] + $i;
12919 0           my $hex = sprintf "%0${n}X", $i + 16**($n-2);
12920 0           $o .= <
12921            
12922             EOS
12923             }
12924             }
12925             $o
12926 0           }
12927            
12928             my $junkHEX = <
12929             After +0yz or +10z (16*16 states); instead of 4434 should put 4434 + 0..3
12930            
12931            
12932             WRONG!!! Need different multipliers for next and for output; so need 256 individual declarations
12933             Instead: use multiplier="4" (so that the output char is correct; next state takes 4K values, out of which we
12934             need only last two bits (manually inserted via next="" above); so we need 1K declarations for per-ultimate???
12935            
12936             So: maybe have 16 declarations for "After +0yz or +10z"; this way, next state takes 64 values, of which
12937             we may make account for by 16 declarations. (32 total per 22 chars 0-9a-fA-F.)
12938            
12939             Or: maybe have 16 declarations for "After +0yz or +10z"; each creates a range of 64 possible "next" states;
12940             but we create 4 groups of such states. So we may make account for by 4 declarations. (20 total per 22 chars 0-9a-fA-F.)
12941             EOJ
12942            
12943             #sub XML_format ($) { $_[0] =~ s/([&""''\x00-\x1f\x7f-\x9f\s<>]|$rxCombining|$rxZW)/ sprintf '&#x%04X;', ord $1 /ego;
12944             # # Avoid "Malformed UTF-8 character (fatal)" by not puting in a REx
12945             # $_[0] =~ s/(.)/ sprintf '&#x%04X;', ord $1 /ego if length $_[0] eq 1 and 0xd000 <= ord $_[0] and 0xdfff >= ord $_[0]}
12946             sub XML_format ($) {
12947 0     0 0   my @c = split //, $_[0];
12948 0           for my $c (@c) {
12949 0 0 0       if (0xd000 <= ord $c and 0xdfff >= ord $c) {
12950 0           $c = sprintf '&#x%04X;', ord $c;
12951             } else {
12952 0           $c =~ s/([&""''\x00-\x1f\x7f-\x9f\s<>]|$rxCombining|$rxZW)/ sprintf '&#x%04X;', ord $1 /ego;
  0            
12953             }
12954             }
12955 0           $_[0] = join '', @c;
12956             }
12957             sub XML_format_UTF_16 ($) {
12958 0     0 0   $_[0] = to_UTF16LE_units $_[0];
12959 0           XML_format $_[0];
12960             }
12961            
12962             sub AppleMap_i_j ($$$$$;$$$) { # http://forums.macrumors.com/archive/index.php/t-780577.html
12963 0   0 0 0   my($self, $K, $l, $sh, $caps, $dd, $map, $override) = (shift, shift, shift, shift, shift, shift || {}, shift || {}, shift || {});
      0        
      0        
12964 0           my $A2l = [ @{ $self->AppleMap_Base($K) } ]; # Deep copy
  0            
12965 0           my $dup = $override->{dup};
12966 0           for my $from (keys %$dup) {
12967 0           $A2l->[$from] = $A2l->[$dup->{$from}];
12968             }
12969 0           my $F = $self->get_deep($self, @$K); # Presumably a face hash, as in $K = [qw(faces US)]
12970 0           my $L = [map $self->{layers}{$_}, @{$F->{layers}}];
  0            
12971 0           $L = $L->[$l];
12972 0 0         @AppleMap = _AppleMap unless @AppleMap;
12973 0 0         warn 'AppleMap too long' if $#AppleMap >= 127;
12974 0           my $o = '';
12975 0           for my $i (0..127) {
12976 0           my($I, $d, $c) = ($A2l->[$i], 0); # offset inside the layout array
12977 0   0       $c = $override->{"$l-$sh-$caps-vk=$i"} || $override->{"$l-$sh--vk=$i"}; # $caps is 0 or 1
12978 0 0 0       $c = $L->[$I][$sh] if not defined $c and defined $I;
12979 0 0         $c = $AppleMap[$i] unless defined $c; # Fallback to US (apparently, there is no unbound "ASCII" keys in maps???); dbg to "\xffff" #
12980            
12981 0 0         $o .= <
12982            
12983             EOK
12984 0 0 0       $d = $c->[2] || 0 if ref $c;
12985 0 0         $c = $c->[0] if ref $c;
12986             # On windows, CapsLock flips the case; on Mac, it upcases
12987             # ($c) = grep {$_ ne $c} uc $c, ucfirst lc $c, lc $c if !$d and $caps and (lc $c ne uc $c or lc $c ne ucfirst lc $c);
12988 0 0 0       $c = uc $c if !$d and $caps;
12989 0 0         $dd->{$c}[1]++ if $d > 0; # 0 for normal char, 1 for base prefix; not for hex4/hex6
12990 0 0         $override->{extra_actions}{$c}++ if $d < 0;
12991 0   0       my $M = ($d >= 0 and $map->{$self->keys2hex($c)});
12992 0 0         my $pr = $M ? 'a_' : '';
12993 0 0 0       $dd->{$c}[0] = $c if $M or $d > 0; # 0 for normal char, 1 for base prefix
12994 0 0 0       my($how, $pref) = ($d || $M) ? ('action', ($M ? 'a_' : '') . ($d > 0 ? 'pr_' : (!$d && '_'))) : ('output', '');
    0 0        
    0          
12995 0 0         ($how eq 'output') ? XML_format_UTF_16 $c : XML_format $c;
12996 0           $o .= <
12997            
12998             EOK
12999             }
13000             $o
13001 0           }
13002            
13003             my $hex_states;
13004             sub AppleMap_prefix_map ($$$$$;$$) {
                  # Build and return the text chunk for one key $kk; the chunk is accumulated in $o.
                  # Positional arguments (after $self):
                  #   $kk       - the key being described;
                  #   $pref     - true selects the 'pr_' prefix and a next="st_..." action, false selects '_' and output="...";
                  #   $M        - hash ref whose keys are hex strings (decoded below with `chr hex`); defaults to {};
                  #   $v        - array ref; $v->[0] feeds $todo and the hex-input check at the end;
                  #   $doHEX    - true enables the call to output_hex_input();
                  #   $override - hash ref of replacement bindings, looked up by "+<state>+<key>"; defaults to {}.
13005 0   0 0 0   my($o, $self, $kk, $pref, $M, $v, $doHEX, $override) = ('', shift, shift, shift, shift || {}, shift, shift, shift || {});
      0        
                  # XML_format is called on a fresh copy so that $kk itself stays unformatted
                  # (presumably XML_format modifies its argument in place -- confirm against its definition).
13006 0           XML_format (my $k = $kk);
                  # NOTE(review): $M was defaulted to {} above, so it is always true here and $pr is always 'a_'.
13007 0 0         my $pr = $M ? 'a_' : '';
13008 0 0         my $prefix = $pref ? 'pr_' : '_';
13009 0           $o .= <
13010            
13011             EOK
13012             # A character and a prefix key with the same ordinal differ only in this:
13013 0           XML_format (my $oo = $v->[0]);
13014 0 0         my $todo = $pref ? qq(next="st_$oo") : qq(output="$oo");
13015 0           $o .= <
13016            
13017             EOK
                  # One pass per key of $M, in string-sorted order of the hex keys.
13018 0 0         for my $st (sort keys %{$M || {}}) {
  0            
13019 0           my $v0 = $M->{$st};
                  # Decode the hex key into a character: $st0 keeps the raw character, $st gets the formatted copy.
13020 0           XML_format ($st = my $st0 = chr hex $st);
13021 0           my $KK = $self->key2hex($kk);
13022 0           my $ST0 = $self->key2hex($st0);
                  # An override entry wins over the value from $M; all four raw/key2hex spellings of the
                  # "+<state>+<key>" index are tried in turn.  This `my $v` shadows the argument $v inside the loop.
13023             my $v = $override->{"+$st0+$kk"} || $override->{"+$ST0+$kk"}
13024 0   0       || $override->{"+$st0+$KK"} || $override->{"+$ST0+$KK"} || $v0;
                  # $d takes $v->[2] (0 when false); $T is only declared here and is filled in below.
13025 0   0       my($d, $T) = $v->[2] || 0;
13026 0 0         $T = chr hex $v->[0] if $d >= 0;
                  # Three-way dispatch on $d: positive -> next state "st_<char>"; negative -> $v->[0] is used
                  # verbatim as the state name; zero -> the character itself is output.
13027 0 0         if ($d > 0) {
    0          
13028 0           XML_format $T;
13029 0           $T = qq(next="st_$T");
13030             } elsif ($d < 0) { # Literal state
13031 0           $T = qq(next="$v->[0]");
13032             } else {
13033 0           XML_format_UTF_16 $T;
13034 0           $T = qq(output="$T");
13035             }
13036 0           $o .= <
13037            
13038             EOK
13039             }
                  # Back outside the loop $v is the argument again.  Hex input is appended only when $doHEX is true
                  # and $v->[0] is exactly one character out of: - u space _ + = 0-9 a-f (case-insensitive).
13040 0 0 0       $o .= $self->output_hex_input($hex_states, $v->[0]) if $doHEX and $v->[0] =~ /^[-u\x20_+=0-9a-f]\z/i;
13041 0           $o .= <
13042            
13043             EOK
13044 0           $o;
13045             }
13046            
13047             sub AppleMap_prefix ($$;$$$$$$) { # http://forums.macrumors.com/archive/index.php/t-780577.html
                  # Build and return (in $o) the text for every key of $dd, in one of several modes.
                  # Positional arguments (after $self):
                  #   $dd         - hash ref: key => array ref; element [1] is tested against $do_initing below;
                  #   $do_initing - only entries whose [1] has the same truth value as this flag are processed;
                  #   $term       - true: emit one terminator line per entry instead of calling AppleMap_prefix_map();
                  #   $map        - hash ref indexed by the prefix key (hex form); defaults to {};
                  #   $show       - hash ref consulted under $term, indexed by key2hex() of the key;
                  #   $override   - hash ref; keys starting with '+' are bindings, {extra_actions} holds extra action names;
                  #   $act        - hash ref filled on the first call (while it is still empty) and reused afterwards;
                  #                 it is dereferenced unconditionally, so presumably callers always pass a hash ref.
13048 0   0 0 0   my($self, $dd, $do_initing, $term, $map, $show, $override, $act) = (shift, shift, shift, shift, shift || {}, shift, shift, shift);
13049 0           my $o = '';
13050            
                  # NOTE(review): this copies one level of the hash only; that suffices if the values are plain counters.
13051 0 0         my %e = %{ $override->{extra_actions} || {}}; # Deep copy
  0            
                  # A request for hex4 also enables hex6, and hex5 as well when the file-level $do_hex5 is true.
13052 0 0 0       ($do_hex5 and $e{hex5}++), $e{hex6}++ if $e{hex4};
13053 0           my @o = @$override{grep /^\+/, keys %$override}; # honest bindings, not extra_actions/etc
13054 0           @o = map chr hex $_->[0], grep $_->[2] > 0, @o; # dead keys
                  # Runs only while $act is still empty: collect the distinct states, record in $act those that
                  # lack a $dd entry with a true [1], and size $hex_states from the number of states found.
13055 0 0         unless (%$act) { # Treat states created by the actions only
13056 0           my %states;
13057 0           $states{$_}++ for keys(%e), @o, grep $dd->{$_}[1], keys %$dd;
13058 0           for my $v (values %$map) { # hash indexed by the prefix key
13059 0           for my $out (values %$v) {
13060 0 0         next if not $out->[2];
13061 0           my $k = $self->charhex2key($out->[0]);
13062 0           $states{$k}++;
                  # This inner `my $v` shadows the loop variable of the enclosing `for my $v`.
13063 0           my $v;
13064 0 0 0       $act->{$k} = [$k] unless $v = $dd->{$k} and $v->[1]; # Skip if terminator was already created; do not create fake values
13065             }
13066             }
13067 0           my $states = 10 + keys(%states); # Was 4100; 10: "just in case"
                  # The counter array handed to alloc_slots() is selected by the file-level $use_plan_c / $use_plan_b flags.
13068 0 0         $hex_states = alloc_slots( $states, $use_plan_c ? \@state_cnt_c : ($use_plan_b ? \@state_cnt_b : \@state_cnt_a));
    0          
13069             }
13070            
                  # In this mode the loop below walks $act instead of the $dd that was passed in.
13071 0 0 0       if ($term and not $do_initing) { # Treat states created by the actions only
13072 0           $dd = $act; # A terminator MUST be created for every state
13073             }
13074            
                  # grep in scalar context: the number of hex4/hex5/hex6 actions enabled (used as a boolean).
13075 0           my $doHEX = grep $e{"hex$_"}, 4,5,6;
13076 0           for my $kk (sort keys %$dd) {
13077 0           my $v = $dd->{$kk};
13078 0           XML_format (my $k = $kk);
                  # Skip entries whose [1] flag does not have the same truth value as $do_initing.
13079 0 0         next if !!$do_initing != !!$v->[1];
13080            
13081 0 0         if ($term) {
                  # Text to show: the $show entry for this key if defined, else the key itself; a space is
                  # put in front when the text starts with a match of $rxCombining.
13082 0           my $Show = $show->{$self->key2hex($kk)};
13083 0 0         $Show = $kk unless defined $Show;
13084 0           $Show =~ s/^(?=$rxCombining)/ /;
13085 0           XML_format $Show;
13086 0           $o .= qq(\t\n);
13087 0           next;
13088             }
13089            
13090 0           my $M = $map->{$self->keys2hex($kk)};
13091 0           $o .= $self->AppleMap_prefix_map($kk, $do_initing, $M, $v, $doHEX, $override);
13092             }
                  # Extra actions are emitted only when $do_initing is true and $term is false, in sorted name order;
                  # for hex4 the inserted $add text depends on $do_hex5.
13093 0 0 0       for my $a ( ($do_initing and not $term) ? sort keys %e : () ) {
13094 0   0       my $add = ($a =~ /^hex4\z/ and ($do_hex5 ? <
13095            
13096            
13097             EOS
13098            
13099             EOS
13100 0           $o .= <
13101            
13102            
13103             $add
13104             EOS
13105             }
13106 0 0 0       $o .= $self->output_hex_term($hex_states) if $term and $doHEX and not $do_initing; # Do only once, at the end
      0        
13107 0           $o
13108             }
13109            
13110             1;
13111            
13112             __END__