File Coverage

blib/lib/ExtUtils/Constant/Base.pm
Criterion Covered Total %
statement 363 393 92.3
branch 143 182 78.5
condition 102 159 64.1
subroutine 22 36 61.1
pod 11 30 36.6
total 641 800 80.1


line stmt bran cond sub pod time code
1             package ExtUtils::Constant::Base;
2              
3 1     1   7 use strict;
  1         2  
  1         27  
4 1     1   4 use vars qw($VERSION);
  1         2  
  1         45  
5 1     1   8 use Carp;
  1         3  
  1         55  
6 1     1   407 use Text::Wrap;
  1         2258  
  1         48  
7 1     1   6 use ExtUtils::Constant::Utils qw(C_stringify perl_stringify);
  1         2  
  1         54  
8             $VERSION = '0.06';
9              
10 1   33 1   6 use constant is_perl56 => ($] < 5.007 && $] > 5.005_50);
  1         1  
  1         3858  
11              
12              
13             =head1 NAME
14              
15             ExtUtils::Constant::Base - base class for ExtUtils::Constant objects
16              
17             =head1 SYNOPSIS
18              
19             require ExtUtils::Constant::Base;
20             @ISA = 'ExtUtils::Constant::Base';
21              
22             =head1 DESCRIPTION
23              
24             ExtUtils::Constant::Base provides a base implementation of methods to
25             generate C code to give fast constant value lookup by named string. Currently
26             it's mostly used ExtUtils::Constant::XS, which generates the lookup code
27             for the constant() subroutine found in many XS modules.
28              
29             =head1 USAGE
30              
31             ExtUtils::Constant::Base exports no subroutines. The following methods are
32             available
33              
34             =over 4
35              
36             =cut
37              
38             sub valid_type {
39             # Default to assuming that you don't need different types of return data.
40 0     0 0 0 1;
41             }
42             sub default_type {
43 0     0 1 0 '';
44             }
45              
46             =item header
47              
48             A method returning a scalar containing definitions needed, typically for a
49             C header file.
50              
51             =cut
52              
53             sub header {
54 0     0 1 0 ''
55             }
56              
57             # This might actually be a return statement. Note that you are responsible
58             # for any space you might need before your value, as it lets to perform
59             # "tricks" such as "return KEY_" and have strings appended.
60             sub assignment_clause_for_type;
61             # In which case this might be an empty string
62 0     0 0 0 sub return_statement_for_type {undef};
63             sub return_statement_for_notdef;
64             sub return_statement_for_notfound;
65              
66             # "#if 1" is true to a C pre-processor
67             sub macro_from_name {
68 0     0 0 0 1;
69             }
70              
71             sub macro_from_item {
72 0     0 0 0 1;
73             }
74              
75             sub macro_to_ifdef {
76 143     143 0 312 my ($self, $macro) = @_;
77 143 100       423 if (ref $macro) {
78 4         17 return $macro->[0];
79             }
80 139 100 33     881 if (defined $macro && $macro ne "" && $macro ne "1") {
      66        
81 115 50       497 return $macro ? "#ifdef $macro\n" : "#if 0\n";
82             }
83 24         74 return "";
84             }
85              
86             sub macro_to_ifndef {
87 21     21 0 42 my ($self, $macro) = @_;
88 21 100       43 if (ref $macro) {
89             # Can't invert these stylishly, so "bodge it"
90 1         5 return "$macro->[0]#else\n";
91             }
92 20 50 33     106 if (defined $macro && $macro ne "" && $macro ne "1") {
      33        
93 20 50       71 return $macro ? "#ifndef $macro\n" : "#if 1\n";
94             }
95 0         0 croak "Can't generate an ifndef for unconditional code";
96             }
97              
98             sub macro_to_endif {
99 119     119 0 294 my ($self, $macro) = @_;
100              
101 119 100       309 if (ref $macro) {
102 3         10 return $macro->[1];
103             }
104 116 100 33     759 if (defined $macro && $macro ne "" && $macro ne "1") {
      66        
105 96         313 return "#endif\n";
106             }
107 20         65 return "";
108             }
109              
110             sub name_param {
111 107     107 0 328 'name';
112             }
113              
114             # This is possibly buggy, in that it's not mandatory (below, in the main
115             # C_constant parameters, but is expected to exist here, if it's needed)
116             # Buggy because if you're definitely pure 8 bit only, and will never be
117             # presented with your constants in utf8, the default form of C_constant can't
118             # be told not to do the utf8 version.
119              
120             sub is_utf8_param {
121 11     11 0 40 'utf8';
122             }
123              
124             sub memEQ {
125 0     0 0 0 "!memcmp";
126             }
127              
128             =item memEQ_clause args_hashref
129              
130             A method to return a suitable C C statement to check whether I
131             is equal to the C variable C. If I is defined, then it
132             is used to avoid C for short names, or to generate a comment to
133             highlight the position of the character in the C statement.
134              
135             If i is a reference to a scalar, then instead it gives
136             the characters pre-checked at the beginning, (and the number of chars by
137             which the C variable name has been advanced. These need to be chopped from
138             the front of I).
139              
140             =cut
141              
142             sub memEQ_clause {
143             # if (memEQ(name, "thingy", 6)) {
144             # Which could actually be a character comparison or even ""
145 71     71 1 182 my ($self, $args) = @_;
146 71         147 my ($name, $checked_at, $indent) = @{$args}{qw(name checked_at indent)};
  71         223  
147 71   50     252 $indent = ' ' x ($indent || 4);
148 71         144 my $front_chop;
149 71 100       192 if (ref $checked_at) {
150             # regexp won't work on 5.6.1 without use utf8; in turn that won't work
151             # on 5.005_03.
152 14         35 substr ($name, 0, length $$checked_at,) = '';
153 14         43 $front_chop = C_stringify ($$checked_at);
154 14         31 undef $checked_at;
155             }
156 71         143 my $len = length $name;
157              
158 71 100       280 if ($len < 2) {
159 8 100 66     79 return $indent . "{\n"
      66        
160             if (defined $checked_at and $checked_at == 0) or $len == 0;
161             # We didn't switch, drop through to the code for the 2 character string
162 3         11 $checked_at = 1;
163             }
164              
165 66         192 my $name_param = $self->name_param;
166              
167 66 100 100     312 if ($len < 3 and defined $checked_at) {
168 23         50 my $check;
169 23 50       164 if ($checked_at == 1) {
    0          
170 23         53 $check = 0;
171             } elsif ($checked_at == 0) {
172 0         0 $check = 1;
173             }
174 23 50       66 if (defined $check) {
175 23         130 my $char = C_stringify (substr $name, $check, 1);
176             # Placate 5.005 with a break in the string. I can't see a good way of
177             # getting it to not take [ as introducing an array lookup, even with
178             # ${name_param}[$check]
179 23         152 return $indent . "if ($name_param" . "[$check] == '$char') {\n";
180             }
181             }
182 43 100 66     320 if (($len == 2 and !defined $checked_at)
      100        
      100        
      100        
183             or ($len == 3 and defined ($checked_at) and $checked_at == 2)) {
184 20         74 my $char1 = C_stringify (substr $name, 0, 1);
185 20         75 my $char2 = C_stringify (substr $name, 1, 1);
186 20         113 return $indent .
187             "if ($name_param" . "[0] == '$char1' && $name_param" . "[1] == '$char2') {\n";
188             }
189 23 100 100     119 if (($len == 3 and defined ($checked_at) and $checked_at == 1)) {
      66        
190 5         34 my $char1 = C_stringify (substr $name, 0, 1);
191 5         28 my $char2 = C_stringify (substr $name, 2, 1);
192 5         48 return $indent .
193             "if ($name_param" . "[0] == '$char1' && $name_param" . "[2] == '$char2') {\n";
194             }
195              
196 18         44 my $pointer = '^';
197 18   100     83 my $have_checked_last = defined ($checked_at) && $len == $checked_at + 1;
198 18 100       53 if ($have_checked_last) {
199             # Checked at the last character, so no need to memEQ it.
200 3         17 $pointer = C_stringify (chop $name);
201 3         7 $len--;
202             }
203              
204 18         68 $name = C_stringify ($name);
205 18         76 my $memEQ = $self->memEQ();
206 18         80 my $body = $indent . "if ($memEQ($name_param, \"$name\", $len)) {\n";
207             # Put a little ^ under the letter we checked at
208             # Screws up for non printable and non-7 bit stuff, but that's too hard to
209             # get right.
210 18 100       64 if (defined $checked_at) {
    100          
211 10         51 $body .= $indent . "/* " . (' ' x length $memEQ)
212             . (' ' x length $name_param)
213             . (' ' x $checked_at) . $pointer
214             . (' ' x ($len - $checked_at + length $len)) . " */\n";
215             } elsif (defined $front_chop) {
216 2         12 $body .= $indent . "/* $front_chop"
217             . (' ' x ($len + 1 + length $len)) . " */\n";
218             }
219 18         69 return $body;
220             }
221              
222             =item dump_names arg_hashref, ITEM...
223              
224             An internal function to generate the embedded perl code that will regenerate
225             the constant subroutines. I, I and Is are the
226             same as for C_constant. I is treated as number of spaces to indent
227             by. If C is true a C<$types> is always declared in the perl
228             code generated, if defined and false never declared, and if undefined C<$types>
229             is only declared if the values in I as passed in cannot be inferred from
230             I and the Is.
231              
232             =cut
233              
234             sub dump_names {
235 7     7 1 52 my ($self, $args, @items) = @_;
236             my ($default_type, $what, $indent, $declare_types)
237 7         36 = @{$args}{qw(default_type what indent declare_types)};
  7         33  
238 7   50     67 $indent = ' ' x ($indent || 0);
239              
240 7         16 my $result;
241 7         17 my (@simple, @complex, %used_types);
242 7         25 foreach (@items) {
243 71         133 my $type;
244 71 50       181 if (ref $_) {
245 71   33     210 $type = $_->{type} || $default_type;
246 71 100       192 if ($_->{utf8}) {
247             # For simplicity always skip the bytes case, and reconstitute this entry
248             # from its utf8 twin.
249 11 100       41 next if $_->{utf8} eq 'no';
250             # Copy the hashref, as we don't want to mess with the caller's hashref.
251 6         54 $_ = {%$_};
252 6         18 unless (is_perl56) {
253 6         26 utf8::decode ($_->{name});
254             } else {
255             $_->{name} = pack 'U*', unpack 'U0U*', $_->{name};
256             }
257 6         17 delete $_->{utf8};
258             }
259             } else {
260 0         0 $_ = {name=>$_};
261 0         0 $type = $default_type;
262             }
263 66         135 $used_types{$type}++;
264 66 100 66     1056 if ($type eq $default_type
      100        
      66        
      66        
      66        
      66        
      66        
      33        
      33        
      33        
265             # grr 5.6.1
266             and length $_->{name}
267             and length $_->{name} == ($_->{name} =~ tr/A-Za-z0-9_//)
268             and !defined ($_->{macro}) and !defined ($_->{value})
269             and !defined ($_->{default}) and !defined ($_->{pre})
270             and !defined ($_->{post}) and !defined ($_->{def_pre})
271             and !defined ($_->{def_post}) and !defined ($_->{weight})) {
272             # It's the default type, and the name consists only of A-Za-z0-9_
273 43         154 push @simple, $_->{name};
274             } else {
275 23         60 push @complex, $_;
276             }
277             }
278              
279 7 50       35 if (!defined $declare_types) {
280             # Do they pass in any types we weren't already using?
281 0         0 foreach (keys %$what) {
282 0 0       0 next if $used_types{$_};
283 0         0 $declare_types++; # Found one in $what that wasn't used.
284 0         0 last; # And one is enough to terminate this loop
285             }
286             }
287 7 50       25 if ($declare_types) {
288 7         62 $result = $indent . 'my $types = {map {($_, 1)} qw('
289             . join (" ", sort keys %$what) . ")};\n";
290             }
291 7         44 local $Text::Wrap::huge = 'overflow';
292 7         21 local $Text::Wrap::columns = 80;
293 7         105 $result .= wrap ($indent . "my \@names = (qw(",
294             $indent . " ", join (" ", sort @simple) . ")");
295 7 100       2888 if (@complex) {
296 3         24 foreach my $item (sort {$a->{name} cmp $b->{name}} @complex) {
  54         103  
297 23         105 my $name = perl_stringify $item->{name};
298 23         73 my $line = ",\n$indent {name=>\"$name\"";
299 23 50       96 $line .= ", type=>\"$item->{type}\"" if defined $item->{type};
300 23         61 foreach my $thing (qw (macro value default pre post def_pre def_post)) {
301 161         348 my $value = $item->{$thing};
302 161 100       573 if (defined $value) {
303 27 100       70 if (ref $value) {
304             $line .= ", $thing=>[\""
305 3         12 . join ('", "', map {perl_stringify $_} @$value) . '"]';
  6         20  
306             } else {
307 24         90 $line .= ", $thing=>\"" . perl_stringify($value) . "\"";
308             }
309             }
310             }
311 23         55 $line .= "}";
312             # Ensure that the enclosing C comment doesn't end
313             # by turning */ into *" . "/
314 23         78 $line =~ s!\*\/!\*" . "/!gs;
315             # gcc -Wall doesn't like finding /* inside a comment
316 23         66 $line =~ s!\/\*!/" . "\*!gs;
317 23         127 $result .= $line;
318             }
319             }
320 7         29 $result .= ");\n";
321              
322 7         73 $result;
323             }
324              
325             =item assign arg_hashref, VALUE...
326              
327             A method to return a suitable assignment clause. If I is aggregate
328             (eg I expects both pointer and length) then there should be multiple
329             Is for the components. I
 and I if defined give snippets 
330             of C code to proceed and follow the assignment. I
 will be at the start 
331             of a block, so variables may be defined in it.
332              
333             =cut
334             # Hmm. value undef to do NOTDEF? value () to do NOTFOUND?
335              
336             sub assign {
337 74     74 1 174 my $self = shift;
338 74         187 my $args = shift;
339             my ($indent, $type, $pre, $post, $item)
340 74         143 = @{$args}{qw(indent type pre post item)};
  74         263  
341 74   50     424 $post ||= '';
342 74         157 my $clause;
343             my $close;
344 74 100       216 if ($pre) {
345 1         4 chomp $pre;
346 1         4 $close = "$indent}\n";
347 1         4 $clause = $indent . "{\n";
348 1         4 $indent .= " ";
349 1         5 $clause .= "$indent$pre";
350 1 50       9 $clause .= ";" unless $pre =~ /;$/;
351 1         4 $clause .= "\n";
352             }
353 74 50       196 confess "undef \$type" unless defined $type;
354 74 50       317 confess "Can't generate code for type $type"
355             unless $self->valid_type($type);
356              
357 74         414 $clause .= join '', map {"$indent$_\n"}
  72         377  
358             $self->assignment_clause_for_type({type=>$type,item=>$item}, @_);
359 74         231 chomp $post;
360 74 50       225 if (length $post) {
361 0         0 $clause .= "$post";
362 0 0       0 $clause .= ";" unless $post =~ /;$/;
363 0         0 $clause .= "\n";
364             }
365 74         254 my $return = $self->return_statement_for_type($type);
366 74 50       293 $clause .= "$indent$return\n" if defined $return;
367 74 100       200 $clause .= $close if $close;
368 74         240 return $clause;
369             }
370              
371             =item return_clause arg_hashref, ITEM
372              
373             A method to return a suitable C<#ifdef> clause. I is a hashref
374             (as passed to C and C. I is the number
375             of spaces to indent, defaulting to 6.
376              
377             =cut
378              
379             sub return_clause {
380              
381             ##ifdef thingy
382             # *iv_return = thingy;
383             # return PERL_constant_ISIV;
384             ##else
385             # return PERL_constant_NOTDEF;
386             ##endif
387 73     73 1 208 my ($self, $args, $item) = @_;
388 73         164 my $indent = $args->{indent};
389              
390             my ($name, $value, $default, $pre, $post, $def_pre, $def_post, $type)
391 73         315 = @$item{qw (name value default pre post def_pre def_post type)};
392 73 100       234 $value = $name unless defined $value;
393 73         280 my $macro = $self->macro_from_item($item);
394 73   50     268 $indent = ' ' x ($indent || 6);
395 73 50       206 unless (defined $type) {
396             # use Data::Dumper; print STDERR Dumper ($item);
397 0         0 confess "undef \$type";
398             }
399              
400             ##ifdef thingy
401 73         227 my $clause = $self->macro_to_ifdef($macro);
402              
403             # *iv_return = thingy;
404             # return PERL_constant_ISIV;
405 73 100       555 $clause
406             .= $self->assign ({indent=>$indent, type=>$type, pre=>$pre, post=>$post,
407             item=>$item}, ref $value ? @$value : $value);
408              
409 73 100 33     660 if (defined $macro && $macro ne "" && $macro ne "1") {
      66        
410             ##else
411 57         142 $clause .= "#else\n";
412              
413             # return PERL_constant_NOTDEF;
414 57 100       157 if (!defined $default) {
415 56         183 my $notdef = $self->return_statement_for_notdef();
416 56 50       217 $clause .= "$indent$notdef\n" if defined $notdef;
417             } else {
418 1 50       8 my @default = ref $default ? @$default : $default;
419 1         4 $type = shift @default;
420 1         10 $clause .= $self->assign ({indent=>$indent, type=>$type, pre=>$pre,
421             post=>$post, item=>$item}, @default);
422             }
423             }
424             ##endif
425 73         262 $clause .= $self->macro_to_endif($macro);
426              
427 73         253 return $clause;
428             }
429              
430             sub match_clause {
431             # $offset defined if we have checked an offset.
432 71     71 0 274 my ($self, $args, $item) = @_;
433 71         155 my ($offset, $indent) = @{$args}{qw(checked_at indent)};
  71         203  
434 71   100     354 $indent = ' ' x ($indent || 4);
435 71         152 my $body = '';
436 71         153 my ($no, $yes, $either, $name, $inner_indent);
437 71 100       230 if (ref $item eq 'ARRAY') {
438 11         32 ($yes, $no) = @$item;
439 11   66     67 $either = $yes || $no;
440 11 50       42 confess "$item is $either expecting hashref in [0] || [1]"
441             unless ref $either eq 'HASH';
442 11         35 $name = $either->{name};
443             } else {
444             confess "$item->{name} has utf8 flag '$item->{utf8}', should be false"
445 60 50       181 if $item->{utf8};
446 60         134 $name = $item->{name};
447 60         150 $inner_indent = $indent;
448             }
449              
450 71         401 $body .= $self->memEQ_clause ({name => $name, checked_at => $offset,
451             indent => length $indent});
452             # If we've been presented with an arrayref for $item, then the user string
453             # contains in the range 128-255, and we need to check whether it was utf8
454             # (or not).
455             # In the worst case we have two named constants, where one's name happens
456             # encoded in UTF8 happens to be the same byte sequence as the second's
457             # encoded in (say) ISO-8859-1.
458             # In this case, $yes and $no both have item hashrefs.
459 71 100       349 if ($yes) {
    100          
460 7         35 $body .= $indent . " if (" . $self->is_utf8_param . ") {\n";
461             } elsif ($no) {
462 4         32 $body .= $indent . " if (!" . $self->is_utf8_param . ") {\n";
463             }
464 71 100       188 if ($either) {
465 11         61 $body .= $self->return_clause ({indent=>4 + length $indent}, $either);
466 11 100 100     90 if ($yes and $no) {
467 2         12 $body .= $indent . " } else {\n";
468 2         13 $body .= $self->return_clause ({indent=>4 + length $indent}, $no);
469             }
470 11         38 $body .= $indent . " }\n";
471             } else {
472 60         254 $body .= $self->return_clause ({indent=>2 + length $indent}, $item);
473             }
474 71         416 $body .= $indent . "}\n";
475             }
476              
477              
478             =item switch_clause arg_hashref, NAMELEN, ITEMHASH, ITEM...
479              
480             An internal method to generate a suitable C clause, called by
481             C Is are in the hash ref format as given in the description
482             of C, and must all have the names of the same length, given by
483             I. I is a reference to a hash, keyed by name, values being
484             the hashrefs in the I list. (No parameters are modified, and there can
485             be keys in the I that are not in the list of Is without
486             causing problems - the hash is passed in to save generating it afresh for
487             each call).
488              
489             =cut
490              
491             sub switch_clause {
492 14     14 1 70 my ($self, $args, $namelen, $items, @items) = @_;
493 14         32 my ($indent, $comment) = @{$args}{qw(indent comment)};
  14         45  
494 14   50     69 $indent = ' ' x ($indent || 2);
495              
496 14         37 local $Text::Wrap::huge = 'overflow';
497 14         35 local $Text::Wrap::columns = 80;
498              
499 14         39 my @names = sort map {$_->{name}} @items;
  60         208  
500 14         54 my $leader = $indent . '/* ';
501 14         48 my $follower = ' ' x length $leader;
502 14         47 my $body = $indent . "/* Names all of length $namelen. */\n";
503 14 100       50 if (defined $comment) {
504 10         47 $body = wrap ($leader, $follower, $comment) . "\n";
505 10         4853 $leader = $follower;
506             }
507 14         52 my @safe_names = @names;
508 14         39 foreach (@safe_names) {
509 60 50       152 confess sprintf "Name '$_' is length %d, not $namelen", length
510             unless length == $namelen;
511             # Argh. 5.6.1
512             # next unless tr/A-Za-z0-9_//c;
513 60 100       182 next if tr/A-Za-z0-9_// == length;
514 11         51 $_ = '"' . perl_stringify ($_) . '"';
515             # Ensure that the enclosing C comment doesn't end
516             # by turning */ into *" . "/
517 11         38 s!\*\/!\*"."/!gs;
518             # gcc -Wall doesn't like finding /* inside a comment
519 11         45 s!\/\*!/"."\*!gs;
520             }
521 14         78 $body .= wrap ($leader, $follower, join (" ", @safe_names) . " */") . "\n";
522             # Figure out what to switch on.
523             # (RMS, Spread of jump table, Position, Hashref)
524 14         3235 my @best = (1e38, ~0);
525             # Prefer the last character over the others. (As it lets us shorten the
526             # memEQ clause at no cost).
527 14         74 foreach my $i ($namelen - 1, 0 .. ($namelen - 2)) {
528 49         136 my ($min, $max) = (~0, 0);
529 49         104 my %spread;
530 49         111 if (is_perl56) {
531             # Need proper Unicode preserving hash keys for bytes in range 128-255
532             # here too, for some reason. grr 5.6.1 yet again.
533             tie %spread, 'ExtUtils::Constant::Aaargh56Hash';
534             }
535 49         107 foreach (@names) {
536 176         412 my $char = substr $_, $i, 1;
537 176         456 my $ord = ord $char;
538 176 50       467 confess "char $ord is out of range" if $ord > 255;
539 176 100       461 $max = $ord if $ord > $max;
540 176 100       445 $min = $ord if $ord < $min;
541 176         309 push @{$spread{$char}}, $_;
  176         734  
542             # warn "$_ $char";
543             }
544             # I'm going to pick the character to split on that minimises the root
545             # mean square of the number of names in each case. Normally this should
546             # be the one with the most keys, but it may pick a 7 where the 8 has
547             # one long linear search. I'm not sure if RMS or just sum of squares is
548             # actually better.
549             # $max and $min are for the tie-breaker if the root mean squares match.
550             # Assuming that the compiler may be building a jump table for the
551             # switch() then try to minimise the size of that jump table.
552             # Finally use < not <= so that if it still ties the earliest part of
553             # the string wins. Because if that passes but the memEQ fails, it may
554             # only need the start of the string to bin the choice.
555             # I think. But I'm micro-optimising. :-)
556             # OK. Trump that. Now favour the last character of the string, before the
557             # rest.
558 49         112 my $ss;
559 49         272 $ss += @$_ * @$_ foreach values %spread;
560 49         150 my $rms = sqrt ($ss / keys %spread);
561 49 100 100     392 if ($rms < $best[0] || ($rms == $best[0] && ($max - $min) < $best[1])) {
      100        
562 22         120 @best = ($rms, $max - $min, $i, \%spread);
563             }
564             }
565 14 50       57 confess "Internal error. Failed to pick a switch point for @names"
566             unless defined $best[2];
567             # use Data::Dumper; print Dumper (@best);
568 14         53 my ($offset, $best) = @best[2,3];
569 14         54 $body .= $indent . "/* Offset $offset gives the best switch position. */\n";
570              
571 14   100     61 my $do_front_chop = $offset == 0 && $namelen > 2;
572 14 100       53 if ($do_front_chop) {
573 2         10 $body .= $indent . "switch (*" . $self->name_param() . "++) {\n";
574             } else {
575 12         58 $body .= $indent . "switch (" . $self->name_param() . "[$offset]) {\n";
576             }
577 14         75 foreach my $char (sort keys %$best) {
578 54 50       261 confess sprintf "'$char' is %d bytes long, not 1", length $char
579             if length ($char) != 1;
580 54 50       159 confess sprintf "char %#X is out of range", ord $char if ord ($char) > 255;
581 54         225 $body .= $indent . "case '" . C_stringify ($char) . "':\n";
582 54         110 foreach my $thisone (sort {
583             # Deal with the case of an item actually being an array ref to 1 or 2
584             # hashrefs. Don't assign to $a or $b, as they're aliases to the
585             # original
586 7 100 33     32 my $l = ref $a eq 'ARRAY' ? ($a->[0] || $->[1]) : $a;
587 7 100 33     28 my $r = ref $b eq 'ARRAY' ? ($b->[0] || $->[1]) : $b;
588             # Sort by weight first
589             ($r->{weight} || 0) <=> ($l->{weight} || 0)
590             # Sort equal weights by name
591 7 0 50     86 or $l->{name} cmp $r->{name}}
      50        
592             # If this looks evil, maybe it is. $items is a
593             # hashref, and we're doing a hash slice on it
594 54         194 @{$items}{@{$best->{$char}}}) {
  54         138  
595             # warn "You are here";
596 60 100       167 if ($do_front_chop) {
597 14         105 $body .= $self->match_clause ({indent => 2 + length $indent,
598             checked_at => \$char}, $thisone);
599             } else {
600 46         268 $body .= $self->match_clause ({indent => 2 + length $indent,
601             checked_at => $offset}, $thisone);
602             }
603             }
604 54         168 $body .= $indent . " break;\n";
605             }
606 14         42 $body .= $indent . "}\n";
607 14         182 return $body;
608             }
609              
610             sub C_constant_return_type {
611 17     17 0 123 "static int";
612             }
613              
614             sub C_constant_prefix_param {
615 0     0 0 0 '';
616             }
617              
618             sub C_constant_prefix_param_defintion {
619 0     0 0 0 '';
620             }
621              
622             sub name_param_definition {
623 17     17 0 84 "const char *" . $_[0]->name_param;
624             }
625              
626             sub namelen_param {
627 14     14 0 81 'len';
628             }
629              
630             sub namelen_param_definition {
631 0     0 0 0 'size_t ' . $_[0]->namelen_param;
632             }
633              
634             sub C_constant_other_params {
635 0     0 0 0 '';
636             }
637              
638             sub C_constant_other_params_defintion {
639 0     0 0 0 '';
640             }
641              
642             =item params WHAT
643              
644             An "internal" method, subject to change, currently called to allow an
645             overriding class to cache information that will then be passed into all
646             the C<*param*> calls. (Yes, having to read the source to make sense of this is
647             considered a known bug). I is be a hashref of types the constant
648             function will return. In ExtUtils::Constant::XS this method is used to
649             returns a hashref keyed IV NV PV SV to show which combination of pointers will
650             be needed in the C argument list generated by
651             C_constant_other_params_definition and C_constant_other_params
652              
653             =cut
654              
655             sub params {
656 0     0 1 0 '';
657             }
658              
659              
660             =item dogfood arg_hashref, ITEM...
661              
662             An internal function to generate the embedded perl code that will regenerate
663             the constant subroutines. Parameters are the same as for C_constant.
664              
665             Currently the base class does nothing and returns an empty string.
666              
667             =cut
668              
669             sub dogfood {
670 0     0 1 0 ''
671             }
672              
673             =item normalise_items args, default_type, seen_types, seen_items, ITEM...
674              
675             Convert the items to a normalised form. For 8 bit and Unicode values converts
676             the item to an array of 1 or 2 items, both 8 bit and UTF-8 encoded.
677              
678             =cut
679              
680             sub normalise_items
681             {
682 8     8 1 29 my $self = shift;
683 8         21 my $args = shift;
684 8         20 my $default_type = shift;
685 8         18 my $what = shift;
686 8         20 my $items = shift;
687 8         21 my @new_items;
688 8         38 foreach my $orig (@_) {
689 91         168 my ($name, $item);
690 91 100       204 if (ref $orig) {
691             # Make a copy which is a normalised version of the ref passed in.
692 37         88 $name = $orig->{name};
693 37         109 my ($type, $macro, $value) = @$orig{qw (type macro value)};
694 37   66     101 $type ||= $default_type;
695 37         79 $what->{$type} = 1;
696 37         118 $item = {name=>$name, type=>$type};
697              
698 37 50 66     150 undef $macro if defined $macro and $macro eq $name;
699 37 100       108 $item->{macro} = $macro if defined $macro;
700 37 50 66     230 undef $value if defined $value and $value eq $name;
701 37 100       187 $item->{value} = $value if defined $value;
702 37         82 foreach my $key (qw(default pre post def_pre def_post weight
703             not_constant)) {
704 259         436 my $value = $orig->{$key};
705 259 100       696 $item->{$key} = $value if defined $value;
706             # warn "$key $value";
707             }
708             } else {
709 54         107 $name = $orig;
710 54         222 $item = {name=>$name, type=>$default_type};
711 54         118 $what->{$default_type} = 1;
712             }
713             warn +(ref ($self) || $self)
714             . "doesn't know how to handle values of type $_ used in macro $name"
715 91 50 0     350 unless $self->valid_type ($item->{type});
716             # tr///c is broken on 5.6.1 for utf8, so my original tr/\0-\177//c
717             # doesn't work. Upgrade to 5.8
718             # if ($name !~ tr/\0-\177//c || $] < 5.005_50) {
719 91 100 66     426 if ($name =~ tr/\0-\177// == length $name || $] < 5.005_50
      100        
720             || $args->{disable_utf8_duplication}) {
721             # No characters outside 7 bit ASCII.
722 85 50       215 if (exists $items->{$name}) {
723 0         0 die "Multiple definitions for macro $name";
724             }
725 85         245 $items->{$name} = $item;
726             } else {
727             # No characters outside 8 bit. This is hardest.
728 6 50 66     44 if (exists $items->{$name} and ref $items->{$name} ne 'ARRAY') {
729 0         0 confess "Unexpected ASCII definition for macro $name";
730             }
731             # Again, 5.6.1 tr broken, so s/5\.6.*/5\.8\.0/;
732             # if ($name !~ tr/\0-\377//c) {
733 6 100       30 if ($name =~ tr/\0-\377// == length $name) {
734             # if ($] < 5.007) {
735             # $name = pack "C*", unpack "U*", $name;
736             # }
737 5         24 $item->{utf8} = 'no';
738 5         24 $items->{$name}[1] = $item;
739 5         13 push @new_items, $item;
740             # Copy item, to create the utf8 variant.
741 5         46 $item = {%$item};
742             }
743             # Encode the name as utf8 bytes.
744 6         19 unless (is_perl56) {
745 6         27 utf8::encode($name);
746             } else {
747             # warn "Was >$name< " . length ${name};
748             $name = pack 'C*', unpack 'C*', $name . pack 'U*';
749             # warn "Now '${name}' " . length ${name};
750             }
751 6 50       37 if ($items->{$name}[0]) {
752 0         0 die "Multiple definitions for macro $name";
753             }
754 6         23 $item->{utf8} = 'yes';
755 6         18 $item->{name} = $name;
756 6         21 $items->{$name}[0] = $item;
757             # We have need for the utf8 flag.
758 6         19 $what->{''} = 1;
759             }
760 91         225 push @new_items, $item;
761             }
762 8         54 @new_items;
763             }
764              
765             =item C_constant arg_hashref, ITEM...
766              
767             A function that returns a B of C subroutine definitions that return
768             the value and type of constants when passed the name by the XS wrapper.
769             I gives a list of constant names. Each can either be a string,
770             which is taken as a C macro name, or a reference to a hash with the following
771             keys
772              
773             =over 8
774              
775             =item name
776              
777             The name of the constant, as seen by the perl code.
778              
779             =item type
780              
781             The type of the constant (I, I etc)
782              
783             =item value
784              
785             A C expression for the value of the constant, or a list of C expressions if
786             the type is aggregate. This defaults to the I if not given.
787              
788             =item macro
789              
790             The C pre-processor macro to use in the C<#ifdef>. This defaults to the
791             I, and is mainly used if I is an C. If a reference an
792             array is passed then the first element is used in place of the C<#ifdef>
793             line, and the second element in place of the C<#endif>. This allows
794             pre-processor constructions such as
795              
796             #if defined (foo)
797             #if !defined (bar)
798             ...
799             #endif
800             #endif
801              
802             to be used to determine if a constant is to be defined.
803              
804             A "macro" 1 signals that the constant is always defined, so the C<#if>/C<#endif>
805             test is omitted.
806              
807             =item default
808              
809             Default value to use (instead of Cing with "your vendor has not
810             defined...") to return if the macro isn't defined. Specify a reference to
811             an array with type followed by value(s).
812              
813             =item pre
814              
815             C code to use before the assignment of the value of the constant. This allows
816             you to use temporary variables to extract a value from part of a C
817             and return this as I. This C code is places at the start of a block,
818             so you can declare variables in it.
819              
820             =item post
821              
822             C code to place between the assignment of value (to a temporary) and the
823             return from the function. This allows you to clear up anything in I
. 
824             Rarely needed.
825              
826             =item def_pre
827              
828             =item def_post
829              
830             Equivalents of I
 and I for the default value. 
831              
832             =item utf8
833              
834             Generated internally. Is zero or undefined if name is 7 bit ASCII,
835             "no" if the name is 8 bit (and so should only match if SvUTF8() is false),
836             "yes" if the name is utf8 encoded.
837              
838             The internals automatically clone any name with characters 128-255 but none
839             256+ (ie one that could be either in bytes or utf8) into a second entry
840             which is utf8 encoded.
841              
842             =item weight
843              
844             Optional sorting weight for names, to determine the order of
845             linear testing when multiple names fall in the same case of a switch clause.
846             Higher comes earlier, undefined defaults to zero.
847              
848             =back
849              
850             In the argument hashref, I is the name of the package, and is only
851             used in comments inside the generated C code. I defaults to
852             C if undefined.
853              
854             I is the type returned by Cs that don't specify their
855             type. It defaults to the value of C. I should be given
856             either as a comma separated list of types that the C subroutine I
857             will generate or as a reference to a hash. I will be added to
858             the list if not present, as will any types given in the list of Is. The
859             resultant list should be the same list of types that C is
860             given. [Otherwise C and C may differ in the number of
861             parameters to the constant function. I is currently unused and
862             ignored. In future it may be used to pass in information used to change the C
863             indentation style used.] The best way to maintain consistency is to pass in a
864             hash reference and let this function update it.
865              
866             I governs when child functions of I are generated. If there
867             are I or more Is with the same length of name, then the code
868             to switch between them is placed into a function named I_I, for
869             example C for names 5 characters long. The default I is
870             3. A single C is always inlined.
871              
872             =cut
873              
874             # The parameter now BREAKOUT was previously documented as:
875             #
876             # I if defined signals that all the Is of the Is are of
877             # this length, and that the constant name passed in by perl is checked and
878             # also of this length. It is used during recursion, and should be C
879             # unless the caller has checked all the lengths during code generation, and
880             # the generated subroutine is only to be called with a name of this length.
881             #
882             # As you can see it now performs this function during recursion by being a
883             # scalar reference.
884              
885             sub C_constant {
886 17     17 1 102 my ($self, $args, @items) = @_;
887             my ($package, $subname, $default_type, $what, $indent, $breakout) =
888 17         53 @{$args}{qw(package subname default_type types indent breakout)};
  17         78  
889 17   50     60 $package ||= 'Foo';
890 17   100     335 $subname ||= 'constant';
891             # I'm not using this. But a hashref could be used for full formatting without
892             # breaking this API
893             # $indent ||= 0;
894              
895 17         43 my ($namelen, $items);
896 17 100       64 if (ref $breakout) {
897             # We are called recursively. We trust @items to be normalised, $what to
898             # be a hashref, and pinch %$items from our parent to save recalculation.
899 10         30 ($namelen, $items) = @$breakout;
900             } else {
901 7         23 $items = {};
902 7         16 if (is_perl56) {
903             # Need proper Unicode preserving hash keys.
904             require ExtUtils::Constant::Aaargh56Hash;
905             tie %$items, 'ExtUtils::Constant::Aaargh56Hash';
906             }
907 7   50     57 $breakout ||= 3;
908 7   66     40 $default_type ||= $self->default_type();
909 7 100       34 if (!ref $what) {
910             # Convert line of the form IV,UV,NV to hash
911 1   50     19 $what = {map {$_ => 1} split /,\s*/, ($what || '')};
  0         0  
912             # Figure out what types we're dealing with, and assign all unknowns to the
913             # default type
914             }
915 7         59 @items = $self->normalise_items ({}, $default_type, $what, $items, @items);
916             # use Data::Dumper; print Dumper @items;
917             }
918 17         103 my $params = $self->params ($what);
919              
920             # Probably "static int"
921 17         41 my ($body, @subs);
922 17         90 $body = $self->C_constant_return_type($params) . "\n$subname ("
923             # Eg "pTHX_ "
924             . $self->C_constant_prefix_param_defintion($params)
925             # Probably "const char *name"
926             . $self->name_param_definition($params);
927             # Something like ", STRLEN len"
928 17 100       102 $body .= ", " . $self->namelen_param_definition($params)
929             unless defined $namelen;
930 17         77 $body .= $self->C_constant_other_params_defintion($params);
931 17         62 $body .= ") {\n";
932              
933 17 100       57 if (defined $namelen) {
934             # We are a child subroutine. Print the simple description
935 10         36 my $comment = 'When generated this function returned values for the list'
936             . ' of names given here. However, subsequent manual editing may have'
937             . ' added or removed some.';
938 10         87 $body .= $self->switch_clause ({indent=>2, comment=>$comment},
939             $namelen, $items, @items);
940             } else {
941             # We are the top level.
942 7         25 $body .= " /* Initially switch on the length of the name. */\n";
943 7         100 $body .= $self->dogfood ({package => $package, subname => $subname,
944             default_type => $default_type, what => $what,
945             indent => $indent, breakout => $breakout},
946             @items);
947 7         47 $body .= ' switch ('.$self->namelen_param().") {\n";
948             # Need to group names of the same length
949 7         22 my @by_length;
950 7         23 foreach (@items) {
951 71         135 push @{$by_length[length $_->{name}]}, $_;
  71         202  
952             }
953 7         49 foreach my $i (0 .. $#by_length) {
954 37 100       131 next unless $by_length[$i]; # None of this length
955 25         81 $body .= " case $i:\n";
956 25 100       54 if (@{$by_length[$i]} == 1) {
  25 100       89  
957 11         33 my $only_thing = $by_length[$i]->[0];
958 11 100       49 if ($only_thing->{utf8}) {
959 4 100       21 if ($only_thing->{utf8} eq 'yes') {
960             # With utf8 on flag item is passed in element 0
961 2         13 $body .= $self->match_clause (undef, [$only_thing]);
962             } else {
963             # With utf8 off flag item is passed in element 1
964 2         21 $body .= $self->match_clause (undef, [undef, $only_thing]);
965             }
966             } else {
967 7         35 $body .= $self->match_clause (undef, $only_thing);
968             }
969 14         63 } elsif (@{$by_length[$i]} < $breakout) {
970             $body .= $self->switch_clause ({indent=>4},
971 4         15 $i, $items, @{$by_length[$i]});
  4         16  
972             } else {
973             # Only use the minimal set of parameters actually needed by the types
974             # of the names of this length.
975 10         32 my $what = {};
976 10         29 foreach (@{$by_length[$i]}) {
  10         33  
977 52         141 $what->{$_->{type}} = 1;
978 52 100       168 $what->{''} = 1 if $_->{utf8};
979             }
980 10         53 $params = $self->params ($what);
981             push @subs, $self->C_constant ({package=>$package,
982             subname=>"${subname}_$i",
983             default_type => $default_type,
984             types => $what, indent => $indent,
985             breakout => [$i, $items]},
986 10         96 @{$by_length[$i]});
  10         60  
987 10         82 $body .= " return ${subname}_$i ("
988             # Eg "aTHX_ "
989             . $self->C_constant_prefix_param($params)
990             # Probably "name"
991             . $self->name_param($params);
992 10         43 $body .= $self->C_constant_other_params($params);
993 10         38 $body .= ");\n";
994             }
995 25         106 $body .= " break;\n";
996             }
997 7         38 $body .= " }\n";
998             }
999 17         81 my $notfound = $self->return_statement_for_notfound();
1000 17 50       79 $body .= " $notfound\n" if $notfound;
1001 17         39 $body .= "}\n";
1002 17         243 return (@subs, $body);
1003             }
1004              
1005             1;
1006             __END__