File Coverage

blib/lib/ExtUtils/Constant/Base.pm
Criterion Covered Total %
statement 363 393 92.3
branch 143 182 78.5
condition 102 159 64.1
subroutine 22 36 61.1
pod 11 30 36.6
total 641 800 80.1


line stmt bran cond sub pod time code
1             package ExtUtils::Constant::Base;
2              
3 1     1   5 use strict;
  1         2  
  1         25  
4 1     1   4 use vars qw($VERSION);
  1         2  
  1         30  
5 1     1   4 use Carp;
  1         1  
  1         46  
6 1     1   235 use Text::Wrap;
  1         1890  
  1         48  
7 1     1   7 use ExtUtils::Constant::Utils qw(C_stringify perl_stringify);
  1         2  
  1         53  
8             $VERSION = '0.24_01';
9              
10 1   33 1   6 use constant is_perl56 => ($] < 5.007 && $] > 5.005_50);
  1         1  
  1         3464  
11              
12              
13             =head1 NAME
14              
15             ExtUtils::Constant::Base - base class for ExtUtils::Constant objects
16              
17             =head1 SYNOPSIS
18              
19             require ExtUtils::Constant::Base;
20             @ISA = 'ExtUtils::Constant::Base';
21              
22             =head1 DESCRIPTION
23              
24             ExtUtils::Constant::Base provides a base implementation of methods to
25             generate C code to give fast constant value lookup by named string. Currently
26             it's mostly used ExtUtils::Constant::XS, which generates the lookup code
27             for the constant() subroutine found in many XS modules.
28              
29             =head1 USAGE
30              
31             ExtUtils::Constant::Base exports no subroutines. The following methods are
32             available
33              
34             =over 4
35              
36             =cut
37              
38             sub valid_type {
39             # Default to assuming that you don't need different types of return data.
40 0     0 0 0 1;
41             }
42             sub default_type {
43 0     0 1 0 '';
44             }
45              
46             =item header
47              
48             A method returning a scalar containing definitions needed, typically for a
49             C header file.
50              
51             =cut
52              
53             sub header {
54 0     0 1 0 ''
55             }
56              
57             # This might actually be a return statement. Note that you are responsible
58             # for any space you might need before your value, as it lets to perform
59             # "tricks" such as "return KEY_" and have strings appended.
60             sub assignment_clause_for_type;
61             # In which case this might be an empty string
62 0     0 0 0 sub return_statement_for_type {undef};
63             sub return_statement_for_notdef;
64             sub return_statement_for_notfound;
65              
66             # "#if 1" is true to a C pre-processor
67             sub macro_from_name {
68 0     0 0 0 1;
69             }
70              
71             sub macro_from_item {
72 0     0 0 0 1;
73             }
74              
75             sub macro_to_ifdef {
76 563     563 0 847 my ($self, $macro) = @_;
77 563 100       994 if (ref $macro) {
78 22         70 return $macro->[0];
79             }
80 541 100 33     2128 if (defined $macro && $macro ne "" && $macro ne "1") {
      66        
81 469 50       1597 return $macro ? "#ifdef $macro\n" : "#if 0\n";
82             }
83 72         172 return "";
84             }
85              
86             sub macro_to_ifndef {
87 147     147 0 226 my ($self, $macro) = @_;
88 147 100       272 if (ref $macro) {
89             # Can't invert these stylishly, so "bodge it"
90 7         25 return "$macro->[0]#else\n";
91             }
92 140 50 33     500 if (defined $macro && $macro ne "" && $macro ne "1") {
      33        
93 140 50       452 return $macro ? "#ifndef $macro\n" : "#if 1\n";
94             }
95 0         0 croak "Can't generate an ifndef for unconditional code";
96             }
97              
98             sub macro_to_endif {
99 395     395 0 705 my ($self, $macro) = @_;
100              
101 395 100       693 if (ref $macro) {
102 15         38 return $macro->[1];
103             }
104 380 100 33     1332 if (defined $macro && $macro ne "" && $macro ne "1") {
      66        
105 336         774 return "#endif\n";
106             }
107 44         88 return "";
108             }
109              
110             sub name_param {
111 107     107 0 240 'name';
112             }
113              
114             # This is possibly buggy, in that it's not mandatory (below, in the main
115             # C_constant parameters, but is expected to exist here, if it's needed)
116             # Buggy because if you're definitely pure 8 bit only, and will never be
117             # presented with your constants in utf8, the default form of C_constant can't
118             # be told not to do the utf8 version.
119              
120             sub is_utf8_param {
121 11     11 0 25 'utf8';
122             }
123              
124             sub memEQ {
125 0     0 0 0 "!memcmp";
126             }
127              
128             =item memEQ_clause args_hashref
129              
130             A method to return a suitable C C statement to check whether I
131             is equal to the C variable C. If I is defined, then it
132             is used to avoid C for short names, or to generate a comment to
133             highlight the position of the character in the C statement.
134              
135             If i is a reference to a scalar, then instead it gives
136             the characters pre-checked at the beginning, (and the number of chars by
137             which the C variable name has been advanced. These need to be chopped from
138             the front of I).
139              
140             =cut
141              
142             sub memEQ_clause {
143             # if (memEQ(name, "thingy", 6)) {
144             # Which could actually be a character comparison or even ""
145 71     71 1 142 my ($self, $args) = @_;
146 71         94 my ($name, $checked_at, $indent) = @{$args}{qw(name checked_at indent)};
  71         144  
147 71   50     178 $indent = ' ' x ($indent || 4);
148 71         113 my $front_chop;
149 71 100       151 if (ref $checked_at) {
150             # regexp won't work on 5.6.1 without use utf8; in turn that won't work
151             # on 5.005_03.
152 14         27 substr ($name, 0, length $$checked_at,) = '';
153 14         27 $front_chop = C_stringify ($$checked_at);
154 14         27 undef $checked_at;
155             }
156 71         109 my $len = length $name;
157              
158 71 100       136 if ($len < 2) {
159 8 100 66     54 return $indent . "{\n"
      66        
160             if (defined $checked_at and $checked_at == 0) or $len == 0;
161             # We didn't switch, drop through to the code for the 2 character string
162 3         5 $checked_at = 1;
163             }
164              
165 66         136 my $name_param = $self->name_param;
166              
167 66 100 100     198 if ($len < 3 and defined $checked_at) {
168 23         35 my $check;
169 23 50       41 if ($checked_at == 1) {
    0          
170 23         31 $check = 0;
171             } elsif ($checked_at == 0) {
172 0         0 $check = 1;
173             }
174 23 50       38 if (defined $check) {
175 23         73 my $char = C_stringify (substr $name, $check, 1);
176             # Placate 5.005 with a break in the string. I can't see a good way of
177             # getting it to not take [ as introducing an array lookup, even with
178             # ${name_param}[$check]
179 23         103 return $indent . "if ($name_param" . "[$check] == '$char') {\n";
180             }
181             }
182 43 100 66     219 if (($len == 2 and !defined $checked_at)
      100        
      100        
      100        
183             or ($len == 3 and defined ($checked_at) and $checked_at == 2)) {
184 20         103 my $char1 = C_stringify (substr $name, 0, 1);
185 20         67 my $char2 = C_stringify (substr $name, 1, 1);
186 20         92 return $indent .
187             "if ($name_param" . "[0] == '$char1' && $name_param" . "[1] == '$char2') {\n";
188             }
189 23 100 100     72 if (($len == 3 and defined ($checked_at) and $checked_at == 1)) {
      66        
190 5         19 my $char1 = C_stringify (substr $name, 0, 1);
191 5         18 my $char2 = C_stringify (substr $name, 2, 1);
192 5         31 return $indent .
193             "if ($name_param" . "[0] == '$char1' && $name_param" . "[2] == '$char2') {\n";
194             }
195              
196 18         31 my $pointer = '^';
197 18   100     53 my $have_checked_last = defined ($checked_at) && $len == $checked_at + 1;
198 18 100       33 if ($have_checked_last) {
199             # Checked at the last character, so no need to memEQ it.
200 3         23 $pointer = C_stringify (chop $name);
201 3         12 $len--;
202             }
203              
204 18         40 $name = C_stringify ($name);
205 18         49 my $memEQ = $self->memEQ();
206 18         59 my $body = $indent . "if ($memEQ($name_param, \"$name\", $len)) {\n";
207             # Put a little ^ under the letter we checked at
208             # Screws up for non printable and non-7 bit stuff, but that's too hard to
209             # get right.
210 18 100       43 if (defined $checked_at) {
    100          
211 10         45 $body .= $indent . "/* " . (' ' x length $memEQ)
212             . (' ' x length $name_param)
213             . (' ' x $checked_at) . $pointer
214             . (' ' x ($len - $checked_at + length $len)) . " */\n";
215             } elsif (defined $front_chop) {
216 2         7 $body .= $indent . "/* $front_chop"
217             . (' ' x ($len + 1 + length $len)) . " */\n";
218             }
219 18         43 return $body;
220             }
221              
222             =item dump_names arg_hashref, ITEM...
223              
224             An internal function to generate the embedded perl code that will regenerate
225             the constant subroutines. I, I and Is are the
226             same as for C_constant. I is treated as number of spaces to indent
227             by. If C is true a C<$types> is always declared in the perl
228             code generated, if defined and false never declared, and if undefined C<$types>
229             is only declared if the values in I as passed in cannot be inferred from
230             I and the Is.
231              
232             =cut
233              
234             sub dump_names {
235 7     7 1 34 my ($self, $args, @items) = @_;
236             my ($default_type, $what, $indent, $declare_types)
237 7         25 = @{$args}{qw(default_type what indent declare_types)};
  7         23  
238 7   50     45 $indent = ' ' x ($indent || 0);
239              
240 7         17 my $result;
241 7         17 my (@simple, @complex, %used_types);
242 7         31 foreach (@items) {
243 71         93 my $type;
244 71 50       119 if (ref $_) {
245 71   33     146 $type = $_->{type} || $default_type;
246 71 100       127 if ($_->{utf8}) {
247             # For simplicity always skip the bytes case, and reconstitute this entry
248             # from its utf8 twin.
249 11 100       27 next if $_->{utf8} eq 'no';
250             # Copy the hashref, as we don't want to mess with the caller's hashref.
251 6         45 $_ = {%$_};
252 6         12 unless (is_perl56) {
253 6         16 utf8::decode ($_->{name});
254             } else {
255             $_->{name} = pack 'U*', unpack 'U0U*', $_->{name};
256             }
257 6         10 delete $_->{utf8};
258             }
259             } else {
260 0         0 $_ = {name=>$_};
261 0         0 $type = $default_type;
262             }
263 66         104 $used_types{$type}++;
264 66 100 66     655 if ($type eq $default_type
      100        
      66        
      66        
      66        
      66        
      66        
      33        
      33        
      33        
265             # grr 5.6.1
266             and length $_->{name}
267             and length $_->{name} == ($_->{name} =~ tr/A-Za-z0-9_//)
268             and !defined ($_->{macro}) and !defined ($_->{value})
269             and !defined ($_->{default}) and !defined ($_->{pre})
270             and !defined ($_->{post}) and !defined ($_->{def_pre})
271             and !defined ($_->{def_post}) and !defined ($_->{weight})) {
272             # It's the default type, and the name consists only of A-Za-z0-9_
273 43         112 push @simple, $_->{name};
274             } else {
275 23         49 push @complex, $_;
276             }
277             }
278              
279 7 50       26 if (!defined $declare_types) {
280             # Do they pass in any types we weren't already using?
281 0         0 foreach (keys %$what) {
282 0 0       0 next if $used_types{$_};
283 0         0 $declare_types++; # Found one in $what that wasn't used.
284 0         0 last; # And one is enough to terminate this loop
285             }
286             }
287 7 50       16 if ($declare_types) {
288 7         44 $result = $indent . 'my $types = {map {($_, 1)} qw('
289             . join (" ", sort keys %$what) . ")};\n";
290             }
291 7         41 local $Text::Wrap::huge = 'overflow';
292 7         26 local $Text::Wrap::columns = 80;
293 7         89 $result .= wrap ($indent . "my \@names = (qw(",
294             $indent . " ", join (" ", sort @simple) . ")");
295 7 100       2012 if (@complex) {
296 3         20 foreach my $item (sort {$a->{name} cmp $b->{name}} @complex) {
  54         86  
297 23         60 my $name = perl_stringify $item->{name};
298 23         47 my $line = ",\n$indent {name=>\"$name\"";
299 23 50       53 $line .= ", type=>\"$item->{type}\"" if defined $item->{type};
300 23         40 foreach my $thing (qw (macro value default pre post def_pre def_post)) {
301 161         215 my $value = $item->{$thing};
302 161 100       269 if (defined $value) {
303 27 100       71 if (ref $value) {
304             $line .= ", $thing=>[\""
305 3         11 . join ('", "', map {perl_stringify $_} @$value) . '"]';
  6         12  
306             } else {
307 24         53 $line .= ", $thing=>\"" . perl_stringify($value) . "\"";
308             }
309             }
310             }
311 23         33 $line .= "}";
312             # Ensure that the enclosing C comment doesn't end
313             # by turning */ into *" . "/
314 23         53 $line =~ s!\*\/!\*" . "/!gs;
315             # gcc -Wall doesn't like finding /* inside a comment
316 23         47 $line =~ s!\/\*!/" . "\*!gs;
317 23         43 $result .= $line;
318             }
319             }
320 7         21 $result .= ");\n";
321              
322 7         43 $result;
323             }
324              
325             =item assign arg_hashref, VALUE...
326              
327             A method to return a suitable assignment clause. If I is aggregate
328             (eg I expects both pointer and length) then there should be multiple
329             Is for the components. I
 and I if defined give snippets 
330             of C code to proceed and follow the assignment. I
 will be at the start 
331             of a block, so variables may be defined in it.
332              
333             =cut
334              
335             # Hmm. value undef to do NOTDEF? value () to do NOTFOUND?
336             sub assign {
337 74     74 1 127 my $self = shift;
338 74         94 my $args = shift;
339             my ($indent, $type, $pre, $post, $item)
340 74         110 = @{$args}{qw(indent type pre post item)};
  74         182  
341 74   50     333 $post ||= '';
342 74         104 my $clause;
343             my $close;
344 74 100       143 if ($pre) {
345 1         4 chomp $pre;
346 1         4 $close = "$indent}\n";
347 1         12 $clause = $indent . "{\n";
348 1         4 $indent .= " ";
349 1         5 $clause .= "$indent$pre";
350 1 50       9 $clause .= ";" unless $pre =~ /;$/;
351 1         3 $clause .= "\n";
352             }
353 74 50       137 confess "undef \$type" unless defined $type;
354 74 50       163 confess "Can't generate code for type $type"
355             unless $self->valid_type($type);
356              
357 74         271 $clause .= join '', map {"$indent$_\n"}
  72         263  
358             $self->assignment_clause_for_type({type=>$type,item=>$item}, @_);
359 74         170 chomp $post;
360 74 50       158 if (length $post) {
361 0         0 $clause .= "$post";
362 0 0       0 $clause .= ";" unless $post =~ /;$/;
363 0         0 $clause .= "\n";
364             }
365 74         172 my $return = $self->return_statement_for_type($type);
366 74 50       246 $clause .= "$indent$return\n" if defined $return;
367 74 100       137 $clause .= $close if $close;
368 74         153 return $clause;
369             }
370              
371             =item return_clause arg_hashref, ITEM
372              
373             A method to return a suitable C<#ifdef> clause. I is a hashref
374             (as passed to C and C. I is the number
375             of spaces to indent, defaulting to 6.
376              
377             =cut
378              
379             sub return_clause {
380              
381             ##ifdef thingy
382             # *iv_return = thingy;
383             # return PERL_constant_ISIV;
384             ##else
385             # return PERL_constant_NOTDEF;
386             ##endif
387 73     73 1 152 my ($self, $args, $item) = @_;
388 73         115 my $indent = $args->{indent};
389              
390             my ($name, $value, $default, $pre, $post, $def_pre, $def_post, $type)
391 73         206 = @$item{qw (name value default pre post def_pre def_post type)};
392 73 100       160 $value = $name unless defined $value;
393 73         202 my $macro = $self->macro_from_item($item);
394 73   50     180 $indent = ' ' x ($indent || 6);
395 73 50       124 unless (defined $type) {
396             # use Data::Dumper; print STDERR Dumper ($item);
397 0         0 confess "undef \$type";
398             }
399              
400             ##ifdef thingy
401 73         157 my $clause = $self->macro_to_ifdef($macro);
402              
403             # *iv_return = thingy;
404             # return PERL_constant_ISIV;
405 73 100       328 $clause
406             .= $self->assign ({indent=>$indent, type=>$type, pre=>$pre, post=>$post,
407             item=>$item}, ref $value ? @$value : $value);
408              
409 73 100 33     360 if (defined $macro && $macro ne "" && $macro ne "1") {
      66        
410             ##else
411 57         111 $clause .= "#else\n";
412              
413             # return PERL_constant_NOTDEF;
414 57 100       109 if (!defined $default) {
415 56         120 my $notdef = $self->return_statement_for_notdef();
416 56 50       162 $clause .= "$indent$notdef\n" if defined $notdef;
417             } else {
418 1 50       6 my @default = ref $default ? @$default : $default;
419 1         2 $type = shift @default;
420 1         4 $clause .= $self->assign ({indent=>$indent, type=>$type, pre=>$pre,
421             post=>$post, item=>$item}, @default);
422             }
423             }
424             ##endif
425 73         191 $clause .= $self->macro_to_endif($macro);
426              
427 73         167 return $clause;
428             }
429              
430             sub match_clause {
431             # $offset defined if we have checked an offset.
432 71     71 0 156 my ($self, $args, $item) = @_;
433 71         95 my ($offset, $indent) = @{$args}{qw(checked_at indent)};
  71         147  
434 71   100     222 $indent = ' ' x ($indent || 4);
435 71         107 my $body = '';
436 71         102 my ($no, $yes, $either, $name, $inner_indent);
437 71 100       143 if (ref $item eq 'ARRAY') {
438 11         19 ($yes, $no) = @$item;
439 11   66     35 $either = $yes || $no;
440 11 50       24 confess "$item is $either expecting hashref in [0] || [1]"
441             unless ref $either eq 'HASH';
442 11         19 $name = $either->{name};
443             } else {
444             confess "$item->{name} has utf8 flag '$item->{utf8}', should be false"
445 60 50       114 if $item->{utf8};
446 60         98 $name = $item->{name};
447 60         84 $inner_indent = $indent;
448             }
449              
450 71         235 $body .= $self->memEQ_clause ({name => $name, checked_at => $offset,
451             indent => length $indent});
452             # If we've been presented with an arrayref for $item, then the user string
453             # contains in the range 128-255, and we need to check whether it was utf8
454             # (or not).
455             # In the worst case we have two named constants, where one's name happens
456             # encoded in UTF8 happens to be the same byte sequence as the second's
457             # encoded in (say) ISO-8859-1.
458             # In this case, $yes and $no both have item hashrefs.
459 71 100       500 if ($yes) {
    100          
460 7         18 $body .= $indent . " if (" . $self->is_utf8_param . ") {\n";
461             } elsif ($no) {
462 4         21 $body .= $indent . " if (!" . $self->is_utf8_param . ") {\n";
463             }
464 71 100       121 if ($either) {
465 11         37 $body .= $self->return_clause ({indent=>4 + length $indent}, $either);
466 11 100 100     52 if ($yes and $no) {
467 2         10 $body .= $indent . " } else {\n";
468 2         13 $body .= $self->return_clause ({indent=>4 + length $indent}, $no);
469             }
470 11         21 $body .= $indent . " }\n";
471             } else {
472 60         184 $body .= $self->return_clause ({indent=>2 + length $indent}, $item);
473             }
474 71         326 $body .= $indent . "}\n";
475             }
476              
477              
478             =item switch_clause arg_hashref, NAMELEN, ITEMHASH, ITEM...
479              
480             An internal method to generate a suitable C clause, called by
481             C Is are in the hash ref format as given in the description
482             of C, and must all have the names of the same length, given by
483             I. I is a reference to a hash, keyed by name, values being
484             the hashrefs in the I list. (No parameters are modified, and there can
485             be keys in the I that are not in the list of Is without
486             causing problems - the hash is passed in to save generating it afresh for
487             each call).
488              
489             =cut
490              
491             sub switch_clause {
492 14     14 1 45 my ($self, $args, $namelen, $items, @items) = @_;
493 14         24 my ($indent, $comment) = @{$args}{qw(indent comment)};
  14         36  
494 14   50     53 $indent = ' ' x ($indent || 2);
495              
496 14         37 local $Text::Wrap::huge = 'overflow';
497 14         25 local $Text::Wrap::columns = 80;
498              
499 14         28 my @names = sort map {$_->{name}} @items;
  60         152  
500 14         38 my $leader = $indent . '/* ';
501 14         33 my $follower = ' ' x length $leader;
502 14         37 my $body = $indent . "/* Names all of length $namelen. */\n";
503 14 100       35 if (defined $comment) {
504 10         33 $body = wrap ($leader, $follower, $comment) . "\n";
505 10         3626 $leader = $follower;
506             }
507 14         50 my @safe_names = @names;
508 14         36 foreach (@safe_names) {
509 60 50       98 confess sprintf "Name '$_' is length %d, not $namelen", length
510             unless length == $namelen;
511             # Argh. 5.6.1
512             # next unless tr/A-Za-z0-9_//c;
513 60 100       136 next if tr/A-Za-z0-9_// == length;
514 11         27 $_ = '"' . perl_stringify ($_) . '"';
515             # Ensure that the enclosing C comment doesn't end
516             # by turning */ into *" . "/
517 11         27 s!\*\/!\*"."/!gs;
518             # gcc -Wall doesn't like finding /* inside a comment
519 11         26 s!\/\*!/"."\*!gs;
520             }
521 14         57 $body .= wrap ($leader, $follower, join (" ", @safe_names) . " */") . "\n";
522             # Figure out what to switch on.
523             # (RMS, Spread of jump table, Position, Hashref)
524 14         2125 my @best = (1e38, ~0);
525             # Prefer the last character over the others. (As it lets us shorten the
526             # memEQ clause at no cost).
527 14         52 foreach my $i ($namelen - 1, 0 .. ($namelen - 2)) {
528 49         86 my ($min, $max) = (~0, 0);
529 49         61 my %spread;
530 49         57 if (is_perl56) {
531             # Need proper Unicode preserving hash keys for bytes in range 128-255
532             # here too, for some reason. grr 5.6.1 yet again.
533             tie %spread, 'ExtUtils::Constant::Aaargh56Hash';
534             }
535 49         81 foreach (@names) {
536 176         273 my $char = substr $_, $i, 1;
537 176         222 my $ord = ord $char;
538 176 50       282 confess "char $ord is out of range" if $ord > 255;
539 176 100       284 $max = $ord if $ord > $max;
540 176 100       290 $min = $ord if $ord < $min;
541 176         211 push @{$spread{$char}}, $_;
  176         520  
542             # warn "$_ $char";
543             }
544             # I'm going to pick the character to split on that minimises the root
545             # mean square of the number of names in each case. Normally this should
546             # be the one with the most keys, but it may pick a 7 where the 8 has
547             # one long linear search. I'm not sure if RMS or just sum of squares is
548             # actually better.
549             # $max and $min are for the tie-breaker if the root mean squares match.
550             # Assuming that the compiler may be building a jump table for the
551             # switch() then try to minimise the size of that jump table.
552             # Finally use < not <= so that if it still ties the earliest part of
553             # the string wins. Because if that passes but the memEQ fails, it may
554             # only need the start of the string to bin the choice.
555             # I think. But I'm micro-optimising. :-)
556             # OK. Trump that. Now favour the last character of the string, before the
557             # rest.
558 49         70 my $ss;
559 49         152 $ss += @$_ * @$_ foreach values %spread;
560 49         97 my $rms = sqrt ($ss / keys %spread);
561 49 100 100     283 if ($rms < $best[0] || ($rms == $best[0] && ($max - $min) < $best[1])) {
      100        
562 22         79 @best = ($rms, $max - $min, $i, \%spread);
563             }
564             }
565 14 50       44 confess "Internal error. Failed to pick a switch point for @names"
566             unless defined $best[2];
567             # use Data::Dumper; print Dumper (@best);
568 14         38 my ($offset, $best) = @best[2,3];
569 14         40 $body .= $indent . "/* Offset $offset gives the best switch position. */\n";
570              
571 14   100     42 my $do_front_chop = $offset == 0 && $namelen > 2;
572 14 100       31 if ($do_front_chop) {
573 2         8 $body .= $indent . "switch (*" . $self->name_param() . "++) {\n";
574             } else {
575 12         46 $body .= $indent . "switch (" . $self->name_param() . "[$offset]) {\n";
576             }
577 14         65 foreach my $char (sort keys %$best) {
578 54 50       121 confess sprintf "'$char' is %d bytes long, not 1", length $char
579             if length ($char) != 1;
580 54 50       103 confess sprintf "char %#X is out of range", ord $char if ord ($char) > 255;
581 54         156 $body .= $indent . "case '" . C_stringify ($char) . "':\n";
582 54         92 foreach my $thisone (sort {
583             # Deal with the case of an item actually being an array ref to 1 or 2
584             # hashrefs. Don't assign to $a or $b, as they're aliases to the
585             # original
586 7 100 33     24 my $l = ref $a eq 'ARRAY' ? ($a->[0] || $->[1]) : $a;
587 7 100 33     17 my $r = ref $b eq 'ARRAY' ? ($b->[0] || $->[1]) : $b;
588             # Sort by weight first
589             ($r->{weight} || 0) <=> ($l->{weight} || 0)
590             # Sort equal weights by name
591 7 0 50     57 or $l->{name} cmp $r->{name}}
      50        
592             # If this looks evil, maybe it is. $items is a
593             # hashref, and we're doing a hash slice on it
594 54         142 @{$items}{@{$best->{$char}}}) {
  54         96  
595             # warn "You are here";
596 60 100       107 if ($do_front_chop) {
597 14         54 $body .= $self->match_clause ({indent => 2 + length $indent,
598             checked_at => \$char}, $thisone);
599             } else {
600 46         195 $body .= $self->match_clause ({indent => 2 + length $indent,
601             checked_at => $offset}, $thisone);
602             }
603             }
604 54         111 $body .= $indent . " break;\n";
605             }
606 14         42 $body .= $indent . "}\n";
607 14         136 return $body;
608             }
609              
610             sub C_constant_return_type {
611 17     17 0 102 "static int";
612             }
613              
614             sub C_constant_prefix_param {
615 0     0 0 0 '';
616             }
617              
618             sub C_constant_prefix_param_definition {
619 0     0 0 0 '';
620             }
621              
622             sub name_param_definition {
623 17     17 0 70 "const char *" . $_[0]->name_param;
624             }
625              
626             sub namelen_param {
627 14     14 0 50 'len';
628             }
629              
630             sub namelen_param_definition {
631 0     0 0 0 'size_t ' . $_[0]->namelen_param;
632             }
633              
634             sub C_constant_other_params {
635 0     0 0 0 '';
636             }
637              
638             sub C_constant_other_params_definition {
639 0     0 0 0 '';
640             }
641              
642             =item params WHAT
643              
644             An "internal" method, subject to change, currently called to allow an
645             overriding class to cache information that will then be passed into all
646             the C<*param*> calls. (Yes, having to read the source to make sense of this is
647             considered a known bug). I is be a hashref of types the constant
648             function will return. In ExtUtils::Constant::XS this method is used to
649             returns a hashref keyed IV NV PV SV to show which combination of pointers will
650             be needed in the C argument list generated by
651             C_constant_other_params_definition and C_constant_other_params
652              
653             =cut
654              
655             sub params {
656 0     0 1 0 '';
657             }
658              
659              
660             =item dogfood arg_hashref, ITEM...
661              
662             An internal function to generate the embedded perl code that will regenerate
663             the constant subroutines. Parameters are the same as for C_constant.
664              
665             Currently the base class does nothing and returns an empty string.
666              
667             =cut
668              
669             sub dogfood {
670 0     0 1 0 ''
671             }
672              
673             =item normalise_items args, default_type, seen_types, seen_items, ITEM...
674              
675             Convert the items to a normalised form. For 8 bit and Unicode values converts
676             the item to an array of 1 or 2 items, both 8 bit and UTF-8 encoded.
677              
678             =cut
679              
680             sub normalise_items
681             {
682 14     14 1 40 my $self = shift;
683 14         44 my $args = shift;
684 14         29 my $default_type = shift;
685 14         33 my $what = shift;
686 14         26 my $items = shift;
687 14         75 my @new_items;
688 14         46 foreach my $orig (@_) {
689 241         316 my ($name, $item);
690 241 100       390 if (ref $orig) {
691             # Make a copy which is a normalised version of the ref passed in.
692 127         232 $name = $orig->{name};
693 127         295 my ($type, $macro, $value) = @$orig{qw (type macro value)};
694 127   66     222 $type ||= $default_type;
695 127         297 $what->{$type} = 1;
696 127         319 $item = {name=>$name, type=>$type};
697              
698 127 50 66     362 undef $macro if defined $macro and $macro eq $name;
699 127 100       246 $item->{macro} = $macro if defined $macro;
700 127 50 66     348 undef $value if defined $value and $value eq $name;
701 127 100       229 $item->{value} = $value if defined $value;
702 127         190 foreach my $key (qw(default pre post def_pre def_post weight
703             not_constant)) {
704 889         1067 my $value = $orig->{$key};
705 889 100       1417 $item->{$key} = $value if defined $value;
706             # warn "$key $value";
707             }
708             } else {
709 114         209 $name = $orig;
710 114         365 $item = {name=>$name, type=>$default_type};
711 114         189 $what->{$default_type} = 1;
712             }
713             warn +(ref ($self) || $self)
714             . "doesn't know how to handle values of type $_ used in macro $name"
715 241 50 0     562 unless $self->valid_type ($item->{type});
716             # tr///c is broken on 5.6.1 for utf8, so my original tr/\0-\177//c
717             # doesn't work. Upgrade to 5.8
718             # if ($name !~ tr/\0-\177//c || $] < 5.005_50) {
719 241 100 66     725 if ($name =~ tr/\0-\177// == length $name || $] < 5.005_50
      100        
720             || $args->{disable_utf8_duplication}) {
721             # No characters outside 7 bit ASCII.
722 235 50       388 if (exists $items->{$name}) {
723 0         0 die "Multiple definitions for macro $name";
724             }
725 235         584 $items->{$name} = $item;
726             } else {
727             # No characters outside 8 bit. This is hardest.
728 6 50 66     29 if (exists $items->{$name} and ref $items->{$name} ne 'ARRAY') {
729 0         0 confess "Unexpected ASCII definition for macro $name";
730             }
731             # Again, 5.6.1 tr broken, so s/5\.6.*/5\.8\.0/;
732             # if ($name !~ tr/\0-\377//c) {
733 6 100       17 if ($name =~ tr/\0-\377// == length $name) {
734             # if ($] < 5.007) {
735             # $name = pack "C*", unpack "U*", $name;
736             # }
737 5         15 $item->{utf8} = 'no';
738 5         14 $items->{$name}[1] = $item;
739 5         10 push @new_items, $item;
740             # Copy item, to create the utf8 variant.
741 5         26 $item = {%$item};
742             }
743             # Encode the name as utf8 bytes.
744 6         13 unless (is_perl56) {
745 6         17 utf8::encode($name);
746             } else {
747             # warn "Was >$name< " . length ${name};
748             $name = pack 'C*', unpack 'C*', $name . pack 'U*';
749             # warn "Now '${name}' " . length ${name};
750             }
751 6 50       22 if ($items->{$name}[0]) {
752 0         0 die "Multiple definitions for macro $name";
753             }
754 6         41 $item->{utf8} = 'yes';
755 6         12 $item->{name} = $name;
756 6         10 $items->{$name}[0] = $item;
757             # We have need for the utf8 flag.
758 6         11 $what->{''} = 1;
759             }
760 241         430 push @new_items, $item;
761             }
762 14         95 @new_items;
763             }
764              
765             =item C_constant arg_hashref, ITEM...
766              
767             A function that returns a B of C subroutine definitions that return
768             the value and type of constants when passed the name by the XS wrapper.
769             I gives a list of constant names. Each can either be a string,
770             which is taken as a C macro name, or a reference to a hash with the following
771             keys
772              
773             =over 8
774              
775             =item name
776              
777             The name of the constant, as seen by the perl code.
778              
779             =item type
780              
781             The type of the constant (I, I etc)
782              
783             =item value
784              
785             A C expression for the value of the constant, or a list of C expressions if
786             the type is aggregate. This defaults to the I if not given.
787              
788             =item macro
789              
790             The C pre-processor macro to use in the C<#ifdef>. This defaults to the
791             I, and is mainly used if I is an C. If a reference an
792             array is passed then the first element is used in place of the C<#ifdef>
793             line, and the second element in place of the C<#endif>. This allows
794             pre-processor constructions such as
795              
796             #if defined (foo)
797             #if !defined (bar)
798             ...
799             #endif
800             #endif
801              
802             to be used to determine if a constant is to be defined.
803              
804             A "macro" 1 signals that the constant is always defined, so the C<#if>/C<#endif>
805             test is omitted.
806              
807             =item default
808              
809             Default value to use (instead of Cing with "your vendor has not
810             defined...") to return if the macro isn't defined. Specify a reference to
811             an array with type followed by value(s).
812              
813             =item pre
814              
815             C code to use before the assignment of the value of the constant. This allows
816             you to use temporary variables to extract a value from part of a C
817             and return this as I. This C code is places at the start of a block,
818             so you can declare variables in it.
819              
820             =item post
821              
822             C code to place between the assignment of value (to a temporary) and the
823             return from the function. This allows you to clear up anything in I
. 
824             Rarely needed.
825              
826             =item def_pre
827              
828             =item def_post
829              
830             Equivalents of I
 and I for the default value. 
831              
832             =item utf8
833              
834             Generated internally. Is zero or undefined if name is 7 bit ASCII,
835             "no" if the name is 8 bit (and so should only match if SvUTF8() is false),
836             "yes" if the name is utf8 encoded.
837              
838             The internals automatically clone any name with characters 128-255 but none
839             256+ (ie one that could be either in bytes or utf8) into a second entry
840             which is utf8 encoded.
841              
842             =item weight
843              
844             Optional sorting weight for names, to determine the order of
845             linear testing when multiple names fall in the same case of a switch clause.
846             Higher comes earlier, undefined defaults to zero.
847              
848             =back
849              
850             In the argument hashref, I is the name of the package, and is only
851             used in comments inside the generated C code. I defaults to
852             C if undefined.
853              
854             I is the type returned by Cs that don't specify their
855             type. It defaults to the value of C. I should be given
856             either as a comma separated list of types that the C subroutine I
857             will generate or as a reference to a hash. I will be added to
858             the list if not present, as will any types given in the list of Is. The
859             resultant list should be the same list of types that C is
860             given. [Otherwise C and C may differ in the number of
861             parameters to the constant function. I is currently unused and
862             ignored. In future it may be used to pass in information used to change the C
863             indentation style used.] The best way to maintain consistency is to pass in a
864             hash reference and let this function update it.
865              
866             I governs when child functions of I are generated. If there
867             are I or more Is with the same length of name, then the code
868             to switch between them is placed into a function named I_I, for
869             example C for names 5 characters long. The default I is
870             3. A single C is always inlined.
871              
872             =cut
873              
874             # The parameter now BREAKOUT was previously documented as:
875             #
876             # I if defined signals that all the Is of the Is are of
877             # this length, and that the constant name passed in by perl is checked and
878             # also of this length. It is used during recursion, and should be C
879             # unless the caller has checked all the lengths during code generation, and
880             # the generated subroutine is only to be called with a name of this length.
881             #
882             # As you can see it now performs this function during recursion by being a
883             # scalar reference.
884              
885             sub C_constant {
886 17     17 1 83 my ($self, $args, @items) = @_;
887             my ($package, $subname, $default_type, $what, $indent, $breakout) =
888 17         33 @{$args}{qw(package subname default_type types indent breakout)};
  17         60  
889 17   50     49 $package ||= 'Foo';
890 17   100     48 $subname ||= 'constant';
891             # I'm not using this. But a hashref could be used for full formatting without
892             # breaking this API
893             # $indent ||= 0;
894              
895 17         33 my ($namelen, $items);
896 17 100       51 if (ref $breakout) {
897             # We are called recursively. We trust @items to be normalised, $what to
898             # be a hashref, and pinch %$items from our parent to save recalculation.
899 10         29 ($namelen, $items) = @$breakout;
900             } else {
901 7         17 $items = {};
902 7         17 if (is_perl56) {
903             # Need proper Unicode preserving hash keys.
904             require ExtUtils::Constant::Aaargh56Hash;
905             tie %$items, 'ExtUtils::Constant::Aaargh56Hash';
906             }
907 7   50     53 $breakout ||= 3;
908 7   66     37 $default_type ||= $self->default_type();
909 7 100       26 if (!ref $what) {
910             # Convert line of the form IV,UV,NV to hash
911 1   50     12 $what = {map {$_ => 1} split /,\s*/, ($what || '')};
  0         0  
912             # Figure out what types we're dealing with, and assign all unknowns to the
913             # default type
914             }
915 7         50 @items = $self->normalise_items ({}, $default_type, $what, $items, @items);
916             # use Data::Dumper; print Dumper @items;
917             }
918 17         77 my $params = $self->params ($what);
919              
920             # Probably "static int"
921 17         45 my ($body, @subs);
922 17         77 $body = $self->C_constant_return_type($params) . "\n$subname ("
923             # Eg "pTHX_ "
924             . $self->C_constant_prefix_param_definition($params)
925             # Probably "const char *name"
926             . $self->name_param_definition($params);
927             # Something like ", STRLEN len"
928 17 100       74 $body .= ", " . $self->namelen_param_definition($params)
929             unless defined $namelen;
930 17         58 $body .= $self->C_constant_other_params_definition($params);
931 17         34 $body .= ") {\n";
932              
933 17 100       37 if (defined $namelen) {
934             # We are a child subroutine. Print the simple description
935 10         26 my $comment = 'When generated this function returned values for the list'
936             . ' of names given here. However, subsequent manual editing may have'
937             . ' added or removed some.';
938 10         70 $body .= $self->switch_clause ({indent=>2, comment=>$comment},
939             $namelen, $items, @items);
940             } else {
941             # We are the top level.
942 7         17 $body .= " /* Initially switch on the length of the name. */\n";
943 7         78 $body .= $self->dogfood ({package => $package, subname => $subname,
944             default_type => $default_type, what => $what,
945             indent => $indent, breakout => $breakout},
946             @items);
947 7         35 $body .= ' switch ('.$self->namelen_param().") {\n";
948             # Need to group names of the same length
949 7         13 my @by_length;
950 7         17 foreach (@items) {
951 71         92 push @{$by_length[length $_->{name}]}, $_;
  71         134  
952             }
953 7         44 foreach my $i (0 .. $#by_length) {
954 37 100       105 next unless $by_length[$i]; # None of this length
955 25         60 $body .= " case $i:\n";
956 25 100       41 if (@{$by_length[$i]} == 1) {
  25 100       73  
957 11         19 my $only_thing = $by_length[$i]->[0];
958 11 100       25 if ($only_thing->{utf8}) {
959 4 100       13 if ($only_thing->{utf8} eq 'yes') {
960             # With utf8 on flag item is passed in element 0
961 2         9 $body .= $self->match_clause (undef, [$only_thing]);
962             } else {
963             # With utf8 off flag item is passed in element 1
964 2         14 $body .= $self->match_clause (undef, [undef, $only_thing]);
965             }
966             } else {
967 7         23 $body .= $self->match_clause (undef, $only_thing);
968             }
969 14         36 } elsif (@{$by_length[$i]} < $breakout) {
970             $body .= $self->switch_clause ({indent=>4},
971 4         12 $i, $items, @{$by_length[$i]});
  4         14  
972             } else {
973             # Only use the minimal set of parameters actually needed by the types
974             # of the names of this length.
975 10         21 my $what = {};
976 10         21 foreach (@{$by_length[$i]}) {
  10         25  
977 52         91 $what->{$_->{type}} = 1;
978 52 100       96 $what->{''} = 1 if $_->{utf8};
979             }
980 10         32 $params = $self->params ($what);
981             push @subs, $self->C_constant ({package=>$package,
982             subname=>"${subname}_$i",
983             default_type => $default_type,
984             types => $what, indent => $indent,
985             breakout => [$i, $items]},
986 10         75 @{$by_length[$i]});
  10         40  
987 10         58 $body .= " return ${subname}_$i ("
988             # Eg "aTHX_ "
989             . $self->C_constant_prefix_param($params)
990             # Probably "name"
991             . $self->name_param($params);
992 10         33 $body .= $self->C_constant_other_params($params);
993 10         30 $body .= ");\n";
994             }
995 25         66 $body .= " break;\n";
996             }
997 7         21 $body .= " }\n";
998             }
999 17         62 my $notfound = $self->return_statement_for_notfound();
1000 17 50       58 $body .= " $notfound\n" if $notfound;
1001 17         30 $body .= "}\n";
1002 17         138 return (@subs, $body);
1003             }
1004              
1005             1;
1006             __END__