File Coverage

blib/lib/GCC/Builtins.pm
Criterion Covered Total %
statement 8 8 100.0
branch n/a
condition n/a
subroutine 3 3 100.0
pod n/a
total 11 11 100.0


line stmt bran cond sub pod time code
1             package GCC::Builtins;
2              
3 40     40   4528412 use 5.006;
  40         160  
4 40     40   235 use strict;
  40         262  
  40         1231  
5 40     40   214 use warnings;
  40         106  
  40         13980  
6              
7             our $VERSION = '0.06';
8              
9             our @ISA = (qw/Exporter DynaLoader/);
10             our %EXPORT_TAGS = ( 'all' => [qw( bswap16 bswap32 bswap64 clrsb clrsbl clrsbll clz clzl clzll ctz ctzl ctzll ffs ffsl ffsll huge_val huge_valf huge_vall inf infd128 infd32 infd64 inff infl nan nanf nanl parity parityl parityll popcount popcountl popcountll powi powif powil )] );
11             our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
12              
13             require DynaLoader;
14             __PACKAGE__->bootstrap($VERSION);
15              
16             #########################################################################
17             #### WARNING: do not edit GCC/Builtins.pm or GCC/Builtins.xs
18             #### or typemap or t/6*.t files
19             #### they are auto-generated by me and all changes will be lost
20             #### ALSO, no need to say that the POD here is also auto-generated
21             #### and will be short-lived.
22             #### EDIT all these in file
23             #### sbin/build-gcc-builtins-package.pl
24             #### and then run sbin/build-gcc-builtins-package.pl
25             #### to update the auto-generated files
26             #########################################################################
27              
28             =pod
29              
30             =encoding UTF-8
31              
32             =head1 NAME
33              
34             GCC::Builtins - access GCC compiler builtin functions via XS
35              
36             =head1 VERSION
37              
38             Version 0.06
39              
40             =head1 SYNOPSIS
41              
42             This module provides Perl access to GCC C compiler
43             builtin functions.
44              
45             use GCC::Builtins qw/:all/;
46             # or use GCC::Builtins qw/ ... clz ... /;
47             my $leading_zeros = GCC::Builtins::clz(10);
48             # 28
49              
50             =head1 EXPORT
51              
52             =over 2
53              
54             =item * C<bswap16>
55              
56             =item * C<bswap32>
57              
58             =item * C<bswap64>
59              
60             =item * C<clrsb>
61              
62             =item * C<clrsbl>
63              
64             =item * C<clrsbll>
65              
66             =item * C<clz>
67              
68             =item * C<clzl>
69              
70             =item * C<clzll>
71              
72             =item * C<ctz>
73              
74             =item * C<ctzl>
75              
76             =item * C<ctzll>
77              
78             =item * C<ffs>
79              
80             =item * C<ffsl>
81              
82             =item * C<ffsll>
83              
84             =item * C<huge_val>
85              
86             =item * C<huge_valf>
87              
88             =item * C<huge_vall>
89              
90             =item * C<inf>
91              
92             =item * C<_Decimal128 infd128()>
93              
94             =item * C<_Decimal32 infd32()>
95              
96             =item * C<_Decimal64 infd64()>
97              
98             =item * C<inff>
99              
100             =item * C<infl>
101              
102             =item * C<nan>
103              
104             =item * C<nanf>
105              
106             =item * C<nanl>
107              
108             =item * C<parity>
109              
110             =item * C<parityl>
111              
112             =item * C<parityll>
113              
114             =item * C<popcount>
115              
116             =item * C<popcountl>
117              
118             =item * C<popcountll>
119              
120             =item * C<powi>
121              
122             =item * C<powif>
123              
124             =item * C<powil>
125              
126              
127              
128             =back
129              
130             Export tag C<:all> imports B<all> exportable functions, like:
131              
132             use GCC::Builtins qw/:all/;
133              
134              
135             =head1 SUBROUTINES
136              
137             =head2 C<bswap16>
138              
139             Returns x with the order of the bytes reversed; for example,
140             0xaabb becomes 0xbbaa. Byte here always means
141             exactly 8 bits.
142              
143              
144             =head2 C<bswap32>
145              
146             Similar to __builtin_bswap16, except the argument and return types
147             are 32-bit.
148              
149              
150             =head2 C<bswap64>
151              
152             Similar to __builtin_bswap32, except the argument and return types
153             are 64-bit.
154              
155              
156             =head2 C<clrsb>
157              
158             Returns the number of leading redundant sign bits in x, i.e. the
159             number of bits following the most significant bit that are identical
160             to it. There are no special cases for 0 or other values.
161              
162              
163             =head2 C<clrsbl>
164              
165             Similar to __builtin_clrsb, except the argument type is
166             long.
167              
168              
169             =head2 C<clrsbll>
170              
171             Similar to __builtin_clrsb, except the argument type is
172             long long.
173              
174              
175             =head2 C<clz>
176              
177             Returns the number of leading 0-bits in x, starting at the most
178             significant bit position. If x is 0, the result is undefined.
179              
180              
181             =head2 C<clzl>
182              
183             Similar to __builtin_clz, except the argument type is
184             unsigned long.
185              
186              
187             =head2 C<clzll>
188              
189             Similar to __builtin_clz, except the argument type is
190             unsigned long long.
191              
192              
193             =head2 C<ctz>
194              
195             Returns the number of trailing 0-bits in x, starting at the least
196             significant bit position. If x is 0, the result is undefined.
197              
198              
199             =head2 C<ctzl>
200              
201             Similar to __builtin_ctz, except the argument type is
202             unsigned long.
203              
204              
205             =head2 C<ctzll>
206              
207             Similar to __builtin_ctz, except the argument type is
208             unsigned long long.
209              
210              
211             =head2 C<ffs>
212              
213             Returns one plus the index of the least significant 1-bit of x, or
214             if x is zero, returns zero.
215              
216              
217             =head2 C<ffsl>
218              
219             Similar to __builtin_ffs, except the argument type is
220             long.
221              
222              
223             =head2 C<ffsll>
224              
225             Similar to __builtin_ffs, except the argument type is
226             long long.
227              
228              
229             =head2 C<huge_val>
230              
231             Returns a positive infinity, if supported by the floating-point format,
232             else DBL_MAX. This function is suitable for implementing the
233             ISO C macro HUGE_VAL.
234              
235              
236             =head2 C<huge_valf>
237              
238             Similar to __builtin_huge_val, except the return type is float.
239              
240              
241             =head2 C<huge_vall>
242              
243             Similar to __builtin_huge_val, except the return
244             type is long double.
245              
246              
247             =head2 C<inf>
248              
249             Similar to __builtin_huge_val, except a warning is generated
250             if the target floating-point format does not support infinities.
251              
252              
253             =head2 C<_Decimal128 infd128()>
254              
255             Similar to __builtin_inf, except the return type is _Decimal128.
256              
257              
258             =head2 C<_Decimal32 infd32()>
259              
260             Similar to __builtin_inf, except the return type is _Decimal32.
261              
262              
263             =head2 C<_Decimal64 infd64()>
264              
265             Similar to __builtin_inf, except the return type is _Decimal64.
266              
267              
268             =head2 C<inff>
269              
270             Similar to __builtin_inf, except the return type is float.
271             This function is suitable for implementing the ISO C99 macro INFINITY.
272              
273              
274             =head2 C<infl>
275              
276             Similar to __builtin_inf, except the return
277             type is long double.
278              
279              
280             =head2 C<nan>
281              
282             This is an implementation of the ISO C99 function nan.
283              
284              
285             =head2 C<nanf>
286              
287             Similar to __builtin_nan, except the return type is float.
288              
289              
290             =head2 C<nanl>
291              
292             Similar to __builtin_nan, except the return type is long double.
293              
294              
295             =head2 C<parity>
296              
297             Returns the parity of x, i.e. the number of 1-bits in x
298             modulo 2.
299              
300              
301             =head2 C<parityl>
302              
303             Similar to __builtin_parity, except the argument type is
304             unsigned long.
305              
306              
307             =head2 C<parityll>
308              
309             Similar to __builtin_parity, except the argument type is
310             unsigned long long.
311              
312              
313             =head2 C<popcount>
314              
315             Returns the number of 1-bits in x.
316              
317              
318             =head2 C<popcountl>
319              
320             Similar to __builtin_popcount, except the argument type is
321             unsigned long.
322              
323              
324             =head2 C<popcountll>
325              
326             Similar to __builtin_popcount, except the argument type is
327             unsigned long long.
328              
329              
330             =head2 C<powi>
331              
332             Returns the first argument raised to the power of the second. Unlike the
333             pow function no guarantees about precision and rounding are made.
334              
335              
336             =head2 C<powif>
337              
338             Returns the first argument raised to the power of the second. Unlike the
339             pow function no guarantees about precision and rounding are made.
340              
341              
342             =head2 C<powil>
343              
344             Returns the first argument raised to the power of the second. Unlike the
345             pow function no guarantees about precision and rounding are made.
346              
347              
348              
349              
350             =head1 UPDATING THE LIST OF FUNCTIONS
351              
352             The list of functions was extracted from L
353             using the script C This script is
354             part of the distribution but it is not installed in the host system.
355             This file is HTML documenting these functions. I found it easier to parse
356             this file than to parse GCC header files, mainly because the latter
357             contain macros and typedef which I could not parse without the help of
358             the C pre-processor.
359              
360             And so the list of provided functions may not be perfect. Certainly there are some functions
361             missing, simply because some functions do not make sense when called from Perl.
362             For example C,
363             C etc. Some others are missing because they
364             have exotic data types for function arguments and/or return values
365             which I did not know how to implement in Perl. Others
366             have reported missing symbols, perhaps they
367             need a higher C standard (adjusted via the C in C).
368              
369             If you need another builtin function to be supported please raise
370             an L.
371             Please make sure you provide me with a way to include this function.
372             What C, how to C its return type and arguments. And
373             also provide a test script to test it (similar to those found in C directory).
374              
375             An easy way to experiment is to use C<cpanm> (provided by L<App::cpanminus>)
376             to fetch and unpack the distribution
377             and then open a shell at the distribution directory:
378              
379             cpanm --look GCC::Builtins
380              
381             and then
382              
383             sbin/build-gcc-builtins-package.sh
384             sbin/build-gcc-builtins-package.pl
385             perl Makefile.PL && make all && make test
386              
387             Note that C<GCC/Builtins.pm>, C<GCC/Builtins.xs>
388             and C<typemap> are auto-generated by the above scripts. Do not
389             edit them. Edit C<sbin/build-gcc-builtins-package.pl>
390             instead.
391              
392             =head1 ALTERNATIVES
393              
394             The L</BENCHMARKS> section below suggests that a 100% performance gain
395             awaits users who prefer to call L rather than implementing
396             them in pure Perl.
397              
398             However, you can still harvest those gains by coding critical sections in your Perl code
399             in assembly via L<Inline::C>. Assembly can be run from within a C program with the
400             GNU C Compiler (GCC) which offers the C<asm> functionality.
401              
402             I have outlined how in this L
403             in this L,
404             over at the L Monastery.
405              
406             Here is the relevant code:
407              
408             use Inline C;
409              
410             use strict;
411             use warnings;
412              
413             # Assembly code via Inline::C to return the
414             # 1. number of leading zeros of the input integer
415             # 2. a number with only the bit set where the MSSB is located
416             #
417             # by bliako
418             # for https://perlmonks.org/?node_id=11158279
419             # 21/03/2024
420              
421             my $z = 17;
422             my $res = mssb($z);
423             print "Leading zeros for $z : ".$res->[0]."\n";
424             print "MSSB for $z : ".sprintf("%032b\n", $res->[1])."\n";
425             # result:
426             # Leading zeros for 17 : 27
427             # MSSB for 17 : 00000000000000000000000000010000
428              
429             __END__
430             __C__
431             #include
432              
433             AV * mssb(int input){
434             int num_leading_zeros;
435             int mssb;
436             asm volatile(
437             /* note: lzcnt inp, out
438             mov src, dst
439             add what, dst
440             # set bit of value in dst at zero-based bitposition:
441             btsl bitposition, dst (it modifies dst)
442             */
443             "lzcnt %[input], %[num_leading_zeros] \n\t\
444             mov $32, %%eax \n\t\
445             sub %[num_leading_zeros], %%eax \n\t\
446             sub $1, %%eax \n\t\
447             xor %[mssb], %[mssb] \n\t\
448             bts %%eax, %[mssb] \n\t\
449             "
450             /* outputs */
451             : [num_leading_zeros] "=r" (num_leading_zeros)
452             , [mssb] "=r" (mssb)
453             /* inputs */
454             : [input] "mr" (input)
455             /* clobbers: we are messing with these registers: */
456             : "eax"
457             );
458              
459             // return an arrayref of the two outputs
460             AV* ret = newAV();
461             sv_2mortal((SV*)ret);
462             av_push(ret, newSViv(num_leading_zeros));
463             av_push(ret, newSViv(mssb));
464              
465             return ret;
466             }
467              
468             You can also inline assembly in your Perl code with L
469              
470             Be advised that GCC builtins are also calling assembly code.
471             In fact the above assembly code is how GCC implements C.
472             So, inline assembly and L should yield, more-or-less,
473             the same performance gain.
474              
475             =head1 TESTING
476              
477             For each exported sub there is a corresponding auto-generated
478             test file. The test goes as far as loading the library and
479             calling the function from Perl.
480              
481             However, there may be errors in the expected results
482             because that was done without verifying with a C test program.
483              
484             =head1 BENCHMARKS
485              
486             Counting leading zeros (clz) will be used to
487             benchmark the GCC builtin C<__builtin_clz()>
488             and a pure Perl implementation as suggested
489             by Perl Monk coldr3ality
490             in this L<discussion|https://perlmonks.org/?node_id=11158279>.
491              
492             C<clz>, operating on the binary representation of a number,
493             counts the zeros starting from the most significant end until
494             it finds the first bit set (to 1), which essentially gives the
495             zero-based index, from the most significant end, of the MSB set to 1.
496              
497             The benchmarks favour the GCC builtin C<__builtin_clz()>
498             which is about twice as fast as the pure Perl implementation.
499              
500             The benchmarks can be run with C<make benchmarks>.
501             An easy way to let Perl fetch and unpack the distribution
502             for you is to use C<cpanm> to open a shell
503              
504             cpanm --look GCC::Builtins
505              
506             and then
507              
508             perl Makefile.PL && make all && make test && make benchmarks
509              
510             The following benchmark results indicate that the use
511             of L (C in this case)
512             yields more than 100% performance gain
513             over equivalent pure perl code:
514              
515             Benchmark: timing 50000000 iterations of clz/xs, clz/pp-ugly...
516             clz/xs: 3.92331 wallclock secs ( 3.92 usr + 0.00 sys = 3.92 CPU) @ 12755102.04/s (n=50000000)
517             clz/pp-ugly: 8.24574 wallclock secs ( 8.23 usr + 0.00 sys = 8.23 CPU) @ 6075334.14/s (n=50000000)
518             Rate clz/pp-ugly clz/xs
519             clz/pp-ugly 6075334/s -- -52%
520             clz/xs 12755102/s 110% --
521             KEY:
522             clz/xs : calling GCC builtin clz() via XS from Perl
523             clz/pp-ugly : as suggested by coldr3ality (see https://perlmonks.org/?node_id=11158279)
524              
525             Benchmark: timing 50000000 iterations of clzl/xs, clzl/pp-ugly...
526             clzl/xs: 3.84597 wallclock secs ( 3.84 usr + 0.00 sys = 3.84 CPU) @ 13020833.33/s (n=50000000)
527             clzl/pp-ugly: 8.44006 wallclock secs ( 8.43 usr + 0.00 sys = 8.43 CPU) @ 5931198.10/s (n=50000000)
528             Rate clzl/pp-ugly clzl/xs
529             clzl/pp-ugly 5931198/s -- -54%
530             clzl/xs 13020833/s 120% --
531             KEY:
532             clzl/xs : calling GCC builtin clzl() via XS from Perl
533             clzl/pp-ugly : as suggested by coldr3ality (see https://perlmonks.org/?node_id=11158279)
534              
535             So, it pays to use this module if performance is an issue.
536              
537             =head1 CAVEATS
538              
539             If you observe weird return results or core-dumps it is very likely that
540             the fault is mine while compiling the C<typemap>. The file in the distribution
541             C<typemap> was compiled by me to translate C's data types into Perl's.
542             And for some of these I am not sure what the right type is. For example,
543             is C's C equivalent to Perl's C? How about
544             C's C mapping to Perl's C and C
545             to C?
546              
547             Please L any corrections.
548              
549             Note that C<GCC/Builtins.pm>, C<GCC/Builtins.xs>
550             and C<typemap> are auto-generated by the above scripts. Do not
551             edit them. Edit C<sbin/build-gcc-builtins-package.pl>
552             instead.
553              
554             =head1 AUTHOR
555              
556             Andreas Hadjiprocopis, C<< >>
557              
558             =head1 BUGS
559              
560             Please report any bugs or feature requests to C, or through
561             the web interface at L. I will be notified, and then you'll
562             automatically be notified of progress on your bug as I make changes.
563              
564              
565             =head1 SUPPORT
566              
567             You can find documentation for this module with the perldoc command.
568              
569             perldoc GCC::Builtins
570              
571              
572             You can also look for information at:
573              
574             =over 4
575              
576             =item * RT: CPAN's request tracker (report bugs here)
577              
578             L
579              
580              
581             =item * Review this module at PerlMonks
582              
583             L
584              
585             =item * Search CPAN
586              
587             L
588              
589             =back
590              
591              
592             =head1 ACKNOWLEDGEMENTS
593              
594             =over 2
595              
596             =item * This module started by this discussion at PerlMonks:
597              
598             L
599              
600             =item * Hackers of Free Software.
601              
602             =item * GNU and the Free Software Foundation, providers of GNU Compiler Collection.
603              
604             =back
605              
606             =head1 HUGS
607              
608             !Almaz!
609              
610              
611             =head1 LICENSE AND COPYRIGHT
612              
613             This software is Copyright (c) 2024 by Andreas Hadjiprocopis.
614              
615             This is free software, licensed under:
616              
617             The Artistic License 2.0 (GPL Compatible)
618              
619              
620             =cut
621              
622             1; # End of GCC::Builtins