File Coverage

blib/lib/Scalar/Type.pm
Criterion Covered Total %
statement 48 48 100.0
branch 29 34 85.2
condition 5 6 83.3
subroutine 17 17 100.0
pod 6 6 100.0
total 105 111 94.5


line stmt bran cond sub pod time code
1             package Scalar::Type;
2              
3 5     5   1212250 use strict;
  5         12  
  5         164  
4 5     5   38 use warnings;
  5         16  
  5         601  
5              
6             our $IS_BOOL_FN;
7              
8             # these shenanigans can be pared back a bit once 5.36 is out
9             BEGIN {
10 5 0   5   328 $IS_BOOL_FN = $] >= 5.035010 ? 'builtin::is_bool' :
    50          
11             $] >= 5.035007 ? 'builtin::isbool' :
12             0
13             }
14 197     197 1 187443 sub bool_supported { $IS_BOOL_FN; }
15              
16             # 5.35.7 has this without the underscore, and it's not yet marked experimental
17 5     5   35 use if bool_supported() eq 'builtin::isbool', qw(builtin);
  5         18  
  5         12  
18             # 5.35.10-to-be has the underscore, and it's experimental
19 5     5   23 use if bool_supported() eq 'builtin::is_bool', qw(experimental builtin);
  5         8  
  5         12  
20              
21 5     5   16777 use Carp qw(croak);
  5         9  
  5         291  
22 5     5   23 use Config;
  5         7  
  5         428  
23              
24             our $VERSION = '1.0.1';
25              
26             require XSLoader;
27             XSLoader::load(__PACKAGE__, $VERSION);
28              
29 5     5   26 use Scalar::Util qw(blessed);
  5         6  
  5         226  
30              
31 5     5   25 use base qw(Exporter);
  5         10  
  5         1616  
32              
33             =head1 NAME
34              
35             Scalar::Type - figure out what type a scalar is
36              
37             =head1 SYNOPSIS
38              
39             use Scalar::Type qw(is_number);
40              
41             if(is_number(2)) {
42             # yep, 2 is a number
43             # it is_integer too
44             }
45              
46             if(is_number("2")) {
47             # no, "2" is a string
48             }
49              
50             =head1 OVERVIEW
51              
52             Perl scalars can be either strings or numbers, and normally you don't really
53             care which is which as it will do all the necessary type conversions automagically.
54             This means that you can perform numeric operations on strings and provided that they
55             B<look like> a number you'll get a sensible result:
56              
57             my $string = "4";
58             my $number = 1;
59             my $result = $string + $number; # 5
60              
61             But in some rare cases, generally when you are serialising data, the difference
62             matters. This package provides some useful functions to help you figure out what's
63             what. The following functions are available. None of them are exported by default.
64             If you want all that are available, export ':all':
65              
66             use Scalar::Type qw(:all);
67              
68             and if you just want the 'is_*' functions you can get them all in one go:
69              
70             use Scalar::Type qw(is_*);
71              
72             For Reasons, C<:is_*> is equivalent.
73              
74             =cut
75              
76             our @EXPORT_OK = qw(
77             type sizeof is_integer is_number bool_supported
78             );
79             push @EXPORT_OK, 'is_bool' if(bool_supported());
80             our %EXPORT_TAGS = (
81             all => \@EXPORT_OK,
82             'is_*' => [grep { /^is_/ } @EXPORT_OK]
83             );
84              
85             sub import {
86 7 100   7   74 __PACKAGE__->export_to_level(1, map { $_ eq 'is_*' ? ':is_*' : $_ } @_);
  15         6362  
87             }
88              
89             =head1 FUNCTIONS
90              
91             All of these functions require an argument. It is a fatal error to call
92             them without.
93              
94             =head2 type
95              
96             Returns the type of its argument.
97              
98             If the argument is a reference then it returns either
99             C<blessed($argument)> (if it's an object),
100             or C<'REF_TO_'.ref($argument)>.
101              
102             If the argument is C<undef> then it returns C<'UNDEF'>.
103              
104             If you are using perl 5.35.7 or later and the argument is the result of a
105             comparison then it returns C<'BOOL'>.
106              
107             Otherwise it looks for the IOK or NOK flags on the underlying SV (see
108             L</GORY DETAILS> for the exact mechanics) and returns C<INTEGER> or C<NUMBER>
109             as appropriate. Finally, if neither of those are set it returns C<SCALAR>.
110              
111             =head2 bool_supported
112              
113             Returns true if the C<'BOOL'> type is supported on this perl (ie if your
114             perl version is 5.35.7 or later) and false otherwise.
115              
116             =cut
117              
118             sub type {
119 157 100   157 1 30430 croak(__PACKAGE__."::type requires an argument") if($#_ == -1);
120 156         289 my $arg = shift;
121 5     5   34 no strict 'refs';
  5         8  
  5         2551  
122             return blessed($arg) ? blessed($arg) :
123             ref($arg) ? 'REF_TO_'.ref($arg) :
124             !defined($arg) ? 'UNDEF' :
125 156 100 66     646 (bool_supported && &{$IS_BOOL_FN}($arg)) ? 'BOOL' :
    100          
    100          
    100          
126             _scalar_type($arg);
127             }
128              
129             =head2 sizeof
130              
131             Returns the size, in bytes, of the underlying storage for numeric types, and die()s for any other type.
132              
133             =cut
134              
135             sub sizeof {
136 4 100   4 1 234577 croak(__PACKAGE__."::sizeof requires an argument") if($#_ == -1);
137 3         8 my $arg = shift;
138 3         14 my $type = type($arg);
139 3 100       15 if($type eq 'INTEGER') {
    100          
140 1         95 return $Config{ivsize};
141             } elsif($type eq 'NUMBER') {
142 1         81 return $Config{nvsize};
143             } else {
144 1         4 croak(__PACKAGE__."::sizeof: '$arg' isn't numeric: ".type($arg)."\n");
145             }
146             }
147              
148             =head2 is_integer
149              
150             Returns true if its argument is an integer. Note that "1" is not an integer, it
151             is a string. 1 is an integer. 1.1 is obviously not an integer. 1.0 is also not
152             an integer, as it makes a different statement about precision - 1 is *exactly*
153             one, but 1.0 is only one to two significant figures.
154              
155             All integers are of course also numbers.
156              
157             =cut
158              
159             sub is_integer {
160 90 100   90 1 247953 croak(__PACKAGE__."::is_integer requires an argument") if($#_ == -1);
161 89 100       203 type(@_) eq 'INTEGER' ? 1 : 0;
162             }
163              
164             =head2 is_number
165              
166             Returns true if its argument is a number. "1" is not a number, it is a string.
167             1 is a number. 1.0 and 1.1 are numbers too.
168              
169             =cut
170              
171             sub is_number {
172 39 100   39 1 4983 croak(__PACKAGE__."::is_number requires an argument") if($#_ == -1);
173 38 100 100     96 is_integer(@_) || type(@_) eq 'NUMBER' ? 1 : 0;
174             }
175              
176             =head2 is_bool
177              
178             This is not available on perl versions earlier than 5.35.7. It is a fatal error
179             to call this or try to import it on older perls.
180              
181             Returns true if its argument is a Boolean - ie, the result of a comparison.
182              
183             =cut
184              
185             sub is_bool {
186 11 50   11 1 38 croak(__PACKAGE__."::is_bool not supported on your perl") if(!bool_supported);
187 11 50       30 croak(__PACKAGE__."::is_bool requires an argument") if($#_ == -1);
188 11         27 type(@_) eq 'BOOL';
189             }
190              
191             =head1 GORY DETAILS
192              
193             =head2 PERL VARIABLE INTERNALS
194              
195             As far as Perl code is concerned scalars will present themselves as integers,
196             floats or strings on demand. Internally scalars are stored in a C structure,
197             called an SV (scalar value), which contains several slots. The important ones
198             for our purposes are:
199              
200             =over
201              
202             =item IV
203              
204             an integer value
205              
206             =item UV
207              
208             an unsigned integer value, only used for ints > MAXINT / 2.
209              
210             =item NV
211              
212             a numeric value (ie a float)
213              
214             =item PV
215              
216             a pointer value (ie a string)
217              
218             =back
219              
220             When a value is created one of those slots will be filled. As various
221             operations are done on a value the slot's contents may change, and other
222             slots may be filled.
223              
224             For example:
225              
226             my $foo = "4"; # fill $foo's PV slot, as "4" is a string
227              
228             my $bar = $foo + 1; # fill $bar's IV slot, as 4 + 1 is an int,
229             # and fill $foo's IV slot, as we had to figure
230             # out the numeric value of the string
231              
232             $foo = "lemon"; # fill $foo's PV slot, as "lemon" is a string
233              
234             That last operation immediately shows a problem. C<$foo>'s IV slot was
235             filled with the integer value C<4>, but the assignment of the string
236             C<"lemon"> only filled the PV slot. So what's in the IV slot? There's a
237             handy tool for that, L, which is distributed with perl.
238             Here's part of Devel::Peek's output:
239              
240             $ perl -MDevel::Peek -E 'my $foo = 4; $foo = "lemon"; Dump($foo);'
241             IV = 4
242             PV = 0x7fe6e6c04c90 "lemon"\0
243              
244             So how, then, does perl know that even though there's a value in the IV
245             slot it shouldn't be used? Because once you've assigned C<"lemon"> to
246             the variable you can't get that C<4> to show itself ever again, at least
247             not from pure perl code.
248              
249             The SV also has a flags field, which I missed out above. (I've also missed
250             out some of the flags here, I'm only showing you the relevant ones):
251              
252             $ perl -MDevel::Peek -E 'my $foo = 4; $foo = "lemon"; Dump($foo);'
253             FLAGS = (POK)
254             IV = 4
255             PV = 0x7fe6e6c04c90 "lemon"\0
256              
257             The C<POK> flag means, as you might have guessed, that the C<PV> slot has
258             valid contents - in case you're wondering, the C<PV> slot there contains
259             a pointer to the memory address C<0x7fe6e6c04c90>, at which can be found
260             the word C<lemon>.
261              
262             It's possible to have multiple flags set. That's the case in the second
263             line of code in the example. In that example a variable contains the
264             string C<"4">, so the C<PV> slot is filled and the C<POK> flag is set. We
265             then take the value of that variable, add 1, and assign the result to
266             another variable. Obviously adding 1 to a string is meaningless, so the
267             string has to first be converted to a number. That fills the C<IV> slot:
268              
269             $ perl -MDevel::Peek -E 'my $foo = "4"; my $bar = $foo + 1; Dump($foo);'
270             FLAGS = (IOK,POK)
271             IV = 4
272             PV = 0x7fd6e7d05210 "4"\0
273              
274             Notice that there are now two flags. C<IOK> means that the C<IV> slot's
275             contents are valid, and C<POK> that the C<PV> slot's contents are valid.
276             Why do we need both slots in this case? Because a non-numeric string such
277             as C<"lemon"> is treated as the integer C<0> if you perform numeric
278             operations on it.
279              
280             All that I have said above about C<IV>s also applies to C<NV>s, and you
281             will sometimes come across a variable with both the C<IV> and C<NV> slots
282             filled, or even all three:
283              
284             $ perl -MDevel::Peek -E 'my $foo = 1e2; my $bar = $foo + 0; $bar = $foo . ""; Dump($foo)'
285             FLAGS = (IOK,NOK,POK)
286             IV = 100
287             NV = 100
288             PV = 0x7f9ee9d12790 "100"\0
289              
290             Finally, it's possible to have multiple flags set even though the slots
291             contain what looks (to a human) like different values:
292              
293             $ perl -MDevel::Peek -E 'my $foo = "007"; $foo + 0; Dump($foo)'
294             FLAGS = (IOK,POK)
295             IV = 7
296             PV = 0x7fcf425046c0 "007"\0
297              
298             That code initialises the variable to the string C<"007">, then uses it
299             in a numeric operation. That causes the string to be numified, the C<IV>
300             slot to be filled, and the C<IOK> flag set. It should, of course, be clear
301             to any fan of classic literature that "007" and 7 are very different things.
302             "007" is not an integer.
303              
304             =head3 Booleans
305              
306             In perl 5.35.7 and later, Boolean values - ie the results of comparisons -
307             have some extra magic. As well as their value, which is either C<1> (true,
308             an integer) or C<''> (false, an empty string), they have a flag to indicate
309             their Booleanness. This is exposed via the C<builtin::is_bool> perl function
310             so we don't need to do XS voodoo to interrogate it.
311              
312             =head2 WHAT Scalar::Type DOES (at least in version 1.0.0)
313              
314             NB that this section documents an internal function that is not intended
315             for public use. The interface of C<_scalar_type> should be considered to
316             be unstable, not fit for human consumption, and subject to change without
317             notice. This documentation is correct as of version 1.0.0 but may not be
318             updated for future versions - its purpose is pedagogical only.
319              
320             The C<is_*> functions are just wrappers around the C<type> function. That
321             in turn delegates most of the work to a few lines of C code which grovel
322             around looking at the contents of the individual slots and flags. That
323             function isn't exported, but if you really want to call it directly it's
324             called C<_scalar_type> and will return one of three strings, C<INTEGER>,
325             C<NUMBER>, or C<SCALAR>. It will return C<SCALAR> even for a reference or
326             undef, which is why I said that the C<type> function only *mostly* wraps
327             around it :-)
328              
329             The first thing that C<_scalar_type> does is look at the C<IOK> flag.
330             If it's set, and the C<POK> flag is not set, then it returns C<INTEGER>.
331             If C<IOK> and C<POK> are set it stringifies the contents of the C<IV> slot,
332             compares to the contents of the C<PV> slot, and returns C<INTEGER> if
333             they are the same, or C<SCALAR> otherwise.
334              
335             The reason for jumping through those hoops is so that we can correctly
336             divine the type of C<"007"> in the last example above.
337              
338             If C<IOK> isn't set we then look at C<NOK>. That follows exactly the same
339             logic, looking also at C<POK>, and returning either C<NUMBER> or C<SCALAR>,
340             being careful about strings like C<"007.5">.
341              
342             If neither C<IOK> nor C<NOK> is set then we return C<SCALAR>.
343              
344             And what about C<UV>s? They are treated exactly the same as C<IV>s, and a
345             variable with a valid C<UV> slot will have the B<C<IOK>> flag set. It will
346             also have the C<IsUV> flag set, which we use to determine how to stringify
347             the number.
348              
349             =head1 SEE ALSO
350              
351             L<Test2::Tools::Type>, which is bundled with this module.
352              
353             L<Scalar::Util> in particular its C<blessed> function.
354              
355             L<builtin> if you have perl 5.35.7 or later.
356              
357             =head1 BUGS
358              
359             If you find any bugs please report them on Github, preferably with a test case.
360              
361             Integers that are specified using exponential notation, such as if you say 1e2
362             instead of 100, are *not* internally treated as integers. The perl parser is
363             lazy and only bothers to convert them into an integer after you perform int-ish
364             operations on them, such as adding 0. Likewise if you add 0 to the thoroughly
365             non-numeric "100" perl will convert it to an integer. These edge cases are partly
366             why you almost certainly don't care about what this module does. If they irk
367             you, complain to p5p.
368              
369             =head1 FEEDBACK
370              
371             I welcome feedback about my code, especially constructive criticism.
372              
373             =head1 AUTHOR, COPYRIGHT and LICENCE
374              
375             Copyright 2024 David Cantrell EFE
376              
377             This software is free-as-in-speech software, and may be used,
378             distributed, and modified under the terms of either the GNU
379             General Public Licence version 2 or the Artistic Licence. It's
380             up to you which one you use. The full text of the licences can
381             be found in the files GPL2.txt and ARTISTIC.txt, respectively.
382              
383             =head1 CONSPIRACY
384              
385             This module is also free-as-in-mason software.
386              
387             =cut
388              
389             1;