File Coverage

blib/lib/Scalar/Type.pm
Criterion Covered Total %
statement 39 41 95.1
branch 26 30 86.6
condition 4 6 66.6
subroutine 14 14 100.0
pod 5 5 100.0
total 88 96 91.6


line stmt bran cond sub pod time code
1             package Scalar::Type;
2              
3 3     3   249602 use strict;
  3         32  
  3         90  
4 3     3   17 use warnings;
  3         4  
  3         132  
5              
6             our $BOOL_SUPPORTED;
7              
8 3     3   82 BEGIN { $BOOL_SUPPORTED = ($] >= 5.035007) }
9              
10 3     3   2089 use if $BOOL_SUPPORTED, qw(builtin isbool);
  3         43  
  3         15  
11              
12 3     3   131 use Carp qw(croak);
  3         6  
  3         160  
13 3     3   18 use Config;
  3         6  
  3         187  
14              
15             our $VERSION = '0.3.0';
16              
17             require XSLoader;
18             XSLoader::load(__PACKAGE__, $VERSION);
19              
20 3     3   17 use Scalar::Util qw(blessed);
  3         7  
  3         144  
21              
22 3     3   19 use base qw(Exporter);
  3         4  
  3         2124  
23              
24             =head1 NAME
25              
26             Scalar::Type
27              
28             =head1 DESCRIPTION
29              
30             Figure out what type a scalar is
31              
32             =head1 SYNOPSIS
33              
34             use Scalar::Type qw(is_number);
35              
36             if(is_number(2)) {
37             # yep, 2 is a number
38             # it is_integer too
39             }
40              
41             if(is_number("2")) {
42             # no, "2" is a string
43             }
44              
45             =head1 OVERVIEW
46              
47             Perl scalars can be either strings or numbers, and normally you don't really
48             care which is which as it will do all the necessary type conversions automagically.
49             This means that you can perform numeric operations on strings and provided that they
50             B<look like> a number you'll get a sensible result:
51              
52             my $string = "4";
53             my $number = 1;
54             my $result = $string + $number; # 5
55              
56             But in some rare cases, generally when you are serialising data, the difference
57             matters. This package provides some useful functions to help you figure out what's
58             what. The following functions are available. None of them are exported by default.
59             If you want them all, export ':all':
60              
61             use Scalar::Type qw(:all);
62              
63             and if you just want the 'is_*' functions you can get them all in one go:
64              
65             use Scalar::Type qw(is_*);
66              
67             For Reasons, C<:is_*> is equivalent.
68              
69             =cut
70              
71             our @EXPORT_OK = qw(
72             type sizeof is_integer is_number is_bool
73             );
74             our %EXPORT_TAGS = (
75             all => \@EXPORT_OK,
76             'is_*' => [grep { /^is_/ } @EXPORT_OK]
77             );
78              
79             sub import {
80 3 100   3   22 __PACKAGE__->export_to_level(1, map { $_ eq 'is_*' ? ':is_*' : $_ } @_);
  7         4970  
81             }
82              
83             =head1 FUNCTIONS
84              
85             All of these functions require an argument. It is a fatal error to call
86             them without.
87              
88             =head2 type
89              
90             Returns the type of its argument.
91              
92             If the argument is a reference then it returns either
93             C<blessed($argument)> (if it's an object),
94             or C<'REF_TO_'.ref($argument)>.
95              
96             If the argument is C<undef> then it returns C<'UNDEF'>.
97              
98             If you are using perl 5.35.7 or later and the argument is the result of a
99             comparison then it returns C<'BOOL'>.
100              
101             Otherwise it looks for the IOK or NOK flags on the underlying SV (see
102             L</"GORY DETAILS"> for the exact mechanics) and returns C<'INTEGER'> or C<'NUMBER'>
103             as appropriate. Finally, if neither of those are set it returns C<'SCALAR'>.
104              
105             =cut
106              
107             sub type {
108 99 100   99 1 25860 croak(__PACKAGE__."::type requires an argument") if($#_ == -1);
109 98         163 my $arg = shift;
110 98 50 33     918 return blessed($arg) ? blessed($arg) :
    100          
    100          
    100          
111             ref($arg) ? 'REF_TO_'.ref($arg) :
112             !defined($arg) ? 'UNDEF' :
113             ($BOOL_SUPPORTED && isbool($arg)) ? 'BOOL' :
114             _scalar_type($arg);
115             }
116              
117             =head2 sizeof
118              
119             Returns the size, in bytes, of the underlying storage for numeric types, and die()s for any other type.
120              
121             =cut
122              
123             sub sizeof {
124 4 100   4 1 8469 croak(__PACKAGE__."::sizeof requires an argument") if($#_ == -1);
125 3         9 my $arg = shift;
126 3         10 my $type = type($arg);
127 3 100       19 if($type eq 'INTEGER') {
    100          
128 1         115 return $Config{ivsize};
129             } elsif($type eq 'NUMBER') {
130 1         88 return $Config{nvsize};
131             } else {
132 1         5 croak(__PACKAGE__."::sizeof: '$arg' isn't numeric: ".type($arg)."\n");
133             }
134             }
135              
136             =head2 is_integer
137              
138             Returns true if its argument is an integer. Note that "1" is not an integer, it
139             is a string. 1 is an integer. 1.1 is obviously not an integer. 1.0 is also not
140             an integer, as it makes a different statement about precision - 1 is *exactly*
141             one, but 1.0 is only one to two significant figures.
142              
143             All integers are of course also numbers.
144              
145             =cut
146              
147             sub is_integer {
148 57 100   57 1 23002 croak(__PACKAGE__."::is_integer requires an argument") if($#_ == -1);
149 56 100       110 type(@_) eq 'INTEGER' ? 1 : 0;
150             }
151              
152             =head2 is_number
153              
154             Returns true if its argument is a number. "1" is not a number, it is a string.
155             1 is a number. 1.0 and 1.1 are numbers too.
156              
157             =cut
158              
159             sub is_number {
160 24 100   24 1 3406 croak(__PACKAGE__."::is_number requires an argument") if($#_ == -1);
161 23 100 100     47 is_integer(@_) || type(@_) eq 'NUMBER' ? 1 : 0;
162             }
163              
164             =head2 is_bool
165              
166             It is a fatal error to call this on perl versions earlier than 5.35.7.
167              
168             Returns true if its argument is a Boolean - ie, the result of a comparison.
169              
170             =cut
171              
172             sub is_bool {
173 1 50   1 1 194 croak(__PACKAGE__."::is_bool not supported on your perl") if(!$BOOL_SUPPORTED);
174 0 0         croak(__PACKAGE__."::is_bool requires an argument") if($#_ == -1);
175 0           type(@_) eq 'BOOL';
176             }
177              
178             =head1 GORY DETAILS
179              
180             =head2 PERL VARIABLE INTERNALS
181              
182             As far as Perl code is concerned scalars will present themselves as integers,
183             floats or strings on demand. Internally scalars are stored in a C structure,
184             called an SV (scalar value), which contains several slots. The important ones
185             for our purposes are:
186              
187             =over
188              
189             =item IV
190              
191             an integer value
192              
193             =item UV
194              
195             an unsigned integer value, only used for ints > MAXINT / 2.
196              
197             =item NV
198              
199             a numeric value (ie a float)
200              
201             =item PV
202              
203             a pointer value (ie a string)
204              
205             =back
206              
207             When a value is created one of those slots will be filled. As various
208             operations are done on a value the slot's contents may change, and other
209             slots may be filled.
210              
211             For example:
212              
213             my $foo = "4"; # fill $foo's PV slot, as "4" is a string
214              
215             my $bar = $foo + 1; # fill $bar's IV slot, as 4 + 1 is an int,
216             # and fill $foo's IV slot, as we had to figure
217             # out the numeric value of the string
218              
219             $foo = "lemon"; # fill $foo's PV slot, as "lemon" is a string
220              
221             That last operation immediately shows a problem. C<$foo>'s IV slot was
222             filled with the integer value C<4>, but the assignment of the string
223             C<"lemon"> only filled the PV slot. So what's in the IV slot? There's a
224             handy tool for that, L<Devel::Peek>, which is distributed with perl.
225             Here's part of Devel::Peek's output:
226              
227             $ perl -MDevel::Peek -E 'my $foo = 4; $foo = "lemon"; Dump($foo);'
228             IV = 4
229             PV = 0x7fe6e6c04c90 "lemon"\0
230              
231             So how, then, does perl know that even though there's a value in the IV
232             slot it shouldn't be used? Because once you've assigned C<"lemon"> to
233             the variable you can't get that C<4> to show itself ever again, at least
234             not from pure perl code.
235              
236             The SV also has a flags field, which I missed out above. (I've also missed
237             out some of the flags here, I'm only showing you the relevant ones):
238              
239             $ perl -MDevel::Peek -E 'my $foo = 4; $foo = "lemon"; Dump($foo);'
240             FLAGS = (POK)
241             IV = 4
242             PV = 0x7fe6e6c04c90 "lemon"\0
243              
244             The C<POK> flag means, as you might have guessed, that the C<PV> slot has
245             valid contents - in case you're wondering, the C<PV> slot there contains
246             a pointer to the memory address C<0x7fe6e6c04c90>, at which can be found
247             the word C<lemon>.
248              
249             It's possible to have multiple flags set. That's the case in the second
250             line of code in the example. In that example a variable contains the
251             string C<"4">, so the C<PV> slot is filled and the C<POK> flag is set. We
252             then take the value of that variable, add 1, and assign the result to
253             another variable. Obviously adding 1 to a string is meaningless, so the
254             string has to first be converted to a number. That fills the C<IV> slot:
255              
256             $ perl -MDevel::Peek -E 'my $foo = "4"; my $bar = $foo + 1; Dump($foo);'
257             FLAGS = (IOK,POK)
258             IV = 4
259             PV = 0x7fd6e7d05210 "4"\0
260              
261             Notice that there are now two flags. C<IOK> means that the C<IV> slot's
262             contents are valid, and C<POK> that the C<PV> slot's contents are valid.
263             Why do we need both slots in this case? Because a non-numeric string such
264             as C<"lemon"> is treated as the integer C<0> if you perform numeric
265             operations on it.
266              
267             All that I have said above about C<IV>s also applies to C<NV>s, and you
268             will sometimes come across a variable with both the C<IV> and C<NV> slots
269             filled, or even all three:
270              
271             $ perl -MDevel::Peek -E 'my $foo = 1e2; my $bar = $foo + 0; $bar = $foo . ""; Dump($foo)'
272             FLAGS = (IOK,NOK,POK)
273             IV = 100
274             NV = 100
275             PV = 0x7f9ee9d12790 "100"\0
276              
277             Finally, it's possible to have multiple flags set even though the slots
278             contain what looks (to a human) like different values:
279              
280             $ perl -MDevel::Peek -E 'my $foo = "007"; $foo + 0; Dump($foo)'
281             FLAGS = (IOK,POK)
282             IV = 7
283             PV = 0x7fcf425046c0 "007"\0
284              
285             That code initialises the variable to the string C<"007">, then uses it
286             in a numeric operation. That causes the string to be numified, the C<IV>
287             slot to be filled, and the C<IOK> flag set. It should, of course, be clear
288             to any fan of classic literature that "007" and 7 are very different things.
289             "007" is not an integer.
290              
291             =head3 Booleans
292              
293             In perl 5.35.7 and later, Boolean values - ie the results of comparisons -
294             have some extra magic. As well as their value, which is either C<1> (true,
295             an integer) or C<''> (false, an empty string), they have a flag to indicate
296             their Booleanness. This is exposed via the C<builtin::isbool> perl function
297             so we don't need to do XS voodoo to interrogate it.
298              
299             =head2 WHAT Scalar::Type DOES (at least in version 0.1.0)
300              
301             NB that this section documents an internal function that is not intended
302             for public use. The interface of C<_scalar_type> should be considered to
303             be unstable, not fit for human consumption, and subject to change without
304             notice. This documentation is correct as of version 0.1.0 but may not be
305             updated for future versions - its purpose is pedagogical only.
306              
307             The C<is_*> functions are just wrappers around the C<type> function. That
308             in turn delegates most of the work to a few lines of C code which grovel
309             around looking at the contents of the individual slots and flags. That
310             function isn't exported, but if you really want to call it directly it's
311             called C<_scalar_type> and will return one of three strings, C<INTEGER>,
312             C<NUMBER>, or C<SCALAR>. It will return C<SCALAR> even for a reference or
313             undef, which is why I said that the C<type> function only *mostly* wraps
314             around it :-)
315              
316             The first thing that C<_scalar_type> does is look at the C<IOK> flag.
317             If it's set, and the C<POK> flag is not set, then it returns C<INTEGER>.
318             If C<IOK> and C<POK> are set it stringifies the contents of the C<IV> slot,
319             compares to the contents of the C<PV> slot, and returns C<INTEGER> if
320             they are the same, or C<SCALAR> otherwise.
321              
322             The reason for jumping through those hoops is so that we can correctly
323             divine the type of C<"007"> in the last example above.
324              
325             If C<IOK> isn't set we then look at C<NOK>. That follows exactly the same
326             logic, looking also at C<POK>, and returning either C<NUMBER> or C<SCALAR>,
327             being careful about strings like C<"007.5">.
328              
329             If neither C<IOK> nor C<NOK> is set then we return C<SCALAR>.
330              
331             And what about C<UV>s? They are treated exactly the same as C<IV>s, and a
332             variable with a valid C<UV> slot will have the B<C<IOK>> flag set. It will
333             also have the C<IsUV> flag set, which we use to determine how to stringify
334             the number.
335              
336             =head1 SEE ALSO
337              
338             L<Scalar::Util> in particular its C<blessed> function.
339              
340             L<builtin> if you have perl 5.35.7 or later.
341              
342             =head1 BUGS
343              
344             If you find any bugs please report them on Github, preferably with a test case.
345              
346             Integers that are specified using exponential notation, such as if you say 1e2
347             instead of 100, are *not* internally treated as integers. The perl parser is
348             lazy and only bothers to convert them into an integer after you perform int-ish
349             operations on them, such as adding 0. Likewise if you add 0 to the thoroughly
350             non-numeric "100" perl will convert it to an integer. These edge cases are partly
351             why you almost certainly don't care about what this module does. If they irk
352             you, complain to p5p.
353              
354             =head1 FEEDBACK
355              
356             I welcome feedback about my code, especially constructive criticism.
357              
358             =head1 AUTHOR, COPYRIGHT and LICENCE
359              
360             Copyright 2021 David Cantrell E<lt>F<david@cantrell.org.uk>E<gt>
361              
362             This software is free-as-in-speech software, and may be used,
363             distributed, and modified under the terms of either the GNU
364             General Public Licence version 2 or the Artistic Licence. It's
365             up to you which one you use. The full text of the licences can
366             be found in the files GPL2.txt and ARTISTIC.txt, respectively.
367              
368             =head1 CONSPIRACY
369              
370             This module is also free-as-in-mason software.
371              
372             =cut
373              
374             1;