File Coverage

blib/lib/XS/Parse/Infix.pm
Criterion Covered Total %
statement 30 36 83.3
branch 7 8 87.5
condition 9 15 60.0
subroutine 4 5 80.0
pod n/a
total 50 64 78.1


line stmt bran cond sub pod time code
1             # You may distribute under the terms of either the GNU General Public License
2             # or the Artistic License (the same terms as Perl itself)
3             #
4             # (C) Paul Evans, 2021-2022 -- leonerd@leonerd.org.uk
5              
6             package XS::Parse::Infix 0.28;
7              
8 2     2   2590 use v5.14;
  2         9  
9 2     2   10 use warnings;
  2         4  
  2         1304  
10              
11             # No actual .xs file; the code is implemented in XS::Parse::Keyword
12             require XS::Parse::Keyword;
13              
14             =head1 NAME
15              
16             C<XS::Parse::Infix> - XS functions to assist in parsing infix operators
17              
18             =head1 DESCRIPTION
19              
20             This module provides some XS functions to assist in writing syntax modules
21             that provide new infix operators as perl syntax, primarily for authors of
22             syntax plugins. It is unlikely to be of much use to anyone else; and highly
23             unlikely to be of any use when writing perl code using these. Unless you are
24             writing a syntax plugin using XS, this module is not for you.
25              
26             This module is also currently experimental, and the design is still evolving
27             and subject to change. Later versions may break ABI compatibility, requiring
28             changes or at least a rebuild of any module that depends on it.
29              
30             In addition, the places this functionality can be used are relatively small.
31             No current release of perl actually supports custom infix operators, though I
32             have a branch where I am currently experimenting with such support:
33              
34             L
35              
36             In addition, the various C<XPK_INFIX_*> token types of L<XS::Parse::Keyword>
37             support querying on this module, so some syntax provided by other modules may
38             be able to make use of these new infix operators.
39              
40             =cut
41              
42             =head1 CONSTANTS
43              
44             =head2 HAVE_PL_INFIX_PLUGIN
45              
46             if( XS::Parse::Infix::HAVE_PL_INFIX_PLUGIN ) { ... }
47              
48             This constant is true if built on a perl that supports the C<PL_infix_plugin>
49             extension mechanism, meaning that custom infix operators registered with this
50             module will actually be recognised by the perl parser.
51              
52             No actual production or development releases of perl yet support this feature,
53             but see above for details of a branch which does.
54              
55             =cut
56              
57             =head1 XS FUNCTIONS
58              
59             =head2 boot_xs_parse_infix
60              
61             void boot_xs_parse_infix(double ver);
62              
63             Call this function from your C<BOOT> section in order to initialise the module
64             and parsing hooks.
65              
66             I<ver> should either be 0 or a decimal number for the module version
67             requirement; e.g.
68              
69             boot_xs_parse_infix(0.14);
70              
71             =head2 parse_infix
72              
73             bool parse_infix(enum XSParseInfixSelection select, struct XSParseInfixInfo **infop);
74              
75             I
76              
77             This function attempts to parse syntax for an infix operator from the current
78             parser position. If it is successful, it fills in the variable pointed to by
79             I<infop> with a pointer to the actual information structure and returns
80             C<true>. If no suitable operator is found, returns C<false>.
81              
82             =head2 xs_parse_infix_new_op
83              
84             OP *xs_parse_infix_new_op(const struct XSParseInfixInfo *info, U32 flags,
85             OP *lhs, OP *rhs);
86              
87             This function constructs a new optree fragment to represent invoking the infix
88             operator with the given operands. It should be used much the same as core
89             perl's C<newBINOP> function.
90              
91             The C<info> structure pointer would be obtained from the C<infix> field of the
92             result of invoking the various C<XPK_INFIX_*> token types from
93             C<XS::Parse::Keyword>, or by calling L</parse_infix> directly.
94              
95             =head2 register_xs_parse_infix
96              
97             void register_xs_parse_infix(const char *opname,
98             const struct XSParseInfixHooks *hooks, void *hookdata);
99              
100             This function installs a set of parsing hooks to be associated with the given
101             operator name. This new operator will then be available via
102             L<XS::Parse::Keyword> by the various C<XPK_INFIX_*> token types,
103             L</parse_infix>, or to core perl's C<PL_infix_plugin> if available.
104              
105             These tokens will all yield an info structure, with the following fields:
106              
107             struct XSParseInfixInfo {
108             const char *opname;
109             OPCODE opcode; /* for built-in operators, or OP_CUSTOM for
110             custom-registered ones */
111              
112             struct XSParseInfixHooks *hooks;
113             void *hookdata;
114              
115             enum XSParseInfixClassification cls; /* since version 0.28 */
116             };
117              
118             If the operator name contains any non-ASCII characters they are presumed to be
119             in UTF-8 encoding. This will matter for deparse purposes.
120              
121             =cut
122              
123             =head1 PARSE HOOKS
124              
125             The C<XSParseInfixHooks> structure provides the following fields which are
126             used at various stages of parsing.
127              
128             struct XSParseInfixHooks {
129             U16 flags; /* currently ignored */
130             U8 lhs_flags;
131             U8 rhs_flags;
132             enum XSParseInfixClassification cls;
133              
134             const char *wrapper_func_name;
135              
136             const char *permit_hintkey;
137             bool (*permit)(pTHX_ void *hookdata);
138              
139             OP *(*new_op)(pTHX_ U32 flags, OP *lhs, OP *rhs, ANY *parsedata, void *hookdata);
140             OP *(*ppaddr)(pTHX);
141              
142             /* optional */
143             void (*parse)(pTHX_ U32 flags, ANY *parsedata, void *hookdata);
144             };
145              
146             =head2 Flags
147              
148             The C<flags> field is currently ignored. It is defined simply to reserve the
149             space in case used in a later version. It should be set to zero.
150              
151             The C<lhs_flags> and C<rhs_flags> fields give details on how to handle the
152             left- and right-hand side operands, respectively.
153              
154             It should be set to one of the following constants, or left as zero:
155              
156             =over 4
157              
158             =item XPI_OPERAND_TERM_LIST
159              
160             The operand will be forced into list context, preserving the C<OP_PUSHMARK> at
161             the beginning. This means that the ppfunc for this infix operator will have to
162             C<POPMARK> to find that.
163              
164             =item XPI_OPERAND_LIST
165              
166             The same as above.
167              
168             =back
169              
170             Older versions used to provide constants named C and
171             C but they related to an older version of the core perl
172             branch. These names are now aliases for zero, and can be removed from new
173             code.
174              
175             In addition the following extra bitflags are defined:
176              
177             =over 4
178              
179             =item XPI_OPERAND_ONLY_LOOK
180              
181             If set, the operator function promises that it will not mutate any of its
182             passed values, nor allow leaking of direct alias pointers to them via return
183             value or other locations.
184              
185             This flag is optional; omitting it when applicable will not change any
186             observed behaviour. Setting it may enable certain optimisations to be
187             performed.
188              
189             Currently, this flag simply enables an optimisation in the call-checker for
190             infix operator wrapper functions that take list-shaped operands. This
191             optimisation discards an C<OP_ANONLIST> operation which would create a
192             temporary anonymous array reference for its operand values, allowing a slight
193             saving of memory use and CPU time. This optimisation is only safe to perform
194             if the operator does not mutate or retain aliases of any of the arguments, as
195             otherwise the caller might see unexpected modifications or value references to
196             the values passed.
197              
198             =back
199              
200             =head2 The Selection Stage
201              
202             The C<cls> field gives a "classification" of the operator, suggesting what
203             sort of operation it provides. This is used as a filter by the various
204             C<XPK_INFIX_*> selection macros.
205              
206             The classification should be one of the C<XPI_CLS_*> constants found and
207             described further in the main F<XSParseInfix.h> file.
208              
209             =head2 The C<permit> Stage
210              
211             As a shortcut for the common case, the C<permit_hintkey> may point to a string
212             to look up from the hints hash. If the given key name is not found in the
213             hints hash then the keyword is not permitted. If the key is present then the
214             C<permit> function is invoked as normal.
215              
216             If not rejected by a hint key that was not found in the hints hash, the
217             function part of the stage is called next and should inspect whether the
218             keyword is permitted at this time perhaps by inspecting other lexical clues,
219             and return true only if the keyword is permitted.
220              
221             Both the string and the function are optional. Either or both may be present.
222             If neither is present then the keyword is always permitted - which is likely
223             not what you wanted to do.
224              
225             =head2 The C<parse> Stage
226              
227             If the optional C<parse> hook function is present, it is called immediately
228             after the parser has recognised the presence of the named operator itself but
229             before it attempts to consume the right-hand side term. This hook function can
230             attempt further parsing, in order to implement more complex syntax such as
231             hyper-operators.
232              
233             When invoked, it is passed a pointer to an C<ANY>-typed storage variable. It
234             is free to use whichever field of this variable it desires to store a result,
235             which will then later be made available to the C<new_op> function.
236              
237             =head2 The Op Generation Stage
238              
239             If the infix operator is going to be used, then one of the C<new_op> or the
240             C<ppaddr> fields explains how to create a new optree fragment.
241              
242             If C<new_op> is defined then it will be used, and is expected to return an
243             optree fragment that consumes the LHS and RHS arguments to implement the
244             semantics of the operator. If the optional C<parse> stage had been present
245             earlier, the C<parsedata> pointer passed here will point to the same storage that
246             C<parse> had previously had access to, so it can retrieve the results.
247              
248             If C<new_op> is not present, then the C<ppaddr> will be used instead to
249             construct a new BINOP of the C<OP_CUSTOM> type. If an earlier C<parse> stage
250             had stored additional results into the C<ANY> variable these will be lost
251             here.
252              
253             =head2 The Wrapper Function
254              
255             Additionally, if the C<wrapper_func_name> field is set to a string, this gives
256             the (fully-qualified) name for a function to be generated as part of
257             registering the operator. This newly-generated function will act as a wrapper
258             for the operator.
259              
260             For operators whose RHS is a scalar, the wrapper function is assumed to take
261             two simple scalar arguments. The result of invoking the function on those
262             arguments will be determined by using the operator code.
263              
264             $result = $lhs OP $rhs;
265              
266             $result = WRAPPERFUNC( $lhs, $rhs );
267              
268             For operators whose RHS is a list, the wrapper function takes at least one
269             argument, possibly more. The first argument is the scalar on the LHS, and the
270             remaining arguments, however many there are, form the RHS:
271              
272             $result = $lhs OP @rhs;
273              
274             $result = WRAPPERFUNC( $lhs, @rhs );
275              
276             For operators whose LHS and RHS is a list, the wrapper function takes two
277             arguments which must be array references containing the lists.
278              
279             $result = @lhs OP @rhs;
280              
281             $result = WRAPPERFUNC( \@lhs, \@rhs );
282              
283             This creates a convenience for accessing the operator from perls that do not
284             support C<PL_infix_plugin>.
285              
286             In the case of scalar infix operators, the wrapper function also includes a
287             call-checker which attempts to inline the operator directly into the callsite.
288             Thus, in simple cases where the function is called directly on exactly two
289             scalar arguments (such as in the following), no C<ENTERSUB> overhead will be
290             incurred and the generated optree will be identical to that which would have
291             been generated by using infix operator syntax directly:
292              
293             WRAPPERFUNC( $lhs, $rhs );
294             WRAPPERFUNC( $lhs, CONSTANT );
295             WRAPPERFUNC( $args[0], $args[1] );
296             WRAPPERFUNC( $lhs, scalar otherfunc() );
297              
298             The checker is very pessimistic and will only rewrite callsites where it
299             determines this can be done safely. It will not rewrite any of the following
300             forms:
301              
302             WRAPPERFUNC( $onearg ); # not enough args
303             WRAPPERFUNC( $x, $y, $z ); # too many args
304             WRAPPERFUNC( @args[0,1] ); # not a scalar
305             WRAPPERFUNC( $lhs, otherfunc() ); # not a scalar
306              
307             The wrapper function for infix operators which take lists on both sides also
308             has a call-checker which will attempt to inline the operator in similar
309             circumstances. In addition to the optimisations described above for scalar
310             operators, this checker will also inline an array-reference operator and omit
311             the resulting dereference behaviour. Thus, the two following lines emit the
312             same optree, without an C<OP_SREFGEN> or C<OP_RV2AV>:
313              
314             @lhs OP @rhs;
315             WRAPPERFUNC( \@lhs, \@rhs );
316              
317             B<Note> that technically, this optimisation isn't strictly transparent in the
318             odd cornercase that one of the referenced arrays is also the backing store for
319             a blessed object reference, and that object class has a C<@{}> overload.
320              
321             my @arr;
322             package SomeClass {
323             use overload '@{}' => sub { return ["values", "go", "here"]; };
324             }
325             bless \@arr, "SomeClass";
326              
327             # this will not actually invoke the overload operator
328             WRAPPERFUNC( \@arr, [4, 5, 6] );
329              
330             As this cornercase relates to taking duplicate references to the same blessed
331             object's backing store variable, it should not matter to any real code;
332             regular objects that are passed by reference into the wrapper function will
333             run their overload methods as normal.
334              
335             The callchecker for list operands can optionally also discard an op of the
336             C<OP_ANONLIST> type, which is used by anonymous array-ref construction:
337              
338             ($u, $v, $w) OP ($x, $y, $z);
339             WRAPPERFUNC( [$u, $v, $w], [$x, $y, $z] );
340              
341             This optimisation is only performed if the operator declared it safe to do so,
342             via the C<XPI_OPERAND_ONLY_LOOK> flag.
343              
344             If a function of the given name already exists at registration time it will be
345             left undisturbed and no new wrapper will be created. This permits the same
346             infix operator to have multiple spellings of its name; for example to allow
347             both a real Unicode and a fallback ASCII transliteration of the same operator.
348             The first registration will create the wrapper function; the subsequent one
349             will skip it because it would otherwise be identical.
350              
351             Note that when generating an optree for a wrapper function call, the C<new_op>
352             hook function will be invoked with a C<NULL> pointer for the C<ANY>-typed
353             parse data storage, as there won't be an opportunity for the C<parse> hook to
354             run in this case.
355              
356             =cut
357              
358             =head1 DEPARSE
359              
360             This module operates with L<B::Deparse> in order to automatically provide
361             deparse support for infix operators. Every infix operator that is implemented
362             as a custom op (and thus has the C<ppaddr> hook field set) will have deparse
363             logic added. This will allow it to deparse to either the named wrapper
364             function, or to the infix operator syntax if on a C<PL_infix_plugin>-enabled
365             perl and the appropriate lexical hint is enabled at the callsite.
366              
367             In order for this to work, it is important that your custom operator is I<not>
368             registered as a custom op using the C<Perl_custom_op_register()> function.
369             This registration will be performed by C<XS::Parse::Infix> itself at the time
370             the infix operator is registered.
371              
372             =cut
373              
374             sub B::Deparse::_deparse_infix_wrapperfunc_scalarscalar
375             {
376 1     1   14572 my ( $self, $wrapper_func_name, $op, $ctx ) = @_;
377              
378 1         5 my $lhs = $op->first;
379 1         6 my $rhs = $op->last;
380              
381 1         374 $_ = $self->deparse( $_, 6 ) for $lhs, $rhs;
382              
383 1         141 return "$wrapper_func_name($lhs, $rhs)";
384             }
385              
386             sub B::Deparse::_deparse_infix_wrapperfunc_listlist
387             {
388 4     4   5080 my ( $self, $wrapper_func_name, $op, $ctx ) = @_;
389              
390 4         17 my $lhs = $op->first;
391 4         16 my $rhs = $op->last;
392              
393 4         14 foreach my $var ( \$lhs, \$rhs ) {
394 8         14 my $argop = $$var;
395 8         14 my $kid;
396              
397 8 100 33     174 if( $argop->name eq "null" and
      33        
      66        
398             $argop->first->name eq "pushmark" and
399             ($kid = $argop->first->sibling) and
400             B::Deparse::null($kid->sibling) ) {
401 7         30 my $add_refgen;
402              
403             # A list of a single item
404 7 100 100     68 if( $kid->name eq "rv2av" and $kid->first->name ne "gv" ) {
    50 66        
405 3         11 $argop = $kid->first;
406             }
407             elsif( $kid->name eq "padav" or $kid->name eq "rv2av" ) {
408 4         10 $add_refgen++;
409             }
410             else {
411 0         0 print STDERR "Maybe UNWRAP list ${\ $kid->name }\n";
  0         0  
412             }
413              
414 7         847 $$var = $self->deparse( $argop, 6 );
415              
416 7 100       56 $$var = "\\$$var" if $add_refgen;
417             }
418             else {
419             # Pretend the entire list was anonlist
420 1         12 my @args;
421 1         11 $argop = $argop->first->sibling; # skip pushmark
422 1         11 while( not B::Deparse::null($argop) ) {
423 2         40 push @args, $self->deparse( $argop, 6 );
424 2         17 $argop = $argop->sibling;
425             }
426              
427 1         7 $$var = "[" . join( ", ", @args ) . "]";
428             }
429             }
430              
431 4         334 return "$wrapper_func_name($lhs, $rhs)";
432             }
433              
434             sub B::Deparse::_deparse_infix_named
435             {
436 0     0     my ( $self, $opname, $op, $ctx ) = @_;
437              
438 0           my $lhs = $op->first;
439 0           my $rhs = $op->last;
440              
441 0           return join " ",
442             $self->deparse_binop_left( $op, $lhs, 14 ),
443             $opname,
444             $self->deparse_binop_right( $op, $rhs, 14 );
445             }
446              
447             =head1 TODO
448              
449             =over 4
450              
451             =item *
452              
453             Have the entersub checker for list/list operators unwrap arrayref or
454             anon-array argument forms (C or
455             C).
456              
457             =item *
458              
459             Further thoughts about how infix operators with C<parse> hooks will work with
460             automatic deparse, and also how to integrate them with L<XS::Parse::Keyword>'s
461             grammar piece.
462              
463             =back
464              
465             =cut
466              
467             =head1 AUTHOR
468              
469             Paul Evans <leonerd@leonerd.org.uk>
470              
471             =cut
472              
473             0x55AA;