File Coverage

blib/lib/XS/Parse/Infix.pm
Criterion Covered Total %
statement 30 36 83.3
branch 7 8 87.5
condition 9 15 60.0
subroutine 4 5 80.0
pod n/a
total 50 64 78.1


line stmt bran cond sub pod time code
1             # You may distribute under the terms of either the GNU General Public License
2             # or the Artistic License (the same terms as Perl itself)
3             #
4             # (C) Paul Evans, 2021-2022 -- leonerd@leonerd.org.uk
5              
6             package XS::Parse::Infix 0.27;
7              
8 2     2   2501 use v5.14;
  2         7  
9 2     2   10 use warnings;
  2         4  
  2         1239  
10              
11             # No actual .xs file; the code is implemented in XS::Parse::Keyword
12             require XS::Parse::Keyword;
13              
14             =head1 NAME
15              
16             C<XS::Parse::Infix> - XS functions to assist in parsing infix operators
17              
18             =head1 DESCRIPTION
19              
20             This module provides some XS functions to assist in writing syntax modules
21             that provide new infix operators as perl syntax, primarily for authors of
22             syntax plugins. It is unlikely to be of much use to anyone else; and highly
23             unlikely to be of any use when writing perl code using these. Unless you are
24             writing a syntax plugin using XS, this module is not for you.
25              
26             This module is also currently experimental, and the design is still evolving
27             and subject to change. Later versions may break ABI compatibility, requiring
28             changes or at least a rebuild of any module that depends on it.
29              
30             In addition, the places this functionality can be used are relatively small.
31             No current release of perl actually supports custom infix operators, though I
32             have a branch where I am currently experimenting with such support:
33              
34             L
35              
36             In addition, the various C<XPK_INFIX_*> token types of L<XS::Parse::Keyword>
37             support querying on this module, so some syntax provided by other modules may
38             be able to make use of these new infix operators.
39              
40             =cut
41              
42             =head1 CONSTANTS
43              
44             =head2 HAVE_PL_INFIX_PLUGIN
45              
46             if( XS::Parse::Infix::HAVE_PL_INFIX_PLUGIN ) { ... }
47              
48             This constant is true if built on a perl that supports the C<PL_infix_plugin>
49             extension mechanism, meaning that custom infix operators registered with this
50             module will actually be recognised by the perl parser.
51              
52             No actual production or development releases of perl yet support this feature,
53             but see above for details of a branch which does.
54              
55             =cut
56              
57             =head1 XS FUNCTIONS
58              
59             =head2 boot_xs_parse_infix
60              
61             void boot_xs_parse_infix(double ver);
62              
63             Call this function from your C<BOOT> section in order to initialise the module
64             and parsing hooks.
65              
66             I<ver> should either be 0 or a decimal number for the module version
67             requirement; e.g.
68              
69             boot_xs_parse_infix(0.14);
70              
71             =head2 parse_infix
72              
73             bool parse_infix(enum XSParseInfixSelection select, struct XSParseInfixInfo **infop);
74              
75             This function attempts to parse syntax for an infix operator from the current
76             parser position. If it is successful, it fills in the variable pointed to by
77             I<infop> with a pointer to the actual information structure and returns
78             C<TRUE>. If no suitable operator is found, returns C<FALSE>.
79              
80             =head2 xs_parse_infix_new_op
81              
82             OP *xs_parse_infix_new_op(const struct XSParseInfixInfo *info, U32 flags,
83             OP *lhs, OP *rhs);
84              
85             This function constructs a new optree fragment to represent invoking the infix
86             operator with the given operands. It should be used much the same as core
87             perl's C<newBINOP> function.
88              
89             The C<info> structure pointer would be obtained from the C<infix> field of the
90             result of invoking the various C<XPK_INFIX_*> token types from
91             C<XS::Parse::Keyword>, or by calling L</parse_infix> directly.
92              
93             =head2 register_xs_parse_infix
94              
95             void register_xs_parse_infix(const char *opname,
96             const struct XSParseInfixHooks *hooks, void *hookdata);
97              
98             This function installs a set of parsing hooks to be associated with the given
99             operator name. This new operator will then be available via
100             L<XS::Parse::Keyword> by the various C<XPK_INFIX_*> token types,
101             L</parse_infix>, or to core perl's C<PL_infix_plugin> if available.
102              
103             These tokens will all yield an info structure, with the following fields:
104              
105             struct XSParseInfixInfo {
106             const char *opname;
107             OPCODE opcode; /* for built-in operators, or OP_CUSTOM for
108             custom-registered ones */
109              
110             struct XSParseInfixHooks *hooks;
111             void *hookdata;
112             };
113              
114             If the operator name contains any non-ASCII characters they are presumed to be
115             in UTF-8 encoding. This will matter for deparse purposes.
116              
117             =cut
118              
119             =head1 PARSE HOOKS
120              
121             The C<XSParseInfixHooks> structure provides the following fields which are
122             used at various stages of parsing.
123              
124             struct XSParseInfixHooks {
125             U16 flags; /* currently ignored */
126             U8 lhs_flags;
127             U8 rhs_flags;
128             enum XSParseInfixClassification cls;
129              
130             const char *wrapper_func_name;
131              
132             const char *permit_hintkey;
133             bool (*permit)(pTHX_ void *hookdata);
134              
135             OP *(*new_op)(pTHX_ U32 flags, OP *lhs, OP *rhs, ANY *parsedata, void *hookdata);
136             OP *(*ppaddr)(pTHX);
137              
138             /* optional */
139             void (*parse)(pTHX_ U32 flags, ANY *parsedata, void *hookdata);
140             };
141              
142             =head2 Flags
143              
144             The C<flags> field is currently ignored. It is defined simply to reserve the
145             space in case used in a later version. It should be set to zero.
146              
147             The C<lhs_flags> and C<rhs_flags> fields give details on how to handle the
148             left- and right-hand side operands, respectively.
149              
150             It should be set to one of the following constants, or left as zero:
151              
152             =over 4
153              
154             =item XPI_OPERAND_TERM_LIST
155              
156             The operand will be forced into list context, preserving the C<OP_PUSHMARK> at
157             the beginning. This means that the ppfunc for this infix operator will have to
158             C<POPMARK> to find that.
159              
160             =item XPI_OPERAND_LIST
161              
162             The same as above.
163              
164             =back
165              
166             Older versions used to provide constants named C<XPI_OPERAND_ARITH> and
167             C<XPI_OPERAND_TERM> but they related to an older version of the core perl
168             branch. These names are now aliases for zero, and can be removed from new
169             code.
170              
171             In addition the following extra bitflags are defined:
172              
173             =over 4
174              
175             =item XPI_OPERAND_ONLY_LOOK
176              
177             If set, the operator function promises that it will not mutate any of its
178             passed values, nor allow leaking of direct alias pointers to them via return
179             value or other locations.
180              
181             This flag is optional; omitting it when applicable will not change any
182             observed behaviour. Setting it may enable certain optimisations to be
183             performed.
184              
185             Currently, this flag simply enables an optimisation in the call-checker for
186             infix operator wrapper functions that take list-shaped operands. This
187             optimisation discards an C<OP_ANONLIST> operation which would create a
188             temporary anonymous array reference for its operand values, allowing a slight
189             saving of memory use and CPU time. This optimisation is only safe to perform
190             if the operator does not mutate or retain aliases of any of the arguments, as
191             otherwise the caller might see unexpected modifications or value references to
192             the values passed.
193              
194             =back
195              
196             =head2 The Selection Stage
197              
198             The C<cls> field gives a "classification" of the operator, suggesting what
199             sort of operation it provides. This is used as a filter by the various
200             C<XS::Parse::Keyword> selection macros.
201              
202             The classification should be one of the C<XPI_CLS_*> constants found and
203             described further in the main F<XSParseInfix.h> file.
204              
205             =head2 The C<permit> Stage
206              
207             As a shortcut for the common case, the C<permit_hintkey> may point to a string
208             to look up from the hints hash. If the given key name is not found in the
209             hints hash then the keyword is not permitted. If the key is present then the
210             C<permit> function is invoked as normal.
211              
212             If not rejected by a hint key that was not found in the hints hash, the
213             function part of the stage is called next and should inspect whether the
214             keyword is permitted at this time perhaps by inspecting other lexical clues,
215             and return true only if the keyword is permitted.
216              
217             Both the string and the function are optional. Either or both may be present.
218             If neither is present then the keyword is always permitted - which is likely
219             not what you wanted to do.
220              
221             =head2 The C<parse> Stage
222              
223             If the optional C<parse> hook function is present, it is called immediately
224             after the parser has recognised the presence of the named operator itself but
225             before it attempts to consume the right-hand side term. This hook function can
226             attempt further parsing, in order to implement more complex syntax such as
227             hyper-operators.
228              
229             When invoked, it is passed a pointer to an C<ANY>-typed storage variable. It
230             is free to use whichever field of this variable it desires to store a result,
231             which will then later be made available to the C<new_op> function.
232              
233             =head2 The Op Generation Stage
234              
235             If the infix operator is going to be used, then one of the C<new_op> or the
236             C<ppaddr> fields explain how to create a new optree fragment.
237              
238             If C<new_op> is defined then it will be used, and is expected to return an
239             optree fragment that consumes the LHS and RHS arguments to implement the
240             semantics of the operator. If the optional C<parse> stage had been present
241             earlier, the C<ANY> pointer passed here will point to the same storage that
242             C<parse> had previously had access to, so it can retrieve the results.
243              
244             If C<new_op> is not present, then the C<ppaddr> will be used instead to
245             construct a new BINOP of the C<OP_CUSTOM> type. If an earlier C<parse> stage
246             had stored additional results into the C<ANY> variable these will be lost
247             here.
248              
249             =head2 The Wrapper Function
250              
251             Additionally, if the C<wrapper_func_name> field is set to a string, this gives
252             the (fully-qualified) name for a function to be generated as part of
253             registering the operator. This newly-generated function will act as a wrapper
254             for the operator.
255              
256             For operators whose RHS is a scalar, the wrapper function is assumed to take
257             two simple scalar arguments. The result of invoking the function on those
258             arguments will be determined by using the operator code.
259              
260             $result = $lhs OP $rhs;
261              
262             $result = WRAPPERFUNC( $lhs, $rhs );
263              
264             For operators whose RHS is a list, the wrapper function takes at least one
265             argument, possibly more. The first argument is the scalar on the LHS, and the
266             remaining arguments, however many there are, form the RHS:
267              
268             $result = $lhs OP @rhs;
269              
270             $result = WRAPPERFUNC( $lhs, @rhs );
271              
272             For operators whose LHS and RHS is a list, the wrapper function takes two
273             arguments which must be array references containing the lists.
274              
275             $result = @lhs OP @rhs;
276              
277             $result = WRAPPERFUNC( \@lhs, \@rhs );
278              
279             This creates a convenience for accessing the operator from perls that do not
280             support C<PL_infix_plugin>.
281              
282             In the case of scalar infix operators, the wrapper function also includes a
283             call-checker which attempts to inline the operator directly into the callsite.
284             Thus, in simple cases where the function is called directly on exactly two
285             scalar arguments (such as in the following), no C<ENTERSUB> overhead will be
286             incurred and the generated optree will be identical to that which would have
287             been generated by using infix operator syntax directly:
288              
289             WRAPPERFUNC( $lhs, $rhs );
290             WRAPPERFUNC( $lhs, CONSTANT );
291             WRAPPERFUNC( $args[0], $args[1] );
292             WRAPPERFUNC( $lhs, scalar otherfunc() );
293              
294             The checker is very pessimistic and will only rewrite callsites where it
295             determines this can be done safely. It will not rewrite any of the following
296             forms:
297              
298             WRAPPERFUNC( $onearg ); # not enough args
299             WRAPPERFUNC( $x, $y, $z ); # too many args
300             WRAPPERFUNC( @args[0,1] ); # not a scalar
301             WRAPPERFUNC( $lhs, otherfunc() ); # not a scalar
302              
303             The wrapper function for infix operators which take lists on both sides also
304             has a call-checker which will attempt to inline the operator in similar
305             circumstances. In addition to the optimisations described above for scalar
306             operators, this checker will also inline an array-reference operator and omit
307             the resulting dereference behaviour. Thus, the two following lines emit the
308             same optree, without an C<OP_SREFGEN> or C<OP_RV2AV>:
309              
310             @lhs OP @rhs;
311             WRAPPERFUNC( \@lhs, \@rhs );
312              
313             B<Note> that technically, this optimisation isn't strictly transparent in the
314             odd cornercase that one of the referenced arrays is also the backing store for
315             a blessed object reference, and that object class has a C<@{}> overload.
316              
317             my @arr;
318             package SomeClass {
319             use overload '@{}' => sub { return ["values", "go", "here"]; };
320             }
321             bless \@arr, "SomeClass";
322              
323             # this will not actually invoke the overload operator
324             WRAPPERFUNC( \@arr, [4, 5, 6] );
325              
326             As this cornercase relates to taking duplicate references to the same blessed
327             object's backing store variable, it should not matter to any real code;
328             regular objects that are passed by reference into the wrapper function will
329             run their overload methods as normal.
330              
331             The callchecker for list operands can optionally also discard an op of the
332             C<OP_ANONLIST> type, which is used by anonymous array-ref construction:
333              
334             ($u, $v, $w) OP ($x, $y, $z);
335             WRAPPERFUNC( [$u, $v, $w], [$x, $y, $z] );
336              
337             This optimisation is only performed if the operator declared it safe to do so,
338             via the C<XPI_OPERAND_ONLY_LOOK> flag.
339              
340             If a function of the given name already exists at registration time it will be
341             left undisturbed and no new wrapper will be created. This permits the same
342             infix operator to have multiple spellings of its name; for example to allow
343             both a real Unicode and a fallback ASCII transliteration of the same operator.
344             The first registration will create the wrapper function; the subsequent one
345             will skip it because it would otherwise be identical.
346              
347             Note that when generating an optree for a wrapper function call, the C<new_op>
348             hook function will be invoked with a C<NULL> pointer for the C<ANY>-typed
349             parse data storage, as there won't be an opportunity for the C<parse> hook to
350             run in this case.
351              
352             =cut
353              
354             =head1 DEPARSE
355              
356             This module operates with L<B::Deparse> in order to automatically provide
357             deparse support for infix operators. Every infix operator that is implemented
358             as a custom op (and thus has the C<ppaddr> hook field set) will have deparse
359             logic added. This will allow it to deparse to either the named wrapper
360             function, or to the infix operator syntax if on a C<PL_infix_plugin>-enabled
361             perl and the appropriate lexical hint is enabled at the callsite.
362              
363             In order for this to work, it is important that your custom operator is I<not>
364             registered as a custom op using the C<Perl_register_custom_op()> function.
365             This registration will be performed by C<XS::Parse::Infix> itself at the time
366             the infix operator is registered.
367              
368             =cut
369              
370             sub B::Deparse::_deparse_infix_wrapperfunc_scalarscalar
371             {
372 1     1   14558 my ( $self, $wrapper_func_name, $op, $ctx ) = @_;
373              
374 1         4 my $lhs = $op->first;
375 1         7 my $rhs = $op->last;
376              
377 1         378 $_ = $self->deparse( $_, 6 ) for $lhs, $rhs;
378              
379 1         148 return "$wrapper_func_name($lhs, $rhs)";
380             }
381              
382             sub B::Deparse::_deparse_infix_wrapperfunc_listlist
383             {
384 4     4   5221 my ( $self, $wrapper_func_name, $op, $ctx ) = @_;
385              
386 4         17 my $lhs = $op->first;
387 4         17 my $rhs = $op->last;
388              
389 4         11 foreach my $var ( \$lhs, \$rhs ) {
390 8         14 my $argop = $$var;
391 8         12 my $kid;
392              
393 8 100 33     169 if( $argop->name eq "null" and
      33        
      66        
394             $argop->first->name eq "pushmark" and
395             ($kid = $argop->first->sibling) and
396             B::Deparse::null($kid->sibling) ) {
397 7         13 my $add_refgen;
398              
399             # A list of a single item
400 7 100 100     66 if( $kid->name eq "rv2av" and $kid->first->name ne "gv" ) {
    50 66        
401 3         12 $argop = $kid->first;
402             }
403             elsif( $kid->name eq "padav" or $kid->name eq "rv2av" ) {
404 4         10 $add_refgen++;
405             }
406             else {
407 0         0 print STDERR "Maybe UNWRAP list ${\ $kid->name }\n";
  0         0  
408             }
409              
410 7         867 $$var = $self->deparse( $argop, 6 );
411              
412 7 100       41 $$var = "\\$$var" if $add_refgen;
413             }
414             else {
415             # Pretend the entire list was anonlist
416 1         12 my @args;
417 1         11 $argop = $argop->first->sibling; # skip pushmark
418 1         9 while( not B::Deparse::null($argop) ) {
419 2         58 push @args, $self->deparse( $argop, 6 );
420 2         18 $argop = $argop->sibling;
421             }
422              
423 1         7 $$var = "[" . join( ", ", @args ) . "]";
424             }
425             }
426              
427 4         334 return "$wrapper_func_name($lhs, $rhs)";
428             }
429              
430             sub B::Deparse::_deparse_infix_named
431             {
432 0     0     my ( $self, $opname, $op, $ctx ) = @_;
433              
434 0           my $lhs = $op->first;
435 0           my $rhs = $op->last;
436              
437 0           return join " ",
438             $self->deparse_binop_left( $op, $lhs, 14 ),
439             $opname,
440             $self->deparse_binop_right( $op, $rhs, 14 );
441             }
442              
443             =head1 TODO
444              
445             =over 4
446              
447             =item *
448              
449             Have the entersub checker for list/list operators unwrap arrayref or
450             anon-array argument forms (C<WRAPPERFUNC( \@lhs, \@rhs )> or
451             C<WRAPPERFUNC( [LHS], [RHS] )>).
452              
453             =item *
454              
455             Further thoughts about how infix operators with C<parse> hooks will work with
456             automatic deparse, and also how to integrate them with L<XS::Parse::Keyword>'s
457             grammar piece.
458              
459             =back
460              
461             =cut
462              
463             =head1 AUTHOR
464              
465             Paul Evans <leonerd@leonerd.org.uk>
466              
467             =cut
468              
469             0x55AA;