File Coverage

blib/lib/Protocol/MTProto/TLSchemaParser.pm
Criterion Covered Total %
statement 109 111 98.2
branch 17 24 70.8
condition 5 9 55.5
subroutine 26 26 100.0
pod 0 11 0.0
total 157 181 86.7


line stmt bran cond sub pod time code
1             # You may distribute under the terms of either the GNU General Public License
2             # or the Artistic License (the same terms as Perl itself)
3             #
4             # (C) Paul Evans, 2017 -- leonerd@leonerd.org.uk
5              
6             package Protocol::MTProto::TLSchemaParser;
7              
8 4     4   53465 use strict;
  4         7  
  4         106  
9 4     4   15 use warnings;
  4         6  
  4         91  
10 4     4   11 use base qw( Parser::MGC );
  4         6  
  4         1969  
11             Parser::MGC->VERSION( '0.16' ); # ->committed_scope_of
12              
13 4     4   63247 use Struct::Dumb qw( readonly_struct );
  4         4155  
  4         13  
14              
15             our $VERSION = '0.01';
16              
17             # C-like block and C++-like line comments
18 4     4   298 use constant pattern_comment => qr{ //.*\n | /\*(?s:.*)\*/ }x;
  4         7  
  4         387  
19              
20             # Regular identifiers may not begin with "_"
21 4     4   18 use constant pattern_ident => qr/[[:alpha:]][[:alnum:]_]*/;
  4         4  
  4         4000  
22              
23             =head1 NAME
24              
25             C - a parser for TL schema declarations
26              
27             =head1 SYNOPSIS
28              
29             my $parser = Protocol::MTProto::TLSchemaParser->new;
30              
31             my $declarations = $parser->from_file( "telegram-l57.tl" );
32              
33             # $declarations is now a reference to an array of Declarations
34              
35             =head1 DESCRIPTION
36              
37             This subclass of L recognises the TL schema grammar. Or, at
38             least, at this early version it recognises a subset of the grammar sufficient
39             to pass its own unit tests and to parse the F TL schema files. It
40             does not yet recognise the full TL grammar.
41              
42             Details of this module should be considered very subject to change as the
43             implementation progresses.
44              
45             =head1 RETURNED TYPES
46              
47             =cut
48              
49             # See also
50             # https://core.telegram.org/mtproto/TL
51             # https://core.telegram.org/mtproto/TL-combinators
52              
53             sub parse
54             {
55 7     7 0 4567 my $self = shift;
56              
57 7         17 local $self->{declaration_kind} = "constructor";
58              
59             # A schema is simply a sequence of declarations
60 7         25 return $self->sequence_of( 'parse_decl' );
61             }
62              
63             =head2 Declaration
64              
65             The toplevel parse result is a reference to an array of C
66             instances. Each instance relates to a single declaration from the schema file.
67              
68             Responds to the following accessors:
69              
70             =head3 ident
71              
72             A string containing the full identifier name, including the namespace prefix.
73              
74             =head3 number
75              
76             A 32bit number giving the parsed identification hash from the schema file, or
77             C if one was not given in the file. Note that this implementation does
78             not generate numbers by CRC32 hashes at present.
79              
80             =head3 optargs
81              
82             Either C, or a reference to a non-empty array of C instances
83             relating to the optional arguments of the declaration.
84              
85             =head3 args
86              
87             Either C, or a reference to a non-empty array of C instances
88             relating to the required arguments of the declaration.
89              
90             =head3 result_type
91              
92             A L instance giving this constructor's result type.
93              
94             =head3 kind
95              
96             Either the string C if the declaration was parsed before
97             encountering the C<---functions---> marker, or the string C if
98             parsed after it.
99              
100             =cut
101              
102             readonly_struct Declaration => [qw(
103             ident number optargs args result_type
104             kind
105             )];
106              
107             sub parse_decl
108             {
109 9     9 0 471 my $self = shift;
110              
111             # As a special case, we accept the literal
112             # "---functions---"
113             # marker
114 9 100       45 if( $self->expect( qr/(?:---functions---)?/ ) ) {
115 1         28 $self->{declaration_kind} = "function";
116 1         2 return ();
117             }
118              
119             # A declaration is
120             # NS-IDENT [ "#" NUMBER ] [ OPT-ARGS...] [ ARGS... ] "=" RESULT-TYPE
121              
122 8         343 my $ident = $self->parse_namespaced_ident;
123 8         31 $self->commit;
124              
125             my $number = $self->maybe( sub {
126 8     8   78 $self->expect( "#" );
127 4         142 $self->commit;
128 4         21 hex $self->expect( qr/[0-9a-f]{1,8}/i );
129 8         64 });
130              
131 8         350 my $optargs = $self->sequence_of( 'parse_optarg' );
132 8 100 66     513 if( $optargs and @$optargs ) {
133             # Flatten the lists
134 2         5 $optargs = [ map { @$_ } @$optargs ];
  2         5  
135             }
136             else {
137 6         9 undef $optargs;
138             }
139              
140 8         21 my $args = $self->sequence_of( 'parse_arg' );
141 8 100       640 @$args or undef $args;
142              
143 8         16 $self->expect( "=" );
144              
145 8         253 my $result_type = $self->parse_result_type;
146              
147 8         39 $self->expect( ";" );
148              
149             return Declaration(
150             $ident, $number, $optargs, $args, $result_type,
151             $self->{declaration_kind},
152 8         257 );
153             }
154              
155             =head2 Argument
156              
157             Each element of the C and C arrays for a L will
158             be an C instance. Each relates to a single positional argument of
159             the declaration.
160              
161             Responds to the following accessors:
162              
163             =head3 name
164              
165             Either C or a string giving the name of the argument.
166              
167             =head3 type
168              
169             A L instance giving this argument's type.
170              
171             =head3 conditional_on
172              
173             Either C, or a string giving the name of an earlier argument that this
174             argument is conditional on.
175              
176             =head3 condition_mask
177              
178             Either C, or a number giving the bitmask to apply to the earlier that
179             argument this argument is conditional on.
180              
181             =head3 has_pling
182              
183             True if the type of this argument was declared using the C modifier.
184              
185             =cut
186              
187             readonly_struct Argument => [qw(
188             name type conditional_on condition_mask has_pling
189             )];
190              
191             readonly_struct Repetition => [qw(
192             multiplicity args
193             )];
194              
195             sub parse_arg
196             {
197 21     21 0 603 my $self = shift;
198              
199             # ARG is any of
200             # VAR-IDENT-OPT ":" [ CONDITIONAL-DEF ] [ "!" ] TYPE-TERM
201             # [ VAR-IDENT-OPT ":" ] [ MULTIPLICITY "*" ] "[" [ ARG... ] "]"
202             # "(" [ VAR-IDENT-OPT... ]+ : [ "!" ] TYPE-TERM ")" -- TODO
203             # [ "!" ] TYPE-TERM
204              
205             $self->any_of(
206             sub {
207 21     21   181 my $name = $self->token_var_ident( optional => 1 );
208 11         49 $self->expect( ":" );
209              
210 10         298 my $conditional_arg;
211             my $condition_mask;
212             $self->maybe( sub {
213 10         70 $conditional_arg = $self->token_var_ident;
214 10         20 $self->expect( "." );
215 3         83 $condition_mask = 1 << $self->token_int;
216 3         179 $self->expect( "?" );
217 10         37 });
218              
219 10         452 my $has_pling = $self->expect( qr/!?/ );
220 10         251 my $type = $self->parse_type_term;
221 10         40 return Argument( $name, $type, $conditional_arg, $condition_mask, $has_pling );
222             },
223             sub {
224             # TODO -- var-ident-opt, ":"
225             # TODO -- multiplicity
226             my $args = $self->committed_scope_of(
227             "[",
228 1         44 sub { $self->sequence_of( 'parse_arg' ) },
229 11     11   609 "]",
230             );
231             # TODO: name, multiplicity
232 1         58 return Repetition( "#", $args );
233             },
234             sub {
235 10     10   568 my $has_pling = $self->expect( qr/!?/ );
236 10         293 return Argument( undef, $self->parse_type_term, undef, undef, $has_pling );
237             },
238 21         121 );
239             }
240              
241             sub parse_optarg
242             {
243 10     10 0 322 my $self = shift;
244              
245             # OPT-ARGS is
246             # "{" [ IDENTS... ]+ ":" [ "!" ] TYPE-EXPR "}"
247              
248             $self->committed_scope_of(
249             "{",
250             sub {
251 2     2   103 my $names = $self->sequence_of( 'token_var_ident' );
252 2 50       99 @$names or $self->fail( "Expected at least one identifier name" );
253              
254 2         13 $self->expect( ":" );
255 2         67 my $has_pling = $self->expect( qr/!?/ );
256 2         50 my $type = $self->parse_type_expr;
257              
258             # Expand the list of names into a list of individual argument instances
259 2         4 return [ map { Argument( $_, $type, undef, undef, $has_pling ) } @$names ];
  2         7  
260             },
261 10         53 "}"
262             );
263             }
264              
265             *parse_type_expr = \&parse_expr;
266             sub parse_expr
267             {
268 3     3 0 39 my $self = shift;
269              
270             # EXPR is a sequence of SUBEXPR
271             # TODO: multiple of them imply being combined together in some
272             # polymorphic type but I don't quite understand how
273              
274 3         24 my $exprs = $self->sequence_of( 'parse_subexpr' );
275 3 50       75 return $exprs->[0] if @$exprs == 1;
276              
277 0         0 $self->fail( "TODO: combine subexprs" );
278             }
279              
280             sub parse_subexpr
281             {
282 13     13 0 316 my $self = shift;
283             # SUBEXPR is any of
284             # TERM
285             # NAT-CONST "+" SUBEXPR -- TODO
286             # SUBEXPR "+" NAT-CONST -- TODO
287              
288 13         25 return $self->parse_term;
289             }
290              
291             *parse_type_term = \&parse_term; # TODO - check that the result definitely is a type
292             sub parse_term
293             {
294 33     33 0 34 my $self = shift;
295              
296             # TERM is any of
297             # "(" EXPR ")" -- TODO
298             # TYPE-IDENT [ "<" [ EXPR "," EXPR... ]+ ">" ]
299             # VAR-IDENT -- TODO
300             # NAT-CONST -- TODO
301             # "%" TERM -- TODO
302              
303 33         46 $self->parse_type_ident( allow_polymorphic => 1 );
304             }
305              
306             =head2 Type
307              
308             The C of a L, and the C of an L
309             will be a C instance. At present, no attempt is made to intern the
310             instances; comparison for equallity should be performed on the string name and
311             its subtypes, not object identity.
312              
313             Responds to the following accessors:
314              
315             =head3 name
316              
317             A string giving the name of the type.
318              
319             =head3 is_boxed
320              
321             True if the type is boxed; that is, its name begins with a capital letter.
322              
323             =head3 is_polymorphic
324              
325             True if the type is polymorphic.
326              
327             =head3 subtypes (optional)
328              
329             If the type is polymorphic, a reference to an array of other L
330             instances corresponding to the subtypes. If the type is not polymorphic this
331             accessor will not exist.
332              
333             =cut
334              
335             readonly_struct BaseType => [qw(
336             name is_boxed is_polymorphic
337             )];
338              
339             readonly_struct PolymorphicType => [qw(
340             name is_boxed is_polymorphic subtypes
341             )];
342              
343             sub parse_type_ident
344             {
345 41     41 0 34 my $self = shift;
346 41         58 my %args = @_;
347              
348             # TYPE-IDENT is any of
349             # BOXED-TYPE-IDENT
350             # LC-IDENT-NS
351             # "#"
352             #
353             # The first two of which are covered by parse_namespaced_ident
354              
355             my $base_type = $self->any_of(
356             sub {
357 41     41   305 my $name = $self->parse_namespaced_ident;
358 23         83 return BaseType( $name, scalar $name =~ m/(?:\.|^)[A-Z]/, 0 );
359             },
360 18     18   817 sub { $self->expect( "#" ); return BaseType( "#", 0, 0 ) },
  2         68  
361 41         170 );
362              
363 25 50       269 return $base_type unless $args{allow_polymorphic};
364              
365             my $subtypes = $self->maybe( sub {
366             $self->committed_scope_of(
367             "<",
368             sub {
369 1         49 $self->list_of( ",", 'parse_expr' );
370             # TODO - check nonempty, types
371             },
372 25     25   200 ">",
373             );
374 25         83 });
375              
376 25 100       1444 return $base_type if !$subtypes;
377              
378 1         5 return PolymorphicType(
379             $base_type->name, $base_type->is_boxed, 1, $subtypes
380             );
381             }
382              
383             sub parse_result_type
384             {
385 8     8 0 9 my $self = shift;
386              
387             # RESULT-TYPE is any of
388             # BOXED-TYPE-IDENT [ SUBEXPR ... ]
389             # BOXED-TYPE-IDENT "<" [ SUBEXPR "," SUBEXPR... ]+ ">"
390              
391 8         14 my $type = $self->parse_type_ident( allow_polymorphic => 1 );
392 8 50       22 $type->is_boxed or $self->fail( "Result type must be Boxed" );
393              
394 8 50       57 return $type if $type->is_polymorphic;
395              
396 8         43 my $subtypes = $self->sequence_of( 'parse_subexpr' );
397             # TODO - check these are types
398              
399 8 100 66     494 if( $subtypes and @$subtypes ) {
400 2         9 return PolymorphicType(
401             $type->name, $type->is_boxed, 1, $subtypes
402             );
403             }
404              
405 6         8 return $type;
406             }
407              
408             sub parse_namespaced_ident
409             {
410 49     49 0 44 my $self = shift;
411              
412             my $namespace = $self->maybe( sub {
413 49     49   335 my $ns = $self->token_ident;
414 31         1362 $self->expect( "." );
415 0         0 $ns;
416 49         149 });
417              
418 49         2459 my $ident = $self->token_ident;
419              
420 31 50       1076 return "$namespace.$ident" if defined $namespace;
421 31         51 return "$ident";
422             }
423              
424             sub token_var_ident
425             {
426 35     35 0 118 my $self = shift;
427 35         53 my %args = @_;
428              
429 35         53 my $ident = $self->token_ident;
430 23 50 33     802 $self->fail( "Require a named identifier" ) if $ident eq "_" and !$args{optional};
431              
432 23         37 return $ident;
433             }
434              
435             =head1 AUTHOR
436              
437             Paul Evans
438              
439             =cut
440              
441             0x55AA;