File Coverage

blib/lib/Mail/Alias/Reader/Token.pm
Criterion Covered Total %
statement 76 76 100.0
branch 18 18 100.0
condition n/a
subroutine 23 23 100.0
pod 6 12 50.0
total 123 129 95.3


line stmt bran cond sub pod time code
1             # Copyright (c) 2012, cPanel, Inc.
2             # All rights reserved.
3             # http://cpanel.net/
4             #
5             # This is free software; you can redistribute it and/or modify it under the same
6             # terms as Perl itself. See the LICENSE file for further details.
7              
8             package Mail::Alias::Reader::Token;
9              
10 5     5   25711 use strict;
  5         11  
  5         163  
11 5     5   25 use warnings;
  5         11  
  5         121  
12              
13 5     5   26 use Carp;
  5         11  
  5         10711  
14              
15             =head1 NAME
16              
17             Mail::Alias::Reader::Token
18              
19             =head1 DESCRIPTION
20              
21             Mail::Alias::Reader::Token is not only the class represents an aliases(5) parser
22             token, but also itself is returned by L as a representation
23             of a mail alias destination. For the purposes of this documentation, only the
24             public-facing methods which facilitate the usage of instances of this class
25             shall be discussed.
26              
27             =cut
28              
29             my @TOKEN_TYPES = (
30             [ 'T_COMMENT' => qr/#\s*(.*)$/ ],
31             [ 'T_STRING' => qr/("(?:\\.|[^"\\]+)*")/ ],
32             [ 'T_COMMA' => qr/,/ ],
33             [ 'T_DIRECTIVE' => qr/:([^\:\s]+):([^\:\s,]*)/ ],
34             [ 'T_COMMAND' => qr/\|(\S+)/ ],
35             [ 'T_ADDRESS' => qr/([a-z0-9_\-@\.*]+)/i ],
36             [ 'T_COLON' => qr/\:/ ],
37             [ 'T_FILE' => qr/(\S+)/ ],
38             [ 'T_WHITESPACE' => qr/\s+/ ],
39             );
40              
41             my @TOKEN_STRING_TYPES = (
42             [ 'T_DIRECTIVE' => qr/:([^\:\s]+):\s*(.*)/s ],
43             [ 'T_COMMAND' => qr/\|(.*)/s ],
44             [ 'T_ADDRESS' => qr/([^\/]+)/s ],
45             [ 'T_FILE' => qr/(.*)/s ]
46             );
47              
48             #
49             # Mail::Alias::Reader::Token->new($type)
50             #
51             # Create a new mail alias parser token of the given type. This method isn't
52             # actually meant to be called publically; rather, it is simply a shortcut to
53             # create symbolic parser tokens that contain no data, but refer to a piece of
54             # punctuation, or similar.
55             #
56             sub new {
57 112     112 0 462055 my ( $class, $type ) = @_;
58              
59 112         966 return bless { 'type' => $type }, $class;
60             }
61              
62             #
63             # $token->isa(@types)
64             #
65             # Return true if the current token is of any of the types passed as an
66             # argument.
67             #
68             sub isa {
69 894     894 0 2427 my ( $self, @types ) = @_;
70              
71 894         1647 foreach my $type (@types) {
72 1464 100       10429 return 1 if $self->{'type'} eq $type;
73             }
74              
75 620         5039 return 0;
76             }
77              
78             #
79             # $token->is_punct()
80             #
81             # Returns true if the current token represents a piece of punctuation, or
82             # something that separates values, clauses, or declarations from one another.
83             #
84             sub is_punct {
85 10     10 0 11310 return shift->isa(qw/T_BEGIN T_END T_COLON T_COMMA/);
86             }
87              
88             #
89             # $token->is_value()
90             #
91             # Returns true if the current token represents a meaningful value recorded in
92             # text, such as a mail transfer agent directive, a command to pass message to,
93             # a local or remote mailing address, or a file to append messages to.
94             #
95             sub is_value {
96 104     104 0 4778 return shift->isa(qw/T_DIRECTIVE T_COMMAND T_ADDRESS T_FILE/);
97             }
98              
99             =head1 DETERMINING MAIL DESTINATION TYPE
100              
101             A variety of methods are provided to allow one to infer the type of a mail
102             alias (parser token) returned.
103              
104             =over
105              
106             =item $destination->is_address()
107              
108             Returns true if the mail destination described by the current token is a local
109             part or fully qualified mail address.
110              
111             =cut
112              
113             sub is_address {
114 52     52 1 6108 return shift->isa('T_ADDRESS');
115             }
116              
117             =item $destination->is_directive()
118              
119             Returns true if the mail destination described by the current token is a
120             mail transfer agent directive.
121              
122             =cut
123              
124             sub is_directive {
125 16     16 1 4258 return shift->isa('T_DIRECTIVE');
126             }
127              
128             =item $destination->is_command()
129              
130             Returns true if the mail destination described by the current token is a
131             command to which mail messages should be piped.
132              
133             =cut
134              
135             sub is_command {
136 14     14 1 4168 return shift->isa('T_COMMAND');
137             }
138              
139             =item $destination->is_file()
140              
141             Returns true if the mail destination described by the current token is a file
142             to which mail messages should be appended.
143              
144             =back
145              
146             =cut
147              
148             sub is_file {
149 10     10 1 5940 return shift->isa('T_FILE');
150             }
151              
152             =head1 CONVERTING THE MAIL DESTINATION TO A STRING
153              
154             =over
155              
156             =item $destination->value()
157              
158             Returns a parsed and unescaped logical representation of the mail alias
159             destination that was originally parsed to yield the current token object.
160              
161             =cut
162              
163             sub value {
164 7     7 1 4167 return shift->{'value'};
165             }
166              
167             =item $destination->to_string()
168              
169             Returns a string representation of the mail alias destination that was
170             originally parsed to yield the current token object.
171              
172             =back
173              
174             =cut
175              
176             sub to_string {
177 10     10 1 5949 my ($self) = @_;
178              
179 10         84 my %SUBSTITUTIONS = (
180             "\r" => '\r',
181             "\n" => '\n',
182             "\t" => '\t',
183             '"' => '\"'
184             );
185              
186 10         23 my $value = $self->{'value'};
187              
188 10 100       45 if ($value) {
189 6         21 foreach my $search ( keys %SUBSTITUTIONS ) {
190 24         377 $value =~ s/$search/$SUBSTITUTIONS{$search}/g;
191             }
192             }
193              
194             #
195             # Since not every token type has a "value", per se, lazy evaluation is
196             # necessary to prevent a Perl runtime warning when evaluating the 'T_COMMENT'
197             # part of this hash when dealing with tokens that are anything other than a
198             # comment.
199             #
200             my %FORMATTERS = (
201 1     1   18 'T_COMMENT' => sub { "# $value" },
202 1     1   29 'T_COMMA' => sub { ',' },
203 1     1   13 'T_COLON' => sub { ':' },
204 1     1   12 'T_WHITESPACE' => sub { ' ' }
205 10         134 );
206              
207 10 100       908 return $FORMATTERS{ $self->{'type'} }->() if exists $FORMATTERS{ $self->{'type'} };
208              
209 6         9 my $ret;
210              
211 6 100       16 if ( $self->is_directive ) {
    100          
212 2         7 $ret = ":$self->{'name'}:$value";
213             }
214             elsif ( $self->is_command ) {
215 2         31 $ret = "|$value";
216             }
217             else {
218 2         4 $ret = $value;
219             }
220              
221             #
222             # If the data to be returned contains spaces, then wrap it with double quotes
223             # before returning it to the user.
224             #
225 6 100       61 $ret =~ s/^(.*)$/"$1"/ if $ret =~ /\s/;
226              
227 6         91 return $ret;
228             }
229              
230             #
231             # Mail::Alias::Reader::Token->tokenize_for_types($buf, @types)
232             #
233             # Transform the given text buffer, $buf, into a series of tokens, based on the
234             # rules passed in @types (defined near the top of this file). Returns an ARRAY
235             # of tokens that were matched based on the rules in @types versus the input
236             # buffer.
237             #
238             # As the token types are associated with their parsing rules, and are given in
239             # an ordered manner, proper precedence can be followed and ambiguity in lexing
240             # can be overcome.
241             #
242             # This method does not provide the main tokenizing interface; rather, it only
243             # facilitates for the easy access of a single pass of tokenizing, and is called
244             # by the Mail::Alias::Reader::Token->tokenize() method.
245             #
246             sub tokenize_for_types {
247 41     41 0 167 my ( $class, $buf, @types ) = @_;
248 41         55 my @tokens;
249              
250 41         165 match: while ($buf) {
251 203         312 foreach my $type (@types) {
252 1266 100       34442 next unless $buf =~ s/^$type->[1]//;
253              
254 203         1395 my $token = bless {
255             'type' => $type->[0],
256             }, $class;
257              
258 203 100       829 if ( $type->[0] eq 'T_DIRECTIVE' ) {
259 2         5 @{$token}{qw(name value)} = ( $1, $2 );
  2         21  
260             }
261             else {
262 201         859 $token->{'value'} = $1;
263             }
264              
265 203         301 push @tokens, $token;
266              
267 203         639 next match;
268             }
269             }
270              
271 41         140 return \@tokens;
272             }
273              
274             #
275             # Mail::Alias::Reader::Token->tokenize($buf)
276             #
277             # Returns an ARRAY of tokens parsed from the given text buffer.
278             #
279             # This method tokenizes in two stages; first, it performs a high-level sweep of
280             # any statements not inside double quotes, though while grabbing double-quoted
281             # statements and holding onto them for a later second pass. During this second
282             # tokenization pass, performed for each double-quoted statement found and in the
283             # order of first-stage tokenization, statements containing spaces are parsed.
284             #
285             # Since this method is intended to be used on a single line of input, a T_BEGIN
286             # and T_END token comes as the first and the last token returned, respectively.
287             #
288             sub tokenize {
289 38     38 0 7398 my ( $class, $buf ) = @_;
290              
291             #
292             # When parsing token data contained within double quotes, the following
293             # escape sequence patterns and substitutions are iterated over for each
294             # double quoted expression, performing unescaping where necessary.
295             #
296 38         287 my %WHITESPACE = (
297             'r' => "\r",
298             'n' => "\n",
299             't' => "\t"
300             );
301              
302             my @STRING_ESCAPE_SEQUENCES = (
303 2     2   17 [ qr/\\(0\d*)/ => sub { pack 'W', oct($1) } ],
304 1     1   7 [ qr/\\(x[0-9a-f]+)/ => sub { pack 'W', hex("0$1") } ],
305 3     3   14 [ qr/\\([rnt])/ => sub { $WHITESPACE{$1} } ],
306 38     2   978 [ qr/\\([^rnt])/ => sub { $1 } ]
  2         10  
307             );
308              
309             #
310             # Perform first stage tokenization on the input.
311             #
312 38         194 my $tokens = $class->tokenize_for_types( $buf, @TOKEN_TYPES );
313              
314 38         57 foreach my $token ( @{$tokens} ) {
  38         102  
315              
316             #
317             # Perform second stage tokenization on any T_STRING tokens found. As the aliases(5)
318             # format lacks a string literal type, a second pass is required to parse the quote
319             # delimited string out for a more specific type.
320             #
321 200 100       384 if ( $token->isa('T_STRING') ) {
322 3         23 $token->{'value'} =~ s/^"(.*)"$/$1/s;
323              
324             #
325             # Parse for any escape sequences that may be present.
326             #
327 3         7 foreach my $sequence (@STRING_ESCAPE_SEQUENCES) {
328 12         31 my ( $pattern, $subst ) = @{$sequence};
  12         21  
329              
330 12         64 $token->{'value'} =~ s/$pattern/$subst->()/seg;
  8         12  
331             }
332              
333             #
334             # Create a new token from the second pass parsing step for the string
335             # contents, copying the data directly into the existing token (so as to
336             # not lose the previous reference).
337             #
338 3         5 %{$token} = %{ $class->tokenize_for_types( $token->{'value'}, @TOKEN_STRING_TYPES )->[0] };
  3         21  
  3         15  
339             }
340             }
341              
342             return [
343 38         179 $class->new('T_BEGIN'),
344 38         173 @{$tokens},
345             $class->new('T_END')
346             ];
347             }
348              
349             1;
350              
351             __END__