File Coverage

blib/lib/C/Sharp/Tokener.pm
Criterion Covered Total %
statement 34 62 54.8
branch 30 54 55.5
condition 6 15 40.0
subroutine 2 8 25.0
pod 0 8 0.0
total 72 147 48.9


line stmt bran cond sub pod time code
1             package C::Sharp::Tokener;
2             our $ref_name;
3             our ($ref_line, $line, $col, $current_token, $handle_get_set) = (1,1,1,0,0);
4              
5             our $VERSION = "0.08062001"; # MCS version number
6              
7             =head1 NAME
8              
9             C::Sharp::Tokener - Tokeniser for C#
10              
11             =head1 SYNOPSIS
12              
13             use C::Sharp::Tokener;
14             do {
15             ($token, $ttype, $remainder) = C::Sharp::Tokener::tokener($input);
16             } while ($input = $remainder)
17              
18             use C::Sharp::Parser;
19             $parser = new C::Sharp::Parser;
20             $parser->YYParse(yylex => \&C::Sharp::Tokener::yy_tokener);
21              
22             =head1 DESCRIPTION
23              
24             C# is the new .NET programming language from Microsoft; the Mono project
25             is an Open Source implementation of .NET. This code, based on the Mono
26             project, implements a tokeniser for C#. Combined with
27             C it can be used to create a compiler for C#.
28              
29             =head1 SEE ALSO
30              
31             L
32              
33             =head1 AUTHOR
34              
35             Simon Cozens (simon@cpan.org)
36             Based very, very heavily on code by Miguel de Icaza (miguel@gnu.org)
37              
38             =cut
39              
40             my %keywords;
41             my $number;
42             my $putback_char = -1;
43             my $val;
44             my $error_details;
45              
46             sub location {
47 0   0 0 0 0 return "Line: $line Col: $col\nVirtLine: $ref_line Token: $current_token ".
48             ($current_token eq "ERROR" && "Detail: $error_details");
49             }
50              
51             sub properties {
52 0 0   0 0 0 defined $_[0] ? $handle_get_set = $_[0] : $handle_get_set;
53             }
54              
55 0     0 0 0 sub error { $error_details }
56 0     0 0 0 sub Line { $line }
57 0     0 0 0 sub Col { $col }
58              
59             $keywords{lc($_)}=$_ for qw{
60             ABSTRACT AS ADD BASE BOOL BREAK BYTE CASE CATCH CHAR CHECKED CLASS CONST CONTINUE DECIMAL DEFAULT DELEGATE DO DOUBLE
61             ELSE ENUM EVENT EXPLICIT EXTERN FALSE FINALLY FIXED FLOAT FOR FOREACH GOTO IF IMPLICIT IN INT INTERFACE INTERNAL IS
62             LOCK LONG NAMESPACE NEW NULL OBJECT OPERATOR OUT OVERRIDE PARAMS PRIVATE PROTECTED PUBLIC READONLY REF RETURN REMOVE
63             SBYTE SEALED SHORT SIZEOF STATIC STRING STRUCT SWITCH THIS THROW TRUE TRY TYPEOF UINT ULONG UNCHECKED UNSAFE USHORT
64             USING VIRTUAL VOID WHILE
65             };
66              
67             sub is_keyword {
68 36 50 33 36 0 146 return 0 if (($_ eq "get" or $_ eq "set") and not $handle_get_set);
      33        
69 36         217 return exists $keywords{$_[0]};
70             }
71              
72             sub yy_tokener {
73 0     0 0 0 my $self = shift; # This is a Parse::Yapp object
74 0         0 my ($token,$ttype);
75 0         0 ($token, $ttype, $self->YYData->{INPUT}) = tokener($self->YYData->{INPUT});
76 0         0 return ($ttype, $token);
77             }
78              
79             sub tokener {
80 84     84 0 3987 $_ = shift;
81 84         89 my ($allow_keyword) = 0;
82 84         180 while ($_) {
83 86         288 s/^\s+//sm;
84 86 100       198 if (/^[a-zA-Z_]/) { # Check C# standard - may be Unicode aware
85 36         104 s/(.\w*)//;
86 36 100 66     55 return ($1, "IDENTIFIER", $_) if !is_keyword($1) or $allow_keyword;
87 10         45 return ($1, uc($1), $_);
88             }
89 50 100 66     203 if (/^\.\d/ || /^\d/) {
90 1         2 my $real = 0;
91 1         2 my $val = 0;
92 1 50       9 if (s/^0[Xx]([A-Fa-f0-9]+)//) {
    50          
    50          
93 0         0 $val = hex($1);
94 0 0       0 die "Oops: [UL] not handled yet." if /^[ULul]/;
95 0         0 return ($1, "LITERAL_INTEGER", $_);
96             } elsif (s/^(\d+)(\.\D)/$2/) {
97 0         0 return ($1, "LITERAL_INTEGER", $_);
98             } elsif (s/^(\d+\.\d+)//) {
99 0         0 $real =1;
100 0         0 $val = $1;
101             } else {
102 1         4 s/(\d+)//; $val = $1;
  1         2  
103             }
104 1 50       4 $val .= $1 if (s/^([eE][+-]\d+)//);
105 1 50       3 if (s/^[fF]//) { return ($val, "LITERAL_FLOAT", $_) }
  0         0  
106 1 50       3 if (s/^[dD]//) { return ($val, "LITERAL_DOUBLE", $_) }
  0         0  
107 1 50       3 if (s/^[mM]//) { return ($val, "LITERAL_DECIMAL", $_) }
  0         0  
108 1 50       4 if (!$real) {
109 1 50       4 die "Oops: [UL] not handled yet." if /^[ULul]/;
110 1         4 return ($val, "LITERAL_INTEGER", $_);
111             }
112 0         0 die "Something went wrong with value $val";
113             }
114 49 100       128 return (".", "DOT", $_) if s/^\.//;
115 44 100       77 s[^//.*][] and next;
116 43 100       97 s[^/\*.*?\*/][]ms and next;
117             # Handle preprocessor commands here. (Honest)
118 42 100       279 return $1, $1, $_ if s#^(
119             [{}\[\]\(\),:;~\?]
120             |
121             \+[\+=]?
122             |
123             -[\-=>]?
124             |
125             [!=/%^]=?
126             |
127             ==?
128             |
129             &[&=]?
130             |
131             \|[\|=]?
132             |
133             <
134             |
135             >>?=?
136             )
137             ##x; # Mighty.
138 2 100       15 if (s/^\"([^\\\"]*(?:\\.[^\\\"]*)*)\"//) { # Thank you, Text::Balanced
139 1         2 my $string = $1;
140             # $string =~ s/\\(.)/"\\$1"/eeg;
141 1         4 return ($string, "LITERAL_STRING", $_);
142             }
143 1 50       3 die "Urgh" if /^\"/;
144 1 50       3 if (s/^'//) {
145 0 0       0 die "CS1011: Empty character literal" if /^'/;
146 0         0 my $char;
147 0 0       0 if (s/^\\(.)//) { $char = eval qq{"\\$1"}; }
  0         0  
  0         0  
148 0         0 else {s/(.)//; $char = $1 };
149 0 0       0 die "CS1012: Too many characters in character literal" if not /^'/;
150 0         0 return ($char, "LITERAL_CHARACTER", $_);
151             }
152 1 50       4 ($allow_keyword = 1), next if s/^@//;
153 1 50       8 return ("","","") unless $_;
154 0           die "Unrecognised input character: ".substr($_,0,1);
155             }
156             }
157              
158             1;