| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
# Copyright (C) 2004 Identity Commons. All Rights Reserved. |
|
2
|
|
|
|
|
|
|
# See LICENSE for licensing details |
|
3
|
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
# Author: Fen Labalme , |
|
5
|
|
|
|
|
|
|
# with a tip-of-the-cap to parse.py written by Gabe Wachob |
|
6
|
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
# TODO: |
|
8
|
|
|
|
|
|
|
# fix FIXME sections |
|
9
|
|
|
|
|
|
|
# add UNICODE support |
|
10
|
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
package XRI::Parse; |
|
12
|
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
our $VERSION = 0.1; |
|
14
|
|
|
|
|
|
|
|
|
15
|
6
|
|
|
6
|
|
100127
|
use Text::Balanced qw( extract_bracketed ); |
|
|
6
|
|
|
|
|
166321
|
|
|
|
6
|
|
|
|
|
709
|
|
|
16
|
6
|
|
|
6
|
|
5686
|
use URI::Escape; |
|
|
6
|
|
|
|
|
9437
|
|
|
|
6
|
|
|
|
|
7189
|
|
|
17
|
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
# Separator characters. The same three characters appear in the literal
# character classes used by getAuthority and nextToken.
# NOTE(review): this array is not referenced by any sub visible in this file.
our @SEPARATORS = qw( / * : ); |
|
19
|
|
|
|
|
|
|
|
# Characters listed as gcs-chars.
# NOTE(review): getAuthority's pattern only accepts @ = * as a leading
# gcs-char; confirm whether + and $ are meant to be recognized there too.
our @GCS_CHARS = qw( @ = + $ * ); |
|
20
|
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
# Constructor.
#   $class - package name to bless the new object into
#   $xri   - XRI string; a leading "xri:" (any letter case) is removed
# Returns a blessed hash holding the stripped XRI plus the parser state
# slots (token, remainder, authority), all initially undefined.
sub new {
    my ( $class, $xri ) = @_;

    # Drop the optional scheme prefix before storing the XRI.
    $xri =~ s/^xri://i;
    # $xri = stripComments( $xri );

    my %state = (
        xri       => $xri,
        authority => undef,
        remainder => undef,
        token     => undef,
    );
    return bless \%state, $class;
}
|
33
|
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
# escapes an XRI (including relative XRIs) for inclusion in an HTTP request |
|
36
|
|
|
|
|
|
|
# FIXME: currently handles xrefs identically to sub-segments |
|
37
|
|
|
|
|
|
|
# |
|
38
|
|
|
|
|
|
|
# Builds the escaped form of the whole XRI by walking every segment
# returned by nextSegment.
#
# The first element of each segment (the authority token or the leading
# separator such as '//', '/*' or '/:') is copied verbatim; every following
# element is passed through uri_escape with the unreserved-character set.
#
# Returns the concatenation of all segments, or undef when the XRI yields
# no segments at all.
sub escapeURI {
    my $this = shift;
    my $result;    # intentionally left undef until the first segment arrives

    while ( my $seg = $this->nextSegment ) {
        my ( $lead, @subsegs ) = @$seg;

        # Append (not assign): assigning here would discard every segment
        # that was already escaped and return only the last one.
        $result .= $lead;

        foreach my $subseg (@subsegs) {
            # FIXME: cross-references (elements starting with '(') are
            # currently escaped exactly like ordinary sub-segments.
            $result .= uri_escape($subseg, "^A-Za-z0-9\\\-\_\.\!\~\*\'");
        }
    }
    return $result;
}
|
54
|
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
# if an absolute-xri, emit the array ref [ [ firstSegment ], local-path ] |
|
56
|
|
|
|
|
|
|
# if a relative-xri, emit the local-path or relative-path as a string |
|
57
|
|
|
|
|
|
|
# |
|
58
|
|
|
|
|
|
|
# Splits the XRI into its authority part and whatever follows it.
#
# Consumes the first segment via nextSegment. If that left an authority
# recorded on the object, returns [ \@authority, $remainder ] where
# @authority is the first segment with every element lower-cased and
# $remainder is the object's current 'remainder' slot. Otherwise the XRI
# is relative and the stored XRI string is returned unchanged.
sub splitAuthLocal {
    my ($this) = @_;
    my $head = $this->nextSegment;

    # No authority recorded: relative XRI, hand back the string as is.
    return $this->{xri} unless defined $this->{'authority'};

    # lowercase the authority segments
    my @lowered = map { lc $_ } @$head;
    return [ \@lowered, $this->{remainder} ];
}
|
76
|
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
# Emits a series of segments, each of which is a |
|
78
|
|
|
|
|
|
|
# list of (separator, part, separator...) tuples |
|
79
|
|
|
|
|
|
|
# Segments are separated by forward slash '/' |
|
80
|
|
|
|
|
|
|
# Emits (gcs-char, part, separator, part...) |
|
81
|
|
|
|
|
|
|
# for the first segment if using a gcs-char |
|
82
|
|
|
|
|
|
|
# Separator is one of "/*", "/:", "*", or ":" |
|
83
|
|
|
|
|
|
|
# |
|
84
|
|
|
|
|
|
|
# Returns the next segment as an array ref of tokens, or undef when the
# input is exhausted.
#
# A segment starts with either the token saved by the previous call (the
# '/'-prefixed separator that ended the previous segment) or a fresh token
# from nextToken. Tokens are then collected until nextToken runs dry or
# returns a token beginning with '/'; that token is stashed in
# $this->{token} so it can open the following segment.
#
# Tokens are tested with defined() rather than for truth so that a
# sub-segment consisting of the single character "0" is not mistaken for
# end of input and silently dropped.
sub nextSegment {
    my $this = shift;
    my @segment;

    if ( defined $this->{token} ) {
        # Resume with the separator held over from the previous segment.
        @segment = ( $this->{token} );
        undef $this->{token};
    }
    else {
        my $first = $this->nextToken;
        # Explicit undef is kept so the return shape matches earlier behavior.
        return undef unless defined $first;
        @segment = ($first);
    }

    my $token;
    while ( defined( $token = $this->nextToken ) && $token !~ m|^\/| ) {
        push @segment, $token;
    }

    # A defined token here is the '/'-prefixed separator that ended the loop.
    $this->{token} = $token if defined $token;
    return \@segment;
}
|
106
|
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
# Extracts a leading cross-reference "( ... )" from $xri.
#
# Leading comments (bracketed groups starting with "(!") are skipped.
# On success the cross-reference, including its parentheses, is returned
# and $this->{remainder} holds the text that follows it. When $xri does not
# start with a cross-reference, undef is returned and $this->{remainder}
# holds $xri with any leading comments removed.
#
# FIXME: what to do if: 'xri:(!comment1).(!comment2)' -- (is this legal?)
# FIXME: raise error if unbalanced parens
sub getCrossReference {
    my ( $this, $xri ) = @_;

    for (;;) {
        # remainder always tracks the text still to be examined
        $this->{remainder} = $xri;
        last unless $this->{remainder} =~ m|^\(|;    # not a cross-reference

        my ( $candidate, $rest ) = extract_bracketed($xri, '()');
        $xri = $rest;
        next if $candidate =~ m|^\(\!|;    # skip leading comments

        $this->{remainder} = $xri;
        return $candidate;
    }
    return undef;
}
|
122
|
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
# return the initial cross-reference, '//', or one of qw( @ = * ); otherwise nothing |
|
124
|
|
|
|
|
|
|
# created to better strip leading comments |
|
125
|
|
|
|
|
|
|
# perhaps comment stripping should occur on object instantiation? |
|
126
|
|
|
|
|
|
|
# |
|
127
|
|
|
|
|
|
|
# Determines the authority token at the start of the stored XRI and
# records it in $this->{authority}.
#
# Checked in order:
#   1. a leading cross-reference (via getCrossReference)
#   2. a leading '//'
#   3. a leading gcs-char, one of @ = *
# In each case the token is returned and $this->{remainder} is left holding
# the text after it. For a gcs-char, a '*' is prepended to the remainder
# unless it already begins with one of / * :
#
# When none match, $this->{remainder} is set to the full stored XRI and
# nothing is returned (undef in scalar context).
sub getAuthority {
    my ($this) = @_;
    my $xri = $this->{xri};

    # getCrossReference also primes $this->{remainder} for the checks below.
    my $xref = $this->getCrossReference( $xri );
    if ($xref) {
        $this->{'authority'} = $xref;
        return $xref;
    }

    if ($this->{remainder} =~ m|^\/\/(.*)$|) { # initial '//'
        @$this{ 'authority', 'remainder' } = ( '//', $1 );
        return '//';
    }

    if ($this->{remainder} =~ m|^([\@\=\*])(.*)$|) { # gcs-char
        my $gcs  = $1;
        my $tail = $2;

        # Supply the implied '*' separator when none follows the gcs-char.
        my $glue = '*';
        $glue = '' if $tail =~ m|^[\/\*\:]|;

        $this->{remainder}   = $glue . $tail;
        $this->{'authority'} = $gcs;
        return $gcs;
    }

    # Relative XRI: start tokenizing from the complete string.
    $this->{remainder} = $xri;
    return;
}
|
150
|
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
# Generates a list of (separator, string) pairs |
|
153
|
|
|
|
|
|
|
# Ignores the leading xri: |
|
154
|
|
|
|
|
|
|
# If the first two characters (ignoring the xri:) are //, returns this *once* as the |
|
155
|
|
|
|
|
|
|
# first token, as the // is only legal at the very beginning |
|
156
|
|
|
|
|
|
|
# Everything within () is treated as a single token |
|
157
|
|
|
|
|
|
|
# Yields a series of strings, one of the characters in SEPARATORS, or |
|
158
|
|
|
|
|
|
|
# a string of characters (a sub-segment) |
|
159
|
|
|
|
|
|
|
# FIXME: fix handling of '*' |
|
160
|
|
|
|
|
|
|
# FIXME: add handling of '&' |
|
161
|
|
|
|
|
|
|
# FIXME: strip comments: including multiple, before or after GCS |
|
162
|
|
|
|
|
|
|
# |
|
163
|
|
|
|
|
|
|
# Returns the next token of the XRI, or undef when nothing is left.
#
# On the first call ($this->{remainder} still undefined) getAuthority is
# asked for the authority token; if it finds one, that is the token.
# Afterwards, in order of preference, the token is:
#   - a cross-reference "( ... )" found by getCrossReference
#   - a separator: '*' or ':', or for a slash one of '/*' or '/:'
#     (a bare '/' is reported as '/*')
#   - a sub-segment: the longest run of characters containing none of / * :
# $this->{remainder} is advanced past whatever was returned.
#
# FIXME: fix handling of '*'
# FIXME: add handling of '&'
# FIXME: strip comments: including multiple, before or after GCS
sub nextToken {
    my $this = shift;

    if ( !defined $this->{remainder} ) {
        my $auth = $this->getAuthority;
        return $auth if $auth;
    }

    # Lexical on purpose: an undeclared $xref would be a package global
    # shared by every parser object and would not compile under strict.
    my $xref = $this->getCrossReference( $this->{remainder} );
    return $xref if $xref;

    if ($this->{remainder} =~ m|^([\/\*\:])(.*)$|) { # initial separators
        my ($sep, $rem) = ($1, $2);
        if ($sep eq '/') {
            if ($rem =~ m|^([\*\:])(.*)$|) {
                $sep .= $1;   # '/*' or '/:'
                $rem = $2;
            }
            else {
                $sep = '/*';  # bare '/' implies the '*' separator
            }
        }
        $this->{remainder} = $rem;
        return $sep;
    }

    if ($this->{remainder} =~ m|^([^\/\*\:]+)(.*)$|) { # sub-segment
        $this->{remainder} = $2;
        return $1;
    }

    return undef;
}
|
192
|
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
1; |
|
194
|
|
|
|
|
|
|
__END__ |