File Coverage

blib/lib/XRI/Parse.pm
Criterion Covered Total %
statement 84 84 100.0
branch 31 32 96.8
condition 6 6 100.0
subroutine 9 9 100.0
pod 0 7 0.0
total 130 138 94.2


line stmt bran cond sub pod time code
1             # Copyright (C) 2004 Identity Commons. All Rights Reserved.
2             # See LICENSE for licensing details
3              
4             # Author: Fen Labalme ,
5             # with a tip-of-the-cap to parse.py written by Gabe Wachob
6              
7             # TODO:
8             # fix FIXME sections
9             # add UNICODE support
10              
11             package XRI::Parse;
12              
13             our $VERSION = 0.1;
14              
15 6     6   100127 use Text::Balanced qw( extract_bracketed );
  6         166321  
  6         709  
16 6     6   5686 use URI::Escape;
  6         9437  
  6         7189  
17              
18             our @SEPARATORS = qw( / * : );
19             our @GCS_CHARS = qw( @ = + $ * );
20              
21              
22             sub new {
23 23     23 0 22568 my $self = shift;
24 23         35 my $xri = shift;
25 23         77 $xri =~ s/^xri://i;
26             # $xri = stripComments( $xri );
27 23         108 my $this = { token=>undef,
28             remainder=>undef,
29             authority=>undef,
30             xri=>$xri };
31 23         85 bless $this, $self;
32             }
33              
34              
35             # escapes an XRI (including relative XRIs) for inclusion in an HTTP request
36             # FIXME: currently handles xrefs identically to sub-segments
37             #
38             sub escapeURI {
39 6     6 0 19 my $this = shift;
40 6         7 my $result;
41 6         16 while (my $seg = $this->nextSegment) {
42 8         9 $result = shift @$seg; # always one of qw( @ // /. /: )
43 8         14 foreach my $subseg ( @$seg ) {
44 15 100       471 if ($subseg =~ m|^\(|) { # xref
45 1         4 $result .= uri_escape($subseg, "^A-Za-z0-9\\\-\_\.\!\~\*\'");
46             }
47             else { # sub-segment
48 14         36 $result .= uri_escape($subseg, "^A-Za-z0-9\\\-\_\.\!\~\*\'");
49             }
50             }
51             }
52 6         20 return $result;
53             }
54              
55             # if an absolute-xri, emit the array ref [ [ firstSegment ], local-path ]
56             # if a relative-xri, emit the local-path or relative-path as a string
57             #
58             sub splitAuthLocal {
59 3     3 0 5 my $this = shift;
60 3         7 my $firstRef = $this->nextSegment;
61              
62 3 100       8 if ( defined $this->{'authority'} ) {
63 2         3 my @auth = ();
64             #
65             # lowercase the authority segments
66             #
67 2         5 foreach my $seg (@$firstRef) {
68 5         13 push @auth, lc $seg;
69             }
70 2         11 return [ \@auth, $this->{remainder} ];
71             }
72             else {
73 1         6 return $this->{xri};
74             }
75             }
76              
77             # Emits a series of segments, each of which is a
78             # list of (separator, part, separator...) tuples
79             # Segments are separated by forward slash '/'
80             # Emits (gcs-char, part, separator, part...)
81             # for the first segment if using a gcs-char
82             # Separator is one of "/.", "/:", ".", or ":"
83             #
84             sub nextSegment {
85 31     31 0 302 my $this = shift;
86 31         33 my ( $token, @segment );
87              
88 31 100       90 if (defined $this->{token}) {
89 8         16 @segment = ( $this->{token} );
90 8         13 undef $this->{token};
91             }
92             else {
93 23 100       51 if ( $token = $this->nextToken ) {
94 13         36 @segment = ( $token );
95             }
96             else {
97 10         36 return undef;
98             }
99             }
100 21   100     48 while (( $token = $this->nextToken ) && $token !~ m|^\/| ) {
101 45         103 push @segment, $token;
102             }
103 21 100       48 $this->{token} = $token if $token;
104 21         57 return \@segment;
105             }
106              
107             sub getCrossReference {
108 169     169 0 169 my $this = shift;
109 169         179 my $xri = shift;
110              
111             # FIXME: what to do if: 'xri:(!comment1).(!comment2)' -- (is this legal?)
112             # FIXME: raise error if unbalanced parens
113 169         402 while (($this->{remainder} = $xri) =~ m|^\(|) { # cross-reference
114 8         12 my $xref;
115 8         37 ($xref, $xri) = extract_bracketed($xri, '()');
116 8 100       1313 next if $xref =~ m|^\(\!|; # skip leading comments
117 6         13 $this->{remainder} = $xri;
118 6         30 return $xref;
119             }
120 163         353 return undef;
121             }
122              
123             # return initial qw( @ = * // ) or undef
124             # created to better strip leading comments
125             # perhaps comment stripping should occur on object instantiation?
126             #
127             sub getAuthority {
128 23     23 0 32 my $this = shift;
129 23         32 my $xri = $this->{xri};
130 23         24 my $xref;
131              
132 23 100       46 if ( $xref = $this->getCrossReference( $xri )) {
133 2         5 $this->{'authority'} = $xref;
134 2         10 return $xref;
135             }
136 21 100       126 if ($this->{remainder} =~ m|^\/\/(.*)$|) { # initial '//'
137 9         13 $this->{'authority'} = '//';
138 9         27 $this->{remainder} = $1;
139 9         45 return '//';
140             }
141 12 100       51 if ($this->{remainder} =~ m|^([\@\=\*])(.*)$|) { # gcs-char
142 4         11 my ($gcs, $rem) = ($1, $2);
143 4 50       17 $this->{remainder} = (($rem =~ m|^[\/\*\:]|)?'':'*') . $rem;
144 4         30 $this->{'authority'} = $gcs;
145 4         19 return $gcs;
146             }
147 8         15 $this->{remainder} = $xri;
148 8         34 return;
149             }
150            
151              
152             # Generates a list of (separator, string) pairs
153             # Ignores the leading xri:
154             # If the first two characters (ignoring the xri:) are //, returns this *once* as the
155             # first token, as the // is only legal at the very beginning
156             # Everything within () is treated as a single token
157             # Yields a series of strings, one of the characters in SEPARATORS, or
158             # a string of characters (a sub-segment)
159             # FIXME: fix handling of '*'
160             # FIXME: add handling of '&'
161             # FIXME: strip comments: including multiple, before or after GCS
162             #
163             sub nextToken {
164 161     161 0 371 my $this = shift;
165 161         144 my $auth;
166              
167 161 100 100     415 if (!defined $this->{remainder} && ($auth = $this->getAuthority)) {
168 15         42 return $auth;
169             }
170 146 100       312 return $xref if $xref = $this->getCrossReference( $this->{remainder} );
171              
172 142 100       452 if ($this->{remainder} =~ m|^([\/\*\:])(.*)$|) { # initial separators
173 57         131 my ($sep, $rem) = ($1, $2);
174 57 100       119 if ($sep eq '/') {
175 30 100       73 if ($rem =~ m|^([\*\:])(.*)$|) {
176 1         2 $sep .= $1; # '/.' or '/:'
177 1         3 $rem = $2;
178             }
179             else {
180 29         41 $sep = '/*';
181             }
182             }
183 57         82 $this->{remainder} = $rem;
184 57         257 return $sep;
185             }
186 85 100       785 if ($this->{remainder} =~ m|^([^\/\*\:]+)(.*)$|) { # sub-segment
187 55         103 $this->{remainder} = $2;
188 55         237 return $1;
189             }
190 30         76 return undef;
191             }
192              
193             1;
194             __END__