File Coverage

deps/libgit2/deps/pcre/pcre_xclass.c
Criterion Covered Total %
statement 0 20 0.0
branch 0 24 0.0
condition n/a
subroutine n/a
pod n/a
total 0 44 0.0


line stmt bran cond sub pod time code
1             /*************************************************
2             * Perl-Compatible Regular Expressions *
3             *************************************************/
4              
5             /* PCRE is a library of functions to support regular expressions whose syntax
6             and semantics are as close as possible to those of the Perl 5 language.
7              
8             Written by Philip Hazel
9             Copyright (c) 1997-2013 University of Cambridge
10              
11             -----------------------------------------------------------------------------
12             Redistribution and use in source and binary forms, with or without
13             modification, are permitted provided that the following conditions are met:
14              
15             * Redistributions of source code must retain the above copyright notice,
16             this list of conditions and the following disclaimer.
17              
18             * Redistributions in binary form must reproduce the above copyright
19             notice, this list of conditions and the following disclaimer in the
20             documentation and/or other materials provided with the distribution.
21              
22             * Neither the name of the University of Cambridge nor the names of its
23             contributors may be used to endorse or promote products derived from
24             this software without specific prior written permission.
25              
26             THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27             AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28             IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29             ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30             LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31             CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32             SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33             INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34             CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35             ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36             POSSIBILITY OF SUCH DAMAGE.
37             -----------------------------------------------------------------------------
38             */
39              
40              
41             /* This module contains an internal function that is used to match an extended
42             class. It is used by both pcre_exec() and pcre_dfa_exec(). */
43              
44              
45             #ifdef HAVE_CONFIG_H
46             #include "config.h"
47             #endif
48              
49             #include "pcre_internal.h"
50              
51              
52             /*************************************************
53             * Match character against an XCLASS *
54             *************************************************/
55              
56             /* This function is called to match a character against an extended class that
57             might contain values > 255 and/or Unicode properties.
58              
59             Arguments:
60             c the character
61             data points to the flag byte of the XCLASS data
               utf when SUPPORT_UTF is defined, TRUE makes list values be read with
               GETCHARINC() and FALSE makes them be read as single code units; the
               value passed in is overwritten with TRUE when COMPILE_PCRE8 is defined
62              
63             Returns: TRUE if character matches, else FALSE
64             */
65              
66             BOOL
67 0           PRIV(xclass)(pcre_uint32 c, const pcre_uchar *data, BOOL utf)
68             {
69             pcre_uchar t; /* type code of the list item currently being examined */
70 0           BOOL negated = (*data & XCL_NOT) != 0; /* XCL_NOT bit of the flag byte */
71              
72             (void)utf; /* NOTE(review): presumably silences an unused-parameter warning in builds where utf is otherwise never referenced */
73             #ifdef COMPILE_PCRE8
74             /* In 8 bit mode, this must always be TRUE. Help the compiler to know that. */
75 0           utf = TRUE;
76             #endif
77              
78             /* Character values < 256 are matched against a bitmap, if one is present. If
79             not, we still carry on, because there may be ranges that start below 256 in the
80             additional data. */
               /* NOTE(review): as coded below, a character < 256 only carries on to the
               additional data when XCL_HASPROP is set; when it is clear the function returns
               from inside this block whether or not a bitmap is present. */
81              
82 0 0         if (c < 256)
83             {
84 0 0         if ((*data & XCL_HASPROP) == 0)
85             {
86 0 0         if ((*data & XCL_MAP) == 0) return negated; /* same value as the final "did not match" return */
               /* The bitmap bit is returned as-is; XCL_NOT is not applied on this path.
               NOTE(review): presumably the map already reflects any negation when
               XCL_HASPROP is clear - confirm against the pattern compiler. */
87 0           return (((pcre_uint8 *)(data + 1))[c/8] & (1 << (c&7))) != 0;
88             }
               /* XCL_HASPROP is set: a set bitmap bit is a match; otherwise fall through
               and test c against the item list below. */
89 0 0         if ((*data & XCL_MAP) != 0 &&
    0          
90 0           (((pcre_uint8 *)(data + 1))[c/8] & (1 << (c&7))) != 0)
91 0           return !negated; /* char found */
92             }
93              
94             /* First skip the bit map if present. Then match against the list of Unicode
95             properties or large chars or ranges that end with a large char. We won't ever
96             encounter XCL_PROP or XCL_NOTPROP when UCP support is not compiled. */
97              
98 0 0         if ((*data++ & XCL_MAP) != 0) data += 32 / sizeof(pcre_uchar); /* step past the flag unit, then past the 32-byte map */
99              
100 0 0         while ((t = *data++) != XCL_END)
101             {
102             pcre_uint32 x, y; /* x: single value or range start; y: range end */
103 0 0         if (t == XCL_SINGLE)
104             {
105             #ifdef SUPPORT_UTF
106             if (utf)
107             {
108             GETCHARINC(x, data); /* macro generates multiple statements */
109             }
110             else
111             #endif
112 0           x = *data++;
113 0 0         if (c == x) return !negated;
114             }
115 0 0         else if (t == XCL_RANGE)
116             {
117             #ifdef SUPPORT_UTF
118             if (utf)
119             {
120             GETCHARINC(x, data); /* macro generates multiple statements */
121             GETCHARINC(y, data); /* macro generates multiple statements */
122             }
123             else
124             #endif
125             {
126 0           x = *data++;
127 0           y = *data++;
128             }
129 0 0         if (c >= x && c <= y) return !negated; /* range is inclusive at both ends */
    0          
130             }
131              
132             #ifdef SUPPORT_UCP
133             else /* XCL_PROP & XCL_NOTPROP */
134             {
135             const ucd_record *prop = GET_UCD(c);
136             BOOL isprop = t == XCL_PROP; /* FALSE for any other item type reaching here (XCL_NOTPROP expected) */
137              
                /* data[0] selects the kind of property test; data[1] is the value compared
                by the PT_GC, PT_PC and PT_SC cases. Each case reports a match when the
                outcome of its test equals isprop. */
138             switch(*data)
139             {
140             case PT_ANY:
141             if (isprop) return !negated;
142             break;
143              
144             case PT_LAMP:
145             if ((prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
146             prop->chartype == ucp_Lt) == isprop) return !negated;
147             break;
148              
149             case PT_GC:
150             if ((data[1] == PRIV(ucp_gentype)[prop->chartype]) == isprop)
151             return !negated;
152             break;
153              
154             case PT_PC:
155             if ((data[1] == prop->chartype) == isprop) return !negated;
156             break;
157              
158             case PT_SC:
159             if ((data[1] == prop->script) == isprop) return !negated;
160             break;
161              
162             case PT_ALNUM:
163             if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
164             PRIV(ucp_gentype)[prop->chartype] == ucp_N) == isprop)
165             return !negated;
166             break;
167              
168             /* Perl space used to exclude VT, but from Perl 5.18 it is included,
169             which means that Perl space and POSIX space are now identical. PCRE
170             was changed at release 8.34. */
171              
172             case PT_SPACE: /* Perl space */
173             case PT_PXSPACE: /* POSIX space */
174             switch(c)
175             {
                /* NOTE(review): HSPACE_CASES / VSPACE_CASES are presumably macros that
                expand to case labels for the horizontal and vertical space characters -
                confirm in pcre_internal.h. */
176             HSPACE_CASES:
177             VSPACE_CASES:
178             if (isprop) return !negated;
179             break;
180              
181             default:
182             if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == isprop)
183             return !negated;
184             break;
185             }
186             break;
187              
188             case PT_WORD:
189             if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
190             PRIV(ucp_gentype)[prop->chartype] == ucp_N || c == CHAR_UNDERSCORE)
191             == isprop)
192             return !negated;
193             break;
194              
195             case PT_UCNC:
196             if (c < 0xa0)
197             {
198             if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
199             c == CHAR_GRAVE_ACCENT) == isprop)
200             return !negated;
201             }
202             else
203             {
204             if ((c < 0xd800 || c > 0xdfff) == isprop) /* everything from 0xa0 up except 0xd800-0xdfff */
205             return !negated;
206             }
207             break;
208              
209             /* The following three properties can occur only in an XCLASS, as there
210             is no \p or \P coding for them. */
211              
212             /* Graphic character. Implement this as not Z (space or separator) and
213             not C (other), except for Cf (format) with a few exceptions. This seems
214             to be what Perl does. The exceptional characters are:
215              
216             U+061C Arabic Letter Mark
217             U+180E Mongolian Vowel Separator
218             U+2066 - U+2069 Various "isolate"s
219             */
220              
221             case PT_PXGRAPH:
222             if ((PRIV(ucp_gentype)[prop->chartype] != ucp_Z &&
223             (PRIV(ucp_gentype)[prop->chartype] != ucp_C ||
224             (prop->chartype == ucp_Cf &&
225             c != 0x061c && c != 0x180e && (c < 0x2066 || c > 0x2069))
226             )) == isprop)
227             return !negated;
228             break;
229              
230             /* Printable character: same as graphic, with the addition of Zs, i.e.
231             not Zl and not Zp, and U+180E. */
232              
233             case PT_PXPRINT:
234             if ((prop->chartype != ucp_Zl &&
235             prop->chartype != ucp_Zp &&
236             (PRIV(ucp_gentype)[prop->chartype] != ucp_C ||
237             (prop->chartype == ucp_Cf &&
238             c != 0x061c && (c < 0x2066 || c > 0x2069))
239             )) == isprop)
240             return !negated;
241             break;
242              
243             /* Punctuation: all Unicode punctuation, plus ASCII characters that
244             Unicode treats as symbols rather than punctuation, for Perl
245             compatibility (these are $+<=>^`|~). */
246              
247             case PT_PXPUNCT:
248             if ((PRIV(ucp_gentype)[prop->chartype] == ucp_P ||
249             (c < 128 && PRIV(ucp_gentype)[prop->chartype] == ucp_S)) == isprop)
250             return !negated;
251             break;
252              
253             /* This should never occur, but compilers may mutter if there is no
254             default. */
255              
256             default:
257             return FALSE; /* unconditional FALSE: `negated` is not consulted here */
258             }
259              
260             data += 2; /* step past the two property units (data[0] and data[1]) */
261             }
262             #endif /* SUPPORT_UCP */
263             }
264              
265 0           return negated; /* char did not match */
266             }
267              
268             /* End of pcre_xclass.c */