| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
/* |
|
2
|
|
|
|
|
|
|
** 2002 April 25 |
|
3
|
|
|
|
|
|
|
** |
|
4
|
|
|
|
|
|
|
** The author disclaims copyright to this source code. In place of |
|
5
|
|
|
|
|
|
|
** a legal notice, here is a blessing: |
|
6
|
|
|
|
|
|
|
** |
|
7
|
|
|
|
|
|
|
** May you do good and not evil. |
|
8
|
|
|
|
|
|
|
** May you find forgiveness for yourself and forgive others. |
|
9
|
|
|
|
|
|
|
** May you share freely, never taking more than you give. |
|
10
|
|
|
|
|
|
|
** |
|
11
|
|
|
|
|
|
|
************************************************************************* |
|
12
|
|
|
|
|
|
|
** This file contains helper routines used to translate binary data into |
|
13
|
|
|
|
|
|
|
** a null-terminated string (suitable for use in SQLite) and back again. |
|
14
|
|
|
|
|
|
|
** These are convenience routines for use by people who want to store binary |
|
15
|
|
|
|
|
|
|
** data in an SQLite database. The code in this file is not used by any other |
|
16
|
|
|
|
|
|
|
** part of the SQLite library. |
|
17
|
|
|
|
|
|
|
** |
|
18
|
|
|
|
|
|
|
** $Id: encode.c,v 1.1.1.1 2004/08/08 15:03:57 matt Exp $ |
|
19
|
|
|
|
|
|
|
*/ |
|
20
|
|
|
|
|
|
|
#include <assert.h>
#include <string.h>
|
22
|
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
/* |
|
24
|
|
|
|
|
|
|
** How This Encoder Works |
|
25
|
|
|
|
|
|
|
** |
|
26
|
|
|
|
|
|
|
** The output is allowed to contain any character except 0x27 (') and |
|
27
|
|
|
|
|
|
|
** 0x00. This is accomplished by using an escape character to encode |
|
28
|
|
|
|
|
|
|
** 0x27 and 0x00 as a two-byte sequence. The escape character is always |
|
29
|
|
|
|
|
|
|
** 0x01. An 0x00 is encoded as the two byte sequence 0x01 0x01. The |
|
30
|
|
|
|
|
|
|
** 0x27 character is encoded as the two byte sequence 0x01 0x28. Finally, |
|
31
|
|
|
|
|
|
|
** the escape character itself is encoded as the two-character sequence |
|
32
|
|
|
|
|
|
|
** 0x01 0x02. |
|
33
|
|
|
|
|
|
|
** |
|
34
|
|
|
|
|
|
|
** To summarize, the encoder works by using escape sequences as follows: |
|
35
|
|
|
|
|
|
|
** |
|
36
|
|
|
|
|
|
|
** 0x00 -> 0x01 0x01 |
|
37
|
|
|
|
|
|
|
** 0x01 -> 0x01 0x02 |
|
38
|
|
|
|
|
|
|
** 0x27 -> 0x01 0x28 |
|
39
|
|
|
|
|
|
|
** |
|
40
|
|
|
|
|
|
|
** If that were all the encoder did, it would work, but in certain cases |
|
41
|
|
|
|
|
|
|
** it could double the size of the encoded string. For example, to |
|
42
|
|
|
|
|
|
|
** encode a string of 100 0x27 characters would require 100 instances of |
|
43
|
|
|
|
|
|
|
** the 0x01 0x28 escape sequence resulting in a 200-character output. |
|
44
|
|
|
|
|
|
|
** We would prefer to keep the size of the encoded string smaller than |
|
45
|
|
|
|
|
|
|
** this. |
|
46
|
|
|
|
|
|
|
** |
|
47
|
|
|
|
|
|
|
** To minimize the encoding size, we first add a fixed offset value to each |
|
48
|
|
|
|
|
|
|
** byte in the sequence. The addition is modulo 256. (That is to say, if |
|
49
|
|
|
|
|
|
|
** the sum of the original character value and the offset exceeds 255, then |
|
50
|
|
|
|
|
|
|
** the higher order bits are truncated.) The offset is chosen to minimize |
|
51
|
|
|
|
|
|
|
** the number of characters in the string that need to be escaped. For |
|
52
|
|
|
|
|
|
|
** example, in the case above where the string was composed of 100 0x27 |
|
53
|
|
|
|
|
|
|
** characters, the offset might be 0x01. Each of the 0x27 characters would |
|
54
|
|
|
|
|
|
|
** then be converted into an 0x28 character which would not need to be |
|
55
|
|
|
|
|
|
|
** escaped at all and so the 100 character input string would be converted |
|
56
|
|
|
|
|
|
|
** into just 100 characters of output. Actually 101 characters of output - |
|
57
|
|
|
|
|
|
|
** we have to record the offset used as the first byte in the sequence so |
|
58
|
|
|
|
|
|
|
** that the string can be decoded. Since the offset value is stored as |
|
59
|
|
|
|
|
|
|
** part of the output string and the output string is not allowed to contain |
|
60
|
|
|
|
|
|
|
** characters 0x00 or 0x27, the offset cannot be 0x00 or 0x27. |
|
61
|
|
|
|
|
|
|
** |
|
62
|
|
|
|
|
|
|
** Here, then, are the encoding steps: |
|
63
|
|
|
|
|
|
|
** |
|
64
|
|
|
|
|
|
|
** (1) Choose an offset value and make it the first character of |
|
65
|
|
|
|
|
|
|
** output. |
|
66
|
|
|
|
|
|
|
** |
|
67
|
|
|
|
|
|
|
** (2) Copy each input character into the output buffer, one by |
|
68
|
|
|
|
|
|
|
** one, adding the offset value as you copy. |
|
69
|
|
|
|
|
|
|
** |
|
70
|
|
|
|
|
|
|
** (3) If the value of an input character plus offset is 0x00, replace |
|
71
|
|
|
|
|
|
|
** that one character by the two-character sequence 0x01 0x01. |
|
72
|
|
|
|
|
|
|
** If the sum is 0x01, replace it with 0x01 0x02. If the sum |
|
73
|
|
|
|
|
|
|
** is 0x27, replace it with 0x01 0x28. |
|
74
|
|
|
|
|
|
|
** |
|
75
|
|
|
|
|
|
|
** (4) Put a 0x00 terminator at the end of the output. |
|
76
|
|
|
|
|
|
|
** |
|
77
|
|
|
|
|
|
|
** Decoding is obvious: |
|
78
|
|
|
|
|
|
|
** |
|
79
|
|
|
|
|
|
|
** (5) Copy encoded characters except the first into the decode |
|
80
|
|
|
|
|
|
|
** buffer. Set the first encoded character aside for use as |
|
81
|
|
|
|
|
|
|
** the offset in step 7 below. |
|
82
|
|
|
|
|
|
|
** |
|
83
|
|
|
|
|
|
|
** (6) Convert each 0x01 0x01 sequence into a single character 0x00. |
|
84
|
|
|
|
|
|
|
** Convert 0x01 0x02 into 0x01. Convert 0x01 0x28 into 0x27. |
|
85
|
|
|
|
|
|
|
** |
|
86
|
|
|
|
|
|
|
** (7) Subtract the offset value that was the first character of |
|
87
|
|
|
|
|
|
|
** the encoded buffer from all characters in the output buffer. |
|
88
|
|
|
|
|
|
|
** |
|
89
|
|
|
|
|
|
|
** The only tricky part is step (1) - how to compute an offset value to |
|
90
|
|
|
|
|
|
|
** minimize the size of the output buffer. This is accomplished by testing |
|
91
|
|
|
|
|
|
|
** all offset values and picking the one that results in the fewest number |
|
92
|
|
|
|
|
|
|
** of escapes. To do that, we first scan the entire input and count the |
|
93
|
|
|
|
|
|
|
** number of occurrences of each character value in the input. Suppose |
|
94
|
|
|
|
|
|
|
** the number of 0x00 characters is N(0), the number of occurrences of 0x01 |
|
95
|
|
|
|
|
|
|
** is N(1), and so forth up to the number of occurrences of 0xff is N(255). |
|
96
|
|
|
|
|
|
|
** An offset of 0 is not allowed so we don't have to test it. The number |
|
97
|
|
|
|
|
|
|
** of escapes required for an offset of 1 is N(1)+N(2)+N(40). The number |
|
98
|
|
|
|
|
|
|
** of escapes required for an offset of 2 is N(2)+N(3)+N(41). And so forth. |
|
99
|
|
|
|
|
|
|
** In this way we find the offset that gives the minimum number of escapes, |
|
100
|
|
|
|
|
|
|
** and thus minimizes the length of the output string. |
|
101
|
|
|
|
|
|
|
*/ |
|
102
|
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
/*
** Encode a binary buffer "in" of size n bytes so that it contains
** no instances of characters '\'' or '\000'.  The output is
** null-terminated and can be used as a string value in an INSERT
** or UPDATE statement.  Use sqlite_decode_binary() to convert the
** string back into its original binary.
**
** The result is written into a preallocated output buffer "out".
** "out" must be able to hold at least 2 +(257*n)/254 bytes.
** In other words, the output will be expanded by as much as 3
** bytes for every 254 bytes of input plus 2 bytes of fixed overhead.
** (This is approximately 2 + 1.0118*n or about a 1.2% size increase.)
**
** The return value is the number of characters in the encoded
** string, excluding the "\000" terminator.
**
** If out==NULL then no output is generated but the routine still returns
** the number of characters that would have been generated if out had
** not been NULL.
**
** If n<=0 the encoding is the one-character string "x" and the return
** value is 1.
*/
int sqlite_encode_binary(const unsigned char *in, int n, unsigned char *out){
  int i, j, e, m;
  unsigned char x;
  int cnt[256];

  if( n<=0 ){
    if( out ){
      out[0] = 'x';
      out[1] = 0;
    }
    return 1;
  }

  /* Histogram of the input: cnt[v] is the number of bytes equal to v. */
  memset(cnt, 0, sizeof(cnt));
  for(i=n-1; i>=0; i--){ cnt[in[i]]++; }

  /* Pick the offset e (never 0x00 or 0x27, since e itself is stored in
  ** the output) that needs the fewest escapes.  An input byte needs an
  ** escape when (byte - e) is 0x00, 0x01 or 0x27, i.e. when the byte is
  ** e, e+1 or e+0x27 modulo 256.
  **
  ** Each input byte is counted by at most 3 of the 254 candidate
  ** offsets, so the smallest count is at most 3*n/254 < n and the
  ** "sum<m" test below always succeeds at least once when n>0.  The
  ** initial value of e is therefore never used; it only keeps the
  ** variable definitely initialized.
  */
  m = n;
  e = 1;
  for(i=1; i<256; i++){
    int sum;
    if( i=='\'' ) continue;
    sum = cnt[i] + cnt[(i+1)&0xff] + cnt[(i+'\'')&0xff];
    if( sum<m ){
      m = sum;
      e = i;
      if( m==0 ) break;   /* Cannot do better than zero escapes */
    }
  }

  /* Size-only query: one offset byte, n data bytes, m escape bytes. */
  if( out==0 ){
    return n+m+1;
  }

  out[0] = (unsigned char)e;
  j = 1;
  for(i=0; i<n; i++){
    x = (unsigned char)(in[i] - e);
    if( x==0 || x==1 || x=='\''){
      /* Emit the escape byte 0x01 followed by the value plus one:
      ** 0x00 -> 01 01,  0x01 -> 01 02,  0x27 -> 01 28. */
      out[j++] = 1;
      x++;
    }
    out[j++] = x;
  }
  out[j] = 0;
  assert( j==n+m+1 );
  return j;
}
|
164
|
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
/*
** Decode the string "in" into binary data and write it into "out".
** This routine reverses the encoding created by sqlite_encode_binary().
** The output will always be a few bytes less than the input.  The number
** of bytes of output is returned.  If the input is not a well-formed
** encoding, -1 is returned.
**
** The input is rejected (-1) when:
**
**    *  it is the empty string, so there is no offset byte;
**    *  an escape byte 0x01 is followed by anything other than
**       0x01, 0x02 or 0x28 - this includes the case where the string
**       ends immediately after the escape byte.
**
** Bytes decoded before the error was detected may already have been
** written to "out".
**
** The "in" and "out" parameters may point to the same buffer in order
** to decode a string in place.
*/
int sqlite_decode_binary(const unsigned char *in, unsigned char *out){
  int i, e;
  unsigned char c;

  /* The first byte is the offset chosen by the encoder.  If it is the
  ** terminator there is nothing after it that may be read. */
  e = *(in++);
  if( e==0 ){
    return -1;
  }
  i = 0;
  while( (c = *(in++))!=0 ){
    if( c==1 ){
      /* Escape sequence: the next byte holds the real value plus one.
      ** Validate it before advancing so that a string ending in 0x01
      ** never causes a read beyond the terminator. */
      c = *in;
      if( c!=1 && c!=2 && c!=('\''+1) ){
        return -1;
      }
      in++;
      c--;
    }
    /* Undo the offset; the result wraps modulo 256.  The write index
    ** never passes the read position, which is what makes in-place
    ** decoding safe. */
    out[i++] = (unsigned char)(c + e);
  }
  return i;
}
|
188
|
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
#ifdef ENCODER_TEST
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
** The subroutines above are not tested by the usual test suite.  To test
** these routines, compile just this one file with a -DENCODER_TEST=1 option
** and run the result.
**
** Each iteration builds an input buffer, encodes it, checks the reported
** and predicted lengths, checks that the output contains no (') character
** and fits the size bound, then decodes in place and compares the result
** with the original input.  The program exits with status 1 on the first
** failure.
*/
int main(int argc, char **argv){
  int i, j, n, m, nOut, nByteIn, nByteOut;
  unsigned char in[30000];
  unsigned char out[33000];

  (void)argc;
  (void)argv;
  nByteIn = nByteOut = 0;
  /* NOTE(review): the loop bound was lost from the text this block was
  ** recovered from.  sizeof(in) is used because n = rand()%(i+1) then
  ** never exceeds the size of in[] - confirm against the upstream file. */
  for(i=0; i<(int)sizeof(in); i++){
    int nLen;   /* strlen() of the encoded output, as an int */
    printf("Test %d: ", i+1);
    n = rand() % (i+1);
    if( i%100==0 ){
      /* Every 100th test uses a ramp 0,1,2,...,255,0,1,... so that all
      ** byte values occur equally often. */
      int k;
      for(j=k=0; j<n; j++){
        in[j] = (unsigned char)k;
        k = (k+1)&0xff;
      }
    }else{
      /* NOTE(review): loop body reconstructed (random fill); the
      ** statement was truncated in the recovered text - confirm. */
      for(j=0; j<n; j++) in[j] = (unsigned char)(rand() & 0xff);
    }
    nByteIn += n;
    nOut = sqlite_encode_binary(in, n, out);
    nByteOut += nOut;
    /* Convert once: strlen() takes char* and returns size_t, which must
    ** not be passed to a %d conversion. */
    nLen = (int)strlen((char*)out);
    if( nOut!=nLen ){
      printf(" ERROR return value is %d instead of %d\n", nOut, nLen);
      exit(1);
    }
    if( nOut!=sqlite_encode_binary(in, n, 0) ){
      printf(" ERROR actual output size disagrees with predicted size\n");
      exit(1);
    }
    m = (256*n + 1262)/253;
    printf("size %d->%d (max %d)", n, nLen+1, m);
    if( nLen+1>m ){
      printf(" ERROR output too big\n");
      exit(1);
    }
    for(j=0; out[j]; j++){
      if( out[j]=='\'' ){
        printf(" ERROR contains (')\n");
        exit(1);
      }
    }
    j = sqlite_decode_binary(out, out);
    if( j!=n ){
      printf(" ERROR decode size %d\n", j);
      exit(1);
    }
    if( memcmp(in, out, n)!=0 ){
      printf(" ERROR decode mismatch\n");
      exit(1);
    }
    printf(" OK\n");
  }
  fprintf(stderr,"Finished.  Total encoding: %d->%d bytes\n",
          nByteIn, nByteOut);
  fprintf(stderr,"Avg size increase: %.3f%%\n",
    (nByteOut-nByteIn)*100.0/(double)nByteIn);
  return 0;
}
#endif /* ENCODER_TEST */