File Coverage

c-lib/s-bsdiff.c
Criterion Covered Total %
statement 365 441 82.7
branch 199 286 69.5
condition n/a
subroutine n/a
pod n/a
total 564 727 77.5


line stmt bran cond sub pod time code
1             /*@ Implementation of s-bsdipa-lib.h: s_bsdipa_diff() and support.
2             *
3             * SPDX-License-Identifier: BSD-2-Clause
4             *
5             * Copyright (c) 2024 - 2026 Steffen Nurpmeso .
6             * (Technical surroundings, bsdiff algorithm is solely Colin Percival.)
7             *
8             * Copyright 2003-2005 Colin Percival
9             * All rights reserved
10             *
11             * Redistribution and use in source and binary forms, with or without
12             * modification, are permitted providing that the following conditions
13             * are met:
14             * 1. Redistributions of source code must retain the above copyright
15             * notice, this list of conditions and the following disclaimer.
16             * 2. Redistributions in binary form must reproduce the above copyright
17             * notice, this list of conditions and the following disclaimer in the
18             * documentation and/or other materials provided with the distribution.
19             *
20             * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21             * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
22             * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23             * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
24             * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25             * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26             * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27             * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
28             * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
29             * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30             * POSSIBILITY OF SUCH DAMAGE.
31             */
32              
33             #include "s-bsdipa-lib.h"
34              
35             #include
36             #include
37             #include
38              
39             #ifndef s_BSDIPA_SMALL
40             # define DIVSUFSORT_API static
41             # include "libdivsufsort/divsufsort.h"
42             #endif
43              
44             /* Number of control block instances per s_bsdipa_ctrl_chunk */
45             #define a_BSDIPA_CTRL_NO 41
46              
47             /* What seems a good default */
48             #ifndef s_BSDIPA_MAGIC_WINDOW
49             # define s_BSDIPA_MAGIC_WINDOW 16
50             #endif
51              
52             #ifdef s_BSDIPA_SMALL
53             # define saidx_t s_bsdipa_off_t
54             #endif
55              
56             /* */
57             #undef MIN
58             #define MIN(x,y) (((x) < (y)) ? (x) : (y))
59              
60             /* With 32-bit off_t for now only s_BSDIPA_32 mode is supported */
61             #ifndef s_BSDIPA_32
62             # define OFF_MAX (sizeof(off_t) == 4 ? INT32_MAX : INT64_MAX)
63             typedef char ASSERTION_failed__off_max[OFF_MAX != INT32_MAX ? 1 : -1];
64             # undef OFF_MAX
65             #endif
66              
67             /* Checks use saidx_t, but the patch code uses s_bsdipa_off_t, so these must be of EQ size! */
68             typedef char ASSERTION_failed__bsdipa_off_eq_saidx[sizeof(s_bsdipa_off_t) == sizeof(saidx_t) ? 1 : -1];
69              
70             struct a_bsdiff_ctrl{
71             struct s_bsdipa_ctrl_chunk **c_ccpp;
72             struct s_bsdipa_ctrl_chunk *c_ccp;
73             uint32_t c_no; /* Left entries in .c_ccp */
74             int8_t c_need_dump; /* Delayed dump of ctrl_triple.ct_seek_bytes for <> .c_join_rel_pos */
75             int8_t c_pad[3];
76             s_bsdipa_off_t c_join_rel_pos; /* Join multiple successive data-less triples by moving once */
77             s_bsdipa_off_t c_len_max; /* Maximum possible header:h_ctrl_len for input data <> BSDIFF_CTL_LEN_MAX() */
78             #define a_BSDIFF_CTL_LEN_MAX(BEFLEN) (s_BSDIPA_OFF_MAX - (BEFLEN) - (sizeof(s_bsdipa_off_t) * 3) - 1)
79             };
80              
81             /* (Could be shared with s-bspatch.c) */
82             static void *a_bsdiff_alloc(void *vp, size_t size);
83             static void a_bsdiff_free(void *vp, void *dat);
84              
85             /**/
86             static inline void a_bsdiff_xout(s_bsdipa_off_t x, uint8_t *bp);
87              
88             /**/
89             static enum s_bsdipa_state a_bsdiff_xout_ctrl(struct s_bsdipa_diff_ctx *dcp, struct a_bsdiff_ctrl *ctrlp,
90             s_bsdipa_off_t diffl, s_bsdipa_off_t extral);
91              
92             /* below public fns */
93              
94             /* Colin Percival's code to turn suffix-sorted data into output */
95             static inline s_bsdipa_off_t a_bsdiff_matchlen(uint8_t const *aftdat, s_bsdipa_off_t aftlen,
96             uint8_t const *befdat, s_bsdipa_off_t beflen);
97             static s_bsdipa_off_t a_bsdiff_search(s_bsdipa_off_t const *Ip, uint8_t const *aftdat, s_bsdipa_off_t aftlen,
98             uint8_t const *befdat, s_bsdipa_off_t beflenp,
99             s_bsdipa_off_t st, s_bsdipa_off_t en, s_bsdipa_off_t *posp);
100              
101             static enum s_bsdipa_state a_bsdiff_bsdiff(struct s_bsdipa_diff_ctx *dcp);
102              
103             /* Later imported original BSDiff string suffix sort algorithm by Colin Percival.
104             * (It was replaced with libdivsufsort in the FreeBSD codebase to which he pointed me due to "existing fixes",
105             * because of notable performance improvements.)
106             * Except for types and names i did not adapt syntax to my style for those */
107             static void a_bsdiff_split(s_bsdipa_off_t *I, s_bsdipa_off_t *V, s_bsdipa_off_t start, s_bsdipa_off_t len,
108             s_bsdipa_off_t h);
109             static int a_bsdiff_qsufsort(s_bsdipa_off_t *I, uint8_t const *aftdat, s_bsdipa_off_t aftlen,
110             struct s_bsdipa_diff_ctx *dcp);
111              
112             /* Even further below: simple text line-based approach to create output patch format */
113             #ifdef s_BSDIPA_TEXT
114             static enum s_bsdipa_state a_bsdiff_text(struct s_bsdipa_diff_ctx *dcp);
115             #endif
116              
117             static void *
118 1584           a_bsdiff_alloc(void *vp, size_t size){
119             struct s_bsdipa_diff_ctx *dcp;
120              
121 1584           dcp = (struct s_bsdipa_diff_ctx*)vp;
122 1584           vp = (*dcp->dc_mem.mc_alloc)(size);
123              
124 1584           return vp;
125             }
126              
127             static void
128 1584           a_bsdiff_free(void *vp, void *dat){
129             struct s_bsdipa_diff_ctx *dcp;
130              
131 1584           dcp = (struct s_bsdipa_diff_ctx*)vp;
132 1584           (*dcp->dc_mem.mc_free)(dat);
133 1584           }
134              
135             static inline void
136 2236           a_bsdiff_xout(s_bsdipa_off_t x, uint8_t *buf){ /* xxx use endian.h stuff */
137             s_bsdipa_off_t y;
138             int lt0;
139              
140 2236           lt0 = (x < 0);
141 2236 100         y = lt0 ? -x : x;
142              
143             #ifndef s_BSDIPA_32
144             buf[7] = (uint8_t)(y & 0xFF); y >>= 8;
145             buf[6] = (uint8_t)(y & 0xFF); y >>= 8;
146             buf[5] = (uint8_t)(y & 0xFF); y >>= 8;
147             buf[4] = (uint8_t)(y & 0xFF); y >>= 8;
148             #endif
149 2236           buf[3] = (uint8_t)(y & 0xFF); y >>= 8;
150 2236           buf[2] = (uint8_t)(y & 0xFF); y >>= 8;
151 2236           buf[1] = (uint8_t)(y & 0xFF); y >>= 8;
152 2236           buf[0] = (uint8_t)(y /*& 0xFF*/);
153 2236 100         if(lt0)
154 8           buf[0] |= 0x80;
155 2236           }
156              
157             static enum s_bsdipa_state
158 324           a_bsdiff_xout_ctrl(struct s_bsdipa_diff_ctx *dcp, struct a_bsdiff_ctrl *ctrlp, s_bsdipa_off_t diffl, /* {{{ */
159             s_bsdipa_off_t extral){
160             enum s_bsdipa_state rv;
161             struct s_bsdipa_ctrl_chunk *ccp;
162              
163 324           ccp = ctrlp->c_ccp;
164              
165 324 50         if(ctrlp->c_need_dump){
166 0           a_bsdiff_xout(ctrlp->c_join_rel_pos, &ccp->cc_dat[ccp->cc_len]);
167 0           ccp->cc_len += sizeof(s_bsdipa_off_t);
168 0           ctrlp->c_join_rel_pos = 0;
169             }
170              
171 324 100         if(ctrlp->c_ccpp == NULL || --ctrlp->c_no == 0){
    50          
172             /* xxx Do not: sizeof(struct s_bsdipa_ctrl_triple) * a_BSDIPA_CTRL_NO */
173 316           ccp = (struct s_bsdipa_ctrl_chunk*)(*dcp->dc_mem.mc_custom_alloc)(dcp->dc_mem.mc_custom_cookie,
174             (sizeof(struct s_bsdipa_ctrl_chunk) +
175             (3 * sizeof(s_bsdipa_off_t) * a_BSDIPA_CTRL_NO)));
176 316 50         if(ccp == NULL){
177 0           rv = s_BSDIPA_NOMEM;
178 0           goto jleave;
179             }
180              
181 316 50         if(ctrlp->c_ccpp == NULL)
182 316           dcp->dc_ctrl = ccp;
183             else
184 0           *ctrlp->c_ccpp = ccp;
185 316           ccp->cc_next = NULL;
186 316           ccp->cc_len = 0;
187              
188 316           ctrlp->c_ccp = ccp;
189 316           ctrlp->c_ccpp = &ccp->cc_next;
190 316           ctrlp->c_no = a_BSDIPA_CTRL_NO;
191             }
192              
193 324           a_bsdiff_xout(diffl, &ccp->cc_dat[ccp->cc_len]);
194 324           ccp->cc_len += sizeof(s_bsdipa_off_t);
195 324           a_bsdiff_xout(extral, &ccp->cc_dat[ccp->cc_len]);
196 324           ccp->cc_len += sizeof(s_bsdipa_off_t);
197              
198 324           dcp->dc_ctrl_len += sizeof(s_bsdipa_off_t) * 3;
199 324 50         if(dcp->dc_ctrl_len > ctrlp->c_len_max){
200 0           rv = s_BSDIPA_FBIG;
201 0           goto jleave;
202             }
203              
204 324           ctrlp->c_need_dump = 1;
205 324           rv = s_BSDIPA_OK;
206 324           jleave:
207 324           return rv;
208             } /* }}} */
209              
210             void
211 0           s_bsdipa_i_to_buf(uint8_t *out, s_bsdipa_off_t in){
212 0           a_bsdiff_xout(in, out);
213 0           }
214              
215             enum s_bsdipa_state
216 316           s_bsdipa_diff(struct s_bsdipa_diff_ctx *dcp){ /* {{{ */
217             enum s_bsdipa_state rv;
218              
219 316 50         if(dcp->dc_mem.mc_alloc != NULL){
220 316           dcp->dc_mem.mc_custom_cookie = dcp;
221 316           dcp->dc_mem.mc_custom_alloc = &a_bsdiff_alloc;
222 316           dcp->dc_mem.mc_custom_free = &a_bsdiff_free;
223             }
224              
225 316           if(
226             #ifdef s_BSDIPA_TEXT
227 316 100         !dcp->dc_text_mode &&
228             #endif
229 288 50         dcp->dc_magic_window <= 0)
230 288           dcp->dc_magic_window = s_BSDIPA_MAGIC_WINDOW;
231              
232             /* Enable s_bsdipa_diff_free() */
233 316           memset(&dcp->dc_ctrl_len, 0, sizeof(*dcp) - (size_t)((uint8_t*)&dcp->dc_ctrl_len - (uint8_t*)dcp));
234              
235 316           rv = s_BSDIPA_FBIG;
236              
237             /* Fail early if we cannot create a patch with a header and one control triple */
238 316 50         if(dcp->dc_before_len >= s_BSDIPA_OFF_MAX -sizeof(struct s_bsdipa_header) -sizeof(struct s_bsdipa_ctrl_triple))
239 0           goto jleave;
240 316 50         if(dcp->dc_before_len + 1 >= SIZE_MAX / sizeof(saidx_t))
241 0           goto jleave;
242              
243 316 50         if(dcp->dc_after_len >= s_BSDIPA_OFF_MAX)
244 0           goto jleave;
245 316 50         if(dcp->dc_after_len + 1 >= SIZE_MAX / sizeof(saidx_t))
246 0           goto jleave;
247              
248 316           rv = s_BSDIPA_NOMEM;
249              
250 632           dcp->dc_extra_dat = (uint8_t*)(*dcp->dc_mem.mc_custom_alloc)(dcp->dc_mem.mc_custom_cookie,
251 316           (size_t)(dcp->dc_before_len + 1));
252 316 50         if(dcp->dc_extra_dat == NULL)
253 0           goto jleave;
254             /* Let's share that buffer */
255 316           dcp->dc_diff_dat = &dcp->dc_extra_dat[(size_t)(dcp->dc_before_len + 1)];
256              
257             /* Compute the differences, writing ctrl as we go */
258 316 50         if((dcp->dc_after_len | dcp->dc_before_len) == 0){
259 0           dcp->dc_is_equal_data = 1;
260 0           rv = s_BSDIPA_OK;
261 316 50         }else if(dcp->dc_before_len == 0){
262 0           dcp->dc_is_equal_data = 0;
263 0           rv = s_BSDIPA_OK;
264             }
265             #ifdef s_BSDIPA_TEXT
266 316 100         else if(dcp->dc_text_mode)
267 28           rv = a_bsdiff_text(dcp);
268             #endif
269             else
270 288           rv = a_bsdiff_bsdiff(dcp);
271              
272 316 50         if(rv == s_BSDIPA_OK){
273             /* Create readily prepared header; as documented, sum of lengths does not exceed _OFF_MAX */
274 316           a_bsdiff_xout(dcp->dc_ctrl_len, &dcp->dc_header[0]);
275 316           a_bsdiff_xout(dcp->dc_diff_len, &dcp->dc_header[sizeof(s_bsdipa_off_t)]);
276 316           a_bsdiff_xout(dcp->dc_extra_len, &dcp->dc_header[sizeof(s_bsdipa_off_t) * 2]);
277 316           a_bsdiff_xout(dcp->dc_before_len, &dcp->dc_header[sizeof(s_bsdipa_off_t) * 3]);
278             }
279              
280 0           jleave:
281 316           return rv;
282             } /* }}} */
283              
284             void
285 316           s_bsdipa_diff_free(struct s_bsdipa_diff_ctx *dcp){
286             struct s_bsdipa_ctrl_chunk *ccp;
287              
288 632 100         while((ccp = dcp->dc_ctrl) != NULL){
289 316           dcp->dc_ctrl = ccp->cc_next;
290 316           (*dcp->dc_mem.mc_custom_free)(dcp->dc_mem.mc_custom_cookie, ccp);
291             }
292              
293 316 50         if(dcp->dc_extra_dat != NULL)
294 316           (*dcp->dc_mem.mc_custom_free)(dcp->dc_mem.mc_custom_cookie, dcp->dc_extra_dat);
295 316           }
296              
297             /*
298             * Percivals BSDiff {{{
299             */
300             static inline s_bsdipa_off_t
301 103826688           a_bsdiff_matchlen(uint8_t const *aftdat, s_bsdipa_off_t aftlen, uint8_t const *befdat, s_bsdipa_off_t beflen){
302             s_bsdipa_off_t i;
303              
304 103826688           aftlen = MIN(aftlen, beflen);
305 122910096 100         for(i = 0; i < aftlen; ++i)
306 119332824 100         if(aftdat[i] != befdat[i])
307 100249416           break;
308              
309 103826688           return i;
310             }
311              
312             static s_bsdipa_off_t
313 980911224           a_bsdiff_search(s_bsdipa_off_t const *Ip, uint8_t const *aftdat, s_bsdipa_off_t aftlen,
314             uint8_t const *befdat, s_bsdipa_off_t beflen,
315             s_bsdipa_off_t st, s_bsdipa_off_t en, s_bsdipa_off_t *posp){
316             s_bsdipa_off_t x, y, r;
317              
318 980911224 100         if(en - st < 2){
319 51913344           x = a_bsdiff_matchlen(aftdat + Ip[st], aftlen - Ip[st], befdat, beflen);
320 51913344           y = a_bsdiff_matchlen(aftdat + Ip[en], aftlen - Ip[en], befdat, beflen);
321              
322 51913344 50         if(x > y){
323 0           *posp = Ip[st];
324 0           r = x;
325             }else{
326 51913344           *posp = Ip[en];
327 51913344           r = y;
328             }
329             }else{
330 928997880           x = st + ((en - st) / 2);
331 928997880           y = aftlen - Ip[x];
332 928997880           y = MIN(y, beflen);
333 928997880 100         if(memcmp(aftdat + Ip[x], befdat, (size_t)y) < 0)
334 100873920           r = a_bsdiff_search(Ip, aftdat, aftlen, befdat, beflen, x, en, posp);
335             else
336 828123960           r = a_bsdiff_search(Ip, aftdat, aftlen, befdat, beflen, st, x, posp);
337             }
338              
339 980911224           return r;
340             }
341              
342             static enum s_bsdipa_state
343 288           a_bsdiff_bsdiff(struct s_bsdipa_diff_ctx *dcp){ /* {{{ */
344             struct a_bsdiff_ctrl ctrl;
345             int maxoffdecr, isneq;
346             saidx_t *Ip;
347             uint8_t *extrap, *diffp;
348             uint8_t const *befdat, *aftdat;
349             s_bsdipa_off_t beflen, aftlen, scan, len, pos, lastscan, aftpos, lastoff, aftscore, i;
350             enum s_bsdipa_state rv;
351             assert(dcp->dc_before_len > 0);
352              
353 288           memset(&ctrl, 0, sizeof(ctrl));
354              
355 288           rv = s_BSDIPA_NOMEM;
356              
357 288           beflen = (s_bsdipa_off_t)dcp->dc_before_len;
358 288           befdat = dcp->dc_before_dat;
359 288           aftlen = (s_bsdipa_off_t)dcp->dc_after_len;
360 288           aftdat = dcp->dc_after_dat;
361 288           extrap = dcp->dc_extra_dat;
362 288           diffp = dcp->dc_diff_dat;
363              
364 288           Ip = (saidx_t*)(*dcp->dc_mem.mc_custom_alloc)(dcp->dc_mem.mc_custom_cookie,
365 288           (size_t)(aftlen + 1) * sizeof(saidx_t));
366 288 50         if(Ip == NULL)
367 0           goto jleave;
368              
369 288           maxoffdecr = 0;
370             #ifndef s_BSDIPA_SMALL
371             /* Limit is "a bit arbitrary", and surely also depends on memory cache performance etc */
372 288 100         if(aftlen > 4096 * 25){
373             /* divsufsort(): effectively only ENOMEM */
374 48 50         if(divsufsort(aftdat, Ip, aftlen, dcp))
375 0           goto jleave;
376 48           maxoffdecr = 1;
377             }else
378             #endif
379             /* Only ENOMEM */
380 240 50         if(a_bsdiff_qsufsort(Ip, aftdat, aftlen, dcp))
381 0           goto jleave;
382              
383 288           ctrl.c_len_max = a_BSDIFF_CTL_LEN_MAX(beflen);
384              
385 288           isneq = (aftlen != beflen);
386 288           scan = len = pos = lastscan = aftpos = lastoff = 0;
387              
388             /* a_bsdiff_search() is called with aftlen - 1, so bypass algorithm as such, then */
389 288 50         if(aftlen == 0)
390 0           goto Jaftlen0_bypass;
391              
392 288984 100         while(scan < beflen){
393 288408           aftscore = 0;
394              
395 51913632 100         for(i = (scan += len); scan < beflen; ++scan){
396 51913344           len = a_bsdiff_search(Ip, aftdat, aftlen, befdat + scan, beflen - scan, 0,
397             aftlen - maxoffdecr, &pos);
398              
399 104763456 100         for(; i < scan + len; ++i)
400 52850112 100         if(i + lastoff < aftlen && aftdat[i + lastoff] == befdat[i])
    100          
401 1225008           ++aftscore;
402              
403 51913344 100         if((len == aftscore && len != 0) || len > aftscore + dcp->dc_magic_window)
    100          
    50          
404             break;
405              
406 51625224 100         if(scan + lastoff < aftlen && aftdat[scan + lastoff] == befdat[scan])
    50          
407 0           --aftscore;
408             }
409              
410 288408 100         if(len != aftscore || scan == beflen) Jaftlen0_bypass:{
    100          
411             s_bsdipa_off_t s, Sf, diffl, lenb, extral;
412              
413 288 50         if(aftlen == 0){
414 0           diffl = lenb = 0;
415 0           extral = scan = beflen;
416 0           goto j_aftlen0_bypass;
417             }
418              
419 288           s = Sf = diffl = 0;
420              
421 26090232 100         for(i = 0; lastscan + i < scan && aftpos + i < aftlen;){
    100          
422 26089944 100         if(aftdat[aftpos + i] == befdat[lastscan + i])
423 1225008           ++s;
424 26089944           ++i;
425 26089944 100         if((s * 2) - i > (Sf * 2) - diffl){
426 1225008           Sf = s;
427 1225008           diffl = i;
428             }
429             }
430              
431 288           lenb = 0;
432 288 50         if(scan < beflen){
433             s_bsdipa_off_t Sb;
434              
435 0           s = Sb = 0;
436 0 0         for(i = 1; scan >= lastscan + i && pos >= i; ++i){
    0          
437 0 0         if(aftdat[pos - i] == befdat[scan - i])
438 0           ++s;
439 0 0         if((s * 2) - i > (Sb * 2) - lenb){
440 0           Sb = s;
441 0           lenb = i;
442             }
443             }
444             }
445              
446 288 50         if(lastscan + diffl > scan - lenb){
447             s_bsdipa_off_t overlap, Ss, lens;
448              
449 0           overlap = (lastscan + diffl) - (scan - lenb);
450 0           s = Ss = lens = 0;
451 0 0         for(i = 0; i < overlap; ++i){
452 0           if(befdat[lastscan + diffl - overlap + i
453 0 0         ] == aftdat[aftpos + diffl - overlap + i])
454 0           ++s;
455 0 0         if(befdat[scan - lenb + i] == aftdat[pos - lenb + i])
456 0           --s;
457 0 0         if(s > Ss){
458 0           Ss = s;
459 0           lens = i + 1;
460             }
461             }
462              
463 0           diffl += lens - overlap;
464 0           lenb -= lens;
465             }
466              
467 1225296 100         for(i = 0; i < diffl; ++i){
468             uint8_t u;
469              
470 1225008           u = befdat[lastscan + i] - aftdat[aftpos + i];
471 1225008           isneq |= (u != 0);
472 1225008           *--diffp = u;
473             }
474 288           dcp->dc_diff_len += diffl;
475             assert(diffp > extrap);
476              
477 288           extral = (scan - lenb) - (lastscan + diffl);
478 288           j_aftlen0_bypass:
479 288           isneq |= (extral > 0);
480 51625512 100         for(i = 0; i < extral; ++i)
481 51625224           *extrap++ = befdat[lastscan + diffl + i];
482 288           dcp->dc_extra_len += extral;
483             assert(extrap <= diffp);
484              
485             /* Since v0.9.0 we only create control chunks with data.
486             * We allow for a data-less first, but do not generate it for binary BSDiff */
487             assert(diffl != 0 || extral != 0 || ctrl.c_ccpp != NULL);
488 288 100         if(diffl != 0 || extral != 0){
    50          
489 288           rv = a_bsdiff_xout_ctrl(dcp, &ctrl, diffl, extral);
490 288 50         if(rv != s_BSDIPA_OK)
491 0           goto jleave;
492             }
493              
494 288           ctrl.c_join_rel_pos += (pos - lenb) - (aftpos + diffl);
495 288           lastscan = scan - lenb;
496 288           aftpos = pos - lenb;
497 288           lastoff = pos - scan;
498             }
499             }
500              
501 288 50         if(ctrl.c_need_dump){
502 288           a_bsdiff_xout(0, &ctrl.c_ccp->cc_dat[ctrl.c_ccp->cc_len]);
503 288           ctrl.c_ccp->cc_len += sizeof(s_bsdipa_off_t);
504             /*ctrl.c_need_dump = 0;*/
505             }
506              
507 288           dcp->dc_is_equal_data = !isneq;
508 288           dcp->dc_diff_dat = diffp;
509              
510 288           rv = s_BSDIPA_OK;
511 288           jleave:
512 288 50         if(Ip != NULL)
513 288           (*dcp->dc_mem.mc_custom_free)(dcp->dc_mem.mc_custom_cookie, Ip);
514              
515 288           return rv;
516             } /* }}} */
517             /* }}} */
518              
519             /*
520             * Percivals BSDiff: used string suffix sort algorithm(s) {{{
521             */
522             static void
523 583488           a_bsdiff_split(s_bsdipa_off_t *I, s_bsdipa_off_t *V, s_bsdipa_off_t start, s_bsdipa_off_t len, s_bsdipa_off_t h){
524             s_bsdipa_off_t i,j,k,x,tmp,jj,kk;
525              
526 583488 100         if(len<16) {
527 3300840 100         for(k=start;k
528 3009576           j=1;x=V[I[k]+h];
529 18619728 100         for(i=1;k+i
530 15610152 100         if(V[I[k+i]+h]
531 13437816           x=V[I[k+i]+h];
532 13437816           j=0;
533             }
534 15610152 100         if(V[I[k+i]+h]==x) {
535 13441104           tmp=I[k+j];I[k+j]=I[k+i];I[k+i]=tmp;
536 13441104           j++;
537             }
538             }
539 6021792 100         for(i=0;i
540 3009576 100         if(j==1) I[k]=-1;
541             }
542 291264           return;
543             }
544              
545 292224           x=V[I[start+len/2]+h];
546 292224           jj=0;kk=0;
547 65706384 100         for(i=start;i
548 65414160 100         if(V[I[i]+h]
549 65414160 100         if(V[I[i]+h]==x) kk++;
550             }
551 292224           jj+=start;kk+=jj;
552              
553 292224           i=start;j=0;k=0;
554 54435384 100         while(i
555 54143160 100         if(V[I[i]+h]
556 12716712           i++;
557 41426448 100         } else if(V[I[i]+h]==x) {
558 32564736           tmp=I[i];I[i]=I[jj+j];I[jj+j]=tmp;
559 32564736           j++;
560             } else {
561 8861712           tmp=I[i];I[i]=I[kk+k];I[kk+k]=tmp;
562 8861712           k++;
563             }
564             }
565              
566 7430232 100         while(jj+j
567 7138008 100         if(V[I[jj+j]+h]==x) {
568 7099008           j++;
569             } else {
570 39000           tmp=I[jj+j];I[jj+j]=I[kk+k];I[kk+k]=tmp;
571 39000           k++;
572             }
573             }
574              
575 292224 100         if(jj>start) a_bsdiff_split(I,V,start,jj-start,h);
576              
577 39955968 100         for(i=0;i
578 292224 100         if(jj==kk-1) I[jj]=-1;
579              
580 292224 100         if(start+len>kk) a_bsdiff_split(I,V,kk,start+len-kk,h);
581             }
582              
583             static int
584 240           a_bsdiff_qsufsort(s_bsdipa_off_t *I,const uint8_t *aftdat,s_bsdipa_off_t aftlen,struct s_bsdipa_diff_ctx *dcp){
585             s_bsdipa_off_t *V;
586             s_bsdipa_off_t buckets[256];
587             s_bsdipa_off_t i,h,len;
588              
589 240           V = (s_bsdipa_off_t*)(*dcp->dc_mem.mc_custom_alloc)(dcp->dc_mem.mc_custom_cookie,
590 240           (size_t)(aftlen + 1) * sizeof(saidx_t));
591 240 50         if(V == NULL)
592 0           return 1;
593              
594 240           memset(buckets, 0, sizeof(buckets)); /*for(i=0;i<256;i++) buckets[i]=0;*/
595 3290232 100         for(i=0;i
596 61440 100         for(i=1;i<256;i++) buckets[i]+=buckets[i-1];
597 61440 100         for(i=255;i>0;i--) buckets[i]=buckets[i-1];
598 240           buckets[0]=0;
599              
600 3290232 100         for(i=0;i
601 240           I[0]=aftlen;
602 3290232 100         for(i=0;i
603 240           V[aftlen]=0;
604 61440 100         for(i=1;i<256;i++) if(buckets[i]==buckets[i-1]+1) I[buckets[i]]=-1;
    100          
605 240           I[0]=-1;
606              
607 2952 100         for(h=1;I[0]!=-(aftlen+1);h+=h) {
608 2712           len=0;
609 3317160 100         for(i=0;i
610 3314448 100         if(I[i]<0) {
611 3301200           len-=I[i];
612 3301200           i-=I[i];
613             } else {
614 13248 100         if(len) I[i-len]=-len;
615 13248           len=V[I[i]]+1-i;
616 13248           a_bsdiff_split(I,V,i,len,h);
617 13248           i+=len;
618 13248           len=0;
619             }
620             }
621 2712 100         if(len) I[i-len]=-len;
622             }
623              
624 3290472 100         for(i=0;i
625              
626 240           (*dcp->dc_mem.mc_custom_free)(dcp->dc_mem.mc_custom_cookie, V);
627 240           return 0;
628             }
629              
630             #ifndef s_BSDIPA_SMALL
631             # include "libdivsufsort/divsufsort.c"
632             # undef lg_table
633             # define lg_table a_sssort_lg_table
634             # include "libdivsufsort/sssort.c"
635             # undef lg_table
636             # define lg_table a_trsort_lg_table
637             # include "libdivsufsort/trsort.c"
638             #endif
639             /* }}} */
640              
641             #ifdef s_BSDIPA_TEXT /* {{{ */
642             static enum s_bsdipa_state
643 28           a_bsdiff_text(struct s_bsdipa_diff_ctx *dcp){
644             /* Chris Torek's hash algorithm, result stirred as shown by Bret Mulvey */
645             # define a_CSHASH_HASH(R,BP,L) \
646             do{\
647             uint8_t const *a__bp = BP;\
648             uint32_t a__l = L;\
649             uint32_t a__h = 0;\
650             \
651             while(a__l-- != 0){ /* xxx Duff's device, unroll 8? */\
652             uint8_t c = *a__bp++;\
653             a__h = (a__h << 5) + a__h + c;\
654             }\
655             \
656             if(a__h != 0){\
657             a__h += a__h << 13;\
658             a__h ^= a__h >> 7;\
659             a__h += a__h << 3;\
660             a__h ^= a__h >> 17;\
661             a__h += a__h << 5;\
662             }\
663             \
664             R = a__h;\
665             }while(0)
666              
667             /* XXX Very simple minded and blown-up line storage */
668             struct a_l{
669             struct a_l *l_hashnxt;
670             struct a_l *l_nxt;
671             uint8_t const *l_dat;
672             uint32_t l_len;
673             uint32_t l_hash;
674             };
675              
676             struct a_lchunk{
677             struct a_lchunk *lc_nxt;
678             size_t lc_no;
679             struct a_l lc_la[255];
680             };
681              
682             struct a_bsdiff_ctrl ctrl;
683             uint32_t alhprime;
684             s_bsdipa_off_t xlen;
685             uint8_t const *xdat;
686             struct a_l **alhpp;
687             struct a_lchunk *alcp_head, *alcp;
688             s_bsdipa_hash_ptf hptf;
689             enum s_bsdipa_state rv;
690             assert(dcp->dc_before_len > 0);
691              
692 28           memset(&ctrl, 0, sizeof(ctrl));
693              
694 28           rv = s_BSDIPA_NOMEM;
695              
696 28           hptf = dcp->dc_hash;
697 28           alcp_head = NULL;
698 28           alhpp = NULL;
699              
700             /* Create hashes for lines in "after"; create hashmap of lines {{{ */
701 28           xdat = dcp->dc_after_dat;
702 28           xlen = (s_bsdipa_off_t)dcp->dc_after_len;
703 28 50         if(xlen > 0){
704             s_bsdipa_off_t cnt, ll;
705             uint8_t const *beg, *end;
706             struct a_l *alp, *lp;
707              
708 28           alp = NULL;
709 28           beg = NULL;
710 28           cnt = 0;
711             /*ll = 0; uninit*/
712 28           goto Jalc_go;
713              
714 72076 100         while(xlen > 0){
715 72048           beg = xdat;
716 72048           end = memchr(beg, '\n', (size_t)xlen);
717 72048 100         if(end != NULL){
718 72040           ll = (s_bsdipa_off_t)(++end - xdat);
719 72040           xlen -= ll;
720             }else{
721 8           ll = xlen;
722 8           xlen = 0;
723             }
724 72048           xdat += ll;
725              
726             if(/*ll >= s_BSDIPA_OFF_MAX - 1 ||*/ ll != (s_bsdipa_off_t)(uint32_t)ll){
727             rv = s_BSDIPA_FBIG;
728             goto jleave;
729             }
730              
731 72048 100         if(alcp->lc_no == sizeof(alcp->lc_la) / sizeof(alcp->lc_la[0])) Jalc_go:{
732             struct a_lchunk *lcp;
733              
734 300           lcp = (struct a_lchunk*)(*dcp->dc_mem.mc_custom_alloc)(dcp->dc_mem.mc_custom_cookie,
735             sizeof(*lcp));
736 300 50         if(lcp == NULL)
737 0           goto jleave;
738 300           memset(lcp, 0, sizeof(*lcp));
739              
740 300 100         if(alcp_head == NULL){
741 28           alcp_head = alcp = lcp;
742 28           continue;
743             }
744 272           alcp->lc_nxt = lcp;
745 272           alcp = lcp;
746             }
747              
748             /* cnt cannot exceed s_BSDIPA_OFF_MAX-1 */
749 72048           ++cnt;
750 72048           lp = &alcp->lc_la[alcp->lc_no++];
751 72048           lp->l_hashnxt = alp;
752 72048 100         if(alp != NULL)
753 72020           alp->l_nxt = lp;
754 72048           alp = lp;
755             /*alp->l_nxt = NULL;*/
756 72048           alp->l_dat = beg;
757 72048           alp->l_len = (uint32_t)ll;
758 72048 50         if(hptf == NULL)
759 0 0         a_CSHASH_HASH(alp->l_hash, beg, alp->l_len);
    0          
760             else
761 72048           alp->l_hash = (*hptf)(dcp->dc_hash_cookie, beg, alp->l_len);
762             }
763              
764             /* Hashmap size, prime spaced: 2777, 14057, 47431 slots xxx really these fixed three only? */
765 28 50         if(cnt <= 0xAD9 * 5)
766 28           cnt = 0xAD9;
767 0 0         else if(cnt <= 0x36E9 * 5)
768 0           cnt = 0x36E9;
769 0 0         else if((size_t)cnt >= (((((size_t)UINT32_MAX < (size_t)s_BSDIPA_OFF_MAX)
770             ? (size_t)UINT32_MAX : (size_t)s_BSDIPA_OFF_MAX) - 1) / sizeof(struct a_l*))){
771 0           rv = s_BSDIPA_FBIG;
772 0           goto jleave;
773             }else
774 0           cnt = 0xB947;
775 28           ll = cnt * sizeof(struct a_l*);
776              
777 28           alhpp = (struct a_l**)(*dcp->dc_mem.mc_custom_alloc)(dcp->dc_mem.mc_custom_cookie, (size_t)ll);
778 28 50         if(alhpp == NULL)
779 0           goto jleave;
780 28           memset(alhpp, 0, ll);
781              
782 72076 100         while(alp != NULL){
783             uint32_t i;
784              
785 72048           lp = alp->l_hashnxt;
786 72048           i = alp->l_hash % cnt;
787 72048           alp->l_hashnxt = alhpp[i];
788 72048           alhpp[i] = alp;
789 72048           alp = lp;
790             }
791              
792 28           alhprime = (uint32_t)cnt;
793             } /* }}} */
794              
795 28           rv = s_BSDIPA_FBIG;
796              
797             /* Diff creation {{{ */
798             /* C99 */{
799             struct a_l *alp;
800             s_bsdipa_off_t extral, diffl, aabspos;
801             uint8_t *extrap, *diffp;
802             uint32_t isneq;
803              
804 28           xdat = dcp->dc_before_dat;
805 28           xlen = (s_bsdipa_off_t)dcp->dc_before_len;
806 28           ctrl.c_len_max = a_BSDIFF_CTL_LEN_MAX(xlen);
807             /*ctrl.c_join_rel_pos = 0;*/
808              
809 28           isneq = (xlen != (s_bsdipa_off_t)dcp->dc_after_len);
810              
811 28           extrap = dcp->dc_extra_dat;
812 28           diffp = dcp->dc_diff_dat;
813 28           aabspos = extral = diffl = 0;
814              
815             /* Walk over "before" data */
816 72068 100         for(alp = NULL; xlen > 0;){
817             s_bsdipa_off_t ll;
818             uint8_t const *beg, *end;
819              
820 72040           beg = xdat;
821 72040           end = memchr(beg, '\n', (size_t)xlen);
822 72040 50         if(end != NULL){
823 72040           ll = (s_bsdipa_off_t)(++end - xdat);
824 72040           xlen -= ll;
825             }else{
826 0           ll = xlen;
827 0           xlen = 0;
828             }
829 72040           xdat += ll;
830              
831             if(/*ll >= s_BSDIPA_OFF_MAX - 1 ||*/ ll != (s_bsdipa_off_t)(uint32_t)ll)
832             goto jleave;
833              
834             /* Differential data at all possible? */
835 72040 50         if(alhpp != NULL){
836             uint32_t h;
837              
838 72040 50         if(hptf == NULL)
839 0 0         a_CSHASH_HASH(h, beg, ll);
    0          
840             else
841 72040           h = (*hptf)(dcp->dc_hash_cookie, beg, ll);
842              
843             /* "diff" in the works? */
844 72040 100         if(alp != NULL){
845             struct a_l *lp;
846              
847             /* Try extend that range */
848 72012           lp = alp->l_nxt;
849              
850 72012 50         if(lp != NULL && h == lp->l_hash && ll == (s_bsdipa_off_t)lp->l_len &&
    100          
    50          
851 72004 50         !memcmp(beg, lp->l_dat, (size_t)ll)){
852 72004 50         if(diffl >= s_BSDIPA_OFF_MAX - ll - 1)
853 0           goto jleave;
854 72004           alp = lp;
855 72004           aabspos += ll;
856 72004           dcp->dc_diff_len += ll;
857 72004           diffl += ll;
858 72004           diffp -= ll;
859 72004           memset(diffp, 0, ll);
860 72004           continue;
861             }
862             }
863              
864             /* Not continuing diff. Maybe this line starts one? */
865 36           alp = alhpp[h % alhprime];
866 36 50         if(alp != NULL){
867 36 50         do if(h == alp->l_hash && ll == (s_bsdipa_off_t)alp->l_len &&
    50          
868 36 50         !memcmp(beg, alp->l_dat, ll))
869 36           break;
870 0 0         while((alp = alp->l_hashnxt) != NULL);
871              
872 36 50         if(alp != NULL){
873             s_bsdipa_off_t p;
874              
875 36           p = (s_bsdipa_off_t)(&alp->l_dat[0] - dcp->dc_after_dat);
876              
877             /* If we already had seen diff/extra in this ctrl chunk, dump first */
878 36 100         if((diffl | extral) != 0 || (!ctrl.c_need_dump && p != aabspos)){
    50          
    50          
879 8           rv = a_bsdiff_xout_ctrl(dcp, &ctrl, diffl, extral);
880 8 50         if(rv != s_BSDIPA_OK)
881 0           goto jleave;
882 8           rv = s_BSDIPA_FBIG;
883 8           extral = 0;
884             }
885              
886 36 100         if(p != aabspos){
887             assert(ctrl.c_need_dump);
888 8           ctrl.c_join_rel_pos = p - aabspos;
889 8           a_bsdiff_xout(ctrl.c_join_rel_pos,
890 8           &ctrl.c_ccp->cc_dat[ctrl.c_ccp->cc_len]);
891 8           ctrl.c_ccp->cc_len += sizeof(s_bsdipa_off_t);
892 8           ctrl.c_join_rel_pos = 0;
893 8           ctrl.c_need_dump = 0;
894             }
895              
896 36           p += ll;
897 36           aabspos = p;
898 36           dcp->dc_diff_len += ll;
899 36           diffl = ll;
900 36           diffp -= ll;
901             assert(extrap <= diffp);
902 36           memset(diffp, 0, ll);
903 36           continue;
904             }
905             }
906             }
907              
908             /* Extra data; can never generate an initial move */
909 0 0         if(extral >= s_BSDIPA_OFF_MAX - ll - 1)
910 0           goto jleave;
911 0           isneq = 1;
912 0           dcp->dc_extra_len += ll;
913 0           extral += ll;
914 0           memcpy(extrap, beg, (size_t)ll);
915 0           extrap += ll;
916             assert(extrap <= diffp);
917             }
918              
919 28 50         if((diffl | extral) != 0){
920 28           rv = a_bsdiff_xout_ctrl(dcp, &ctrl, diffl, extral);
921 28 50         if(rv != s_BSDIPA_OK)
922 0           goto jleave;
923             }
924              
925 28 50         if(ctrl.c_need_dump){
926 28           a_bsdiff_xout(0, &ctrl.c_ccp->cc_dat[ctrl.c_ccp->cc_len]);
927 28           ctrl.c_ccp->cc_len += sizeof(s_bsdipa_off_t);
928             /*ctrl.c_need_dump = 0;*/
929             }
930              
931 28           dcp->dc_is_equal_data = !isneq;
932 28           dcp->dc_diff_dat = diffp;
933             } /* }}} */
934              
935 28           rv = s_BSDIPA_OK;
936 28           jleave:
937 28 50         if(alhpp != NULL)
938 28           (*dcp->dc_mem.mc_custom_free)(dcp->dc_mem.mc_custom_cookie, alhpp);
939              
940 328 100         for(alcp = alcp_head; alcp != NULL;){
941             void *vp;
942              
943 300           vp = alcp;
944 300           alcp = alcp->lc_nxt;
945 300           (*dcp->dc_mem.mc_custom_free)(dcp->dc_mem.mc_custom_cookie, vp);
946             }
947              
948 28           return rv;
949             }
950              
951             # undef a_CSHASH_HASH
952             #endif /* s_BSDIPA_TEXT }}} */
953              
954             /* s-itt-mode */