File Coverage

deps/libgit2/src/xdiff/xdiffi.c
Criterion Covered Total %
statement 153 413 37.0
branch 89 322 27.6
condition n/a
subroutine n/a
pod n/a
total 242 735 32.9


line stmt bran cond sub pod time code
1             /*
2             * LibXDiff by Davide Libenzi ( File Differential Library )
3             * Copyright (C) 2003 Davide Libenzi
4             *
5             * This library is free software; you can redistribute it and/or
6             * modify it under the terms of the GNU Lesser General Public
7             * License as published by the Free Software Foundation; either
8             * version 2.1 of the License, or (at your option) any later version.
9             *
10             * This library is distributed in the hope that it will be useful,
11             * but WITHOUT ANY WARRANTY; without even the implied warranty of
12             * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13             * Lesser General Public License for more details.
14             *
15             * You should have received a copy of the GNU Lesser General Public
16             * License along with this library; if not, see
17             * .
18             *
19             * Davide Libenzi
20             *
21             */
22              
23             #include "xinclude.h"
24             #include "integer.h"
25              
26              
27             #define XDL_MAX_COST_MIN 256
28             #define XDL_HEUR_MIN_COST 256
29             #define XDL_LINE_MAX (long)((1UL << (CHAR_BIT * sizeof(long) - 1)) - 1)
30             #define XDL_SNAKE_CNT 20
31             #define XDL_K_HEUR 4
32              
33             /** Declare a function as always inlined. */
34             #if defined(_MSC_VER)
35             # define XDL_INLINE(type) static __inline type
36             #elif defined(__GNUC__)
37             # define XDL_INLINE(type) static __inline__ type
38             #else
39             # define XDL_INLINE(type) static type
40             #endif
41              
42             typedef struct s_xdpsplit {
43             long i1, i2;
44             int min_lo, min_hi;
45             } xdpsplit_t;
46              
47              
48              
49              
50             static long xdl_split(unsigned long const *ha1, long off1, long lim1,
51             unsigned long const *ha2, long off2, long lim2,
52             long *kvdf, long *kvdb, int need_min, xdpsplit_t *spl,
53             xdalgoenv_t *xenv);
54             static xdchange_t *xdl_add_change(xdchange_t *xscr, long i1, long i2, long chg1, long chg2);
55              
56              
57              
58              
59              
60             /*
61             * See "An O(ND) Difference Algorithm and its Variations", by Eugene Myers.
62             * Basically considers a "box" (off1, off2, lim1, lim2) and scan from both
63             * the forward diagonal starting from (off1, off2) and the backward diagonal
64             * starting from (lim1, lim2). If the K values on the same diagonal crosses
65             * returns the furthest point of reach. We might end up having to expensive
66             * cases using this algorithm is full, so a little bit of heuristic is needed
67             * to cut the search and to return a suboptimal point.
68             */
69 0           static long xdl_split(unsigned long const *ha1, long off1, long lim1,
70             unsigned long const *ha2, long off2, long lim2,
71             long *kvdf, long *kvdb, int need_min, xdpsplit_t *spl,
72             xdalgoenv_t *xenv) {
73 0           long dmin = off1 - lim2, dmax = lim1 - off2;
74 0           long fmid = off1 - off2, bmid = lim1 - lim2;
75 0           long odd = (fmid - bmid) & 1;
76 0           long fmin = fmid, fmax = fmid;
77 0           long bmin = bmid, bmax = bmid;
78             long ec, d, i1, i2, prev1, best, dd, v, k;
79              
80             /*
81             * Set initial diagonal values for both forward and backward path.
82             */
83 0           kvdf[fmid] = off1;
84 0           kvdb[bmid] = lim1;
85              
86 0           for (ec = 1;; ec++) {
87 0           int got_snake = 0;
88              
89             /*
90             * We need to extent the diagonal "domain" by one. If the next
91             * values exits the box boundaries we need to change it in the
92             * opposite direction because (max - min) must be a power of two.
93             * Also we initialize the external K value to -1 so that we can
94             * avoid extra conditions check inside the core loop.
95             */
96 0 0         if (fmin > dmin)
97 0           kvdf[--fmin - 1] = -1;
98             else
99 0           ++fmin;
100 0 0         if (fmax < dmax)
101 0           kvdf[++fmax + 1] = -1;
102             else
103 0           --fmax;
104              
105 0 0         for (d = fmax; d >= fmin; d -= 2) {
106 0 0         if (kvdf[d - 1] >= kvdf[d + 1])
107 0           i1 = kvdf[d - 1] + 1;
108             else
109 0           i1 = kvdf[d + 1];
110 0           prev1 = i1;
111 0           i2 = i1 - d;
112 0 0         for (; i1 < lim1 && i2 < lim2 && ha1[i1] == ha2[i2]; i1++, i2++);
    0          
    0          
113 0 0         if (i1 - prev1 > xenv->snake_cnt)
114 0           got_snake = 1;
115 0           kvdf[d] = i1;
116 0 0         if (odd && bmin <= d && d <= bmax && kvdb[d] <= i1) {
    0          
    0          
    0          
117 0           spl->i1 = i1;
118 0           spl->i2 = i2;
119 0           spl->min_lo = spl->min_hi = 1;
120 0           return ec;
121             }
122             }
123              
124             /*
125             * We need to extent the diagonal "domain" by one. If the next
126             * values exits the box boundaries we need to change it in the
127             * opposite direction because (max - min) must be a power of two.
128             * Also we initialize the external K value to -1 so that we can
129             * avoid extra conditions check inside the core loop.
130             */
131 0 0         if (bmin > dmin)
132 0           kvdb[--bmin - 1] = XDL_LINE_MAX;
133             else
134 0           ++bmin;
135 0 0         if (bmax < dmax)
136 0           kvdb[++bmax + 1] = XDL_LINE_MAX;
137             else
138 0           --bmax;
139              
140 0 0         for (d = bmax; d >= bmin; d -= 2) {
141 0 0         if (kvdb[d - 1] < kvdb[d + 1])
142 0           i1 = kvdb[d - 1];
143             else
144 0           i1 = kvdb[d + 1] - 1;
145 0           prev1 = i1;
146 0           i2 = i1 - d;
147 0 0         for (; i1 > off1 && i2 > off2 && ha1[i1 - 1] == ha2[i2 - 1]; i1--, i2--);
    0          
    0          
148 0 0         if (prev1 - i1 > xenv->snake_cnt)
149 0           got_snake = 1;
150 0           kvdb[d] = i1;
151 0 0         if (!odd && fmin <= d && d <= fmax && i1 <= kvdf[d]) {
    0          
    0          
    0          
152 0           spl->i1 = i1;
153 0           spl->i2 = i2;
154 0           spl->min_lo = spl->min_hi = 1;
155 0           return ec;
156             }
157             }
158              
159 0 0         if (need_min)
160 0           continue;
161              
162             /*
163             * If the edit cost is above the heuristic trigger and if
164             * we got a good snake, we sample current diagonals to see
165             * if some of the, have reached an "interesting" path. Our
166             * measure is a function of the distance from the diagonal
167             * corner (i1 + i2) penalized with the distance from the
168             * mid diagonal itself. If this value is above the current
169             * edit cost times a magic factor (XDL_K_HEUR) we consider
170             * it interesting.
171             */
172 0 0         if (got_snake && ec > xenv->heur_min) {
    0          
173 0 0         for (best = 0, d = fmax; d >= fmin; d -= 2) {
174 0 0         dd = d > fmid ? d - fmid: fmid - d;
175 0           i1 = kvdf[d];
176 0           i2 = i1 - d;
177 0           v = (i1 - off1) + (i2 - off2) - dd;
178              
179 0 0         if (v > XDL_K_HEUR * ec && v > best &&
    0          
    0          
180 0 0         off1 + xenv->snake_cnt <= i1 && i1 < lim1 &&
    0          
181 0 0         off2 + xenv->snake_cnt <= i2 && i2 < lim2) {
182 0 0         for (k = 1; ha1[i1 - k] == ha2[i2 - k]; k++)
183 0 0         if (k == xenv->snake_cnt) {
184 0           best = v;
185 0           spl->i1 = i1;
186 0           spl->i2 = i2;
187 0           break;
188             }
189             }
190             }
191 0 0         if (best > 0) {
192 0           spl->min_lo = 1;
193 0           spl->min_hi = 0;
194 0           return ec;
195             }
196              
197 0 0         for (best = 0, d = bmax; d >= bmin; d -= 2) {
198 0 0         dd = d > bmid ? d - bmid: bmid - d;
199 0           i1 = kvdb[d];
200 0           i2 = i1 - d;
201 0           v = (lim1 - i1) + (lim2 - i2) - dd;
202              
203 0 0         if (v > XDL_K_HEUR * ec && v > best &&
    0          
    0          
204 0 0         off1 < i1 && i1 <= lim1 - xenv->snake_cnt &&
    0          
205 0 0         off2 < i2 && i2 <= lim2 - xenv->snake_cnt) {
206 0 0         for (k = 0; ha1[i1 + k] == ha2[i2 + k]; k++)
207 0 0         if (k == xenv->snake_cnt - 1) {
208 0           best = v;
209 0           spl->i1 = i1;
210 0           spl->i2 = i2;
211 0           break;
212             }
213             }
214             }
215 0 0         if (best > 0) {
216 0           spl->min_lo = 0;
217 0           spl->min_hi = 1;
218 0           return ec;
219             }
220             }
221              
222             /*
223             * Enough is enough. We spent too much time here and now we collect
224             * the furthest reaching path using the (i1 + i2) measure.
225             */
226 0 0         if (ec >= xenv->mxcost) {
227             long fbest, fbest1, bbest, bbest1;
228              
229 0           fbest = fbest1 = -1;
230 0 0         for (d = fmax; d >= fmin; d -= 2) {
231 0           i1 = XDL_MIN(kvdf[d], lim1);
232 0           i2 = i1 - d;
233 0 0         if (lim2 < i2)
234 0           i1 = lim2 + d, i2 = lim2;
235 0 0         if (fbest < i1 + i2) {
236 0           fbest = i1 + i2;
237 0           fbest1 = i1;
238             }
239             }
240              
241 0           bbest = bbest1 = XDL_LINE_MAX;
242 0 0         for (d = bmax; d >= bmin; d -= 2) {
243 0           i1 = XDL_MAX(off1, kvdb[d]);
244 0           i2 = i1 - d;
245 0 0         if (i2 < off2)
246 0           i1 = off2 + d, i2 = off2;
247 0 0         if (i1 + i2 < bbest) {
248 0           bbest = i1 + i2;
249 0           bbest1 = i1;
250             }
251             }
252              
253 0 0         if ((lim1 + lim2) - bbest < fbest - (off1 + off2)) {
254 0           spl->i1 = fbest1;
255 0           spl->i2 = fbest - fbest1;
256 0           spl->min_lo = 1;
257 0           spl->min_hi = 0;
258             } else {
259 0           spl->i1 = bbest1;
260 0           spl->i2 = bbest - bbest1;
261 0           spl->min_lo = 0;
262 0           spl->min_hi = 1;
263             }
264 0           return ec;
265             }
266 0           }
267             }
268              
269              
270             /*
271             * Rule: "Divide et Impera". Recursively split the box in sub-boxes by calling
272             * the box splitting function. Note that the real job (marking changed lines)
273             * is done in the two boundary reaching checks.
274             */
275 73           int xdl_recs_cmp(diffdata_t *dd1, long off1, long lim1,
276             diffdata_t *dd2, long off2, long lim2,
277             long *kvdf, long *kvdb, int need_min, xdalgoenv_t *xenv) {
278 73           unsigned long const *ha1 = dd1->ha, *ha2 = dd2->ha;
279              
280             /*
281             * Shrink the box by walking through each diagonal snake (SW and NE).
282             */
283 75 100         for (; off1 < lim1 && off2 < lim2 && ha1[off1] == ha2[off2]; off1++, off2++);
    100          
    50          
284 73 100         for (; off1 < lim1 && off2 < lim2 && ha1[lim1 - 1] == ha2[lim2 - 1]; lim1--, lim2--);
    50          
    0          
285              
286             /*
287             * If one dimension is empty, then all records on the other one must
288             * be obviously changed.
289             */
290 73 100         if (off1 == lim1) {
291 71           char *rchg2 = dd2->rchg;
292 71           long *rindex2 = dd2->rindex;
293              
294 71 50         for (; off2 < lim2; off2++)
295 0           rchg2[rindex2[off2]] = 1;
296 2 50         } else if (off2 == lim2) {
297 2           char *rchg1 = dd1->rchg;
298 2           long *rindex1 = dd1->rindex;
299              
300 6 100         for (; off1 < lim1; off1++)
301 4           rchg1[rindex1[off1]] = 1;
302             } else {
303             xdpsplit_t spl;
304 0           spl.i1 = spl.i2 = 0;
305              
306             /*
307             * Divide ...
308             */
309 0 0         if (xdl_split(ha1, off1, lim1, ha2, off2, lim2, kvdf, kvdb,
310             need_min, &spl, xenv) < 0) {
311              
312 0           return -1;
313             }
314              
315             /*
316             * ... et Impera.
317             */
318 0 0         if (xdl_recs_cmp(dd1, off1, spl.i1, dd2, off2, spl.i2,
319 0 0         kvdf, kvdb, spl.min_lo, xenv) < 0 ||
320 0           xdl_recs_cmp(dd1, spl.i1, lim1, dd2, spl.i2, lim2,
321             kvdf, kvdb, spl.min_hi, xenv) < 0) {
322              
323 0           return -1;
324             }
325             }
326              
327 73           return 0;
328             }
329              
330              
331 73           int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
332             xdfenv_t *xe) {
333             size_t ndiags, allocsize;
334             long *kvd, *kvdf, *kvdb;
335             xdalgoenv_t xenv;
336             diffdata_t dd1, dd2;
337              
338 73 50         if (XDF_DIFF_ALG(xpp->flags) == XDF_PATIENCE_DIFF)
339 0           return xdl_do_patience_diff(mf1, mf2, xpp, xe);
340              
341 73 50         if (XDF_DIFF_ALG(xpp->flags) == XDF_HISTOGRAM_DIFF)
342 0           return xdl_do_histogram_diff(mf1, mf2, xpp, xe);
343              
344 73 50         if (xdl_prepare_env(mf1, mf2, xpp, xe) < 0) {
345              
346 0           return -1;
347             }
348              
349             /*
350             * Allocate and setup K vectors to be used by the differential algorithm.
351             * One is to store the forward path and one to store the backward path.
352             */
353 73 50         GIT_ERROR_CHECK_ALLOC_ADD3(&ndiags, xe->xdf1.nreff, xe->xdf2.nreff, 3);
    50          
354 73 50         GIT_ERROR_CHECK_ALLOC_MULTIPLY(&allocsize, ndiags, 2);
    50          
355 73 50         GIT_ERROR_CHECK_ALLOC_ADD(&allocsize, allocsize, 2);
    50          
356 73 50         GIT_ERROR_CHECK_ALLOC_MULTIPLY(&allocsize, allocsize, sizeof(long));
    50          
357              
358 73 50         if (!(kvd = (long *) xdl_malloc(allocsize))) {
359 0           xdl_free_env(xe);
360 0           return -1;
361             }
362 73           kvdf = kvd;
363 73           kvdb = kvdf + ndiags;
364 73           kvdf += xe->xdf2.nreff + 1;
365 73           kvdb += xe->xdf2.nreff + 1;
366              
367 73           xenv.mxcost = xdl_bogosqrt(ndiags);
368 73 50         if (xenv.mxcost < XDL_MAX_COST_MIN)
369 73           xenv.mxcost = XDL_MAX_COST_MIN;
370 73           xenv.snake_cnt = XDL_SNAKE_CNT;
371 73           xenv.heur_min = XDL_HEUR_MIN_COST;
372              
373 73           dd1.nrec = xe->xdf1.nreff;
374 73           dd1.ha = xe->xdf1.ha;
375 73           dd1.rchg = xe->xdf1.rchg;
376 73           dd1.rindex = xe->xdf1.rindex;
377 73           dd2.nrec = xe->xdf2.nreff;
378 73           dd2.ha = xe->xdf2.ha;
379 73           dd2.rchg = xe->xdf2.rchg;
380 73           dd2.rindex = xe->xdf2.rindex;
381              
382 73 50         if (xdl_recs_cmp(&dd1, 0, dd1.nrec, &dd2, 0, dd2.nrec,
383 73           kvdf, kvdb, (xpp->flags & XDF_NEED_MINIMAL) != 0, &xenv) < 0) {
384              
385 0           xdl_free(kvd);
386 0           xdl_free_env(xe);
387 0           return -1;
388             }
389              
390 73           xdl_free(kvd);
391              
392 73           return 0;
393             }
394              
395              
396 75           static xdchange_t *xdl_add_change(xdchange_t *xscr, long i1, long i2, long chg1, long chg2) {
397             xdchange_t *xch;
398              
399 75 50         if (!(xch = (xdchange_t *) xdl_malloc(sizeof(xdchange_t))))
400 0           return NULL;
401              
402 75           xch->next = xscr;
403 75           xch->i1 = i1;
404 75           xch->i2 = i2;
405 75           xch->chg1 = chg1;
406 75           xch->chg2 = chg2;
407 75           xch->ignore = 0;
408              
409 75           return xch;
410             }
411              
412              
413 30           static int recs_match(xrecord_t *rec1, xrecord_t *rec2, long flags)
414             {
415 42           return (rec1->ha == rec2->ha &&
416 12           xdl_recmatch(rec1->ptr, rec1->size,
417             rec2->ptr, rec2->size,
418             flags));
419             }
420              
421             /*
422             * If a line is indented more than this, get_indent() just returns this value.
423             * This avoids having to do absurd amounts of work for data that are not
424             * human-readable text, and also ensures that the output of get_indent fits within
425             * an int.
426             */
427             #define MAX_INDENT 200
428              
429             /*
430             * Return the amount of indentation of the specified line, treating TAB as 8
431             * columns. Return -1 if line is empty or contains only whitespace. Clamp the
432             * output value at MAX_INDENT.
433             */
434 0           static int get_indent(xrecord_t *rec)
435             {
436             long i;
437 0           int ret = 0;
438              
439 0 0         for (i = 0; i < rec->size; i++) {
440 0           char c = rec->ptr[i];
441              
442 0 0         if (!XDL_ISSPACE(c))
443 0           return ret;
444 0 0         else if (c == ' ')
445 0           ret += 1;
446 0 0         else if (c == '\t')
447 0           ret += 8 - ret % 8;
448             /* ignore other whitespace characters */
449              
450 0 0         if (ret >= MAX_INDENT)
451 0           return MAX_INDENT;
452             }
453              
454             /* The line contains only whitespace. */
455 0           return -1;
456             }
457              
458             /*
459             * If more than this number of consecutive blank rows are found, just return this
460             * value. This avoids requiring O(N^2) work for pathological cases, and also
461             * ensures that the output of score_split fits in an int.
462             */
463             #define MAX_BLANKS 20
464              
465             /* Characteristics measured about a hypothetical split position. */
466             struct split_measurement {
467             /*
468             * Is the split at the end of the file (aside from any blank lines)?
469             */
470             int end_of_file;
471              
472             /*
473             * How much is the line immediately following the split indented (or -1 if
474             * the line is blank):
475             */
476             int indent;
477              
478             /*
479             * How many consecutive lines above the split are blank?
480             */
481             int pre_blank;
482              
483             /*
484             * How much is the nearest non-blank line above the split indented (or -1
485             * if there is no such line)?
486             */
487             int pre_indent;
488              
489             /*
490             * How many lines after the line following the split are blank?
491             */
492             int post_blank;
493              
494             /*
495             * How much is the nearest non-blank line after the line following the
496             * split indented (or -1 if there is no such line)?
497             */
498             int post_indent;
499             };
500              
501             struct split_score {
502             /* The effective indent of this split (smaller is preferred). */
503             int effective_indent;
504              
505             /* Penalty for this split (smaller is preferred). */
506             int penalty;
507             };
508              
509             /*
510             * Fill m with information about a hypothetical split of xdf above line split.
511             */
512 0           static void measure_split(const xdfile_t *xdf, long split,
513             struct split_measurement *m)
514             {
515             long i;
516              
517 0 0         if (split >= xdf->nrec) {
518 0           m->end_of_file = 1;
519 0           m->indent = -1;
520             } else {
521 0           m->end_of_file = 0;
522 0           m->indent = get_indent(xdf->recs[split]);
523             }
524              
525 0           m->pre_blank = 0;
526 0           m->pre_indent = -1;
527 0 0         for (i = split - 1; i >= 0; i--) {
528 0           m->pre_indent = get_indent(xdf->recs[i]);
529 0 0         if (m->pre_indent != -1)
530 0           break;
531 0           m->pre_blank += 1;
532 0 0         if (m->pre_blank == MAX_BLANKS) {
533 0           m->pre_indent = 0;
534 0           break;
535             }
536             }
537              
538 0           m->post_blank = 0;
539 0           m->post_indent = -1;
540 0 0         for (i = split + 1; i < xdf->nrec; i++) {
541 0           m->post_indent = get_indent(xdf->recs[i]);
542 0 0         if (m->post_indent != -1)
543 0           break;
544 0           m->post_blank += 1;
545 0 0         if (m->post_blank == MAX_BLANKS) {
546 0           m->post_indent = 0;
547 0           break;
548             }
549             }
550 0           }
551              
552             /*
553             * The empirically-determined weight factors used by score_split() below.
554             * Larger values means that the position is a less favorable place to split.
555             *
556             * Note that scores are only ever compared against each other, so multiplying
557             * all of these weight/penalty values by the same factor wouldn't change the
558             * heuristic's behavior. Still, we need to set that arbitrary scale *somehow*.
559             * In practice, these numbers are chosen to be large enough that they can be
560             * adjusted relative to each other with sufficient precision despite using
561             * integer math.
562             */
563              
564             /* Penalty if there are no non-blank lines before the split */
565             #define START_OF_FILE_PENALTY 1
566              
567             /* Penalty if there are no non-blank lines after the split */
568             #define END_OF_FILE_PENALTY 21
569              
570             /* Multiplier for the number of blank lines around the split */
571             #define TOTAL_BLANK_WEIGHT (-30)
572              
573             /* Multiplier for the number of blank lines after the split */
574             #define POST_BLANK_WEIGHT 6
575              
576             /*
577             * Penalties applied if the line is indented more than its predecessor
578             */
579             #define RELATIVE_INDENT_PENALTY (-4)
580             #define RELATIVE_INDENT_WITH_BLANK_PENALTY 10
581              
582             /*
583             * Penalties applied if the line is indented less than both its predecessor and
584             * its successor
585             */
586             #define RELATIVE_OUTDENT_PENALTY 24
587             #define RELATIVE_OUTDENT_WITH_BLANK_PENALTY 17
588              
589             /*
590             * Penalties applied if the line is indented less than its predecessor but not
591             * less than its successor
592             */
593             #define RELATIVE_DEDENT_PENALTY 23
594             #define RELATIVE_DEDENT_WITH_BLANK_PENALTY 17
595              
596             /*
597             * We only consider whether the sum of the effective indents for splits are
598             * less than (-1), equal to (0), or greater than (+1) each other. The resulting
599             * value is multiplied by the following weight and combined with the penalty to
600             * determine the better of two scores.
601             */
602             #define INDENT_WEIGHT 60
603              
604             /*
605             * Compute a badness score for the hypothetical split whose measurements are
606             * stored in m. The weight factors were determined empirically using the tools and
607             * corpus described in
608             *
609             * https://github.com/mhagger/diff-slider-tools
610             *
611             * Also see that project if you want to improve the weights based on, for example,
612             * a larger or more diverse corpus.
613             */
614 0           static void score_add_split(const struct split_measurement *m, struct split_score *s)
615             {
616             /*
617             * A place to accumulate penalty factors (positive makes this index more
618             * favored):
619             */
620             int post_blank, total_blank, indent, any_blanks;
621              
622 0 0         if (m->pre_indent == -1 && m->pre_blank == 0)
    0          
623 0           s->penalty += START_OF_FILE_PENALTY;
624              
625 0 0         if (m->end_of_file)
626 0           s->penalty += END_OF_FILE_PENALTY;
627              
628             /*
629             * Set post_blank to the number of blank lines following the split,
630             * including the line immediately after the split:
631             */
632 0 0         post_blank = (m->indent == -1) ? 1 + m->post_blank : 0;
633 0           total_blank = m->pre_blank + post_blank;
634              
635             /* Penalties based on nearby blank lines: */
636 0           s->penalty += TOTAL_BLANK_WEIGHT * total_blank;
637 0           s->penalty += POST_BLANK_WEIGHT * post_blank;
638              
639 0 0         if (m->indent != -1)
640 0           indent = m->indent;
641             else
642 0           indent = m->post_indent;
643              
644 0           any_blanks = (total_blank != 0);
645              
646             /* Note that the effective indent is -1 at the end of the file: */
647 0           s->effective_indent += indent;
648              
649 0 0         if (indent == -1) {
650             /* No additional adjustments needed. */
651 0 0         } else if (m->pre_indent == -1) {
652             /* No additional adjustments needed. */
653 0 0         } else if (indent > m->pre_indent) {
654             /*
655             * The line is indented more than its predecessor.
656             */
657 0           s->penalty += any_blanks ?
658 0 0         RELATIVE_INDENT_WITH_BLANK_PENALTY :
659             RELATIVE_INDENT_PENALTY;
660 0 0         } else if (indent == m->pre_indent) {
661             /*
662             * The line has the same indentation level as its predecessor.
663             * No additional adjustments needed.
664             */
665             } else {
666             /*
667             * The line is indented less than its predecessor. It could be
668             * the block terminator of the previous block, but it could
669             * also be the start of a new block (e.g., an "else" block, or
670             * maybe the previous block didn't have a block terminator).
671             * Try to distinguish those cases based on what comes next:
672             */
673 0 0         if (m->post_indent != -1 && m->post_indent > indent) {
    0          
674             /*
675             * The following line is indented more. So it is likely
676             * that this line is the start of a block.
677             */
678 0           s->penalty += any_blanks ?
679 0 0         RELATIVE_OUTDENT_WITH_BLANK_PENALTY :
680             RELATIVE_OUTDENT_PENALTY;
681             } else {
682             /*
683             * That was probably the end of a block.
684             */
685 0           s->penalty += any_blanks ?
686 0 0         RELATIVE_DEDENT_WITH_BLANK_PENALTY :
687             RELATIVE_DEDENT_PENALTY;
688             }
689             }
690 0           }
691              
692 0           static int score_cmp(struct split_score *s1, struct split_score *s2)
693             {
694             /* -1 if s1.effective_indent < s2->effective_indent, etc. */
695 0           int cmp_indents = ((s1->effective_indent > s2->effective_indent) -
696 0           (s1->effective_indent < s2->effective_indent));
697              
698 0           return INDENT_WEIGHT * cmp_indents + (s1->penalty - s2->penalty);
699             }
700              
701             /*
702             * Represent a group of changed lines in an xdfile_t (i.e., a contiguous group
703             * of lines that was inserted or deleted from the corresponding version of the
704             * file). We consider there to be such a group at the beginning of the file, at
705             * the end of the file, and between any two unchanged lines, though most such
706             * groups will usually be empty.
707             *
708             * If the first line in a group is equal to the line following the group, then
709             * the group can be slid down. Similarly, if the last line in a group is equal
710             * to the line preceding the group, then the group can be slid up. See
711             * group_slide_down() and group_slide_up().
712             *
713             * Note that loops that are testing for changed lines in xdf->rchg do not need
714             * index bounding since the array is prepared with a zero at position -1 and N.
715             */
716             struct xdlgroup {
717             /*
718             * The index of the first changed line in the group, or the index of
719             * the unchanged line above which the (empty) group is located.
720             */
721             long start;
722              
723             /*
724             * The index of the first unchanged line after the group. For an empty
725             * group, end is equal to start.
726             */
727             long end;
728             };
729              
730             /*
731             * Initialize g to point at the first group in xdf.
732             */
733 292           static void group_init(xdfile_t *xdf, struct xdlgroup *g)
734             {
735 292           g->start = g->end = 0;
736 516 100         while (xdf->rchg[g->end])
737 224           g->end++;
738 292           }
739              
740             /*
741             * Move g to describe the next (possibly empty) group in xdf and return 0. If g
742             * is already at the end of the file, do nothing and return -1.
743             */
744 366           XDL_INLINE(int) group_next(xdfile_t *xdf, struct xdlgroup *g)
745             {
746 366 100         if (g->end == xdf->nrec)
747 292           return -1;
748              
749 74           g->start = g->end + 1;
750 111 100         for (g->end = g->start; xdf->rchg[g->end]; g->end++)
751             ;
752              
753 74           return 0;
754             }
755              
756             /*
757             * Move g to describe the previous (possibly empty) group in xdf and return 0.
758             * If g is already at the beginning of the file, do nothing and return -1.
759             */
760 6           XDL_INLINE(int) group_previous(xdfile_t *xdf, struct xdlgroup *g)
761             {
762 6 50         if (g->start == 0)
763 0           return -1;
764              
765 6           g->end = g->start - 1;
766 9 100         for (g->start = g->end; xdf->rchg[g->start - 1]; g->start--)
767             ;
768              
769 6           return 0;
770             }
771              
772             /*
773             * If g can be slid toward the end of the file, do so, and if it bumps into a
774             * following group, expand this group to include it. Return 0 on success or -1
775             * if g cannot be slid down.
776             */
777 125           static int group_slide_down(xdfile_t *xdf, struct xdlgroup *g, long flags)
778             {
779 138           if (g->end < xdf->nrec &&
780 13           recs_match(xdf->recs[g->start], xdf->recs[g->end], flags)) {
781 6           xdf->rchg[g->start++] = 0;
782 6           xdf->rchg[g->end++] = 1;
783              
784 6 50         while (xdf->rchg[g->end])
785 0           g->end++;
786              
787 6           return 0;
788             } else {
789 119           return -1;
790             }
791             }
792              
793             /*
794             * If g can be slid toward the beginning of the file, do so, and if it bumps
795             * into a previous group, expand this group to include it. Return 0 on success
796             * or -1 if g cannot be slid up.
797             */
798 125           static int group_slide_up(xdfile_t *xdf, struct xdlgroup *g, long flags)
799             {
800 142           if (g->start > 0 &&
801 17           recs_match(xdf->recs[g->start - 1], xdf->recs[g->end - 1], flags)) {
802 6           xdf->rchg[--g->start] = 1;
803 6           xdf->rchg[--g->end] = 0;
804              
805 6 50         while (xdf->rchg[g->start - 1])
806 0           g->start--;
807              
808 6           return 0;
809             } else {
810 119           return -1;
811             }
812             }
813              
814 0           static void xdl_bug(const char *msg)
815             {
816 0           fprintf(stderr, "BUG: %s\n", msg);
817 0           exit(1);
818             }
819              
820             /*
821             * Move back and forward change groups for a consistent and pretty diff output.
822             * This also helps in finding joinable change groups and reducing the diff
823             * size.
824             */
825 146           int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags) {
826             struct xdlgroup g, go;
827             long earliest_end, end_matching_other;
828             long groupsize;
829              
830 146           group_init(xdf, &g);
831 146           group_init(xdfo, &go);
832              
833             while (1) {
834             /* If the group is empty in the to-be-compacted file, skip it: */
835 180 100         if (g.end == g.start)
836 61           goto next;
837              
838             /*
839             * Now shift the change up and then down as far as possible in
840             * each direction. If it bumps into any other changes, merge them.
841             */
842             do {
843 119           groupsize = g.end - g.start;
844              
845             /*
846             * Keep track of the last "end" index that causes this
847             * group to align with a group of changed lines in the
848             * other file. -1 indicates that we haven't found such
849             * a match yet:
850             */
851 119           end_matching_other = -1;
852              
853             /* Shift the group backward as much as possible: */
854 123 100         while (!group_slide_up(xdf, &g, flags))
855 4 50         if (group_previous(xdfo, &go))
856 0           xdl_bug("group sync broken sliding up");
857              
858             /*
859             * This is this highest that this group can be shifted.
860             * Record its end index:
861             */
862 119           earliest_end = g.end;
863              
864 119 100         if (go.end > go.start)
865 87           end_matching_other = g.end;
866              
867             /* Now shift the group forward as far as possible: */
868             while (1) {
869 125 100         if (group_slide_down(xdf, &g, flags))
870 119           break;
871 6 50         if (group_next(xdfo, &go))
872 0           xdl_bug("group sync broken sliding down");
873              
874 6 100         if (go.end > go.start)
875 3           end_matching_other = g.end;
876 6           }
877 119 50         } while (groupsize != g.end - g.start);
878              
879             /*
880             * If the group can be shifted, then we can possibly use this
881             * freedom to produce a more intuitive diff.
882             *
883             * The group is currently shifted as far down as possible, so the
884             * heuristics below only have to handle upwards shifts.
885             */
886              
887 119 100         if (g.end == earliest_end) {
888             /* no shifting was possible */
889 2 50         } else if (end_matching_other != -1) {
890             /*
891             * Move the possibly merged group of changes back to line
892             * up with the last group of changes from the other file
893             * that it can align with.
894             */
895 4 100         while (go.end == go.start) {
896 2 50         if (group_slide_up(xdf, &g, flags))
897 0           xdl_bug("match disappeared");
898 2 50         if (group_previous(xdfo, &go))
899 0           xdl_bug("group sync broken sliding to match");
900             }
901 0 0         } else if (flags & XDF_INDENT_HEURISTIC) {
902             /*
903             * Indent heuristic: a group of pure add/delete lines
904             * implies two splits, one between the end of the "before"
905             * context and the start of the group, and another between
906             * the end of the group and the beginning of the "after"
907             * context. Some splits are aesthetically better and some
908             * are worse. We compute a badness "score" for each split,
909             * and add the scores for the two splits to define a
910             * "score" for each position that the group can be shifted
911             * to. Then we pick the shift with the lowest score.
912             */
913 0           long shift, best_shift = -1;
914             struct split_score best_score;
915              
916 0 0         for (shift = earliest_end; shift <= g.end; shift++) {
917             struct split_measurement m;
918 0           struct split_score score = {0, 0};
919              
920 0           measure_split(xdf, shift, &m);
921 0           score_add_split(&m, &score);
922 0           measure_split(xdf, shift - groupsize, &m);
923 0           score_add_split(&m, &score);
924 0           if (best_shift == -1 ||
925 0           score_cmp(&score, &best_score) <= 0) {
926 0           best_score.effective_indent = score.effective_indent;
927 0           best_score.penalty = score.penalty;
928 0           best_shift = shift;
929             }
930             }
931              
932 0 0         while (g.end > best_shift) {
933 0 0         if (group_slide_up(xdf, &g, flags))
934 0           xdl_bug("best shift unreached");
935 0 0         if (group_previous(xdfo, &go))
936 0           xdl_bug("group sync broken sliding to blank line");
937             }
938             }
939              
940             next:
941             /* Move past the just-processed group: */
942 180 100         if (group_next(xdf, &g))
943 146           break;
944 34 50         if (group_next(xdfo, &go))
945 0           xdl_bug("group sync broken moving to next group");
946 34           }
947              
948 146 50         if (!group_next(xdfo, &go))
949 0           xdl_bug("group sync broken at end of file");
950              
951 146           return 0;
952             }
953              
954              
955 73           int xdl_build_script(xdfenv_t *xe, xdchange_t **xscr) {
956 73           xdchange_t *cscr = NULL, *xch;
957 73           char *rchg1 = xe->xdf1.rchg, *rchg2 = xe->xdf2.rchg;
958             long i1, i2, l1, l2;
959              
960             /*
961             * Trivial. Collects "groups" of changes and creates an edit script.
962             */
963 163 100         for (i1 = xe->xdf1.nrec, i2 = xe->xdf2.nrec; i1 >= 0 || i2 >= 0; i1--, i2--)
    50          
964 90 100         if (rchg1[i1 - 1] || rchg2[i2 - 1]) {
    100          
965 124 100         for (l1 = i1; rchg1[i1 - 1]; i1--);
966 155 100         for (l2 = i2; rchg2[i2 - 1]; i2--);
967              
968 75 50         if (!(xch = xdl_add_change(cscr, i1, i2, l1 - i1, l2 - i2))) {
969 0           xdl_free_script(cscr);
970 0           return -1;
971             }
972 75           cscr = xch;
973             }
974              
975 73           *xscr = cscr;
976              
977 73           return 0;
978             }
979              
980              
981 73           void xdl_free_script(xdchange_t *xscr) {
982             xdchange_t *xch;
983              
984 148 100         while ((xch = xscr) != NULL) {
985 75           xscr = xscr->next;
986 75           xdl_free(xch);
987             }
988 73           }
989              
990 2           static int xdl_call_hunk_func(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
991             xdemitconf_t const *xecfg)
992             {
993             xdchange_t *xch, *xche;
994              
995             (void)xe;
996              
997 4 100         for (xch = xscr; xch; xch = xche->next) {
998 2           xche = xdl_get_hunk(&xch, xecfg);
999 2 50         if (!xch)
1000 0           break;
1001 2 50         if (xecfg->hunk_func(xch->i1, xche->i1 + xche->chg1 - xch->i1,
1002 4           xch->i2, xche->i2 + xche->chg2 - xch->i2,
1003             ecb->priv) < 0)
1004 0           return -1;
1005             }
1006 2           return 0;
1007             }
1008              
1009 0           static void xdl_mark_ignorable(xdchange_t *xscr, xdfenv_t *xe, long flags)
1010             {
1011             xdchange_t *xch;
1012              
1013 0 0         for (xch = xscr; xch; xch = xch->next) {
1014 0           int ignore = 1;
1015             xrecord_t **rec;
1016             long i;
1017              
1018 0           rec = &xe->xdf1.recs[xch->i1];
1019 0 0         for (i = 0; i < xch->chg1 && ignore; i++)
    0          
1020 0           ignore = xdl_blankline(rec[i]->ptr, rec[i]->size, flags);
1021              
1022 0           rec = &xe->xdf2.recs[xch->i2];
1023 0 0         for (i = 0; i < xch->chg2 && ignore; i++)
    0          
1024 0           ignore = xdl_blankline(rec[i]->ptr, rec[i]->size, flags);
1025              
1026 0           xch->ignore = ignore;
1027             }
1028 0           }
1029              
1030 40           int xdl_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
1031             xdemitconf_t const *xecfg, xdemitcb_t *ecb) {
1032             xdchange_t *xscr;
1033             xdfenv_t xe;
1034 40 100         emit_func_t ef = xecfg->hunk_func ? xdl_call_hunk_func : xdl_emit_diff;
1035              
1036 40 50         if (xdl_do_diff(mf1, mf2, xpp, &xe) < 0) {
1037              
1038 0           return -1;
1039             }
1040 80           if (xdl_change_compact(&xe.xdf1, &xe.xdf2, xpp->flags) < 0 ||
1041 80 50         xdl_change_compact(&xe.xdf2, &xe.xdf1, xpp->flags) < 0 ||
1042 40           xdl_build_script(&xe, &xscr) < 0) {
1043              
1044 0           xdl_free_env(&xe);
1045 0           return -1;
1046             }
1047 40 50         if (xscr) {
1048 40 50         if (xpp->flags & XDF_IGNORE_BLANK_LINES)
1049 0           xdl_mark_ignorable(xscr, &xe, xpp->flags);
1050              
1051 40 50         if (ef(&xe, xscr, ecb, xecfg) < 0) {
1052              
1053 0           xdl_free_script(xscr);
1054 0           xdl_free_env(&xe);
1055 0           return -1;
1056             }
1057 40           xdl_free_script(xscr);
1058             }
1059 40           xdl_free_env(&xe);
1060              
1061 40           return 0;
1062             }