diff options
Diffstat (limited to 'xdiff/xdiffi.c')
| -rw-r--r-- | xdiff/xdiffi.c | 76 |
1 files changed, 59 insertions, 17 deletions
diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c index 6f3998ee54..5455b4690d 100644 --- a/xdiff/xdiffi.c +++ b/xdiff/xdiffi.c @@ -22,9 +22,9 @@ #include "xinclude.h" -static unsigned long get_hash(xdfile_t *xdf, long index) +static size_t get_hash(xdfile_t *xdf, long index) { - return xdf->recs[xdf->rindex[index]].ha; + return xdf->recs[xdf->reference_index[index]].minimal_perfect_hash; } #define XDL_MAX_COST_MIN 256 @@ -278,10 +278,10 @@ int xdl_recs_cmp(xdfile_t *xdf1, long off1, long lim1, */ if (off1 == lim1) { for (; off2 < lim2; off2++) - xdf2->changed[xdf2->rindex[off2]] = true; + xdf2->changed[xdf2->reference_index[off2]] = true; } else if (off2 == lim2) { for (; off1 < lim1; off1++) - xdf1->changed[xdf1->rindex[off1]] = true; + xdf1->changed[xdf1->reference_index[off1]] = true; } else { xdpsplit_t spl; spl.i1 = spl.i2 = 0; @@ -385,7 +385,7 @@ static xdchange_t *xdl_add_change(xdchange_t *xscr, long i1, long i2, long chg1, static int recs_match(xrecord_t *rec1, xrecord_t *rec2) { - return (rec1->ha == rec2->ha); + return rec1->minimal_perfect_hash == rec2->minimal_perfect_hash; } /* @@ -403,11 +403,10 @@ static int recs_match(xrecord_t *rec1, xrecord_t *rec2) */ static int get_indent(xrecord_t *rec) { - long i; int ret = 0; - for (i = 0; i < rec->size; i++) { - char c = rec->ptr[i]; + for (size_t i = 0; i < rec->size; i++) { + char c = (char) rec->ptr[i]; if (!XDL_ISSPACE(c)) return ret; @@ -484,7 +483,7 @@ static void measure_split(const xdfile_t *xdf, long split, { long i; - if (split >= xdf->nrec) { + if (split >= (long)xdf->nrec) { m->end_of_file = 1; m->indent = -1; } else { @@ -507,7 +506,7 @@ static void measure_split(const xdfile_t *xdf, long split, m->post_blank = 0; m->post_indent = -1; - for (i = split + 1; i < xdf->nrec; i++) { + for (i = split + 1; i < (long)xdf->nrec; i++) { m->post_indent = get_indent(&xdf->recs[i]); if (m->post_indent != -1) break; @@ -718,7 +717,7 @@ static void group_init(xdfile_t *xdf, struct xdlgroup *g) */ static inline int group_next(xdfile_t *xdf, struct xdlgroup *g) { - if (g->end == xdf->nrec) + if (g->end == (long)xdf->nrec) return -1; g->start = g->end + 1; @@ -751,7 +750,7 @@ static inline int group_previous(xdfile_t *xdf, struct xdlgroup *g) */ static int group_slide_down(xdfile_t *xdf, struct xdlgroup *g) { - if (g->end < xdf->nrec && + if (g->end < (long)xdf->nrec && recs_match(&xdf->recs[g->start], &xdf->recs[g->end])) { xdf->changed[g->start++] = false; xdf->changed[g->end++] = true; @@ -793,6 +792,7 @@ static int group_slide_up(xdfile_t *xdf, struct xdlgroup *g) */ int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags) { struct xdlgroup g, go; + struct xdlgroup g_orig; long earliest_end, end_matching_other; long groupsize; @@ -806,10 +806,12 @@ int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags) { if (g.end == g.start) goto next; + g_orig = g; + /* * Now shift the change up and then down as far as possible in * each direction. If it bumps into any other changes, merge - * them. + * them and restart the process. */ do { groupsize = g.end - g.start; @@ -862,7 +864,8 @@ int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags) { /* * Move the possibly merged group of changes back to * line up with the last group of changes from the - * other file that it can align with. + * other file that it can align with. This avoids breaking + * a single change into a separate addition/deletion. */ while (go.end == go.start) { if (group_slide_up(xdf, &g)) @@ -915,6 +918,45 @@ int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags) { } } + /* + * If we merged change groups during shifting, the new + * combined group could now have matching lines in both files, + * even if the original separate groups did not. Re-diff the + * new group to find these matching lines to mark them as + * unchanged. + * + * Only do this if the corresponding group in the other file is + * non-empty, as it's trivial otherwise. + * + * Only do this for histogram diff as its LCS algorithm allows + * for this scenario. In contrast, patience diff finds LCS + * of unique lines that groups cannot be shifted across. + * Myer's diff (standalone or used as fall-back in patience + * diff) already finds minimal edits so it is not possible for + * shifted groups to result in a smaller diff. (Without + * XDF_NEED_MINIMAL, Myer's isn't technically guaranteed to be + * minimal, but it should be so most of the time) + */ + if (go.end != go.start && + XDF_DIFF_ALG(flags) == XDF_HISTOGRAM_DIFF && + (g.start != g_orig.start || + g.end != g_orig.end)) { + xpparam_t xpp; + xdfenv_t xe; + + memset(&xpp, 0, sizeof(xpp)); + xpp.flags = flags & ~XDF_DIFF_ALGORITHM_MASK; + + xe.xdf1 = *xdf; + xe.xdf2 = *xdfo; + + if (xdl_fall_back_diff(&xe, &xpp, + g.start + 1, g.end - g.start, + go.start + 1, go.end - go.start)) { + return -1; + } + } + next: /* Move past the just-processed group: */ if (group_next(xdf, &g)) @@ -993,11 +1035,11 @@ static void xdl_mark_ignorable_lines(xdchange_t *xscr, xdfenv_t *xe, long flags) rec = &xe->xdf1.recs[xch->i1]; for (i = 0; i < xch->chg1 && ignore; i++) - ignore = xdl_blankline(rec[i].ptr, rec[i].size, flags); + ignore = xdl_blankline((const char *)rec[i].ptr, (long)rec[i].size, flags); rec = &xe->xdf2.recs[xch->i2]; for (i = 0; i < xch->chg2 && ignore; i++) - ignore = xdl_blankline(rec[i].ptr, rec[i].size, flags); + ignore = xdl_blankline((const char *)rec[i].ptr, (long)rec[i].size, flags); xch->ignore = ignore; } @@ -1008,7 +1050,7 @@ static int record_matches_regex(xrecord_t *rec, xpparam_t const *xpp) { size_t i; for (i = 0; i < xpp->ignore_regex_nr; i++) - if (!regexec_buf(xpp->ignore_regex[i], rec->ptr, rec->size, 1, + if (!regexec_buf(xpp->ignore_regex[i], (const char *)rec->ptr, rec->size, 1, ®match, 0)) return 1; |
