From 86e986f166d207e1f4b80062c2befb4f94c191c4 Mon Sep 17 00:00:00 2001 From: Michael Montalbo Date: Tue, 17 Mar 2026 02:21:33 +0000 Subject: line-log: route -L output through the standard diff pipeline `git log -L` has always bypassed the standard diff pipeline. `dump_diff_hacky()` in line-log.c hand-rolls its own diff headers and hunk output, which means most diff formatting options are silently ignored. A NEEDSWORK comment has acknowledged this since the feature was introduced: /* * NEEDSWORK: manually building a diff here is not the Right * Thing(tm). log -L should be built into the diff pipeline. */ Remove `dump_diff_hacky()` and its helpers and route -L output through `builtin_diff()` / `fn_out_consume()`, the same path used by `git diff` and `git log -p`. The mechanism is a pair of callback wrappers that sit between `xdi_diff_outf()` and `fn_out_consume()`, filtering xdiff's output to only the tracked line ranges. To ensure xdiff emits all lines within each range as context, the context length is inflated to span the largest range. Wire up the `-L` implies `--patch` default in revision setup rather than forcing it at output time, so `line_log_print()` is just `diffcore_std()` + `diff_flush()` with no format save/restore. Rename detection is a no-op since pairs are already resolved during the history walk in `queue_diffs()`, but running `diffcore_std()` means `-S`/`-G` (pickaxe), `--orderfile`, and `--diff-filter` now work with `-L`, and `diff_resolve_rename_copy()` sets pair statuses correctly without manual assignment. Switch `diff_filepair_dup()` from `xmalloc` to `xcalloc` so that new fields (including `line_ranges`) are zero-initialized by default. As a result, diff formatting options that were previously silently ignored (e.g. --word-diff, --no-prefix, -w, --color-moved) now work with -L, and output gains `index` lines, `new file mode` headers, and funcname context in `@@` headers. This is a user-visible output change: tools that parse -L output may need to handle the additional header lines. The context-length inflation means xdiff may process more output than needed for very wide line ranges, but benchmarks on files up to 7800 lines show no measurable regression. Signed-off-by: Michael Montalbo Signed-off-by: Junio C Hamano --- diff.c | 279 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 277 insertions(+), 2 deletions(-) (limited to 'diff.c') diff --git a/diff.c b/diff.c index 501648a5c4..f79e37a210 100644 --- a/diff.c +++ b/diff.c @@ -608,6 +608,52 @@ struct emit_callback { struct strbuf *header; }; +/* + * State for the line-range callback wrappers that sit between + * xdi_diff_outf() and fn_out_consume(). xdiff produces a normal, + * unfiltered diff; the wrappers intercept each hunk header and line, + * track post-image position, and forward only lines that fall within + * the requested ranges. Contiguous in-range lines are collected into + * range hunks and flushed with a synthetic @@ header so that + * fn_out_consume() sees well-formed unified-diff fragments. + * + * Removal lines ('-') cannot be classified by post-image position, so + * they are buffered in pending_rm until the next '+' or ' ' line + * reveals whether they precede an in-range line (flush into range hunk) or + * an out-of-range line (discard). + */ +struct line_range_callback { + xdiff_emit_line_fn orig_line_fn; + void *orig_cb_data; + const struct range_set *ranges; /* 0-based [start, end) */ + unsigned int cur_range; /* index into the range_set */ + + /* Post/pre-image line counters (1-based, set from hunk headers) */ + long lno_post; + long lno_pre; + + /* + * Function name from most recent xdiff hunk header; + * size matches struct func_line.buf in xdiff/xemit.c. + */ + char func[80]; + long funclen; + + /* Range hunk being accumulated for the current range */ + struct strbuf rhunk; + long rhunk_old_begin, rhunk_old_count; + long rhunk_new_begin, rhunk_new_count; + int rhunk_active; + int rhunk_has_changes; /* any '+' or '-' lines? */ + + /* Removal lines not yet known to be in-range */ + struct strbuf pending_rm; + int pending_rm_count; + long pending_rm_pre_begin; /* pre-image line of first pending */ + + int ret; /* latched error from orig_line_fn */ +}; + static int count_lines(const char *data, int size) { int count, ch, completely_empty = 1, nl_just_seen = 0; @@ -2493,6 +2539,188 @@ static int quick_consume(void *priv, char *line UNUSED, unsigned long len UNUSED return 1; } +static void discard_pending_rm(struct line_range_callback *s) +{ + strbuf_reset(&s->pending_rm); + s->pending_rm_count = 0; +} + +static void flush_rhunk(struct line_range_callback *s) +{ + struct strbuf hdr = STRBUF_INIT; + const char *p, *end; + + if (!s->rhunk_active || s->ret) + return; + + /* Drain any pending removal lines into the range hunk */ + if (s->pending_rm_count) { + strbuf_addbuf(&s->rhunk, &s->pending_rm); + s->rhunk_old_count += s->pending_rm_count; + s->rhunk_has_changes = 1; + discard_pending_rm(s); + } + + /* + * Suppress context-only hunks: they contain no actual changes + * and would just be noise. This can happen when the inflated + * ctxlen causes xdiff to emit context covering a range that + * has no changes in this commit. + */ + if (!s->rhunk_has_changes) { + s->rhunk_active = 0; + strbuf_reset(&s->rhunk); + return; + } + + strbuf_addf(&hdr, "@@ -%ld,%ld +%ld,%ld @@", + s->rhunk_old_begin, s->rhunk_old_count, + s->rhunk_new_begin, s->rhunk_new_count); + if (s->funclen > 0) { + strbuf_addch(&hdr, ' '); + strbuf_add(&hdr, s->func, s->funclen); + } + strbuf_addch(&hdr, '\n'); + + s->ret = s->orig_line_fn(s->orig_cb_data, hdr.buf, hdr.len); + strbuf_release(&hdr); + + /* + * Replay buffered lines one at a time through fn_out_consume. + * The cast discards const because xdiff_emit_line_fn takes + * char *, though fn_out_consume does not modify the buffer. + */ + p = s->rhunk.buf; + end = p + s->rhunk.len; + while (!s->ret && p < end) { + const char *eol = memchr(p, '\n', end - p); + unsigned long line_len = eol ? (unsigned long)(eol - p + 1) + : (unsigned long)(end - p); + s->ret = s->orig_line_fn(s->orig_cb_data, (char *)p, line_len); + p += line_len; + } + + s->rhunk_active = 0; + strbuf_reset(&s->rhunk); +} + +static void line_range_hunk_fn(void *data, + long old_begin, long old_nr UNUSED, + long new_begin, long new_nr UNUSED, + const char *func, long funclen) +{ + struct line_range_callback *s = data; + + /* + * When count > 0, begin is 1-based. When count == 0, begin is + * adjusted down by 1 by xdl_emit_hunk_hdr(), but no lines of + * that type will arrive, so the value is unused. + * + * Any pending removal lines from the previous xdiff hunk are + * intentionally left in pending_rm: the line callback will + * flush or discard them when the next content line reveals + * whether the removals precede in-range content. + */ + s->lno_post = new_begin; + s->lno_pre = old_begin; + + if (funclen > 0) { + if (funclen > (long)sizeof(s->func)) + funclen = sizeof(s->func); + memcpy(s->func, func, funclen); + } + s->funclen = funclen; +} + +static int line_range_line_fn(void *priv, char *line, unsigned long len) +{ + struct line_range_callback *s = priv; + const struct range *cur; + long lno_0, cur_pre; + + if (s->ret) + return s->ret; + + if (line[0] == '-') { + if (!s->pending_rm_count) + s->pending_rm_pre_begin = s->lno_pre; + s->lno_pre++; + strbuf_add(&s->pending_rm, line, len); + s->pending_rm_count++; + return s->ret; + } + + if (line[0] == '\\') { + if (s->pending_rm_count) + strbuf_add(&s->pending_rm, line, len); + else if (s->rhunk_active) + strbuf_add(&s->rhunk, line, len); + /* otherwise outside tracked range; drop silently */ + return s->ret; + } + + if (line[0] != '+' && line[0] != ' ') + BUG("unexpected diff line type '%c'", line[0]); + + lno_0 = s->lno_post - 1; + cur_pre = s->lno_pre; /* save before advancing for context lines */ + s->lno_post++; + if (line[0] == ' ') + s->lno_pre++; + + /* Advance past ranges we've passed */ + while (s->cur_range < s->ranges->nr && + lno_0 >= s->ranges->ranges[s->cur_range].end) { + if (s->rhunk_active) + flush_rhunk(s); + discard_pending_rm(s); + s->cur_range++; + } + + /* Past all ranges */ + if (s->cur_range >= s->ranges->nr) { + discard_pending_rm(s); + return s->ret; + } + + cur = &s->ranges->ranges[s->cur_range]; + + /* Before current range */ + if (lno_0 < cur->start) { + discard_pending_rm(s); + return s->ret; + } + + /* In range so start a new range hunk if needed */ + if (!s->rhunk_active) { + s->rhunk_active = 1; + s->rhunk_has_changes = 0; + s->rhunk_new_begin = lno_0 + 1; + s->rhunk_old_begin = s->pending_rm_count + ? s->pending_rm_pre_begin : cur_pre; + s->rhunk_old_count = 0; + s->rhunk_new_count = 0; + strbuf_reset(&s->rhunk); + } + + /* Flush pending removals into range hunk */ + if (s->pending_rm_count) { + strbuf_addbuf(&s->rhunk, &s->pending_rm); + s->rhunk_old_count += s->pending_rm_count; + s->rhunk_has_changes = 1; + discard_pending_rm(s); + } + + strbuf_add(&s->rhunk, line, len); + s->rhunk_new_count++; + if (line[0] == '+') + s->rhunk_has_changes = 1; + else + s->rhunk_old_count++; + + return s->ret; +} + static void pprint_rename(struct strbuf *name, const char *a, const char *b) { const char *old_name = a; @@ -3596,7 +3824,8 @@ static void builtin_diff(const char *name_a, const char *xfrm_msg, int must_show_header, struct diff_options *o, - int complete_rewrite) + int complete_rewrite, + const struct range_set *line_ranges) { mmfile_t mf1, mf2; const char *lbl[2]; @@ -3837,6 +4066,52 @@ static void builtin_diff(const char *name_a, */ xdi_diff_outf(&mf1, &mf2, NULL, quick_consume, &ecbdata, &xpp, &xecfg); + } else if (line_ranges) { + struct line_range_callback lr_state; + unsigned int i; + long max_span = 0; + + memset(&lr_state, 0, sizeof(lr_state)); + lr_state.orig_line_fn = fn_out_consume; + lr_state.orig_cb_data = &ecbdata; + lr_state.ranges = line_ranges; + strbuf_init(&lr_state.rhunk, 0); + strbuf_init(&lr_state.pending_rm, 0); + + /* + * Inflate ctxlen so that all changes within + * any single range are merged into one xdiff + * hunk and the inter-change context is emitted. + * The callback clips back to range boundaries. + * + * The optimal ctxlen depends on where changes + * fall within the range, which is only known + * after xdiff runs; the max range span is the + * upper bound that guarantees correctness in a + * single pass. + */ + for (i = 0; i < line_ranges->nr; i++) { + long span = line_ranges->ranges[i].end - + line_ranges->ranges[i].start; + if (span > max_span) + max_span = span; + } + if (max_span > xecfg.ctxlen) + xecfg.ctxlen = max_span; + + if (xdi_diff_outf(&mf1, &mf2, + line_range_hunk_fn, + line_range_line_fn, + &lr_state, &xpp, &xecfg)) + die("unable to generate diff for %s", + one->path); + + flush_rhunk(&lr_state); + if (lr_state.ret) + die("unable to generate diff for %s", + one->path); + strbuf_release(&lr_state.rhunk); + strbuf_release(&lr_state.pending_rm); } else if (xdi_diff_outf(&mf1, &mf2, NULL, fn_out_consume, &ecbdata, &xpp, &xecfg)) die("unable to generate diff for %s", one->path); @@ -4678,7 +4953,7 @@ static void run_diff_cmd(const struct external_diff *pgm, builtin_diff(name, other ? other : name, one, two, xfrm_msg, must_show_header, - o, complete_rewrite); + o, complete_rewrite, p->line_ranges); if (p->status == DIFF_STATUS_COPIED || p->status == DIFF_STATUS_RENAMED) o->found_changes = 1; -- cgit v1.3