From 33e5914863ab232164571c2e259cfa1e9c3f30dd Mon Sep 17 00:00:00 2001 From: Emily Shaffer Date: Wed, 28 Jan 2026 23:39:19 +0200 Subject: hook: provide stdin via callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds a callback mechanism for feeding stdin to hooks alongside the existing path_to_stdin (which slurps a file's content to stdin). The advantage of this new callback is that it can feed stdin without going through the FS layer. This helps when feeding large amount of data and uses the run-command parallel stdin callback introduced in the preceding commit. Signed-off-by: Emily Shaffer Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Adrian Ratiu Signed-off-by: Junio C Hamano --- hook.h | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'hook.h') diff --git a/hook.h b/hook.h index 11863fa734..2169d4a6bd 100644 --- a/hook.h +++ b/hook.h @@ -1,6 +1,7 @@ #ifndef HOOK_H #define HOOK_H #include "strvec.h" +#include "run-command.h" struct repository; @@ -37,6 +38,43 @@ struct run_hooks_opt * Path to file which should be piped to stdin for each hook. */ const char *path_to_stdin; + + /** + * Callback used to incrementally feed a child hook stdin pipe. + * + * Useful especially if a hook consumes large quantities of data + * (e.g. a list of all refs in a client push), so feeding it via + * in-memory strings or slurping to/from files is inefficient. + * While the callback allows piecemeal writing, it can also be + * used for smaller inputs, where it gets called only once. + * + * Add hook callback initalization context to `feed_pipe_ctx`. + * Add hook callback internal state to `feed_pipe_cb_data`. + * + */ + feed_pipe_fn feed_pipe; + + /** + * Opaque data pointer used to pass context to `feed_pipe_fn`. + * + * It can be accessed via the second callback arg 'pp_cb': + * ((struct hook_cb_data *) pp_cb)->hook_cb->options->feed_pipe_ctx; + * + * The caller is responsible for managing the memory for this data. + * Only useful when using `run_hooks_opt.feed_pipe`, otherwise ignore it. + */ + void *feed_pipe_ctx; + + /** + * Opaque data pointer used to keep internal state across callback calls. + * + * It can be accessed directly via the third callback arg 'pp_task_cb': + * struct ... *state = pp_task_cb; + * + * The caller is responsible for managing the memory for this data. + * Only useful when using `run_hooks_opt.feed_pipe`, otherwise ignore it. + */ + void *feed_pipe_cb_data; }; #define RUN_HOOKS_OPT_INIT { \ -- cgit v1.3-5-g9baa From d816637f6c253e3829c4c6e817c7749b3a4f8bf4 Mon Sep 17 00:00:00 2001 From: Adrian Ratiu Date: Wed, 28 Jan 2026 23:39:21 +0200 Subject: hook: allow separate std[out|err] streams The hook API assumes that all hooks merge stdout to stderr. This assumption is proven wrong by pre-push: some of its users actually expect separate stdout and stderr streams and merging them will cause a regression. Therefore this adds a mechanism to allow pre-push to separate the streams, which will be used in the next commit. The mechanism is generic via struct run_hooks_opt just in case there are any more surprise exceptions like this. Reported-by: Chris Darroch Suggested-by: brian m. carlson Signed-off-by: Adrian Ratiu Signed-off-by: Junio C Hamano --- hook.c | 2 +- hook.h | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'hook.h') diff --git a/hook.c b/hook.c index 5ddd7678d1..fde1f88ce8 100644 --- a/hook.c +++ b/hook.c @@ -81,7 +81,7 @@ static int pick_next_hook(struct child_process *cp, cp->in = -1; } - cp->stdout_to_stderr = 1; + cp->stdout_to_stderr = hook_cb->options->stdout_to_stderr; cp->trace2_hook_name = hook_cb->hook_name; cp->dir = hook_cb->options->dir; diff --git a/hook.h b/hook.h index 2169d4a6bd..2c8a23a569 100644 --- a/hook.h +++ b/hook.h @@ -34,6 +34,15 @@ struct run_hooks_opt */ int *invoked_hook; + /** + * Send the hook's stdout to stderr. + * + * This is the default behavior for all hooks except pre-push, + * which has separate stdout and stderr streams for backwards + * compatibility reasons. + */ + unsigned int stdout_to_stderr:1; + /** * Path to file which should be piped to stdin for each hook. */ @@ -80,6 +89,7 @@ struct run_hooks_opt #define RUN_HOOKS_OPT_INIT { \ .env = STRVEC_INIT, \ .args = STRVEC_INIT, \ + .stdout_to_stderr = 1, \ } struct hook_cb_data { -- cgit v1.3-5-g9baa From c549a40547fb9765a9ea78dbe4a3397dbee715b2 Mon Sep 17 00:00:00 2001 From: Adrian Ratiu Date: Wed, 28 Jan 2026 23:39:24 +0200 Subject: hook: add jobs option Allow the API callers to specify the number of jobs across which hook execution can be parallelized. It defaults to 1 and no hook currently changes it, so all hooks run sequentially as before. This allows us to both pave the way for parallel hook execution (that will be a follow-up patch series building upon this) and to finish the API conversion of builtin/receive-pack.c, keeping the output async sideband thread ("muxer") design as Peff suggested. When .jobs==1 nothing changes, the "copy_to_sideband" async thread still outputs directly via sideband channel 2, keeping the current (mostly) real-time output characteristics, avoids unnecessary poll delays or deadlock risks. When .jobs > 1, a more complex muxer is needed to buffer the hook output and avoid interleaving. After working on this mux I quickly realized I was re-implementing run-command with ungroup=0 so that idea was dropped in favor of run-command which outputs to stderr. In other words, run-command itself already can buffer/deinterleave pp child outputs (ungroup=0), so we can just connect its stderr to the sideband async task when jobs > 1. Maybe it helps to illustrate how it works with ascii graphics: [ Sequential (jobs = 1) ] [ Parallel (jobs > 1) ] +--------------+ +--------+ +--------+ | Hook Process | | Hook 1 | | Hook 2 | +--------------+ +--------+ +--------+ | | | | stderr (inherited) | stderr pipe | | | (captured) | v v v +-------------------------------------------------------------+ | Parent Process | | | | (direct write) [run-command (buffered)] | | | | | | | | writes | | v v | | +-------------------------------------------+ | | | stderr (FD 2) | | | +-------------------------------------------+ | | | | | | (dup2'd to pipe) | | v | | +-----------------------+ | | | sideband async thread | | | +-----------------------+ | +-------------------------------------------------------------+ When use_sideband == 0, the sideband async thread is missing, so this same architecture just outputs via the parent stderr stream. See the following commits for the hook API conversions doing this, using pre-existing sideband thread logic from `copy_to_sideband`. Suggested-by: Jeff King Signed-off-by: Adrian Ratiu Signed-off-by: Junio C Hamano --- hook.c | 7 +++++-- hook.h | 9 +++++++++ 2 files changed, 14 insertions(+), 2 deletions(-) (limited to 'hook.h') diff --git a/hook.c b/hook.c index fde1f88ce8..cde7198412 100644 --- a/hook.c +++ b/hook.c @@ -152,8 +152,8 @@ int run_hooks_opt(struct repository *r, const char *hook_name, .tr2_category = "hook", .tr2_label = hook_name, - .processes = 1, - .ungroup = 1, + .processes = options->jobs, + .ungroup = options->jobs == 1, .get_next_task = pick_next_hook, .start_failure = notify_start_failure, @@ -169,6 +169,9 @@ int run_hooks_opt(struct repository *r, const char *hook_name, if (options->path_to_stdin && options->feed_pipe) BUG("options path_to_stdin and feed_pipe are mutually exclusive"); + if (!options->jobs) + BUG("run_hooks_opt must be called with options.jobs >= 1"); + if (options->invoked_hook) *options->invoked_hook = 0; diff --git a/hook.h b/hook.h index 2c8a23a569..20eb56fd63 100644 --- a/hook.h +++ b/hook.h @@ -16,6 +16,14 @@ struct run_hooks_opt /* Emit an error if the hook is missing */ unsigned int error_if_missing:1; + /** + * Number of processes to parallelize across. + * + * If > 1, output will be buffered and de-interleaved (ungroup=0). + * If == 1, output will be real-time (ungroup=1). + */ + unsigned int jobs; + /** * An optional initial working directory for the hook, * translates to "struct child_process"'s "dir" member. @@ -90,6 +98,7 @@ struct run_hooks_opt .env = STRVEC_INIT, \ .args = STRVEC_INIT, \ .stdout_to_stderr = 1, \ + .jobs = 1, \ } struct hook_cb_data { -- cgit v1.3-5-g9baa