From 9cb3cab56063754d9ee5bb27886c616ca1aec134 Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Wed, 10 Jun 2020 13:57:15 -0700 Subject: http: use --stdin when indexing dumb HTTP pack When Git fetches a pack using dumb HTTP, (among other things) it invokes index-pack on a ".pack.temp" packfile, specifying the filename as an argument. A future commit will require the aforementioned invocation of index-pack to also generate a "keep" file. To use this, we either have to use index-pack's naming convention (because --keep requires the pack's filename to end with ".pack") or to pass the pack through stdin. Of the two, it is simpler to pass the pack through stdin. Thus, teach http to pass --stdin to index-pack. As a bonus, the code is now simpler. Signed-off-by: Jonathan Tan Signed-off-by: Junio C Hamano --- http.c | 33 +++++++++++---------------------- 1 file changed, 11 insertions(+), 22 deletions(-) (limited to 'http.c') diff --git a/http.c b/http.c index 62aa995245..39cbd56702 100644 --- a/http.c +++ b/http.c @@ -2270,9 +2270,9 @@ int finish_http_pack_request(struct http_pack_request *preq) { struct packed_git **lst; struct packed_git *p = preq->target; - char *tmp_idx; - size_t len; struct child_process ip = CHILD_PROCESS_INIT; + int tmpfile_fd; + int ret = 0; close_pack_index(p); @@ -2284,35 +2284,24 @@ int finish_http_pack_request(struct http_pack_request *preq) lst = &((*lst)->next); *lst = (*lst)->next; - if (!strip_suffix(preq->tmpfile.buf, ".pack.temp", &len)) - BUG("pack tmpfile does not end in .pack.temp?"); - tmp_idx = xstrfmt("%.*s.idx.temp", (int)len, preq->tmpfile.buf); + tmpfile_fd = xopen(preq->tmpfile.buf, O_RDONLY); argv_array_push(&ip.args, "index-pack"); - argv_array_pushl(&ip.args, "-o", tmp_idx, NULL); - argv_array_push(&ip.args, preq->tmpfile.buf); + argv_array_push(&ip.args, "--stdin"); ip.git_cmd = 1; - ip.no_stdin = 1; + ip.in = tmpfile_fd; ip.no_stdout = 1; if (run_command(&ip)) { - unlink(preq->tmpfile.buf); - unlink(tmp_idx); - free(tmp_idx); - return -1; - } - - unlink(sha1_pack_index_name(p->hash)); - - if (finalize_object_file(preq->tmpfile.buf, sha1_pack_name(p->hash)) - || finalize_object_file(tmp_idx, sha1_pack_index_name(p->hash))) { - free(tmp_idx); - return -1; + ret = -1; + goto cleanup; } install_packed_git(the_repository, p); - free(tmp_idx); - return 0; +cleanup: + close(tmpfile_fd); + unlink(preq->tmpfile.buf); + return ret; } struct http_pack_request *new_http_pack_request( -- cgit v1.3 From eb05349247415992644fc63ba0cf0c4821d4eef2 Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Wed, 10 Jun 2020 13:57:16 -0700 Subject: http: refactor finish_http_pack_request() finish_http_pack_request() does multiple tasks, including some housekeeping on a struct packed_git - (1) closing its index, (2) removing it from a list, and (3) installing it. These concerns are independent of fetching a pack through HTTP: they are there only because (1) the calling code opens the pack's index before deciding to fetch it, (2) the calling code maintains a list of packfiles that can be fetched, and (3) the calling code fetches it in order to make use of its objects in the same process. In preparation for a subsequent commit, which adds a feature that does not need any of this housekeeping, remove (1), (2), and (3) from finish_http_pack_request(). (2) and (3) are now done by a helper function, and (1) is the responsibility of the caller (in this patch, done closer to the point where the pack index is opened). Signed-off-by: Jonathan Tan Signed-off-by: Junio C Hamano --- http-push.c | 8 ++++++-- http-walker.c | 5 +++-- http.c | 31 ++++++++++++++++--------------- http.h | 13 ++++++++++--- 4 files changed, 35 insertions(+), 22 deletions(-) (limited to 'http.c') diff --git a/http-push.c b/http-push.c index 822f326599..ac7868ffee 100644 --- a/http-push.c +++ b/http-push.c @@ -117,6 +117,7 @@ enum transfer_state { struct transfer_request { struct object *obj; + struct packed_git *target; char *url; char *dest; struct remote_lock *lock; @@ -314,17 +315,18 @@ static void start_fetch_packed(struct transfer_request *request) release_request(request); return; } + close_pack_index(target); + request->target = target; fprintf(stderr, "Fetching pack %s\n", hash_to_hex(target->hash)); fprintf(stderr, " which contains %s\n", oid_to_hex(&request->obj->oid)); - preq = new_http_pack_request(target, repo->url); + preq = new_http_pack_request(target->hash, repo->url); if (preq == NULL) { repo->can_update_info_refs = 0; return; } - preq->lst = &repo->packs; /* Make sure there isn't another open request for this pack */ while (check_request) { @@ -597,6 +599,8 @@ static void finish_request(struct transfer_request *request) } if (fail) repo->can_update_info_refs = 0; + else + http_install_packfile(request->target, &repo->packs); release_request(request); } } diff --git a/http-walker.c b/http-walker.c index fe15e325fa..4fb1235cd4 100644 --- a/http-walker.c +++ b/http-walker.c @@ -439,6 +439,7 @@ static int http_fetch_pack(struct walker *walker, struct alt_base *repo, unsigne target = find_sha1_pack(sha1, repo->packs); if (!target) return -1; + close_pack_index(target); if (walker->get_verbosely) { fprintf(stderr, "Getting pack %s\n", @@ -447,10 +448,9 @@ static int http_fetch_pack(struct walker *walker, struct alt_base *repo, unsigne hash_to_hex(sha1)); } - preq = new_http_pack_request(target, repo->base); + preq = new_http_pack_request(target->hash, repo->base); if (preq == NULL) goto abort; - preq->lst = &repo->packs; preq->slot->results = &results; if (start_active_slot(preq->slot)) { @@ -469,6 +469,7 @@ static int http_fetch_pack(struct walker *walker, struct alt_base *repo, unsigne release_http_pack_request(preq); if (ret) return ret; + http_install_packfile(target, &repo->packs); return 0; diff --git a/http.c b/http.c index 39cbd56702..4f6e1fb018 100644 --- a/http.c +++ b/http.c @@ -2268,22 +2268,13 @@ void release_http_pack_request(struct http_pack_request *preq) int finish_http_pack_request(struct http_pack_request *preq) { - struct packed_git **lst; - struct packed_git *p = preq->target; struct child_process ip = CHILD_PROCESS_INIT; int tmpfile_fd; int ret = 0; - close_pack_index(p); - fclose(preq->packfile); preq->packfile = NULL; - lst = preq->lst; - while (*lst != p) - lst = &((*lst)->next); - *lst = (*lst)->next; - tmpfile_fd = xopen(preq->tmpfile.buf, O_RDONLY); argv_array_push(&ip.args, "index-pack"); @@ -2297,15 +2288,26 @@ int finish_http_pack_request(struct http_pack_request *preq) goto cleanup; } - install_packed_git(the_repository, p); cleanup: close(tmpfile_fd); unlink(preq->tmpfile.buf); return ret; } +void http_install_packfile(struct packed_git *p, + struct packed_git **list_to_remove_from) +{ + struct packed_git **lst = list_to_remove_from; + + while (*lst != p) + lst = &((*lst)->next); + *lst = (*lst)->next; + + install_packed_git(the_repository, p); +} + struct http_pack_request *new_http_pack_request( - struct packed_git *target, const char *base_url) + const unsigned char *packed_git_hash, const char *base_url) { off_t prev_posn = 0; struct strbuf buf = STRBUF_INIT; @@ -2313,14 +2315,13 @@ struct http_pack_request *new_http_pack_request( preq = xcalloc(1, sizeof(*preq)); strbuf_init(&preq->tmpfile, 0); - preq->target = target; end_url_with_slash(&buf, base_url); strbuf_addf(&buf, "objects/pack/pack-%s.pack", - hash_to_hex(target->hash)); + hash_to_hex(packed_git_hash)); preq->url = strbuf_detach(&buf, NULL); - strbuf_addf(&preq->tmpfile, "%s.temp", sha1_pack_name(target->hash)); + strbuf_addf(&preq->tmpfile, "%s.temp", sha1_pack_name(packed_git_hash)); preq->packfile = fopen(preq->tmpfile.buf, "a"); if (!preq->packfile) { error("Unable to open local file %s for pack", @@ -2344,7 +2345,7 @@ struct http_pack_request *new_http_pack_request( if (http_is_verbose) fprintf(stderr, "Resuming fetch of pack %s at byte %"PRIuMAX"\n", - hash_to_hex(target->hash), + hash_to_hex(packed_git_hash), (uintmax_t)prev_posn); http_opt_request_remainder(preq->slot->curl, prev_posn); } diff --git a/http.h b/http.h index 5e0ad724f9..bbc6b070f1 100644 --- a/http.h +++ b/http.h @@ -216,18 +216,25 @@ int http_get_info_packs(const char *base_url, struct http_pack_request { char *url; - struct packed_git *target; - struct packed_git **lst; FILE *packfile; struct strbuf tmpfile; struct active_request_slot *slot; }; struct http_pack_request *new_http_pack_request( - struct packed_git *target, const char *base_url); + const unsigned char *packed_git_hash, const char *base_url); int finish_http_pack_request(struct http_pack_request *preq); void release_http_pack_request(struct http_pack_request *preq); +/* + * Remove p from the given list, and invoke install_packed_git() on it. + * + * This is a convenience function for users that have obtained a list of packs + * from http_get_info_packs() and have chosen a specific pack to fetch. + */ +void http_install_packfile(struct packed_git *p, + struct packed_git **list_to_remove_from); + /* Helpers for fetching object */ struct http_object_request { char *url; -- cgit v1.3 From 8d5d2a34df4f82cd9cce913fa25f3a3c2c07d126 Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Wed, 10 Jun 2020 13:57:18 -0700 Subject: http-fetch: support fetching packfiles by URL Teach http-fetch the ability to download packfiles directly, given a URL, and to verify them. The http_pack_request suite has been augmented with a function that takes a URL directly. With this function, the hash is only used to determine the name of the temporary file. Signed-off-by: Jonathan Tan Signed-off-by: Junio C Hamano --- Documentation/git-http-fetch.txt | 9 +++++- http-fetch.c | 63 +++++++++++++++++++++++++++++++++------- http.c | 28 +++++++++++++----- http.h | 11 +++++++ t/t5550-http-fetch-dumb.sh | 30 +++++++++++++++++++ 5 files changed, 123 insertions(+), 18 deletions(-) (limited to 'http.c') diff --git a/Documentation/git-http-fetch.txt b/Documentation/git-http-fetch.txt index 666b042679..4deb4893f5 100644 --- a/Documentation/git-http-fetch.txt +++ b/Documentation/git-http-fetch.txt @@ -9,7 +9,7 @@ git-http-fetch - Download from a remote Git repository via HTTP SYNOPSIS -------- [verse] -'git http-fetch' [-c] [-t] [-a] [-d] [-v] [-w filename] [--recover] [--stdin] +'git http-fetch' [-c] [-t] [-a] [-d] [-v] [-w filename] [--recover] [--stdin | --packfile= | ] DESCRIPTION ----------- @@ -40,6 +40,13 @@ commit-id:: ['\t'] +--packfile=:: + Instead of a commit id on the command line (which is not expected in + this case), 'git http-fetch' fetches the packfile directly at the given + URL and uses index-pack to generate corresponding .idx and .keep files. + The hash is used to determine the name of the temporary file and is + arbitrary. The output of index-pack is printed to stdout. + --recover:: Verify that everything reachable from target is fetched. Used after an earlier fetch is interrupted. diff --git a/http-fetch.c b/http-fetch.c index e538174bde..1df376e745 100644 --- a/http-fetch.c +++ b/http-fetch.c @@ -5,7 +5,7 @@ #include "walker.h" static const char http_fetch_usage[] = "git http-fetch " -"[-c] [-t] [-a] [-v] [--recover] [-w ref] [--stdin] commit-id url"; +"[-c] [-t] [-a] [-v] [--recover] [-w ref] [--stdin | --packfile=hash | commit-id] url"; static int fetch_using_walker(const char *raw_url, int get_verbosely, int get_recover, int commits, char **commit_id, @@ -43,6 +43,37 @@ static int fetch_using_walker(const char *raw_url, int get_verbosely, return rc; } +static void fetch_single_packfile(struct object_id *packfile_hash, + const char *url) { + struct http_pack_request *preq; + struct slot_results results; + int ret; + + http_init(NULL, url, 0); + + preq = new_direct_http_pack_request(packfile_hash->hash, xstrdup(url)); + if (preq == NULL) + die("couldn't create http pack request"); + preq->slot->results = &results; + preq->generate_keep = 1; + + if (start_active_slot(preq->slot)) { + run_active_slot(preq->slot); + if (results.curl_result != CURLE_OK) { + die("Unable to get pack file %s\n%s", preq->url, + curl_errorstr); + } + } else { + die("Unable to start request"); + } + + if ((ret = finish_http_pack_request(preq))) + die("finish_http_pack_request gave result %d", ret); + + release_http_pack_request(preq); + http_cleanup(); +} + int cmd_main(int argc, const char **argv) { int commits_on_stdin = 0; @@ -52,8 +83,12 @@ int cmd_main(int argc, const char **argv) int arg = 1; int get_verbosely = 0; int get_recover = 0; + int packfile = 0; + struct object_id packfile_hash; while (arg < argc && argv[arg][0] == '-') { + const char *p; + if (argv[arg][1] == 't') { } else if (argv[arg][1] == 'c') { } else if (argv[arg][1] == 'a') { @@ -68,25 +103,33 @@ int cmd_main(int argc, const char **argv) get_recover = 1; } else if (!strcmp(argv[arg], "--stdin")) { commits_on_stdin = 1; + } else if (skip_prefix(argv[arg], "--packfile=", &p)) { + const char *end; + + packfile = 1; + if (parse_oid_hex(p, &packfile_hash, &end) || *end) + die(_("argument to --packfile must be a valid hash (got '%s')"), p); } arg++; } - if (argc != arg + 2 - commits_on_stdin) + if (argc != arg + 2 - (commits_on_stdin || packfile)) usage(http_fetch_usage); - if (commits_on_stdin) { - commits = walker_targets_stdin(&commit_id, &write_ref); - } else { - commit_id = (char **) &argv[arg++]; - commits = 1; - } setup_git_directory(); git_config(git_default_config, NULL); - if (!argv[arg]) - BUG("must have one arg remaining"); + if (packfile) { + fetch_single_packfile(&packfile_hash, argv[arg]); + return 0; + } + if (commits_on_stdin) { + commits = walker_targets_stdin(&commit_id, &write_ref); + } else { + commit_id = (char **) &argv[arg++]; + commits = 1; + } return fetch_using_walker(argv[arg], get_verbosely, get_recover, commits, commit_id, write_ref, commits_on_stdin); diff --git a/http.c b/http.c index 4f6e1fb018..3aa0fa9fe6 100644 --- a/http.c +++ b/http.c @@ -2281,7 +2281,13 @@ int finish_http_pack_request(struct http_pack_request *preq) argv_array_push(&ip.args, "--stdin"); ip.git_cmd = 1; ip.in = tmpfile_fd; - ip.no_stdout = 1; + if (preq->generate_keep) { + argv_array_pushf(&ip.args, "--keep=git %"PRIuMAX, + (uintmax_t)getpid()); + ip.out = 0; + } else { + ip.no_stdout = 1; + } if (run_command(&ip)) { ret = -1; @@ -2307,19 +2313,27 @@ void http_install_packfile(struct packed_git *p, } struct http_pack_request *new_http_pack_request( - const unsigned char *packed_git_hash, const char *base_url) + const unsigned char *packed_git_hash, const char *base_url) { + + struct strbuf buf = STRBUF_INIT; + + end_url_with_slash(&buf, base_url); + strbuf_addf(&buf, "objects/pack/pack-%s.pack", + hash_to_hex(packed_git_hash)); + return new_direct_http_pack_request(packed_git_hash, + strbuf_detach(&buf, NULL)); +} + +struct http_pack_request *new_direct_http_pack_request( + const unsigned char *packed_git_hash, char *url) { off_t prev_posn = 0; - struct strbuf buf = STRBUF_INIT; struct http_pack_request *preq; preq = xcalloc(1, sizeof(*preq)); strbuf_init(&preq->tmpfile, 0); - end_url_with_slash(&buf, base_url); - strbuf_addf(&buf, "objects/pack/pack-%s.pack", - hash_to_hex(packed_git_hash)); - preq->url = strbuf_detach(&buf, NULL); + preq->url = url; strbuf_addf(&preq->tmpfile, "%s.temp", sha1_pack_name(packed_git_hash)); preq->packfile = fopen(preq->tmpfile.buf, "a"); diff --git a/http.h b/http.h index bbc6b070f1..dc49c60165 100644 --- a/http.h +++ b/http.h @@ -216,6 +216,15 @@ int http_get_info_packs(const char *base_url, struct http_pack_request { char *url; + + /* + * If this is true, finish_http_pack_request() will pass "--keep" to + * index-pack, resulting in the creation of a keep file, and will not + * suppress its stdout (that is, the "keep\t\n" line will be + * printed to stdout). + */ + unsigned generate_keep : 1; + FILE *packfile; struct strbuf tmpfile; struct active_request_slot *slot; @@ -223,6 +232,8 @@ struct http_pack_request { struct http_pack_request *new_http_pack_request( const unsigned char *packed_git_hash, const char *base_url); +struct http_pack_request *new_direct_http_pack_request( + const unsigned char *packed_git_hash, char *url); int finish_http_pack_request(struct http_pack_request *preq); void release_http_pack_request(struct http_pack_request *preq); diff --git a/t/t5550-http-fetch-dumb.sh b/t/t5550-http-fetch-dumb.sh index 50485300eb..ca2e8af022 100755 --- a/t/t5550-http-fetch-dumb.sh +++ b/t/t5550-http-fetch-dumb.sh @@ -199,6 +199,28 @@ test_expect_success 'fetch packed objects' ' git clone $HTTPD_URL/dumb/repo_pack.git ' +test_expect_success 'http-fetch --packfile' ' + # Arbitrary hash. Use rev-parse so that we get one of the correct + # length. + ARBITRARY=$(git -C "$HTTPD_DOCUMENT_ROOT_PATH"/repo_pack.git rev-parse HEAD) && + + git init packfileclient && + p=$(cd "$HTTPD_DOCUMENT_ROOT_PATH"/repo_pack.git && ls objects/pack/pack-*.pack) && + git -C packfileclient http-fetch --packfile=$ARBITRARY "$HTTPD_URL"/dumb/repo_pack.git/$p >out && + + grep "^keep.[0-9a-f]\{16,\}$" out && + cut -c6- out >packhash && + + # Ensure that the expected files are generated + test -e "packfileclient/.git/objects/pack/pack-$(cat packhash).pack" && + test -e "packfileclient/.git/objects/pack/pack-$(cat packhash).idx" && + test -e "packfileclient/.git/objects/pack/pack-$(cat packhash).keep" && + + # Ensure that it has the HEAD of repo_pack, at least + HASH=$(git -C "$HTTPD_DOCUMENT_ROOT_PATH"/repo_pack.git rev-parse HEAD) && + git -C packfileclient cat-file -e "$HASH" +' + test_expect_success 'fetch notices corrupt pack' ' cp -R "$HTTPD_DOCUMENT_ROOT_PATH"/repo_pack.git "$HTTPD_DOCUMENT_ROOT_PATH"/repo_bad1.git && (cd "$HTTPD_DOCUMENT_ROOT_PATH"/repo_bad1.git && @@ -214,6 +236,14 @@ test_expect_success 'fetch notices corrupt pack' ' ) ' +test_expect_success 'http-fetch --packfile with corrupt pack' ' + rm -rf packfileclient && + git init packfileclient && + p=$(cd "$HTTPD_DOCUMENT_ROOT_PATH"/repo_bad1.git && ls objects/pack/pack-*.pack) && + test_must_fail git -C packfileclient http-fetch --packfile \ + "$HTTPD_URL"/dumb/repo_bad1.git/$p +' + test_expect_success 'fetch notices corrupt idx' ' cp -R "$HTTPD_DOCUMENT_ROOT_PATH"/repo_pack.git "$HTTPD_DOCUMENT_ROOT_PATH"/repo_bad2.git && (cd "$HTTPD_DOCUMENT_ROOT_PATH"/repo_bad2.git && -- cgit v1.3