From 9a59b65dba0820671753f636e9417bfd63ea20c1 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 13 May 2024 10:47:06 +0200 Subject: reftable/reader: avoid copying index iterator When doing an indexed seek we need to walk down the multi-level index until we finally hit a record of the desired indexed type. This loop performs a copy of the index iterator on every iteration, which is both hard to understand and completely unnecessary. Refactor the code so that we use a single iterator to walk down the indices, only. Note that while this should improve performance, the improvement is negligible in all but the most unreasonable repositories. This is because the effect is only really noticeable when we have to walk down many levels of indices, which is not something that a repository would typically have. So the motivation for this change is really only about readability. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/reader.c | 38 ++++++++++++++------------------------ 1 file changed, 14 insertions(+), 24 deletions(-) (limited to 'reftable/reader.c') diff --git a/reftable/reader.c b/reftable/reader.c index 481dff10d4..6bfadcad71 100644 --- a/reftable/reader.c +++ b/reftable/reader.c @@ -510,13 +510,11 @@ static int reader_seek_indexed(struct reftable_reader *r, .type = BLOCK_TYPE_INDEX, .u.idx = { .last_key = STRBUF_INIT }, }; - struct table_iter index_iter = TABLE_ITER_INIT; - struct table_iter empty = TABLE_ITER_INIT; - struct table_iter next = TABLE_ITER_INIT; + struct table_iter ti = TABLE_ITER_INIT, *malloced; int err = 0; reftable_record_key(rec, &want_index.u.idx.last_key); - err = reader_start(r, &index_iter, reftable_record_type(rec), 1); + err = reader_start(r, &ti, reftable_record_type(rec), 1); if (err < 0) goto done; @@ -526,7 +524,7 @@ static int reader_seek_indexed(struct reftable_reader *r, * highest layer that identifies the relevant index block as well as * the record inside that block that corresponds to our wanted key. */ - err = reader_seek_linear(&index_iter, &want_index); + err = reader_seek_linear(&ti, &want_index); if (err < 0) goto done; @@ -552,44 +550,36 @@ static int reader_seek_indexed(struct reftable_reader *r, * all levels of the index only to find out that the key does * not exist. */ - err = table_iter_next(&index_iter, &index_result); + err = table_iter_next(&ti, &index_result); if (err != 0) goto done; - err = reader_table_iter_at(r, &next, index_result.u.idx.offset, - 0); + err = reader_table_iter_at(r, &ti, index_result.u.idx.offset, 0); if (err != 0) goto done; - err = block_iter_seek_key(&next.bi, &next.br, &want_index.u.idx.last_key); + err = block_iter_seek_key(&ti.bi, &ti.br, &want_index.u.idx.last_key); if (err < 0) goto done; - if (next.typ == reftable_record_type(rec)) { + if (ti.typ == reftable_record_type(rec)) { err = 0; break; } - if (next.typ != BLOCK_TYPE_INDEX) { + if (ti.typ != BLOCK_TYPE_INDEX) { err = REFTABLE_FORMAT_ERROR; - break; + goto done; } - - table_iter_close(&index_iter); - index_iter = next; - next = empty; } - if (err == 0) { - struct table_iter *malloced = reftable_calloc(1, sizeof(*malloced)); - *malloced = next; - next = empty; - iterator_from_table_iter(it, malloced); - } + REFTABLE_ALLOC_ARRAY(malloced, 1); + *malloced = ti; + iterator_from_table_iter(it, malloced); done: - table_iter_close(&next); - table_iter_close(&index_iter); + if (err) + table_iter_close(&ti); reftable_record_release(&want_index); reftable_record_release(&index_result); return err; -- cgit v1.3-5-g9baa From dfdd1455bbb2eed6674964000d037cd213687a33 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 13 May 2024 10:47:11 +0200 Subject: reftable/reader: unify indexed and linear seeking In `reader_seek_internal()` we either end up doing an indexed seek when there is one or a linear seek otherwise. These two code paths are disjunct without a good reason, where the indexed seek will cause us to exit early. Refactor the two code paths such that it becomes possible to share a bit more code between them. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/reader.c | 42 ++++++++++++++++-------------------------- 1 file changed, 16 insertions(+), 26 deletions(-) (limited to 'reftable/reader.c') diff --git a/reftable/reader.c b/reftable/reader.c index 6bfadcad71..cf7f126d8d 100644 --- a/reftable/reader.c +++ b/reftable/reader.c @@ -425,7 +425,7 @@ static int reader_seek_linear(struct table_iter *ti, struct strbuf want_key = STRBUF_INIT; struct strbuf got_key = STRBUF_INIT; struct reftable_record rec; - int err = -1; + int err; reftable_record_init(&rec, reftable_record_type(want)); reftable_record_key(want, &want_key); @@ -499,8 +499,8 @@ done: return err; } -static int reader_seek_indexed(struct reftable_reader *r, - struct reftable_iterator *it, +static int reader_seek_indexed(struct table_iter *ti, + struct reftable_reader *r, struct reftable_record *rec) { struct reftable_record want_index = { @@ -510,13 +510,9 @@ static int reader_seek_indexed(struct reftable_reader *r, .type = BLOCK_TYPE_INDEX, .u.idx = { .last_key = STRBUF_INIT }, }; - struct table_iter ti = TABLE_ITER_INIT, *malloced; - int err = 0; + int err; reftable_record_key(rec, &want_index.u.idx.last_key); - err = reader_start(r, &ti, reftable_record_type(rec), 1); - if (err < 0) - goto done; /* * The index may consist of multiple levels, where each level may have @@ -524,7 +520,7 @@ static int reader_seek_indexed(struct reftable_reader *r, * highest layer that identifies the relevant index block as well as * the record inside that block that corresponds to our wanted key. */ - err = reader_seek_linear(&ti, &want_index); + err = reader_seek_linear(ti, &want_index); if (err < 0) goto done; @@ -550,36 +546,30 @@ static int reader_seek_indexed(struct reftable_reader *r, * all levels of the index only to find out that the key does * not exist. */ - err = table_iter_next(&ti, &index_result); + err = table_iter_next(ti, &index_result); if (err != 0) goto done; - err = reader_table_iter_at(r, &ti, index_result.u.idx.offset, 0); + err = reader_table_iter_at(r, ti, index_result.u.idx.offset, 0); if (err != 0) goto done; - err = block_iter_seek_key(&ti.bi, &ti.br, &want_index.u.idx.last_key); + err = block_iter_seek_key(&ti->bi, &ti->br, &want_index.u.idx.last_key); if (err < 0) goto done; - if (ti.typ == reftable_record_type(rec)) { + if (ti->typ == reftable_record_type(rec)) { err = 0; break; } - if (ti.typ != BLOCK_TYPE_INDEX) { + if (ti->typ != BLOCK_TYPE_INDEX) { err = REFTABLE_FORMAT_ERROR; goto done; } } - REFTABLE_ALLOC_ARRAY(malloced, 1); - *malloced = ti; - iterator_from_table_iter(it, malloced); - done: - if (err) - table_iter_close(&ti); reftable_record_release(&want_index); reftable_record_release(&index_result); return err; @@ -595,15 +585,15 @@ static int reader_seek_internal(struct reftable_reader *r, struct table_iter ti = TABLE_ITER_INIT, *p; int err; - if (idx > 0) - return reader_seek_indexed(r, it, rec); - - err = reader_start(r, &ti, reftable_record_type(rec), 0); + err = reader_start(r, &ti, reftable_record_type(rec), !!idx); if (err < 0) goto out; - err = reader_seek_linear(&ti, rec); - if (err < 0) + if (idx) + err = reader_seek_indexed(&ti, r, rec); + else + err = reader_seek_linear(&ti, rec); + if (err) goto out; REFTABLE_ALLOC_ARRAY(p, 1); -- cgit v1.3-5-g9baa From 81a03a323664d3d14a25e2ef8b29fe52dcef2126 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 13 May 2024 10:47:16 +0200 Subject: reftable/reader: separate concerns of table iter and reftable reader In "reftable/reader.c" we implement two different interfaces: - The reftable reader contains the logic to read reftables. - The table iterator is used to iterate through a single reftable read by the reader. The way those two types are used in the code is somewhat confusing though because seeking inside a table is implemented as if it was part of the reftable reader, even though it is ultimately more of a detail implemented by the table iterator. Make the boundary between those two types clearer by renaming functions that seek records in a table such that they clearly belong to the table iterator's logic. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/reader.c | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) (limited to 'reftable/reader.c') diff --git a/reftable/reader.c b/reftable/reader.c index cf7f126d8d..b210753441 100644 --- a/reftable/reader.c +++ b/reftable/reader.c @@ -386,9 +386,8 @@ static void iterator_from_table_iter(struct reftable_iterator *it, it->ops = &table_iter_vtable; } -static int reader_table_iter_at(struct reftable_reader *r, - struct table_iter *ti, uint64_t off, - uint8_t typ) +static int table_iter_seek_to(struct table_iter *ti, struct reftable_reader *r, + uint64_t off, uint8_t typ) { int err; @@ -403,8 +402,8 @@ static int reader_table_iter_at(struct reftable_reader *r, return 0; } -static int reader_start(struct reftable_reader *r, struct table_iter *ti, - uint8_t typ, int index) +static int table_iter_seek_start(struct table_iter *ti, struct reftable_reader *r, + uint8_t typ, int index) { struct reftable_reader_offsets *offs = reader_offsets_for(r, typ); uint64_t off = offs->offset; @@ -416,11 +415,11 @@ static int reader_start(struct reftable_reader *r, struct table_iter *ti, typ = BLOCK_TYPE_INDEX; } - return reader_table_iter_at(r, ti, off, typ); + return table_iter_seek_to(ti, r, off, typ); } -static int reader_seek_linear(struct table_iter *ti, - struct reftable_record *want) +static int table_iter_seek_linear(struct table_iter *ti, + struct reftable_record *want) { struct strbuf want_key = STRBUF_INIT; struct strbuf got_key = STRBUF_INIT; @@ -499,9 +498,8 @@ done: return err; } -static int reader_seek_indexed(struct table_iter *ti, - struct reftable_reader *r, - struct reftable_record *rec) +static int table_iter_seek_indexed(struct table_iter *ti, + struct reftable_record *rec) { struct reftable_record want_index = { .type = BLOCK_TYPE_INDEX, .u.idx = { .last_key = STRBUF_INIT } @@ -520,7 +518,7 @@ static int reader_seek_indexed(struct table_iter *ti, * highest layer that identifies the relevant index block as well as * the record inside that block that corresponds to our wanted key. */ - err = reader_seek_linear(ti, &want_index); + err = table_iter_seek_linear(ti, &want_index); if (err < 0) goto done; @@ -550,7 +548,7 @@ static int reader_seek_indexed(struct table_iter *ti, if (err != 0) goto done; - err = reader_table_iter_at(r, ti, index_result.u.idx.offset, 0); + err = table_iter_seek_to(ti, ti->r, index_result.u.idx.offset, 0); if (err != 0) goto done; @@ -585,14 +583,14 @@ static int reader_seek_internal(struct reftable_reader *r, struct table_iter ti = TABLE_ITER_INIT, *p; int err; - err = reader_start(r, &ti, reftable_record_type(rec), !!idx); + err = table_iter_seek_start(&ti, r, reftable_record_type(rec), !!idx); if (err < 0) goto out; if (idx) - err = reader_seek_indexed(&ti, r, rec); + err = table_iter_seek_indexed(&ti, rec); else - err = reader_seek_linear(&ti, rec); + err = table_iter_seek_linear(&ti, rec); if (err) goto out; @@ -742,7 +740,7 @@ static int reftable_reader_refs_for_unindexed(struct reftable_reader *r, int err; *ti = ti_empty; - err = reader_start(r, ti, BLOCK_TYPE_REF, 0); + err = table_iter_seek_start(ti, r, BLOCK_TYPE_REF, 0); if (err < 0) { reftable_free(ti); return err; -- cgit v1.3-5-g9baa From f1e3c12196b6c91086b58495499db0f4802fa5d1 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 13 May 2024 10:47:21 +0200 Subject: reftable/reader: inline `reader_seek_internal()` We have both `reader_seek()` and `reader_seek_internal()`, where the former function only exists so that we can exit early in case the given table has no records of the sought-after type. Merge these two functions into one. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/reader.c | 34 ++++++++++++---------------------- 1 file changed, 12 insertions(+), 22 deletions(-) (limited to 'reftable/reader.c') diff --git a/reftable/reader.c b/reftable/reader.c index b210753441..c3541e2c43 100644 --- a/reftable/reader.c +++ b/reftable/reader.c @@ -573,21 +573,25 @@ done: return err; } -static int reader_seek_internal(struct reftable_reader *r, - struct reftable_iterator *it, - struct reftable_record *rec) +static int reader_seek(struct reftable_reader *r, struct reftable_iterator *it, + struct reftable_record *rec) { - struct reftable_reader_offsets *offs = - reader_offsets_for(r, reftable_record_type(rec)); - uint64_t idx = offs->index_offset; + uint8_t typ = reftable_record_type(rec); + struct reftable_reader_offsets *offs = reader_offsets_for(r, typ); struct table_iter ti = TABLE_ITER_INIT, *p; int err; - err = table_iter_seek_start(&ti, r, reftable_record_type(rec), !!idx); + if (!offs->is_present) { + iterator_set_empty(it); + return 0; + } + + err = table_iter_seek_start(&ti, r, reftable_record_type(rec), + !!offs->index_offset); if (err < 0) goto out; - if (idx) + if (offs->index_offset) err = table_iter_seek_indexed(&ti, rec); else err = table_iter_seek_linear(&ti, rec); @@ -604,20 +608,6 @@ out: return err; } -static int reader_seek(struct reftable_reader *r, struct reftable_iterator *it, - struct reftable_record *rec) -{ - uint8_t typ = reftable_record_type(rec); - - struct reftable_reader_offsets *offs = reader_offsets_for(r, typ); - if (!offs->is_present) { - iterator_set_empty(it); - return 0; - } - - return reader_seek_internal(r, it, rec); -} - int reftable_reader_seek_ref(struct reftable_reader *r, struct reftable_iterator *it, const char *name) { -- cgit v1.3-5-g9baa From c82692f75591b320fbe5f4d2018505bd902f95e6 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 13 May 2024 10:47:26 +0200 Subject: reftable/reader: set up the reader when initializing table iterator All the seeking functions accept a `struct reftable_reader` as input such that they can use the reader to look up the respective blocks. Refactor the code to instead set up the reader as a member of `struct table_iter` during initialization such that we don't have to pass the reader on every single call. This step is required to move seeking of records into the generic `struct reftable_iterator` infrastructure. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/reader.c | 39 ++++++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 17 deletions(-) (limited to 'reftable/reader.c') diff --git a/reftable/reader.c b/reftable/reader.c index c3541e2c43..021608f638 100644 --- a/reftable/reader.c +++ b/reftable/reader.c @@ -224,8 +224,14 @@ struct table_iter { struct block_iter bi; int is_finished; }; -#define TABLE_ITER_INIT { \ - .bi = BLOCK_ITER_INIT \ + +static int table_iter_init(struct table_iter *ti, struct reftable_reader *r) +{ + struct block_iter bi = BLOCK_ITER_INIT; + memset(ti, 0, sizeof(*ti)); + ti->r = r; + ti->bi = bi; + return 0; } static int table_iter_next_in_block(struct table_iter *ti, @@ -386,26 +392,23 @@ static void iterator_from_table_iter(struct reftable_iterator *it, it->ops = &table_iter_vtable; } -static int table_iter_seek_to(struct table_iter *ti, struct reftable_reader *r, - uint64_t off, uint8_t typ) +static int table_iter_seek_to(struct table_iter *ti, uint64_t off, uint8_t typ) { int err; - err = reader_init_block_reader(r, &ti->br, off, typ); + err = reader_init_block_reader(ti->r, &ti->br, off, typ); if (err != 0) return err; - ti->r = r; ti->typ = block_reader_type(&ti->br); ti->block_off = off; block_iter_seek_start(&ti->bi, &ti->br); return 0; } -static int table_iter_seek_start(struct table_iter *ti, struct reftable_reader *r, - uint8_t typ, int index) +static int table_iter_seek_start(struct table_iter *ti, uint8_t typ, int index) { - struct reftable_reader_offsets *offs = reader_offsets_for(r, typ); + struct reftable_reader_offsets *offs = reader_offsets_for(ti->r, typ); uint64_t off = offs->offset; if (index) { off = offs->index_offset; @@ -415,7 +418,7 @@ static int table_iter_seek_start(struct table_iter *ti, struct reftable_reader * typ = BLOCK_TYPE_INDEX; } - return table_iter_seek_to(ti, r, off, typ); + return table_iter_seek_to(ti, off, typ); } static int table_iter_seek_linear(struct table_iter *ti, @@ -548,7 +551,7 @@ static int table_iter_seek_indexed(struct table_iter *ti, if (err != 0) goto done; - err = table_iter_seek_to(ti, ti->r, index_result.u.idx.offset, 0); + err = table_iter_seek_to(ti, index_result.u.idx.offset, 0); if (err != 0) goto done; @@ -578,7 +581,7 @@ static int reader_seek(struct reftable_reader *r, struct reftable_iterator *it, { uint8_t typ = reftable_record_type(rec); struct reftable_reader_offsets *offs = reader_offsets_for(r, typ); - struct table_iter ti = TABLE_ITER_INIT, *p; + struct table_iter ti, *p; int err; if (!offs->is_present) { @@ -586,7 +589,9 @@ static int reader_seek(struct reftable_reader *r, struct reftable_iterator *it, return 0; } - err = table_iter_seek_start(&ti, r, reftable_record_type(rec), + table_iter_init(&ti, r); + + err = table_iter_seek_start(&ti, reftable_record_type(rec), !!offs->index_offset); if (err < 0) goto out; @@ -722,15 +727,15 @@ static int reftable_reader_refs_for_unindexed(struct reftable_reader *r, struct reftable_iterator *it, uint8_t *oid) { - struct table_iter ti_empty = TABLE_ITER_INIT; - struct table_iter *ti = reftable_calloc(1, sizeof(*ti)); + struct table_iter *ti; struct filtering_ref_iterator *filter = NULL; struct filtering_ref_iterator empty = FILTERING_REF_ITERATOR_INIT; int oid_len = hash_size(r->hash_id); int err; - *ti = ti_empty; - err = table_iter_seek_start(ti, r, BLOCK_TYPE_REF, 0); + REFTABLE_ALLOC_ARRAY(ti, 1); + table_iter_init(ti, r); + err = table_iter_seek_start(ti, BLOCK_TYPE_REF, 0); if (err < 0) { reftable_free(ti); return err; -- cgit v1.3-5-g9baa From 5bf96e0c393827481afab55d3b73ae09ec1ea0de Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 13 May 2024 10:47:41 +0200 Subject: reftable/generic: move seeking of records into the iterator Reftable iterators are created by seeking on the parent structure of a corresponding record. For example, to create an iterator for the merged table you would call `reftable_merged_table_seek_ref()`. Most notably, it is not posible to create an iterator and then seek it afterwards. While this may be a bit easier to reason about, it comes with two significant downsides. The first downside is that the logic to find records is split up between the parent data structure and the iterator itself. Conceptually, it is more straight forward if all that logic was contained in a single place, which should be the iterator. The second and more significant downside is that it is impossible to reuse iterators for multiple seeks. Whenever you want to look up a record, you need to re-create the whole infrastructure again, which is quite a waste of time. Furthermore, it is impossible to optimize seeks, such as when seeking the same record multiple times. To address this, we essentially split up the concerns properly such that the parent data structure is responsible for setting up the iterator via a new `init_iter()` callback, whereas the iterator handles seeks via a new `seek()` callback. This will eventually allow us to call `seek()` on the iterator multiple times, where every iterator can potentially optimize for certain cases. Note that at this point in time we are not yet ready to reuse the iterators. This will be left for a future patch series. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/generic.c | 47 +++++++++++++++----- reftable/generic.h | 9 +++- reftable/iter.c | 15 +++++++ reftable/merged.c | 97 +++++++++++++++++++++-------------------- reftable/reader.c | 126 +++++++++++++++++++++++++++++------------------------ 5 files changed, 177 insertions(+), 117 deletions(-) (limited to 'reftable/reader.c') diff --git a/reftable/generic.c b/reftable/generic.c index b9f1c7c18a..1cf68fe124 100644 --- a/reftable/generic.c +++ b/reftable/generic.c @@ -12,25 +12,39 @@ https://developers.google.com/open-source/licenses/bsd #include "reftable-iterator.h" #include "reftable-generic.h" +void table_init_iter(struct reftable_table *tab, + struct reftable_iterator *it, + uint8_t typ) +{ + + tab->ops->init_iter(tab->table_arg, it, typ); +} + int reftable_table_seek_ref(struct reftable_table *tab, struct reftable_iterator *it, const char *name) { - struct reftable_record rec = { .type = BLOCK_TYPE_REF, - .u.ref = { - .refname = (char *)name, - } }; - return tab->ops->seek_record(tab->table_arg, it, &rec); + struct reftable_record want = { + .type = BLOCK_TYPE_REF, + .u.ref = { + .refname = (char *)name, + }, + }; + table_init_iter(tab, it, BLOCK_TYPE_REF); + return it->ops->seek(it->iter_arg, &want); } int reftable_table_seek_log(struct reftable_table *tab, struct reftable_iterator *it, const char *name) { - struct reftable_record rec = { .type = BLOCK_TYPE_LOG, - .u.log = { - .refname = (char *)name, - .update_index = ~((uint64_t)0), - } }; - return tab->ops->seek_record(tab->table_arg, it, &rec); + struct reftable_record want = { + .type = BLOCK_TYPE_LOG, + .u.log = { + .refname = (char *)name, + .update_index = ~((uint64_t)0), + }, + }; + table_init_iter(tab, it, BLOCK_TYPE_LOG); + return it->ops->seek(it->iter_arg, &want); } int reftable_table_read_ref(struct reftable_table *tab, const char *name, @@ -152,11 +166,21 @@ int reftable_iterator_next_log(struct reftable_iterator *it, return err; } +int iterator_seek(struct reftable_iterator *it, struct reftable_record *want) +{ + return it->ops->seek(it->iter_arg, want); +} + int iterator_next(struct reftable_iterator *it, struct reftable_record *rec) { return it->ops->next(it->iter_arg, rec); } +static int empty_iterator_seek(void *arg, struct reftable_record *want) +{ + return 0; +} + static int empty_iterator_next(void *arg, struct reftable_record *rec) { return 1; @@ -167,6 +191,7 @@ static void empty_iterator_close(void *arg) } static struct reftable_iterator_vtable empty_vtable = { + .seek = &empty_iterator_seek, .next = &empty_iterator_next, .close = &empty_iterator_close, }; diff --git a/reftable/generic.h b/reftable/generic.h index 98886a0640..8341fa570e 100644 --- a/reftable/generic.h +++ b/reftable/generic.h @@ -14,19 +14,24 @@ https://developers.google.com/open-source/licenses/bsd /* generic interface to reftables */ struct reftable_table_vtable { - int (*seek_record)(void *tab, struct reftable_iterator *it, - struct reftable_record *); + void (*init_iter)(void *tab, struct reftable_iterator *it, uint8_t typ); uint32_t (*hash_id)(void *tab); uint64_t (*min_update_index)(void *tab); uint64_t (*max_update_index)(void *tab); }; +void table_init_iter(struct reftable_table *tab, + struct reftable_iterator *it, + uint8_t typ); + struct reftable_iterator_vtable { + int (*seek)(void *iter_arg, struct reftable_record *want); int (*next)(void *iter_arg, struct reftable_record *rec); void (*close)(void *iter_arg); }; void iterator_set_empty(struct reftable_iterator *it); +int iterator_seek(struct reftable_iterator *it, struct reftable_record *want); int iterator_next(struct reftable_iterator *it, struct reftable_record *rec); #endif diff --git a/reftable/iter.c b/reftable/iter.c index aa9ac199b1..b4528fab47 100644 --- a/reftable/iter.c +++ b/reftable/iter.c @@ -23,6 +23,13 @@ static void filtering_ref_iterator_close(void *iter_arg) reftable_iterator_destroy(&fri->it); } +static int filtering_ref_iterator_seek(void *iter_arg, + struct reftable_record *want) +{ + struct filtering_ref_iterator *fri = iter_arg; + return iterator_seek(&fri->it, want); +} + static int filtering_ref_iterator_next(void *iter_arg, struct reftable_record *rec) { @@ -73,6 +80,7 @@ static int filtering_ref_iterator_next(void *iter_arg, } static struct reftable_iterator_vtable filtering_ref_iterator_vtable = { + .seek = &filtering_ref_iterator_seek, .next = &filtering_ref_iterator_next, .close = &filtering_ref_iterator_close, }; @@ -119,6 +127,12 @@ static int indexed_table_ref_iter_next_block(struct indexed_table_ref_iter *it) return 0; } +static int indexed_table_ref_iter_seek(void *p, struct reftable_record *want) +{ + BUG("seeking indexed table is not supported"); + return -1; +} + static int indexed_table_ref_iter_next(void *p, struct reftable_record *rec) { struct indexed_table_ref_iter *it = p; @@ -175,6 +189,7 @@ int new_indexed_table_ref_iter(struct indexed_table_ref_iter **dest, } static struct reftable_iterator_vtable indexed_table_ref_iter_vtable = { + .seek = &indexed_table_ref_iter_seek, .next = &indexed_table_ref_iter_next, .close = &indexed_table_ref_iter_close, }; diff --git a/reftable/merged.c b/reftable/merged.c index 18a2a6f09b..fc7946d90d 100644 --- a/reftable/merged.c +++ b/reftable/merged.c @@ -31,12 +31,18 @@ struct merged_iter { }; static void merged_iter_init(struct merged_iter *mi, - struct reftable_merged_table *mt) + struct reftable_merged_table *mt, + uint8_t typ) { memset(mi, 0, sizeof(*mi)); mi->advance_index = -1; mi->suppress_deletions = mt->suppress_deletions; + REFTABLE_CALLOC_ARRAY(mi->subiters, mt->stack_len); + for (size_t i = 0; i < mt->stack_len; i++) { + reftable_record_init(&mi->subiters[i].rec, typ); + table_init_iter(&mt->stack[i], &mi->subiters[i].iter, typ); + } mi->stack_len = mt->stack_len; } @@ -68,6 +74,27 @@ static int merged_iter_advance_subiter(struct merged_iter *mi, size_t idx) return 0; } +static int merged_iter_seek(struct merged_iter *mi, struct reftable_record *want) +{ + int err; + + mi->advance_index = -1; + + for (size_t i = 0; i < mi->stack_len; i++) { + err = iterator_seek(&mi->subiters[i].iter, want); + if (err < 0) + return err; + if (err > 0) + continue; + + err = merged_iter_advance_subiter(mi, i); + if (err < 0) + return err; + } + + return 0; +} + static int merged_iter_next_entry(struct merged_iter *mi, struct reftable_record *rec) { @@ -133,6 +160,11 @@ static int merged_iter_next_entry(struct merged_iter *mi, return 0; } +static int merged_iter_seek_void(void *it, struct reftable_record *want) +{ + return merged_iter_seek(it, want); +} + static int merged_iter_next_void(void *p, struct reftable_record *rec) { struct merged_iter *mi = p; @@ -147,6 +179,7 @@ static int merged_iter_next_void(void *p, struct reftable_record *rec) } static struct reftable_iterator_vtable merged_iter_vtable = { + .seek = merged_iter_seek_void, .next = &merged_iter_next_void, .close = &merged_iter_close, }; @@ -220,47 +253,13 @@ reftable_merged_table_min_update_index(struct reftable_merged_table *mt) return mt->min; } -static int reftable_table_seek_record(struct reftable_table *tab, - struct reftable_iterator *it, - struct reftable_record *rec) -{ - return tab->ops->seek_record(tab->table_arg, it, rec); -} - -static int merged_table_seek_record(struct reftable_merged_table *mt, - struct reftable_iterator *it, - struct reftable_record *rec) +static void merged_table_init_iter(struct reftable_merged_table *mt, + struct reftable_iterator *it, + uint8_t typ) { - struct merged_iter merged, *p; - int err; - - merged_iter_init(&merged, mt); - - for (size_t i = 0; i < mt->stack_len; i++) { - reftable_record_init(&merged.subiters[i].rec, - reftable_record_type(rec)); - - err = reftable_table_seek_record(&mt->stack[i], - &merged.subiters[i].iter, rec); - if (err < 0) - goto out; - if (err > 0) - continue; - - err = merged_iter_advance_subiter(&merged, i); - if (err < 0) - goto out; - } - - p = reftable_malloc(sizeof(*p)); - *p = merged; - iterator_from_merged_iter(it, p); - err = 0; - -out: - if (err < 0) - merged_iter_close(&merged); - return err; + struct merged_iter *mi = reftable_malloc(sizeof(*mi)); + merged_iter_init(mi, mt, typ); + iterator_from_merged_iter(it, mi); } int reftable_merged_table_seek_ref(struct reftable_merged_table *mt, @@ -273,7 +272,8 @@ int reftable_merged_table_seek_ref(struct reftable_merged_table *mt, .refname = (char *)name, }, }; - return merged_table_seek_record(mt, it, &rec); + merged_table_init_iter(mt, it, BLOCK_TYPE_REF); + return iterator_seek(it, &rec); } int reftable_merged_table_seek_log_at(struct reftable_merged_table *mt, @@ -285,7 +285,8 @@ int reftable_merged_table_seek_log_at(struct reftable_merged_table *mt, .refname = (char *)name, .update_index = update_index, } }; - return merged_table_seek_record(mt, it, &rec); + merged_table_init_iter(mt, it, BLOCK_TYPE_LOG); + return iterator_seek(it, &rec); } int reftable_merged_table_seek_log(struct reftable_merged_table *mt, @@ -301,11 +302,11 @@ uint32_t reftable_merged_table_hash_id(struct reftable_merged_table *mt) return mt->hash_id; } -static int reftable_merged_table_seek_void(void *tab, - struct reftable_iterator *it, - struct reftable_record *rec) +static void reftable_merged_table_init_iter_void(void *tab, + struct reftable_iterator *it, + uint8_t typ) { - return merged_table_seek_record(tab, it, rec); + merged_table_init_iter(tab, it, typ); } static uint32_t reftable_merged_table_hash_id_void(void *tab) @@ -324,7 +325,7 @@ static uint64_t reftable_merged_table_max_update_index_void(void *tab) } static struct reftable_table_vtable merged_table_vtable = { - .seek_record = reftable_merged_table_seek_void, + .init_iter = reftable_merged_table_init_iter_void, .hash_id = reftable_merged_table_hash_id_void, .min_update_index = reftable_merged_table_min_update_index_void, .max_update_index = reftable_merged_table_max_update_index_void, diff --git a/reftable/reader.c b/reftable/reader.c index 021608f638..a5a13cb0b9 100644 --- a/reftable/reader.c +++ b/reftable/reader.c @@ -369,29 +369,6 @@ static int table_iter_next(struct table_iter *ti, struct reftable_record *rec) } } -static int table_iter_next_void(void *ti, struct reftable_record *rec) -{ - return table_iter_next(ti, rec); -} - -static void table_iter_close_void(void *ti) -{ - table_iter_close(ti); -} - -static struct reftable_iterator_vtable table_iter_vtable = { - .next = &table_iter_next_void, - .close = &table_iter_close_void, -}; - -static void iterator_from_table_iter(struct reftable_iterator *it, - struct table_iter *ti) -{ - assert(!it->ops); - it->iter_arg = ti; - it->ops = &table_iter_vtable; -} - static int table_iter_seek_to(struct table_iter *ti, uint64_t off, uint8_t typ) { int err; @@ -576,43 +553,74 @@ done: return err; } -static int reader_seek(struct reftable_reader *r, struct reftable_iterator *it, - struct reftable_record *rec) +static int table_iter_seek(struct table_iter *ti, + struct reftable_record *want) { - uint8_t typ = reftable_record_type(rec); - struct reftable_reader_offsets *offs = reader_offsets_for(r, typ); - struct table_iter ti, *p; + uint8_t typ = reftable_record_type(want); + struct reftable_reader_offsets *offs = reader_offsets_for(ti->r, typ); int err; - if (!offs->is_present) { - iterator_set_empty(it); - return 0; - } - - table_iter_init(&ti, r); - - err = table_iter_seek_start(&ti, reftable_record_type(rec), + err = table_iter_seek_start(ti, reftable_record_type(want), !!offs->index_offset); if (err < 0) goto out; if (offs->index_offset) - err = table_iter_seek_indexed(&ti, rec); + err = table_iter_seek_indexed(ti, want); else - err = table_iter_seek_linear(&ti, rec); + err = table_iter_seek_linear(ti, want); if (err) goto out; - REFTABLE_ALLOC_ARRAY(p, 1); - *p = ti; - iterator_from_table_iter(it, p); - out: - if (err) - table_iter_close(&ti); return err; } +static int table_iter_seek_void(void *ti, struct reftable_record *want) +{ + return table_iter_seek(ti, want); +} + +static int table_iter_next_void(void *ti, struct reftable_record *rec) +{ + return table_iter_next(ti, rec); +} + +static void table_iter_close_void(void *ti) +{ + table_iter_close(ti); +} + +static struct reftable_iterator_vtable table_iter_vtable = { + .seek = &table_iter_seek_void, + .next = &table_iter_next_void, + .close = &table_iter_close_void, +}; + +static void iterator_from_table_iter(struct reftable_iterator *it, + struct table_iter *ti) +{ + assert(!it->ops); + it->iter_arg = ti; + it->ops = &table_iter_vtable; +} + +static void reader_init_iter(struct reftable_reader *r, + struct reftable_iterator *it, + uint8_t typ) +{ + struct reftable_reader_offsets *offs = reader_offsets_for(r, typ); + + if (offs->is_present) { + struct table_iter *ti; + REFTABLE_ALLOC_ARRAY(ti, 1); + table_iter_init(ti, r); + iterator_from_table_iter(it, ti); + } else { + iterator_set_empty(it); + } +} + int reftable_reader_seek_ref(struct reftable_reader *r, struct reftable_iterator *it, const char *name) { @@ -622,19 +630,23 @@ int reftable_reader_seek_ref(struct reftable_reader *r, .refname = (char *)name, }, }; - return reader_seek(r, it, &rec); + reader_init_iter(r, it, BLOCK_TYPE_REF); + return iterator_seek(it, &rec); } int reftable_reader_seek_log_at(struct reftable_reader *r, struct reftable_iterator *it, const char *name, uint64_t update_index) { - struct reftable_record rec = { .type = BLOCK_TYPE_LOG, - .u.log = { - .refname = (char *)name, - .update_index = update_index, - } }; - return reader_seek(r, it, &rec); + struct reftable_record rec = { + .type = BLOCK_TYPE_LOG, + .u.log = { + .refname = (char *)name, + .update_index = update_index, + }, + }; + reader_init_iter(r, it, BLOCK_TYPE_LOG); + return iterator_seek(it, &rec); } int reftable_reader_seek_log(struct reftable_reader *r, @@ -692,7 +704,8 @@ static int reftable_reader_refs_for_indexed(struct reftable_reader *r, struct indexed_table_ref_iter *itr = NULL; /* Look through the reverse index. */ - err = reader_seek(r, &oit, &want); + reader_init_iter(r, &oit, BLOCK_TYPE_OBJ); + err = iterator_seek(&oit, &want); if (err != 0) goto done; @@ -773,10 +786,11 @@ uint64_t reftable_reader_min_update_index(struct reftable_reader *r) /* generic table interface. */ -static int reftable_reader_seek_void(void *tab, struct reftable_iterator *it, - struct reftable_record *rec) +static void reftable_reader_init_iter_void(void *tab, + struct reftable_iterator *it, + uint8_t typ) { - return reader_seek(tab, it, rec); + reader_init_iter(tab, it, typ); } static uint32_t reftable_reader_hash_id_void(void *tab) @@ -795,7 +809,7 @@ static uint64_t reftable_reader_max_update_index_void(void *tab) } static struct reftable_table_vtable reader_vtable = { - .seek_record = reftable_reader_seek_void, + .init_iter = reftable_reader_init_iter_void, .hash_id = reftable_reader_hash_id_void, .min_update_index = reftable_reader_min_update_index_void, .max_update_index = reftable_reader_max_update_index_void, -- cgit v1.3-5-g9baa From 0e7be2b3ea444fc5375e76e42d81b1e1d3d4971f Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 13 May 2024 10:47:52 +0200 Subject: reftable/reader: adapt interface to allow reuse of iterators Refactor the interfaces exposed by `struct reftable_reader` and `struct table_iterator` such that they support iterator reuse. This is done by separating initialization of the iterator and seeking on it. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/reader.c | 31 ++++--------------------------- reftable/readwrite_test.c | 35 ++++++++++++++++++++++++----------- reftable/reftable-reader.h | 45 +++++++-------------------------------------- 3 files changed, 35 insertions(+), 76 deletions(-) (limited to 'reftable/reader.c') diff --git a/reftable/reader.c b/reftable/reader.c index a5a13cb0b9..bbdb4bdafa 100644 --- a/reftable/reader.c +++ b/reftable/reader.c @@ -621,39 +621,16 @@ static void reader_init_iter(struct reftable_reader *r, } } -int reftable_reader_seek_ref(struct reftable_reader *r, - struct reftable_iterator *it, const char *name) +void reftable_reader_init_ref_iterator(struct reftable_reader *r, + struct reftable_iterator *it) { - struct reftable_record rec = { - .type = BLOCK_TYPE_REF, - .u.ref = { - .refname = (char *)name, - }, - }; reader_init_iter(r, it, BLOCK_TYPE_REF); - return iterator_seek(it, &rec); } -int reftable_reader_seek_log_at(struct reftable_reader *r, - struct reftable_iterator *it, const char *name, - uint64_t update_index) +void reftable_reader_init_log_iterator(struct reftable_reader *r, + struct reftable_iterator *it) { - struct reftable_record rec = { - .type = BLOCK_TYPE_LOG, - .u.log = { - .refname = (char *)name, - .update_index = update_index, - }, - }; reader_init_iter(r, it, BLOCK_TYPE_LOG); - return iterator_seek(it, &rec); -} - -int reftable_reader_seek_log(struct reftable_reader *r, - struct reftable_iterator *it, const char *name) -{ - uint64_t max = ~((uint64_t)0); - return reftable_reader_seek_log_at(r, it, name, max); } void reader_close(struct reftable_reader *r) diff --git a/reftable/readwrite_test.c b/reftable/readwrite_test.c index a6dbd214c5..d99543bbd6 100644 --- a/reftable/readwrite_test.c +++ b/reftable/readwrite_test.c @@ -239,7 +239,9 @@ static void test_log_write_read(void) err = init_reader(&rd, &source, "file.log"); EXPECT_ERR(err); - err = reftable_reader_seek_ref(&rd, &it, names[N - 1]); + reftable_reader_init_ref_iterator(&rd, &it); + + err = reftable_iterator_seek_ref(&it, names[N - 1]); EXPECT_ERR(err); err = reftable_iterator_next_ref(&it, &ref); @@ -252,7 +254,9 @@ static void test_log_write_read(void) reftable_iterator_destroy(&it); reftable_ref_record_release(&ref); - err = reftable_reader_seek_log(&rd, &it, ""); + reftable_reader_init_log_iterator(&rd, &it); + + err = reftable_iterator_seek_log(&it, ""); EXPECT_ERR(err); i = 0; @@ -330,7 +334,8 @@ static void test_log_zlib_corruption(void) err = init_reader(&rd, &source, "file.log"); EXPECT_ERR(err); - err = reftable_reader_seek_log(&rd, &it, "refname"); + reftable_reader_init_log_iterator(&rd, &it); + err = reftable_iterator_seek_log(&it, "refname"); EXPECT(err == REFTABLE_ZLIB_ERROR); reftable_iterator_destroy(&it); @@ -358,7 +363,8 @@ static void test_table_read_write_sequential(void) err = init_reader(&rd, &source, "file.ref"); EXPECT_ERR(err); - err = reftable_reader_seek_ref(&rd, &it, ""); + reftable_reader_init_ref_iterator(&rd, &it); + err = reftable_iterator_seek_ref(&it, ""); EXPECT_ERR(err); while (1) { @@ -412,7 +418,8 @@ static void test_table_read_api(void) err = init_reader(&rd, &source, "file.ref"); EXPECT_ERR(err); - err = reftable_reader_seek_ref(&rd, &it, names[0]); + reftable_reader_init_ref_iterator(&rd, &it); + err = reftable_iterator_seek_ref(&it, names[0]); EXPECT_ERR(err); err = reftable_iterator_next_log(&it, &log); @@ -457,7 +464,8 @@ static void test_table_read_write_seek(int index, int hash_id) } for (i = 1; i < N; i++) { - int err = reftable_reader_seek_ref(&rd, &it, names[i]); + reftable_reader_init_ref_iterator(&rd, &it); + err = reftable_iterator_seek_ref(&it, names[i]); EXPECT_ERR(err); err = reftable_iterator_next_ref(&it, &ref); EXPECT_ERR(err); @@ -472,7 +480,8 @@ static void test_table_read_write_seek(int index, int hash_id) strbuf_addstr(&pastLast, names[N - 1]); strbuf_addstr(&pastLast, "/"); - err = reftable_reader_seek_ref(&rd, &it, pastLast.buf); + reftable_reader_init_ref_iterator(&rd, &it); + err = reftable_iterator_seek_ref(&it, pastLast.buf); if (err == 0) { struct reftable_ref_record ref = { NULL }; int err = reftable_iterator_next_ref(&it, &ref); @@ -576,7 +585,8 @@ static void test_table_refs_for(int indexed) rd.obj_offsets.is_present = 0; } - err = reftable_reader_seek_ref(&rd, &it, ""); + reftable_reader_init_ref_iterator(&rd, &it); + err = reftable_iterator_seek_ref(&it, ""); EXPECT_ERR(err); reftable_iterator_destroy(&it); @@ -639,7 +649,8 @@ static void test_write_empty_table(void) err = reftable_new_reader(&rd, &source, "filename"); EXPECT_ERR(err); - err = reftable_reader_seek_ref(rd, &it, ""); + reftable_reader_init_ref_iterator(rd, &it); + err = reftable_iterator_seek_ref(&it, ""); EXPECT_ERR(err); err = reftable_iterator_next_ref(&it, &rec); @@ -846,7 +857,8 @@ static void test_write_multiple_indices(void) * Seeking the log uses the log index now. In case there is any * confusion regarding indices we would notice here. */ - err = reftable_reader_seek_log(reader, &it, ""); + reftable_reader_init_log_iterator(reader, &it); + err = reftable_iterator_seek_log(&it, ""); EXPECT_ERR(err); reftable_iterator_destroy(&it); @@ -901,7 +913,8 @@ static void test_write_multi_level_index(void) /* * Seeking the last ref should work as expected. */ - err = reftable_reader_seek_ref(reader, &it, "refs/heads/199"); + reftable_reader_init_ref_iterator(reader, &it); + err = reftable_iterator_seek_ref(&it, "refs/heads/199"); EXPECT_ERR(err); reftable_iterator_destroy(&it); diff --git a/reftable/reftable-reader.h b/reftable/reftable-reader.h index 4a4bc2fdf8..52e4942b7b 100644 --- a/reftable/reftable-reader.h +++ b/reftable/reftable-reader.h @@ -36,48 +36,17 @@ struct reftable_table; int reftable_new_reader(struct reftable_reader **pp, struct reftable_block_source *src, const char *name); -/* reftable_reader_seek_ref returns an iterator where 'name' would be inserted - in the table. To seek to the start of the table, use name = "". - - example: - - struct reftable_reader *r = NULL; - int err = reftable_new_reader(&r, &src, "filename"); - if (err < 0) { ... } - struct reftable_iterator it = {0}; - err = reftable_reader_seek_ref(r, &it, "refs/heads/master"); - if (err < 0) { ... } - struct reftable_ref_record ref = {0}; - while (1) { - err = reftable_iterator_next_ref(&it, &ref); - if (err > 0) { - break; - } - if (err < 0) { - ..error handling.. - } - ..found.. - } - reftable_iterator_destroy(&it); - reftable_ref_record_release(&ref); -*/ -int reftable_reader_seek_ref(struct reftable_reader *r, - struct reftable_iterator *it, const char *name); +/* Initialize a reftable iterator for reading refs. */ +void reftable_reader_init_ref_iterator(struct reftable_reader *r, + struct reftable_iterator *it); + +/* Initialize a reftable iterator for reading logs. */ +void reftable_reader_init_log_iterator(struct reftable_reader *r, + struct reftable_iterator *it); /* returns the hash ID used in this table. */ uint32_t reftable_reader_hash_id(struct reftable_reader *r); -/* seek to logs for the given name, older than update_index. To seek to the - start of the table, use name = "". -*/ -int reftable_reader_seek_log_at(struct reftable_reader *r, - struct reftable_iterator *it, const char *name, - uint64_t update_index); - -/* seek to newest log entry for given name. */ -int reftable_reader_seek_log(struct reftable_reader *r, - struct reftable_iterator *it, const char *name); - /* closes and deallocates a reader. */ void reftable_reader_free(struct reftable_reader *); -- cgit v1.3-5-g9baa