From a155ab2bf4e16b8fe1e0644d9d57d2c62eecd452 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 8 Apr 2024 14:24:35 +0200 Subject: reftable/block: reuse zstream when writing log blocks While most reftable blocks are written to disk as-is, blocks for log records are compressed with zlib. To compress them we use `compress2()`, which is a simple wrapper around the more complex `zstream` interface that would require multiple function invocations. One downside of this interface is that `compress2()` will reallocate internal state of the `zstream` interface on every single invocation. Consequently, as we call `compress2()` for every single log block which we are about to write, this can lead to quite some memory allocation churn. Refactor the code so that the block writer reuses a `zstream`. This significantly reduces the number of bytes allocated when writing many refs in a single transaction, as demonstrated by the following benchmark that writes 100k refs in a single transaction. Before: HEAP SUMMARY: in use at exit: 671,931 bytes in 151 blocks total heap usage: 22,631,887 allocs, 22,631,736 frees, 1,854,670,793 bytes allocated After: HEAP SUMMARY: in use at exit: 671,931 bytes in 151 blocks total heap usage: 22,620,528 allocs, 22,620,377 frees, 1,245,549,984 bytes allocated Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/block.c | 80 ++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 52 insertions(+), 28 deletions(-) (limited to 'reftable/block.c') diff --git a/reftable/block.c b/reftable/block.c index e2a2cee58d..9129305515 100644 --- a/reftable/block.c +++ b/reftable/block.c @@ -76,6 +76,10 @@ void block_writer_init(struct block_writer *bw, uint8_t typ, uint8_t *buf, bw->entries = 0; bw->restart_len = 0; bw->last_key.len = 0; + if (!bw->zstream) { + REFTABLE_CALLOC_ARRAY(bw->zstream, 1); + deflateInit(bw->zstream, 9); + } } uint8_t block_writer_type(struct block_writer *bw) @@ -139,39 +143,57 @@ int block_writer_finish(struct block_writer *w) w->next += 2; put_be24(w->buf + 1 + w->header_off, w->next); + /* + * Log records are stored zlib-compressed. Note that the compression + * also spans over the restart points we have just written. + */ if (block_writer_type(w) == BLOCK_TYPE_LOG) { int block_header_skip = 4 + w->header_off; - uLongf src_len = w->next - block_header_skip; - uLongf dest_cap = src_len * 1.001 + 12; - uint8_t *compressed; - - REFTABLE_ALLOC_ARRAY(compressed, dest_cap); - - while (1) { - uLongf out_dest_len = dest_cap; - int zresult = compress2(compressed, &out_dest_len, - w->buf + block_header_skip, - src_len, 9); - if (zresult == Z_BUF_ERROR && dest_cap < LONG_MAX) { - dest_cap *= 2; - compressed = - reftable_realloc(compressed, dest_cap); - if (compressed) - continue; - } - - if (Z_OK != zresult) { - reftable_free(compressed); - return REFTABLE_ZLIB_ERROR; - } - - memcpy(w->buf + block_header_skip, compressed, - out_dest_len); - w->next = out_dest_len + block_header_skip; + uLongf src_len = w->next - block_header_skip, compressed_len; + unsigned char *compressed; + int ret; + + ret = deflateReset(w->zstream); + if (ret != Z_OK) + return REFTABLE_ZLIB_ERROR; + + /* + * Precompute the upper bound of how many bytes the compressed + * data may end up with. Combined with `Z_FINISH`, `deflate()` + * is guaranteed to return `Z_STREAM_END`. + */ + compressed_len = deflateBound(w->zstream, src_len); + REFTABLE_ALLOC_ARRAY(compressed, compressed_len); + + w->zstream->next_out = compressed; + w->zstream->avail_out = compressed_len; + w->zstream->next_in = w->buf + block_header_skip; + w->zstream->avail_in = src_len; + + /* + * We want to perform all decompression in a single step, which + * is why we can pass Z_FINISH here. As we have precomputed the + * deflated buffer's size via `deflateBound()` this function is + * guaranteed to succeed according to the zlib documentation. + */ + ret = deflate(w->zstream, Z_FINISH); + if (ret != Z_STREAM_END) { reftable_free(compressed); - break; + return REFTABLE_ZLIB_ERROR; } + + /* + * Overwrite the uncompressed data we have already written and + * adjust the `next` pointer to point right after the + * compressed data. + */ + memcpy(w->buf + block_header_skip, compressed, + w->zstream->total_out); + w->next = w->zstream->total_out + block_header_skip; + + reftable_free(compressed); } + return w->next; } @@ -425,6 +447,8 @@ done: void block_writer_release(struct block_writer *bw) { + deflateEnd(bw->zstream); + FREE_AND_NULL(bw->zstream); FREE_AND_NULL(bw->restarts); strbuf_release(&bw->last_key); /* the block is not owned. */ -- cgit v1.3-5-g9baa From fa74f32291d0e2b642345d25e16724567f9b881f Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 8 Apr 2024 14:24:40 +0200 Subject: reftable/block: reuse compressed array Similar to the preceding commit, let's reuse the `compressed` array that we use to store compressed data in. This results in a small reduction in memory allocations when writing many refs. Before: HEAP SUMMARY: in use at exit: 671,931 bytes in 151 blocks total heap usage: 22,620,528 allocs, 22,620,377 frees, 1,245,549,984 bytes allocated After: HEAP SUMMARY: in use at exit: 671,931 bytes in 151 blocks total heap usage: 22,618,257 allocs, 22,618,106 frees, 1,236,351,528 bytes allocated So while the reduction in allocations isn't really all that big, it's a low hanging fruit and thus there isn't much of a reason not to pick it. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/block.c | 14 +++++--------- reftable/block.h | 3 +++ 2 files changed, 8 insertions(+), 9 deletions(-) (limited to 'reftable/block.c') diff --git a/reftable/block.c b/reftable/block.c index 9129305515..f190c05520 100644 --- a/reftable/block.c +++ b/reftable/block.c @@ -150,7 +150,6 @@ int block_writer_finish(struct block_writer *w) if (block_writer_type(w) == BLOCK_TYPE_LOG) { int block_header_skip = 4 + w->header_off; uLongf src_len = w->next - block_header_skip, compressed_len; - unsigned char *compressed; int ret; ret = deflateReset(w->zstream); @@ -163,9 +162,9 @@ int block_writer_finish(struct block_writer *w) * is guaranteed to return `Z_STREAM_END`. */ compressed_len = deflateBound(w->zstream, src_len); - REFTABLE_ALLOC_ARRAY(compressed, compressed_len); + REFTABLE_ALLOC_GROW(w->compressed, compressed_len, w->compressed_cap); - w->zstream->next_out = compressed; + w->zstream->next_out = w->compressed; w->zstream->avail_out = compressed_len; w->zstream->next_in = w->buf + block_header_skip; w->zstream->avail_in = src_len; @@ -177,21 +176,17 @@ int block_writer_finish(struct block_writer *w) * guaranteed to succeed according to the zlib documentation. */ ret = deflate(w->zstream, Z_FINISH); - if (ret != Z_STREAM_END) { - reftable_free(compressed); + if (ret != Z_STREAM_END) return REFTABLE_ZLIB_ERROR; - } /* * Overwrite the uncompressed data we have already written and * adjust the `next` pointer to point right after the * compressed data. */ - memcpy(w->buf + block_header_skip, compressed, + memcpy(w->buf + block_header_skip, w->compressed, w->zstream->total_out); w->next = w->zstream->total_out + block_header_skip; - - reftable_free(compressed); } return w->next; @@ -450,6 +445,7 @@ void block_writer_release(struct block_writer *bw) deflateEnd(bw->zstream); FREE_AND_NULL(bw->zstream); FREE_AND_NULL(bw->restarts); + FREE_AND_NULL(bw->compressed); strbuf_release(&bw->last_key); /* the block is not owned. */ } diff --git a/reftable/block.h b/reftable/block.h index 1375957fc8..657498014c 100644 --- a/reftable/block.h +++ b/reftable/block.h @@ -19,6 +19,9 @@ https://developers.google.com/open-source/licenses/bsd */ struct block_writer { z_stream *zstream; + unsigned char *compressed; + size_t compressed_cap; + uint8_t *buf; uint32_t block_size; -- cgit v1.3-5-g9baa