From f57cc987a937bef3f6c8e4911cf98e0f51eee1e2 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 8 Apr 2024 14:23:52 +0200 Subject: [PATCH 01/11] refs/reftable: fix D/F conflict error message on ref copy The `write_copy_table()` function is shared between the reftable implementations for renaming and copying refs. The only difference between those two cases is that the rename will also delete the old reference, whereas copying won't. This has resulted in a bug though where we don't properly verify refname availability. When calling `refs_verify_refname_available()`, we always add the old ref name to the list of refs to be skipped when computing availability, which indicates that the name would be available even if it already exists at the current point in time. This is only the right thing to do for renames though, not for copies. The consequence of this bug is quite harmless because the reftable backend has its own checks for D/F conflicts further down in the call stack, and thus we refuse the update regardless of the bug. But all the user gets in this case is an uninformative message that copying the ref has failed, without any further details. Fix the bug and only add the old name to the skip-list in case we rename the ref. Consequently, this error case will now be handled by `refs_verify_refname_available()`, which knows to provide a proper error message. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- refs/reftable-backend.c | 3 ++- t/t0610-reftable-basics.sh | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c index e206d5a073..0358da14db 100644 --- a/refs/reftable-backend.c +++ b/refs/reftable-backend.c @@ -1351,7 +1351,8 @@ static int write_copy_table(struct reftable_writer *writer, void *cb_data) /* * Verify that the new refname is available. */ - string_list_insert(&skip, arg->oldname); + if (arg->delete_old) + string_list_insert(&skip, arg->oldname); ret = refs_verify_refname_available(&arg->refs->base, arg->newname, NULL, &skip, &errbuf); if (ret < 0) { diff --git a/t/t0610-reftable-basics.sh b/t/t0610-reftable-basics.sh index 686781192e..055231a707 100755 --- a/t/t0610-reftable-basics.sh +++ b/t/t0610-reftable-basics.sh @@ -730,6 +730,39 @@ test_expect_success 'reflog: updates via HEAD update HEAD reflog' ' ) ' +test_expect_success 'branch: copying branch with D/F conflict' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + test_commit A && + git branch branch && + cat >expect <<-EOF && + error: ${SQ}refs/heads/branch${SQ} exists; cannot create ${SQ}refs/heads/branch/moved${SQ} + fatal: branch copy failed + EOF + test_must_fail git branch -c branch branch/moved 2>err && + test_cmp expect err + ) +' + +test_expect_success 'branch: moving branch with D/F conflict' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + test_commit A && + git branch branch && + git branch conflict && + cat >expect <<-EOF && + error: ${SQ}refs/heads/conflict${SQ} exists; cannot create ${SQ}refs/heads/conflict/moved${SQ} + fatal: branch rename failed + EOF + test_must_fail git branch -m branch conflict/moved 2>err && + test_cmp expect err + ) +' + test_expect_success 'worktree: adding worktree creates separate stack' ' test_when_finished "rm -rf repo worktree" && git init repo && From 455d61b6d22b41176194483f82c9caa531661bec Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 8 Apr 2024 14:23:56 +0200 Subject: [PATCH 02/11] refs/reftable: perform explicit D/F check when writing symrefs We already perform explicit D/F checks in all reftable callbacks which write refs, except when writing symrefs. For one this leads to an error message which isn't perfectly actionable because we only tell the user that there was a D/F conflict, but not which refs conflicted with each other. But second, once all ref updating callbacks explicitly check for D/F conflicts, we can disable the D/F checks in the reftable library itself and thus avoid some duplicated efforts. Refactor the code that writes symref tables to explicitly call into `refs_verify_refname_available()` when writing symrefs. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- refs/reftable-backend.c | 20 +++++++++++++++++--- t/t0610-reftable-basics.sh | 2 +- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c index 0358da14db..8a54b0d8b2 100644 --- a/refs/reftable-backend.c +++ b/refs/reftable-backend.c @@ -1217,6 +1217,7 @@ static int reftable_be_pack_refs(struct ref_store *ref_store, struct write_create_symref_arg { struct reftable_ref_store *refs; struct reftable_stack *stack; + struct strbuf *err; const char *refname; const char *target; const char *logmsg; @@ -1239,6 +1240,11 @@ static int write_create_symref_table(struct reftable_writer *writer, void *cb_da reftable_writer_set_limits(writer, ts, ts); + ret = refs_verify_refname_available(&create->refs->base, create->refname, + NULL, NULL, create->err); + if (ret < 0) + return ret; + ret = reftable_writer_add_ref(writer, &ref); if (ret) return ret; @@ -1280,12 +1286,14 @@ static int reftable_be_create_symref(struct ref_store *ref_store, struct reftable_ref_store *refs = reftable_be_downcast(ref_store, REF_STORE_WRITE, "create_symref"); struct reftable_stack *stack = stack_for(refs, refname, &refname); + struct strbuf err = STRBUF_INIT; struct write_create_symref_arg arg = { .refs = refs, .stack = stack, .refname = refname, .target = target, .logmsg = logmsg, + .err = &err, }; int ret; @@ -1301,9 +1309,15 @@ static int reftable_be_create_symref(struct ref_store *ref_store, done: assert(ret != REFTABLE_API_ERROR); - if (ret) - error("unable to write symref for %s: %s", refname, - reftable_error_str(ret)); + if (ret) { + if (err.len) + error("%s", err.buf); + else + error("unable to write symref for %s: %s", refname, + reftable_error_str(ret)); + } + + strbuf_release(&err); return ret; } diff --git a/t/t0610-reftable-basics.sh b/t/t0610-reftable-basics.sh index 055231a707..12b0004781 100755 --- a/t/t0610-reftable-basics.sh +++ b/t/t0610-reftable-basics.sh @@ -255,7 +255,7 @@ test_expect_success 'ref transaction: creating symbolic ref fails with F/D confl git init repo && test_commit -C repo A && cat >expect <<-EOF && - error: unable to write symref for refs/heads: file/directory conflict + error: ${SQ}refs/heads/main${SQ} exists; cannot create ${SQ}refs/heads${SQ} EOF test_must_fail git -C repo symbolic-ref refs/heads refs/heads/foo 2>err && test_cmp expect err From 4af31dc84a1d63c0c78cb004421405aa5a4f1f80 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 8 Apr 2024 14:24:01 +0200 Subject: [PATCH 03/11] refs/reftable: skip duplicate name checks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All the callback functions which write refs in the reftable backend perform D/F conflict checks via `refs_verify_refname_available()`. But in reality we perform these D/F conflict checks a second time in the reftable library via `stack_check_addition()`. Interestingly, the code in the reftable library is inferior compared to the generic function: - It is slower than `refs_verify_refname_available()`, even though this can probably be optimized. - It does not provide a proper error message to the caller, and thus all the user would see is a generic "file/directory conflict" message. Disable the D/F conflict checks in the reftable library by setting the `skip_name_check` write option. This results in a non-negligible speedup when writing many refs. The following benchmark writes 100k refs in a single transaction: Benchmark 1: update-ref: create many refs (HEAD~) Time (mean ± σ): 3.241 s ± 0.040 s [User: 1.854 s, System: 1.381 s] Range (min … max): 3.185 s … 3.454 s 100 runs Benchmark 2: update-ref: create many refs (HEAD) Time (mean ± σ): 2.878 s ± 0.024 s [User: 1.506 s, System: 1.367 s] Range (min … max): 2.838 s … 2.960 s 100 runs Summary update-ref: create many refs (HEAD~) ran 1.13 ± 0.02 times faster than update-ref: create many refs (HEAD) Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- refs/reftable-backend.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c index 8a54b0d8b2..7515dd3019 100644 --- a/refs/reftable-backend.c +++ b/refs/reftable-backend.c @@ -247,6 +247,11 @@ static struct ref_store *reftable_be_init(struct repository *repo, refs->write_options.block_size = 4096; refs->write_options.hash_id = repo->hash_algo->format_id; refs->write_options.default_permissions = calc_shared_perm(0666 & ~mask); + /* + * We verify names via `refs_verify_refname_available()`, so there is + * no need to do the same checks in the reftable library again. + */ + refs->write_options.skip_name_check = 1; /* * Set up the main reftable stack that is hosted in GIT_COMMON_DIR. From 485c63cf5c8a325d2df14f2eeb22f1a01b55a11c Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 8 Apr 2024 14:24:06 +0200 Subject: [PATCH 04/11] reftable: remove name checks In the preceding commit we have disabled name checks in the "reftable" backend. These checks were responsible for verifying multiple things when writing records to the reftable stack: - Detecting file/directory conflicts. Starting with the preceding commits this is now handled by the reftable backend itself via `refs_verify_refname_available()`. - Validating refnames. This is handled by `check_refname_format()` in the generic ref transacton layer. The code in the reftable library is thus not used anymore and likely to bitrot over time. Remove it. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- Makefile | 2 - refs/reftable-backend.c | 5 - reftable/error.c | 2 - reftable/refname.c | 209 ------------------------------------- reftable/refname.h | 29 ----- reftable/refname_test.c | 101 ------------------ reftable/reftable-error.h | 3 - reftable/reftable-tests.h | 1 - reftable/reftable-writer.h | 4 - reftable/stack.c | 67 +----------- reftable/stack_test.c | 39 ------- t/helper/test-reftable.c | 1 - 12 files changed, 1 insertion(+), 462 deletions(-) delete mode 100644 reftable/refname.c delete mode 100644 reftable/refname.h delete mode 100644 reftable/refname_test.c diff --git a/Makefile b/Makefile index c43c1bd1a0..05e3d37581 100644 --- a/Makefile +++ b/Makefile @@ -2655,7 +2655,6 @@ REFTABLE_OBJS += reftable/merged.o REFTABLE_OBJS += reftable/pq.o REFTABLE_OBJS += reftable/reader.o REFTABLE_OBJS += reftable/record.o -REFTABLE_OBJS += reftable/refname.o REFTABLE_OBJS += reftable/generic.o REFTABLE_OBJS += reftable/stack.o REFTABLE_OBJS += reftable/tree.o @@ -2668,7 +2667,6 @@ REFTABLE_TEST_OBJS += reftable/merged_test.o REFTABLE_TEST_OBJS += reftable/pq_test.o REFTABLE_TEST_OBJS += reftable/record_test.o REFTABLE_TEST_OBJS += reftable/readwrite_test.o -REFTABLE_TEST_OBJS += reftable/refname_test.o REFTABLE_TEST_OBJS += reftable/stack_test.o REFTABLE_TEST_OBJS += reftable/test_framework.o REFTABLE_TEST_OBJS += reftable/tree_test.o diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c index 7515dd3019..8a54b0d8b2 100644 --- a/refs/reftable-backend.c +++ b/refs/reftable-backend.c @@ -247,11 +247,6 @@ static struct ref_store *reftable_be_init(struct repository *repo, refs->write_options.block_size = 4096; refs->write_options.hash_id = repo->hash_algo->format_id; refs->write_options.default_permissions = calc_shared_perm(0666 & ~mask); - /* - * We verify names via `refs_verify_refname_available()`, so there is - * no need to do the same checks in the reftable library again. - */ - refs->write_options.skip_name_check = 1; /* * Set up the main reftable stack that is hosted in GIT_COMMON_DIR. diff --git a/reftable/error.c b/reftable/error.c index 0d1766735e..169f89d2f1 100644 --- a/reftable/error.c +++ b/reftable/error.c @@ -27,8 +27,6 @@ const char *reftable_error_str(int err) return "misuse of the reftable API"; case REFTABLE_ZLIB_ERROR: return "zlib failure"; - case REFTABLE_NAME_CONFLICT: - return "file/directory conflict"; case REFTABLE_EMPTY_TABLE_ERROR: return "wrote empty table"; case REFTABLE_REFNAME_ERROR: diff --git a/reftable/refname.c b/reftable/refname.c deleted file mode 100644 index 7570e4acf9..0000000000 --- a/reftable/refname.c +++ /dev/null @@ -1,209 +0,0 @@ -/* - Copyright 2020 Google LLC - - Use of this source code is governed by a BSD-style - license that can be found in the LICENSE file or at - https://developers.google.com/open-source/licenses/bsd -*/ - -#include "system.h" -#include "reftable-error.h" -#include "basics.h" -#include "refname.h" -#include "reftable-iterator.h" - -struct find_arg { - char **names; - const char *want; -}; - -static int find_name(size_t k, void *arg) -{ - struct find_arg *f_arg = arg; - return strcmp(f_arg->names[k], f_arg->want) >= 0; -} - -static int modification_has_ref(struct modification *mod, const char *name) -{ - struct reftable_ref_record ref = { NULL }; - int err = 0; - - if (mod->add_len > 0) { - struct find_arg arg = { - .names = mod->add, - .want = name, - }; - int idx = binsearch(mod->add_len, find_name, &arg); - if (idx < mod->add_len && !strcmp(mod->add[idx], name)) { - return 0; - } - } - - if (mod->del_len > 0) { - struct find_arg arg = { - .names = mod->del, - .want = name, - }; - int idx = binsearch(mod->del_len, find_name, &arg); - if (idx < mod->del_len && !strcmp(mod->del[idx], name)) { - return 1; - } - } - - err = reftable_table_read_ref(&mod->tab, name, &ref); - reftable_ref_record_release(&ref); - return err; -} - -static void modification_release(struct modification *mod) -{ - /* don't delete the strings themselves; they're owned by ref records. - */ - FREE_AND_NULL(mod->add); - FREE_AND_NULL(mod->del); - mod->add_len = 0; - mod->del_len = 0; -} - -static int modification_has_ref_with_prefix(struct modification *mod, - const char *prefix) -{ - struct reftable_iterator it = { NULL }; - struct reftable_ref_record ref = { NULL }; - int err = 0; - - if (mod->add_len > 0) { - struct find_arg arg = { - .names = mod->add, - .want = prefix, - }; - int idx = binsearch(mod->add_len, find_name, &arg); - if (idx < mod->add_len && - !strncmp(prefix, mod->add[idx], strlen(prefix))) - goto done; - } - err = reftable_table_seek_ref(&mod->tab, &it, prefix); - if (err) - goto done; - - while (1) { - err = reftable_iterator_next_ref(&it, &ref); - if (err) - goto done; - - if (mod->del_len > 0) { - struct find_arg arg = { - .names = mod->del, - .want = ref.refname, - }; - int idx = binsearch(mod->del_len, find_name, &arg); - if (idx < mod->del_len && - !strcmp(ref.refname, mod->del[idx])) { - continue; - } - } - - if (strncmp(ref.refname, prefix, strlen(prefix))) { - err = 1; - goto done; - } - err = 0; - goto done; - } - -done: - reftable_ref_record_release(&ref); - reftable_iterator_destroy(&it); - return err; -} - -static int validate_refname(const char *name) -{ - while (1) { - char *next = strchr(name, '/'); - if (!*name) { - return REFTABLE_REFNAME_ERROR; - } - if (!next) { - return 0; - } - if (next - name == 0 || (next - name == 1 && *name == '.') || - (next - name == 2 && name[0] == '.' && name[1] == '.')) - return REFTABLE_REFNAME_ERROR; - name = next + 1; - } - return 0; -} - -int validate_ref_record_addition(struct reftable_table tab, - struct reftable_ref_record *recs, size_t sz) -{ - struct modification mod = { - .tab = tab, - .add = reftable_calloc(sz, sizeof(*mod.add)), - .del = reftable_calloc(sz, sizeof(*mod.del)), - }; - int i = 0; - int err = 0; - for (; i < sz; i++) { - if (reftable_ref_record_is_deletion(&recs[i])) { - mod.del[mod.del_len++] = recs[i].refname; - } else { - mod.add[mod.add_len++] = recs[i].refname; - } - } - - err = modification_validate(&mod); - modification_release(&mod); - return err; -} - -static void strbuf_trim_component(struct strbuf *sl) -{ - while (sl->len > 0) { - int is_slash = (sl->buf[sl->len - 1] == '/'); - strbuf_setlen(sl, sl->len - 1); - if (is_slash) - break; - } -} - -int modification_validate(struct modification *mod) -{ - struct strbuf slashed = STRBUF_INIT; - int err = 0; - int i = 0; - for (; i < mod->add_len; i++) { - err = validate_refname(mod->add[i]); - if (err) - goto done; - strbuf_reset(&slashed); - strbuf_addstr(&slashed, mod->add[i]); - strbuf_addstr(&slashed, "/"); - - err = modification_has_ref_with_prefix(mod, slashed.buf); - if (err == 0) { - err = REFTABLE_NAME_CONFLICT; - goto done; - } - if (err < 0) - goto done; - - strbuf_reset(&slashed); - strbuf_addstr(&slashed, mod->add[i]); - while (slashed.len) { - strbuf_trim_component(&slashed); - err = modification_has_ref(mod, slashed.buf); - if (err == 0) { - err = REFTABLE_NAME_CONFLICT; - goto done; - } - if (err < 0) - goto done; - } - } - err = 0; -done: - strbuf_release(&slashed); - return err; -} diff --git a/reftable/refname.h b/reftable/refname.h deleted file mode 100644 index a24b40fcb4..0000000000 --- a/reftable/refname.h +++ /dev/null @@ -1,29 +0,0 @@ -/* - Copyright 2020 Google LLC - - Use of this source code is governed by a BSD-style - license that can be found in the LICENSE file or at - https://developers.google.com/open-source/licenses/bsd -*/ -#ifndef REFNAME_H -#define REFNAME_H - -#include "reftable-record.h" -#include "reftable-generic.h" - -struct modification { - struct reftable_table tab; - - char **add; - size_t add_len; - - char **del; - size_t del_len; -}; - -int validate_ref_record_addition(struct reftable_table tab, - struct reftable_ref_record *recs, size_t sz); - -int modification_validate(struct modification *mod); - -#endif diff --git a/reftable/refname_test.c b/reftable/refname_test.c deleted file mode 100644 index b9cc62554e..0000000000 --- a/reftable/refname_test.c +++ /dev/null @@ -1,101 +0,0 @@ -/* -Copyright 2020 Google LLC - -Use of this source code is governed by a BSD-style -license that can be found in the LICENSE file or at -https://developers.google.com/open-source/licenses/bsd -*/ - -#include "basics.h" -#include "block.h" -#include "blocksource.h" -#include "reader.h" -#include "record.h" -#include "refname.h" -#include "reftable-error.h" -#include "reftable-writer.h" -#include "system.h" - -#include "test_framework.h" -#include "reftable-tests.h" - -struct testcase { - char *add; - char *del; - int error_code; -}; - -static void test_conflict(void) -{ - struct reftable_write_options opts = { 0 }; - struct strbuf buf = STRBUF_INIT; - struct reftable_writer *w = - reftable_new_writer(&strbuf_add_void, &noop_flush, &buf, &opts); - struct reftable_ref_record rec = { - .refname = "a/b", - .value_type = REFTABLE_REF_SYMREF, - .value.symref = "destination", /* make sure it's not a symref. - */ - .update_index = 1, - }; - int err; - int i; - struct reftable_block_source source = { NULL }; - struct reftable_reader *rd = NULL; - struct reftable_table tab = { NULL }; - struct testcase cases[] = { - { "a/b/c", NULL, REFTABLE_NAME_CONFLICT }, - { "b", NULL, 0 }, - { "a", NULL, REFTABLE_NAME_CONFLICT }, - { "a", "a/b", 0 }, - - { "p/", NULL, REFTABLE_REFNAME_ERROR }, - { "p//q", NULL, REFTABLE_REFNAME_ERROR }, - { "p/./q", NULL, REFTABLE_REFNAME_ERROR }, - { "p/../q", NULL, REFTABLE_REFNAME_ERROR }, - - { "a/b/c", "a/b", 0 }, - { NULL, "a//b", 0 }, - }; - reftable_writer_set_limits(w, 1, 1); - - err = reftable_writer_add_ref(w, &rec); - EXPECT_ERR(err); - - err = reftable_writer_close(w); - EXPECT_ERR(err); - reftable_writer_free(w); - - block_source_from_strbuf(&source, &buf); - err = reftable_new_reader(&rd, &source, "filename"); - EXPECT_ERR(err); - - reftable_table_from_reader(&tab, rd); - - for (i = 0; i < ARRAY_SIZE(cases); i++) { - struct modification mod = { - .tab = tab, - }; - - if (cases[i].add) { - mod.add = &cases[i].add; - mod.add_len = 1; - } - if (cases[i].del) { - mod.del = &cases[i].del; - mod.del_len = 1; - } - - err = modification_validate(&mod); - EXPECT(err == cases[i].error_code); - } - - reftable_reader_free(rd); - strbuf_release(&buf); -} - -int refname_test_main(int argc, const char *argv[]) -{ - RUN_TEST(test_conflict); - return 0; -} diff --git a/reftable/reftable-error.h b/reftable/reftable-error.h index 4c457aaaf8..3a5f5b92c6 100644 --- a/reftable/reftable-error.h +++ b/reftable/reftable-error.h @@ -48,9 +48,6 @@ enum reftable_error { /* Wrote a table without blocks. */ REFTABLE_EMPTY_TABLE_ERROR = -8, - /* Dir/file conflict. */ - REFTABLE_NAME_CONFLICT = -9, - /* Invalid ref name. */ REFTABLE_REFNAME_ERROR = -10, diff --git a/reftable/reftable-tests.h b/reftable/reftable-tests.h index 0019cbcfa4..114cc3d053 100644 --- a/reftable/reftable-tests.h +++ b/reftable/reftable-tests.h @@ -14,7 +14,6 @@ int block_test_main(int argc, const char **argv); int merged_test_main(int argc, const char **argv); int pq_test_main(int argc, const char **argv); int record_test_main(int argc, const char **argv); -int refname_test_main(int argc, const char **argv); int readwrite_test_main(int argc, const char **argv); int stack_test_main(int argc, const char **argv); int tree_test_main(int argc, const char **argv); diff --git a/reftable/reftable-writer.h b/reftable/reftable-writer.h index 7c7cae5f99..3c119e2bbb 100644 --- a/reftable/reftable-writer.h +++ b/reftable/reftable-writer.h @@ -38,10 +38,6 @@ struct reftable_write_options { /* Default mode for creating files. If unset, use 0666 (+umask) */ unsigned int default_permissions; - /* boolean: do not check ref names for validity or dir/file conflicts. - */ - unsigned skip_name_check : 1; - /* boolean: copy log messages exactly. If unset, check that the message * is a single line, and add '\n' if missing. */ diff --git a/reftable/stack.c b/reftable/stack.c index 1ecf1b9751..e264df5ced 100644 --- a/reftable/stack.c +++ b/reftable/stack.c @@ -12,8 +12,8 @@ license that can be found in the LICENSE file or at #include "system.h" #include "merged.h" #include "reader.h" -#include "refname.h" #include "reftable-error.h" +#include "reftable-generic.h" #include "reftable-record.h" #include "reftable-merged.h" #include "writer.h" @@ -27,8 +27,6 @@ static int stack_write_compact(struct reftable_stack *st, struct reftable_writer *wr, size_t first, size_t last, struct reftable_log_expiry_config *config); -static int stack_check_addition(struct reftable_stack *st, - const char *new_tab_name); static void reftable_addition_close(struct reftable_addition *add); static int reftable_stack_reload_maybe_reuse(struct reftable_stack *st, int reuse_open); @@ -781,10 +779,6 @@ int reftable_addition_add(struct reftable_addition *add, goto done; } - err = stack_check_addition(add->stack, get_tempfile_path(tab_file)); - if (err < 0) - goto done; - if (wr->min_update_index < add->next_update_index) { err = REFTABLE_API_ERROR; goto done; @@ -1340,65 +1334,6 @@ int reftable_stack_read_log(struct reftable_stack *st, const char *refname, return err; } -static int stack_check_addition(struct reftable_stack *st, - const char *new_tab_name) -{ - int err = 0; - struct reftable_block_source src = { NULL }; - struct reftable_reader *rd = NULL; - struct reftable_table tab = { NULL }; - struct reftable_ref_record *refs = NULL; - struct reftable_iterator it = { NULL }; - int cap = 0; - int len = 0; - int i = 0; - - if (st->config.skip_name_check) - return 0; - - err = reftable_block_source_from_file(&src, new_tab_name); - if (err < 0) - goto done; - - err = reftable_new_reader(&rd, &src, new_tab_name); - if (err < 0) - goto done; - - err = reftable_reader_seek_ref(rd, &it, ""); - if (err > 0) { - err = 0; - goto done; - } - if (err < 0) - goto done; - - while (1) { - struct reftable_ref_record ref = { NULL }; - err = reftable_iterator_next_ref(&it, &ref); - if (err > 0) - break; - if (err < 0) - goto done; - - REFTABLE_ALLOC_GROW(refs, len + 1, cap); - refs[len++] = ref; - } - - reftable_table_from_merged_table(&tab, reftable_stack_merged_table(st)); - - err = validate_ref_record_addition(tab, refs, len); - -done: - for (i = 0; i < len; i++) { - reftable_ref_record_release(&refs[i]); - } - - free(refs); - reftable_iterator_destroy(&it); - reftable_reader_free(rd); - return err; -} - static int is_table_name(const char *s) { const char *dot = strrchr(s, '.'); diff --git a/reftable/stack_test.c b/reftable/stack_test.c index 0dc9a44648..b88097c3b6 100644 --- a/reftable/stack_test.c +++ b/reftable/stack_test.c @@ -353,44 +353,6 @@ static void test_reftable_stack_transaction_api_performs_auto_compaction(void) clear_dir(dir); } -static void test_reftable_stack_validate_refname(void) -{ - struct reftable_write_options cfg = { 0 }; - struct reftable_stack *st = NULL; - int err; - char *dir = get_tmp_dir(__LINE__); - - int i; - struct reftable_ref_record ref = { - .refname = "a/b", - .update_index = 1, - .value_type = REFTABLE_REF_SYMREF, - .value.symref = "master", - }; - char *additions[] = { "a", "a/b/c" }; - - err = reftable_new_stack(&st, dir, cfg); - EXPECT_ERR(err); - - err = reftable_stack_add(st, &write_test_ref, &ref); - EXPECT_ERR(err); - - for (i = 0; i < ARRAY_SIZE(additions); i++) { - struct reftable_ref_record ref = { - .refname = additions[i], - .update_index = 1, - .value_type = REFTABLE_REF_SYMREF, - .value.symref = "master", - }; - - err = reftable_stack_add(st, &write_test_ref, &ref); - EXPECT(err == REFTABLE_NAME_CONFLICT); - } - - reftable_stack_destroy(st); - clear_dir(dir); -} - static int write_error(struct reftable_writer *wr, void *arg) { return *((int *)arg); @@ -1097,7 +1059,6 @@ int stack_test_main(int argc, const char *argv[]) RUN_TEST(test_reftable_stack_transaction_api_performs_auto_compaction); RUN_TEST(test_reftable_stack_update_index_check); RUN_TEST(test_reftable_stack_uptodate); - RUN_TEST(test_reftable_stack_validate_refname); RUN_TEST(test_sizes_to_segments); RUN_TEST(test_sizes_to_segments_all_equal); RUN_TEST(test_sizes_to_segments_empty); diff --git a/t/helper/test-reftable.c b/t/helper/test-reftable.c index 00237ef0d9..bae731669c 100644 --- a/t/helper/test-reftable.c +++ b/t/helper/test-reftable.c @@ -13,7 +13,6 @@ int cmd__reftable(int argc, const char **argv) readwrite_test_main(argc, argv); merged_test_main(argc, argv); stack_test_main(argc, argv); - refname_test_main(argc, argv); return 0; } From 44afd85fbd4cf69a0f030df98b3626f82e51bb33 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 8 Apr 2024 14:24:11 +0200 Subject: [PATCH 05/11] refs/reftable: don't recompute committer ident MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In order to write reflog entries we need to compute the committer's identity as it gets encoded in the log record itself. The reftable backend does this via `git_committer_info()` and `split_ident_line()` in `fill_reftable_log_record()`, which use the Git config as well as environment variables to figure out the identity. While most callers would only call `fill_reftable_log_record()` once or twice, `write_transaction_table()` will call it as many times as there are queued ref updates. This can be quite a waste of effort when writing many refs with reflog entries in a single transaction. Refactor the code to pre-compute the committer information. This results in a small speedup when writing 100000 refs in a single transaction: Benchmark 1: update-ref: create many refs (HEAD~) Time (mean ± σ): 2.895 s ± 0.020 s [User: 1.516 s, System: 1.374 s] Range (min … max): 2.868 s … 2.983 s 100 runs Benchmark 2: update-ref: create many refs (HEAD) Time (mean ± σ): 2.845 s ± 0.017 s [User: 1.461 s, System: 1.379 s] Range (min … max): 2.803 s … 2.913 s 100 runs Summary update-ref: create many refs (HEAD) ran 1.02 ± 0.01 times faster than update-ref: create many refs (HEAD~) Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- refs/reftable-backend.c | 52 +++++++++++++++++++++++++++-------------- 1 file changed, 34 insertions(+), 18 deletions(-) diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c index 8a54b0d8b2..a5ef36ffa9 100644 --- a/refs/reftable-backend.c +++ b/refs/reftable-backend.c @@ -171,32 +171,30 @@ static int should_write_log(struct ref_store *refs, const char *refname) } } -static void fill_reftable_log_record(struct reftable_log_record *log) +static void fill_reftable_log_record(struct reftable_log_record *log, const struct ident_split *split) { - const char *info = git_committer_info(0); - struct ident_split split = {0}; + const char *tz_begin; int sign = 1; - if (split_ident_line(&split, info, strlen(info))) - BUG("failed splitting committer info"); - reftable_log_record_release(log); log->value_type = REFTABLE_LOG_UPDATE; log->value.update.name = - xstrndup(split.name_begin, split.name_end - split.name_begin); + xstrndup(split->name_begin, split->name_end - split->name_begin); log->value.update.email = - xstrndup(split.mail_begin, split.mail_end - split.mail_begin); - log->value.update.time = atol(split.date_begin); - if (*split.tz_begin == '-') { + xstrndup(split->mail_begin, split->mail_end - split->mail_begin); + log->value.update.time = atol(split->date_begin); + + tz_begin = split->tz_begin; + if (*tz_begin == '-') { sign = -1; - split.tz_begin++; + tz_begin++; } - if (*split.tz_begin == '+') { + if (*tz_begin == '+') { sign = 1; - split.tz_begin++; + tz_begin++; } - log->value.update.tz_offset = sign * atoi(split.tz_begin); + log->value.update.tz_offset = sign * atoi(tz_begin); } static int read_ref_without_reload(struct reftable_stack *stack, @@ -1018,9 +1016,15 @@ static int write_transaction_table(struct reftable_writer *writer, void *cb_data reftable_stack_merged_table(arg->stack); uint64_t ts = reftable_stack_next_update_index(arg->stack); struct reftable_log_record *logs = NULL; + struct ident_split committer_ident = {0}; size_t logs_nr = 0, logs_alloc = 0, i; + const char *committer_info; int ret = 0; + committer_info = git_committer_info(0); + if (split_ident_line(&committer_ident, committer_info, strlen(committer_info))) + BUG("failed splitting committer info"); + QSORT(arg->updates, arg->updates_nr, transaction_update_cmp); reftable_writer_set_limits(writer, ts, ts); @@ -1086,7 +1090,7 @@ static int write_transaction_table(struct reftable_writer *writer, void *cb_data log = &logs[logs_nr++]; memset(log, 0, sizeof(*log)); - fill_reftable_log_record(log); + fill_reftable_log_record(log, &committer_ident); log->update_index = ts; log->refname = xstrdup(u->refname); memcpy(log->value.update.new_hash, u->new_oid.hash, GIT_MAX_RAWSZ); @@ -1233,9 +1237,11 @@ static int write_create_symref_table(struct reftable_writer *writer, void *cb_da .value.symref = (char *)create->target, .update_index = ts, }; + struct ident_split committer_ident = {0}; struct reftable_log_record log = {0}; struct object_id new_oid; struct object_id old_oid; + const char *committer_info; int ret; reftable_writer_set_limits(writer, ts, ts); @@ -1263,7 +1269,11 @@ static int write_create_symref_table(struct reftable_writer *writer, void *cb_da !should_write_log(&create->refs->base, create->refname)) return 0; - fill_reftable_log_record(&log); + committer_info = git_committer_info(0); + if (split_ident_line(&committer_ident, committer_info, strlen(committer_info))) + BUG("failed splitting committer info"); + + fill_reftable_log_record(&log, &committer_ident); log.refname = xstrdup(create->refname); log.update_index = ts; log.value.update.message = xstrndup(create->logmsg, @@ -1339,10 +1349,16 @@ static int write_copy_table(struct reftable_writer *writer, void *cb_data) struct reftable_log_record old_log = {0}, *logs = NULL; struct reftable_iterator it = {0}; struct string_list skip = STRING_LIST_INIT_NODUP; + struct ident_split committer_ident = {0}; struct strbuf errbuf = STRBUF_INIT; size_t logs_nr = 0, logs_alloc = 0, i; + const char *committer_info; int ret; + committer_info = git_committer_info(0); + if (split_ident_line(&committer_ident, committer_info, strlen(committer_info))) + BUG("failed splitting committer info"); + if (reftable_stack_read_ref(arg->stack, arg->oldname, &old_ref)) { ret = error(_("refname %s not found"), arg->oldname); goto done; @@ -1417,7 +1433,7 @@ static int write_copy_table(struct reftable_writer *writer, void *cb_data) ALLOC_GROW(logs, logs_nr + 1, logs_alloc); memset(&logs[logs_nr], 0, sizeof(logs[logs_nr])); - fill_reftable_log_record(&logs[logs_nr]); + fill_reftable_log_record(&logs[logs_nr], &committer_ident); logs[logs_nr].refname = (char *)arg->newname; logs[logs_nr].update_index = deletion_ts; logs[logs_nr].value.update.message = @@ -1449,7 +1465,7 @@ static int write_copy_table(struct reftable_writer *writer, void *cb_data) */ ALLOC_GROW(logs, logs_nr + 1, logs_alloc); memset(&logs[logs_nr], 0, sizeof(logs[logs_nr])); - fill_reftable_log_record(&logs[logs_nr]); + fill_reftable_log_record(&logs[logs_nr], &committer_ident); logs[logs_nr].refname = (char *)arg->newname; logs[logs_nr].update_index = creation_ts; logs[logs_nr].value.update.message = From d0dd119f72286d6d198911c1b33397a210e56846 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 8 Apr 2024 14:24:16 +0200 Subject: [PATCH 06/11] reftable/writer: refactorings for `writer_add_record()` Large parts of the reftable library do not conform to Git's typical code style. Refactor `writer_add_record()` such that it conforms better to it and add some documentation that explains some of its more intricate behaviour. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/writer.c | 38 +++++++++++++++++++++++++++----------- 1 file changed, 27 insertions(+), 11 deletions(-) diff --git a/reftable/writer.c b/reftable/writer.c index 1d9ff0fbfa..0ad5eb8887 100644 --- a/reftable/writer.c +++ b/reftable/writer.c @@ -209,7 +209,8 @@ static int writer_add_record(struct reftable_writer *w, struct reftable_record *rec) { struct strbuf key = STRBUF_INIT; - int err = -1; + int err; + reftable_record_key(rec, &key); if (strbuf_cmp(&w->last_key, &key) >= 0) { err = REFTABLE_API_ERROR; @@ -218,27 +219,42 @@ static int writer_add_record(struct reftable_writer *w, strbuf_reset(&w->last_key); strbuf_addbuf(&w->last_key, &key); - if (!w->block_writer) { + if (!w->block_writer) writer_reinit_block_writer(w, reftable_record_type(rec)); - } - assert(block_writer_type(w->block_writer) == reftable_record_type(rec)); + if (block_writer_type(w->block_writer) != reftable_record_type(rec)) + BUG("record of type %d added to writer of type %d", + reftable_record_type(rec), block_writer_type(w->block_writer)); - if (block_writer_add(w->block_writer, rec) == 0) { + /* + * Try to add the record to the writer. If this succeeds then we're + * done. Otherwise the block writer may have hit the block size limit + * and needs to be flushed. + */ + if (!block_writer_add(w->block_writer, rec)) { err = 0; goto done; } + /* + * The current block is full, so we need to flush and reinitialize the + * writer to start writing the next block. + */ err = writer_flush_block(w); - if (err < 0) { + if (err < 0) goto done; - } - writer_reinit_block_writer(w, reftable_record_type(rec)); + + /* + * Try to add the record to the writer again. If this still fails then + * the record does not fit into the block size. + * + * TODO: it would be great to have `block_writer_add()` return proper + * error codes so that we don't have to second-guess the failure + * mode here. + */ err = block_writer_add(w->block_writer, rec); - if (err == -1) { - /* we are writing into memory, so an error can only mean it - * doesn't fit. */ + if (err) { err = REFTABLE_ENTRY_TOO_BIG_ERROR; goto done; } From 7e892fec47645ce0b791da468fb4df6c9c6d9f37 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 8 Apr 2024 14:24:20 +0200 Subject: [PATCH 07/11] reftable/writer: refactorings for `writer_flush_nonempty_block()` Large parts of the reftable library do not conform to Git's typical code style. Refactor `writer_flush_nonempty_block()` such that it conforms better to it and add some documentation that explains some of its more intricate behaviour. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/writer.c | 72 +++++++++++++++++++++++++++++------------------ 1 file changed, 44 insertions(+), 28 deletions(-) diff --git a/reftable/writer.c b/reftable/writer.c index 0ad5eb8887..d347ec4cc6 100644 --- a/reftable/writer.c +++ b/reftable/writer.c @@ -659,58 +659,74 @@ static void writer_clear_index(struct reftable_writer *w) w->index_cap = 0; } -static const int debug = 0; - static int writer_flush_nonempty_block(struct reftable_writer *w) { + struct reftable_index_record index_record = { + .last_key = STRBUF_INIT, + }; uint8_t typ = block_writer_type(w->block_writer); - struct reftable_block_stats *bstats = - writer_reftable_block_stats(w, typ); - uint64_t block_typ_off = (bstats->blocks == 0) ? w->next : 0; - int raw_bytes = block_writer_finish(w->block_writer); - int padding = 0; - int err = 0; - struct reftable_index_record ir = { .last_key = STRBUF_INIT }; + struct reftable_block_stats *bstats; + int raw_bytes, padding = 0, err; + uint64_t block_typ_off; + + /* + * Finish the current block. This will cause the block writer to emit + * restart points and potentially compress records in case we are + * writing a log block. + * + * Note that this is still happening in memory. + */ + raw_bytes = block_writer_finish(w->block_writer); if (raw_bytes < 0) return raw_bytes; - if (!w->opts.unpadded && typ != BLOCK_TYPE_LOG) { + /* + * By default, all records except for log records are padded to the + * block size. + */ + if (!w->opts.unpadded && typ != BLOCK_TYPE_LOG) padding = w->opts.block_size - raw_bytes; - } - if (block_typ_off > 0) { + bstats = writer_reftable_block_stats(w, typ); + block_typ_off = (bstats->blocks == 0) ? w->next : 0; + if (block_typ_off > 0) bstats->offset = block_typ_off; - } - bstats->entries += w->block_writer->entries; bstats->restarts += w->block_writer->restart_len; bstats->blocks++; w->stats.blocks++; - if (debug) { - fprintf(stderr, "block %c off %" PRIu64 " sz %d (%d)\n", typ, - w->next, raw_bytes, - get_be24(w->block + w->block_writer->header_off + 1)); - } - - if (w->next == 0) { + /* + * If this is the first block we're writing to the table then we need + * to also write the reftable header. + */ + if (!w->next) writer_write_header(w, w->block); - } err = padded_write(w, w->block, raw_bytes, padding); if (err < 0) return err; + /* + * Add an index record for every block that we're writing. If we end up + * having more than a threshold of index records we will end up writing + * an index section in `writer_finish_section()`. Each index record + * contains the last record key of the block it is indexing as well as + * the offset of that block. + * + * Note that this also applies when flushing index blocks, in which + * case we will end up with a multi-level index. + */ REFTABLE_ALLOC_GROW(w->index, w->index_len + 1, w->index_cap); - - ir.offset = w->next; - strbuf_reset(&ir.last_key); - strbuf_addbuf(&ir.last_key, &w->block_writer->last_key); - w->index[w->index_len] = ir; - + index_record.offset = w->next; + strbuf_reset(&index_record.last_key); + strbuf_addbuf(&index_record.last_key, &w->block_writer->last_key); + w->index[w->index_len] = index_record; w->index_len++; + w->next += padding + raw_bytes; w->block_writer = NULL; + return 0; } From 60dd319519b41cc5cc79bdef5ee8556297db6984 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 8 Apr 2024 14:24:25 +0200 Subject: [PATCH 08/11] reftable/writer: unify releasing memory There are two code paths which release memory of the reftable writer: - `reftable_writer_close()` releases internal state after it has written data. - `reftable_writer_free()` releases the block that was written to and the writer itself. Both code paths free different parts of the writer, and consequently the caller must make sure to call both. And while callers mostly do this already, this falls apart when a write failure causes the caller to skip calling `reftable_write_close()`. Introduce a new function `reftable_writer_release()` that releases all internal state and call it from both paths. Like this it is fine for the caller to not call `reftable_writer_close()`. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/writer.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/reftable/writer.c b/reftable/writer.c index d347ec4cc6..4eeb736445 100644 --- a/reftable/writer.c +++ b/reftable/writer.c @@ -149,11 +149,21 @@ void reftable_writer_set_limits(struct reftable_writer *w, uint64_t min, w->max_update_index = max; } +static void writer_release(struct reftable_writer *w) +{ + if (w) { + reftable_free(w->block); + w->block = NULL; + block_writer_release(&w->block_writer_data); + w->block_writer = NULL; + writer_clear_index(w); + strbuf_release(&w->last_key); + } +} + void reftable_writer_free(struct reftable_writer *w) { - if (!w) - return; - reftable_free(w->block); + writer_release(w); reftable_free(w); } @@ -643,16 +653,13 @@ int reftable_writer_close(struct reftable_writer *w) } done: - /* free up memory. */ - block_writer_release(&w->block_writer_data); - writer_clear_index(w); - strbuf_release(&w->last_key); + writer_release(w); return err; } static void writer_clear_index(struct reftable_writer *w) { - for (size_t i = 0; i < w->index_len; i++) + for (size_t i = 0; w->index && i < w->index_len; i++) strbuf_release(&w->index[i].last_key); FREE_AND_NULL(w->index); w->index_len = 0; From 8aaeffe3b5b85f996ac82ec4b0b0e538f4b5a0ac Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 8 Apr 2024 14:24:30 +0200 Subject: [PATCH 09/11] reftable/writer: reset `last_key` instead of releasing it The reftable writer tracks the last key that it has written so that it can properly compute the compressed prefix for the next record it is about to write. This last key must be reset whenever we move on to write the next block, which is done in `writer_reinit_block_writer()`. We do this by calling `strbuf_release()` though, which needlessly deallocates the underlying buffer. Convert the code to use `strbuf_reset()` instead, which saves one allocation per block we're about to write. This requires us to also amend `reftable_writer_free()` to release the buffer's memory now as we previously seemingly relied on `writer_reinit_block_writer()` to release the memory for us. Releasing memory here is the right thing to do anyway. While at it, convert a callsite where we truncate the buffer by setting its length to zero to instead use `strbuf_reset()`, too. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/writer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/reftable/writer.c b/reftable/writer.c index 4eeb736445..10eccaaa07 100644 --- a/reftable/writer.c +++ b/reftable/writer.c @@ -109,7 +109,7 @@ static void writer_reinit_block_writer(struct reftable_writer *w, uint8_t typ) block_start = header_size(writer_version(w)); } - strbuf_release(&w->last_key); + strbuf_reset(&w->last_key); block_writer_init(&w->block_writer_data, typ, w->block, w->opts.block_size, block_start, hash_size(w->opts.hash_id)); @@ -478,7 +478,7 @@ static int writer_finish_section(struct reftable_writer *w) bstats->max_index_level = max_level; /* Reinit lastKey, as the next section can start with any key. */ - w->last_key.len = 0; + strbuf_reset(&w->last_key); return 0; } From a155ab2bf4e16b8fe1e0644d9d57d2c62eecd452 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 8 Apr 2024 14:24:35 +0200 Subject: [PATCH 10/11] reftable/block: reuse zstream when writing log blocks While most reftable blocks are written to disk as-is, blocks for log records are compressed with zlib. To compress them we use `compress2()`, which is a simple wrapper around the more complex `zstream` interface that would require multiple function invocations. One downside of this interface is that `compress2()` will reallocate internal state of the `zstream` interface on every single invocation. Consequently, as we call `compress2()` for every single log block which we are about to write, this can lead to quite some memory allocation churn. Refactor the code so that the block writer reuses a `zstream`. This significantly reduces the number of bytes allocated when writing many refs in a single transaction, as demonstrated by the following benchmark that writes 100k refs in a single transaction. Before: HEAP SUMMARY: in use at exit: 671,931 bytes in 151 blocks total heap usage: 22,631,887 allocs, 22,631,736 frees, 1,854,670,793 bytes allocated After: HEAP SUMMARY: in use at exit: 671,931 bytes in 151 blocks total heap usage: 22,620,528 allocs, 22,620,377 frees, 1,245,549,984 bytes allocated Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/block.c | 72 ++++++++++++++++++++++++++++++++---------------- reftable/block.h | 1 + 2 files changed, 49 insertions(+), 24 deletions(-) diff --git a/reftable/block.c b/reftable/block.c index e2a2cee58d..9129305515 100644 --- a/reftable/block.c +++ b/reftable/block.c @@ -76,6 +76,10 @@ void block_writer_init(struct block_writer *bw, uint8_t typ, uint8_t *buf, bw->entries = 0; bw->restart_len = 0; bw->last_key.len = 0; + if (!bw->zstream) { + REFTABLE_CALLOC_ARRAY(bw->zstream, 1); + deflateInit(bw->zstream, 9); + } } uint8_t block_writer_type(struct block_writer *bw) @@ -139,39 +143,57 @@ int block_writer_finish(struct block_writer *w) w->next += 2; put_be24(w->buf + 1 + w->header_off, w->next); + /* + * Log records are stored zlib-compressed. Note that the compression + * also spans over the restart points we have just written. + */ if (block_writer_type(w) == BLOCK_TYPE_LOG) { int block_header_skip = 4 + w->header_off; - uLongf src_len = w->next - block_header_skip; - uLongf dest_cap = src_len * 1.001 + 12; - uint8_t *compressed; + uLongf src_len = w->next - block_header_skip, compressed_len; + unsigned char *compressed; + int ret; - REFTABLE_ALLOC_ARRAY(compressed, dest_cap); + ret = deflateReset(w->zstream); + if (ret != Z_OK) + return REFTABLE_ZLIB_ERROR; - while (1) { - uLongf out_dest_len = dest_cap; - int zresult = compress2(compressed, &out_dest_len, - w->buf + block_header_skip, - src_len, 9); - if (zresult == Z_BUF_ERROR && dest_cap < LONG_MAX) { - dest_cap *= 2; - compressed = - reftable_realloc(compressed, dest_cap); - if (compressed) - continue; - } + /* + * Precompute the upper bound of how many bytes the compressed + * data may end up with. Combined with `Z_FINISH`, `deflate()` + * is guaranteed to return `Z_STREAM_END`. + */ + compressed_len = deflateBound(w->zstream, src_len); + REFTABLE_ALLOC_ARRAY(compressed, compressed_len); - if (Z_OK != zresult) { - reftable_free(compressed); - return REFTABLE_ZLIB_ERROR; - } + w->zstream->next_out = compressed; + w->zstream->avail_out = compressed_len; + w->zstream->next_in = w->buf + block_header_skip; + w->zstream->avail_in = src_len; - memcpy(w->buf + block_header_skip, compressed, - out_dest_len); - w->next = out_dest_len + block_header_skip; + /* + * We want to perform all decompression in a single step, which + * is why we can pass Z_FINISH here. As we have precomputed the + * deflated buffer's size via `deflateBound()` this function is + * guaranteed to succeed according to the zlib documentation. + */ + ret = deflate(w->zstream, Z_FINISH); + if (ret != Z_STREAM_END) { reftable_free(compressed); - break; + return REFTABLE_ZLIB_ERROR; } + + /* + * Overwrite the uncompressed data we have already written and + * adjust the `next` pointer to point right after the + * compressed data. + */ + memcpy(w->buf + block_header_skip, compressed, + w->zstream->total_out); + w->next = w->zstream->total_out + block_header_skip; + + reftable_free(compressed); } + return w->next; } @@ -425,6 +447,8 @@ int block_reader_seek(struct block_reader *br, struct block_iter *it, void block_writer_release(struct block_writer *bw) { + deflateEnd(bw->zstream); + FREE_AND_NULL(bw->zstream); FREE_AND_NULL(bw->restarts); strbuf_release(&bw->last_key); /* the block is not owned. */ diff --git a/reftable/block.h b/reftable/block.h index 47acc62c0a..1375957fc8 100644 --- a/reftable/block.h +++ b/reftable/block.h @@ -18,6 +18,7 @@ license that can be found in the LICENSE file or at * allocation overhead. */ struct block_writer { + z_stream *zstream; uint8_t *buf; uint32_t block_size; From fa74f32291d0e2b642345d25e16724567f9b881f Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 8 Apr 2024 14:24:40 +0200 Subject: [PATCH 11/11] reftable/block: reuse compressed array Similar to the preceding commit, let's reuse the `compressed` array that we use to store compressed data in. This results in a small reduction in memory allocations when writing many refs. Before: HEAP SUMMARY: in use at exit: 671,931 bytes in 151 blocks total heap usage: 22,620,528 allocs, 22,620,377 frees, 1,245,549,984 bytes allocated After: HEAP SUMMARY: in use at exit: 671,931 bytes in 151 blocks total heap usage: 22,618,257 allocs, 22,618,106 frees, 1,236,351,528 bytes allocated So while the reduction in allocations isn't really all that big, it's a low hanging fruit and thus there isn't much of a reason not to pick it. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/block.c | 14 +++++--------- reftable/block.h | 3 +++ 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/reftable/block.c b/reftable/block.c index 9129305515..f190c05520 100644 --- a/reftable/block.c +++ b/reftable/block.c @@ -150,7 +150,6 @@ int block_writer_finish(struct block_writer *w) if (block_writer_type(w) == BLOCK_TYPE_LOG) { int block_header_skip = 4 + w->header_off; uLongf src_len = w->next - block_header_skip, compressed_len; - unsigned char *compressed; int ret; ret = deflateReset(w->zstream); @@ -163,9 +162,9 @@ int block_writer_finish(struct block_writer *w) * is guaranteed to return `Z_STREAM_END`. */ compressed_len = deflateBound(w->zstream, src_len); - REFTABLE_ALLOC_ARRAY(compressed, compressed_len); + REFTABLE_ALLOC_GROW(w->compressed, compressed_len, w->compressed_cap); - w->zstream->next_out = compressed; + w->zstream->next_out = w->compressed; w->zstream->avail_out = compressed_len; w->zstream->next_in = w->buf + block_header_skip; w->zstream->avail_in = src_len; @@ -177,21 +176,17 @@ int block_writer_finish(struct block_writer *w) * guaranteed to succeed according to the zlib documentation. */ ret = deflate(w->zstream, Z_FINISH); - if (ret != Z_STREAM_END) { - reftable_free(compressed); + if (ret != Z_STREAM_END) return REFTABLE_ZLIB_ERROR; - } /* * Overwrite the uncompressed data we have already written and * adjust the `next` pointer to point right after the * compressed data. */ - memcpy(w->buf + block_header_skip, compressed, + memcpy(w->buf + block_header_skip, w->compressed, w->zstream->total_out); w->next = w->zstream->total_out + block_header_skip; - - reftable_free(compressed); } return w->next; @@ -450,6 +445,7 @@ void block_writer_release(struct block_writer *bw) deflateEnd(bw->zstream); FREE_AND_NULL(bw->zstream); FREE_AND_NULL(bw->restarts); + FREE_AND_NULL(bw->compressed); strbuf_release(&bw->last_key); /* the block is not owned. */ } diff --git a/reftable/block.h b/reftable/block.h index 1375957fc8..657498014c 100644 --- a/reftable/block.h +++ b/reftable/block.h @@ -19,6 +19,9 @@ license that can be found in the LICENSE file or at */ struct block_writer { z_stream *zstream; + unsigned char *compressed; + size_t compressed_cap; + uint8_t *buf; uint32_t block_size;