diff options
| author | Jonathan Amsterdam <jba@google.com> | 2024-02-21 20:46:19 -0500 |
|---|---|---|
| committer | Jonathan Amsterdam <jba@google.com> | 2024-02-22 22:32:45 +0000 |
| commit | 094b90f1e862df40cb520acaec64948fb544e37c (patch) | |
| tree | e64ba7b9716d6341c5b540c422ae7ff8db89acc5 /internal/postgres | |
| parent | 463d7c943fe04a6c6f2b97e71def0fb165fff76a (diff) | |
| download | go-x-pkgsite-094b90f1e862df40cb520acaec64948fb544e37c.tar.xz | |
internal/worker: add batch param to update-imported-by-count route
Inserting 5000 imported-by counts into search_documents in a single
batch timed out. Make the batch size configurable, so we can experiment
with something smaller.
Change-Id: Ie0c5f42a28c0468c600af6d836edc345529b769f
Reviewed-on: https://go-review.googlesource.com/c/pkgsite/+/565682
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Michael Matloob <matloob@golang.org>
kokoro-CI: kokoro <noreply+kokoro@google.com>
Diffstat (limited to 'internal/postgres')
| -rw-r--r-- | internal/postgres/search.go | 15 | ||||
| -rw-r--r-- | internal/postgres/search_test.go | 27 |
2 files changed, 20 insertions, 22 deletions
diff --git a/internal/postgres/search.go b/internal/postgres/search.go index a93aae0c..36cb72ae 100644 --- a/internal/postgres/search.go +++ b/internal/postgres/search.go @@ -743,9 +743,11 @@ func (db *DB) GetPackagesForSearchDocumentUpsert(ctx context.Context, before tim // from the imports_unique table. // // UpdateSearchDocumentsImportedByCount returns the number of rows updated. -func (db *DB) UpdateSearchDocumentsImportedByCount(ctx context.Context) (nUpdated int64, err error) { +func (db *DB) UpdateSearchDocumentsImportedByCount(ctx context.Context, batchSize int) (nUpdated int64, err error) { defer derrors.WrapStack(&err, "UpdateSearchDocumentsImportedByCount(ctx)") + log.Infof(ctx, "updating imported-by counts, batch size = %d", batchSize) + curCounts, err := db.getSearchPackages(ctx) if err != nil { return 0, err @@ -754,6 +756,7 @@ func (db *DB) UpdateSearchDocumentsImportedByCount(ctx context.Context) (nUpdate if err != nil { return 0, err } + // Include only changed counts for packages that are in search_documents. changedCounts := map[string]int{} for p, nc := range newCounts { @@ -767,19 +770,15 @@ func (db *DB) UpdateSearchDocumentsImportedByCount(ctx context.Context) (nUpdate pct = len(changedCounts) * 100 / len(curCounts) } log.Debugf(ctx, "update-imported-by-counts: %d changed (%d%%)", len(changedCounts), pct) - return db.UpdateSearchDocumentsImportedByCountWithCounts(ctx, changedCounts) + return db.UpdateSearchDocumentsImportedByCountWithCounts(ctx, changedCounts, batchSize) } -// How many imported-by counts to update at a time. -// A variable for testing. 
-var countBatchSize = 5_000 - -func (db *DB) UpdateSearchDocumentsImportedByCountWithCounts(ctx context.Context, counts map[string]int) (nUpdated int64, err error) { +func (db *DB) UpdateSearchDocumentsImportedByCountWithCounts(ctx context.Context, counts map[string]int, batchSize int) (nUpdated int64, err error) { defer derrors.WrapStack(&err, "UpdateSearchDocumentsImportedByCountWithCounts") for len(counts) > 0 { var nu int64 err := db.db.Transact(ctx, sql.LevelDefault, func(tx *database.DB) error { - if err := insertImportedByCounts(ctx, tx, counts, countBatchSize); err != nil { + if err := insertImportedByCounts(ctx, tx, counts, batchSize); err != nil { return err } nu, err = updateImportedByCounts(ctx, tx) diff --git a/internal/postgres/search_test.go b/internal/postgres/search_test.go index 1c24b7fb..007a2ead 100644 --- a/internal/postgres/search_test.go +++ b/internal/postgres/search_test.go @@ -442,7 +442,7 @@ func TestSearch(t *testing.T) { for _, m := range test.modules { MustInsertModule(ctx, t, testDB, m) } - if _, err := testDB.UpdateSearchDocumentsImportedByCount(ctx); err != nil { + if _, err := testDB.UpdateSearchDocumentsImportedByCount(ctx, 100); err != nil { t.Fatal(err) } guardTestResult := resultGuard(t, test.resultOrder) @@ -534,7 +534,7 @@ func TestSearchErrors(t *testing.T) { for _, v := range modules { MustInsertModule(ctx, t, testDB, v) } - if _, err := testDB.UpdateSearchDocumentsImportedByCount(ctx); err != nil { + if _, err := testDB.UpdateSearchDocumentsImportedByCount(ctx, 100); err != nil { t.Fatal(err) } guardTestResult := resultGuard(t, test.resultOrder) @@ -1083,9 +1083,9 @@ func TestUpdateSearchDocumentsImportedByCount(t *testing.T) { return m } - updateImportedByCount := func(db *DB) { + updateImportedByCount := func(db *DB, batchSize int) { t.Helper() - if _, err := db.UpdateSearchDocumentsImportedByCount(ctx); err != nil { + if _, err := db.UpdateSearchDocumentsImportedByCount(ctx, batchSize); err != nil { t.Fatal(err) } } @@ 
-1111,19 +1111,19 @@ func TestUpdateSearchDocumentsImportedByCount(t *testing.T) { // Test imported_by_count = 0 when only pkgA is added. mA := insertPackageVersion(t, testDB, "A", "v1.0.0", nil) - updateImportedByCount(testDB) + updateImportedByCount(testDB, 100) _ = validateImportedByCountAndGetSearchDocument(t, testDB, pkgPath(mA), 0) // Test imported_by_count = 1 for pkgA when pkgB is added. mB := insertPackageVersion(t, testDB, "B", "v1.0.0", []string{"A"}) - updateImportedByCount(testDB) + updateImportedByCount(testDB, 100) _ = validateImportedByCountAndGetSearchDocument(t, testDB, pkgPath(mA), 1) sdB := validateImportedByCountAndGetSearchDocument(t, testDB, pkgPath(mB), 0) wantSearchDocBUpdatedAt := sdB.importedByCountUpdatedAt // Test imported_by_count = 2 for pkgA, when C is added. mC := insertPackageVersion(t, testDB, "C", "v1.0.0", []string{"A"}) - updateImportedByCount(testDB) + updateImportedByCount(testDB, 100) sdA := validateImportedByCountAndGetSearchDocument(t, testDB, pkgPath(mA), 2) sdC := validateImportedByCountAndGetSearchDocument(t, testDB, pkgPath(mC), 0) @@ -1146,13 +1146,13 @@ func TestUpdateSearchDocumentsImportedByCount(t *testing.T) { // because imports_unique only records the latest version of each package. mD := insertPackageVersion(t, testDB, "D", "v1.0.0", nil) insertPackageVersion(t, testDB, "A", "v0.9.0", []string{"D"}) - updateImportedByCount(testDB) + updateImportedByCount(testDB, 100) _ = validateImportedByCountAndGetSearchDocument(t, testDB, pkgPath(mA), 2) _ = validateImportedByCountAndGetSearchDocument(t, testDB, pkgPath(mD), 0) // When a newer version of A imports D, however, the counts do change. 
insertPackageVersion(t, testDB, "A", "v1.1.0", []string{"D"}) - updateImportedByCount(testDB) + updateImportedByCount(testDB, 100) _ = validateImportedByCountAndGetSearchDocument(t, testDB, pkgPath(mA), 2) _ = validateImportedByCountAndGetSearchDocument(t, testDB, pkgPath(mD), 1) }) @@ -1198,13 +1198,10 @@ func TestUpdateSearchDocumentsImportedByCount(t *testing.T) { MustInsertModule(ctx, t, testDB, mAlt) // Although B is imported by two packages, only one is in search_documents, so its // imported-by count is 1. - updateImportedByCount(testDB) + updateImportedByCount(testDB, 100) validateImportedByCountAndGetSearchDocument(t, testDB, "mod.com/B/B", 1) }) t.Run("multiple", func(t *testing.T) { - defer func(old int) { countBatchSize = old }(countBatchSize) - countBatchSize = 1 - testDB, release := acquire(t) defer release() @@ -1215,7 +1212,7 @@ func TestUpdateSearchDocumentsImportedByCount(t *testing.T) { insertPackageVersion(t, testDB, "D", "v1.0.0", []string{"A"}) insertPackageVersion(t, testDB, "E", "v1.0.0", []string{"B"}) - updateImportedByCount(testDB) + updateImportedByCount(testDB, 1) _ = validateImportedByCountAndGetSearchDocument(t, testDB, pkgPath(mA), 2) _ = validateImportedByCountAndGetSearchDocument(t, testDB, pkgPath(mB), 1) }) @@ -1364,6 +1361,7 @@ func TestHllZeros(t *testing.T) { } func TestGroupSearchResults(t *testing.T) { + t.Parallel() for _, test := range []struct { name string in, want []*SearchResult @@ -1502,6 +1500,7 @@ func TestGroupSearchResults(t *testing.T) { } func TestGroupAndMajorVersion(t *testing.T) { + t.Parallel() for _, test := range []struct { in SearchResult wantSeries string |
