diff options
| author | Jonathan Amsterdam <jba@google.com> | 2020-01-31 18:31:13 -0500 |
|---|---|---|
| committer | Julie Qiu <julie@golang.org> | 2020-03-27 16:46:52 -0400 |
| commit | b792d1667ec0765b9d4434de09bd2fd7de767aef (patch) | |
| tree | 5aac09b7933231963cf276958ea03c474219f026 /internal/postgres | |
| parent | 8f46ad8e27ae62db046b48d61f56925b9cc1def9 (diff) | |
| download | go-x-pkgsite-b792d1667ec0765b9d4434de09bd2fd7de767aef.tar.xz | |
internal/postgres: limit imported-by counts to packages in search_documents
When computing imported-by counts, only consider packages
that are in the search_documents table.
Fixes b/148690268.
Change-Id: Id4cb3bce92dff45fc6a5384f58f3578e6cb7ee09
Reviewed-on: https://team-review.git.corp.google.com/c/golang/discovery/+/653252
Reviewed-by: Julie Qiu <julieqiu@google.com>
Diffstat (limited to 'internal/postgres')
| -rw-r--r-- | internal/postgres/search.go | 31 | ||||
| -rw-r--r-- | internal/postgres/search_test.go | 138 |
2 files changed, 118 insertions, 51 deletions
diff --git a/internal/postgres/search.go b/internal/postgres/search.go index 3d40d5f9..4d5a0dc1 100644 --- a/internal/postgres/search.go +++ b/internal/postgres/search.go @@ -654,7 +654,11 @@ func (db *DB) getSearchDocument(ctx context.Context, path string) (*searchDocume func (db *DB) UpdateSearchDocumentsImportedByCount(ctx context.Context) (nUpdated int64, err error) { defer derrors.Wrap(&err, "UpdateSearchDocumentsImportedByCount(ctx)") - counts, err := db.computeImportedByCounts(ctx) + searchPackages, err := db.getSearchPackages(ctx) + if err != nil { + return 0, err + } + counts, err := db.computeImportedByCounts(ctx, searchPackages) if err != nil { return 0, err } @@ -671,7 +675,26 @@ func (db *DB) UpdateSearchDocumentsImportedByCount(ctx context.Context) (nUpdate return nUpdated, err } -func (db *DB) computeImportedByCounts(ctx context.Context) (counts map[string]int, err error) { +// getSearchPackages returns the set of package paths that are in the search_documents table. +func (db *DB) getSearchPackages(ctx context.Context) (set map[string]bool, err error) { + defer derrors.Wrap(&err, "DB.getSearchPackages(ctx)") + + set = map[string]bool{} + err = db.db.RunQuery(ctx, `SELECT package_path FROM search_documents`, func(rows *sql.Rows) error { + var p string + if err := rows.Scan(&p); err != nil { + return err + } + set[p] = true + return nil + }) + if err != nil { + return nil, err + } + return set, nil +} + +func (db *DB) computeImportedByCounts(ctx context.Context, searchDocsPackages map[string]bool) (counts map[string]int, err error) { defer derrors.Wrap(&err, "db.computeImportedByCounts(ctx)") counts = map[string]int{} @@ -694,6 +717,10 @@ func (db *DB) computeImportedByCounts(ctx context.Context) (counts map[string]in if err := rows.Scan(&from, &fromMod, &to); err != nil { return nil, err } + // Don't count an importer if it's not in search_documents. + if !searchDocsPackages[from] { + continue + } // Don't count an importer if it's in the same module as what it's importing. // Approximate that check by seeing if from_module_path is a prefix of to_path. // (In some cases, e.g. when to_path is in a nested module, that is not correct.) diff --git a/internal/postgres/search_test.go b/internal/postgres/search_test.go index 724c37ea..0ca9ffe5 100644 --- a/internal/postgres/search_test.go +++ b/internal/postgres/search_test.go @@ -12,6 +12,7 @@ import ( "io" "math" "sort" + "strings" "testing" "time" @@ -19,6 +20,7 @@ import ( "github.com/google/go-cmp/cmp/cmpopts" "go.opencensus.io/stats/view" "golang.org/x/discovery/internal" + "golang.org/x/discovery/internal/derrors" "golang.org/x/discovery/internal/testing/sample" ) @@ -621,25 +623,25 @@ func TestUpsertSearchDocumentVersionHasGoMod(t *testing.T) { } func TestUpdateSearchDocumentsImportedByCount(t *testing.T) { - defer ResetTestDB(testDB, t) - ctx, cancel := context.WithTimeout(context.Background(), testTimeout) defer cancel() - insertPackageVersion := func(pkg *internal.Package, version string) { + insertPackageVersion := func(pkg *internal.Package, version string) string { + // insert pkg at version, return its module path t.Helper() v := sample.Version() v.Packages = []*internal.Package{pkg} v.ModulePath = v.ModulePath + pkg.Path v.Version = version if err := testDB.InsertVersion(ctx, v); err != nil { - t.Fatalf("testDB.InsertVersionAndSearchDocuments(%q %q): %v", v.ModulePath, v.Version, err) + t.Fatal(err) } + return v.ModulePath } updateImportedByCount := func() { t.Helper() if _, err := testDB.UpdateSearchDocumentsImportedByCount(ctx); err != nil { - t.Fatalf("testDB.UpdateSearchDocumentsImportedByCount(ctx): %v", err) + t.Fatal(err) } } validateImportedByCountAndGetSearchDocument := func(path string, count int) *searchDocument { @@ -657,57 +659,95 @@ func TestUpdateSearchDocumentsImportedByCount(t *testing.T) { return sd } - // Test imported_by_count = 0 when only pkgA is added. - pkgA := &internal.Package{Path: "A", Name: "A"} - insertPackageVersion(pkgA, "v1.0.0") - updateImportedByCount() - _ = validateImportedByCountAndGetSearchDocument(pkgA.Path, 0) + t.Run("basic", func(t *testing.T) { + defer ResetTestDB(testDB, t) - // Test imported_by_count = 1 for pkgA when pkgB is added. - pkgB := &internal.Package{Path: "B", Name: "B", Imports: []string{"A"}} - insertPackageVersion(pkgB, "v1.0.0") - updateImportedByCount() - _ = validateImportedByCountAndGetSearchDocument(pkgA.Path, 1) - sdB := validateImportedByCountAndGetSearchDocument(pkgB.Path, 0) - wantSearchDocBUpdatedAt := sdB.importedByCountUpdatedAt + // Test imported_by_count = 0 when only pkgA is added. + pkgA := &internal.Package{Path: "A", Name: "A"} + insertPackageVersion(pkgA, "v1.0.0") + updateImportedByCount() + _ = validateImportedByCountAndGetSearchDocument(pkgA.Path, 0) - // Test imported_by_count = 2 for pkgA, when C is added. - pkgC := &internal.Package{Path: "C", Name: "C", Imports: []string{"A"}} - insertPackageVersion(pkgC, "v1.0.0") - updateImportedByCount() - sdA := validateImportedByCountAndGetSearchDocument(pkgA.Path, 2) - sdC := validateImportedByCountAndGetSearchDocument(pkgC.Path, 0) + // Test imported_by_count = 1 for pkgA when pkgB is added. + pkgB := &internal.Package{Path: "B", Name: "B", Imports: []string{"A"}} + insertPackageVersion(pkgB, "v1.0.0") + updateImportedByCount() + _ = validateImportedByCountAndGetSearchDocument(pkgA.Path, 1) + sdB := validateImportedByCountAndGetSearchDocument(pkgB.Path, 0) + wantSearchDocBUpdatedAt := sdB.importedByCountUpdatedAt - // Nothing imports C, so it has never been updated. - if !sdC.importedByCountUpdatedAt.IsZero() { - t.Fatalf("pkgC imported_by_count_updated_at should be zero, but is %v", sdC.importedByCountUpdatedAt) - } - if sdA.importedByCountUpdatedAt.IsZero() { - t.Fatal("pkgA imported_by_count_updated_at should be non-zero, but is zero") - } + // Test imported_by_count = 2 for pkgA, when C is added. + pkgC := &internal.Package{Path: "C", Name: "C", Imports: []string{"A"}} + insertPackageVersion(pkgC, "v1.0.0") + updateImportedByCount() + sdA := validateImportedByCountAndGetSearchDocument(pkgA.Path, 2) + sdC := validateImportedByCountAndGetSearchDocument(pkgC.Path, 0) - // Test imported_by_count_updated_at for B has not changed. - sdB = validateImportedByCountAndGetSearchDocument(pkgB.Path, 0) - if sdB.importedByCountUpdatedAt != wantSearchDocBUpdatedAt { - t.Fatalf("expected imported_by_count_updated_at for pkgB not to have changed; old = %v, new = %v", wantSearchDocBUpdatedAt, sdB.importedByCountUpdatedAt) - } + // Nothing imports C, so it has never been updated. + if !sdC.importedByCountUpdatedAt.IsZero() { + t.Fatalf("pkgC imported_by_count_updated_at should be zero, but is %v", sdC.importedByCountUpdatedAt) + } + if sdA.importedByCountUpdatedAt.IsZero() { + t.Fatal("pkgA imported_by_count_updated_at should be non-zero, but is zero") + } + + // Test imported_by_count_updated_at for B has not changed. + sdB = validateImportedByCountAndGetSearchDocument(pkgB.Path, 0) + if sdB.importedByCountUpdatedAt != wantSearchDocBUpdatedAt { + t.Fatalf("expected imported_by_count_updated_at for pkgB not to have changed; old = %v, new = %v", + wantSearchDocBUpdatedAt, sdB.importedByCountUpdatedAt) + } + + // When an older version of A imports D, nothing happens to the counts, + // because imports_unique only records the latest version of each package. + pkgD := &internal.Package{Path: "D", Name: "D"} + insertPackageVersion(pkgD, "v1.0.0") + pkgA.Imports = []string{"D"} + insertPackageVersion(pkgA, "v0.9.0") + updateImportedByCount() + _ = validateImportedByCountAndGetSearchDocument(pkgA.Path, 2) + _ = validateImportedByCountAndGetSearchDocument(pkgD.Path, 0) + + // When a newer version of A imports D, however, the counts do change. + insertPackageVersion(pkgA, "v1.1.0") + updateImportedByCount() + _ = validateImportedByCountAndGetSearchDocument(pkgA.Path, 2) + _ = validateImportedByCountAndGetSearchDocument(pkgD.Path, 1) + }) + + t.Run("alternative", func(t *testing.T) { + // Test with alternative modules that are removed from search_documents. + defer ResetTestDB(testDB, t) + + insertPackageVersion(&internal.Package{Path: "B", Name: "B"}, "v1.0.0") - // When an older version of A imports D, nothing happens to the counts, - // because imports_unique only records the latest version of each package. - pkgD := &internal.Package{Path: "D", Name: "D"} - insertPackageVersion(pkgD, "v1.0.0") - pkgA.Imports = []string{"D"} - insertPackageVersion(pkgA, "v0.9.0") - updateImportedByCount() - _ = validateImportedByCountAndGetSearchDocument(pkgA.Path, 2) - _ = validateImportedByCountAndGetSearchDocument(pkgD.Path, 0) + // Insert a package with the canonical module path. + pkgA := &internal.Package{Path: "A", Name: "A", Imports: []string{"B"}} + canonicalModulePath := insertPackageVersion(pkgA, "v1.0.0") - // When a newer version of A imports D, however, the counts do change. - insertPackageVersion(pkgA, "v1.1.0") - updateImportedByCount() - _ = validateImportedByCountAndGetSearchDocument(pkgA.Path, 2) - _ = validateImportedByCountAndGetSearchDocument(pkgD.Path, 1) + // Image we see a package with an alternative path at v1.2.0. + // We add that information to module_version_states. + alternativeModulePath := strings.ToLower(canonicalModulePath) + alternativeStatus := derrors.ToHTTPStatus(derrors.AlternativeModule) + err := testDB.UpsertModuleVersionState(ctx, alternativeModulePath, "v1.2.0", "", + time.Now(), alternativeStatus, canonicalModulePath, nil) + if err != nil { + t.Fatal(err) + } + + // Now we see an earlier version of that package, without a go.mod file, so we insert it. + // It should not get inserted into search_documents. + pkga := &internal.Package{Path: "a", Name: "A", Imports: []string{"B"}} + mp := insertPackageVersion(pkga, "v1.0.0") + if mp != alternativeModulePath { + t.Fatal("bad alternativeModulePath") + } + // Although B is imported by two packages, only one is in search_documents, so its + // imported-by count is 1. + updateImportedByCount() + validateImportedByCountAndGetSearchDocument("B", 1) + }) } func TestGetPackagesForSearchDocumentUpsert(t *testing.T) { |
