diff options
| author | Jean Barkhuysen <jean.barkhuysen@gmail.com> | 2025-07-02 09:56:03 -0400 |
|---|---|---|
| committer | Jonathan Amsterdam <jba@google.com> | 2025-08-14 08:09:23 -0700 |
| commit | ed9d06afb3a5d788c2bd4d6eef1b2a667cff4782 (patch) | |
| tree | 671ef790c1193b1f2cefbb002af8f94337f961e2 | |
| parent | 259676f24e15489ed57d5b99fe83c18d411d728f (diff) | |
| download | go-x-pkgsite-ed9d06afb3a5d788c2bd4d6eef1b2a667cff4782.tar.xz | |
internal/postgres: apply bypassLicenseCheck to IsRedistributable column
This enables symbol search for non-LICENSE'd repos when -bypass_license_check is supplied.
I think the way this works is that when these get insert into units, we don't consider bypassLicenseCheck, and the IsRedistributable column gets set to false. That all seems reasonable by itself.
But later, in upsertSymbolSearchDocuments, we only upsert symbol docs for units that are redistributable. That check happens in SQL, so it's hard to factor in the bypassLicenseCheck flag.
It's not really clear to me whether we should "pollute" the database with this column set to true under the flag. If you run the program again with the flag turned off, your database keeps the values set under the old run. Anyways, I think this CL is probably the pragmatic take: it's a small code change that achieves the desired result without added complexity, and it's unlikely that anyone will run into the undesired outcome I describe above (who is running this program sometimes with the flag on, sometimes with it off?).
Change-Id: Ie15bcf0605b945e4af6574abf689827ea946ddae
Reviewed-on: https://go-review.googlesource.com/c/pkgsite/+/685457
Reviewed-by: Jonathan Amsterdam <jba@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: David Chase <drchase@google.com>
kokoro-CI: kokoro <noreply+kokoro@google.com>
Commit-Queue: Jonathan Amsterdam <jba@google.com>
| -rw-r--r-- | internal/postgres/insert_module.go | 2 | ||||
| -rw-r--r-- | internal/postgres/licenses_test.go | 4 | ||||
| -rw-r--r-- | internal/postgres/search.go | 2 | ||||
| -rw-r--r-- | internal/postgres/search_test.go | 210 | ||||
| -rw-r--r-- | internal/postgres/unit_test.go | 76 |
5 files changed, 157 insertions, 137 deletions
diff --git a/internal/postgres/insert_module.go b/internal/postgres/insert_module.go index a97dcd03..a87f42d5 100644 --- a/internal/postgres/insert_module.go +++ b/internal/postgres/insert_module.go @@ -393,7 +393,7 @@ func (pdb *DB) insertUnits(ctx context.Context, tx *database.DB, u.Name, pq.Array(licenseTypes), pq.Array(licensePaths), - u.IsRedistributable, + pdb.bypassLicenseCheck || u.IsRedistributable, ) if u.Readme != nil { pathToReadme[u.Path] = u.Readme diff --git a/internal/postgres/licenses_test.go b/internal/postgres/licenses_test.go index 300d5c52..7105647a 100644 --- a/internal/postgres/licenses_test.go +++ b/internal/postgres/licenses_test.go @@ -196,7 +196,9 @@ func TestGetLicensesBypass(t *testing.T) { } // Read with license bypass includes non-redistributable license contents. - check(true, sample.NonRedistributableLicense) + redist := *sample.NonRedistributableLicense + redist.Contents = []uint8{} + check(true, &redist) // Read without license bypass does not include non-redistributable license contents. nonRedist := *sample.NonRedistributableLicense diff --git a/internal/postgres/search.go b/internal/postgres/search.go index c01ac661..4015aa73 100644 --- a/internal/postgres/search.go +++ b/internal/postgres/search.go @@ -731,7 +731,7 @@ func (db *DB) GetPackagesForSearchDocumentUpsert(ctx context.Context, before tim a UpsertSearchDocumentArgs redist bool ) - if err := rows.Scan(&a.PackagePath, &a.ModulePath, &a.Version, &a.Synopsis, &redist, + if err := rows.Scan(&a.PackagePath, &a.ModulePath, &a.Version, database.NullIsEmpty(&a.Synopsis), &redist, database.NullIsEmpty(&a.ReadmeFilePath), database.NullIsEmpty(&a.ReadmeContents)); err != nil { return err } diff --git a/internal/postgres/search_test.go b/internal/postgres/search_test.go index 208d941a..b3990865 100644 --- a/internal/postgres/search_test.go +++ b/internal/postgres/search_test.go @@ -823,29 +823,34 @@ func TestExcludedFromSearch(t *testing.T) { } func TestSearchBypass(t *testing.T) { - t.Parallel() - testDB, release := acquire(t) - defer release() - ctx := context.Background() - bypassDB := NewBypassingLicenseCheck(testDB.db) - - m := nonRedistributableModule() - MustInsertModule(ctx, t, bypassDB, m) - for _, test := range []struct { - db *DB + bypass bool wantEmpty bool }{ - {testDB, true}, - {bypassDB, false}, + {false, true}, + {true, false}, } { - rs, err := test.db.Search(ctx, m.ModulePath, SearchOptions{MaxResults: 10}) - if err != nil { - t.Fatal(err) - } - if got := (rs[0].Synopsis == ""); got != test.wantEmpty { - t.Errorf("bypass %t: got empty %t, want %t", test.db == bypassDB, got, test.wantEmpty) - } + t.Run(fmt.Sprintf("bypass %t", test.bypass), func(t *testing.T) { + t.Parallel() + testDB, release := acquire(t) + defer release() + ctx := context.Background() + + if test.bypass { + testDB = NewBypassingLicenseCheck(testDB.db) + } + + m := nonRedistributableModule() + MustInsertModule(ctx, t, testDB, m) + + rs, err := testDB.Search(ctx, m.ModulePath, SearchOptions{MaxResults: 10}) + if err != nil { + t.Fatal(err) + } + if got := (rs[0].Synopsis == ""); got != test.wantEmpty { + t.Errorf("got empty %t, want %t", got, test.wantEmpty) + } + }) } } @@ -1219,90 +1224,99 @@ func TestUpdateSearchDocumentsImportedByCount(t *testing.T) { } func TestGetPackagesForSearchDocumentUpsert(t *testing.T) { - t.Parallel() - testDB, release := acquire(t) - defer release() - ctx := context.Background() + for _, test := range []struct{ bypass bool }{{false}, {true}} { + t.Run(fmt.Sprintf("bypass %t", test.bypass), func(t *testing.T) { + ctx := context.Background() - moduleA := sample.Module("mod.com", "v1.2.3", - "A", "A/notinternal", "A/internal", "A/internal/B") + moduleA := sample.Module("mod.com", "v1.2.3", + "A", "A/notinternal", "A/internal", "A/internal/B") - // moduleA.Units[1] is mod.com/A. - moduleA.Units[1].Readme = &internal.Readme{ - Filepath: sample.ReadmeFilePath, - Contents: sample.ReadmeContents, - } - // moduleA.Units[2] is mod.com/A/notinternal. - moduleA.Units[2].Readme = &internal.Readme{ - Filepath: sample.ReadmeFilePath, - Contents: sample.ReadmeContents, - } - moduleN := nonRedistributableModule() - bypassDB := NewBypassingLicenseCheck(testDB.db) - for _, m := range []*internal.Module{moduleA, moduleN} { - MustInsertModule(ctx, t, bypassDB, m) - } + // moduleA.Units[1] is mod.com/A. + moduleA.Units[1].Readme = &internal.Readme{ + Filepath: sample.ReadmeFilePath, + Contents: sample.ReadmeContents, + } + // moduleA.Units[2] is mod.com/A/notinternal. + moduleA.Units[2].Readme = &internal.Readme{ + Filepath: sample.ReadmeFilePath, + Contents: sample.ReadmeContents, + } + moduleN := nonRedistributableModule() - // We are asking for all packages in search_documents updated before now, which is - // all the non-internal packages. - got, err := testDB.GetPackagesForSearchDocumentUpsert(ctx, time.Now(), 10) - if err != nil { - t.Fatal(err) - } - sort.Slice(got, func(i, j int) bool { return got[i].PackagePath < got[j].PackagePath }) - want := []UpsertSearchDocumentArgs{ - { - PackagePath: moduleN.ModulePath, - ModulePath: moduleN.ModulePath, - Version: "v1.2.3", - ReadmeFilePath: "", - ReadmeContents: "", - Synopsis: "", - }, - { - PackagePath: "mod.com/A", - ModulePath: "mod.com", - Version: "v1.2.3", - ReadmeFilePath: "README.md", - ReadmeContents: "readme", - Synopsis: sample.Doc.Synopsis, - }, - { - PackagePath: "mod.com/A/notinternal", - ModulePath: "mod.com", - Version: "v1.2.3", - ReadmeFilePath: "README.md", - ReadmeContents: "readme", - Synopsis: sample.Doc.Synopsis, - }, - } - if diff := cmp.Diff(want, got); diff != "" { - t.Fatalf("testDB.GetPackagesForSearchDocumentUpsert mismatch(-want +got):\n%s", diff) - } + testDB, release := acquire(t) + defer release() - // Reading with license bypass should return the non-redistributable fields. - got, err = bypassDB.GetPackagesForSearchDocumentUpsert(ctx, time.Now(), 10) - if err != nil { - t.Fatal(err) - } - if len(got) == 0 { - t.Fatal("len(got)==0") - } - sort.Slice(got, func(i, j int) bool { return got[i].PackagePath < got[j].PackagePath }) - gm := got[0] - for _, got := range []string{gm.ReadmeFilePath, gm.ReadmeContents, gm.Synopsis} { - if got == "" { - t.Errorf("got empty field, want non-empty") - } - } + if test.bypass { + testDB = NewBypassingLicenseCheck(testDB.db) + } + MustInsertModule(ctx, t, testDB, moduleA) + MustInsertModule(ctx, t, testDB, moduleN) - // pkgPaths should be an empty slice, all packages were inserted more recently than yesterday. - got, err = testDB.GetPackagesForSearchDocumentUpsert(ctx, time.Now().Add(-24*time.Hour), 10) - if err != nil { - t.Fatal(err) - } - if len(got) != 0 { - t.Fatalf("expected testDB.GetPackagesForSearchDocumentUpsert to return an empty slice; got %v", got) + // We are asking for all packages in search_documents updated before now, which is + // all the non-internal packages. + got, err := testDB.GetPackagesForSearchDocumentUpsert(ctx, time.Now(), 10) + if err != nil { + t.Fatal(err) + } + sort.Slice(got, func(i, j int) bool { return got[i].PackagePath < got[j].PackagePath }) + wantNonRedistributable := UpsertSearchDocumentArgs{ + PackagePath: moduleN.ModulePath, + ModulePath: moduleN.ModulePath, + Version: "v1.2.3", + ReadmeFilePath: "", + ReadmeContents: "", + Synopsis: "", + } + if test.bypass { + // If we inserted with bypass mode, these exist in the database + // and will be returned by search. + wantNonRedistributable.ReadmeFilePath = "README.md" + wantNonRedistributable.ReadmeContents = "readme" + wantNonRedistributable.Synopsis = sample.Doc.Synopsis + } + want := []UpsertSearchDocumentArgs{ + wantNonRedistributable, + { + PackagePath: "mod.com/A", + ModulePath: "mod.com", + Version: "v1.2.3", + ReadmeFilePath: "README.md", + ReadmeContents: "readme", + Synopsis: sample.Doc.Synopsis, + }, + { + PackagePath: "mod.com/A/notinternal", + ModulePath: "mod.com", + Version: "v1.2.3", + ReadmeFilePath: "README.md", + ReadmeContents: "readme", + Synopsis: sample.Doc.Synopsis, + }, + } + if diff := cmp.Diff(want, got); diff != "" { + t.Fatalf("testDB.GetPackagesForSearchDocumentUpsert mismatch(-want +got):\n%s", diff) + } + + // Reading with license bypass should return the non-redistributable fields. + if test.bypass { + sort.Slice(got, func(i, j int) bool { return got[i].PackagePath < got[j].PackagePath }) + gm := got[0] + for _, got := range []string{gm.ReadmeFilePath, gm.ReadmeContents, gm.Synopsis} { + if got == "" { + t.Errorf("got empty field, want non-empty") + } + } + + // pkgPaths should be an empty slice, all packages were inserted more recently than yesterday. + got, err = testDB.GetPackagesForSearchDocumentUpsert(ctx, time.Now().Add(-24*time.Hour), 10) + if err != nil { + t.Fatal(err) + } + if len(got) != 0 { + t.Fatalf("expected testDB.GetPackagesForSearchDocumentUpsert to return an empty slice; got %v", got) + } + } + }) } } diff --git a/internal/postgres/unit_test.go b/internal/postgres/unit_test.go index 835344a9..60ebfb83 100644 --- a/internal/postgres/unit_test.go +++ b/internal/postgres/unit_test.go @@ -804,46 +804,50 @@ func subdirectories(modulePath string, suffixes []string) []*internal.PackageMet } func TestGetUnitBypass(t *testing.T) { - t.Parallel() - testDB, release := acquire(t) - defer release() - ctx := context.Background() - bypassDB := NewBypassingLicenseCheck(testDB.db) - - m := nonRedistributableModule() - MustInsertModule(ctx, t, bypassDB, m) - for _, test := range []struct { - db *DB + bypass bool wantEmpty bool }{ - {testDB, true}, - {bypassDB, false}, + {false, true}, + {true, false}, } { - pathInfo := newUnitMeta(m.ModulePath, m.ModulePath, m.Version) - d, err := test.db.GetUnit(ctx, pathInfo, internal.AllFields, internal.BuildContext{}) - if err != nil { - t.Fatal(err) - } - if got := (d.Readme == nil); got != test.wantEmpty { - t.Errorf("readme empty: got %t, want %t", got, test.wantEmpty) - } - if got := (d.Documentation == nil); got != test.wantEmpty { - t.Errorf("synopsis empty: got %t, want %t", got, test.wantEmpty) - } - if got := (d.Documentation == nil); got != test.wantEmpty { - t.Errorf("doc empty: got %t, want %t", got, test.wantEmpty) - } - if got := d.IsRedistributable; got != !test.wantEmpty { // wantEmpty iff !IsRedistributable - t.Errorf("IsRedistributable is %v: want %v", got, !test.wantEmpty) - } - pkgs := d.Subdirectories - if len(pkgs) != 1 { - t.Fatal("len(pkgs) != 1") - } - if got := (pkgs[0].Synopsis == ""); got != test.wantEmpty { - t.Errorf("synopsis empty: got %t, want %t", got, test.wantEmpty) - } + t.Run(fmt.Sprintf("bypass %t", test.bypass), func(t *testing.T) { + t.Parallel() + testDB, release := acquire(t) + defer release() + ctx := context.Background() + + if test.bypass { + testDB = NewBypassingLicenseCheck(testDB.db) + } + + m := nonRedistributableModule() + MustInsertModule(ctx, t, testDB, m) + pathInfo := newUnitMeta(m.ModulePath, m.ModulePath, m.Version) + d, err := testDB.GetUnit(ctx, pathInfo, internal.AllFields, internal.BuildContext{}) + if err != nil { + t.Fatal(err) + } + if got := (d.Readme == nil); got != test.wantEmpty { + t.Errorf("readme empty: got %t, want %t", got, test.wantEmpty) + } + if got := (d.Documentation == nil); got != test.wantEmpty { + t.Errorf("synopsis empty: got %t, want %t", got, test.wantEmpty) + } + if got := (d.Documentation == nil); got != test.wantEmpty { + t.Errorf("doc empty: got %t, want %t", got, test.wantEmpty) + } + if got := d.IsRedistributable; got != !test.wantEmpty { // wantEmpty iff !IsRedistributable + t.Errorf("IsRedistributable is %v: want %v", got, !test.wantEmpty) + } + pkgs := d.Subdirectories + if len(pkgs) != 1 { + t.Fatal("len(pkgs) != 1") + } + if got := (pkgs[0].Synopsis == ""); got != test.wantEmpty { + t.Errorf("synopsis empty: got %t, want %t", got, test.wantEmpty) + } + }) } } |
