aboutsummaryrefslogtreecommitdiff
path: root/internal/postgres
diff options
context:
space:
mode:
authorJonathan Amsterdam <jba@google.com>2020-08-21 19:31:08 -0400
committerJonathan Amsterdam <jba@google.com>2020-08-25 18:52:53 +0000
commitd25b304adb7670d05913a46af0cddcfca4e5cc02 (patch)
tree5492903ac5630ead161bdee035f134ad5a8a1f9e /internal/postgres
parent41dc6b274dec8f67939c151c82b50044b08a9fb5 (diff)
downloadgo-x-pkgsite-d25b304adb7670d05913a46af0cddcfca4e5cc02.tar.xz
internal,internal/postgres: remove non-redist data on read
The postgres package removes non-redistributable data during insertion. Also remove it when reading, unless bypassLicenseCheck is true. Handling this in the DataSource layer simplifies frontend code, which can avoid redistributability checks and knowing about the bypass flag except when it constructs its DataSource. As part of this work, move the removal of non-redistributable data to the internal package, so it can eventually be used by the proxy data source. For golang/go#39602 Change-Id: Ia1362ead917b42f844b4c4d25ade74cdcb03d4dc Reviewed-on: https://go-review.googlesource.com/c/pkgsite/+/250017 Reviewed-by: Julie Qiu <julie@golang.org>
Diffstat (limited to 'internal/postgres')
-rw-r--r--internal/postgres/details.go8
-rw-r--r--internal/postgres/directory.go16
-rw-r--r--internal/postgres/directory_test.go77
-rw-r--r--internal/postgres/insert_module.go35
-rw-r--r--internal/postgres/licenses.go16
-rw-r--r--internal/postgres/licenses_test.go61
-rw-r--r--internal/postgres/package.go3
-rw-r--r--internal/postgres/package_test.go35
-rw-r--r--internal/postgres/postgres.go4
-rw-r--r--internal/postgres/search.go12
-rw-r--r--internal/postgres/search_test.go28
11 files changed, 251 insertions, 44 deletions
diff --git a/internal/postgres/details.go b/internal/postgres/details.go
index 7a061095..936f88f7 100644
--- a/internal/postgres/details.go
+++ b/internal/postgres/details.go
@@ -69,6 +69,11 @@ func (db *DB) LegacyGetPackagesInModule(ctx context.Context, modulePath, version
if err := db.db.RunQuery(ctx, query, collect, modulePath, version); err != nil {
return nil, fmt.Errorf("DB.LegacyGetPackagesInModule(ctx, %q, %q): %w", modulePath, version, err)
}
+ if !db.bypassLicenseCheck {
+ for _, p := range packages {
+ p.RemoveNonRedistributableData()
+ }
+ }
return packages, nil
}
@@ -238,6 +243,9 @@ func (db *DB) LegacyGetModuleInfo(ctx context.Context, modulePath string, versio
}
return nil, fmt.Errorf("row.Scan(): %v", err)
}
+ if !db.bypassLicenseCheck {
+ mi.RemoveNonRedistributableData()
+ }
return &mi, nil
}
diff --git a/internal/postgres/directory.go b/internal/postgres/directory.go
index 41cb8b4d..a93bf800 100644
--- a/internal/postgres/directory.go
+++ b/internal/postgres/directory.go
@@ -75,6 +75,11 @@ func (db *DB) GetPackagesInDirectory(ctx context.Context, dirPath, modulePath, r
if len(packages) == 0 {
return nil, fmt.Errorf("directory does not contain any packages: %w", derrors.NotFound)
}
+ if !db.bypassLicenseCheck {
+ for _, p := range packages {
+ p.RemoveNonRedistributableData()
+ }
+ }
return packages, nil
}
@@ -194,6 +199,9 @@ func (db *DB) GetDirectory(ctx context.Context, path, modulePath, version string
dir.Readme = &readme
}
dir.ModuleInfo = mi
+ if !db.bypassLicenseCheck {
+ dir.RemoveNonRedistributableData()
+ }
return &dir, nil
}
@@ -309,11 +317,15 @@ func (db *DB) LegacyGetDirectory(ctx context.Context, dirPath, modulePath, versi
sort.Slice(packages, func(i, j int) bool {
return packages[i].Path < packages[j].Path
})
- return &internal.LegacyDirectory{
+ ld := &internal.LegacyDirectory{
Path: dirPath,
LegacyModuleInfo: mi,
Packages: packages,
- }, nil
+ }
+ if !db.bypassLicenseCheck {
+ ld.RemoveNonRedistributableData()
+ }
+ return ld, nil
}
func directoryColumns(fields internal.FieldSet) string {
diff --git a/internal/postgres/directory_test.go b/internal/postgres/directory_test.go
index 539db07e..ebc82745 100644
--- a/internal/postgres/directory_test.go
+++ b/internal/postgres/directory_test.go
@@ -175,6 +175,38 @@ func TestGetPackagesInDirectory(t *testing.T) {
}
}
+func TestGetPackagesInDirectoryBypass(t *testing.T) {
+ ctx, cancel := context.WithTimeout(context.Background(), testTimeout)
+ defer cancel()
+ defer ResetTestDB(testDB, t)
+ bypassDB := NewBypassingLicenseCheck(testDB.db)
+
+ // Insert a non-redistributable module.
+ m := nonRedistributableModule()
+ if err := bypassDB.InsertModule(ctx, m); err != nil {
+ t.Fatal(err)
+ }
+
+ for _, test := range []struct {
+ db *DB
+ want string
+ }{
+ {bypassDB, sample.Synopsis}, // Reading with license bypass returns the synopsis.
+ {testDB, ""}, // Without bypass, the synopsis is empty.
+ } {
+ pkgs, err := test.db.GetPackagesInDirectory(ctx, m.ModulePath, m.ModulePath, m.Version)
+ if err != nil {
+ t.Fatal(err)
+ }
+ if len(pkgs) != 1 {
+ t.Fatal("len(pkgs) != 1")
+ }
+ if got := pkgs[0].Synopsis; got != test.want {
+ t.Errorf("bypass %t: got %q, want %q", test.db == bypassDB, got, test.want)
+ }
+ }
+}
+
func TestLegacyGetDirectory(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), testTimeout)
defer cancel()
@@ -595,6 +627,51 @@ func TestGetDirectory(t *testing.T) {
}
}
+func TestGetDirectoryBypass(t *testing.T) {
+ ctx, cancel := context.WithTimeout(context.Background(), testTimeout)
+ defer cancel()
+ defer ResetTestDB(testDB, t)
+ bypassDB := NewBypassingLicenseCheck(testDB.db)
+
+ m := nonRedistributableModule()
+ if err := bypassDB.InsertModule(ctx, m); err != nil {
+ t.Fatal(err)
+ }
+
+ for _, test := range []struct {
+ db *DB
+ wantEmpty bool
+ }{
+ {testDB, true},
+ {bypassDB, false},
+ } {
+ d, err := test.db.GetDirectory(ctx, m.ModulePath, m.ModulePath, m.Version)
+ if err != nil {
+ t.Fatal(err)
+ }
+ if got := (d.Readme == nil); got != test.wantEmpty {
+ t.Errorf("readme empty: got %t, want %t", got, test.wantEmpty)
+ }
+ if got := (d.Package.Documentation.Synopsis == ""); got != test.wantEmpty {
+ t.Errorf("synopsis empty: got %t, want %t", got, test.wantEmpty)
+ }
+ if got := (d.Package.Documentation.HTML == safehtml.HTML{}); got != test.wantEmpty {
+ t.Errorf("doc empty: got %t, want %t", got, test.wantEmpty)
+ }
+
+ ld, err := test.db.LegacyGetDirectory(ctx, m.ModulePath, m.ModulePath, m.Version, internal.AllFields)
+ if err != nil {
+ t.Fatal(err)
+ }
+ if got := (ld.Packages[0].Synopsis == ""); got != test.wantEmpty {
+ t.Errorf("legacy synopsis empty: got %t, want %t", got, test.wantEmpty)
+ }
+ if got := (ld.Packages[0].DocumentationHTML == safehtml.HTML{}); got != test.wantEmpty {
+ t.Errorf("legacy doc empty: got %t, want %t", got, test.wantEmpty)
+ }
+ }
+}
+
func findDirectory(m *internal.Module, path string) *internal.Directory {
for _, d := range m.Directories {
if d.Path == path {
diff --git a/internal/postgres/insert_module.go b/internal/postgres/insert_module.go
index 474b6ed5..2af86f90 100644
--- a/internal/postgres/insert_module.go
+++ b/internal/postgres/insert_module.go
@@ -17,7 +17,6 @@ import (
"strings"
"unicode/utf8"
- "github.com/google/safehtml"
"github.com/lib/pq"
"go.opencensus.io/trace"
"golang.org/x/mod/module"
@@ -25,7 +24,6 @@ import (
"golang.org/x/pkgsite/internal"
"golang.org/x/pkgsite/internal/database"
"golang.org/x/pkgsite/internal/derrors"
- "golang.org/x/pkgsite/internal/licenses"
"golang.org/x/pkgsite/internal/log"
"golang.org/x/pkgsite/internal/stdlib"
"golang.org/x/pkgsite/internal/version"
@@ -64,8 +62,8 @@ func (db *DB) InsertModule(ctx context.Context, m *internal.Module) (err error)
return err
}
if !db.bypassLicenseCheck {
- // If we are bypassing license checking, remove data for non-redistributable modules.
- removeNonDistributableData(m)
+ // If we are not bypassing license checking, remove data for non-redistributable modules.
+ m.RemoveNonRedistributableData()
}
return db.saveModule(ctx, m)
}
@@ -692,35 +690,6 @@ func (db *DB) comparePaths(ctx context.Context, m *internal.Module) (err error)
return nil
}
-// removeNonDistributableData removes information from the module
-// if it is not redistributable.
-func removeNonDistributableData(m *internal.Module) {
- for _, p := range m.LegacyPackages {
- if !p.IsRedistributable {
- // Prune derived information that can't be stored.
- p.Synopsis = ""
- p.DocumentationHTML = safehtml.HTML{}
- }
- }
- if !m.IsRedistributable {
- m.LegacyReadmeFilePath = ""
- m.LegacyReadmeContents = ""
- }
- for _, d := range m.Directories {
- if !d.IsRedistributable {
- d.Readme = nil
- if d.Package != nil {
- d.Package.Documentation = nil
- }
- }
- }
- for _, l := range m.Licenses {
- if !licenses.Redistributable(l.Types) {
- l.Contents = nil
- }
- }
-}
-
// DeleteModule deletes a Version from the database.
func (db *DB) DeleteModule(ctx context.Context, modulePath, version string) (err error) {
defer derrors.Wrap(&err, "DeleteModule(ctx, db, %q, %q)", modulePath, version)
diff --git a/internal/postgres/licenses.go b/internal/postgres/licenses.go
index e1811627..55712af5 100644
--- a/internal/postgres/licenses.go
+++ b/internal/postgres/licenses.go
@@ -54,7 +54,7 @@ func (db *DB) GetLicenses(ctx context.Context, fullPath, modulePath, resolvedVer
}
defer rows.Close()
- moduleLicenses, err := collectLicenses(rows)
+ moduleLicenses, err := collectLicenses(rows, db.bypassLicenseCheck)
if err != nil {
return nil, err
}
@@ -74,6 +74,11 @@ func (db *DB) GetLicenses(ctx context.Context, fullPath, modulePath, resolvedVer
}
}
}
+ if !db.bypassLicenseCheck {
+ for _, l := range lics {
+ l.RemoveNonRedistributableData()
+ }
+ }
return lics, nil
}
@@ -99,7 +104,7 @@ func (db *DB) LegacyGetModuleLicenses(ctx context.Context, modulePath, version s
return nil, err
}
defer rows.Close()
- return collectLicenses(rows)
+ return collectLicenses(rows, db.bypassLicenseCheck)
}
// LegacyGetPackageLicenses returns all licenses associated with the given package path and
@@ -141,12 +146,12 @@ func (db *DB) LegacyGetPackageLicenses(ctx context.Context, pkgPath, modulePath,
return nil, err
}
defer rows.Close()
- return collectLicenses(rows)
+ return collectLicenses(rows, db.bypassLicenseCheck)
}
// collectLicenses converts the sql rows to a list of licenses. The columns
// must be types, file_path and contents, in that order.
-func collectLicenses(rows *sql.Rows) ([]*licenses.License, error) {
+func collectLicenses(rows *sql.Rows, bypassLicenseCheck bool) ([]*licenses.License, error) {
mustHaveColumns(rows, "types", "file_path", "contents", "coverage")
var lics []*licenses.License
for rows.Next() {
@@ -158,6 +163,9 @@ func collectLicenses(rows *sql.Rows) ([]*licenses.License, error) {
return nil, fmt.Errorf("row.Scan(): %v", err)
}
lic.Types = licenseTypes
+ if !bypassLicenseCheck {
+ lic.RemoveNonRedistributableData()
+ }
lics = append(lics, lic)
}
sort.Slice(lics, func(i, j int) bool {
diff --git a/internal/postgres/licenses_test.go b/internal/postgres/licenses_test.go
index 57dc2377..b7e64035 100644
--- a/internal/postgres/licenses_test.go
+++ b/internal/postgres/licenses_test.go
@@ -12,6 +12,7 @@ import (
"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"
+ "golang.org/x/pkgsite/internal"
"golang.org/x/pkgsite/internal/derrors"
"golang.org/x/pkgsite/internal/licenses"
"golang.org/x/pkgsite/internal/stdlib"
@@ -199,3 +200,63 @@ func TestLegacyGetPackageLicenses(t *testing.T) {
})
}
}
+
+func TestGetLicensesBypass(t *testing.T) {
+ ctx, cancel := context.WithTimeout(context.Background(), testTimeout)
+ defer cancel()
+ defer ResetTestDB(testDB, t)
+
+ const version = "v1.2.3"
+
+ bypassDB := NewBypassingLicenseCheck(testDB.db)
+
+ // Insert with non-redistributable license contents.
+ m := nonRedistributableModule()
+ if err := bypassDB.InsertModule(ctx, m); err != nil {
+ t.Fatal(err)
+ }
+
+ // check reads and the second license in the module and compares it with want.
+ check := func(bypass, legacy bool, want *licenses.License) {
+ t.Helper()
+ db := testDB
+ if bypass {
+ db = bypassDB
+ }
+ var lics []*licenses.License
+ var err error
+ if legacy {
+ lics, err = db.LegacyGetModuleLicenses(ctx, sample.ModulePath, m.Version)
+ } else {
+ lics, err = db.GetLicenses(ctx, sample.ModulePath, sample.ModulePath, m.Version)
+ }
+ if err != nil {
+ t.Fatal(err)
+ }
+ if len(lics) != 2 {
+ t.Fatal("did not read two licenses")
+ }
+ if diff := cmp.Diff(want, lics[1]); diff != "" {
+ t.Errorf("mismatch (-want, +got):\n%s", diff)
+ }
+ }
+
+ // Read with license bypass includes non-redistributable license contents.
+ check(true, false, sample.NonRedistributableLicense)
+ check(true, true, sample.NonRedistributableLicense)
+
+ // Read without license bypass does not include non-redistributable license contents.
+ nonRedist := *sample.NonRedistributableLicense
+ nonRedist.Contents = nil
+ check(false, false, &nonRedist)
+ check(false, true, &nonRedist)
+}
+
+func nonRedistributableModule() *internal.Module {
+ m := sample.Module(sample.ModulePath, "v1.2.3", "")
+ sample.AddLicense(m, sample.NonRedistributableLicense)
+ m.IsRedistributable = false
+ m.LegacyPackages[0].IsRedistributable = false
+ m.Directories[0].IsRedistributable = false
+ return m
+}
diff --git a/internal/postgres/package.go b/internal/postgres/package.go
index 0ce65ec9..b73a50df 100644
--- a/internal/postgres/package.go
+++ b/internal/postgres/package.go
@@ -144,5 +144,8 @@ func (db *DB) LegacyGetPackage(ctx context.Context, pkgPath, modulePath, version
}
pkg.Licenses = lics
pkg.DocumentationHTML = convertDocumentation(docHTML)
+ if !db.bypassLicenseCheck {
+ pkg.RemoveNonRedistributableData()
+ }
return &pkg, nil
}
diff --git a/internal/postgres/package_test.go b/internal/postgres/package_test.go
index d210b678..9618e53d 100644
--- a/internal/postgres/package_test.go
+++ b/internal/postgres/package_test.go
@@ -240,3 +240,38 @@ func TestLegacyGetPackageInvalidArguments(t *testing.T) {
})
}
}
+
+func TestLegacyGetPackageBypass(t *testing.T) {
+ ctx, cancel := context.WithTimeout(context.Background(), testTimeout)
+ defer cancel()
+ defer ResetTestDB(testDB, t)
+ bypassDB := NewBypassingLicenseCheck(testDB.db)
+
+ m := nonRedistributableModule()
+ if err := bypassDB.InsertModule(ctx, m); err != nil {
+ t.Fatal(err)
+ }
+
+ for _, test := range []struct {
+ db *DB
+ wantEmpty bool
+ }{
+ {testDB, true},
+ {bypassDB, false},
+ } {
+ pkg, err := test.db.LegacyGetPackage(ctx, m.ModulePath, m.ModulePath, m.Version)
+ if err != nil {
+ t.Fatal(err)
+ }
+ for _, got := range []string{
+ pkg.LegacyPackage.Synopsis,
+ pkg.LegacyPackage.DocumentationHTML.String(),
+ pkg.LegacyModuleInfo.LegacyReadmeFilePath,
+ pkg.LegacyModuleInfo.LegacyReadmeContents,
+ } {
+ if (got == "") != test.wantEmpty {
+ t.Errorf("got %q, want empty %t", got, test.wantEmpty)
+ }
+ }
+ }
+}
diff --git a/internal/postgres/postgres.go b/internal/postgres/postgres.go
index 0fe6614c..c9287954 100644
--- a/internal/postgres/postgres.go
+++ b/internal/postgres/postgres.go
@@ -19,8 +19,8 @@ func New(db *database.DB) *DB {
}
// NewBypassingLicenseCheck returns a new postgres DB that bypasses license
-// checks on insertion. That means all data will be inserted for
-// non-redistributable modules and packages.
+// checks. That means all data will be inserted and returned for
+// non-redistributable modules, packages and directories.
func NewBypassingLicenseCheck(db *database.DB) *DB {
return &DB{db, true}
}
diff --git a/internal/postgres/search.go b/internal/postgres/search.go
index 1952be1b..ded4156b 100644
--- a/internal/postgres/search.go
+++ b/internal/postgres/search.go
@@ -456,7 +456,8 @@ func (db *DB) addPackageDataToSearchResults(ctx context.Context, results []*inte
path,
name,
synopsis,
- license_types
+ license_types,
+ redistributable
FROM
packages
WHERE
@@ -465,8 +466,9 @@ func (db *DB) addPackageDataToSearchResults(ctx context.Context, results []*inte
var (
path, name, synopsis string
licenseTypes []string
+ redist bool
)
- if err := rows.Scan(&path, &name, &synopsis, pq.Array(&licenseTypes)); err != nil {
+ if err := rows.Scan(&path, &name, &synopsis, pq.Array(&licenseTypes), &redist); err != nil {
return fmt.Errorf("rows.Scan(): %v", err)
}
r, ok := resultMap[path]
@@ -474,7 +476,9 @@ func (db *DB) addPackageDataToSearchResults(ctx context.Context, results []*inte
return fmt.Errorf("BUG: unexpected package path: %q", path)
}
r.Name = name
- r.Synopsis = synopsis
+ if redist || db.bypassLicenseCheck {
+ r.Synopsis = synopsis
+ }
for _, l := range licenseTypes {
if l != "" {
r.Licenses = append(r.Licenses, l)
@@ -561,6 +565,8 @@ func UpsertSearchDocuments(ctx context.Context, db *database.DB, mod *internal.M
if isInternalPackage(pkg.Path) {
continue
}
+ // TODO(golang/go#40971): don't store synopsis unless the package is redistributable or
+ // we are bypassing license checks.
err := UpsertSearchDocument(ctx, db, upsertSearchDocumentArgs{
PackagePath: pkg.Path,
ModulePath: mod.ModulePath,
diff --git a/internal/postgres/search_test.go b/internal/postgres/search_test.go
index 14922f22..9ab6b8f8 100644
--- a/internal/postgres/search_test.go
+++ b/internal/postgres/search_test.go
@@ -652,6 +652,34 @@ func TestExcludedFromSearch(t *testing.T) {
}
}
+func TestSearchBypass(t *testing.T) {
+ ctx, cancel := context.WithTimeout(context.Background(), testTimeout)
+ defer cancel()
+ defer ResetTestDB(testDB, t)
+ bypassDB := NewBypassingLicenseCheck(testDB.db)
+
+ m := nonRedistributableModule()
+ if err := bypassDB.InsertModule(ctx, m); err != nil {
+ t.Fatal(err)
+ }
+
+ for _, test := range []struct {
+ db *DB
+ wantEmpty bool
+ }{
+ {testDB, true},
+ {bypassDB, false},
+ } {
+ rs, err := test.db.Search(ctx, m.ModulePath, 10, 0)
+ if err != nil {
+ t.Fatal(err)
+ }
+ if got := (rs[0].Synopsis == ""); got != test.wantEmpty {
+ t.Errorf("bypass %t: got empty %t, want %t", test.db == bypassDB, got, test.wantEmpty)
+ }
+ }
+}
+
type searchDocument struct {
packagePath string
modulePath string