aboutsummaryrefslogtreecommitdiff
path: root/internal/fetch
diff options
context:
space:
mode:
authorJonathan Amsterdam <jba@google.com>2021-09-01 07:22:46 -0400
committerJonathan Amsterdam <jba@google.com>2021-09-01 14:16:41 +0000
commitbb6da5216eba144f9891c1c0dd2228fdbfa45163 (patch)
tree230b553713bffd364321db456594aa70026c8dca /internal/fetch
parent6e28a50e59803c0b1f05ca9e18a38d33b9e2868b (diff)
downloadgo-x-pkgsite-bb6da5216eba144f9891c1c0dd2228fdbfa45163.tar.xz
internal/{worker/fetch}: include DB activity in load-shedding
Move the load-shedding logic to the worker and have it span both the fetch and processing of the module (as previously) as well as inserting it into the database. This is a more accurate estimation of load, since running a lot of concurrent queries definitely slows down processing. Most of the time this won't make much difference, but under high load, such as when processing multiple large modules, it will reduce DB contention and should result in greater throughput. For golang/go#48010 Change-Id: I7d0922e02d00182e867fd3b29fc284c32ecab5ee Reviewed-on: https://go-review.googlesource.com/c/pkgsite/+/346749 Trust: Jonathan Amsterdam <jba@google.com> Run-TryBot: Jonathan Amsterdam <jba@google.com> TryBot-Result: kokoro <noreply+kokoro@google.com> Reviewed-by: Julie Qiu <julie@golang.org>
Diffstat (limited to 'internal/fetch')
-rw-r--r--internal/fetch/fetch.go53
-rw-r--r--internal/fetch/fetch_test.go3
-rw-r--r--internal/fetch/load.go36
-rw-r--r--internal/fetch/loadshedding.go69
-rw-r--r--internal/fetch/loadshedding_test.go59
5 files changed, 2 insertions, 218 deletions
diff --git a/internal/fetch/fetch.go b/internal/fetch/fetch.go
index 3833379e..b7adfc09 100644
--- a/internal/fetch/fetch.go
+++ b/internal/fetch/fetch.go
@@ -41,11 +41,6 @@ var (
"Latency of a fetch request.",
stats.UnitSeconds,
)
- fetchesShedded = stats.Int64(
- "go-discovery/worker/fetch-shedded",
- "Count of shedded fetches.",
- stats.UnitDimensionless,
- )
fetchedPackages = stats.Int64(
"go-discovery/worker/fetch-package-count",
"Count of successfully fetched packages.",
@@ -76,13 +71,6 @@ var (
Aggregation: view.Count(),
Description: "Count of packages successfully fetched",
}
- // SheddedFetchCount counts the number of fetches that were shedded.
- SheddedFetchCount = &view.View{
- Name: "go-discovery/worker/fetch-shedded",
- Measure: fetchesShedded,
- Aggregation: view.Count(),
- Description: "Count of shedded fetches",
- }
)
type FetchResult struct {
@@ -98,7 +86,6 @@ type FetchResult struct {
GoModPath string
Status int
Error error
- Defer func() // caller must defer this on all code paths
Module *internal.Module
PackageVersionStates []*internal.PackageVersionState
}
@@ -108,10 +95,6 @@ type FetchResult struct {
// *internal.Module and related information.
//
// Even if err is non-nil, the result may contain useful information, like the go.mod path.
-//
-// Callers of FetchModule must
-// defer fr.Defer()
-// immediately after the call.
func FetchModule(ctx context.Context, modulePath, requestedVersion string, mg ModuleGetter, sourceClient *source.Client) (fr *FetchResult) {
start := time.Now()
defer func() {
@@ -125,7 +108,6 @@ func FetchModule(ctx context.Context, modulePath, requestedVersion string, mg Mo
fr = &FetchResult{
ModulePath: modulePath,
RequestedVersion: requestedVersion,
- Defer: func() {},
}
defer derrors.Wrap(&fr.Error, "FetchModule(%q, %q)", modulePath, requestedVersion)
@@ -151,35 +133,11 @@ func fetchModule(ctx context.Context, fr *FetchResult, mg ModuleGetter, sourceCl
fr.ResolvedVersion = info.Version
commitTime := info.Time
- var zipSize int64
- if zipLoadShedder != nil {
- var err error
- zipSize, err = getZipSize(ctx, fr.ModulePath, fr.ResolvedVersion, mg)
- if err != nil {
- return nil, err
- }
- // Load shed or mark module as too large.
- // We treat zip size as a proxy for the total memory consumed by
- // processing a module, and use it to decide whether we can currently
- // afford to process a module.
- shouldShed, deferFunc := zipLoadShedder.shouldShed(uint64(zipSize))
- fr.Defer = deferFunc
- if shouldShed {
- stats.Record(ctx, fetchesShedded.M(1))
- return nil, fmt.Errorf("%w: size=%dMi", derrors.SheddingLoad, zipSize/mib)
- }
- if zipSize > maxModuleZipSize {
- log.Warningf(ctx, "FetchModule: %s@%s zip size %dMi exceeds max %dMi",
- fr.ModulePath, fr.ResolvedVersion, zipSize/mib, maxModuleZipSize/mib)
- return nil, derrors.ModuleTooLarge
- }
- }
-
- // Proceed with the fetch.
+ // TODO(golang/go#48010): move fetch info to the worker.
fi := &FetchInfo{
ModulePath: fr.ModulePath,
Version: fr.ResolvedVersion,
- ZipSize: uint64(zipSize),
+ ZipSize: uint64(0),
Start: time.Now(),
}
startFetchInfo(fi)
@@ -268,13 +226,6 @@ func GetInfo(ctx context.Context, modulePath, requestedVersion string, mg Module
return mg.Info(ctx, modulePath, requestedVersion)
}
-func getZipSize(ctx context.Context, modulePath, resolvedVersion string, mg ModuleGetter) (_ int64, err error) {
- if modulePath == stdlib.ModulePath {
- return stdlib.EstimatedZipSize, nil
- }
- return mg.ZipSize(ctx, modulePath, resolvedVersion)
-}
-
// getGoModPath returns the module path from the go.mod file, as well as the
// contents of the file obtained from the module getter. If modulePath is the
// standard library, then the contents will be nil.
diff --git a/internal/fetch/fetch_test.go b/internal/fetch/fetch_test.go
index dd260e32..cb6c5a1b 100644
--- a/internal/fetch/fetch_test.go
+++ b/internal/fetch/fetch_test.go
@@ -118,7 +118,6 @@ func TestFetchModule(t *testing.T) {
defer cancel()
got, d := fetcher.fetch(t, true, ctx, mod, test.fetchVersion)
- defer got.Defer()
if got.Error != nil {
t.Fatalf("fetching failed: %v", got.Error)
}
@@ -129,7 +128,6 @@ func TestFetchModule(t *testing.T) {
opts := []cmp.Option{
cmpopts.IgnoreFields(internal.Documentation{}, "Source"),
cmpopts.IgnoreFields(internal.PackageVersionState{}, "Error"),
- cmpopts.IgnoreFields(FetchResult{}, "Defer"),
cmp.AllowUnexported(source.Info{}),
cmpopts.EquateEmpty(),
}
@@ -216,7 +214,6 @@ func TestFetchModule_Errors(t *testing.T) {
} {
t.Run(fmt.Sprintf("%s:%s", fetcher.name, test.name), func(t *testing.T) {
got, _ := fetcher.fetch(t, false, ctx, test.mod.mod, "")
- defer got.Defer()
if !errors.Is(got.Error, test.wantErr) {
t.Fatalf("got error = %v; wantErr = %v)", got.Error, test.wantErr)
}
diff --git a/internal/fetch/load.go b/internal/fetch/load.go
index 8c47df79..ab3135bd 100644
--- a/internal/fetch/load.go
+++ b/internal/fetch/load.go
@@ -17,7 +17,6 @@ import (
"io"
"io/fs"
"io/ioutil"
- "math"
"net/http"
"os"
"path"
@@ -26,10 +25,8 @@ import (
"go.opencensus.io/trace"
"golang.org/x/pkgsite/internal"
- "golang.org/x/pkgsite/internal/config"
"golang.org/x/pkgsite/internal/derrors"
"golang.org/x/pkgsite/internal/godoc"
- "golang.org/x/pkgsite/internal/log"
"golang.org/x/pkgsite/internal/source"
"golang.org/x/pkgsite/internal/stdlib"
)
@@ -358,36 +355,3 @@ func readFSFile(fsys fs.FS, path string, limit int64) (_ []byte, err error) {
defer f.Close()
return ioutil.ReadAll(io.LimitReader(f, limit))
}
-
-// mib is the number of bytes in a mebibyte (Mi).
-const mib = 1024 * 1024
-
-// The largest module zip size we can comfortably process.
-// We probably will OOM if we process a module whose zip is larger.
-var maxModuleZipSize int64 = math.MaxInt64
-
-func init() {
- v := config.GetEnvInt(context.Background(), "GO_DISCOVERY_MAX_MODULE_ZIP_MI", -1)
- if v > 0 {
- maxModuleZipSize = int64(v) * mib
- }
-}
-
-var zipLoadShedder *loadShedder
-
-func init() {
- ctx := context.Background()
- mebis := config.GetEnvInt(ctx, "GO_DISCOVERY_MAX_IN_FLIGHT_ZIP_MI", -1)
- if mebis > 0 {
- log.Infof(ctx, "shedding load over %dMi", mebis)
- zipLoadShedder = &loadShedder{maxSizeInFlight: uint64(mebis) * mib}
- }
-}
-
-// ZipLoadShedStats returns a snapshot of the current LoadShedStats for zip files.
-func ZipLoadShedStats() LoadShedStats {
- if zipLoadShedder != nil {
- return zipLoadShedder.stats()
- }
- return LoadShedStats{}
-}
diff --git a/internal/fetch/loadshedding.go b/internal/fetch/loadshedding.go
deleted file mode 100644
index 6f609c84..00000000
--- a/internal/fetch/loadshedding.go
+++ /dev/null
@@ -1,69 +0,0 @@
-// Copyright 2020 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package fetch
-
-import (
- "sync"
-)
-
-type loadShedder struct {
- // The maximum size of requests that can be processed at once. If an
- // incoming request would cause sizeInFlight to exceed this value, it won't
- // be processed.
- maxSizeInFlight uint64
-
- // Protects the variables below, and also serializes shedding decisions so
- // multiple simultaneous requests are handled properly.
- mu sync.Mutex
-
- sizeInFlight uint64 // size of requests currently in progress.
- requestsInFlight int // number of request currently in progress
- requestsTotal int // total fetch requests ever seen
- requestsShed int // number of requests that were shedded
-}
-
-// shouldShed reports whether a request of size should be shed (not processed).
-// Its second return value is a function that should be deferred by the caller.
-func (ls *loadShedder) shouldShed(size uint64) (_ bool, deferFunc func()) {
- ls.mu.Lock()
- defer ls.mu.Unlock()
-
- ls.requestsTotal++
- // Shed if size exceeds our limit--except that if nothing is being
- // processed, accept this request to avoid starving it forever.
- if ls.sizeInFlight > 0 && ls.sizeInFlight+size > ls.maxSizeInFlight {
- ls.requestsShed++
- return true, func() {}
- }
- ls.sizeInFlight += size
- ls.requestsInFlight++
- return false, func() {
- ls.mu.Lock()
- defer ls.mu.Unlock()
- ls.sizeInFlight -= size
- ls.requestsInFlight--
- }
-}
-
-// LoadShedStats holds statistics about load shedding.
-type LoadShedStats struct {
- SizeInFlight uint64
- MaxSizeInFlight uint64
- RequestsInFlight int
- RequestsShed int
- RequestsTotal int
-}
-
-func (ls *loadShedder) stats() LoadShedStats {
- ls.mu.Lock()
- defer ls.mu.Unlock()
- return LoadShedStats{
- RequestsInFlight: ls.requestsInFlight,
- SizeInFlight: ls.sizeInFlight,
- MaxSizeInFlight: ls.maxSizeInFlight,
- RequestsShed: ls.requestsShed,
- RequestsTotal: ls.requestsTotal,
- }
-}
diff --git a/internal/fetch/loadshedding_test.go b/internal/fetch/loadshedding_test.go
deleted file mode 100644
index 0d84f32e..00000000
--- a/internal/fetch/loadshedding_test.go
+++ /dev/null
@@ -1,59 +0,0 @@
-// Copyright 2020 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package fetch
-
-import (
- "math"
- "testing"
-)
-
-func TestDecideToShed(t *testing.T) {
- // With a large maxSizeInFlight, we should never decide to shed no matter
- // the size.
- ls := loadShedder{maxSizeInFlight: math.MaxUint64}
- got, d := ls.shouldShed(1e10)
- if want := false; got != want {
- t.Fatalf("got %t, want %t", got, want)
- }
- d() // reset sizeInFlight
-
- // If nothing else is in flight, accept something too large.
- ls.maxSizeInFlight = 10 * mib
- got, d = ls.shouldShed(20 * mib)
- if want := false; got != want {
- t.Fatalf("got %t, want %t", got, want)
- }
- d()
-
- got, d = ls.shouldShed(3 * mib)
- if want := false; got != want {
- t.Fatalf("got %t, want %t", got, want)
- }
-
- bytesInFlight := func() int {
- return int(ls.stats().SizeInFlight)
- }
-
- if got, want := bytesInFlight(), 3*mib; got != want {
- t.Fatalf("got %d, want %d", got, want)
- }
- got, d2 := ls.shouldShed(8 * mib) // 8 + 3 > 10; shed
- d2()
- if want := true; got != want {
- t.Fatalf("got %t, want %t", got, want)
- }
- d() // should decrement zipSizeInFlight
- if got, want := bytesInFlight(), 0; got != want {
- t.Fatalf("got %d, want %d", got, want)
- }
- got, d = ls.shouldShed(8 * mib) // 8 < 10; do not shed
- if want := false; got != want {
- t.Fatalf("got %t, want %t", got, want)
- }
- d()
- if got, want := bytesInFlight(), 0; got != want {
- t.Fatalf("got %d, want %d", got, want)
- }
-}