aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEthan Lee <ethanalee@google.com>2025-11-11 15:50:21 -0500
committerEthan Lee <ethanalee@google.com>2025-11-13 13:26:16 -0800
commitd92b528acce930344e7b33009a7f6b71ba3af6c2 (patch)
treedf11a81d0ced1c17bf30b7c625aeaf1aa1234ce7
parent087c5e66582ec0797a67817d659b758bb732627f (diff)
downloadgo-x-pkgsite-d92b528acce930344e7b33009a7f6b71ba3af6c2.tar.xz
internal/postgres: use JOINs in GetModuleVersionsToClean query
- Previously, the query used EXCEPT clauses to filter module versions to clean from the database. - Now, using JOINs in the query avoids expensive sequential scans. For golang/go#76284 Change-Id: I103b99b633d57ab00f26d0b24c4d4a8993de4872 Reviewed-on: https://go-review.googlesource.com/c/pkgsite/+/719800 kokoro-CI: kokoro <noreply+kokoro@google.com> Reviewed-by: Robert Findley <rfindley@google.com> Auto-Submit: Ethan Lee <ethanalee@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
-rw-r--r--internal/postgres/clean.go45
1 files changed, 26 insertions, 19 deletions
diff --git a/internal/postgres/clean.go b/internal/postgres/clean.go
index 5310d34f..9968747d 100644
--- a/internal/postgres/clean.go
+++ b/internal/postgres/clean.go
@@ -26,25 +26,32 @@ func (db *DB) GetModuleVersionsToClean(ctx context.Context, daysOld, limit int)
// - The ones in search_documents (since the latest version of a package might be at an older version),
// - The ones that the master or main branch resolves to.
query := `
- SELECT module_path, version
- FROM modules
- WHERE version_type = 'pseudo'
- AND CURRENT_TIMESTAMP - updated_at > make_interval(days => $1)
- EXCEPT (
- SELECT p.path, l.good_version
- FROM latest_module_versions l
- INNER JOIN paths p ON p.id = l.module_path_id
- WHERE good_version != ''
- )
- EXCEPT (
- SELECT module_path, version
- FROM search_documents
- )
- EXCEPT (
- SELECT module_path, resolved_version
- FROM version_map
- WHERE requested_version IN ('master', 'main', 'dev.fuzz')
- )
+ SELECT
+ m.module_path,
+ m.version
+ FROM
+ modules m
+ LEFT JOIN
+ (
+ SELECT p.path, l.good_version
+ FROM latest_module_versions l
+ JOIN paths p ON p.id = l.module_path_id
+ WHERE l.good_version != ''
+ ) latest ON m.module_path = latest.path AND m.version = latest.good_version
+ LEFT JOIN
+ search_documents sd ON m.module_path = sd.module_path AND m.version = sd.version
+ LEFT JOIN
+ (
+ SELECT module_path, resolved_version
+ FROM version_map
+ WHERE requested_version IN ('master', 'main', 'dev.fuzz')
+ ) vm_filtered ON m.module_path = vm_filtered.module_path AND m.version = vm_filtered.resolved_version
+ WHERE
+ m.version_type = 'pseudo'
+ AND CURRENT_TIMESTAMP - m.updated_at > make_interval(days => $1)
+ AND latest.path IS NULL
+ AND sd.module_path IS NULL
+ AND vm_filtered.module_path IS NULL
LIMIT $2
`