diff options
| author | Ethan Lee <ethanalee@google.com> | 2025-11-11 15:50:21 -0500 |
|---|---|---|
| committer | Ethan Lee <ethanalee@google.com> | 2025-11-13 13:26:16 -0800 |
| commit | d92b528acce930344e7b33009a7f6b71ba3af6c2 (patch) | |
| tree | df11a81d0ced1c17bf30b7c625aeaf1aa1234ce7 | |
| parent | 087c5e66582ec0797a67817d659b758bb732627f (diff) | |
| download | go-x-pkgsite-d92b528acce930344e7b33009a7f6b71ba3af6c2.tar.xz | |
internal/postgres: use JOINs in GetModuleVersionsToClean query
- Previously, the query used EXCEPT clauses to filter module versions to clean from the database.
- Now, using JOINs in the query avoids expensive sequential scans.
For golang/go#76284
Change-Id: I103b99b633d57ab00f26d0b24c4d4a8993de4872
Reviewed-on: https://go-review.googlesource.com/c/pkgsite/+/719800
kokoro-CI: kokoro <noreply+kokoro@google.com>
Reviewed-by: Robert Findley <rfindley@google.com>
Auto-Submit: Ethan Lee <ethanalee@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
| -rw-r--r-- | internal/postgres/clean.go | 45 |
1 files changed, 26 insertions, 19 deletions
diff --git a/internal/postgres/clean.go b/internal/postgres/clean.go index 5310d34f..9968747d 100644 --- a/internal/postgres/clean.go +++ b/internal/postgres/clean.go @@ -26,25 +26,32 @@ func (db *DB) GetModuleVersionsToClean(ctx context.Context, daysOld, limit int) // - The ones in search_documents (since the latest version of a package might be at an older version), // - The ones that the master or main branch resolves to. query := ` - SELECT module_path, version - FROM modules - WHERE version_type = 'pseudo' - AND CURRENT_TIMESTAMP - updated_at > make_interval(days => $1) - EXCEPT ( - SELECT p.path, l.good_version - FROM latest_module_versions l - INNER JOIN paths p ON p.id = l.module_path_id - WHERE good_version != '' - ) - EXCEPT ( - SELECT module_path, version - FROM search_documents - ) - EXCEPT ( - SELECT module_path, resolved_version - FROM version_map - WHERE requested_version IN ('master', 'main', 'dev.fuzz') - ) + SELECT + m.module_path, + m.version + FROM + modules m + LEFT JOIN + ( + SELECT p.path, l.good_version + FROM latest_module_versions l + JOIN paths p ON p.id = l.module_path_id + WHERE l.good_version != '' + ) latest ON m.module_path = latest.path AND m.version = latest.good_version + LEFT JOIN + search_documents sd ON m.module_path = sd.module_path AND m.version = sd.version + LEFT JOIN + ( + SELECT module_path, resolved_version + FROM version_map + WHERE requested_version IN ('master', 'main', 'dev.fuzz') + ) vm_filtered ON m.module_path = vm_filtered.module_path AND m.version = vm_filtered.resolved_version + WHERE + m.version_type = 'pseudo' + AND CURRENT_TIMESTAMP - m.updated_at > make_interval(days => $1) + AND latest.path IS NULL + AND sd.module_path IS NULL + AND vm_filtered.module_path IS NULL LIMIT $2 ` |
