about summary refs log tree commit diff
diff options
context:
space:
mode:
author Damien Neil <dneil@google.com> 2025-11-04 17:00:33 -0800
committer Gopher Robot <gobot@golang.org> 2026-01-15 10:35:56 -0800
commit bb7c0c717c8b3517210dce8f38cb2c91694af4e2 (patch)
tree e97f7849dea3ecd44002c595c13d3d6d14cf7ca4
parent 2dcaaa751295597e1f603b7488c4624db6a84d2b (diff)
download go-bb7c0c717c8b3517210dce8f38cb2c91694af4e2.tar.xz
archive/zip: reduce CPU usage in index construction

Constructing the zip index (which is done once when first opening
a file in an archive) can consume large amounts of CPU when
processing deeply-nested directory paths.

Switch to a less inefficient algorithm.

Thanks to Jakub Ciolek for reporting this issue.

goos: darwin
goarch: arm64
pkg: archive/zip
cpu: Apple M4 Pro
                          │  /tmp/bench.0  │            /tmp/bench.1             │
                          │     sec/op     │  sec/op       vs base               │
ReaderOneDeepDir-14         25983.62m ± 2%    46.01m ± 2%  -99.82% (p=0.000 n=8)
ReaderManyDeepDirs-14          16.221 ± 1%     2.763 ± 6%  -82.96% (p=0.000 n=8)
ReaderManyShallowFiles-14      130.3m ± 1%    128.8m ± 2%   -1.20% (p=0.003 n=8)
geomean                         3.801         253.9m       -93.32%

Fixes #77102
Fixes CVE-2025-61728

Change-Id: I2c9c864be01b2a2769eb67fbab1b250aeb8f6c42
Reviewed-on: https://go-internal-review.googlesource.com/c/go/+/3060
Reviewed-by: Nicholas Husin <husin@google.com>
Reviewed-by: Neal Patel <nealpatel@google.com>
Reviewed-on: https://go-review.googlesource.com/c/go/+/736713
Auto-Submit: Michael Pratt <mpratt@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Junyang Shao <shaojunyang@google.com>
-rw-r--r-- src/archive/zip/reader.go 11
-rw-r--r-- src/archive/zip/reader_test.go 81
2 files changed, 91 insertions, 1 deletions
diff --git a/src/archive/zip/reader.go b/src/archive/zip/reader.go
index 6b57f767fc..b2a4ed6042 100644
--- a/src/archive/zip/reader.go
+++ b/src/archive/zip/reader.go
@@ -834,7 +834,16 @@ func (r *Reader) initFileList() {
continue
}
- for dir := path.Dir(name); dir != "."; dir = path.Dir(dir) {
+ dir := name
+ for {
+ if idx := strings.LastIndex(dir, "/"); idx < 0 {
+ break
+ } else {
+ dir = dir[:idx]
+ }
+ if dirs[dir] {
+ break
+ }
dirs[dir] = true
}
diff --git a/src/archive/zip/reader_test.go b/src/archive/zip/reader_test.go
index cb8a0c2871..5ce994e4dd 100644
--- a/src/archive/zip/reader_test.go
+++ b/src/archive/zip/reader_test.go
@@ -9,6 +9,7 @@ import (
"encoding/binary"
"encoding/hex"
"errors"
+ "fmt"
"internal/obscuretestdata"
"io"
"io/fs"
@@ -1874,3 +1875,83 @@ func TestBaseOffsetPlusOverflow(t *testing.T) {
// as the section reader offset & size were < 0.
NewReader(bytes.NewReader(data), int64(len(data))+1875)
}
+
+func BenchmarkReaderOneDeepDir(b *testing.B) {
+ var buf bytes.Buffer
+ zw := NewWriter(&buf)
+
+ for i := range 4000 {
+ name := strings.Repeat("a/", i) + "data"
+ zw.CreateHeader(&FileHeader{
+ Name: name,
+ Method: Store,
+ })
+ }
+
+ if err := zw.Close(); err != nil {
+ b.Fatal(err)
+ }
+ data := buf.Bytes()
+
+ for b.Loop() {
+ zr, err := NewReader(bytes.NewReader(data), int64(len(data)))
+ if err != nil {
+ b.Fatal(err)
+ }
+ zr.Open("does-not-exist")
+ }
+}
+
+func BenchmarkReaderManyDeepDirs(b *testing.B) {
+ var buf bytes.Buffer
+ zw := NewWriter(&buf)
+
+ for i := range 2850 {
+ name := fmt.Sprintf("%x", i)
+ name = strings.Repeat("/"+name, i+1)[1:]
+
+ zw.CreateHeader(&FileHeader{
+ Name: name,
+ Method: Store,
+ })
+ }
+
+ if err := zw.Close(); err != nil {
+ b.Fatal(err)
+ }
+ data := buf.Bytes()
+
+ for b.Loop() {
+ zr, err := NewReader(bytes.NewReader(data), int64(len(data)))
+ if err != nil {
+ b.Fatal(err)
+ }
+ zr.Open("does-not-exist")
+ }
+}
+
+func BenchmarkReaderManyShallowFiles(b *testing.B) {
+ var buf bytes.Buffer
+ zw := NewWriter(&buf)
+
+ for i := range 310000 {
+ name := fmt.Sprintf("%v", i)
+ zw.CreateHeader(&FileHeader{
+ Name: name,
+ Method: Store,
+ })
+ }
+
+ if err := zw.Close(); err != nil {
+ b.Fatal(err)
+ }
+ data := buf.Bytes()
+
+ for b.Loop() {
+ zr, err := NewReader(bytes.NewReader(data), int64(len(data)))
+ if err != nil {
+ b.Fatal(err)
+ }
+ zr.Open("does-not-exist")
+ }
+}