diff options
| author | Damien Neil <dneil@google.com> | 2026-03-03 08:12:25 -0800 |
|---|---|---|
| committer | Gopher Robot <gobot@golang.org> | 2026-03-12 08:10:34 -0700 |
| commit | afef73a5aacdb4f921ca5fddfb03b0a29998cbfe (patch) | |
| tree | 859bf1bcd53465b761f569e55c51adab69ed18cb /src/net/http | |
| parent | 74649c1cc56b7b9a979b579feab8705378b410f0 (diff) | |
| download | go-afef73a5aacdb4f921ca5fddfb03b0a29998cbfe.tar.xz | |
net/http: move DetectContentType into net/http/internal
The http2 package needs access to DetectContentType,
so move it into a common location.
For #67810
Change-Id: Ibff3d57a4931106c2f69c5717c06bd5f6a6a6964
Reviewed-on: https://go-review.googlesource.com/c/go/+/751701
Reviewed-by: Nicholas Husin <nsh@golang.org>
Auto-Submit: Damien Neil <dneil@google.com>
Reviewed-by: Nicholas Husin <husin@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Diffstat (limited to 'src/net/http')
| -rw-r--r-- | src/net/http/fs.go | 3 | ||||
| -rw-r--r-- | src/net/http/internal/sniff.go | 304 | ||||
| -rw-r--r-- | src/net/http/server.go | 5 | ||||
| -rw-r--r-- | src/net/http/sniff.go | 291 |
4 files changed, 311 insertions, 292 deletions
diff --git a/src/net/http/fs.go b/src/net/http/fs.go index 92bd94f72d..821b9b1266 100644 --- a/src/net/http/fs.go +++ b/src/net/http/fs.go @@ -14,6 +14,7 @@ import ( "io/fs" "mime" "mime/multipart" + "net/http/internal" "net/textproto" "net/url" "os" @@ -288,7 +289,7 @@ func serveContent(w ResponseWriter, r *Request, name string, modtime time.Time, ctype = mime.TypeByExtension(filepath.Ext(name)) if ctype == "" { // read a chunk to decide between utf-8 text and binary - var buf [sniffLen]byte + var buf [internal.SniffLen]byte n, _ := io.ReadFull(content, buf[:]) ctype = DetectContentType(buf[:n]) _, err := content.Seek(0, io.SeekStart) // rewind to output whole file diff --git a/src/net/http/internal/sniff.go b/src/net/http/internal/sniff.go new file mode 100644 index 0000000000..23b96e8992 --- /dev/null +++ b/src/net/http/internal/sniff.go @@ -0,0 +1,304 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package internal + +import ( + "bytes" + "encoding/binary" +) + +// The algorithm uses at most SniffLen bytes to make its decision. +const SniffLen = 512 + +// DetectContentType implements the algorithm described +// at https://mimesniff.spec.whatwg.org/ to determine the +// Content-Type of the given data. It considers at most the +// first 512 bytes of data. DetectContentType always returns +// a valid MIME type: if it cannot determine a more specific one, it +// returns "application/octet-stream". +func DetectContentType(data []byte) string { + if len(data) > SniffLen { + data = data[:SniffLen] + } + + // Index of the first non-whitespace byte in data. + firstNonWS := 0 + for ; firstNonWS < len(data) && isWS(data[firstNonWS]); firstNonWS++ { + } + + for _, sig := range sniffSignatures { + if ct := sig.match(data, firstNonWS); ct != "" { + return ct + } + } + + return "application/octet-stream" // fallback +} + +// isWS reports whether the provided byte is a whitespace byte (0xWS) +// as defined in https://mimesniff.spec.whatwg.org/#terminology. +func isWS(b byte) bool { + switch b { + case '\t', '\n', '\x0c', '\r', ' ': + return true + } + return false +} + +// isTT reports whether the provided byte is a tag-terminating byte (0xTT) +// as defined in https://mimesniff.spec.whatwg.org/#terminology. +func isTT(b byte) bool { + switch b { + case ' ', '>': + return true + } + return false +} + +type sniffSig interface { + // match returns the MIME type of the data, or "" if unknown. + match(data []byte, firstNonWS int) string +} + +// Data matching the table in section 6. +var sniffSignatures = []sniffSig{ + htmlSig("<!DOCTYPE HTML"), + htmlSig("<HTML"), + htmlSig("<HEAD"), + htmlSig("<SCRIPT"), + htmlSig("<IFRAME"), + htmlSig("<H1"), + htmlSig("<DIV"), + htmlSig("<FONT"), + htmlSig("<TABLE"), + htmlSig("<A"), + htmlSig("<STYLE"), + htmlSig("<TITLE"), + htmlSig("<B"), + htmlSig("<BODY"), + htmlSig("<BR"), + htmlSig("<P"), + htmlSig("<!--"), + &maskedSig{ + mask: []byte("\xFF\xFF\xFF\xFF\xFF"), + pat: []byte("<?xml"), + skipWS: true, + ct: "text/xml; charset=utf-8"}, + &exactSig{[]byte("%PDF-"), "application/pdf"}, + &exactSig{[]byte("%!PS-Adobe-"), "application/postscript"}, + + // UTF BOMs. + &maskedSig{ + mask: []byte("\xFF\xFF\x00\x00"), + pat: []byte("\xFE\xFF\x00\x00"), + ct: "text/plain; charset=utf-16be", + }, + &maskedSig{ + mask: []byte("\xFF\xFF\x00\x00"), + pat: []byte("\xFF\xFE\x00\x00"), + ct: "text/plain; charset=utf-16le", + }, + &maskedSig{ + mask: []byte("\xFF\xFF\xFF\x00"), + pat: []byte("\xEF\xBB\xBF\x00"), + ct: "text/plain; charset=utf-8", + }, + + // Image types + // For posterity, we originally returned "image/vnd.microsoft.icon" from + // https://tools.ietf.org/html/draft-ietf-websec-mime-sniff-03#section-7 + // https://codereview.appspot.com/4746042 + // but that has since been replaced with "image/x-icon" in Section 6.2 + // of https://mimesniff.spec.whatwg.org/#matching-an-image-type-pattern + &exactSig{[]byte("\x00\x00\x01\x00"), "image/x-icon"}, + &exactSig{[]byte("\x00\x00\x02\x00"), "image/x-icon"}, + &exactSig{[]byte("BM"), "image/bmp"}, + &exactSig{[]byte("GIF87a"), "image/gif"}, + &exactSig{[]byte("GIF89a"), "image/gif"}, + &maskedSig{ + mask: []byte("\xFF\xFF\xFF\xFF\x00\x00\x00\x00\xFF\xFF\xFF\xFF\xFF\xFF"), + pat: []byte("RIFF\x00\x00\x00\x00WEBPVP"), + ct: "image/webp", + }, + &exactSig{[]byte("\x89PNG\x0D\x0A\x1A\x0A"), "image/png"}, + &exactSig{[]byte("\xFF\xD8\xFF"), "image/jpeg"}, + + // Audio and Video types + // Enforce the pattern match ordering as prescribed in + // https://mimesniff.spec.whatwg.org/#matching-an-audio-or-video-type-pattern + &maskedSig{ + mask: []byte("\xFF\xFF\xFF\xFF\x00\x00\x00\x00\xFF\xFF\xFF\xFF"), + pat: []byte("FORM\x00\x00\x00\x00AIFF"), + ct: "audio/aiff", + }, + &maskedSig{ + mask: []byte("\xFF\xFF\xFF"), + pat: []byte("ID3"), + ct: "audio/mpeg", + }, + &maskedSig{ + mask: []byte("\xFF\xFF\xFF\xFF\xFF"), + pat: []byte("OggS\x00"), + ct: "application/ogg", + }, + &maskedSig{ + mask: []byte("\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF"), + pat: []byte("MThd\x00\x00\x00\x06"), + ct: "audio/midi", + }, + &maskedSig{ + mask: []byte("\xFF\xFF\xFF\xFF\x00\x00\x00\x00\xFF\xFF\xFF\xFF"), + pat: []byte("RIFF\x00\x00\x00\x00AVI "), + ct: "video/avi", + }, + &maskedSig{ + mask: []byte("\xFF\xFF\xFF\xFF\x00\x00\x00\x00\xFF\xFF\xFF\xFF"), + pat: []byte("RIFF\x00\x00\x00\x00WAVE"), + ct: "audio/wave", + }, + // 6.2.0.2. video/mp4 + mp4Sig{}, + // 6.2.0.3. video/webm + &exactSig{[]byte("\x1A\x45\xDF\xA3"), "video/webm"}, + + // Font types + &maskedSig{ + // 34 NULL bytes followed by the string "LP" + pat: []byte("\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00LP"), + // 34 NULL bytes followed by \xF\xF + mask: []byte("\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xFF\xFF"), + ct: "application/vnd.ms-fontobject", + }, + &exactSig{[]byte("\x00\x01\x00\x00"), "font/ttf"}, + &exactSig{[]byte("OTTO"), "font/otf"}, + &exactSig{[]byte("ttcf"), "font/collection"}, + &exactSig{[]byte("wOFF"), "font/woff"}, + &exactSig{[]byte("wOF2"), "font/woff2"}, + + // Archive types + &exactSig{[]byte("\x1F\x8B\x08"), "application/x-gzip"}, + &exactSig{[]byte("PK\x03\x04"), "application/zip"}, + // RAR's signatures are incorrectly defined by the MIME spec as per + // https://github.com/whatwg/mimesniff/issues/63 + // However, RAR Labs correctly defines it at: + // https://www.rarlab.com/technote.htm#rarsign + // so we use the definition from RAR Labs. + // TODO: do whatever the spec ends up doing. + &exactSig{[]byte("Rar!\x1A\x07\x00"), "application/x-rar-compressed"}, // RAR v1.5-v4.0 + &exactSig{[]byte("Rar!\x1A\x07\x01\x00"), "application/x-rar-compressed"}, // RAR v5+ + + &exactSig{[]byte("\x00\x61\x73\x6D"), "application/wasm"}, + + textSig{}, // should be last +} + +type exactSig struct { + sig []byte + ct string +} + +func (e *exactSig) match(data []byte, firstNonWS int) string { + if bytes.HasPrefix(data, e.sig) { + return e.ct + } + return "" +} + +type maskedSig struct { + mask, pat []byte + skipWS bool + ct string +} + +func (m *maskedSig) match(data []byte, firstNonWS int) string { + // pattern matching algorithm section 6 + // https://mimesniff.spec.whatwg.org/#pattern-matching-algorithm + + if m.skipWS { + data = data[firstNonWS:] + } + if len(m.pat) != len(m.mask) { + return "" + } + if len(data) < len(m.pat) { + return "" + } + for i, pb := range m.pat { + maskedData := data[i] & m.mask[i] + if maskedData != pb { + return "" + } + } + return m.ct +} + +type htmlSig []byte + +func (h htmlSig) match(data []byte, firstNonWS int) string { + data = data[firstNonWS:] + if len(data) < len(h)+1 { + return "" + } + for i, b := range h { + db := data[i] + if 'A' <= b && b <= 'Z' { + db &= 0xDF + } + if b != db { + return "" + } + } + // Next byte must be a tag-terminating byte(0xTT). + if !isTT(data[len(h)]) { + return "" + } + return "text/html; charset=utf-8" +} + +var mp4ftype = []byte("ftyp") +var mp4 = []byte("mp4") + +type mp4Sig struct{} + +func (mp4Sig) match(data []byte, firstNonWS int) string { + // https://mimesniff.spec.whatwg.org/#signature-for-mp4 + // c.f. section 6.2.1 + if len(data) < 12 { + return "" + } + boxSize := int(binary.BigEndian.Uint32(data[:4])) + if len(data) < boxSize || boxSize%4 != 0 { + return "" + } + if !bytes.Equal(data[4:8], mp4ftype) { + return "" + } + for st := 8; st < boxSize; st += 4 { + if st == 12 { + // Ignores the four bytes that correspond to the version number of the "major brand". + continue + } + if bytes.Equal(data[st:st+3], mp4) { + return "video/mp4" + } + } + return "" +} + +type textSig struct{} + +func (textSig) match(data []byte, firstNonWS int) string { + // c.f. section 5, step 4. + for _, b := range data[firstNonWS:] { + switch { + case b <= 0x08, + b == 0x0B, + 0x0E <= b && b <= 0x1A, + 0x1C <= b && b <= 0x1F: + return "" + } + } + return "text/plain; charset=utf-8" +} diff --git a/src/net/http/server.go b/src/net/http/server.go index fb167ac7a1..902d6d3367 100644 --- a/src/net/http/server.go +++ b/src/net/http/server.go @@ -19,6 +19,7 @@ import ( "maps" "math/rand/v2" "net" + "net/http/internal" "net/textproto" "net/url" urlpkg "net/url" @@ -603,9 +604,9 @@ func (w *response) ReadFrom(src io.Reader) (n int64, err error) { // source is available (see golang.org/issue/5660) and provides // enough bytes to perform Content-Type sniffing when required. if !w.cw.wroteHeader { - n0, err := io.CopyBuffer(writerOnly{w}, io.LimitReader(src, sniffLen), buf) + n0, err := io.CopyBuffer(writerOnly{w}, io.LimitReader(src, internal.SniffLen), buf) n += n0 - if err != nil || n0 < sniffLen { + if err != nil || n0 < internal.SniffLen { return n, err } } diff --git a/src/net/http/sniff.go b/src/net/http/sniff.go index ac18ab979d..dee63ed4ad 100644 --- a/src/net/http/sniff.go +++ b/src/net/http/sniff.go @@ -4,13 +4,7 @@ package http -import ( - "bytes" - "encoding/binary" -) - -// The algorithm uses at most sniffLen bytes to make its decision. -const sniffLen = 512 +import "net/http/internal" // DetectContentType implements the algorithm described // at https://mimesniff.spec.whatwg.org/ to determine the @@ -19,286 +13,5 @@ const sniffLen = 512 // a valid MIME type: if it cannot determine a more specific one, it // returns "application/octet-stream". func DetectContentType(data []byte) string { - if len(data) > sniffLen { - data = data[:sniffLen] - } - - // Index of the first non-whitespace byte in data. - firstNonWS := 0 - for ; firstNonWS < len(data) && isWS(data[firstNonWS]); firstNonWS++ { - } - - for _, sig := range sniffSignatures { - if ct := sig.match(data, firstNonWS); ct != "" { - return ct - } - } - - return "application/octet-stream" // fallback -} - -// isWS reports whether the provided byte is a whitespace byte (0xWS) -// as defined in https://mimesniff.spec.whatwg.org/#terminology. -func isWS(b byte) bool { - switch b { - case '\t', '\n', '\x0c', '\r', ' ': - return true - } - return false -} - -// isTT reports whether the provided byte is a tag-terminating byte (0xTT) -// as defined in https://mimesniff.spec.whatwg.org/#terminology. -func isTT(b byte) bool { - switch b { - case ' ', '>': - return true - } - return false -} - -type sniffSig interface { - // match returns the MIME type of the data, or "" if unknown. - match(data []byte, firstNonWS int) string -} - -// Data matching the table in section 6. -var sniffSignatures = []sniffSig{ - htmlSig("<!DOCTYPE HTML"), - htmlSig("<HTML"), - htmlSig("<HEAD"), - htmlSig("<SCRIPT"), - htmlSig("<IFRAME"), - htmlSig("<H1"), - htmlSig("<DIV"), - htmlSig("<FONT"), - htmlSig("<TABLE"), - htmlSig("<A"), - htmlSig("<STYLE"), - htmlSig("<TITLE"), - htmlSig("<B"), - htmlSig("<BODY"), - htmlSig("<BR"), - htmlSig("<P"), - htmlSig("<!--"), - &maskedSig{ - mask: []byte("\xFF\xFF\xFF\xFF\xFF"), - pat: []byte("<?xml"), - skipWS: true, - ct: "text/xml; charset=utf-8"}, - &exactSig{[]byte("%PDF-"), "application/pdf"}, - &exactSig{[]byte("%!PS-Adobe-"), "application/postscript"}, - - // UTF BOMs. - &maskedSig{ - mask: []byte("\xFF\xFF\x00\x00"), - pat: []byte("\xFE\xFF\x00\x00"), - ct: "text/plain; charset=utf-16be", - }, - &maskedSig{ - mask: []byte("\xFF\xFF\x00\x00"), - pat: []byte("\xFF\xFE\x00\x00"), - ct: "text/plain; charset=utf-16le", - }, - &maskedSig{ - mask: []byte("\xFF\xFF\xFF\x00"), - pat: []byte("\xEF\xBB\xBF\x00"), - ct: "text/plain; charset=utf-8", - }, - - // Image types - // For posterity, we originally returned "image/vnd.microsoft.icon" from - // https://tools.ietf.org/html/draft-ietf-websec-mime-sniff-03#section-7 - // https://codereview.appspot.com/4746042 - // but that has since been replaced with "image/x-icon" in Section 6.2 - // of https://mimesniff.spec.whatwg.org/#matching-an-image-type-pattern - &exactSig{[]byte("\x00\x00\x01\x00"), "image/x-icon"}, - &exactSig{[]byte("\x00\x00\x02\x00"), "image/x-icon"}, - &exactSig{[]byte("BM"), "image/bmp"}, - &exactSig{[]byte("GIF87a"), "image/gif"}, - &exactSig{[]byte("GIF89a"), "image/gif"}, - &maskedSig{ - mask: []byte("\xFF\xFF\xFF\xFF\x00\x00\x00\x00\xFF\xFF\xFF\xFF\xFF\xFF"), - pat: []byte("RIFF\x00\x00\x00\x00WEBPVP"), - ct: "image/webp", - }, - &exactSig{[]byte("\x89PNG\x0D\x0A\x1A\x0A"), "image/png"}, - &exactSig{[]byte("\xFF\xD8\xFF"), "image/jpeg"}, - - // Audio and Video types - // Enforce the pattern match ordering as prescribed in - // https://mimesniff.spec.whatwg.org/#matching-an-audio-or-video-type-pattern - &maskedSig{ - mask: []byte("\xFF\xFF\xFF\xFF\x00\x00\x00\x00\xFF\xFF\xFF\xFF"), - pat: []byte("FORM\x00\x00\x00\x00AIFF"), - ct: "audio/aiff", - }, - &maskedSig{ - mask: []byte("\xFF\xFF\xFF"), - pat: []byte("ID3"), - ct: "audio/mpeg", - }, - &maskedSig{ - mask: []byte("\xFF\xFF\xFF\xFF\xFF"), - pat: []byte("OggS\x00"), - ct: "application/ogg", - }, - &maskedSig{ - mask: []byte("\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF"), - pat: []byte("MThd\x00\x00\x00\x06"), - ct: "audio/midi", - }, - &maskedSig{ - mask: []byte("\xFF\xFF\xFF\xFF\x00\x00\x00\x00\xFF\xFF\xFF\xFF"), - pat: []byte("RIFF\x00\x00\x00\x00AVI "), - ct: "video/avi", - }, - &maskedSig{ - mask: []byte("\xFF\xFF\xFF\xFF\x00\x00\x00\x00\xFF\xFF\xFF\xFF"), - pat: []byte("RIFF\x00\x00\x00\x00WAVE"), - ct: "audio/wave", - }, - // 6.2.0.2. video/mp4 - mp4Sig{}, - // 6.2.0.3. video/webm - &exactSig{[]byte("\x1A\x45\xDF\xA3"), "video/webm"}, - - // Font types - &maskedSig{ - // 34 NULL bytes followed by the string "LP" - pat: []byte("\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00LP"), - // 34 NULL bytes followed by \xF\xF - mask: []byte("\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xFF\xFF"), - ct: "application/vnd.ms-fontobject", - }, - &exactSig{[]byte("\x00\x01\x00\x00"), "font/ttf"}, - &exactSig{[]byte("OTTO"), "font/otf"}, - &exactSig{[]byte("ttcf"), "font/collection"}, - &exactSig{[]byte("wOFF"), "font/woff"}, - &exactSig{[]byte("wOF2"), "font/woff2"}, - - // Archive types - &exactSig{[]byte("\x1F\x8B\x08"), "application/x-gzip"}, - &exactSig{[]byte("PK\x03\x04"), "application/zip"}, - // RAR's signatures are incorrectly defined by the MIME spec as per - // https://github.com/whatwg/mimesniff/issues/63 - // However, RAR Labs correctly defines it at: - // https://www.rarlab.com/technote.htm#rarsign - // so we use the definition from RAR Labs. - // TODO: do whatever the spec ends up doing. - &exactSig{[]byte("Rar!\x1A\x07\x00"), "application/x-rar-compressed"}, // RAR v1.5-v4.0 - &exactSig{[]byte("Rar!\x1A\x07\x01\x00"), "application/x-rar-compressed"}, // RAR v5+ - - &exactSig{[]byte("\x00\x61\x73\x6D"), "application/wasm"}, - - textSig{}, // should be last -} - -type exactSig struct { - sig []byte - ct string -} - -func (e *exactSig) match(data []byte, firstNonWS int) string { - if bytes.HasPrefix(data, e.sig) { - return e.ct - } - return "" -} - -type maskedSig struct { - mask, pat []byte - skipWS bool - ct string -} - -func (m *maskedSig) match(data []byte, firstNonWS int) string { - // pattern matching algorithm section 6 - // https://mimesniff.spec.whatwg.org/#pattern-matching-algorithm - - if m.skipWS { - data = data[firstNonWS:] - } - if len(m.pat) != len(m.mask) { - return "" - } - if len(data) < len(m.pat) { - return "" - } - for i, pb := range m.pat { - maskedData := data[i] & m.mask[i] - if maskedData != pb { - return "" - } - } - return m.ct -} - -type htmlSig []byte - -func (h htmlSig) match(data []byte, firstNonWS int) string { - data = data[firstNonWS:] - if len(data) < len(h)+1 { - return "" - } - for i, b := range h { - db := data[i] - if 'A' <= b && b <= 'Z' { - db &= 0xDF - } - if b != db { - return "" - } - } - // Next byte must be a tag-terminating byte(0xTT). - if !isTT(data[len(h)]) { - return "" - } - return "text/html; charset=utf-8" -} - -var mp4ftype = []byte("ftyp") -var mp4 = []byte("mp4") - -type mp4Sig struct{} - -func (mp4Sig) match(data []byte, firstNonWS int) string { - // https://mimesniff.spec.whatwg.org/#signature-for-mp4 - // c.f. section 6.2.1 - if len(data) < 12 { - return "" - } - boxSize := int(binary.BigEndian.Uint32(data[:4])) - if len(data) < boxSize || boxSize%4 != 0 { - return "" - } - if !bytes.Equal(data[4:8], mp4ftype) { - return "" - } - for st := 8; st < boxSize; st += 4 { - if st == 12 { - // Ignores the four bytes that correspond to the version number of the "major brand". - continue - } - if bytes.Equal(data[st:st+3], mp4) { - return "video/mp4" - } - } - return "" -} - -type textSig struct{} - -func (textSig) match(data []byte, firstNonWS int) string { - // c.f. section 5, step 4. - for _, b := range data[firstNonWS:] { - switch { - case b <= 0x08, - b == 0x0B, - 0x0E <= b && b <= 0x1A, - 0x1C <= b && b <= 0x1F: - return "" - } - } - return "text/plain; charset=utf-8" + return internal.DetectContentType(data) } |
