diff options
| author | Shulhan <ms@kilabit.info> | 2023-06-04 00:58:04 +0700 |
|---|---|---|
| committer | Shulhan <ms@kilabit.info> | 2023-06-04 01:01:01 +0700 |
| commit | ca1ea634fb17588132beebb2997dec9dacef55e3 (patch) | |
| tree | deb6c452857519aeaffab2756400c233f1a2a644 | |
| parent | 749d617a532a0099e277bc8b466cfb008e3d011c (diff) | |
| download | pakakeh.go-ca1ea634fb17588132beebb2997dec9dacef55e3.tar.xz | |
lib/email: handle obsolete white spaces and comment when unpacking date
In the obsolete syntax, white space and comments can appear between many
more elements; for example, the following Date value is valid:
Date : Fri, 21 Nov 1997 09(comment): 55 : 06 -0600
This change handles it by sanitizing the Field value, removing comments
and merging multiple spaces into one, before parsing it.
| -rw-r--r-- | lib/email/email.go | 36 | ||||
| -rw-r--r-- | lib/email/email_test.go | 31 | ||||
| -rw-r--r-- | lib/email/field.go | 32 |
3 files changed, 93 insertions, 6 deletions
diff --git a/lib/email/email.go b/lib/email/email.go index 7618cbeb..f03ed6c6 100644 --- a/lib/email/email.go +++ b/lib/email/email.go @@ -45,3 +45,39 @@ func randomChars(n int) []byte { rand.Seed(Epoch()) return ascii.Random([]byte(ascii.LettersNumber), n) } + +// sanitize remove comment from in and merge multiple spaces into one. +// A comment start with '(' and end with ')' and can be nested +// "(...(...(...)...)". +func sanitize(in []byte) (out []byte) { + var ( + c byte + inComment int + hasSpace bool + ) + out = make([]byte, 0, len(in)) + for _, c = range in { + if inComment != 0 { + if c == ')' { + inComment-- + } else if c == '(' { + inComment++ + } + continue + } + if c == '(' { + inComment++ + continue + } + if ascii.IsSpace(c) { + hasSpace = true + continue + } + if hasSpace { + out = append(out, ' ') + hasSpace = false + } + out = append(out, c) + } + return out +} diff --git a/lib/email/email_test.go b/lib/email/email_test.go index 20163ee8..34cd0d8c 100644 --- a/lib/email/email_test.go +++ b/lib/email/email_test.go @@ -13,6 +13,7 @@ import ( "time" "github.com/shuLhan/share/lib/email/dkim" + "github.com/shuLhan/share/lib/test" ) var ( @@ -61,3 +62,33 @@ func initKeys(t *testing.T) { dname := "brisbane._domainkey.example.com" dkim.DefaultKeyPool.Put(dname, key) } + +func TestSanitize(t *testing.T) { + type testCase struct { + in []byte + exp []byte + } + + var cases = []testCase{{ + in: []byte("not\n a\t comment"), + exp: []byte("not a comment"), + }, { + in: []byte("A B \n (comment \t) C \r\n ( \tcomment )\r\n\tD\r\n "), + exp: []byte(`A B C D`), + }, { + in: []byte("A B \r\n ( C (D\r\n (E)) \t) F\r\n "), + exp: []byte(`A B F`), + }, { + in: []byte("Fri, 21 Nov 1997 09(comment): 55 : 06 -0600\r\n"), + exp: []byte("Fri, 21 Nov 1997 09: 55 : 06 -0600"), + }} + + var ( + c testCase + got []byte + ) + for _, c = range cases { + got = sanitize(c.in) + test.Assert(t, `sanitize`, string(c.exp), string(got)) + } +} diff --git a/lib/email/field.go 
b/lib/email/field.go index ecf377e4..4e7a0ffe 100644 --- a/lib/email/field.go +++ b/lib/email/field.go @@ -362,7 +362,8 @@ func (field *Field) unpackDate() (err error) { } var ( - parser = libbytes.NewParser(field.Value, []byte{',', ' ', cr, lf}) + value = sanitize(field.Value) + parser = libbytes.NewParser(value, []byte{',', ' '}) vstr string token []byte @@ -429,6 +430,9 @@ func (field *Field) unpackDate() (err error) { if hour < 0 || hour > 23 { return fmt.Errorf(`%s: invalid hour %d`, logp, hour) } + if c == ' ' { + _, c = parser.SkipSpaces() + } if c != ':' { return fmt.Errorf(`%s: invalid or missing time separator`, logp) } @@ -443,11 +447,24 @@ func (field *Field) unpackDate() (err error) { if min < 0 || min > 59 { return fmt.Errorf(`%s: invalid minute %d`, logp, min) } + token = nil + + if c == ' ' { + token, c = parser.ReadNoSpace() + if c == ':' && len(token) != 0 { + return fmt.Errorf(`%s: unknown token after minute %q`, logp, token) + } + // At this point the date may have second and token may be a + // zone. + // We check again later if token is nil after parsing the + // second part. + } + + parser.RemoveDelimiters([]byte{':'}) // Get second ... var sec int64 if c == ':' { - parser.RemoveDelimiters([]byte{':'}) token, _ = parser.ReadNoSpace() sec, err = strconv.ParseInt(string(token), 10, 64) if err != nil { @@ -456,16 +473,19 @@ func (field *Field) unpackDate() (err error) { if sec < 0 || sec > 59 { return fmt.Errorf(`%s: invalid second %d`, logp, sec) } + token = nil } - // Get zone offset ... + // Get zone offset. var ( off int64 zone string ) - token, _ = parser.ReadNoSpace() - if len(token) == 0 { - return fmt.Errorf(`%s: invalid or missing zone %s`, logp, token) + if token == nil { // The data contains second. + token, _ = parser.ReadNoSpace() + if len(token) == 0 { + return fmt.Errorf(`%s: invalid or missing zone %s`, logp, token) + } } if len(token) != 0 { if token[0] == '+' || token[0] == '-' { |
