lib/email: handle obsolete white space and comments when unpacking date

In the obsolete syntax, white space and comments can appear between many more elements; for example, the following Date value is valid: "Date : Fri, 21 Nov 1997 09(comment): 55 : 06 -0600". This change handles it by sanitizing the Field value, removing comments and merging multiple spaces into one, before parsing it.
author: Shulhan <ms@kilabit.info> 2023-06-04 00:58:04 +0700
committer: Shulhan <ms@kilabit.info> 2023-06-04 01:01:01 +0700
commit: ca1ea634fb17588132beebb2997dec9dacef55e3 (patch)
tree: deb6c452857519aeaffab2756400c233f1a2a644 /lib/email
parent: 749d617a532a0099e277bc8b466cfb008e3d011c (diff)
download: pakakeh.go-ca1ea634fb17588132beebb2997dec9dacef55e3.tar.xz
3 files changed, 93 insertions, 6 deletions
diff --git a/lib/email/email.go b/lib/email/email.go
index 7618cbeb..f03ed6c6 100644
--- a/lib/email/email.go
+++ b/lib/email/email.go
@@ -45,3 +45,39 @@ func randomChars(n int) []byte {
 	rand.Seed(Epoch())
 	return ascii.Random([]byte(ascii.LettersNumber), n)
 }
+
+// sanitize removes comments from in and merges multiple spaces into one.
+// A comment starts with '(' and ends with ')', and can be nested:
+// "(...(...(...)...)...)".
+func sanitize(in []byte) (out []byte) {
+	var (
+		c         byte
+		inComment int
+		hasSpace  bool
+	)
+	out = make([]byte, 0, len(in))
+	for _, c = range in {
+		if inComment != 0 {
+			if c == ')' {
+				inComment--
+			} else if c == '(' {
+				inComment++
+			}
+			continue
+		}
+		if c == '(' {
+			inComment++
+			continue
+		}
+		if ascii.IsSpace(c) {
+			hasSpace = true
+			continue
+		}
+		if hasSpace {
+			out = append(out, ' ')
+			hasSpace = false
+		}
+		out = append(out, c)
+	}
+	return out
+}
diff --git a/lib/email/email_test.go b/lib/email/email_test.go
index 20163ee8..34cd0d8c 100644
--- a/lib/email/email_test.go
+++ b/lib/email/email_test.go
@@ -13,6 +13,7 @@ import (
 	"time"
 
 	"github.com/shuLhan/share/lib/email/dkim"
+	"github.com/shuLhan/share/lib/test"
 )
 
 var (
@@ -61,3 +62,33 @@ func initKeys(t *testing.T) {
 	dname := "brisbane._domainkey.example.com"
 	dkim.DefaultKeyPool.Put(dname, key)
 }
+
+func TestSanitize(t *testing.T) {
+	type testCase struct {
+		in  []byte
+		exp []byte
+	}
+
+	var cases = []testCase{{
+		in:  []byte("not\n a\t comment"),
+		exp: []byte("not a comment"),
+	}, {
+		in:  []byte("A B \n (comment \t) C \r\n ( \tcomment )\r\n\tD\r\n "),
+		exp: []byte(`A B C D`),
+	}, {
+		in:  []byte("A B \r\n ( C (D\r\n (E)) \t) F\r\n "),
+		exp: []byte(`A B F`),
+	}, {
+		in:  []byte("Fri, 21 Nov 1997 09(comment): 55 : 06 -0600\r\n"),
+		exp: []byte("Fri, 21 Nov 1997 09: 55 : 06 -0600"),
+	}}
+
+	var (
+		c   testCase
+		got []byte
+	)
+	for _, c = range cases {
+		got = sanitize(c.in)
+		test.Assert(t, `sanitize`, string(c.exp), string(got))
+	}
+}
diff --git a/lib/email/field.go b/lib/email/field.go
index ecf377e4..4e7a0ffe 100644
--- a/lib/email/field.go
+++ b/lib/email/field.go
@@ -362,7 +362,8 @@ func (field *Field) unpackDate() (err error) {
 	}
 
 	var (
-		parser = libbytes.NewParser(field.Value, []byte{',', ' ', cr, lf})
+		value  = sanitize(field.Value)
+		parser = libbytes.NewParser(value, []byte{',', ' '})
 
 		vstr  string
 		token []byte
@@ -429,6 +430,9 @@ func (field *Field) unpackDate() (err error) {
 	if hour < 0 || hour > 23 {
 		return fmt.Errorf(`%s: invalid hour %d`, logp, hour)
 	}
+	if c == ' ' {
+		_, c = parser.SkipSpaces()
+	}
 	if c != ':' {
 		return fmt.Errorf(`%s: invalid or missing time separator`, logp)
 	}
@@ -443,11 +447,24 @@ func (field *Field) unpackDate() (err error) {
 	if min < 0 || min > 59 {
 		return fmt.Errorf(`%s: invalid minute %d`, logp, min)
 	}
+	token = nil
+
+	if c == ' ' {
+		token, c = parser.ReadNoSpace()
+		if c == ':' && len(token) != 0 {
+			return fmt.Errorf(`%s: unknown token after minute %q`, logp, token)
+		}
+		// At this point the date may have second and token may be a
+		// zone.
+		// We check again later if token is nil after parsing the
+		// second part.
+	}
+
+	parser.RemoveDelimiters([]byte{':'})
 
 	// Get second ...
 	var sec int64
 	if c == ':' {
-		parser.RemoveDelimiters([]byte{':'})
 		token, _ = parser.ReadNoSpace()
 		sec, err = strconv.ParseInt(string(token), 10, 64)
 		if err != nil {
@@ -456,16 +473,19 @@ func (field *Field) unpackDate() (err error) {
 		if sec < 0 || sec > 59 {
 			return fmt.Errorf(`%s: invalid second %d`, logp, sec)
 		}
+		token = nil
 	}
 
-	// Get zone offset ...
+	// Get zone offset.
 	var (
 		off  int64
 		zone string
 	)
-	token, _ = parser.ReadNoSpace()
-	if len(token) == 0 {
-		return fmt.Errorf(`%s: invalid or missing zone %s`, logp, token)
+	if token == nil { // The data contains second.
+		token, _ = parser.ReadNoSpace()
+		if len(token) == 0 {
+			return fmt.Errorf(`%s: invalid or missing zone %s`, logp, token)
+		}
 	}
 	if len(token) != 0 {
 		if token[0] == '+' || token[0] == '-' {
author	Shulhan <ms@kilabit.info>	2023-06-04 00:58:04 +0700
committer	Shulhan <ms@kilabit.info>	2023-06-04 01:01:01 +0700
commit	ca1ea634fb17588132beebb2997dec9dacef55e3 (patch)
tree	deb6c452857519aeaffab2756400c233f1a2a644 /lib/email
parent	749d617a532a0099e277bc8b466cfb008e3d011c (diff)
download	pakakeh.go-ca1ea634fb17588132beebb2997dec9dacef55e3.tar.xz