summary refs log tree commit diff
diff options
context:
space:
mode:
author Shulhan <ms@kilabit.info> 2023-06-01 17:45:55 +0700
committer Shulhan <ms@kilabit.info> 2023-06-03 01:08:02 +0700
commit a9198587b02ee060d8cacbe9d5ff19c5c1532a89 (patch)
tree d27cb338e5484f45239b762dc1b9f259d97d4e12
parent a9f6156024d5e7def26640bd6448001d3da19e4e (diff)
download pakakeh.go-a9198587b02ee060d8cacbe9d5ff19c5c1532a89.tar.xz
lib/email: refactoring Field parsing
Split the parsing into two methods: parseName and parseValue. The errors returned from those methods are prefixed with the method name.
-rw-r--r-- _doc/RFC_5322__IMF.adoc 29
-rw-r--r-- lib/email/body_test.go 2
-rw-r--r-- lib/email/doc.go 2
-rw-r--r-- lib/email/field.go 127
-rw-r--r-- lib/email/field_test.go 30
-rw-r--r-- lib/email/header_test.go 4
-rw-r--r-- lib/email/message_test.go 2
-rw-r--r-- lib/email/mime_test.go 2
8 files changed, 112 insertions, 86 deletions
diff --git a/_doc/RFC_5322__IMF.adoc b/_doc/RFC_5322__IMF.adoc
index fc2e9699..0359f6d2 100644
--- a/_doc/RFC_5322__IMF.adoc
+++ b/_doc/RFC_5322__IMF.adoc
@@ -12,26 +12,29 @@ Message Format as defined in {url-rfc5322}[RFC 5322^].
== Syntax
....
-message = (fields / obs-fields)
+message = header
[CRLF body]
-fields = *(field-name ":" (field-body / unstructured) CRLF)
+header = *field
-field-name = 1*ftext
+field = field-name ":" field-body CRLF
-field-body = (*([FWS] VCHAR) *WSP)
+field-name = 1*(ftext / obs-ftext)
-unstructured = (*([FWS] VCHAR) *WSP) / obs-unstruct
+field-body = *(FWS / WSP / VCHAR)
VCHAR = %d33-126
WSP = %d9 / %d32
; tab or space
+obs-ftext = %d32 / ftext
+ ; Allow space in obsolete syntax.
+
ftext = %d33-57 / %d59-126
; Printable US-ASCII, except %d0-32 and %d58 (":")
-body = (*(*998text CRLF) *998text) / obs-body
+body = (*(*998text CRLF) *998text)
text = %d1-9 / ; Characters excluding CR
%d11 / ; and LF
@@ -39,12 +42,11 @@ text = %d1-9 / ; Characters excluding CR
%d14-127
....
-* Each line MUST be no more than 998 characters, excluding CRLF.
-
-* Each line SHOULD be no more than 78 characters, excluding the CRLF.
+* Each line in a message (header and body) MUST be no more than 998
+ characters, excluding CRLF.
-* CR and LF MUST only occur together as CRLF; they MUST NOT appear
- independently in the body.
+* Each line in a message SHOULD be no more than 78 characters, excluding the
+ CRLF.
* Each header field SHOULD be treated in its unfolded form for further
syntactic and semantic evaluation.
@@ -52,13 +54,16 @@ text = %d1-9 / ; Characters excluding CR
* "field-body" MUST NOT include CR and LF except when used in "folding" and
"unfolding".
+* CR and LF MUST only occur together as CRLF; they MUST NOT appear
+ independently in the body.
+
=== Folding White Space and Comments
....
CFWS = (1*([FWS] comment) [FWS]) / FWS
-FWS = ([*WSP CRLF] 1*WSP) / obs-FWS
+FWS = CRLF 1*WSP / obs-FWS
; Folding white space
comment = "(" *([FWS] ccontent) [FWS] ")"
diff --git a/lib/email/body_test.go b/lib/email/body_test.go
index 409765dc..d53d217d 100644
--- a/lib/email/body_test.go
+++ b/lib/email/body_test.go
@@ -38,7 +38,7 @@ func TestParseBody(t *testing.T) {
"--boundary\r\n" +
"Content-Encoding:\r\n\r\n",
boundary: "boundary",
- expErr: "email: empty field value at 'Content-Encoding:\r\n'",
+ expErr: `ParseField: parseValue: empty field value`,
}, {
desc: "With epilogue",
in: "preamble\r\n\r\n" +
diff --git a/lib/email/doc.go b/lib/email/doc.go
index 802b58a1..44f26f77 100644
--- a/lib/email/doc.go
+++ b/lib/email/doc.go
@@ -29,7 +29,7 @@
// | Name | Value | Type |
// +------+-------+------+
//
-// [Field] is parsed line that contains Name and Value separated by ": ".
+// [Field] is a parsed line that contains a Name and a Value separated by a colon ':'.
//
// A [ContentType] is special Field where Name is "Content-Type", and its
// Value is parsed from string "top/sub; <param>; ...".
diff --git a/lib/email/field.go b/lib/email/field.go
index cbd25530..ecf377e4 100644
--- a/lib/email/field.go
+++ b/lib/email/field.go
@@ -37,6 +37,10 @@ type Field struct {
// Type of field, the numeric representation of field name.
Type FieldType
+ // isFolded is set to true if the field line contains folding, that is a
+ // CRLF followed by a tab or space and more of the value.
+ isFolded bool
+
// true if field.unpack has been called, false when field.setValue is
// called again.
unpacked bool
@@ -54,54 +58,85 @@ func ParseField(raw []byte) (field *Field, rest []byte, err error) {
return nil, nil, nil
}
+ var logp = `ParseField`
+
field = &Field{}
- isFolded := false
- start := 0
- // Get field's name.
- // Valid values: %d33-57 / %d59-126 .
- x := 0
+ raw, err = field.parseName(raw)
+ if err != nil {
+ return nil, nil, fmt.Errorf(`%s: %w`, logp, err)
+ }
+
+ raw, err = field.parseValue(raw)
+ if err != nil {
+ return nil, nil, fmt.Errorf(`%s: %w`, logp, err)
+ }
+
+ if !field.isFolded {
+ if (len(field.oriName) + len(field.oriValue) + 1) > 1000 {
+ return nil, nil, fmt.Errorf(`%s: field line greater than 998 characters`, logp)
+ }
+ }
+
+ rest = raw
+ return field, rest, nil
+}
+
+// parseName parses the field name.
+// Format,
+//
+// field-name = 1*(ftext / obs-ftext) ":"
+// obs-ftext = %d32 / ftext
+// ; space allowed in [obsolete] specification.
+// [ftext] = %d33-57 / %d59-126
+// ; printable ASCII character except colon (%d58).
+//
+// [ftext]: https://datatracker.ietf.org/doc/html/rfc5322#section-2.2
+// [obsolete]: https://datatracker.ietf.org/doc/html/rfc5322#section-4.5
+func (field *Field) parseName(raw []byte) (rest []byte, err error) {
+ var (
+ logp = `parseName`
+ x int
+ )
for ; x < len(raw); x++ {
- if raw[x] == ' ' || raw[x] == ':' {
+ if raw[x] == '\t' || raw[x] == ' ' || raw[x] == ':' {
break
}
if raw[x] < 33 || raw[x] > 126 {
- err = fmt.Errorf("email: invalid field at '%s'", raw[:x])
- goto invalid
+ return nil, fmt.Errorf(`%s: invalid character %q`, logp, raw[x])
}
}
- if len(raw) == x {
- err = fmt.Errorf("email: invalid field at '%s'", raw[:x])
- goto invalid
- }
-
// Skip WSP before ':'.
for ; x < len(raw) && (raw[x] == '\t' || raw[x] == ' '); x++ {
}
if len(raw) == x {
- err = fmt.Errorf("email: invalid field at '%s'", raw[:x])
- goto invalid
+ return nil, fmt.Errorf(`%s: missing value`, logp)
}
if raw[x] != ':' {
- err = fmt.Errorf("email: missing field separator at '%s'", raw[:x])
- goto invalid
+ return nil, fmt.Errorf(`%s: missing field separator`, logp)
}
field.setName(raw[:x])
- x++
- start = x
- // Skip WSP after ':'.
- for ; x < len(raw) && (raw[x] == '\t' || raw[x] == ' '); x++ {
- }
+ rest = raw[x+1:]
- if len(raw) == x {
- err = fmt.Errorf("email: empty field value at '%s'", raw[:x])
- goto invalid
- }
+ return rest, nil
+}
+
+// parseValue parses the field value.
+// Format,
+//
+// field-body = 1*(FWS / WSP / %d33-126) CRLF
+// FWS = CRLF WSP ; \r\n followed by space.
+// WSP = %d9 / %d32 ; tab or space.
+//
+// [Reference]: https://datatracker.ietf.org/doc/html/rfc5322#section-2.2
+func (field *Field) parseValue(raw []byte) (rest []byte, err error) {
+ var (
+ logp = `parseValue`
+ x int
+ )
- // Get field's value.
- // Valid values: WSP / %d33-126 .
for ; x < len(raw); x++ {
for ; x < len(raw); x++ {
if raw[x] == '\t' || raw[x] == ' ' {
@@ -112,48 +147,34 @@ func ParseField(raw []byte) (field *Field, rest []byte, err error) {
break
}
if raw[x] < 33 || raw[x] > 126 {
- err = fmt.Errorf("email: invalid field value at '%s'", raw[:x])
- goto invalid
+ return nil, fmt.Errorf(`%s: invalid field value %q`, logp, raw[x])
}
}
if x == len(raw) || raw[x] != lf {
- err = fmt.Errorf("email: field value without CRLF at '%s'", raw[:x])
- goto invalid
+ return nil, fmt.Errorf(`%s: invalid or missing termination`, logp)
}
- if x++; x == len(raw) {
+ x++
+ if x == len(raw) {
break
}
-
// Unfolding ...
if raw[x] == '\t' || raw[x] == ' ' {
- isFolded = true
+ field.isFolded = true
continue
}
+ // End with CRLF.
break
}
- if !isFolded && x > 1000 {
- err = fmt.Errorf("email: field line greater than 998 characters")
- return nil, nil, err
- }
- field.setValue(raw[start:x])
+ field.setValue(raw[:x])
if len(field.Value) == 0 {
- err = fmt.Errorf("email: empty field value at '%s'", raw[:x])
- goto invalid
+ return nil, fmt.Errorf(`%s: empty field value`, logp)
}
- if len(raw) > x {
- rest = raw[x:]
- }
-
- return field, rest, nil
+ rest = raw[x:]
-invalid:
- if x < len(raw) {
- rest = raw[x:]
- }
- return nil, rest, err
+ return rest, nil
}
// addMailboxes append zero or more mailboxes to current mboxes.
@@ -213,7 +234,7 @@ func (field *Field) appendValue(raw []byte) {
// setName set field Name by canonicalizing raw field name using "simple" and
// "relaxed" algorithms.
-// .
+//
// "simple" algorithm store raw field name as is.
//
// "relaxed" algorithm convert field name to lowercase and removing trailing
diff --git a/lib/email/field_test.go b/lib/email/field_test.go
index 672cf181..b8d7c5cb 100644
--- a/lib/email/field_test.go
+++ b/lib/email/field_test.go
@@ -26,59 +26,59 @@ func TestParseField(t *testing.T) {
}, {
desc: "With long line",
raw: []byte("name:" + longValue + "\r\n"),
- expErr: "email: field line greater than 998 characters",
+ expErr: `ParseField: field line greater than 998 characters`,
}, {
desc: "With only whitespaces",
raw: []byte(" "),
- expErr: "email: invalid field at ' '",
+ expErr: `ParseField: parseName: missing value`,
}, {
desc: "With only CRLF",
raw: []byte("\r\n"),
- expErr: "email: invalid field at ''",
+ expErr: `ParseField: parseName: invalid character '\r'`,
}, {
desc: "Without separator and CRLF",
raw: []byte("name"),
- expErr: "email: invalid field at 'name'",
+ expErr: `ParseField: parseName: missing value`,
}, {
desc: "Without separator",
raw: []byte("name\r\n"),
- expErr: "email: invalid field at 'name'",
+ expErr: `ParseField: parseName: invalid character '\r'`,
}, {
desc: "With space on name",
raw: []byte("na me\r\n"),
- expErr: "email: missing field separator at 'na '",
+ expErr: `ParseField: parseName: missing field separator`,
}, {
desc: "Without value and CRLF",
raw: []byte("name:"),
- expErr: "email: empty field value at 'name:'",
+ expErr: `ParseField: parseValue: empty field value`,
}, {
desc: "Without value and CRLF",
raw: []byte("name: "),
- expErr: "email: empty field value at 'name: '",
+ expErr: `ParseField: parseValue: invalid or missing termination`,
}, {
desc: "Without value",
raw: []byte("name:\r\n"),
- expErr: "email: empty field value at 'name:\r\n'",
+ expErr: `ParseField: parseValue: empty field value`,
}, {
desc: "Without value",
raw: []byte("name: \r\n"),
- expErr: "email: empty field value at 'name: \r\n'",
+ expErr: `ParseField: parseValue: empty field value`,
}, {
desc: "Without CRLF",
raw: []byte("name:value"),
- expErr: "email: field value without CRLF at 'name:value'",
+ expErr: `ParseField: parseValue: invalid or missing termination`,
}, {
desc: "Without CR",
raw: []byte("name:value\n"),
- expErr: "email: invalid field value at 'name:value'",
+ expErr: `ParseField: parseValue: invalid field value '\n'`,
}, {
desc: "Without LF",
raw: []byte("name:value\r"),
- expErr: "email: field value without CRLF at 'name:value\r'",
+ expErr: `ParseField: parseValue: invalid or missing termination`,
}, {
desc: "With CR inside value",
raw: []byte("name:valu\re"),
- expErr: "email: field value without CRLF at 'name:valu\r'",
+ expErr: `ParseField: parseValue: invalid or missing termination`,
}, {
desc: "With valid input",
raw: []byte("NAME : VALUE\r\n"),
@@ -308,7 +308,7 @@ func TestUnpackMailboxList(t *testing.T) {
in []byte
}{{
in: []byte("From: \r\n"),
- expErr: "email: empty field value at 'From: \r\n'",
+ expErr: `ParseField: parseValue: empty field value`,
}, {
in: []byte("From: test@one, test@two\r\n"),
exp: "from:test@one, test@two\r\n",
diff --git a/lib/email/header_test.go b/lib/email/header_test.go
index f0b8c86a..ce5b9ead 100644
--- a/lib/email/header_test.go
+++ b/lib/email/header_test.go
@@ -69,7 +69,7 @@ func TestParseHeader(t *testing.T) {
}, {
desc: "With whitespaces only",
raw: []byte(" \t"),
- expErr: "email: invalid field at ' \t'",
+ expErr: `ParseField: parseName: missing value`,
}, {
desc: "With CRLF only",
raw: []byte("\r\n"),
@@ -81,7 +81,7 @@ func TestParseHeader(t *testing.T) {
}, {
desc: "With invalid field: missing value",
raw: []byte("a:\r\n\t"),
- expErr: "email: empty field value at 'a:\r\n\t'",
+ expErr: `ParseField: parseValue: empty field value`,
}, {
desc: "With single field",
raw: []byte("a:1\r\n"),
diff --git a/lib/email/message_test.go b/lib/email/message_test.go
index 7813929b..a323b0f8 100644
--- a/lib/email/message_test.go
+++ b/lib/email/message_test.go
@@ -90,7 +90,7 @@ func TestMessageParseMessage(t *testing.T) {
exp: "\r\n",
}, {
in: "testdata/invalid-header.txt",
- expErr: "ParseMessage: email: invalid field value at 'From : John Doe <jdoe@machine(comment). example>'",
+ expErr: `ParseMessage: ParseField: parseValue: invalid field value '\n'`,
}, {
in: "testdata/rfc5322-A.6.3.txt",
exp: "from:John Doe <jdoe@machine(comment). example>\r\n" +
diff --git a/lib/email/mime_test.go b/lib/email/mime_test.go
index 51826004..794faf67 100644
--- a/lib/email/mime_test.go
+++ b/lib/email/mime_test.go
@@ -55,7 +55,7 @@ func TestParseBodyPart(t *testing.T) {
in: "--boundary\r\n" +
"Content-Encoding:\r\n\r\n",
boundary: "boundary",
- expErr: "email: empty field value at 'Content-Encoding:\r\n'",
+ expErr: `ParseField: parseValue: empty field value`,
}, {
desc: "With end of body",
in: "--boundary--\r\n\r\n" +