diff options
| author | Shulhan <ms@kilabit.info> | 2023-06-01 17:45:55 +0700 |
|---|---|---|
| committer | Shulhan <ms@kilabit.info> | 2023-06-03 01:08:02 +0700 |
| commit | a9198587b02ee060d8cacbe9d5ff19c5c1532a89 (patch) | |
| tree | d27cb338e5484f45239b762dc1b9f259d97d4e12 | |
| parent | a9f6156024d5e7def26640bd6448001d3da19e4e (diff) | |
| download | pakakeh.go-a9198587b02ee060d8cacbe9d5ff19c5c1532a89.tar.xz | |
lib/email: refactoring Field parsing
Split the parsing into two methods: parseName and parseValue.
The errors returned from those methods are prefixed with the method's name.
| -rw-r--r-- | _doc/RFC_5322__IMF.adoc | 29 | ||||
| -rw-r--r-- | lib/email/body_test.go | 2 | ||||
| -rw-r--r-- | lib/email/doc.go | 2 | ||||
| -rw-r--r-- | lib/email/field.go | 127 | ||||
| -rw-r--r-- | lib/email/field_test.go | 30 | ||||
| -rw-r--r-- | lib/email/header_test.go | 4 | ||||
| -rw-r--r-- | lib/email/message_test.go | 2 | ||||
| -rw-r--r-- | lib/email/mime_test.go | 2 |
8 files changed, 112 insertions, 86 deletions
diff --git a/_doc/RFC_5322__IMF.adoc b/_doc/RFC_5322__IMF.adoc index fc2e9699..0359f6d2 100644 --- a/_doc/RFC_5322__IMF.adoc +++ b/_doc/RFC_5322__IMF.adoc @@ -12,26 +12,29 @@ Message Format as defined in {url-rfc5322}[RFC 5322^]. == Syntax .... -message = (fields / obs-fields) +message = header [CRLF body] -fields = *(field-name ":" (field-body / unstructured) CRLF) +header = *field -field-name = 1*ftext +field = field-name ":" field-body CRLF -field-body = (*([FWS] VCHAR) *WSP) +field-name = 1*(ftext / obs-ftext) -unstructured = (*([FWS] VCHAR) *WSP) / obs-unstruct +field-body = *(FWS / WSP / VCHAR) VCHAR = %d33-126 WSP = %d9 / %d32 ; tab or space +obs-ftext = %d32 / ftext + ; Allow space in obsolete syntax. + ftext = %d33-57 / %d59-126 ; Printable US-ASCII, except %d0-32 and %d58 (":") -body = (*(*998text CRLF) *998text) / obs-body +body = (*(*998text CRLF) *998text) text = %d1-9 / ; Characters excluding CR %d11 / ; and LF @@ -39,12 +42,11 @@ text = %d1-9 / ; Characters excluding CR %d14-127 .... -* Each line MUST be no more than 998 characters, excluding CRLF. - -* Each line SHOULD be no more than 78 characters, excluding the CRLF. +* Each line in a message (header and body) MUST be no more than 998 + characters, excluding CRLF. -* CR and LF MUST only occur together as CRLF; they MUST NOT appear - independently in the body. +* Each line in a message SHOULD be no more than 78 characters, excluding the + CRLF. * Each header field SHOULD be treated in its unfolded form for further syntactic and semantic evaluation. @@ -52,13 +54,16 @@ text = %d1-9 / ; Characters excluding CR * "field-body" MUST NOT include CR and LF except when used in "folding" and "unfolding". +* CR and LF MUST only occur together as CRLF; they MUST NOT appear + independently in the body. + === Folding White Space and Comments .... 
CFWS = (1*([FWS] comment) [FWS]) / FWS -FWS = ([*WSP CRLF] 1*WSP) / obs-FWS +FWS = CRLF 1*WSP / obs-FWS ; Folding white space comment = "(" *([FWS] ccontent) [FWS] ")" diff --git a/lib/email/body_test.go b/lib/email/body_test.go index 409765dc..d53d217d 100644 --- a/lib/email/body_test.go +++ b/lib/email/body_test.go @@ -38,7 +38,7 @@ func TestParseBody(t *testing.T) { "--boundary\r\n" + "Content-Encoding:\r\n\r\n", boundary: "boundary", - expErr: "email: empty field value at 'Content-Encoding:\r\n'", + expErr: `ParseField: parseValue: empty field value`, }, { desc: "With epilogue", in: "preamble\r\n\r\n" + diff --git a/lib/email/doc.go b/lib/email/doc.go index 802b58a1..44f26f77 100644 --- a/lib/email/doc.go +++ b/lib/email/doc.go @@ -29,7 +29,7 @@ // | Name | Value | Type | // +------+-------+------+ // -// [Field] is parsed line that contains Name and Value separated by ": ". +// [Field] is parsed line that contains Name and Value separated by colon ':'. // // A [ContentType] is special Field where Name is "Content-Type", and its // Value is parsed from string "top/sub; <param>; ...". diff --git a/lib/email/field.go b/lib/email/field.go index cbd25530..ecf377e4 100644 --- a/lib/email/field.go +++ b/lib/email/field.go @@ -37,6 +37,10 @@ type Field struct { // Type of field, the numeric representation of field name. Type FieldType + // isFolded set to true if field line contains folding, CRLF following + // by space and values. + isFolded bool + // true if field.unpack has been called, false when field.setValue is // called again. unpacked bool @@ -54,54 +58,85 @@ func ParseField(raw []byte) (field *Field, rest []byte, err error) { return nil, nil, nil } + var logp = `ParseField` + field = &Field{} - isFolded := false - start := 0 - // Get field's name. - // Valid values: %d33-57 / %d59-126 . 
- x := 0 + raw, err = field.parseName(raw) + if err != nil { + return nil, nil, fmt.Errorf(`%s: %w`, logp, err) + } + + raw, err = field.parseValue(raw) + if err != nil { + return nil, nil, fmt.Errorf(`%s: %w`, logp, err) + } + + if !field.isFolded { + if (len(field.oriName) + len(field.oriValue) + 1) > 1000 { + return nil, nil, fmt.Errorf(`%s: field line greater than 998 characters`, logp) + } + } + + rest = raw + return field, rest, nil +} + +// parseName parse the field Name. +// Format, +// +// field-name = 1*(ftext / obs-ftext) ":" +// obs-ftext = %d32 / ftext +// ; space allowed in [obsolete] specification. +// [ftext] = %d33-57 / %d59-126 +// ; printable ASCII character except colon (%d58). +// +// [ftext]: https://datatracker.ietf.org/doc/html/rfc5322#section-2.2 +// [obsolete]: https://datatracker.ietf.org/doc/html/rfc5322#section-4.5 +func (field *Field) parseName(raw []byte) (rest []byte, err error) { + var ( + logp = `parseName` + x int + ) for ; x < len(raw); x++ { - if raw[x] == ' ' || raw[x] == ':' { + if raw[x] == '\t' || raw[x] == ' ' || raw[x] == ':' { break } if raw[x] < 33 || raw[x] > 126 { - err = fmt.Errorf("email: invalid field at '%s'", raw[:x]) - goto invalid + return nil, fmt.Errorf(`%s: invalid character %q`, logp, raw[x]) } } - if len(raw) == x { - err = fmt.Errorf("email: invalid field at '%s'", raw[:x]) - goto invalid - } - // Skip WSP before ':'. for ; x < len(raw) && (raw[x] == '\t' || raw[x] == ' '); x++ { } if len(raw) == x { - err = fmt.Errorf("email: invalid field at '%s'", raw[:x]) - goto invalid + return nil, fmt.Errorf(`%s: missing value`, logp) } if raw[x] != ':' { - err = fmt.Errorf("email: missing field separator at '%s'", raw[:x]) - goto invalid + return nil, fmt.Errorf(`%s: missing field separator`, logp) } field.setName(raw[:x]) - x++ - start = x - // Skip WSP after ':'. 
- for ; x < len(raw) && (raw[x] == '\t' || raw[x] == ' '); x++ { - } + rest = raw[x+1:] - if len(raw) == x { - err = fmt.Errorf("email: empty field value at '%s'", raw[:x]) - goto invalid - } + return rest, nil +} + +// parseValue parse field value. +// Format, +// +// field-body = 1*(FWS / WSP / %d33-126) CRLF +// FWS = CRLF WSP ; \r\n followed by space. +// WSP = %d9 / %d32 ; tab or space. +// +// [Reference]: https://datatracker.ietf.org/doc/html/rfc5322#section-2.2 +func (field *Field) parseValue(raw []byte) (rest []byte, err error) { + var ( + logp = `parseValue` + x int + ) - // Get field's value. - // Valid values: WSP / %d33-126 . for ; x < len(raw); x++ { for ; x < len(raw); x++ { if raw[x] == '\t' || raw[x] == ' ' { @@ -112,48 +147,34 @@ func ParseField(raw []byte) (field *Field, rest []byte, err error) { break } if raw[x] < 33 || raw[x] > 126 { - err = fmt.Errorf("email: invalid field value at '%s'", raw[:x]) - goto invalid + return nil, fmt.Errorf(`%s: invalid field value %q`, logp, raw[x]) } } if x == len(raw) || raw[x] != lf { - err = fmt.Errorf("email: field value without CRLF at '%s'", raw[:x]) - goto invalid + return nil, fmt.Errorf(`%s: invalid or missing termination`, logp) } - if x++; x == len(raw) { + x++ + if x == len(raw) { break } - // Unfolding ... if raw[x] == '\t' || raw[x] == ' ' { - isFolded = true + field.isFolded = true continue } + // End with CRLF. 
break } - if !isFolded && x > 1000 { - err = fmt.Errorf("email: field line greater than 998 characters") - return nil, nil, err - } - field.setValue(raw[start:x]) + field.setValue(raw[:x]) if len(field.Value) == 0 { - err = fmt.Errorf("email: empty field value at '%s'", raw[:x]) - goto invalid + return nil, fmt.Errorf(`%s: empty field value`, logp) } - if len(raw) > x { - rest = raw[x:] - } - - return field, rest, nil + rest = raw[x:] -invalid: - if x < len(raw) { - rest = raw[x:] - } - return nil, rest, err + return rest, nil } // addMailboxes append zero or more mailboxes to current mboxes. @@ -213,7 +234,7 @@ func (field *Field) appendValue(raw []byte) { // setName set field Name by canonicalizing raw field name using "simple" and // "relaxed" algorithms. -// . +// // "simple" algorithm store raw field name as is. // // "relaxed" algorithm convert field name to lowercase and removing trailing diff --git a/lib/email/field_test.go b/lib/email/field_test.go index 672cf181..b8d7c5cb 100644 --- a/lib/email/field_test.go +++ b/lib/email/field_test.go @@ -26,59 +26,59 @@ func TestParseField(t *testing.T) { }, { desc: "With long line", raw: []byte("name:" + longValue + "\r\n"), - expErr: "email: field line greater than 998 characters", + expErr: `ParseField: field line greater than 998 characters`, }, { desc: "With only whitespaces", raw: []byte(" "), - expErr: "email: invalid field at ' '", + expErr: `ParseField: parseName: missing value`, }, { desc: "With only CRLF", raw: []byte("\r\n"), - expErr: "email: invalid field at ''", + expErr: `ParseField: parseName: invalid character '\r'`, }, { desc: "Without separator and CRLF", raw: []byte("name"), - expErr: "email: invalid field at 'name'", + expErr: `ParseField: parseName: missing value`, }, { desc: "Without separator", raw: []byte("name\r\n"), - expErr: "email: invalid field at 'name'", + expErr: `ParseField: parseName: invalid character '\r'`, }, { desc: "With space on name", raw: []byte("na me\r\n"), - expErr: 
"email: missing field separator at 'na '", + expErr: `ParseField: parseName: missing field separator`, }, { desc: "Without value and CRLF", raw: []byte("name:"), - expErr: "email: empty field value at 'name:'", + expErr: `ParseField: parseValue: empty field value`, }, { desc: "Without value and CRLF", raw: []byte("name: "), - expErr: "email: empty field value at 'name: '", + expErr: `ParseField: parseValue: invalid or missing termination`, }, { desc: "Without value", raw: []byte("name:\r\n"), - expErr: "email: empty field value at 'name:\r\n'", + expErr: `ParseField: parseValue: empty field value`, }, { desc: "Without value", raw: []byte("name: \r\n"), - expErr: "email: empty field value at 'name: \r\n'", + expErr: `ParseField: parseValue: empty field value`, }, { desc: "Without CRLF", raw: []byte("name:value"), - expErr: "email: field value without CRLF at 'name:value'", + expErr: `ParseField: parseValue: invalid or missing termination`, }, { desc: "Without CR", raw: []byte("name:value\n"), - expErr: "email: invalid field value at 'name:value'", + expErr: `ParseField: parseValue: invalid field value '\n'`, }, { desc: "Without LF", raw: []byte("name:value\r"), - expErr: "email: field value without CRLF at 'name:value\r'", + expErr: `ParseField: parseValue: invalid or missing termination`, }, { desc: "With CR inside value", raw: []byte("name:valu\re"), - expErr: "email: field value without CRLF at 'name:valu\r'", + expErr: `ParseField: parseValue: invalid or missing termination`, }, { desc: "With valid input", raw: []byte("NAME : VALUE\r\n"), @@ -308,7 +308,7 @@ func TestUnpackMailboxList(t *testing.T) { in []byte }{{ in: []byte("From: \r\n"), - expErr: "email: empty field value at 'From: \r\n'", + expErr: `ParseField: parseValue: empty field value`, }, { in: []byte("From: test@one, test@two\r\n"), exp: "from:test@one, test@two\r\n", diff --git a/lib/email/header_test.go b/lib/email/header_test.go index f0b8c86a..ce5b9ead 100644 --- a/lib/email/header_test.go +++ 
b/lib/email/header_test.go @@ -69,7 +69,7 @@ func TestParseHeader(t *testing.T) { }, { desc: "With whitespaces only", raw: []byte(" \t"), - expErr: "email: invalid field at ' \t'", + expErr: `ParseField: parseName: missing value`, }, { desc: "With CRLF only", raw: []byte("\r\n"), @@ -81,7 +81,7 @@ func TestParseHeader(t *testing.T) { }, { desc: "With invalid field: missing value", raw: []byte("a:\r\n\t"), - expErr: "email: empty field value at 'a:\r\n\t'", + expErr: `ParseField: parseValue: empty field value`, }, { desc: "With single field", raw: []byte("a:1\r\n"), diff --git a/lib/email/message_test.go b/lib/email/message_test.go index 7813929b..a323b0f8 100644 --- a/lib/email/message_test.go +++ b/lib/email/message_test.go @@ -90,7 +90,7 @@ func TestMessageParseMessage(t *testing.T) { exp: "\r\n", }, { in: "testdata/invalid-header.txt", - expErr: "ParseMessage: email: invalid field value at 'From : John Doe <jdoe@machine(comment). example>'", + expErr: `ParseMessage: ParseField: parseValue: invalid field value '\n'`, }, { in: "testdata/rfc5322-A.6.3.txt", exp: "from:John Doe <jdoe@machine(comment). example>\r\n" + diff --git a/lib/email/mime_test.go b/lib/email/mime_test.go index 51826004..794faf67 100644 --- a/lib/email/mime_test.go +++ b/lib/email/mime_test.go @@ -55,7 +55,7 @@ func TestParseBodyPart(t *testing.T) { in: "--boundary\r\n" + "Content-Encoding:\r\n\r\n", boundary: "boundary", - expErr: "email: empty field value at 'Content-Encoding:\r\n'", + expErr: `ParseField: parseValue: empty field value`, }, { desc: "With end of body", in: "--boundary--\r\n\r\n" + |
