diff options
| author | Shulhan <ms@kilabit.info> | 2019-02-01 11:32:34 +0700 |
|---|---|---|
| committer | Shulhan <ms@kilabit.info> | 2019-02-05 07:48:52 +0700 |
| commit | 930b8e7bdeb7b4909905f3185647d8d64d89ebbf (patch) | |
| tree | 623956d90d0e33b85bc23f97ccc93bf15b1950e9 | |
| parent | 45a23e05d85eac33e1a4b44c0888be5d91a73cdb (diff) | |
| download | pakakeh.go-930b8e7bdeb7b4909905f3185647d8d64d89ebbf.tar.xz | |
lib/email: new package for working with Internet Message Format
This package provides a library for parsing the email message format as
specified in RFC 5322.
| -rw-r--r-- | doc/IMF.adoc | 585 | ||||
| -rw-r--r-- | doc/IMF.html | 707 | ||||
| -rw-r--r-- | lib/email/body.go | 23 | ||||
| -rw-r--r-- | lib/email/doc.go | 9 | ||||
| -rw-r--r-- | lib/email/email.go | 33 | ||||
| -rw-r--r-- | lib/email/field.go | 363 | ||||
| -rw-r--r-- | lib/email/field_test.go | 255 | ||||
| -rw-r--r-- | lib/email/fieldtype.go | 12 | ||||
| -rw-r--r-- | lib/email/header.go | 79 | ||||
| -rw-r--r-- | lib/email/header_test.go | 72 | ||||
| -rw-r--r-- | lib/email/mime.go | 16 | ||||
| -rw-r--r-- | lib/io/reader.go | 66 | ||||
| -rw-r--r-- | lib/io/reader_test.go | 69 | ||||
| -rw-r--r-- | lib/time/time.go | 29 |
14 files changed, 1875 insertions, 443 deletions
diff --git a/doc/IMF.adoc b/doc/IMF.adoc index fdfbcc35..497a3f01 100644 --- a/doc/IMF.adoc +++ b/doc/IMF.adoc @@ -1,4 +1,4 @@ -= Internet Message Format (IMF) += Internet Message Format (IMF) :author: Shulhan :email: <ms@kilabit.info> :toc: left @@ -10,13 +10,28 @@ This documentation provide summary and notes on implementation of Internet Message Format as defined in {url-rfc5322}[RFC 5322]. -== Syntax + +== Syntax .... message = (fields / obs-fields) - [CRLF body] + [CRLF body] + +fields = *(field-name ":" (field-body / unstructured) CRLF) + +field-name = 1*ftext + +field-body = (*([FWS] VCHAR) *WSP) + +unstructured = (*([FWS] VCHAR) *WSP) / obs-unstruct -fields = *(header-key ":" header-value CRLF) +VCHAR = %d33-126 + +WSP = %d9 / %d32 + ; tab or space + +ftext = %d33-57 / %d59-126 + ; Printable US-ASCII, except %d0-32 and %d58 (":") body = (*(*998text CRLF) *998text) / obs-body @@ -26,15 +41,39 @@ text = %d1-9 / ; Characters excluding CR %d14-127 .... -Each line MUST be no more than 998 characters, and SHOULD be no more than 78 -characters, excluding the CRLF. +* Each line MUST be no more than 998 characters, excluding CRLF. + +* Each line SHOULD be no more than 78 characters, excluding the CRLF. + +* CR and LF MUST only occur together as CRLF; they MUST NOT appear + independently in the body. + +* Each header field SHOULD be treated in its unfolded form for further + syntactic and semantic evaluation. + +* "field-body" MUST NOT include CR and LF except when used in "folding" and + "unfolding". + + +=== Folding White Space and Comments + +.... +CFWS = (1*([FWS] comment) [FWS]) / FWS + +FWS = ([*WSP CRLF] 1*WSP) / obs-FWS + ; Folding white space + +comment = "(" *([FWS] ccontent) [FWS] ")" -CR and LF MUST only occur together as CRLF; they MUST NOT appear -independently in the body. +ccontent = ctext / quoted-pair / comment -`header-key` MUST be composed of printable US-ASCII characters, except colon. 
-`header-value` MUST NOT include CR and LF except when used in "folding" and -"unfolding". +ctext = %d33-39 / ; Printable US-ASCII + %d42-91 / ; characters not including + %d93-126 / ; "(", ")", or "\" + obs-ctext + +quoted-pair = ("\" (VCHAR / WSP)) / obs-qp +.... _Folding_ is a function to split a line into multiline with CRLF and WSP. For example, the following line, @@ -45,185 +84,253 @@ example, the following line, can be folded into, .... -"Subject: This is" CRLF -WSP "a test" CRLF +"Subject: This" CRLF +WSP "is a test" CRLF +.... + +_Unfolding_ is the process that reverses the output of folding into the original +input. + +* An unfolded header field has no length restriction and therefore may be + indeterminately long. + +* Any CRLF that appears in FWS is semantically "invisible". + +* The "\" in any quoted-pair is semantically "invisible". + +* Folding is permitted within the comment. + +* The parentheses and backslash characters may appear in a comment, so long + as they appear as a quoted-pair. + +* A comment does not include the enclosing parentheses. + + +=== Atom + +.... +word = atom / quoted-string + +phrase = 1*word / obs-phrase + +atom = [CFWS] 1*atext [CFWS] + +dot-atom = [CFWS] dot-atom-text [CFWS] + +dot-atom-text = 1*atext *("." 1*atext) + +atext = ALPHA / DIGIT / ; Printable US-ASCII + "!" / "#" / ; characters not including + "$" / "%" / ; specials. Used for atoms. + "&" / "'" / + "*" / "+" / + "-" / "/" / + "=" / "?" / + "^" / "_" / + "`" / "{" / + "|" / "}" / + "~" + +specials = "(" / ")" / ; Special characters that do + "<" / ">" / ; not appear in atext + "[" / "]" / + ":" / ";" / + "@" / "\" / + "," / "." / + DQUOTE +.... + +* The optional comments and FWS surrounding the rest of the characters are + not part of the atom. + + +=== Quoted Strings + +.... 
+quoted-string = [CFWS] + DQUOTE *([FWS] qcontent) [FWS] DQUOTE + [CFWS] + +qcontent = qtext / quoted-pair + +qtext = %d33 / ; Printable US-ASCII + %d35-91 / ; characters not including + %d93-126 / ; "\" or the quote character + obs-qtext .... -_Unfolding_ is the process that convert the multiline representation into a -single line. -=== Date and Time Specification +=== Date and Time Specification Syntax, .... - date-time = [ day-of-week "," ] date time [CFWS] +date-time = [ day-of-week "," ] date time [CFWS] - day-of-week = ([FWS] day-name) / obs-day-of-week +day-of-week = ([FWS] day-name) / obs-day-of-week - day-name = "Mon" / "Tue" / "Wed" / "Thu" / "Fri" / "Sat" / "Sun" +day-name = "Mon" / "Tue" / "Wed" / "Thu" / "Fri" / "Sat" / "Sun" - date = day month year +date = day month year - day = ([FWS] 1*2DIGIT FWS) / obs-day +day = ([FWS] 1*2DIGIT FWS) / obs-day - month = "Jan" / "Feb" / "Mar" / "Apr" / - "May" / "Jun" / "Jul" / "Aug" / - "Sep" / "Oct" / "Nov" / "Dec" +month = "Jan" / "Feb" / "Mar" / "Apr" / + "May" / "Jun" / "Jul" / "Aug" / + "Sep" / "Oct" / "Nov" / "Dec" - year = (FWS 4*DIGIT FWS) / obs-year +year = (FWS 4*DIGIT FWS) / obs-year - time = time-of-day zone +time = time-of-day zone - time-of-day = hour ":" minute [ ":" second ] +time-of-day = hour ":" minute [ ":" second ] - hour = 2DIGIT / obs-hour +hour = 2DIGIT / obs-hour - minute = 2DIGIT / obs-minute +minute = 2DIGIT / obs-minute - second = 2DIGIT / obs-second +second = 2DIGIT / obs-second - zone = (FWS ( "+" / "-" ) 4DIGIT) / obs-zone +zone = (FWS ( "+" / "-" ) 4DIGIT) / obs-zone .... -The date and time-of-day SHOULD express local time. +* The date and time-of-day SHOULD express local time. + +* The form "+0000" on zone SHOULD be used to indicate a time zone at + Universal Time. + +* The form "-0000" on zone indicate that the time was generated on a system + that may be in a local time zone other than Universal Time and that the + date-time contains no information about the local time zone. 
+ +* A date-time specification MUST be semantically valid. + +* The day-of-week MUST be the day implied by the date. -The form "+0000" on zone SHOULD be used to indicate a time zone at Universal -Time. +* The numeric day-of-month MUST be between 1 and the number of days allowed + for the specified month (in the specified year). -The form "-0000" on zone indicate that the time was generated on a system that -may be in a local time zone other than Universal Time and that the date-time -contains no information about the local time zone. +* The time-of-day MUST be in the range 00:00:00 through 23:59:60 (the number + of seconds allowing for a leap second). -A date-time specification MUST be semantically valid. -The day-of-week MUST be the day implied by the date. -The numeric day-of-month MUST be between 1 and the number of days allowed -for the specified month (in the specified year), -The time-of-day MUST be in the range 00:00:00 through 23:59:60 (the number of -seconds allowing for a leap second. -The last two digits of the zone MUST be within the range 00 through 59. +* The last two digits of the zone MUST be within the range 00 through 59. -=== Address Specification + +=== Address Specification An address may either be an individual mailbox, or a group of mailboxes. Format, .... 
- group-list = mailbox-list / CFWS / obs-group-list +group-list = mailbox-list / CFWS / obs-group-list - mailbox-list = (mailbox *("," mailbox)) / obs-mbox-list +mailbox-list = (mailbox *("," mailbox)) / obs-mbox-list - address-list = (address *("," address)) / obs-addr-list +address-list = (address *("," address)) / obs-addr-list - address = mailbox / group +address = mailbox / group - mailbox = name-addr / addr-spec +mailbox = name-addr / addr-spec - name-addr = [display-name] angle-addr +name-addr = [display-name] angle-addr - angle-addr = [CFWS] "<" addr-spec ">" [CFWS] / - obs-angle-addr +angle-addr = [CFWS] "<" addr-spec ">" [CFWS] / + obs-angle-addr - group = display-name ":" [group-list] ";" [CFWS] +group = display-name ":" [group-list] ";" [CFWS] - display-name = phrase +display-name = phrase - addr-spec = local-part "@" domain +addr-spec = local-part "@" domain - local-part = dot-atom / quoted-string / obs-local-part +local-part = dot-atom / quoted-string / obs-local-part - domain = dot-atom / domain-literal / obs-domain +domain = dot-atom / domain-literal / obs-domain - domain-literal = [CFWS] "[" *([FWS] dtext) [FWS] "]" [CFWS] +domain-literal = [CFWS] "[" *([FWS] dtext) [FWS] "]" [CFWS] - dtext = %d33-90 / ; Printable US-ASCII - %d94-126 / ; characters not including - obs-dtext ; "[", "]", or "\" +dtext = %d33-90 / ; Printable US-ASCII + %d94-126 / ; characters not including + obs-dtext ; "[", "]", or "\" .... -dot-atom form SHOULD be used and the quoted-string form SHOULD NOT be used. -Comments and folding white space SHOULD NOT be used around the "@" in the -addr-spec. +* dot-atom form SHOULD be used, + +* quoted-string form SHOULD NOT be used; + +* Comments and folding white space SHOULD NOT be used around the "@" in the + addr-spec. -== Header +== Header Format, .... 
- fields = *(trace - *optional-field / - *(resent-date / - resent-from / - resent-sender / - resent-to / - resent-cc / - resent-bcc / - resent-msg-id)) - *(orig-date / - from / - sender / - reply-to / - to / - cc / - bcc / - message-id / - in-reply-to / - references / - subject / - comments / - keywords / - optional-field) - - +----------------+--------+------------+----------------------------+ - | Field | Min | Max number | Notes | - | | number | | | - +----------------+--------+------------+----------------------------+ - | trace | 0 | unlimited | Block prepended - see | - | | | | 3.6.7 | - | resent-date | 0* | unlimited* | One per block, required if | - | | | | other resent fields are | - | | | | present - see 3.6.6 | - | resent-from | 0 | unlimited* | One per block - see 3.6.6 | - | resent-sender | 0* | unlimited* | One per block, MUST occur | - | | | | with multi-address | - | | | | resent-from - see 3.6.6 | - | resent-to | 0 | unlimited* | One per block - see 3.6.6 | - | resent-cc | 0 | unlimited* | One per block - see 3.6.6 | - | resent-bcc | 0 | unlimited* | One per block - see 3.6.6 | - | resent-msg-id | 0 | unlimited* | One per block - see 3.6.6 | - | orig-date | 1 | 1 | | - | from | 1 | 1 | See sender and 3.6.2 | - | sender | 0* | 1 | MUST occur with | - | | | | multi-address from - see | - | | | | 3.6.2 | - | reply-to | 0 | 1 | | - | to | 0 | 1 | | - | cc | 0 | 1 | | - | bcc | 0 | 1 | | - | message-id | 0* | 1 | SHOULD be present - see | - | | | | 3.6.4 | - | in-reply-to | 0* | 1 | SHOULD occur in some | - | | | | replies - see 3.6.4 | - | references | 0* | 1 | SHOULD occur in some | - | | | | replies - see 3.6.4 | - | subject | 0 | 1 | | - | comments | 0 | unlimited | | - | keywords | 0 | unlimited | | - | optional-field | 0 | unlimited | | - +----------------+--------+------------+----------------------------+ +fields = *(trace + *optional-field / + *(resent-date / + resent-from / + resent-sender / + resent-to / + resent-cc / + resent-bcc / + 
resent-msg-id)) + *(orig-date / + from / + sender / + reply-to / + to / + cc / + bcc / + message-id / + in-reply-to / + references / + subject / + comments / + keywords / + optional-field) .... -Header fields SHOULD NOT be reordered when a message is transported or -transformed. -More importantly, the trace header fields and resent header fields MUST NOT be -reordered, and SHOULD be kept in blocks prepended to the message. +[cols=".<2,.^1,.<1,.<6",options="header"] +|=== +| Field | Min number | Max number | Notes + +| trace | 0 | unlimited | Block prepended - see 3.6.7 +| resent-date | 0* | unlimited* | One per block, required if other resent fields are present - see 3.6.6 +| resent-from | 0 | unlimited* | One per block - see 3.6.6 +| resent-sender | 0* | unlimited* | One per block, MUST occur with multi-address resent-from - see 3.6.6 +| resent-to | 0 | unlimited* | One per block - see 3.6.6 +| resent-cc | 0 | unlimited* | One per block - see 3.6.6 +| resent-bcc | 0 | unlimited* | One per block - see 3.6.6 +| resent-msg-id | 0 | unlimited* | One per block - see 3.6.6 +| orig-date | 1 | 1 | +| from | 1 | 1 | See sender and 3.6.2 +| sender | 0* | 1 | MUST occur with multi-address from - see 3.6.2 +| reply-to | 0 | 1 | +| to | 0 | 1 | +| cc | 0 | 1 | +| bcc | 0 | 1 | +| message-id | 0* | 1 | SHOULD be present - see 3.6.4 +| in-reply-to | 0* | 1 | SHOULD occur in some replies - see 3.6.4 +| references | 0* | 1 | SHOULD occur in some replies - see 3.6.4 +| subject | 0 | 1 | +| comments | 0 | unlimited | +| keywords | 0 | unlimited | +| optional-field | 0 | unlimited | +|=== -The only required header fields are the "Date" field and the originator -address field(s) (which is "From", "Sender", and "Reply-To"). +* Header fields SHOULD NOT be reordered when a message is transported or + transformed. +* The trace header fields and resent header fields MUST NOT be + reordered, and SHOULD be kept in blocks prepended to the message. 
-=== Date Field +* The only required header fields are the "Date" field and the originator + address field(s) (which is "From", "Sender", and "Reply-To"). + + +=== Date Field The origination date specifies the date and time at which the creator of the message indicated that the message was complete and ready to enter the mail @@ -233,14 +340,14 @@ delivery system. orig-date = "Date:" date-time CRLF .... -=== Originator Fields +=== Originator Fields .... - from = "From:" mailbox-list CRLF +from = "From:" mailbox-list CRLF - sender = "Sender:" mailbox CRLF +sender = "Sender:" mailbox CRLF - reply-to = "Reply-To:" address-list CRLF +reply-to = "Reply-To:" address-list CRLF .... If the from field contains more than one mailbox, then the sender field MUST @@ -260,14 +367,14 @@ In all cases, the "From:" field SHOULD NOT contain any mailbox that does not belong to the author(s) of the message. -=== Destination Fields +=== Destination Fields .... - to = "To:" address-list CRLF +to = "To:" address-list CRLF - cc = "Cc:" address-list CRLF +cc = "Cc:" address-list CRLF - bcc = "Bcc:" [address-list / CFWS] CRLF +bcc = "Bcc:" [address-list / CFWS] CRLF .... The "To:" field contains the address(es) of the primary recipient(s) of the @@ -284,16 +391,18 @@ revealed to other recipients of the message. There are three ways in which the "Bcc:" field is used, -1. The "Bcc:" line is removed even though all of the recipients (including +. The "Bcc:" line is removed even though all of the recipients (including those specified in the "Bcc:" field) are sent a copy of the message. -2. Recipients specified in the "To:" and "Cc:" lines each are sent + +. Recipients specified in the "To:" and "Cc:" lines each are sent a copy of the message with the "Bcc:" line removed as above, but the recipients on the "Bcc:" line get a separate copy of the message containing a "Bcc:" line. 
(When there are multiple recipient addresses in the "Bcc:" field, some implementations actually send a separate copy of the message to each recipient with a "Bcc:" containing only the address of that particular recipient.) -3. Since a "Bcc:" field may contain no addresses, a "Bcc:" field can be + +. Since a "Bcc:" field may contain no addresses, a "Bcc:" field can be sent without any addresses indicating to the recipients that blind copies were sent to someone. @@ -302,29 +411,29 @@ to the "Security Considerations" section of this document for a discussion of each. -=== Identification Field - -Every message SHOULD have a "Message-ID:" field. - -Reply messages SHOULD have "In-Reply-To:" and "References:" fields. +=== Identification Field Format, .... - message-id = "Message-ID:" msg-id CRLF +message-id = "Message-ID:" msg-id CRLF - in-reply-to = "In-Reply-To:" 1*msg-id CRLF +in-reply-to = "In-Reply-To:" 1*msg-id CRLF - references = "References:" 1*msg-id CRLF +references = "References:" 1*msg-id CRLF - msg-id = [CFWS] "<" id-left "@" id-right ">" [CFWS] +msg-id = [CFWS] "<" id-left "@" id-right ">" [CFWS] - id-left = dot-atom-text / obs-id-left +id-left = dot-atom-text / obs-id-left - id-right = dot-atom-text / no-fold-literal / obs-id-right +id-right = dot-atom-text / no-fold-literal / obs-id-right - no-fold-literal = "[" *dtext "]" +no-fold-literal = "[" *dtext "]" .... +* Every message SHOULD have a "Message-ID:" field. + +* Reply messages SHOULD have "In-Reply-To:" and "References:" fields. + msg-id is intended to be machine readable and not necessarily meaningful to humans. @@ -347,14 +456,14 @@ Semantically, the angle bracket characters are not part of the msg-id; the msg-id is what is contained between the two angle bracket characters. -=== Informational Fields +=== Informational Fields .... 
- subject = "Subject:" unstructured CRLF +subject = "Subject:" unstructured CRLF - comments = "Comments:" unstructured CRLF +comments = "Comments:" unstructured CRLF - keywords = "Keywords:" phrase *("," phrase) CRLF +keywords = "Keywords:" phrase *("," phrase) CRLF .... When used in a reply, the "Subject" body MAY start with the string "Re: " (an @@ -365,134 +474,138 @@ used since use of other strings or more than one instance can lead to undesirable consequences. -=== Resent Fields - -Resent fields SHOULD be added to any message that is reintroduced by -a user into the transport system. -A separate set of resent fields SHOULD be added each time this is done. -All of the resent fields corresponding to a particular resending of the -message SHOULD be grouped together. -Each new set of resent fields is prepended to the message; that is, the most -recent set of resent fields appears earlier in the message. -No other fields in the message are changed when resent fields are added. +=== Resent Fields Each of the resent fields corresponds to a particular field elsewhere in the syntax. .... - resent-date = "Resent-Date:" date-time CRLF +resent-date = "Resent-Date:" date-time CRLF - resent-from = "Resent-From:" mailbox-list CRLF +resent-from = "Resent-From:" mailbox-list CRLF - resent-sender = "Resent-Sender:" mailbox CRLF +resent-sender = "Resent-Sender:" mailbox CRLF - resent-to = "Resent-To:" address-list CRLF +resent-to = "Resent-To:" address-list CRLF - resent-cc = "Resent-Cc:" address-list CRLF +resent-cc = "Resent-Cc:" address-list CRLF - resent-bcc = "Resent-Bcc:" [address-list / CFWS] CRLF +resent-bcc = "Resent-Bcc:" [address-list / CFWS] CRLF - resent-msg-id = "Resent-Message-ID:" msg-id CRLF +resent-msg-id = "Resent-Message-ID:" msg-id CRLF .... -When resent fields are used, the "Resent-From:" and "Resent-Date:" -fields MUST be sent. -The "Resent-Message-ID:" field SHOULD be sent. 
-"Resent-Sender:" SHOULD NOT be used if "Resent-Sender:" would be identical to -"Resent-From:". +* Resent fields SHOULD be added to any message that is reintroduced by + a user into the transport system. -The "Resent-Message-ID:" field provides a unique identifier for the resent -message. +* A separate set of resent fields SHOULD be added each time this is done. + +* All of the resent fields corresponding to a particular resending of the + message SHOULD be grouped together. + +* Each new set of resent fields is prepended to the message; that is, the + most recent set of resent fields appears earlier in the message. + +* No other fields in the message are changed when resent fields are added. + +* When resent fields are used, the "Resent-From:" and "Resent-Date:" + fields MUST be sent. + +* The "Resent-Message-ID:" field SHOULD be sent. -=== Trace Fields +* "Resent-Sender:" SHOULD NOT be used if "Resent-Sender:" would be identical + to "Resent-From:". + +* The "Resent-Message-ID:" field provides a unique identifier for the resent + message. + + +=== Trace Fields .... - trace = [return] - 1*received +trace = [return] 1*received - return = "Return-Path:" path CRLF +return = "Return-Path:" path CRLF - path = angle-addr / ([CFWS] "<" [CFWS] ">" [CFWS]) +path = angle-addr / ([CFWS] "<" [CFWS] ">" [CFWS]) - received = "Received:" *received-token ";" date-time CRLF +received = "Received:" *received-token ";" date-time CRLF - received-token = word / angle-addr / addr-spec / domain +received-token = word / angle-addr / addr-spec / domain .... -=== Optional Fields +=== Optional Fields The field names of any optional field MUST NOT be identical to any field name specified elsewhere in this document. .... - optional-field = field-name ":" unstructured CRLF - - field-name = 1*ftext - - ftext = %d33-57 / ; Printable US-ASCII - %d59-126 ; characters not including - ; ":". +optional-field = field-name ":" unstructured CRLF .... 
-== Obsolete Specification +== Obsolete Specification -=== Obsolete Date and Time +=== Obsolete Date and Time The syntax for the obsolete date format allows -1. a 2 digit year in the date field, and -2. alphabetic time zone specifiers +. a 2 digit year in the date field, and +. alphabetic time zone specifiers Where a two or three digit year occurs in a date, the year is to be interpreted as follows: -1. If a two digit year is encountered whose value is between 00 and 49, the -year is interpreted by adding 2000, ending up with a value between 2000 and -2049. +. If a two digit year is encountered whose value is between 00 and 49, the + year is interpreted by adding 2000, ending up with a value between 2000 and + 2049. -2. If a two digit year is encountered with a value between 50 and 99, or any -three digit year is encountered, the year is interpreted by adding 1900. +. If a two digit year is encountered with a value between 50 and 99, or any + three digit year is encountered, the year is interpreted by adding 1900. Obsolete zones, - EDT is semantically equivalent to -0400 - EST is semantically equivalent to -0500 - CDT is semantically equivalent to -0500 - CST is semantically equivalent to -0600 - MDT is semantically equivalent to -0600 - MST is semantically equivalent to -0700 - PDT is semantically equivalent to -0700 - PST is semantically equivalent to -0800 +.... +EDT is semantically equivalent to -0400 +EST is semantically equivalent to -0500 +CDT is semantically equivalent to -0500 +CST is semantically equivalent to -0600 +MDT is semantically equivalent to -0600 +MST is semantically equivalent to -0700 +PDT is semantically equivalent to -0700 +PST is semantically equivalent to -0800 +.... However, because of the error in [RFC0822], any time zones SHOULD all be considered equivalent to "-0000" unless there is out-of-band information confirming their meaning. -=== Obsolete Addressing +=== Obsolete Addressing There are four primary differences in addressing. -1. 
mailbox addresses were allowed to have a route portion before the -addr-spec when enclosed in "<" and ">". -The route is simply a comma-separated list of domain names, each preceded by -"@", and the list terminated by a colon. +. mailbox addresses were allowed to have a route portion before the + addr-spec when enclosed in "<" and ">". + The route is simply a comma-separated list of domain names, each preceded + by "@", and the list terminated by a colon. + +. CFWS were allowed between the period-separated elements of local-part and + domain (i.e., dot-atom was not used). + In addition, local-part is allowed to contain quoted-string in addition to + just atom. + +. mailbox-list and address-list were allowed to have "null" members. + That is, there could be two or more commas in such a list with nothing in + between them, or commas at the beginning or end of the list. -2. CFWS were allowed between the period-separated elements of local-part and -domain (i.e., dot-atom was not used). -In addition, local-part is allowed to contain quoted-string in addition to -just atom. +. US-ASCII control characters and quoted-pairs were allowed in domain + literals and are added here. -3. mailbox-list and address-list were allowed to have "null" members. -That is, there could be two or more commas in such a list with nothing in -between them, or commas at the beginning or end of the list. -4. US-ASCII control characters and quoted-pairs were allowed in domain literals and are added here. +=== Obsolete Header Fields +* Allows multiple occurrences of any of the fields. -=== Obsolete Header Fields +* Fields may occur in any order. -Syntactically, the primary difference in the obsolete field syntax is -that it allows multiple occurrences of any of the fields and they may -occur in any order. -Also, any amount of white space is allowed before the ":" at the end of the -field name. +* Any amount of white space is allowed before the ":" at the end of the + field name. 
diff --git a/doc/IMF.html b/doc/IMF.html index 909bead3..3d7cfdd8 100644 --- a/doc/IMF.html +++ b/doc/IMF.html @@ -21,8 +21,11 @@ <ul class="sectlevel1"> <li><a href="#_syntax">1. Syntax</a> <ul class="sectlevel2"> -<li><a href="#_date_and_time_specification">1.1. Date and Time Specification</a></li> -<li><a href="#_address_specification">1.2. Address Specification</a></li> +<li><a href="#_folding_white_space_and_comments">1.1. Folding White Space and Comments</a></li> +<li><a href="#_atom">1.2. Atom</a></li> +<li><a href="#_quoted_strings">1.3. Quoted Strings</a></li> +<li><a href="#_date_and_time_specification">1.4. Date and Time Specification</a></li> +<li><a href="#_address_specification">1.5. Address Specification</a></li> </ul> </li> <li><a href="#_header">2. Header</a> @@ -62,9 +65,23 @@ Message Format as defined in <a href="https://tools.ietf.org/html/rfc5322">RFC 5 <div class="literalblock"> <div class="content"> <pre>message = (fields / obs-fields) - [CRLF body] + [CRLF body] -fields = *(header-key ":" header-value CRLF) +fields = *(field-name ":" (field-body / unstructured) CRLF) + +field-name = 1*ftext + +field-body = (*([FWS] VCHAR) *WSP) + +unstructured = (*([FWS] VCHAR) *WSP) / obs-unstruct + +VCHAR = %d33-126 + +WSP = %d9 / %d32 + ; tab or space + +ftext = %d33-57 / %d59-126 + ; Printable US-ASCII, except %d0-32 and %d58 (":") body = (*(*998text CRLF) *998text) / obs-body @@ -74,18 +91,48 @@ text = %d1-9 / ; Characters excluding CR %d14-127</pre> </div> </div> -<div class="paragraph"> -<p>Each line MUST be no more than 998 characters, and SHOULD be no more than 78 -characters, excluding the CRLF.</p> -</div> -<div class="paragraph"> +<div class="ulist"> +<ul> +<li> +<p>Each line MUST be no more than 998 characters, excluding CRLF.</p> +</li> +<li> +<p>Each line SHOULD be no more than 78 characters, excluding the CRLF.</p> +</li> +<li> <p>CR and LF MUST only occur together as CRLF; they MUST NOT appear independently in the body.</p> -</div> -<div 
class="paragraph"> -<p><code>header-key</code> MUST be composed of printable US-ASCII characters, except colon. -<code>header-value</code> MUST NOT include CR and LF except when used in "folding" and +</li> +<li> +<p>Each header field SHOULD be treated in its unfolded form for further +syntactic and semantic evaluation.</p> +</li> +<li> +<p>"field-body" MUST NOT include CR and LF except when used in "folding" and "unfolding".</p> +</li> +</ul> +</div> +<div class="sect2"> +<h3 id="_folding_white_space_and_comments">1.1. Folding White Space and Comments</h3> +<div class="literalblock"> +<div class="content"> +<pre>CFWS = (1*([FWS] comment) [FWS]) / FWS + +FWS = ([*WSP CRLF] 1*WSP) / obs-FWS + ; Folding white space + +comment = "(" *([FWS] ccontent) [FWS] ")" + +ccontent = ctext / quoted-pair / comment + +ctext = %d33-39 / ; Printable US-ASCII + %d42-91 / ; characters not including + %d93-126 / ; "(", ")", or "\" + obs-ctext + +quoted-pair = ("\" (VCHAR / WSP)) / obs-qp</pre> +</div> </div> <div class="paragraph"> <p><em>Folding</em> is a function to split a line into multiline with CRLF and WSP. 
For @@ -101,74 +148,172 @@ example, the following line,</p> </div> <div class="literalblock"> <div class="content"> -<pre>"Subject: This is" CRLF -WSP "a test" CRLF</pre> +<pre>"Subject: This" CRLF +WSP "is a test" CRLF</pre> </div> </div> <div class="paragraph"> -<p><em>Unfolding</em> is the process that convert the multiline representation into a -single line.</p> +<p><em>Unfolding</em> is the process that reverse the output of folding into original +input.</p> +</div> +<div class="ulist"> +<ul> +<li> +<p>An unfolded header field has no length restriction and therefore may be +indeterminately long.</p> +</li> +<li> +<p>Any CRLF that appears in FWS is semantically "invisible".</p> +</li> +<li> +<p>The "\" in any quoted-pair is semantically "invisible".</p> +</li> +<li> +<p>Folding is permitted within the comment.</p> +</li> +<li> +<p>The parentheses and backslash characters may appear in a comment, so long +as they appear as a quoted-pair.</p> +</li> +<li> +<p>Comment is not including the enclosing paretheses.</p> +</li> +</ul> +</div> </div> <div class="sect2"> -<h3 id="_date_and_time_specification">1.1. Date and Time Specification</h3> +<h3 id="_atom">1.2. Atom</h3> +<div class="literalblock"> +<div class="content"> +<pre>word = atom / quoted-string + +phrase = 1*word / obs-phrase + +atom = [CFWS] 1*atext [CFWS] + +dot-atom = [CFWS] dot-atom-text [CFWS] + +dot-atom-text = 1*atext *("." 1*atext) + +atext = ALPHA / DIGIT / ; Printable US-ASCII + "!" / "#" / ; characters not including + "$" / "%" / ; specials. Used for atoms. + "&" / "'" / + "*" / "+" / + "-" / "/" / + "=" / "?" / + "^" / "_" / + "`" / "{" / + "|" / "}" / + "~" + +specials = "(" / ")" / ; Special characters that do + "<" / ">" / ; not appear in atext + "[" / "]" / + ":" / ";" / + "@" / "\" / + "," / "." 
/ + DQUOTE</pre> +</div> +</div> +<div class="ulist"> +<ul> +<li> +<p>The optional comments and FWS surrounding the rest of the characters are +not part of the atom.</p> +</li> +</ul> +</div> +</div> +<div class="sect2"> +<h3 id="_quoted_strings">1.3. Quoted Strings</h3> +<div class="literalblock"> +<div class="content"> +<pre>quoted-string = [CFWS] + DQUOTE *([FWS] qcontent) [FWS] DQUOTE + [CFWS] + +qcontent = qtext / quoted-pair + +qtext = %d33 / ; Printable US-ASCII + %d35-91 / ; characters not including + %d93-126 / ; "\" or the quote character + obs-qtext</pre> +</div> +</div> +</div> +<div class="sect2"> +<h3 id="_date_and_time_specification">1.4. Date and Time Specification</h3> <div class="paragraph"> <p>Syntax,</p> </div> <div class="literalblock"> <div class="content"> -<pre> date-time = [ day-of-week "," ] date time [CFWS] +<pre>date-time = [ day-of-week "," ] date time [CFWS] - day-of-week = ([FWS] day-name) / obs-day-of-week +day-of-week = ([FWS] day-name) / obs-day-of-week - day-name = "Mon" / "Tue" / "Wed" / "Thu" / "Fri" / "Sat" / "Sun" +day-name = "Mon" / "Tue" / "Wed" / "Thu" / "Fri" / "Sat" / "Sun" - date = day month year +date = day month year - day = ([FWS] 1*2DIGIT FWS) / obs-day +day = ([FWS] 1*2DIGIT FWS) / obs-day - month = "Jan" / "Feb" / "Mar" / "Apr" / - "May" / "Jun" / "Jul" / "Aug" / - "Sep" / "Oct" / "Nov" / "Dec" +month = "Jan" / "Feb" / "Mar" / "Apr" / + "May" / "Jun" / "Jul" / "Aug" / + "Sep" / "Oct" / "Nov" / "Dec" - year = (FWS 4*DIGIT FWS) / obs-year +year = (FWS 4*DIGIT FWS) / obs-year - time = time-of-day zone +time = time-of-day zone - time-of-day = hour ":" minute [ ":" second ] +time-of-day = hour ":" minute [ ":" second ] - hour = 2DIGIT / obs-hour +hour = 2DIGIT / obs-hour - minute = 2DIGIT / obs-minute +minute = 2DIGIT / obs-minute - second = 2DIGIT / obs-second +second = 2DIGIT / obs-second - zone = (FWS ( "+" / "-" ) 4DIGIT) / obs-zone</pre> +zone = (FWS ( "+" / "-" ) 4DIGIT) / obs-zone</pre> </div> </div> -<div 
class="paragraph"> +<div class="ulist"> +<ul> +<li> <p>The date and time-of-day SHOULD express local time.</p> -</div> -<div class="paragraph"> -<p>The form "+0000" on zone SHOULD be used to indicate a time zone at Universal -Time.</p> -</div> -<div class="paragraph"> -<p>The form "-0000" on zone indicate that the time was generated on a system that -may be in a local time zone other than Universal Time and that the date-time -contains no information about the local time zone.</p> -</div> -<div class="paragraph"> -<p>A date-time specification MUST be semantically valid. -The day-of-week MUST be the day implied by the date. -The numeric day-of-month MUST be between 1 and the number of days allowed -for the specified month (in the specified year), -The time-of-day MUST be in the range 00:00:00 through 23:59:60 (the number of -seconds allowing for a leap second. -The last two digits of the zone MUST be within the range 00 through 59.</p> +</li> +<li> +<p>The form "+0000" on zone SHOULD be used to indicate a time zone at +Universal Time.</p> +</li> +<li> +<p>The form "-0000" on zone indicates that the time was generated on a system +that may be in a local time zone other than Universal Time and that the +date-time contains no information about the local time zone.</p> +</li> +<li> +<p>A date-time specification MUST be semantically valid.</p> +</li> +<li> +<p>The day-of-week MUST be the day implied by the date.</p> +</li> +<li> +<p>The numeric day-of-month MUST be between 1 and the number of days allowed +for the specified month (in the specified year).</p> +</li> +<li> +<p>The time-of-day MUST be in the range 00:00:00 through 23:59:60 (the number +of seconds allowing for a leap second).</p> +</li> +<li> +<p>The last two digits of the zone MUST be within the range 00 through 59.</p> +</li> +</ul> </div> </div> <div class="sect2"> -<h3 id="_address_specification">1.2. Address Specification</h3> +<h3 id="_address_specification">1.5. 
Address Specification</h3> <div class="paragraph"> <p>An address may either be an individual mailbox, or a group of mailboxes.</p> </div> @@ -177,42 +322,51 @@ The last two digits of the zone MUST be within the range 00 through 59.</p> </div> <div class="literalblock"> <div class="content"> -<pre> group-list = mailbox-list / CFWS / obs-group-list +<pre>group-list = mailbox-list / CFWS / obs-group-list - mailbox-list = (mailbox *("," mailbox)) / obs-mbox-list +mailbox-list = (mailbox *("," mailbox)) / obs-mbox-list - address-list = (address *("," address)) / obs-addr-list +address-list = (address *("," address)) / obs-addr-list - address = mailbox / group +address = mailbox / group - mailbox = name-addr / addr-spec +mailbox = name-addr / addr-spec - name-addr = [display-name] angle-addr +name-addr = [display-name] angle-addr - angle-addr = [CFWS] "<" addr-spec ">" [CFWS] / - obs-angle-addr +angle-addr = [CFWS] "<" addr-spec ">" [CFWS] / + obs-angle-addr - group = display-name ":" [group-list] ";" [CFWS] +group = display-name ":" [group-list] ";" [CFWS] - display-name = phrase +display-name = phrase - addr-spec = local-part "@" domain +addr-spec = local-part "@" domain - local-part = dot-atom / quoted-string / obs-local-part +local-part = dot-atom / quoted-string / obs-local-part - domain = dot-atom / domain-literal / obs-domain +domain = dot-atom / domain-literal / obs-domain - domain-literal = [CFWS] "[" *([FWS] dtext) [FWS] "]" [CFWS] +domain-literal = [CFWS] "[" *([FWS] dtext) [FWS] "]" [CFWS] - dtext = %d33-90 / ; Printable US-ASCII - %d94-126 / ; characters not including - obs-dtext ; "[", "]", or "\"</pre> +dtext = %d33-90 / ; Printable US-ASCII + %d94-126 / ; characters not including + obs-dtext ; "[", "]", or "\"</pre> </div> </div> -<div class="paragraph"> -<p>dot-atom form SHOULD be used and the quoted-string form SHOULD NOT be used. 
-Comments and folding white space SHOULD NOT be used around the "@" in the +<div class="ulist"> +<ul> +<li> +<p>dot-atom form SHOULD be used,</p> +</li> +<li> +<p>quoted-string form SHOULD NOT be used;</p> +</li> +<li> +<p>Comments and folding white space SHOULD NOT be used around the "@" in the addr-spec.</p> +</li> +</ul> </div> </div> </div> @@ -225,78 +379,196 @@ addr-spec.</p> </div> <div class="literalblock"> <div class="content"> -<pre> fields = *(trace - *optional-field / - *(resent-date / - resent-from / - resent-sender / - resent-to / - resent-cc / - resent-bcc / - resent-msg-id)) - *(orig-date / - from / - sender / - reply-to / - to / - cc / - bcc / - message-id / - in-reply-to / - references / - subject / - comments / - keywords / - optional-field) - - +----------------+--------+------------+----------------------------+ - | Field | Min | Max number | Notes | - | | number | | | - +----------------+--------+------------+----------------------------+ - | trace | 0 | unlimited | Block prepended - see | - | | | | 3.6.7 | - | resent-date | 0* | unlimited* | One per block, required if | - | | | | other resent fields are | - | | | | present - see 3.6.6 | - | resent-from | 0 | unlimited* | One per block - see 3.6.6 | - | resent-sender | 0* | unlimited* | One per block, MUST occur | - | | | | with multi-address | - | | | | resent-from - see 3.6.6 | - | resent-to | 0 | unlimited* | One per block - see 3.6.6 | - | resent-cc | 0 | unlimited* | One per block - see 3.6.6 | - | resent-bcc | 0 | unlimited* | One per block - see 3.6.6 | - | resent-msg-id | 0 | unlimited* | One per block - see 3.6.6 | - | orig-date | 1 | 1 | | - | from | 1 | 1 | See sender and 3.6.2 | - | sender | 0* | 1 | MUST occur with | - | | | | multi-address from - see | - | | | | 3.6.2 | - | reply-to | 0 | 1 | | - | to | 0 | 1 | | - | cc | 0 | 1 | | - | bcc | 0 | 1 | | - | message-id | 0* | 1 | SHOULD be present - see | - | | | | 3.6.4 | - | in-reply-to | 0* | 1 | SHOULD occur in some | - | | | | 
replies - see 3.6.4 | - | references | 0* | 1 | SHOULD occur in some | - | | | | replies - see 3.6.4 | - | subject | 0 | 1 | | - | comments | 0 | unlimited | | - | keywords | 0 | unlimited | | - | optional-field | 0 | unlimited | | - +----------------+--------+------------+----------------------------+</pre> +<pre>fields = *(trace + *optional-field / + *(resent-date / + resent-from / + resent-sender / + resent-to / + resent-cc / + resent-bcc / + resent-msg-id)) + *(orig-date / + from / + sender / + reply-to / + to / + cc / + bcc / + message-id / + in-reply-to / + references / + subject / + comments / + keywords / + optional-field)</pre> </div> </div> -<div class="paragraph"> +<table class="tableblock frame-all grid-all stretch"> +<colgroup> +<col style="width: 20%;"> +<col style="width: 10%;"> +<col style="width: 10%;"> +<col style="width: 60%;"> +</colgroup> +<thead> +<tr> +<th class="tableblock halign-left valign-top">Field</th> +<th class="tableblock halign-left valign-middle">Min number</th> +<th class="tableblock halign-left valign-top">Max number</th> +<th class="tableblock halign-left valign-top">Notes</th> +</tr> +</thead> +<tbody> +<tr> +<td class="tableblock halign-left valign-top"><p class="tableblock">trace</p></td> +<td class="tableblock halign-left valign-middle"><p class="tableblock">0</p></td> +<td class="tableblock halign-left valign-top"><p class="tableblock">unlimited</p></td> +<td class="tableblock halign-left valign-top"><p class="tableblock">Block prepended - see 3.6.7</p></td> +</tr> +<tr> +<td class="tableblock halign-left valign-top"><p class="tableblock">resent-date</p></td> +<td class="tableblock halign-left valign-middle"><p class="tableblock">0*</p></td> +<td class="tableblock halign-left valign-top"><p class="tableblock">unlimited*</p></td> +<td class="tableblock halign-left valign-top"><p class="tableblock">One per block, required if other resent fields are present - see 3.6.6</p></td> +</tr> +<tr> +<td class="tableblock halign-left 
valign-top"><p class="tableblock">resent-from</p></td> +<td class="tableblock halign-left valign-middle"><p class="tableblock">0</p></td> +<td class="tableblock halign-left valign-top"><p class="tableblock">unlimited*</p></td> +<td class="tableblock halign-left valign-top"><p class="tableblock">One per block - see 3.6.6</p></td> +</tr> +<tr> +<td class="tableblock halign-left valign-top"><p class="tableblock">resent-sender</p></td> +<td class="tableblock halign-left valign-middle"><p class="tableblock">0*</p></td> +<td class="tableblock halign-left valign-top"><p class="tableblock">unlimited*</p></td> +<td class="tableblock halign-left valign-top"><p class="tableblock">One per block, MUST occur with multi-address resent-from - see 3.6.6</p></td> +</tr> +<tr> +<td class="tableblock halign-left valign-top"><p class="tableblock">resent-to</p></td> +<td class="tableblock halign-left valign-middle"><p class="tableblock">0</p></td> +<td class="tableblock halign-left valign-top"><p class="tableblock">unlimited*</p></td> +<td class="tableblock halign-left valign-top"><p class="tableblock">One per block - see 3.6.6</p></td> +</tr> +<tr> +<td class="tableblock halign-left valign-top"><p class="tableblock">resent-cc</p></td> +<td class="tableblock halign-left valign-middle"><p class="tableblock">0</p></td> +<td class="tableblock halign-left valign-top"><p class="tableblock">unlimited*</p></td> +<td class="tableblock halign-left valign-top"><p class="tableblock">One per block - see 3.6.6</p></td> +</tr> +<tr> +<td class="tableblock halign-left valign-top"><p class="tableblock">resent-bcc</p></td> +<td class="tableblock halign-left valign-middle"><p class="tableblock">0</p></td> +<td class="tableblock halign-left valign-top"><p class="tableblock">unlimited*</p></td> +<td class="tableblock halign-left valign-top"><p class="tableblock">One per block - see 3.6.6</p></td> +</tr> +<tr> +<td class="tableblock halign-left valign-top"><p class="tableblock">resent-msg-id</p></td> +<td 
class="tableblock halign-left valign-middle"><p class="tableblock">0</p></td> +<td class="tableblock halign-left valign-top"><p class="tableblock">unlimited*</p></td> +<td class="tableblock halign-left valign-top"><p class="tableblock">One per block - see 3.6.6</p></td> +</tr> +<tr> +<td class="tableblock halign-left valign-top"><p class="tableblock">orig-date</p></td> +<td class="tableblock halign-left valign-middle"><p class="tableblock">1</p></td> +<td class="tableblock halign-left valign-top"><p class="tableblock">1</p></td> +<td class="tableblock halign-left valign-top"></td> +</tr> +<tr> +<td class="tableblock halign-left valign-top"><p class="tableblock">from</p></td> +<td class="tableblock halign-left valign-middle"><p class="tableblock">1</p></td> +<td class="tableblock halign-left valign-top"><p class="tableblock">1</p></td> +<td class="tableblock halign-left valign-top"><p class="tableblock">See sender and 3.6.2</p></td> +</tr> +<tr> +<td class="tableblock halign-left valign-top"><p class="tableblock">sender</p></td> +<td class="tableblock halign-left valign-middle"><p class="tableblock">0*</p></td> +<td class="tableblock halign-left valign-top"><p class="tableblock">1</p></td> +<td class="tableblock halign-left valign-top"><p class="tableblock">MUST occur with multi-address from - see 3.6.2</p></td> +</tr> +<tr> +<td class="tableblock halign-left valign-top"><p class="tableblock">reply-to</p></td> +<td class="tableblock halign-left valign-middle"><p class="tableblock">0</p></td> +<td class="tableblock halign-left valign-top"><p class="tableblock">1</p></td> +<td class="tableblock halign-left valign-top"></td> +</tr> +<tr> +<td class="tableblock halign-left valign-top"><p class="tableblock">to</p></td> +<td class="tableblock halign-left valign-middle"><p class="tableblock">0</p></td> +<td class="tableblock halign-left valign-top"><p class="tableblock">1</p></td> +<td class="tableblock halign-left valign-top"></td> +</tr> +<tr> +<td class="tableblock 
halign-left valign-top"><p class="tableblock">cc</p></td> +<td class="tableblock halign-left valign-middle"><p class="tableblock">0</p></td> +<td class="tableblock halign-left valign-top"><p class="tableblock">1</p></td> +<td class="tableblock halign-left valign-top"></td> +</tr> +<tr> +<td class="tableblock halign-left valign-top"><p class="tableblock">bcc</p></td> +<td class="tableblock halign-left valign-middle"><p class="tableblock">0</p></td> +<td class="tableblock halign-left valign-top"><p class="tableblock">1</p></td> +<td class="tableblock halign-left valign-top"></td> +</tr> +<tr> +<td class="tableblock halign-left valign-top"><p class="tableblock">message-id</p></td> +<td class="tableblock halign-left valign-middle"><p class="tableblock">0*</p></td> +<td class="tableblock halign-left valign-top"><p class="tableblock">1</p></td> +<td class="tableblock halign-left valign-top"><p class="tableblock">SHOULD be present - see 3.6.4</p></td> +</tr> +<tr> +<td class="tableblock halign-left valign-top"><p class="tableblock">in-reply-to</p></td> +<td class="tableblock halign-left valign-middle"><p class="tableblock">0*</p></td> +<td class="tableblock halign-left valign-top"><p class="tableblock">1</p></td> +<td class="tableblock halign-left valign-top"><p class="tableblock">SHOULD occur in some replies - see 3.6.4</p></td> +</tr> +<tr> +<td class="tableblock halign-left valign-top"><p class="tableblock">references</p></td> +<td class="tableblock halign-left valign-middle"><p class="tableblock">0*</p></td> +<td class="tableblock halign-left valign-top"><p class="tableblock">1</p></td> +<td class="tableblock halign-left valign-top"><p class="tableblock">SHOULD occur in some replies - see 3.6.4</p></td> +</tr> +<tr> +<td class="tableblock halign-left valign-top"><p class="tableblock">subject</p></td> +<td class="tableblock halign-left valign-middle"><p class="tableblock">0</p></td> +<td class="tableblock halign-left valign-top"><p class="tableblock">1</p></td> +<td 
class="tableblock halign-left valign-top"></td> +</tr> +<tr> +<td class="tableblock halign-left valign-top"><p class="tableblock">comments</p></td> +<td class="tableblock halign-left valign-middle"><p class="tableblock">0</p></td> +<td class="tableblock halign-left valign-top"><p class="tableblock">unlimited</p></td> +<td class="tableblock halign-left valign-top"></td> +</tr> +<tr> +<td class="tableblock halign-left valign-top"><p class="tableblock">keywords</p></td> +<td class="tableblock halign-left valign-middle"><p class="tableblock">0</p></td> +<td class="tableblock halign-left valign-top"><p class="tableblock">unlimited</p></td> +<td class="tableblock halign-left valign-top"></td> +</tr> +<tr> +<td class="tableblock halign-left valign-top"><p class="tableblock">optional-field</p></td> +<td class="tableblock halign-left valign-middle"><p class="tableblock">0</p></td> +<td class="tableblock halign-left valign-top"><p class="tableblock">unlimited</p></td> +<td class="tableblock halign-left valign-top"></td> +</tr> +</tbody> +</table> +<div class="ulist"> +<ul> +<li> <p>Header fields SHOULD NOT be reordered when a message is transported or -transformed. -More importantly, the trace header fields and resent header fields MUST NOT be +transformed.</p> +</li> +<li> +<p>The trace header fields and resent header fields MUST NOT be reordered, and SHOULD be kept in blocks prepended to the message.</p> -</div> -<div class="paragraph"> +</li> +<li> <p>The only required header fields are the "Date" field and the originator address field(s) (which is "From", "Sender", and "Reply-To").</p> +</li> +</ul> </div> <div class="sect2"> <h3 id="_date_field">2.1. Date Field</h3> @@ -315,11 +587,11 @@ delivery system.</p> <h3 id="_originator_fields">2.2. 
Originator Fields</h3> <div class="literalblock"> <div class="content"> -<pre> from = "From:" mailbox-list CRLF +<pre>from = "From:" mailbox-list CRLF - sender = "Sender:" mailbox CRLF +sender = "Sender:" mailbox CRLF - reply-to = "Reply-To:" address-list CRLF</pre> +reply-to = "Reply-To:" address-list CRLF</pre> </div> </div> <div class="paragraph"> @@ -347,11 +619,11 @@ belong to the author(s) of the message.</p> <h3 id="_destination_fields">2.3. Destination Fields</h3> <div class="literalblock"> <div class="content"> -<pre> to = "To:" address-list CRLF +<pre>to = "To:" address-list CRLF - cc = "Cc:" address-list CRLF +cc = "Cc:" address-list CRLF - bcc = "Bcc:" [address-list / CFWS] CRLF</pre> +bcc = "Bcc:" [address-list / CFWS] CRLF</pre> </div> </div> <div class="paragraph"> @@ -403,30 +675,34 @@ each.</p> <div class="sect2"> <h3 id="_identification_field">2.4. Identification Field</h3> <div class="paragraph"> -<p>Every message SHOULD have a "Message-ID:" field.</p> -</div> -<div class="paragraph"> -<p>Reply messages SHOULD have "In-Reply-To:" and "References:" fields.</p> -</div> -<div class="paragraph"> <p>Format,</p> </div> <div class="literalblock"> <div class="content"> -<pre> message-id = "Message-ID:" msg-id CRLF +<pre>message-id = "Message-ID:" msg-id CRLF - in-reply-to = "In-Reply-To:" 1*msg-id CRLF +in-reply-to = "In-Reply-To:" 1*msg-id CRLF - references = "References:" 1*msg-id CRLF +references = "References:" 1*msg-id CRLF - msg-id = [CFWS] "<" id-left "@" id-right ">" [CFWS] +msg-id = [CFWS] "<" id-left "@" id-right ">" [CFWS] - id-left = dot-atom-text / obs-id-left +id-left = dot-atom-text / obs-id-left - id-right = dot-atom-text / no-fold-literal / obs-id-right +id-right = dot-atom-text / no-fold-literal / obs-id-right - no-fold-literal = "[" *dtext "]"</pre> +no-fold-literal = "[" *dtext "]"</pre> +</div> </div> +<div class="ulist"> +<ul> +<li> +<p>Every message SHOULD have a "Message-ID:" field.</p> +</li> +<li> +<p>Reply messages SHOULD have 
"In-Reply-To:" and "References:" fields.</p> +</li> +</ul> </div> <div class="paragraph"> <p>msg-id is intended to be machine readable and not necessarily meaningful to @@ -460,11 +736,11 @@ msg-id is what is contained between the two angle bracket characters.</p> <h3 id="_informational_fields">2.5. Informational Fields</h3> <div class="literalblock"> <div class="content"> -<pre> subject = "Subject:" unstructured CRLF +<pre>subject = "Subject:" unstructured CRLF - comments = "Comments:" unstructured CRLF +comments = "Comments:" unstructured CRLF - keywords = "Keywords:" phrase *("," phrase) CRLF</pre> +keywords = "Keywords:" phrase *("," phrase) CRLF</pre> </div> </div> <div class="paragraph"> @@ -479,62 +755,77 @@ undesirable consequences.</p> <div class="sect2"> <h3 id="_resent_fields">2.6. Resent Fields</h3> <div class="paragraph"> -<p>Resent fields SHOULD be added to any message that is reintroduced by -a user into the transport system. -A separate set of resent fields SHOULD be added each time this is done. -All of the resent fields corresponding to a particular resending of the -message SHOULD be grouped together. -Each new set of resent fields is prepended to the message; that is, the most -recent set of resent fields appears earlier in the message. 
-No other fields in the message are changed when resent fields are added.</p> -</div> -<div class="paragraph"> <p>Each of the resent fields corresponds to a particular field elsewhere in the syntax.</p> </div> <div class="literalblock"> <div class="content"> -<pre> resent-date = "Resent-Date:" date-time CRLF +<pre>resent-date = "Resent-Date:" date-time CRLF - resent-from = "Resent-From:" mailbox-list CRLF +resent-from = "Resent-From:" mailbox-list CRLF - resent-sender = "Resent-Sender:" mailbox CRLF +resent-sender = "Resent-Sender:" mailbox CRLF - resent-to = "Resent-To:" address-list CRLF +resent-to = "Resent-To:" address-list CRLF - resent-cc = "Resent-Cc:" address-list CRLF +resent-cc = "Resent-Cc:" address-list CRLF - resent-bcc = "Resent-Bcc:" [address-list / CFWS] CRLF +resent-bcc = "Resent-Bcc:" [address-list / CFWS] CRLF - resent-msg-id = "Resent-Message-ID:" msg-id CRLF</pre> +resent-msg-id = "Resent-Message-ID:" msg-id CRLF</pre> </div> </div> -<div class="paragraph"> +<div class="ulist"> +<ul> +<li> +<p>Resent fields SHOULD be added to any message that is reintroduced by +a user into the transport system.</p> +</li> +<li> +<p>A separate set of resent fields SHOULD be added each time this is done.</p> +</li> +<li> +<p>All of the resent fields corresponding to a particular resending of the +message SHOULD be grouped together.</p> +</li> +<li> +<p>Each new set of resent fields is prepended to the message; that is, the +most recent set of resent fields appears earlier in the message.</p> +</li> +<li> +<p>No other fields in the message are changed when resent fields are added.</p> +</li> +<li> <p>When resent fields are used, the "Resent-From:" and "Resent-Date:" -fields MUST be sent. -The "Resent-Message-ID:" field SHOULD be sent. 
-"Resent-Sender:" SHOULD NOT be used if "Resent-Sender:" would be identical to -"Resent-From:".</p> -</div> -<div class="paragraph"> +fields MUST be sent.</p> +</li> +<li> +<p>The "Resent-Message-ID:" field SHOULD be sent.</p> +</li> +<li> +<p>"Resent-Sender:" SHOULD NOT be used if "Resent-Sender:" would be identical +to "Resent-From:".</p> +</li> +<li> <p>The "Resent-Message-ID:" field provides a unique identifier for the resent message.</p> +</li> +</ul> </div> </div> <div class="sect2"> <h3 id="_trace_fields">2.7. Trace Fields</h3> <div class="literalblock"> <div class="content"> -<pre> trace = [return] - 1*received +<pre>trace = [return] 1*received - return = "Return-Path:" path CRLF +return = "Return-Path:" path CRLF - path = angle-addr / ([CFWS] "<" [CFWS] ">" [CFWS]) +path = angle-addr / ([CFWS] "<" [CFWS] ">" [CFWS]) - received = "Received:" *received-token ";" date-time CRLF +received = "Received:" *received-token ";" date-time CRLF - received-token = word / angle-addr / addr-spec / domain</pre> +received-token = word / angle-addr / addr-spec / domain</pre> </div> </div> </div> @@ -546,13 +837,7 @@ specified elsewhere in this document.</p> </div> <div class="literalblock"> <div class="content"> -<pre> optional-field = field-name ":" unstructured CRLF - - field-name = 1*ftext - - ftext = %d33-57 / ; Printable US-ASCII - %d59-126 ; characters not including - ; ":".</pre> +<pre>optional-field = field-name ":" unstructured CRLF</pre> </div> </div> </div> @@ -624,8 +909,8 @@ confirming their meaning.</p> <li> <p>mailbox addresses were allowed to have a route portion before the addr-spec when enclosed in "<" and ">". 
-The route is simply a comma-separated list of domain names, each preceded by -"@", and the list terminated by a colon.</p> +The route is simply a comma-separated list of domain names, each preceded +by "@", and the list terminated by a colon.</p> </li> <li> <p>CFWS were allowed between the period-separated elements of local-part and @@ -639,19 +924,27 @@ That is, there could be two or more commas in such a list with nothing in between them, or commas at the beginning or end of the list.</p> </li> <li> -<p>US-ASCII control characters and quoted-pairs were allowed in domain literals and are added here.</p> +<p>US-ASCII control characters and quoted-pairs were allowed in domain +literals and are added here.</p> </li> </ol> </div> </div> <div class="sect2"> <h3 id="_obsolete_header_fields">3.3. Obsolete Header Fields</h3> -<div class="paragraph"> -<p>Syntactically, the primary difference in the obsolete field syntax is -that it allows multiple occurrences of any of the fields and they may -occur in any order. -Also, any amount of white space is allowed before the ":" at the end of the +<div class="ulist"> +<ul> +<li> +<p>Allows multiple occurrences of any of the fields.</p> +</li> +<li> +<p>Fields may occur in any order.</p> +</li> +<li> +<p>Any amount of white space is allowed before the ":" at the end of the field name.</p> +</li> +</ul> </div> </div> </div> @@ -659,7 +952,7 @@ field name.</p> </div> <div id="footer"> <div id="footer-text"> -Last updated 2018-12-31 20:35:09 +0700 +Last updated 2019-02-02 02:24:54 +0700 </div> </div> </body> diff --git a/lib/email/body.go b/lib/email/body.go new file mode 100644 index 00000000..c1e3d31f --- /dev/null +++ b/lib/email/body.go @@ -0,0 +1,23 @@ +// Copyright 2019, Shulhan <ms@kilabit.info>. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package email + +// +// Body represent multiple message body. 
+// +type Body struct { + // + // We are not using map here it to prevent the body parts being reordeded when + // packing the message back into raw format. + // + mimes []*MIME // nolint: structcheck,unused +} + +// +// Unpack the message's body using boundary. +// +func (body *Body) Unpack(raw, boundary []byte) ([]byte, error) { + return raw, nil +} diff --git a/lib/email/doc.go b/lib/email/doc.go new file mode 100644 index 00000000..34b959a8 --- /dev/null +++ b/lib/email/doc.go @@ -0,0 +1,9 @@ +// Copyright 2019, Shulhan <ms@kilabit.info>. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +// Package email provide a library for working with Internet Message Format as +// defined by RFC 5322. +// +package email diff --git a/lib/email/email.go b/lib/email/email.go new file mode 100644 index 00000000..64bf4e16 --- /dev/null +++ b/lib/email/email.go @@ -0,0 +1,33 @@ +// Copyright 2019, Shulhan <ms@kilabit.info>. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package email + +var ( // nolint: gochecknoglobals + crlf = []byte{'\r', '\n'} +) + +// +// Email represent an internet message. +// +type Email struct { + Header Header + Body Body +} + +// +// Unpack the raw message header and body. +// +func (email *Email) Unpack(raw []byte) ([]byte, error) { + var err error + + raw, err = email.Header.Unpack(raw) + if err != nil { + return raw, err + } + + raw, err = email.Body.Unpack(raw, nil) + + return raw, err +} diff --git a/lib/email/field.go b/lib/email/field.go new file mode 100644 index 00000000..cdf7b90a --- /dev/null +++ b/lib/email/field.go @@ -0,0 +1,363 @@ +// Copyright 2019, Shulhan <ms@kilabit.info>. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package email + +import ( + "bytes" + "fmt" + "time" + + libbytes "github.com/shuLhan/share/lib/bytes" + libio "github.com/shuLhan/share/lib/io" + libtime "github.com/shuLhan/share/lib/time" +) + +var ( + FieldNameDate = []byte("date") +) + +// +// Field represent field name and value in header. +// +type Field struct { + // Type of field, the numeric representation of field name. + Type FieldType + + // Name contains "relaxed" canonicalization of field name. + Name []byte + // Value contains "relaxed" canonicalization of field value. + Value []byte + + // oriName contains "simple" canonicalization of field name. + oriName []byte + // oriValue contains "simple" canonicalization of field value. + oriValue []byte + + date *time.Time +} + +// +// ParseField create and initialize Field by parsing a single line message +// header field from raw input. +// +// If raw input contains multiple lines, the rest of lines will be returned. +// +// On error, it will return nil Field, and rest will contains the beginning of +// invalid input. +// +func ParseField(raw []byte) (field *Field, rest []byte, err error) { // nolint: gocyclo + if len(raw) == 0 { + return nil, nil, nil + } + + field = &Field{} + isFolded := false + start := 0 + + // Get field's name. + // Valid values: %d33-57 / %d59-126 . + x := 0 + for ; x < len(raw); x++ { + if raw[x] == ' ' || raw[x] == ':' { + break + } + if raw[x] < 33 || raw[x] > 126 { + goto invalid + } + } + if len(raw) == x { + goto invalid + } + + // Skip spaces before ':'. + for ; x < len(raw) && raw[x] == ' '; x++ { + } + if len(raw) == x { + goto invalid + } + if raw[x] != ':' { + goto invalid + } + + field.SetName(raw[:x]) + x++ + start = x + + // Skip WSP after ':'. + for ; x < len(raw) && (raw[x] == '\t' || raw[x] == ' '); x++ { + } + if len(raw) == x { + goto invalid + } + + // Get field's value. + // Valid values: WSP / %d33-126 . 
+ for ; x < len(raw); x++ { + for ; x < len(raw); x++ { + if raw[x] == '\t' || raw[x] == ' ' { + continue + } + if raw[x] == '\r' { + x++ + break + } + if raw[x] < 33 || raw[x] > 126 { + goto invalid + } + } + if x == len(raw) || raw[x] != '\n' { + goto invalid + } + if x++; x == len(raw) { + break + } + + // Unfolding ... + if raw[x] == '\t' || raw[x] == ' ' { + isFolded = true + continue + } + break + } + if !isFolded && x > 1000 { + err = fmt.Errorf("ParseField: line greater than 998 characters") + return nil, nil, err + } + + field.SetValue(raw[start:x]) + + if len(field.Value) == 0 { + goto invalid + } + + if len(raw) > x { + rest = raw[x:] + } + + return field, rest, nil + +invalid: + if x < len(raw) { + err = fmt.Errorf("ParseField: invalid character at index %d", x) + rest = raw[x:] + } else { + err = fmt.Errorf("ParseField: invalid input") + } + return nil, rest, err +} + +// +// SetName set field Name by canonicalizing raw field name using "simple" and +// "relaxed" algorithms. +//. +// "simple" algorithm store raw field name as is. +// +// "relaxed" algorithm convert field name to lowercase and removing trailing +// whitespaces. +// +func (field *Field) SetName(raw []byte) { + field.oriName = raw + field.Name = make([]byte, 0, len(raw)) + for x := 0; x < len(raw); x++ { + if raw[x] == ' ' || raw[x] < 33 || raw[x] > 126 { + break + } + if raw[x] >= 'A' && raw[x] <= 'Z' { + field.Name = append(field.Name, raw[x]+32) + } else { + field.Name = append(field.Name, raw[x]) + } + } + field.updateType() +} + +// +// SetValue set the field Value by canonicalizing raw input using "simple" and +// "relaxed" algorithms. +// +// "simple" algorithm store raw field value as is. +// +// "relaxed" algorithm remove leading and trailing WSP, replacing all +// CFWS with single space, but not removing CRLF at end. +// +func (field *Field) SetValue(raw []byte) { + field.oriValue = raw + field.Value = make([]byte, 0, len(raw)) + + x := 0 + // Skip leading spaces. 
+ for ; x < len(raw); x++ { + if !libbytes.IsSpace(raw[x]) { + break + } + } + + spaces := 0 + for ; x < len(raw); x++ { + if libbytes.IsSpace(raw[x]) { + spaces++ + continue + } + if spaces > 0 { + field.Value = append(field.Value, ' ') + spaces = 0 + } + field.Value = append(field.Value, raw[x]) + } + if len(field.Value) > 0 { + field.Value = append(field.Value, crlf...) + } +} + +// +// String return the relaxed canonicalization of field name and value +// separated by colon. +// +func (field *Field) String() string { + return string(field.Name) + ":" + string(field.Value) +} + +// +// Unpack the field Value based on field Name. +// +func (field *Field) Unpack() (err error) { + switch field.Type { + case FieldTypeDate: + err = field.unpackDate() + } + + return err +} + +// +// updateType update the field type based on field name. +// +func (field *Field) updateType() { + switch { + case bytes.Equal(FieldNameDate, field.Name): + field.Type = FieldTypeDate + default: + field.Type = FieldTypeOptional + } +} + +// +// unpackDate from field value into time.Time. +// +// Format, +// +// [day-of-week ","] day month year hour ":" minute [ ":" second ] zone +// +// day-of-week = "Mon" / ... / "Sun" +// day = 1*2DIGIT +// month = "Jan" / ... 
/ "Dec" +// year = 4*DIGIT +// hour = 2DIGIT +// minute = 2DIGIT +// second = 2DIGIT +// zone = ("+" / "-") 4DIGIT +// +// +// +func (field *Field) unpackDate() (err error) { + var ( + v []byte + ok bool + c byte + space = []byte{' ', '\r', '\n'} + day, year int64 + hour, min, sec int64 + off int64 + month time.Month + loc *time.Location = time.UTC + ) + + if len(field.Value) == 0 { + return fmt.Errorf("unpackDate: empty date") + } + + r := &libio.Reader{} + r.InitBytes(field.Value) + + c = r.SkipSpace() + if !libbytes.IsDigit(c) { + v, _, c = r.ReadUntil([]byte{','}, nil) + if len(v) == 0 || c != ',' { + return fmt.Errorf("unpackDate: invalid date format") + } + if c = r.SkipSpace(); c == 0 { + return fmt.Errorf("unpackDate: invalid date format") + } + } + + // Get day .... + if day, c = r.ScanInt64(); c == 0 || c != ' ' { + return fmt.Errorf("unpackDate: missing month") + } + // Get month ... + r.SkipSpace() + v, _, c = r.ReadUntil(space, nil) + month, ok = libtime.ShortMonths[string(v)] + if !ok { + return fmt.Errorf("unpackDate: invalid month: '%s'", v) + } + + // Get year ... + r.SkipSpace() + if year, c = r.ScanInt64(); c == 0 || c != ' ' { + return fmt.Errorf("unpackDate: invalid year") + } + + // Obsolete year allow two or three digits. + switch { + case year < 50: + year += 2000 + case year >= 50 && year < 1000: + year += 1900 + } + + // Get hour ... + if hour, c = r.ScanInt64(); c == 0 || c != ':' { + return fmt.Errorf("unpackDate: invalid hour") + } + if hour < 0 || hour > 23 { + return fmt.Errorf("unpackDate: invalid hour: %d", hour) + } + + // Get minute ... + r.SkipN(1) + min, c = r.ScanInt64() + if min < 0 || min > 59 { + return fmt.Errorf("unpackDate: invalid minute: %d", min) + } + + // Get second ... + if c == ':' { + r.SkipN(1) + sec, c = r.ScanInt64() + if sec < 0 || sec > 59 { + return fmt.Errorf("unpackDate: invalid second: %d", sec) + } + } + + // Get zone offset ... 
+ c = r.SkipSpace() + if c == 0 { + return fmt.Errorf("unpackDate: missing zone") + } + off, c = r.ScanInt64() + + loc = time.FixedZone("UTC", computeOffSeconds(off)) + td := time.Date(int(year), month, int(day), int(hour), int(min), int(sec), 0, loc) + field.date = &td + + return err +} + +func computeOffSeconds(off int64) int { + hour := int(off / 100) + min := int(off) - (hour * 100) + return ((hour * 60) + min) * 60 +} diff --git a/lib/email/field_test.go b/lib/email/field_test.go new file mode 100644 index 00000000..4e9b65ac --- /dev/null +++ b/lib/email/field_test.go @@ -0,0 +1,255 @@ +// Copyright 2019, Shulhan <ms@kilabit.info>. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package email + +import ( + "testing" + "time" + + libbytes "github.com/shuLhan/share/lib/bytes" + "github.com/shuLhan/share/lib/test" +) + +func TestParseField(t *testing.T) { + longValue := string(libbytes.Random([]byte(libbytes.ASCIILetters), 994)) + + cases := []struct { + desc string + raw []byte + expErr string + exp *Field + expRest []byte + }{{ + desc: "With empty input", + }, { + desc: "With long line", + raw: []byte("name:" + longValue + "\r\n"), + expErr: "ParseField: line greater than 998 characters", + }, { + desc: "With only whitespaces", + raw: []byte(" "), + expErr: "ParseField: invalid input", + }, { + desc: "With only CRLF", + raw: []byte("\r\n"), + expErr: "ParseField: invalid character at index 0", + }, { + desc: "Without separator and CRLF", + raw: []byte("name"), + expErr: "ParseField: invalid input", + }, { + desc: "Without separator", + raw: []byte("name\r\n"), + expErr: "ParseField: invalid character at index 4", + }, { + desc: "With space on name", + raw: []byte("na me\r\n"), + expErr: "ParseField: invalid character at index 3", + }, { + desc: "Without value and CRLF", + raw: []byte("name:"), + expErr: "ParseField: invalid input", + }, { + desc: "Without value and CRLF", + raw: 
[]byte("name: "), + expErr: "ParseField: invalid input", + }, { + desc: "Without value", + raw: []byte("name:\r\n"), + expErr: "ParseField: invalid input", + }, { + desc: "Without value", + raw: []byte("name: \r\n"), + expErr: "ParseField: invalid input", + }, { + desc: "Without CRLF", + raw: []byte("name:value"), + expErr: "ParseField: invalid input", + }, { + desc: "Without CR", + raw: []byte("name:value\n"), + expErr: "ParseField: invalid character at index 10", + }, { + desc: "Without LF", + raw: []byte("name:value\r"), + expErr: "ParseField: invalid input", + }, { + desc: "With CR inside value", + raw: []byte("name:valu\re"), + expErr: "ParseField: invalid character at index 10", + }, { + desc: "With valid input", + raw: []byte("NAME : VALUE\r\n"), + exp: &Field{ + Name: []byte("name"), + Value: []byte("VALUE\r\n"), + oriName: []byte("NAME "), + oriValue: []byte(" VALUE\r\n"), + }, + }, { + desc: "With single folding", + raw: []byte("Name : \r\n \t Value\r\n"), + exp: &Field{ + Name: []byte("name"), + Value: []byte("Value\r\n"), + oriName: []byte("Name "), + oriValue: []byte(" \r\n \t Value\r\n"), + }, + }, { + desc: "With multiple folding between value", + raw: []byte("namE : This\r\n is\r\n\ta\r\n \tvalue\r\n"), + exp: &Field{ + Name: []byte("name"), + Value: []byte("This is a value\r\n"), + oriName: []byte("namE "), + oriValue: []byte(" This\r\n is\r\n\ta\r\n \tvalue\r\n"), + }, + }, { + desc: "With multiple fields", + raw: []byte("a : 1\r\nb : 2\r\n"), + exp: &Field{ + Name: []byte("a"), + Value: []byte("1\r\n"), + oriName: []byte("a "), + oriValue: []byte(" 1\r\n"), + }, + expRest: []byte("b : 2\r\n"), + }} + + for _, c := range cases { + t.Log(c.desc) + + got, rest, err := ParseField(c.raw) + if err != nil { + test.Assert(t, "error", c.expErr, err.Error(), true) + continue + } + if got == nil { + test.Assert(t, "Field", c.exp, got, true) + continue + } + + test.Assert(t, "Field.oriName", c.exp.oriName, got.oriName, true) + test.Assert(t, 
"Field.oriValue", c.exp.oriValue, got.oriValue, true) + test.Assert(t, "Field.Name", c.exp.Name, got.Name, true) + test.Assert(t, "Field.Value", c.exp.Value, got.Value, true) + + test.Assert(t, "rest", c.expRest, rest, true) + } +} + +func TestUnpackDate(t *testing.T) { + cases := []struct { + desc string + value []byte + exp time.Time + expErr string + }{{ + desc: "With empty value", + expErr: "unpackDate: empty date", + }, { + desc: "With only spaces", + value: []byte(" "), + expErr: "unpackDate: empty date", + }, { + desc: "With invalid date format", + value: []byte("Sat"), + expErr: "unpackDate: invalid date format", + }, { + desc: "With invalid date format", + value: []byte("Sat,"), + expErr: "unpackDate: invalid date format", + }, { + desc: "With missing month", + value: []byte("Sat, 2"), + expErr: "unpackDate: missing month", + }, { + desc: "With missing month", + value: []byte("Sat, 2 "), + expErr: "unpackDate: missing month", + }, { + desc: "With invalid month", + value: []byte("Sat, 2 X 2019"), + expErr: "unpackDate: invalid month: 'X'", + }, { + desc: "With missing year", + value: []byte("Sat, 2 Feb"), + expErr: "unpackDate: invalid year", + }, { + desc: "With invalid year", + value: []byte("Sat, 2 Feb 2019"), + expErr: "unpackDate: invalid year", + }, { + desc: "With invalid hour", + value: []byte("Sat, 2 Feb 2019 00"), + expErr: "unpackDate: invalid hour", + }, { + desc: "With invalid hour", + value: []byte("Sat, 2 Feb 2019 24:55:16 +0000"), + expErr: "unpackDate: invalid hour: 24", + }, { + desc: "With invalid minute", + value: []byte("Sat, 2 Feb 2019 00:60:16 +0000"), + expErr: "unpackDate: invalid minute: 60", + }, { + desc: "Without second and missing zone", + value: []byte("Sat, 2 Feb 2019 00:55"), + expErr: "unpackDate: missing zone", + }, { + desc: "With invalid second", + value: []byte("Sat, 2 Feb 2019 00:55:60 +0000"), + expErr: "unpackDate: invalid second: 60", + }, { + desc: "With missing zone", + value: []byte("Sat, 2 Feb 2019 00:55:16"), + 
expErr: "unpackDate: missing zone", + }, { + desc: "With zone", + value: []byte("Sat, 2 Feb 2019 00:55:16 UTC"), + exp: time.Date(2019, time.February, 2, 0, 55, 16, 0, time.UTC), + }, { + desc: "With +0800", + value: []byte("Sat, 2 Feb 2019 00:55:16 +0800"), + exp: time.Date(2019, time.February, 2, 0, 55, 16, 0, time.FixedZone("UTC", 8*60*60)), + }, { + desc: "Without week day", + value: []byte("2 Feb 2019 00:55:16 UTC"), + exp: time.Date(2019, time.February, 2, 0, 55, 16, 0, time.UTC), + }, { + desc: "Without second", + value: []byte("Sat, 2 Feb 2019 00:55 UTC"), + exp: time.Date(2019, time.February, 2, 0, 55, 0, 0, time.UTC), + }, { + desc: "Without week-day and second", + value: []byte("2 Feb 2019 00:55 UTC"), + exp: time.Date(2019, time.February, 2, 0, 55, 0, 0, time.UTC), + }, { + desc: "With obsolete year 2 digits", + value: []byte("2 Feb 19 00:55 UTC"), + exp: time.Date(2019, time.February, 2, 0, 55, 0, 0, time.UTC), + }, { + desc: "With obsolete year 3 digits", + value: []byte("2 Feb 89 00:55 UTC"), + exp: time.Date(1989, time.February, 2, 0, 55, 0, 0, time.UTC), + }} + + field := &Field{ + Type: FieldTypeDate, + } + + for _, c := range cases { + t.Log(c.desc) + + field.SetValue(c.value) + + err := field.Unpack() + if err != nil { + test.Assert(t, "error", c.expErr, err.Error(), true) + continue + } + + test.Assert(t, "date", c.exp.String(), field.date.String(), true) + } +} diff --git a/lib/email/fieldtype.go b/lib/email/fieldtype.go new file mode 100644 index 00000000..c419bdad --- /dev/null +++ b/lib/email/fieldtype.go @@ -0,0 +1,12 @@ +// Copyright 2019, Shulhan <ms@kilabit.info>. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package email + +type FieldType int + +const ( + FieldTypeOptional FieldType = 0 + FieldTypeDate FieldType = 1 << iota +) diff --git a/lib/email/header.go b/lib/email/header.go new file mode 100644 index 00000000..54d92f07 --- /dev/null +++ b/lib/email/header.go @@ -0,0 +1,79 @@ +// Copyright 2019, Shulhan <ms@kilabit.info>. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package email + +import ( + "fmt" + "strings" +) + +// +// Header represents a list of fields. +// +// We are not using a map here, to prevent the header from being reordered when +// packing the message back into raw format. +// +type Header struct { + fields []*Field +} + +// +// Unpack the raw header from top to bottom. +// +// The raw header may end with an optional CRLF, an empty line that separates +// the header from the body of the message. +// +// On success it will return the rest of raw input (possibly the message's body) +// without the leading CRLF. +// +func (hdr *Header) Unpack(raw []byte) ([]byte, error) { + var ( + field *Field + err error + ) + + for len(raw) > 2 { + field, raw, err = ParseField(raw) + if err != nil { + return raw, err + } + hdr.fields = append(hdr.fields, field) + if len(raw) > 2 { + if raw[0] == crlf[0] && raw[1] == crlf[1] { + break + } + } + } + + switch len(raw) { + case 0: + case 1: + err = fmt.Errorf("Header.Unpack: invalid end of header: '%s'", raw) + case 2: + if raw[0] != crlf[0] || raw[1] != crlf[1] { + err = fmt.Errorf("Header.Unpack: invalid end of header: '%s'", raw) + } else { + raw = raw[2:] + } + default: + raw = raw[2:] + } + + return raw, err +} + +// +// String returns the text representation of the header, by concatenating all +// sanitized fields with CRLF. 
+// +func (hdr *Header) String() string { + var sb strings.Builder + + for _, f := range hdr.fields { + sb.WriteString(f.String()) + } + + return sb.String() +} diff --git a/lib/email/header_test.go b/lib/email/header_test.go new file mode 100644 index 00000000..29cb93a1 --- /dev/null +++ b/lib/email/header_test.go @@ -0,0 +1,72 @@ +// Copyright 2019, Shulhan <ms@kilabit.info>. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package email + +import ( + "testing" + + "github.com/shuLhan/share/lib/test" +) + +func TestHeaderUnpack(t *testing.T) { + cases := []struct { + desc string + raw []byte + expErr string + exp string + expRest []byte + }{{ + desc: "With empty input", + }, { + desc: "With whitespaces only", + raw: []byte(" \t"), + expErr: "Header.Unpack: invalid end of header: ' \t'", + }, { + desc: "With CRLF only", + raw: crlf, + expRest: []byte{}, + }, { + desc: "With invalid end", + raw: []byte("a: 1\r\nx"), + expErr: "Header.Unpack: invalid end of header: 'x'", + }, { + desc: "With invalid field: missing value", + raw: []byte("a:\r\n\t"), + expErr: "ParseField: invalid input", + }, { + desc: "With single field", + raw: []byte("a:1\r\n"), + exp: "a:1\r\n", + }, { + desc: "With multiple fields", + raw: []byte("a:1\r\nb : 2\r\n"), + exp: "a:1\r\nb:2\r\n", + }, { + desc: "With empty line at the end", + raw: []byte("a:1\r\nb : 2\r\n\r\n"), + exp: "a:1\r\nb:2\r\n", + expRest: []byte{}, + }, { + desc: "With body", + raw: []byte("a:1\r\nb : 2\r\n\r\nBody."), + exp: "a:1\r\nb:2\r\n", + expRest: []byte("Body."), + }} + + for _, c := range cases { + t.Log(c.desc) + + header := &Header{} + + rest, err := header.Unpack(c.raw) + if err != nil { + test.Assert(t, "error", c.expErr, err.Error(), true) + continue + } + + test.Assert(t, "Header.String", c.exp, header.String(), true) + test.Assert(t, "rest", c.expRest, rest, true) + } +} diff --git a/lib/email/mime.go b/lib/email/mime.go new file 
mode 100644 index 00000000..8e4102a5 --- /dev/null +++ b/lib/email/mime.go @@ -0,0 +1,16 @@ +// Copyright 2019, Shulhan <ms@kilabit.info>. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package email + +// +// MIME represents a part of the message body with id, content type, encoding, +// description, and content. +// +type MIME struct { + ID []byte + Type []byte + Description []byte + Content []byte +} diff --git a/lib/io/reader.go b/lib/io/reader.go index 5a8896f3..43b0c504 100644 --- a/lib/io/reader.go +++ b/lib/io/reader.go @@ -43,6 +43,14 @@ func (r *Reader) Init(src string) { } // +// InitBytes initializes the reader buffer from a slice of bytes. +// +func (r *Reader) InitBytes(src []byte) { + r.p = 0 + r.v = src +} + +// // ReadUntil read the content of file until one of separator found, or until // it reach the terminator character, or until EOF. // The content will be returned along the status of termination. @@ -73,6 +81,64 @@ func (r *Reader) ReadUntil(seps []byte, terms []byte) (b []byte, isTerm bool, c } // +// ScanInt64 converts the textual representation of a number into int64 and returns +// it. +// Any spaces before the actual reading of text will be ignored. +// The number may be prefixed with '-' or '+'; if it is '-', the returned value +// is negated. +// +// On success, c is the non-digit character that terminated the scan; if it is 0, it +// means EOF. +// +func (r *Reader) ScanInt64() (n int64, c byte) { + var min int64 = 1 + if len(r.v) == r.p { + return + } + + for ; r.p < len(r.v); r.p++ { + c = r.v[r.p] + if !libbytes.IsSpace(c) { + break + } + } + if c == '-' { + min = -1 + r.p++ + } else if c == '+' { + r.p++ + } + for r.p < len(r.v) { + c = r.v[r.p] + if !libbytes.IsDigit(c) { + break + } + c = c - '0' + n *= 10 + n += int64(c) + r.p++ + } + n *= min + if r.p == len(r.v) { + return n, 0 + } + + return n, c +} + +// +// SkipN skips reading n bytes from the buffer and returns true on EOF. 
+// +func (r *Reader) SkipN(n int) bool { + r.p += n + if r.p >= len(r.v) { + r.p = len(r.v) + return true + } + return false +} + +// // SkipSpace read until no white spaces found and return the first byte that // is not white spaces. // On EOF, it will return 0. diff --git a/lib/io/reader_test.go b/lib/io/reader_test.go new file mode 100644 index 00000000..bae919d0 --- /dev/null +++ b/lib/io/reader_test.go @@ -0,0 +1,69 @@ +// Copyright 2019, Shulhan <ms@kilabit.info>. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package io + +import ( + "testing" + + "github.com/shuLhan/share/lib/test" +) + +func TestReaderScanInt64(t *testing.T) { + cases := []struct { + desc string + src []byte + exp int64 + expc byte + }{{ + desc: "With empty input", + }, { + desc: "With non digit", + src: []byte("a -1"), + expc: 'a', + }, { + desc: "With leading spaces", + src: []byte(" +1"), + exp: 1, + }, { + desc: "With -1", + src: []byte("-1"), + exp: -1, + }, { + desc: "With -1", + src: []byte("-1x"), + exp: -1, + expc: 'x', + }, { + desc: "With +1", + src: []byte("+1"), + exp: 1, + }, { + desc: "With 1000", + src: []byte("1000"), + exp: 1000, + }, { + desc: "With 9876543210 1", + src: []byte("9876543210 1"), + exp: 9876543210, + expc: ' ', + }, { + desc: "With leading zero 001", + src: []byte("-001"), + exp: -1, + }} + + r := &Reader{} + + for _, c := range cases { + t.Log(c.desc) + + r.InitBytes(c.src) + + got, gotc := r.ScanInt64() + + test.Assert(t, "n", c.exp, got, true) + test.Assert(t, "c", c.expc, gotc, true) + } +} diff --git a/lib/time/time.go b/lib/time/time.go index 63d359b8..4a88f828 100644 --- a/lib/time/time.go +++ b/lib/time/time.go @@ -4,3 +4,32 @@ // Package time provide a library for working with time. 
package time + +import ( + "time" +) + +var ( + ShortDayNames = []string{ + "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun", + } + + // + // ShortMonths provides a mapping from the English short-format month + // name to its time.Month value. + // + ShortMonths = map[string]time.Month{ + "Jan": time.January, + "Feb": time.February, + "Mar": time.March, + "Apr": time.April, + "May": time.May, + "Jun": time.June, + "Jul": time.July, + "Aug": time.August, + "Sep": time.September, + "Oct": time.October, + "Nov": time.November, + "Dec": time.December, + } +) |
