aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorShulhan <ms@kilabit.info>2019-02-01 11:32:34 +0700
committerShulhan <ms@kilabit.info>2019-02-05 07:48:52 +0700
commit930b8e7bdeb7b4909905f3185647d8d64d89ebbf (patch)
tree623956d90d0e33b85bc23f97ccc93bf15b1950e9
parent45a23e05d85eac33e1a4b44c0888be5d91a73cdb (diff)
downloadpakakeh.go-930b8e7bdeb7b4909905f3185647d8d64d89ebbf.tar.xz
lib/email: new package for working with Internet Message Format
This package provides a library for parsing the email message format as specified in RFC 5322.
-rw-r--r--doc/IMF.adoc585
-rw-r--r--doc/IMF.html707
-rw-r--r--lib/email/body.go23
-rw-r--r--lib/email/doc.go9
-rw-r--r--lib/email/email.go33
-rw-r--r--lib/email/field.go363
-rw-r--r--lib/email/field_test.go255
-rw-r--r--lib/email/fieldtype.go12
-rw-r--r--lib/email/header.go79
-rw-r--r--lib/email/header_test.go72
-rw-r--r--lib/email/mime.go16
-rw-r--r--lib/io/reader.go66
-rw-r--r--lib/io/reader_test.go69
-rw-r--r--lib/time/time.go29
14 files changed, 1875 insertions, 443 deletions
diff --git a/doc/IMF.adoc b/doc/IMF.adoc
index fdfbcc35..497a3f01 100644
--- a/doc/IMF.adoc
+++ b/doc/IMF.adoc
@@ -1,4 +1,4 @@
-= Internet Message Format (IMF)
+= Internet Message Format (IMF)
:author: Shulhan
:email: <ms@kilabit.info>
:toc: left
@@ -10,13 +10,28 @@
This documentation provide summary and notes on implementation of Internet
Message Format as defined in {url-rfc5322}[RFC 5322].
-== Syntax
+
+== Syntax
....
message = (fields / obs-fields)
- [CRLF body]
+ [CRLF body]
+
+fields = *(field-name ":" (field-body / unstructured) CRLF)
+
+field-name = 1*ftext
+
+field-body = (*([FWS] VCHAR) *WSP)
+
+unstructured = (*([FWS] VCHAR) *WSP) / obs-unstruct
-fields = *(header-key ":" header-value CRLF)
+VCHAR = %d33-126
+
+WSP = %d9 / %d32
+ ; tab or space
+
+ftext = %d33-57 / %d59-126
+ ; Printable US-ASCII, except %d0-32 and %d58 (":")
body = (*(*998text CRLF) *998text) / obs-body
@@ -26,15 +41,39 @@ text = %d1-9 / ; Characters excluding CR
%d14-127
....
-Each line MUST be no more than 998 characters, and SHOULD be no more than 78
-characters, excluding the CRLF.
+* Each line MUST be no more than 998 characters, excluding CRLF.
+
+* Each line SHOULD be no more than 78 characters, excluding the CRLF.
+
+* CR and LF MUST only occur together as CRLF; they MUST NOT appear
+ independently in the body.
+
+* Each header field SHOULD be treated in its unfolded form for further
+ syntactic and semantic evaluation.
+
+* "field-body" MUST NOT include CR and LF except when used in "folding" and
+ "unfolding".
+
+
+=== Folding White Space and Comments
+
+....
+CFWS = (1*([FWS] comment) [FWS]) / FWS
+
+FWS = ([*WSP CRLF] 1*WSP) / obs-FWS
+ ; Folding white space
+
+comment = "(" *([FWS] ccontent) [FWS] ")"
-CR and LF MUST only occur together as CRLF; they MUST NOT appear
-independently in the body.
+ccontent = ctext / quoted-pair / comment
-`header-key` MUST be composed of printable US-ASCII characters, except colon.
-`header-value` MUST NOT include CR and LF except when used in "folding" and
-"unfolding".
+ctext = %d33-39 / ; Printable US-ASCII
+ %d42-91 / ; characters not including
+ %d93-126 / ; "(", ")", or "\"
+ obs-ctext
+
+quoted-pair = ("\" (VCHAR / WSP)) / obs-qp
+....
_Folding_ is a function to split a line into multiline with CRLF and WSP. For
example, the following line,
@@ -45,185 +84,253 @@ example, the following line,
can be folded into,
....
-"Subject: This is" CRLF
-WSP "a test" CRLF
+"Subject: This" CRLF
+WSP "is a test" CRLF
+....
+
+_Unfolding_ is the process that reverses the output of folding back into the
+original input.
+
+* An unfolded header field has no length restriction and therefore may be
+ indeterminately long.
+
+* Any CRLF that appears in FWS is semantically "invisible".
+
+* The "\" in any quoted-pair is semantically "invisible".
+
+* Folding is permitted within the comment.
+
+* The parentheses and backslash characters may appear in a comment, so long
+ as they appear as a quoted-pair.
+
+* A comment does not include the enclosing parentheses.
+
+
+=== Atom
+
+....
+word = atom / quoted-string
+
+phrase = 1*word / obs-phrase
+
+atom = [CFWS] 1*atext [CFWS]
+
+dot-atom = [CFWS] dot-atom-text [CFWS]
+
+dot-atom-text = 1*atext *("." 1*atext)
+
+atext = ALPHA / DIGIT / ; Printable US-ASCII
+ "!" / "#" / ; characters not including
+ "$" / "%" / ; specials. Used for atoms.
+ "&" / "'" /
+ "*" / "+" /
+ "-" / "/" /
+ "=" / "?" /
+ "^" / "_" /
+ "`" / "{" /
+ "|" / "}" /
+ "~"
+
+specials = "(" / ")" / ; Special characters that do
+ "<" / ">" / ; not appear in atext
+ "[" / "]" /
+ ":" / ";" /
+ "@" / "\" /
+ "," / "." /
+ DQUOTE
+....
+
+* The optional comments and FWS surrounding the rest of the characters are
+ not part of the atom.
+
+
+=== Quoted Strings
+
+....
+quoted-string = [CFWS]
+ DQUOTE *([FWS] qcontent) [FWS] DQUOTE
+ [CFWS]
+
+qcontent = qtext / quoted-pair
+
+qtext = %d33 / ; Printable US-ASCII
+ %d35-91 / ; characters not including
+ %d93-126 / ; "\" or the quote character
+ obs-qtext
....
-_Unfolding_ is the process that convert the multiline representation into a
-single line.
-=== Date and Time Specification
+=== Date and Time Specification
Syntax,
....
- date-time = [ day-of-week "," ] date time [CFWS]
+date-time = [ day-of-week "," ] date time [CFWS]
- day-of-week = ([FWS] day-name) / obs-day-of-week
+day-of-week = ([FWS] day-name) / obs-day-of-week
- day-name = "Mon" / "Tue" / "Wed" / "Thu" / "Fri" / "Sat" / "Sun"
+day-name = "Mon" / "Tue" / "Wed" / "Thu" / "Fri" / "Sat" / "Sun"
- date = day month year
+date = day month year
- day = ([FWS] 1*2DIGIT FWS) / obs-day
+day = ([FWS] 1*2DIGIT FWS) / obs-day
- month = "Jan" / "Feb" / "Mar" / "Apr" /
- "May" / "Jun" / "Jul" / "Aug" /
- "Sep" / "Oct" / "Nov" / "Dec"
+month = "Jan" / "Feb" / "Mar" / "Apr" /
+ "May" / "Jun" / "Jul" / "Aug" /
+ "Sep" / "Oct" / "Nov" / "Dec"
- year = (FWS 4*DIGIT FWS) / obs-year
+year = (FWS 4*DIGIT FWS) / obs-year
- time = time-of-day zone
+time = time-of-day zone
- time-of-day = hour ":" minute [ ":" second ]
+time-of-day = hour ":" minute [ ":" second ]
- hour = 2DIGIT / obs-hour
+hour = 2DIGIT / obs-hour
- minute = 2DIGIT / obs-minute
+minute = 2DIGIT / obs-minute
- second = 2DIGIT / obs-second
+second = 2DIGIT / obs-second
- zone = (FWS ( "+" / "-" ) 4DIGIT) / obs-zone
+zone = (FWS ( "+" / "-" ) 4DIGIT) / obs-zone
....
-The date and time-of-day SHOULD express local time.
+* The date and time-of-day SHOULD express local time.
+
+* The form "+0000" on zone SHOULD be used to indicate a time zone at
+ Universal Time.
+
+* The form "-0000" on zone indicates that the time was generated on a system
+ that may be in a local time zone other than Universal Time and that the
+ date-time contains no information about the local time zone.
+
+* A date-time specification MUST be semantically valid.
+
+* The day-of-week MUST be the day implied by the date.
-The form "+0000" on zone SHOULD be used to indicate a time zone at Universal
-Time.
+* The numeric day-of-month MUST be between 1 and the number of days allowed
+ for the specified month (in the specified year).
-The form "-0000" on zone indicate that the time was generated on a system that
-may be in a local time zone other than Universal Time and that the date-time
-contains no information about the local time zone.
+* The time-of-day MUST be in the range 00:00:00 through 23:59:60 (the number
+ of seconds allowing for a leap second).
-A date-time specification MUST be semantically valid.
-The day-of-week MUST be the day implied by the date.
-The numeric day-of-month MUST be between 1 and the number of days allowed
-for the specified month (in the specified year),
-The time-of-day MUST be in the range 00:00:00 through 23:59:60 (the number of
-seconds allowing for a leap second.
-The last two digits of the zone MUST be within the range 00 through 59.
+* The last two digits of the zone MUST be within the range 00 through 59.
-=== Address Specification
+
+=== Address Specification
An address may either be an individual mailbox, or a group of mailboxes.
Format,
....
- group-list = mailbox-list / CFWS / obs-group-list
+group-list = mailbox-list / CFWS / obs-group-list
- mailbox-list = (mailbox *("," mailbox)) / obs-mbox-list
+mailbox-list = (mailbox *("," mailbox)) / obs-mbox-list
- address-list = (address *("," address)) / obs-addr-list
+address-list = (address *("," address)) / obs-addr-list
- address = mailbox / group
+address = mailbox / group
- mailbox = name-addr / addr-spec
+mailbox = name-addr / addr-spec
- name-addr = [display-name] angle-addr
+name-addr = [display-name] angle-addr
- angle-addr = [CFWS] "<" addr-spec ">" [CFWS] /
- obs-angle-addr
+angle-addr = [CFWS] "<" addr-spec ">" [CFWS] /
+ obs-angle-addr
- group = display-name ":" [group-list] ";" [CFWS]
+group = display-name ":" [group-list] ";" [CFWS]
- display-name = phrase
+display-name = phrase
- addr-spec = local-part "@" domain
+addr-spec = local-part "@" domain
- local-part = dot-atom / quoted-string / obs-local-part
+local-part = dot-atom / quoted-string / obs-local-part
- domain = dot-atom / domain-literal / obs-domain
+domain = dot-atom / domain-literal / obs-domain
- domain-literal = [CFWS] "[" *([FWS] dtext) [FWS] "]" [CFWS]
+domain-literal = [CFWS] "[" *([FWS] dtext) [FWS] "]" [CFWS]
- dtext = %d33-90 / ; Printable US-ASCII
- %d94-126 / ; characters not including
- obs-dtext ; "[", "]", or "\"
+dtext = %d33-90 / ; Printable US-ASCII
+ %d94-126 / ; characters not including
+ obs-dtext ; "[", "]", or "\"
....
-dot-atom form SHOULD be used and the quoted-string form SHOULD NOT be used.
-Comments and folding white space SHOULD NOT be used around the "@" in the
-addr-spec.
+* The dot-atom form SHOULD be used.
+
+* The quoted-string form SHOULD NOT be used.
+
+* Comments and folding white space SHOULD NOT be used around the "@" in the
+ addr-spec.
-== Header
+== Header
Format,
....
- fields = *(trace
- *optional-field /
- *(resent-date /
- resent-from /
- resent-sender /
- resent-to /
- resent-cc /
- resent-bcc /
- resent-msg-id))
- *(orig-date /
- from /
- sender /
- reply-to /
- to /
- cc /
- bcc /
- message-id /
- in-reply-to /
- references /
- subject /
- comments /
- keywords /
- optional-field)
-
- +----------------+--------+------------+----------------------------+
- | Field | Min | Max number | Notes |
- | | number | | |
- +----------------+--------+------------+----------------------------+
- | trace | 0 | unlimited | Block prepended - see |
- | | | | 3.6.7 |
- | resent-date | 0* | unlimited* | One per block, required if |
- | | | | other resent fields are |
- | | | | present - see 3.6.6 |
- | resent-from | 0 | unlimited* | One per block - see 3.6.6 |
- | resent-sender | 0* | unlimited* | One per block, MUST occur |
- | | | | with multi-address |
- | | | | resent-from - see 3.6.6 |
- | resent-to | 0 | unlimited* | One per block - see 3.6.6 |
- | resent-cc | 0 | unlimited* | One per block - see 3.6.6 |
- | resent-bcc | 0 | unlimited* | One per block - see 3.6.6 |
- | resent-msg-id | 0 | unlimited* | One per block - see 3.6.6 |
- | orig-date | 1 | 1 | |
- | from | 1 | 1 | See sender and 3.6.2 |
- | sender | 0* | 1 | MUST occur with |
- | | | | multi-address from - see |
- | | | | 3.6.2 |
- | reply-to | 0 | 1 | |
- | to | 0 | 1 | |
- | cc | 0 | 1 | |
- | bcc | 0 | 1 | |
- | message-id | 0* | 1 | SHOULD be present - see |
- | | | | 3.6.4 |
- | in-reply-to | 0* | 1 | SHOULD occur in some |
- | | | | replies - see 3.6.4 |
- | references | 0* | 1 | SHOULD occur in some |
- | | | | replies - see 3.6.4 |
- | subject | 0 | 1 | |
- | comments | 0 | unlimited | |
- | keywords | 0 | unlimited | |
- | optional-field | 0 | unlimited | |
- +----------------+--------+------------+----------------------------+
+fields = *(trace
+ *optional-field /
+ *(resent-date /
+ resent-from /
+ resent-sender /
+ resent-to /
+ resent-cc /
+ resent-bcc /
+ resent-msg-id))
+ *(orig-date /
+ from /
+ sender /
+ reply-to /
+ to /
+ cc /
+ bcc /
+ message-id /
+ in-reply-to /
+ references /
+ subject /
+ comments /
+ keywords /
+ optional-field)
....
-Header fields SHOULD NOT be reordered when a message is transported or
-transformed.
-More importantly, the trace header fields and resent header fields MUST NOT be
-reordered, and SHOULD be kept in blocks prepended to the message.
+[cols=".<2,.^1,.<1,.<6",options="header"]
+|===
+| Field | Min number | Max number | Notes
+
+| trace | 0 | unlimited | Block prepended - see 3.6.7
+| resent-date | 0* | unlimited* | One per block, required if other resent fields are present - see 3.6.6
+| resent-from | 0 | unlimited* | One per block - see 3.6.6
+| resent-sender | 0* | unlimited* | One per block, MUST occur with multi-address resent-from - see 3.6.6
+| resent-to | 0 | unlimited* | One per block - see 3.6.6
+| resent-cc | 0 | unlimited* | One per block - see 3.6.6
+| resent-bcc | 0 | unlimited* | One per block - see 3.6.6
+| resent-msg-id | 0 | unlimited* | One per block - see 3.6.6
+| orig-date | 1 | 1 |
+| from | 1 | 1 | See sender and 3.6.2
+| sender | 0* | 1 | MUST occur with multi-address from - see 3.6.2
+| reply-to | 0 | 1 |
+| to | 0 | 1 |
+| cc | 0 | 1 |
+| bcc | 0 | 1 |
+| message-id | 0* | 1 | SHOULD be present - see 3.6.4
+| in-reply-to | 0* | 1 | SHOULD occur in some replies - see 3.6.4
+| references | 0* | 1 | SHOULD occur in some replies - see 3.6.4
+| subject | 0 | 1 |
+| comments | 0 | unlimited |
+| keywords | 0 | unlimited |
+| optional-field | 0 | unlimited |
+|===
-The only required header fields are the "Date" field and the originator
-address field(s) (which is "From", "Sender", and "Reply-To").
+* Header fields SHOULD NOT be reordered when a message is transported or
+ transformed.
+* The trace header fields and resent header fields MUST NOT be
+ reordered, and SHOULD be kept in blocks prepended to the message.
-=== Date Field
+* The only required header fields are the "Date" field and the originator
+ address field(s) (which are "From", "Sender", and "Reply-To").
+
+
+=== Date Field
The origination date specifies the date and time at which the creator of the
message indicated that the message was complete and ready to enter the mail
@@ -233,14 +340,14 @@ delivery system.
orig-date = "Date:" date-time CRLF
....
-=== Originator Fields
+=== Originator Fields
....
- from = "From:" mailbox-list CRLF
+from = "From:" mailbox-list CRLF
- sender = "Sender:" mailbox CRLF
+sender = "Sender:" mailbox CRLF
- reply-to = "Reply-To:" address-list CRLF
+reply-to = "Reply-To:" address-list CRLF
....
If the from field contains more than one mailbox, then the sender field MUST
@@ -260,14 +367,14 @@ In all cases, the "From:" field SHOULD NOT contain any mailbox that does not
belong to the author(s) of the message.
-=== Destination Fields
+=== Destination Fields
....
- to = "To:" address-list CRLF
+to = "To:" address-list CRLF
- cc = "Cc:" address-list CRLF
+cc = "Cc:" address-list CRLF
- bcc = "Bcc:" [address-list / CFWS] CRLF
+bcc = "Bcc:" [address-list / CFWS] CRLF
....
The "To:" field contains the address(es) of the primary recipient(s) of the
@@ -284,16 +391,18 @@ revealed to other recipients of the message.
There are three ways in which the "Bcc:" field is used,
-1. The "Bcc:" line is removed even though all of the recipients (including
+. The "Bcc:" line is removed even though all of the recipients (including
those specified in the "Bcc:" field) are sent a copy of the message.
-2. Recipients specified in the "To:" and "Cc:" lines each are sent
+
+. Recipients specified in the "To:" and "Cc:" lines each are sent
a copy of the message with the "Bcc:" line removed as above, but the
recipients on the "Bcc:" line get a separate copy of the message
containing a "Bcc:" line. (When there are multiple recipient
addresses in the "Bcc:" field, some implementations actually send a
separate copy of the message to each recipient with a "Bcc:"
containing only the address of that particular recipient.)
-3. Since a "Bcc:" field may contain no addresses, a "Bcc:" field can be
+
+. Since a "Bcc:" field may contain no addresses, a "Bcc:" field can be
sent without any addresses indicating to the recipients that blind
copies were sent to someone.
@@ -302,29 +411,29 @@ to the "Security Considerations" section of this document for a discussion of
each.
-=== Identification Field
-
-Every message SHOULD have a "Message-ID:" field.
-
-Reply messages SHOULD have "In-Reply-To:" and "References:" fields.
+=== Identification Field
Format,
....
- message-id = "Message-ID:" msg-id CRLF
+message-id = "Message-ID:" msg-id CRLF
- in-reply-to = "In-Reply-To:" 1*msg-id CRLF
+in-reply-to = "In-Reply-To:" 1*msg-id CRLF
- references = "References:" 1*msg-id CRLF
+references = "References:" 1*msg-id CRLF
- msg-id = [CFWS] "<" id-left "@" id-right ">" [CFWS]
+msg-id = [CFWS] "<" id-left "@" id-right ">" [CFWS]
- id-left = dot-atom-text / obs-id-left
+id-left = dot-atom-text / obs-id-left
- id-right = dot-atom-text / no-fold-literal / obs-id-right
+id-right = dot-atom-text / no-fold-literal / obs-id-right
- no-fold-literal = "[" *dtext "]"
+no-fold-literal = "[" *dtext "]"
....
+* Every message SHOULD have a "Message-ID:" field.
+
+* Reply messages SHOULD have "In-Reply-To:" and "References:" fields.
+
msg-id is intended to be machine readable and not necessarily meaningful to
humans.
@@ -347,14 +456,14 @@ Semantically, the angle bracket characters are not part of the msg-id; the
msg-id is what is contained between the two angle bracket characters.
-=== Informational Fields
+=== Informational Fields
....
- subject = "Subject:" unstructured CRLF
+subject = "Subject:" unstructured CRLF
- comments = "Comments:" unstructured CRLF
+comments = "Comments:" unstructured CRLF
- keywords = "Keywords:" phrase *("," phrase) CRLF
+keywords = "Keywords:" phrase *("," phrase) CRLF
....
When used in a reply, the "Subject" body MAY start with the string "Re: " (an
@@ -365,134 +474,138 @@ used since use of other strings or more than one instance can lead to
undesirable consequences.
-=== Resent Fields
-
-Resent fields SHOULD be added to any message that is reintroduced by
-a user into the transport system.
-A separate set of resent fields SHOULD be added each time this is done.
-All of the resent fields corresponding to a particular resending of the
-message SHOULD be grouped together.
-Each new set of resent fields is prepended to the message; that is, the most
-recent set of resent fields appears earlier in the message.
-No other fields in the message are changed when resent fields are added.
+=== Resent Fields
Each of the resent fields corresponds to a particular field elsewhere in the
syntax.
....
- resent-date = "Resent-Date:" date-time CRLF
+resent-date = "Resent-Date:" date-time CRLF
- resent-from = "Resent-From:" mailbox-list CRLF
+resent-from = "Resent-From:" mailbox-list CRLF
- resent-sender = "Resent-Sender:" mailbox CRLF
+resent-sender = "Resent-Sender:" mailbox CRLF
- resent-to = "Resent-To:" address-list CRLF
+resent-to = "Resent-To:" address-list CRLF
- resent-cc = "Resent-Cc:" address-list CRLF
+resent-cc = "Resent-Cc:" address-list CRLF
- resent-bcc = "Resent-Bcc:" [address-list / CFWS] CRLF
+resent-bcc = "Resent-Bcc:" [address-list / CFWS] CRLF
- resent-msg-id = "Resent-Message-ID:" msg-id CRLF
+resent-msg-id = "Resent-Message-ID:" msg-id CRLF
....
-When resent fields are used, the "Resent-From:" and "Resent-Date:"
-fields MUST be sent.
-The "Resent-Message-ID:" field SHOULD be sent.
-"Resent-Sender:" SHOULD NOT be used if "Resent-Sender:" would be identical to
-"Resent-From:".
+* Resent fields SHOULD be added to any message that is reintroduced by
+ a user into the transport system.
-The "Resent-Message-ID:" field provides a unique identifier for the resent
-message.
+* A separate set of resent fields SHOULD be added each time this is done.
+
+* All of the resent fields corresponding to a particular resending of the
+ message SHOULD be grouped together.
+
+* Each new set of resent fields is prepended to the message; that is, the
+ most recent set of resent fields appears earlier in the message.
+
+* No other fields in the message are changed when resent fields are added.
+
+* When resent fields are used, the "Resent-From:" and "Resent-Date:"
+ fields MUST be sent.
+
+* The "Resent-Message-ID:" field SHOULD be sent.
-=== Trace Fields
+* "Resent-Sender:" SHOULD NOT be used if "Resent-Sender:" would be identical
+ to "Resent-From:".
+
+* The "Resent-Message-ID:" field provides a unique identifier for the resent
+ message.
+
+
+=== Trace Fields
....
- trace = [return]
- 1*received
+trace = [return] 1*received
- return = "Return-Path:" path CRLF
+return = "Return-Path:" path CRLF
- path = angle-addr / ([CFWS] "<" [CFWS] ">" [CFWS])
+path = angle-addr / ([CFWS] "<" [CFWS] ">" [CFWS])
- received = "Received:" *received-token ";" date-time CRLF
+received = "Received:" *received-token ";" date-time CRLF
- received-token = word / angle-addr / addr-spec / domain
+received-token = word / angle-addr / addr-spec / domain
....
-=== Optional Fields
+=== Optional Fields
The field names of any optional field MUST NOT be identical to any field name
specified elsewhere in this document.
....
- optional-field = field-name ":" unstructured CRLF
-
- field-name = 1*ftext
-
- ftext = %d33-57 / ; Printable US-ASCII
- %d59-126 ; characters not including
- ; ":".
+optional-field = field-name ":" unstructured CRLF
....
-== Obsolete Specification
+== Obsolete Specification
-=== Obsolete Date and Time
+=== Obsolete Date and Time
The syntax for the obsolete date format allows
-1. a 2 digit year in the date field, and
-2. alphabetic time zone specifiers
+. a 2 digit year in the date field, and
+. alphabetic time zone specifiers
Where a two or three digit year occurs in a date, the year is to be
interpreted as follows:
-1. If a two digit year is encountered whose value is between 00 and 49, the
-year is interpreted by adding 2000, ending up with a value between 2000 and
-2049.
+. If a two digit year is encountered whose value is between 00 and 49, the
+ year is interpreted by adding 2000, ending up with a value between 2000 and
+ 2049.
-2. If a two digit year is encountered with a value between 50 and 99, or any
-three digit year is encountered, the year is interpreted by adding 1900.
+. If a two digit year is encountered with a value between 50 and 99, or any
+ three digit year is encountered, the year is interpreted by adding 1900.
Obsolete zones,
- EDT is semantically equivalent to -0400
- EST is semantically equivalent to -0500
- CDT is semantically equivalent to -0500
- CST is semantically equivalent to -0600
- MDT is semantically equivalent to -0600
- MST is semantically equivalent to -0700
- PDT is semantically equivalent to -0700
- PST is semantically equivalent to -0800
+....
+EDT is semantically equivalent to -0400
+EST is semantically equivalent to -0500
+CDT is semantically equivalent to -0500
+CST is semantically equivalent to -0600
+MDT is semantically equivalent to -0600
+MST is semantically equivalent to -0700
+PDT is semantically equivalent to -0700
+PST is semantically equivalent to -0800
+....
However, because of the error in [RFC0822], any time zones SHOULD all be
considered equivalent to "-0000" unless there is out-of-band information
confirming their meaning.
-=== Obsolete Addressing
+=== Obsolete Addressing
There are four primary differences in addressing.
-1. mailbox addresses were allowed to have a route portion before the
-addr-spec when enclosed in "<" and ">".
-The route is simply a comma-separated list of domain names, each preceded by
-"@", and the list terminated by a colon.
+. mailbox addresses were allowed to have a route portion before the
+ addr-spec when enclosed in "<" and ">".
+ The route is simply a comma-separated list of domain names, each preceded
+ by "@", and the list terminated by a colon.
+
+. CFWS were allowed between the period-separated elements of local-part and
+ domain (i.e., dot-atom was not used).
+ In addition, local-part is allowed to contain quoted-string in addition to
+ just atom.
+
+. mailbox-list and address-list were allowed to have "null" members.
+ That is, there could be two or more commas in such a list with nothing in
+ between them, or commas at the beginning or end of the list.
-2. CFWS were allowed between the period-separated elements of local-part and
-domain (i.e., dot-atom was not used).
-In addition, local-part is allowed to contain quoted-string in addition to
-just atom.
+. US-ASCII control characters and quoted-pairs were allowed in domain
+ literals and are added here.
-3. mailbox-list and address-list were allowed to have "null" members.
-That is, there could be two or more commas in such a list with nothing in
-between them, or commas at the beginning or end of the list.
-4. US-ASCII control characters and quoted-pairs were allowed in domain literals and are added here.
+=== Obsolete Header Fields
+* Allows multiple occurrences of any of the fields.
-=== Obsolete Header Fields
+* Fields may occur in any order.
-Syntactically, the primary difference in the obsolete field syntax is
-that it allows multiple occurrences of any of the fields and they may
-occur in any order.
-Also, any amount of white space is allowed before the ":" at the end of the
-field name.
+* Any amount of white space is allowed before the ":" at the end of the
+ field name.
diff --git a/doc/IMF.html b/doc/IMF.html
index 909bead3..3d7cfdd8 100644
--- a/doc/IMF.html
+++ b/doc/IMF.html
@@ -21,8 +21,11 @@
<ul class="sectlevel1">
<li><a href="#_syntax">1. Syntax</a>
<ul class="sectlevel2">
-<li><a href="#_date_and_time_specification">1.1. Date and Time Specification</a></li>
-<li><a href="#_address_specification">1.2. Address Specification</a></li>
+<li><a href="#_folding_white_space_and_comments">1.1. Folding White Space and Comments</a></li>
+<li><a href="#_atom">1.2. Atom</a></li>
+<li><a href="#_quoted_strings">1.3. Quoted Strings</a></li>
+<li><a href="#_date_and_time_specification">1.4. Date and Time Specification</a></li>
+<li><a href="#_address_specification">1.5. Address Specification</a></li>
</ul>
</li>
<li><a href="#_header">2. Header</a>
@@ -62,9 +65,23 @@ Message Format as defined in <a href="https://tools.ietf.org/html/rfc5322">RFC 5
<div class="literalblock">
<div class="content">
<pre>message = (fields / obs-fields)
- [CRLF body]
+ [CRLF body]
-fields = *(header-key ":" header-value CRLF)
+fields = *(field-name ":" (field-body / unstructured) CRLF)
+
+field-name = 1*ftext
+
+field-body = (*([FWS] VCHAR) *WSP)
+
+unstructured = (*([FWS] VCHAR) *WSP) / obs-unstruct
+
+VCHAR = %d33-126
+
+WSP = %d9 / %d32
+ ; tab or space
+
+ftext = %d33-57 / %d59-126
+ ; Printable US-ASCII, except %d0-32 and %d58 (":")
body = (*(*998text CRLF) *998text) / obs-body
@@ -74,18 +91,48 @@ text = %d1-9 / ; Characters excluding CR
%d14-127</pre>
</div>
</div>
-<div class="paragraph">
-<p>Each line MUST be no more than 998 characters, and SHOULD be no more than 78
-characters, excluding the CRLF.</p>
-</div>
-<div class="paragraph">
+<div class="ulist">
+<ul>
+<li>
+<p>Each line MUST be no more than 998 characters, excluding CRLF.</p>
+</li>
+<li>
+<p>Each line SHOULD be no more than 78 characters, excluding the CRLF.</p>
+</li>
+<li>
<p>CR and LF MUST only occur together as CRLF; they MUST NOT appear
independently in the body.</p>
-</div>
-<div class="paragraph">
-<p><code>header-key</code> MUST be composed of printable US-ASCII characters, except colon.
-<code>header-value</code> MUST NOT include CR and LF except when used in "folding" and
+</li>
+<li>
+<p>Each header field SHOULD be treated in its unfolded form for further
+syntactic and semantic evaluation.</p>
+</li>
+<li>
+<p>"field-body" MUST NOT include CR and LF except when used in "folding" and
"unfolding".</p>
+</li>
+</ul>
+</div>
+<div class="sect2">
+<h3 id="_folding_white_space_and_comments">1.1. Folding White Space and Comments</h3>
+<div class="literalblock">
+<div class="content">
+<pre>CFWS = (1*([FWS] comment) [FWS]) / FWS
+
+FWS = ([*WSP CRLF] 1*WSP) / obs-FWS
+ ; Folding white space
+
+comment = "(" *([FWS] ccontent) [FWS] ")"
+
+ccontent = ctext / quoted-pair / comment
+
+ctext = %d33-39 / ; Printable US-ASCII
+ %d42-91 / ; characters not including
+ %d93-126 / ; "(", ")", or "\"
+ obs-ctext
+
+quoted-pair = ("\" (VCHAR / WSP)) / obs-qp</pre>
+</div>
</div>
<div class="paragraph">
<p><em>Folding</em> is a function to split a line into multiline with CRLF and WSP. For
@@ -101,74 +148,172 @@ example, the following line,</p>
</div>
<div class="literalblock">
<div class="content">
-<pre>"Subject: This is" CRLF
-WSP "a test" CRLF</pre>
+<pre>"Subject: This" CRLF
+WSP "is a test" CRLF</pre>
</div>
</div>
<div class="paragraph">
-<p><em>Unfolding</em> is the process that convert the multiline representation into a
-single line.</p>
+<p><em>Unfolding</em> is the process that reverses the output of folding back into the
+original input.</p>
+</div>
+<div class="ulist">
+<ul>
+<li>
+<p>An unfolded header field has no length restriction and therefore may be
+indeterminately long.</p>
+</li>
+<li>
+<p>Any CRLF that appears in FWS is semantically "invisible".</p>
+</li>
+<li>
+<p>The "\" in any quoted-pair is semantically "invisible".</p>
+</li>
+<li>
+<p>Folding is permitted within the comment.</p>
+</li>
+<li>
+<p>The parentheses and backslash characters may appear in a comment, so long
+as they appear as a quoted-pair.</p>
+</li>
+<li>
+<p>A comment does not include the enclosing parentheses.</p>
+</li>
+</ul>
+</div>
</div>
<div class="sect2">
-<h3 id="_date_and_time_specification">1.1. Date and Time Specification</h3>
+<h3 id="_atom">1.2. Atom</h3>
+<div class="literalblock">
+<div class="content">
+<pre>word = atom / quoted-string
+
+phrase = 1*word / obs-phrase
+
+atom = [CFWS] 1*atext [CFWS]
+
+dot-atom = [CFWS] dot-atom-text [CFWS]
+
+dot-atom-text = 1*atext *("." 1*atext)
+
+atext = ALPHA / DIGIT / ; Printable US-ASCII
+ "!" / "#" / ; characters not including
+ "$" / "%" / ; specials. Used for atoms.
+ "&amp;" / "'" /
+ "*" / "+" /
+ "-" / "/" /
+ "=" / "?" /
+ "^" / "_" /
+ "`" / "{" /
+ "|" / "}" /
+ "~"
+
+specials = "(" / ")" / ; Special characters that do
+ "&lt;" / "&gt;" / ; not appear in atext
+ "[" / "]" /
+ ":" / ";" /
+ "@" / "\" /
+ "," / "." /
+ DQUOTE</pre>
+</div>
+</div>
+<div class="ulist">
+<ul>
+<li>
+<p>The optional comments and FWS surrounding the rest of the characters are
+not part of the atom.</p>
+</li>
+</ul>
+</div>
+</div>
+<div class="sect2">
+<h3 id="_quoted_strings">1.3. Quoted Strings</h3>
+<div class="literalblock">
+<div class="content">
+<pre>quoted-string = [CFWS]
+ DQUOTE *([FWS] qcontent) [FWS] DQUOTE
+ [CFWS]
+
+qcontent = qtext / quoted-pair
+
+qtext = %d33 / ; Printable US-ASCII
+ %d35-91 / ; characters not including
+ %d93-126 / ; "\" or the quote character
+ obs-qtext</pre>
+</div>
+</div>
+</div>
+<div class="sect2">
+<h3 id="_date_and_time_specification">1.4. Date and Time Specification</h3>
<div class="paragraph">
<p>Syntax,</p>
</div>
<div class="literalblock">
<div class="content">
-<pre> date-time = [ day-of-week "," ] date time [CFWS]
+<pre>date-time = [ day-of-week "," ] date time [CFWS]
- day-of-week = ([FWS] day-name) / obs-day-of-week
+day-of-week = ([FWS] day-name) / obs-day-of-week
- day-name = "Mon" / "Tue" / "Wed" / "Thu" / "Fri" / "Sat" / "Sun"
+day-name = "Mon" / "Tue" / "Wed" / "Thu" / "Fri" / "Sat" / "Sun"
- date = day month year
+date = day month year
- day = ([FWS] 1*2DIGIT FWS) / obs-day
+day = ([FWS] 1*2DIGIT FWS) / obs-day
- month = "Jan" / "Feb" / "Mar" / "Apr" /
- "May" / "Jun" / "Jul" / "Aug" /
- "Sep" / "Oct" / "Nov" / "Dec"
+month = "Jan" / "Feb" / "Mar" / "Apr" /
+ "May" / "Jun" / "Jul" / "Aug" /
+ "Sep" / "Oct" / "Nov" / "Dec"
- year = (FWS 4*DIGIT FWS) / obs-year
+year = (FWS 4*DIGIT FWS) / obs-year
- time = time-of-day zone
+time = time-of-day zone
- time-of-day = hour ":" minute [ ":" second ]
+time-of-day = hour ":" minute [ ":" second ]
- hour = 2DIGIT / obs-hour
+hour = 2DIGIT / obs-hour
- minute = 2DIGIT / obs-minute
+minute = 2DIGIT / obs-minute
- second = 2DIGIT / obs-second
+second = 2DIGIT / obs-second
- zone = (FWS ( "+" / "-" ) 4DIGIT) / obs-zone</pre>
+zone = (FWS ( "+" / "-" ) 4DIGIT) / obs-zone</pre>
</div>
</div>
-<div class="paragraph">
+<div class="ulist">
+<ul>
+<li>
<p>The date and time-of-day SHOULD express local time.</p>
-</div>
-<div class="paragraph">
-<p>The form "+0000" on zone SHOULD be used to indicate a time zone at Universal
-Time.</p>
-</div>
-<div class="paragraph">
-<p>The form "-0000" on zone indicate that the time was generated on a system that
-may be in a local time zone other than Universal Time and that the date-time
-contains no information about the local time zone.</p>
-</div>
-<div class="paragraph">
-<p>A date-time specification MUST be semantically valid.
-The day-of-week MUST be the day implied by the date.
-The numeric day-of-month MUST be between 1 and the number of days allowed
-for the specified month (in the specified year),
-The time-of-day MUST be in the range 00:00:00 through 23:59:60 (the number of
-seconds allowing for a leap second.
-The last two digits of the zone MUST be within the range 00 through 59.</p>
+</li>
+<li>
+<p>The form "+0000" on zone SHOULD be used to indicate a time zone at
+Universal Time.</p>
+</li>
+<li>
+<p>The form "-0000" on zone indicates that the time was generated on a system
+that may be in a local time zone other than Universal Time and that the
+date-time contains no information about the local time zone.</p>
+</li>
+<li>
+<p>A date-time specification MUST be semantically valid.</p>
+</li>
+<li>
+<p>The day-of-week MUST be the day implied by the date.</p>
+</li>
+<li>
+<p>The numeric day-of-month MUST be between 1 and the number of days allowed
+for the specified month (in the specified year).</p>
+</li>
+<li>
+<p>The time-of-day MUST be in the range 00:00:00 through 23:59:60 (the number
+of seconds allowing for a leap second).</p>
+</li>
+<li>
+<p>The last two digits of the zone MUST be within the range 00 through 59.</p>
+</li>
+</ul>
</div>
</div>
<div class="sect2">
-<h3 id="_address_specification">1.2. Address Specification</h3>
+<h3 id="_address_specification">1.5. Address Specification</h3>
<div class="paragraph">
<p>An address may either be an individual mailbox, or a group of mailboxes.</p>
</div>
@@ -177,42 +322,51 @@ The last two digits of the zone MUST be within the range 00 through 59.</p>
</div>
<div class="literalblock">
<div class="content">
-<pre> group-list = mailbox-list / CFWS / obs-group-list
+<pre>group-list = mailbox-list / CFWS / obs-group-list
- mailbox-list = (mailbox *("," mailbox)) / obs-mbox-list
+mailbox-list = (mailbox *("," mailbox)) / obs-mbox-list
- address-list = (address *("," address)) / obs-addr-list
+address-list = (address *("," address)) / obs-addr-list
- address = mailbox / group
+address = mailbox / group
- mailbox = name-addr / addr-spec
+mailbox = name-addr / addr-spec
- name-addr = [display-name] angle-addr
+name-addr = [display-name] angle-addr
- angle-addr = [CFWS] "&lt;" addr-spec "&gt;" [CFWS] /
- obs-angle-addr
+angle-addr = [CFWS] "&lt;" addr-spec "&gt;" [CFWS] /
+ obs-angle-addr
- group = display-name ":" [group-list] ";" [CFWS]
+group = display-name ":" [group-list] ";" [CFWS]
- display-name = phrase
+display-name = phrase
- addr-spec = local-part "@" domain
+addr-spec = local-part "@" domain
- local-part = dot-atom / quoted-string / obs-local-part
+local-part = dot-atom / quoted-string / obs-local-part
- domain = dot-atom / domain-literal / obs-domain
+domain = dot-atom / domain-literal / obs-domain
- domain-literal = [CFWS] "[" *([FWS] dtext) [FWS] "]" [CFWS]
+domain-literal = [CFWS] "[" *([FWS] dtext) [FWS] "]" [CFWS]
- dtext = %d33-90 / ; Printable US-ASCII
- %d94-126 / ; characters not including
- obs-dtext ; "[", "]", or "\"</pre>
+dtext = %d33-90 / ; Printable US-ASCII
+ %d94-126 / ; characters not including
+ obs-dtext ; "[", "]", or "\"</pre>
</div>
</div>
-<div class="paragraph">
-<p>dot-atom form SHOULD be used and the quoted-string form SHOULD NOT be used.
-Comments and folding white space SHOULD NOT be used around the "@" in the
+<div class="ulist">
+<ul>
+<li>
+<p>dot-atom form SHOULD be used,</p>
+</li>
+<li>
+<p>quoted-string form SHOULD NOT be used;</p>
+</li>
+<li>
+<p>Comments and folding white space SHOULD NOT be used around the "@" in the
addr-spec.</p>
+</li>
+</ul>
</div>
</div>
</div>
@@ -225,78 +379,196 @@ addr-spec.</p>
</div>
<div class="literalblock">
<div class="content">
-<pre> fields = *(trace
- *optional-field /
- *(resent-date /
- resent-from /
- resent-sender /
- resent-to /
- resent-cc /
- resent-bcc /
- resent-msg-id))
- *(orig-date /
- from /
- sender /
- reply-to /
- to /
- cc /
- bcc /
- message-id /
- in-reply-to /
- references /
- subject /
- comments /
- keywords /
- optional-field)
-
- +----------------+--------+------------+----------------------------+
- | Field | Min | Max number | Notes |
- | | number | | |
- +----------------+--------+------------+----------------------------+
- | trace | 0 | unlimited | Block prepended - see |
- | | | | 3.6.7 |
- | resent-date | 0* | unlimited* | One per block, required if |
- | | | | other resent fields are |
- | | | | present - see 3.6.6 |
- | resent-from | 0 | unlimited* | One per block - see 3.6.6 |
- | resent-sender | 0* | unlimited* | One per block, MUST occur |
- | | | | with multi-address |
- | | | | resent-from - see 3.6.6 |
- | resent-to | 0 | unlimited* | One per block - see 3.6.6 |
- | resent-cc | 0 | unlimited* | One per block - see 3.6.6 |
- | resent-bcc | 0 | unlimited* | One per block - see 3.6.6 |
- | resent-msg-id | 0 | unlimited* | One per block - see 3.6.6 |
- | orig-date | 1 | 1 | |
- | from | 1 | 1 | See sender and 3.6.2 |
- | sender | 0* | 1 | MUST occur with |
- | | | | multi-address from - see |
- | | | | 3.6.2 |
- | reply-to | 0 | 1 | |
- | to | 0 | 1 | |
- | cc | 0 | 1 | |
- | bcc | 0 | 1 | |
- | message-id | 0* | 1 | SHOULD be present - see |
- | | | | 3.6.4 |
- | in-reply-to | 0* | 1 | SHOULD occur in some |
- | | | | replies - see 3.6.4 |
- | references | 0* | 1 | SHOULD occur in some |
- | | | | replies - see 3.6.4 |
- | subject | 0 | 1 | |
- | comments | 0 | unlimited | |
- | keywords | 0 | unlimited | |
- | optional-field | 0 | unlimited | |
- +----------------+--------+------------+----------------------------+</pre>
+<pre>fields = *(trace
+ *optional-field /
+ *(resent-date /
+ resent-from /
+ resent-sender /
+ resent-to /
+ resent-cc /
+ resent-bcc /
+ resent-msg-id))
+ *(orig-date /
+ from /
+ sender /
+ reply-to /
+ to /
+ cc /
+ bcc /
+ message-id /
+ in-reply-to /
+ references /
+ subject /
+ comments /
+ keywords /
+ optional-field)</pre>
</div>
</div>
-<div class="paragraph">
+<table class="tableblock frame-all grid-all stretch">
+<colgroup>
+<col style="width: 20%;">
+<col style="width: 10%;">
+<col style="width: 10%;">
+<col style="width: 60%;">
+</colgroup>
+<thead>
+<tr>
+<th class="tableblock halign-left valign-top">Field</th>
+<th class="tableblock halign-left valign-middle">Min number</th>
+<th class="tableblock halign-left valign-top">Max number</th>
+<th class="tableblock halign-left valign-top">Notes</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td class="tableblock halign-left valign-top"><p class="tableblock">trace</p></td>
+<td class="tableblock halign-left valign-middle"><p class="tableblock">0</p></td>
+<td class="tableblock halign-left valign-top"><p class="tableblock">unlimited</p></td>
+<td class="tableblock halign-left valign-top"><p class="tableblock">Block prepended - see 3.6.7</p></td>
+</tr>
+<tr>
+<td class="tableblock halign-left valign-top"><p class="tableblock">resent-date</p></td>
+<td class="tableblock halign-left valign-middle"><p class="tableblock">0*</p></td>
+<td class="tableblock halign-left valign-top"><p class="tableblock">unlimited*</p></td>
+<td class="tableblock halign-left valign-top"><p class="tableblock">One per block, required if other resent fields are present - see 3.6.6</p></td>
+</tr>
+<tr>
+<td class="tableblock halign-left valign-top"><p class="tableblock">resent-from</p></td>
+<td class="tableblock halign-left valign-middle"><p class="tableblock">0</p></td>
+<td class="tableblock halign-left valign-top"><p class="tableblock">unlimited*</p></td>
+<td class="tableblock halign-left valign-top"><p class="tableblock">One per block - see 3.6.6</p></td>
+</tr>
+<tr>
+<td class="tableblock halign-left valign-top"><p class="tableblock">resent-sender</p></td>
+<td class="tableblock halign-left valign-middle"><p class="tableblock">0*</p></td>
+<td class="tableblock halign-left valign-top"><p class="tableblock">unlimited*</p></td>
+<td class="tableblock halign-left valign-top"><p class="tableblock">One per block, MUST occur with multi-address resent-from - see 3.6.6</p></td>
+</tr>
+<tr>
+<td class="tableblock halign-left valign-top"><p class="tableblock">resent-to</p></td>
+<td class="tableblock halign-left valign-middle"><p class="tableblock">0</p></td>
+<td class="tableblock halign-left valign-top"><p class="tableblock">unlimited*</p></td>
+<td class="tableblock halign-left valign-top"><p class="tableblock">One per block - see 3.6.6</p></td>
+</tr>
+<tr>
+<td class="tableblock halign-left valign-top"><p class="tableblock">resent-cc</p></td>
+<td class="tableblock halign-left valign-middle"><p class="tableblock">0</p></td>
+<td class="tableblock halign-left valign-top"><p class="tableblock">unlimited*</p></td>
+<td class="tableblock halign-left valign-top"><p class="tableblock">One per block - see 3.6.6</p></td>
+</tr>
+<tr>
+<td class="tableblock halign-left valign-top"><p class="tableblock">resent-bcc</p></td>
+<td class="tableblock halign-left valign-middle"><p class="tableblock">0</p></td>
+<td class="tableblock halign-left valign-top"><p class="tableblock">unlimited*</p></td>
+<td class="tableblock halign-left valign-top"><p class="tableblock">One per block - see 3.6.6</p></td>
+</tr>
+<tr>
+<td class="tableblock halign-left valign-top"><p class="tableblock">resent-msg-id</p></td>
+<td class="tableblock halign-left valign-middle"><p class="tableblock">0</p></td>
+<td class="tableblock halign-left valign-top"><p class="tableblock">unlimited*</p></td>
+<td class="tableblock halign-left valign-top"><p class="tableblock">One per block - see 3.6.6</p></td>
+</tr>
+<tr>
+<td class="tableblock halign-left valign-top"><p class="tableblock">orig-date</p></td>
+<td class="tableblock halign-left valign-middle"><p class="tableblock">1</p></td>
+<td class="tableblock halign-left valign-top"><p class="tableblock">1</p></td>
+<td class="tableblock halign-left valign-top"></td>
+</tr>
+<tr>
+<td class="tableblock halign-left valign-top"><p class="tableblock">from</p></td>
+<td class="tableblock halign-left valign-middle"><p class="tableblock">1</p></td>
+<td class="tableblock halign-left valign-top"><p class="tableblock">1</p></td>
+<td class="tableblock halign-left valign-top"><p class="tableblock">See sender and 3.6.2</p></td>
+</tr>
+<tr>
+<td class="tableblock halign-left valign-top"><p class="tableblock">sender</p></td>
+<td class="tableblock halign-left valign-middle"><p class="tableblock">0*</p></td>
+<td class="tableblock halign-left valign-top"><p class="tableblock">1</p></td>
+<td class="tableblock halign-left valign-top"><p class="tableblock">MUST occur with multi-address from - see 3.6.2</p></td>
+</tr>
+<tr>
+<td class="tableblock halign-left valign-top"><p class="tableblock">reply-to</p></td>
+<td class="tableblock halign-left valign-middle"><p class="tableblock">0</p></td>
+<td class="tableblock halign-left valign-top"><p class="tableblock">1</p></td>
+<td class="tableblock halign-left valign-top"></td>
+</tr>
+<tr>
+<td class="tableblock halign-left valign-top"><p class="tableblock">to</p></td>
+<td class="tableblock halign-left valign-middle"><p class="tableblock">0</p></td>
+<td class="tableblock halign-left valign-top"><p class="tableblock">1</p></td>
+<td class="tableblock halign-left valign-top"></td>
+</tr>
+<tr>
+<td class="tableblock halign-left valign-top"><p class="tableblock">cc</p></td>
+<td class="tableblock halign-left valign-middle"><p class="tableblock">0</p></td>
+<td class="tableblock halign-left valign-top"><p class="tableblock">1</p></td>
+<td class="tableblock halign-left valign-top"></td>
+</tr>
+<tr>
+<td class="tableblock halign-left valign-top"><p class="tableblock">bcc</p></td>
+<td class="tableblock halign-left valign-middle"><p class="tableblock">0</p></td>
+<td class="tableblock halign-left valign-top"><p class="tableblock">1</p></td>
+<td class="tableblock halign-left valign-top"></td>
+</tr>
+<tr>
+<td class="tableblock halign-left valign-top"><p class="tableblock">message-id</p></td>
+<td class="tableblock halign-left valign-middle"><p class="tableblock">0*</p></td>
+<td class="tableblock halign-left valign-top"><p class="tableblock">1</p></td>
+<td class="tableblock halign-left valign-top"><p class="tableblock">SHOULD be present - see 3.6.4</p></td>
+</tr>
+<tr>
+<td class="tableblock halign-left valign-top"><p class="tableblock">in-reply-to</p></td>
+<td class="tableblock halign-left valign-middle"><p class="tableblock">0*</p></td>
+<td class="tableblock halign-left valign-top"><p class="tableblock">1</p></td>
+<td class="tableblock halign-left valign-top"><p class="tableblock">SHOULD occur in some replies - see 3.6.4</p></td>
+</tr>
+<tr>
+<td class="tableblock halign-left valign-top"><p class="tableblock">references</p></td>
+<td class="tableblock halign-left valign-middle"><p class="tableblock">0*</p></td>
+<td class="tableblock halign-left valign-top"><p class="tableblock">1</p></td>
+<td class="tableblock halign-left valign-top"><p class="tableblock">SHOULD occur in some replies - see 3.6.4</p></td>
+</tr>
+<tr>
+<td class="tableblock halign-left valign-top"><p class="tableblock">subject</p></td>
+<td class="tableblock halign-left valign-middle"><p class="tableblock">0</p></td>
+<td class="tableblock halign-left valign-top"><p class="tableblock">1</p></td>
+<td class="tableblock halign-left valign-top"></td>
+</tr>
+<tr>
+<td class="tableblock halign-left valign-top"><p class="tableblock">comments</p></td>
+<td class="tableblock halign-left valign-middle"><p class="tableblock">0</p></td>
+<td class="tableblock halign-left valign-top"><p class="tableblock">unlimited</p></td>
+<td class="tableblock halign-left valign-top"></td>
+</tr>
+<tr>
+<td class="tableblock halign-left valign-top"><p class="tableblock">keywords</p></td>
+<td class="tableblock halign-left valign-middle"><p class="tableblock">0</p></td>
+<td class="tableblock halign-left valign-top"><p class="tableblock">unlimited</p></td>
+<td class="tableblock halign-left valign-top"></td>
+</tr>
+<tr>
+<td class="tableblock halign-left valign-top"><p class="tableblock">optional-field</p></td>
+<td class="tableblock halign-left valign-middle"><p class="tableblock">0</p></td>
+<td class="tableblock halign-left valign-top"><p class="tableblock">unlimited</p></td>
+<td class="tableblock halign-left valign-top"></td>
+</tr>
+</tbody>
+</table>
+<div class="ulist">
+<ul>
+<li>
<p>Header fields SHOULD NOT be reordered when a message is transported or
-transformed.
-More importantly, the trace header fields and resent header fields MUST NOT be
+transformed.</p>
+</li>
+<li>
+<p>The trace header fields and resent header fields MUST NOT be
reordered, and SHOULD be kept in blocks prepended to the message.</p>
-</div>
-<div class="paragraph">
+</li>
+<li>
<p>The only required header fields are the "Date" field and the originator
address field(s) (which is "From", "Sender", and "Reply-To").</p>
+</li>
+</ul>
</div>
<div class="sect2">
<h3 id="_date_field">2.1. Date Field</h3>
@@ -315,11 +587,11 @@ delivery system.</p>
<h3 id="_originator_fields">2.2. Originator Fields</h3>
<div class="literalblock">
<div class="content">
-<pre> from = "From:" mailbox-list CRLF
+<pre>from = "From:" mailbox-list CRLF
- sender = "Sender:" mailbox CRLF
+sender = "Sender:" mailbox CRLF
- reply-to = "Reply-To:" address-list CRLF</pre>
+reply-to = "Reply-To:" address-list CRLF</pre>
</div>
</div>
<div class="paragraph">
@@ -347,11 +619,11 @@ belong to the author(s) of the message.</p>
<h3 id="_destination_fields">2.3. Destination Fields</h3>
<div class="literalblock">
<div class="content">
-<pre> to = "To:" address-list CRLF
+<pre>to = "To:" address-list CRLF
- cc = "Cc:" address-list CRLF
+cc = "Cc:" address-list CRLF
- bcc = "Bcc:" [address-list / CFWS] CRLF</pre>
+bcc = "Bcc:" [address-list / CFWS] CRLF</pre>
</div>
</div>
<div class="paragraph">
@@ -403,30 +675,34 @@ each.</p>
<div class="sect2">
<h3 id="_identification_field">2.4. Identification Field</h3>
<div class="paragraph">
-<p>Every message SHOULD have a "Message-ID:" field.</p>
-</div>
-<div class="paragraph">
-<p>Reply messages SHOULD have "In-Reply-To:" and "References:" fields.</p>
-</div>
-<div class="paragraph">
<p>Format,</p>
</div>
<div class="literalblock">
<div class="content">
-<pre> message-id = "Message-ID:" msg-id CRLF
+<pre>message-id = "Message-ID:" msg-id CRLF
- in-reply-to = "In-Reply-To:" 1*msg-id CRLF
+in-reply-to = "In-Reply-To:" 1*msg-id CRLF
- references = "References:" 1*msg-id CRLF
+references = "References:" 1*msg-id CRLF
- msg-id = [CFWS] "&lt;" id-left "@" id-right "&gt;" [CFWS]
+msg-id = [CFWS] "&lt;" id-left "@" id-right "&gt;" [CFWS]
- id-left = dot-atom-text / obs-id-left
+id-left = dot-atom-text / obs-id-left
- id-right = dot-atom-text / no-fold-literal / obs-id-right
+id-right = dot-atom-text / no-fold-literal / obs-id-right
- no-fold-literal = "[" *dtext "]"</pre>
+no-fold-literal = "[" *dtext "]"</pre>
+</div>
</div>
+<div class="ulist">
+<ul>
+<li>
+<p>Every message SHOULD have a "Message-ID:" field.</p>
+</li>
+<li>
+<p>Reply messages SHOULD have "In-Reply-To:" and "References:" fields.</p>
+</li>
+</ul>
</div>
<div class="paragraph">
<p>msg-id is intended to be machine readable and not necessarily meaningful to
@@ -460,11 +736,11 @@ msg-id is what is contained between the two angle bracket characters.</p>
<h3 id="_informational_fields">2.5. Informational Fields</h3>
<div class="literalblock">
<div class="content">
-<pre> subject = "Subject:" unstructured CRLF
+<pre>subject = "Subject:" unstructured CRLF
- comments = "Comments:" unstructured CRLF
+comments = "Comments:" unstructured CRLF
- keywords = "Keywords:" phrase *("," phrase) CRLF</pre>
+keywords = "Keywords:" phrase *("," phrase) CRLF</pre>
</div>
</div>
<div class="paragraph">
@@ -479,62 +755,77 @@ undesirable consequences.</p>
<div class="sect2">
<h3 id="_resent_fields">2.6. Resent Fields</h3>
<div class="paragraph">
-<p>Resent fields SHOULD be added to any message that is reintroduced by
-a user into the transport system.
-A separate set of resent fields SHOULD be added each time this is done.
-All of the resent fields corresponding to a particular resending of the
-message SHOULD be grouped together.
-Each new set of resent fields is prepended to the message; that is, the most
-recent set of resent fields appears earlier in the message.
-No other fields in the message are changed when resent fields are added.</p>
-</div>
-<div class="paragraph">
<p>Each of the resent fields corresponds to a particular field elsewhere in the
syntax.</p>
</div>
<div class="literalblock">
<div class="content">
-<pre> resent-date = "Resent-Date:" date-time CRLF
+<pre>resent-date = "Resent-Date:" date-time CRLF
- resent-from = "Resent-From:" mailbox-list CRLF
+resent-from = "Resent-From:" mailbox-list CRLF
- resent-sender = "Resent-Sender:" mailbox CRLF
+resent-sender = "Resent-Sender:" mailbox CRLF
- resent-to = "Resent-To:" address-list CRLF
+resent-to = "Resent-To:" address-list CRLF
- resent-cc = "Resent-Cc:" address-list CRLF
+resent-cc = "Resent-Cc:" address-list CRLF
- resent-bcc = "Resent-Bcc:" [address-list / CFWS] CRLF
+resent-bcc = "Resent-Bcc:" [address-list / CFWS] CRLF
- resent-msg-id = "Resent-Message-ID:" msg-id CRLF</pre>
+resent-msg-id = "Resent-Message-ID:" msg-id CRLF</pre>
</div>
</div>
-<div class="paragraph">
+<div class="ulist">
+<ul>
+<li>
+<p>Resent fields SHOULD be added to any message that is reintroduced by
+a user into the transport system.</p>
+</li>
+<li>
+<p>A separate set of resent fields SHOULD be added each time this is done.</p>
+</li>
+<li>
+<p>All of the resent fields corresponding to a particular resending of the
+message SHOULD be grouped together.</p>
+</li>
+<li>
+<p>Each new set of resent fields is prepended to the message; that is, the
+most recent set of resent fields appears earlier in the message.</p>
+</li>
+<li>
+<p>No other fields in the message are changed when resent fields are added.</p>
+</li>
+<li>
<p>When resent fields are used, the "Resent-From:" and "Resent-Date:"
-fields MUST be sent.
-The "Resent-Message-ID:" field SHOULD be sent.
-"Resent-Sender:" SHOULD NOT be used if "Resent-Sender:" would be identical to
-"Resent-From:".</p>
-</div>
-<div class="paragraph">
+fields MUST be sent.</p>
+</li>
+<li>
+<p>The "Resent-Message-ID:" field SHOULD be sent.</p>
+</li>
+<li>
+<p>"Resent-Sender:" SHOULD NOT be used if "Resent-Sender:" would be identical
+to "Resent-From:".</p>
+</li>
+<li>
<p>The "Resent-Message-ID:" field provides a unique identifier for the resent
message.</p>
+</li>
+</ul>
</div>
</div>
<div class="sect2">
<h3 id="_trace_fields">2.7. Trace Fields</h3>
<div class="literalblock">
<div class="content">
-<pre> trace = [return]
- 1*received
+<pre>trace = [return] 1*received
- return = "Return-Path:" path CRLF
+return = "Return-Path:" path CRLF
- path = angle-addr / ([CFWS] "&lt;" [CFWS] "&gt;" [CFWS])
+path = angle-addr / ([CFWS] "&lt;" [CFWS] "&gt;" [CFWS])
- received = "Received:" *received-token ";" date-time CRLF
+received = "Received:" *received-token ";" date-time CRLF
- received-token = word / angle-addr / addr-spec / domain</pre>
+received-token = word / angle-addr / addr-spec / domain</pre>
</div>
</div>
</div>
@@ -546,13 +837,7 @@ specified elsewhere in this document.</p>
</div>
<div class="literalblock">
<div class="content">
-<pre> optional-field = field-name ":" unstructured CRLF
-
- field-name = 1*ftext
-
- ftext = %d33-57 / ; Printable US-ASCII
- %d59-126 ; characters not including
- ; ":".</pre>
+<pre>optional-field = field-name ":" unstructured CRLF</pre>
</div>
</div>
</div>
@@ -624,8 +909,8 @@ confirming their meaning.</p>
<li>
<p>mailbox addresses were allowed to have a route portion before the
addr-spec when enclosed in "&lt;" and "&gt;".
-The route is simply a comma-separated list of domain names, each preceded by
-"@", and the list terminated by a colon.</p>
+The route is simply a comma-separated list of domain names, each preceded
+by "@", and the list terminated by a colon.</p>
</li>
<li>
<p>CFWS were allowed between the period-separated elements of local-part and
@@ -639,19 +924,27 @@ That is, there could be two or more commas in such a list with nothing in
between them, or commas at the beginning or end of the list.</p>
</li>
<li>
-<p>US-ASCII control characters and quoted-pairs were allowed in domain literals and are added here.</p>
+<p>US-ASCII control characters and quoted-pairs were allowed in domain
+literals and are added here.</p>
</li>
</ol>
</div>
</div>
<div class="sect2">
<h3 id="_obsolete_header_fields">3.3. Obsolete Header Fields</h3>
-<div class="paragraph">
-<p>Syntactically, the primary difference in the obsolete field syntax is
-that it allows multiple occurrences of any of the fields and they may
-occur in any order.
-Also, any amount of white space is allowed before the ":" at the end of the
+<div class="ulist">
+<ul>
+<li>
+<p>Allows multiple occurrences of any of the fields.</p>
+</li>
+<li>
+<p>Fields may occur in any order.</p>
+</li>
+<li>
+<p>Any amount of white space is allowed before the ":" at the end of the
field name.</p>
+</li>
+</ul>
</div>
</div>
</div>
@@ -659,7 +952,7 @@ field name.</p>
</div>
<div id="footer">
<div id="footer-text">
-Last updated 2018-12-31 20:35:09 +0700
+Last updated 2019-02-02 02:24:54 +0700
</div>
</div>
</body>
diff --git a/lib/email/body.go b/lib/email/body.go
new file mode 100644
index 00000000..c1e3d31f
--- /dev/null
+++ b/lib/email/body.go
@@ -0,0 +1,23 @@
+// Copyright 2019, Shulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package email
+
+//
+// Body represents multiple message bodies.
+//
+type Body struct {
+ //
+ // We are not using map here to prevent the body parts being reordered when
+ // packing the message back into raw format.
+ //
+ mimes []*MIME // nolint: structcheck,unused
+}
+
+//
+// Unpack the message's body using boundary.
+//
+func (body *Body) Unpack(raw, boundary []byte) ([]byte, error) {
+ return raw, nil
+}
diff --git a/lib/email/doc.go b/lib/email/doc.go
new file mode 100644
index 00000000..34b959a8
--- /dev/null
+++ b/lib/email/doc.go
@@ -0,0 +1,9 @@
+// Copyright 2019, Shulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//
+// Package email provides a library for working with Internet Message Format as
+// defined by RFC 5322.
+//
+package email
diff --git a/lib/email/email.go b/lib/email/email.go
new file mode 100644
index 00000000..64bf4e16
--- /dev/null
+++ b/lib/email/email.go
@@ -0,0 +1,33 @@
+// Copyright 2019, Shulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package email
+
+var ( // nolint: gochecknoglobals
+ crlf = []byte{'\r', '\n'}
+)
+
+//
+// Email represents an internet message.
+//
+type Email struct {
+ Header Header
+ Body Body
+}
+
+//
+// Unpack the raw message header and body.
+//
+func (email *Email) Unpack(raw []byte) ([]byte, error) {
+ var err error
+
+ raw, err = email.Header.Unpack(raw)
+ if err != nil {
+ return raw, err
+ }
+
+ raw, err = email.Body.Unpack(raw, nil)
+
+ return raw, err
+}
diff --git a/lib/email/field.go b/lib/email/field.go
new file mode 100644
index 00000000..cdf7b90a
--- /dev/null
+++ b/lib/email/field.go
@@ -0,0 +1,363 @@
+// Copyright 2019, Shulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package email
+
+import (
+ "bytes"
+ "fmt"
+ "time"
+
+ libbytes "github.com/shuLhan/share/lib/bytes"
+ libio "github.com/shuLhan/share/lib/io"
+ libtime "github.com/shuLhan/share/lib/time"
+)
+
+var (
+ FieldNameDate = []byte("date")
+)
+
+//
+// Field represents field name and value in header.
+//
+type Field struct {
+ // Type of field, the numeric representation of field name.
+ Type FieldType
+
+ // Name contains "relaxed" canonicalization of field name.
+ Name []byte
+ // Value contains "relaxed" canonicalization of field value.
+ Value []byte
+
+ // oriName contains "simple" canonicalization of field name.
+ oriName []byte
+ // oriValue contains "simple" canonicalization of field value.
+ oriValue []byte
+
+ date *time.Time
+}
+
+//
+// ParseField create and initialize Field by parsing a single line message
+// header field from raw input.
+//
+// If raw input contains multiple lines, the rest of lines will be returned.
+//
+// On error, it will return nil Field, and rest will contain the beginning of
+// invalid input.
+//
+func ParseField(raw []byte) (field *Field, rest []byte, err error) { // nolint: gocyclo
+ if len(raw) == 0 {
+ return nil, nil, nil
+ }
+
+ field = &Field{}
+ isFolded := false
+ start := 0
+
+ // Get field's name.
+ // Valid values: %d33-57 / %d59-126 .
+ x := 0
+ for ; x < len(raw); x++ {
+ if raw[x] == ' ' || raw[x] == ':' {
+ break
+ }
+ if raw[x] < 33 || raw[x] > 126 {
+ goto invalid
+ }
+ }
+ if len(raw) == x {
+ goto invalid
+ }
+
+ // Skip spaces before ':'.
+ for ; x < len(raw) && raw[x] == ' '; x++ {
+ }
+ if len(raw) == x {
+ goto invalid
+ }
+ if raw[x] != ':' {
+ goto invalid
+ }
+
+ field.SetName(raw[:x])
+ x++
+ start = x
+
+ // Skip WSP after ':'.
+ for ; x < len(raw) && (raw[x] == '\t' || raw[x] == ' '); x++ {
+ }
+ if len(raw) == x {
+ goto invalid
+ }
+
+ // Get field's value.
+ // Valid values: WSP / %d33-126 .
+ for ; x < len(raw); x++ {
+ for ; x < len(raw); x++ {
+ if raw[x] == '\t' || raw[x] == ' ' {
+ continue
+ }
+ if raw[x] == '\r' {
+ x++
+ break
+ }
+ if raw[x] < 33 || raw[x] > 126 {
+ goto invalid
+ }
+ }
+ if x == len(raw) || raw[x] != '\n' {
+ goto invalid
+ }
+ if x++; x == len(raw) {
+ break
+ }
+
+ // Unfolding ...
+ if raw[x] == '\t' || raw[x] == ' ' {
+ isFolded = true
+ continue
+ }
+ break
+ }
+ if !isFolded && x > 1000 {
+ err = fmt.Errorf("ParseField: line greater than 998 characters")
+ return nil, nil, err
+ }
+
+ field.SetValue(raw[start:x])
+
+ if len(field.Value) == 0 {
+ goto invalid
+ }
+
+ if len(raw) > x {
+ rest = raw[x:]
+ }
+
+ return field, rest, nil
+
+invalid:
+ if x < len(raw) {
+ err = fmt.Errorf("ParseField: invalid character at index %d", x)
+ rest = raw[x:]
+ } else {
+ err = fmt.Errorf("ParseField: invalid input")
+ }
+ return nil, rest, err
+}
+
+//
+// SetName set field Name by canonicalizing raw field name using "simple" and
+// "relaxed" algorithms.
+//
+// "simple" algorithm store raw field name as is.
+//
+// "relaxed" algorithm convert field name to lowercase and removing trailing
+// whitespaces.
+//
+func (field *Field) SetName(raw []byte) {
+ field.oriName = raw
+ field.Name = make([]byte, 0, len(raw))
+ for x := 0; x < len(raw); x++ {
+ if raw[x] == ' ' || raw[x] < 33 || raw[x] > 126 {
+ break
+ }
+ if raw[x] >= 'A' && raw[x] <= 'Z' {
+ field.Name = append(field.Name, raw[x]+32)
+ } else {
+ field.Name = append(field.Name, raw[x])
+ }
+ }
+ field.updateType()
+}
+
+//
+// SetValue set the field Value by canonicalizing raw input using "simple" and
+// "relaxed" algorithms.
+//
+// "simple" algorithm store raw field value as is.
+//
+// "relaxed" algorithm remove leading and trailing WSP, replacing all
+// CFWS with single space, but not removing CRLF at end.
+//
+func (field *Field) SetValue(raw []byte) {
+ field.oriValue = raw
+ field.Value = make([]byte, 0, len(raw))
+
+ x := 0
+ // Skip leading spaces.
+ for ; x < len(raw); x++ {
+ if !libbytes.IsSpace(raw[x]) {
+ break
+ }
+ }
+
+ spaces := 0
+ for ; x < len(raw); x++ {
+ if libbytes.IsSpace(raw[x]) {
+ spaces++
+ continue
+ }
+ if spaces > 0 {
+ field.Value = append(field.Value, ' ')
+ spaces = 0
+ }
+ field.Value = append(field.Value, raw[x])
+ }
+ if len(field.Value) > 0 {
+ field.Value = append(field.Value, crlf...)
+ }
+}
+
+//
+// String return the relaxed canonicalization of field name and value
+// separated by colon.
+//
+func (field *Field) String() string {
+ return string(field.Name) + ":" + string(field.Value)
+}
+
+//
+// Unpack the field Value based on field Name.
+//
+func (field *Field) Unpack() (err error) {
+ switch field.Type {
+ case FieldTypeDate:
+ err = field.unpackDate()
+ }
+
+ return err
+}
+
+//
+// updateType update the field type based on field name.
+//
+func (field *Field) updateType() {
+ switch {
+ case bytes.Equal(FieldNameDate, field.Name):
+ field.Type = FieldTypeDate
+ default:
+ field.Type = FieldTypeOptional
+ }
+}
+
+//
+// unpackDate from field value into time.Time.
+//
+// Format,
+//
+// [day-of-week ","] day month year hour ":" minute [ ":" second ] zone
+//
+// day-of-week = "Mon" / ... / "Sun"
+// day = 1*2DIGIT
+// month = "Jan" / ... / "Dec"
+// year = 4*DIGIT
+// hour = 2DIGIT
+// minute = 2DIGIT
+// second = 2DIGIT
+// zone = ("+" / "-") 4DIGIT
+//
+//
+//
+func (field *Field) unpackDate() (err error) {
+ var (
+ v []byte
+ ok bool
+ c byte
+ space = []byte{' ', '\r', '\n'}
+ day, year int64
+ hour, min, sec int64
+ off int64
+ month time.Month
+ loc *time.Location = time.UTC
+ )
+
+ if len(field.Value) == 0 {
+ return fmt.Errorf("unpackDate: empty date")
+ }
+
+ r := &libio.Reader{}
+ r.InitBytes(field.Value)
+
+ c = r.SkipSpace()
+ if !libbytes.IsDigit(c) {
+ v, _, c = r.ReadUntil([]byte{','}, nil)
+ if len(v) == 0 || c != ',' {
+ return fmt.Errorf("unpackDate: invalid date format")
+ }
+ if c = r.SkipSpace(); c == 0 {
+ return fmt.Errorf("unpackDate: invalid date format")
+ }
+ }
+
+ // Get day ....
+ if day, c = r.ScanInt64(); c == 0 || c != ' ' {
+ return fmt.Errorf("unpackDate: missing month")
+ }
+ // Get month ...
+ r.SkipSpace()
+ v, _, c = r.ReadUntil(space, nil)
+ month, ok = libtime.ShortMonths[string(v)]
+ if !ok {
+ return fmt.Errorf("unpackDate: invalid month: '%s'", v)
+ }
+
+ // Get year ...
+ r.SkipSpace()
+ if year, c = r.ScanInt64(); c == 0 || c != ' ' {
+ return fmt.Errorf("unpackDate: invalid year")
+ }
+
+ // Obsolete year allow two or three digits.
+ switch {
+ case year < 50:
+ year += 2000
+ case year >= 50 && year < 1000:
+ year += 1900
+ }
+
+ // Get hour ...
+ if hour, c = r.ScanInt64(); c == 0 || c != ':' {
+ return fmt.Errorf("unpackDate: invalid hour")
+ }
+ if hour < 0 || hour > 23 {
+ return fmt.Errorf("unpackDate: invalid hour: %d", hour)
+ }
+
+ // Get minute ...
+ r.SkipN(1)
+ min, c = r.ScanInt64()
+ if min < 0 || min > 59 {
+ return fmt.Errorf("unpackDate: invalid minute: %d", min)
+ }
+
+ // Get second ...
+ if c == ':' {
+ r.SkipN(1)
+ sec, c = r.ScanInt64()
+ if sec < 0 || sec > 59 {
+ return fmt.Errorf("unpackDate: invalid second: %d", sec)
+ }
+ }
+
+ // Get zone offset ...
+ c = r.SkipSpace()
+ if c == 0 {
+ return fmt.Errorf("unpackDate: missing zone")
+ }
+ off, c = r.ScanInt64()
+
+ loc = time.FixedZone("UTC", computeOffSeconds(off))
+ td := time.Date(int(year), month, int(day), int(hour), int(min), int(sec), 0, loc)
+ field.date = &td
+
+ return err
+}
+
+func computeOffSeconds(off int64) int {
+ hour := int(off / 100)
+ min := int(off) - (hour * 100)
+ return ((hour * 60) + min) * 60
+}
diff --git a/lib/email/field_test.go b/lib/email/field_test.go
new file mode 100644
index 00000000..4e9b65ac
--- /dev/null
+++ b/lib/email/field_test.go
@@ -0,0 +1,255 @@
+// Copyright 2019, Shulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package email
+
+import (
+ "testing"
+ "time"
+
+ libbytes "github.com/shuLhan/share/lib/bytes"
+ "github.com/shuLhan/share/lib/test"
+)
+
+//
+// TestParseField run ParseField against a table of raw inputs.
+//
+// When ParseField return an error, only the error message is compared with
+// expErr.
+// Otherwise the original and sanitized name and value of the returned field
+// are compared with exp, and the unparsed input is compared with expRest.
+//
+// NOTE(review): a case that set expErr but get no error is not reported as
+// a missing error; if ParseField also return a non-nil field for such case,
+// the assertions below will dereference the nil exp.
+//
+func TestParseField(t *testing.T) {
+ // Together with "name:" and CRLF, a value of 994 bytes make the raw
+ // line 1001 bytes long (assuming Random return exactly 994 bytes).
+ longValue := string(libbytes.Random([]byte(libbytes.ASCIILetters), 994))
+
+ cases := []struct {
+ desc string
+ raw []byte
+ expErr string
+ exp *Field
+ expRest []byte
+ }{{
+ desc: "With empty input",
+ }, {
+ desc: "With long line",
+ raw: []byte("name:" + longValue + "\r\n"),
+ expErr: "ParseField: line greater than 998 characters",
+ }, {
+ desc: "With only whitespaces",
+ raw: []byte(" "),
+ expErr: "ParseField: invalid input",
+ }, {
+ desc: "With only CRLF",
+ raw: []byte("\r\n"),
+ expErr: "ParseField: invalid character at index 0",
+ }, {
+ desc: "Without separator and CRLF",
+ raw: []byte("name"),
+ expErr: "ParseField: invalid input",
+ }, {
+ desc: "Without separator",
+ raw: []byte("name\r\n"),
+ expErr: "ParseField: invalid character at index 4",
+ }, {
+ desc: "With space on name",
+ raw: []byte("na me\r\n"),
+ expErr: "ParseField: invalid character at index 3",
+ }, {
+ desc: "Without value and CRLF",
+ raw: []byte("name:"),
+ expErr: "ParseField: invalid input",
+ }, {
+ desc: "Without value and CRLF",
+ raw: []byte("name: "),
+ expErr: "ParseField: invalid input",
+ }, {
+ desc: "Without value",
+ raw: []byte("name:\r\n"),
+ expErr: "ParseField: invalid input",
+ }, {
+ desc: "Without value",
+ raw: []byte("name: \r\n"),
+ expErr: "ParseField: invalid input",
+ }, {
+ desc: "Without CRLF",
+ raw: []byte("name:value"),
+ expErr: "ParseField: invalid input",
+ }, {
+ desc: "Without CR",
+ raw: []byte("name:value\n"),
+ expErr: "ParseField: invalid character at index 10",
+ }, {
+ desc: "Without LF",
+ raw: []byte("name:value\r"),
+ expErr: "ParseField: invalid input",
+ }, {
+ desc: "With CR inside value",
+ raw: []byte("name:valu\re"),
+ expErr: "ParseField: invalid character at index 10",
+ }, {
+ desc: "With valid input",
+ raw: []byte("NAME : VALUE\r\n"),
+ exp: &Field{
+ Name: []byte("name"),
+ Value: []byte("VALUE\r\n"),
+ oriName: []byte("NAME "),
+ oriValue: []byte(" VALUE\r\n"),
+ },
+ }, {
+ desc: "With single folding",
+ raw: []byte("Name : \r\n \t Value\r\n"),
+ exp: &Field{
+ Name: []byte("name"),
+ Value: []byte("Value\r\n"),
+ oriName: []byte("Name "),
+ oriValue: []byte(" \r\n \t Value\r\n"),
+ },
+ }, {
+ desc: "With multiple folding between value",
+ raw: []byte("namE : This\r\n is\r\n\ta\r\n \tvalue\r\n"),
+ exp: &Field{
+ Name: []byte("name"),
+ Value: []byte("This is a value\r\n"),
+ oriName: []byte("namE "),
+ oriValue: []byte(" This\r\n is\r\n\ta\r\n \tvalue\r\n"),
+ },
+ }, {
+ desc: "With multiple fields",
+ raw: []byte("a : 1\r\nb : 2\r\n"),
+ exp: &Field{
+ Name: []byte("a"),
+ Value: []byte("1\r\n"),
+ oriName: []byte("a "),
+ oriValue: []byte(" 1\r\n"),
+ },
+ expRest: []byte("b : 2\r\n"),
+ }}
+
+ for _, c := range cases {
+ t.Log(c.desc)
+
+ got, rest, err := ParseField(c.raw)
+ if err != nil {
+ test.Assert(t, "error", c.expErr, err.Error(), true)
+ continue
+ }
+ // No error and no field: this is the result expected from the
+ // empty input case, where exp is nil too.
+ if got == nil {
+ test.Assert(t, "Field", c.exp, got, true)
+ continue
+ }
+
+ test.Assert(t, "Field.oriName", c.exp.oriName, got.oriName, true)
+ test.Assert(t, "Field.oriValue", c.exp.oriValue, got.oriValue, true)
+ test.Assert(t, "Field.Name", c.exp.Name, got.Name, true)
+ test.Assert(t, "Field.Value", c.exp.Value, got.Value, true)
+
+ test.Assert(t, "rest", c.expRest, rest, true)
+ }
+}
+
+//
+// TestUnpackDate set each case value on a single Field with type
+// FieldTypeDate and call Unpack on it.
+//
+// When Unpack return an error, only the error message is compared with
+// expErr.
+// Otherwise the text representation of exp is compared with the text
+// representation of the unpacked date.
+//
+// NOTE(review): a case that set expErr but get no error is not reported as
+// a missing error; it will be compared against the zero value of exp
+// instead.
+//
+func TestUnpackDate(t *testing.T) {
+ cases := []struct {
+ desc string
+ value []byte
+ exp time.Time
+ expErr string
+ }{{
+ desc: "With empty value",
+ expErr: "unpackDate: empty date",
+ }, {
+ desc: "With only spaces",
+ value: []byte(" "),
+ expErr: "unpackDate: empty date",
+ }, {
+ desc: "With invalid date format",
+ value: []byte("Sat"),
+ expErr: "unpackDate: invalid date format",
+ }, {
+ desc: "With invalid date format",
+ value: []byte("Sat,"),
+ expErr: "unpackDate: invalid date format",
+ }, {
+ desc: "With missing month",
+ value: []byte("Sat, 2"),
+ expErr: "unpackDate: missing month",
+ }, {
+ desc: "With missing month",
+ value: []byte("Sat, 2 "),
+ expErr: "unpackDate: missing month",
+ }, {
+ desc: "With invalid month",
+ value: []byte("Sat, 2 X 2019"),
+ expErr: "unpackDate: invalid month: 'X'",
+ }, {
+ desc: "With missing year",
+ value: []byte("Sat, 2 Feb"),
+ expErr: "unpackDate: invalid year",
+ }, {
+ // The year itself is valid here; the value ends right after it,
+ // without any time.
+ desc: "With invalid year",
+ value: []byte("Sat, 2 Feb 2019"),
+ expErr: "unpackDate: invalid year",
+ }, {
+ desc: "With invalid hour",
+ value: []byte("Sat, 2 Feb 2019 00"),
+ expErr: "unpackDate: invalid hour",
+ }, {
+ desc: "With invalid hour",
+ value: []byte("Sat, 2 Feb 2019 24:55:16 +0000"),
+ expErr: "unpackDate: invalid hour: 24",
+ }, {
+ desc: "With invalid minute",
+ value: []byte("Sat, 2 Feb 2019 00:60:16 +0000"),
+ expErr: "unpackDate: invalid minute: 60",
+ }, {
+ desc: "Without second and missing zone",
+ value: []byte("Sat, 2 Feb 2019 00:55"),
+ expErr: "unpackDate: missing zone",
+ }, {
+ desc: "With invalid second",
+ value: []byte("Sat, 2 Feb 2019 00:55:60 +0000"),
+ expErr: "unpackDate: invalid second: 60",
+ }, {
+ desc: "With missing zone",
+ value: []byte("Sat, 2 Feb 2019 00:55:16"),
+ expErr: "unpackDate: missing zone",
+ }, {
+ desc: "With zone",
+ value: []byte("Sat, 2 Feb 2019 00:55:16 UTC"),
+ exp: time.Date(2019, time.February, 2, 0, 55, 16, 0, time.UTC),
+ }, {
+ desc: "With +0800",
+ value: []byte("Sat, 2 Feb 2019 00:55:16 +0800"),
+ exp: time.Date(2019, time.February, 2, 0, 55, 16, 0, time.FixedZone("UTC", 8*60*60)),
+ }, {
+ desc: "Without week day",
+ value: []byte("2 Feb 2019 00:55:16 UTC"),
+ exp: time.Date(2019, time.February, 2, 0, 55, 16, 0, time.UTC),
+ }, {
+ desc: "Without second",
+ value: []byte("Sat, 2 Feb 2019 00:55 UTC"),
+ exp: time.Date(2019, time.February, 2, 0, 55, 0, 0, time.UTC),
+ }, {
+ desc: "Without week-day and second",
+ value: []byte("2 Feb 2019 00:55 UTC"),
+ exp: time.Date(2019, time.February, 2, 0, 55, 0, 0, time.UTC),
+ }, {
+ desc: "With obsolete year 2 digits",
+ value: []byte("2 Feb 19 00:55 UTC"),
+ exp: time.Date(2019, time.February, 2, 0, 55, 0, 0, time.UTC),
+ }, {
+ // NOTE(review): the year in this value has two digits ("89"), not
+ // three as the description said.
+ desc: "With obsolete year 3 digits",
+ value: []byte("2 Feb 89 00:55 UTC"),
+ exp: time.Date(1989, time.February, 2, 0, 55, 0, 0, time.UTC),
+ }}
+
+ // The same field is reused by all cases; only its value is replaced.
+ field := &Field{
+ Type: FieldTypeDate,
+ }
+
+ for _, c := range cases {
+ t.Log(c.desc)
+
+ field.SetValue(c.value)
+
+ err := field.Unpack()
+ if err != nil {
+ test.Assert(t, "error", c.expErr, err.Error(), true)
+ continue
+ }
+
+ test.Assert(t, "date", c.exp.String(), field.date.String(), true)
+ }
+}
diff --git a/lib/email/fieldtype.go b/lib/email/fieldtype.go
new file mode 100644
index 00000000..c419bdad
--- /dev/null
+++ b/lib/email/fieldtype.go
@@ -0,0 +1,12 @@
+// Copyright 2019, Shulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package email
+
+//
+// FieldType define the type of a Field as an integer constant.
+//
+type FieldType int
+
+//
+// List of known field types.
+//
+const (
+ // FieldTypeOptional is the zero value of FieldType.
+ FieldTypeOptional FieldType = 0
+ // FieldTypeDate has the value 2, not 1: iota is already 1 on the
+ // second line of a const block, so "1 << iota" here is "1 << 1".
+ FieldTypeDate FieldType = 1 << iota
+)
diff --git a/lib/email/header.go b/lib/email/header.go
new file mode 100644
index 00000000..54d92f07
--- /dev/null
+++ b/lib/email/header.go
@@ -0,0 +1,79 @@
+// Copyright 2019, Shulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package email
+
+import (
+ "fmt"
+ "strings"
+)
+
+//
+// Header represent list of fields.
+//
+// We are not using a map here, to prevent the fields from being reordered
+// when packing the message back into raw format.
+//
+type Header struct {
+ // fields contains the parsed fields, in the same order as they were
+ // unpacked.
+ fields []*Field
+}
+
+//
+// Unpack the raw header from top to bottom.
+//
+// The raw header may end with optional CRLF, an empty line that separate
+// header from body of message.
+//
+// On success it will return the rest of raw input (possible message's body)
+// without leading CRLF.
+//
+func (hdr *Header) Unpack(raw []byte) ([]byte, error) {
+ var (
+ field *Field
+ err error
+ )
+
+ for len(raw) > 2 {
+ field, raw, err = ParseField(raw)
+ if err != nil {
+ return raw, err
+ }
+ hdr.fields = append(hdr.fields, field)
+ if len(raw) > 2 {
+ if raw[0] == crlf[0] && raw[1] == crlf[1] {
+ break
+ }
+ }
+ }
+
+ switch len(raw) {
+ case 0:
+ case 1:
+ err = fmt.Errorf("Header.Unpack: invalid end of header: '%s'", raw)
+ case 2:
+ if raw[0] != crlf[0] || raw[1] != crlf[1] {
+ err = fmt.Errorf("Header.Unpack: invalid end of header: '%s'", raw)
+ } else {
+ raw = raw[2:]
+ }
+ default:
+ raw = raw[2:]
+ }
+
+ return raw, err
+}
+
+//
+// String return the text representation of header, which is the
+// concatenation of the text representation of each field, in the order they
+// are stored.
+//
+func (hdr *Header) String() string {
+	var out strings.Builder
+
+	for x := 0; x < len(hdr.fields); x++ {
+		out.WriteString(hdr.fields[x].String())
+	}
+
+	return out.String()
+}
diff --git a/lib/email/header_test.go b/lib/email/header_test.go
new file mode 100644
index 00000000..29cb93a1
--- /dev/null
+++ b/lib/email/header_test.go
@@ -0,0 +1,72 @@
+// Copyright 2019, Shulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package email
+
+import (
+ "testing"
+
+ "github.com/shuLhan/share/lib/test"
+)
+
+//
+// TestHeaderUnpack unpack each raw input into a new, empty Header.
+//
+// When Unpack return an error, only the error message is compared with
+// expErr.
+// Otherwise the text representation of the header is compared with exp and
+// the returned rest of input is compared with expRest.
+//
+// NOTE(review): a case that set expErr but get no error is not reported as
+// a missing error; it will be compared against exp and expRest instead.
+//
+func TestHeaderUnpack(t *testing.T) {
+ cases := []struct {
+ desc string
+ raw []byte
+ expErr string
+ exp string
+ expRest []byte
+ }{{
+ desc: "With empty input",
+ }, {
+ desc: "With whitespaces only",
+ raw: []byte(" \t"),
+ expErr: "Header.Unpack: invalid end of header: ' \t'",
+ }, {
+ desc: "With CRLF only",
+ raw: crlf,
+ expRest: []byte{},
+ }, {
+ desc: "With invalid end",
+ raw: []byte("a: 1\r\nx"),
+ expErr: "Header.Unpack: invalid end of header: 'x'",
+ }, {
+ desc: "With invalid field: missing value",
+ raw: []byte("a:\r\n\t"),
+ expErr: "ParseField: invalid input",
+ }, {
+ desc: "With single field",
+ raw: []byte("a:1\r\n"),
+ exp: "a:1\r\n",
+ }, {
+ desc: "With multiple fields",
+ raw: []byte("a:1\r\nb : 2\r\n"),
+ exp: "a:1\r\nb:2\r\n",
+ }, {
+ desc: "With empty line at the end",
+ raw: []byte("a:1\r\nb : 2\r\n\r\n"),
+ exp: "a:1\r\nb:2\r\n",
+ expRest: []byte{},
+ }, {
+ desc: "With body",
+ raw: []byte("a:1\r\nb : 2\r\n\r\nBody."),
+ exp: "a:1\r\nb:2\r\n",
+ expRest: []byte("Body."),
+ }}
+
+ for _, c := range cases {
+ t.Log(c.desc)
+
+ // Use a fresh header on each case, since Unpack append the parsed
+ // fields to the header.
+ header := &Header{}
+
+ rest, err := header.Unpack(c.raw)
+ if err != nil {
+ test.Assert(t, "error", c.expErr, err.Error(), true)
+ continue
+ }
+
+ test.Assert(t, "Header.String", c.exp, header.String(), true)
+ test.Assert(t, "rest", c.expRest, rest, true)
+ }
+}
diff --git a/lib/email/mime.go b/lib/email/mime.go
new file mode 100644
index 00000000..8e4102a5
--- /dev/null
+++ b/lib/email/mime.go
@@ -0,0 +1,16 @@
+// Copyright 2019, Shulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package email
+
+//
+// MIME represent part of message body with id, content type, encoding,
+// description, and content.
+//
+// NOTE(review): the description above mention an encoding, but there is no
+// field in this struct that store it.
+//
+type MIME struct {
+ // All fields are stored as raw bytes.
+ ID []byte
+ Type []byte // presumably the content type -- TODO confirm.
+ Description []byte
+ Content []byte
+}
diff --git a/lib/io/reader.go b/lib/io/reader.go
index 5a8896f3..43b0c504 100644
--- a/lib/io/reader.go
+++ b/lib/io/reader.go
@@ -43,6 +43,14 @@ func (r *Reader) Init(src string) {
}
//
+// InitBytes reset the reader to read from the beginning of src.
+// The slice is used as is, it is not copied.
+//
+func (r *Reader) InitBytes(src []byte) {
+	r.v, r.p = src, 0
+}
+
+//
// ReadUntil read the content of file until one of separator found, or until
// it reach the terminator character, or until EOF.
// The content will be returned along the status of termination.
@@ -73,6 +81,64 @@ func (r *Reader) ReadUntil(seps []byte, terms []byte) (b []byte, isTerm bool, c
}
//
+// ScanInt64 convert textual representation of number into int64 and return
+// it.
+// Any spaces before actual reading of text will be ignored.
+// The number may prefixed with '-' or '+', if its '-', the returned value
+// must be negative.
+//
+// On success, c is non digit character that terminate scan, if its 0, its
+// mean EOF.
+//
+func (r *Reader) ScanInt64() (n int64, c byte) {
+ var min int64 = 1
+ if len(r.v) == r.p {
+ return
+ }
+
+ for ; r.p < len(r.v); r.p++ {
+ c = r.v[r.p]
+ if !libbytes.IsSpace(c) {
+ break
+ }
+ }
+ if c == '-' {
+ min = -1
+ r.p++
+ } else if c == '+' {
+ r.p++
+ }
+ for r.p < len(r.v) {
+ c = r.v[r.p]
+ if !libbytes.IsDigit(c) {
+ break
+ }
+ c = c - '0'
+ n *= 10
+ n += int64(c)
+ r.p++
+ }
+ n *= min
+ if r.p == len(r.v) {
+ return n, 0
+ }
+
+ return n, c
+}
+
+//
+// SkipN move the read position n bytes forward.
+// If the new position is at or beyond the end of buffer, the position is
+// set to the end of buffer and it will return true (EOF); otherwise it will
+// return false.
+//
+func (r *Reader) SkipN(n int) bool {
+	if r.p+n < len(r.v) {
+		r.p += n
+		return false
+	}
+	r.p = len(r.v)
+	return true
+}
+
+//
// SkipSpace read until no white spaces found and return the first byte that
// is not white spaces.
// On EOF, it will return 0.
diff --git a/lib/io/reader_test.go b/lib/io/reader_test.go
new file mode 100644
index 00000000..bae919d0
--- /dev/null
+++ b/lib/io/reader_test.go
@@ -0,0 +1,69 @@
+// Copyright 2019, Shulhan <ms@kilabit.info>. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package io
+
+import (
+ "testing"
+
+ "github.com/shuLhan/share/lib/test"
+)
+
+//
+// TestReaderScanInt64 re-initialize a single Reader with each case input
+// and compare the number and the character returned by ScanInt64 with exp
+// and expc.
+//
+// A case without expc expect ScanInt64 to return 0 as the character, which
+// ScanInt64 document as EOF.
+//
+func TestReaderScanInt64(t *testing.T) {
+ cases := []struct {
+ desc string
+ src []byte
+ exp int64
+ expc byte
+ }{{
+ desc: "With empty input",
+ }, {
+ // No digit is scanned: the number is 0 and the scan stop at 'a'.
+ desc: "With non digit",
+ src: []byte("a -1"),
+ expc: 'a',
+ }, {
+ desc: "With leading spaces",
+ src: []byte(" +1"),
+ exp: 1,
+ }, {
+ desc: "With -1",
+ src: []byte("-1"),
+ exp: -1,
+ }, {
+ desc: "With -1",
+ src: []byte("-1x"),
+ exp: -1,
+ expc: 'x',
+ }, {
+ desc: "With +1",
+ src: []byte("+1"),
+ exp: 1,
+ }, {
+ desc: "With 1000",
+ src: []byte("1000"),
+ exp: 1000,
+ }, {
+ desc: "With 9876543210 1",
+ src: []byte("9876543210 1"),
+ exp: 9876543210,
+ expc: ' ',
+ }, {
+ desc: "With leading zero 001",
+ src: []byte("-001"),
+ exp: -1,
+ }}
+
+ // The same reader is reused by all cases; InitBytes reset its buffer
+ // and position.
+ r := &Reader{}
+
+ for _, c := range cases {
+ t.Log(c.desc)
+
+ r.InitBytes(c.src)
+
+ got, gotc := r.ScanInt64()
+
+ test.Assert(t, "n", c.exp, got, true)
+ test.Assert(t, "c", c.expc, gotc, true)
+ }
+}
diff --git a/lib/time/time.go b/lib/time/time.go
index 63d359b8..4a88f828 100644
--- a/lib/time/time.go
+++ b/lib/time/time.go
@@ -4,3 +4,32 @@
// Package time provide a library for working with time.
package time
+
+import (
+ "time"
+)
+
+var (
+ //
+ // ShortDayNames contains the name of days, in English, in short
+ // format; starting from "Mon" at index 0 and ending with "Sun" at
+ // index 6.
+ // Note that this order is different from time.Weekday, where Sunday
+ // is 0.
+ //
+ ShortDayNames = []string{
+ "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun",
+ }
+
+ //
+ // ShortMonths provide mapping between text of month, in English,
+ // short format to their time.Month value.
+ // The keys are case sensitive, with only the first letter in upper
+ // case.
+ //
+ ShortMonths = map[string]time.Month{
+ "Jan": time.January,
+ "Feb": time.February,
+ "Mar": time.March,
+ "Apr": time.April,
+ "May": time.May,
+ "Jun": time.June,
+ "Jul": time.July,
+ "Aug": time.August,
+ "Sep": time.September,
+ "Oct": time.October,
+ "Nov": time.November,
+ "Dec": time.December,
+ }
+)