add strict mode when parsing messages, typically enabled for incoming special-use messages like tls/dmarc reports, subjectpass emails

and pass a logger to the message parser, so problems with message parsing get
the cid logged.
This commit is contained in:
Mechiel Lukkien
2023-08-15 08:25:56 +02:00
parent f5f953b3ab
commit 34c2dcd49d
24 changed files with 312 additions and 153 deletions

View File

@ -11,9 +11,12 @@ import (
"strings"
"testing"
"github.com/mjl-/mox/mlog"
"github.com/mjl-/mox/moxvar"
)
var xlog = mlog.New("message")
func tcheck(t *testing.T, err error, msg string) {
t.Helper()
if err != nil {
@ -37,7 +40,7 @@ func tfail(t *testing.T, err, expErr error) {
func TestEmptyHeader(t *testing.T) {
s := "\r\nx"
p, err := EnsurePart(strings.NewReader(s), int64(len(s)))
p, err := EnsurePart(xlog, true, strings.NewReader(s), int64(len(s)))
tcheck(t, err, "parse empty headers")
buf, err := io.ReadAll(p.Reader())
tcheck(t, err, "read")
@ -48,15 +51,85 @@ func TestEmptyHeader(t *testing.T) {
}
func TestBadContentType(t *testing.T) {
expBody := "test"
// Pedantic is like strict.
moxvar.Pedantic = true
s := "content-type: text/html;;\r\n\r\ntest"
p, err := EnsurePart(strings.NewReader(s), int64(len(s)))
p, err := EnsurePart(xlog, false, strings.NewReader(s), int64(len(s)))
tfail(t, err, ErrBadContentType)
buf, err := io.ReadAll(p.Reader())
tcheck(t, err, "read")
expBody := "test"
tcompare(t, string(buf), expBody)
tcompare(t, p.MediaType, "APPLICATION")
tcompare(t, p.MediaSubType, "OCTET-STREAM")
moxvar.Pedantic = false
// Strict
s = "content-type: text/html;;\r\n\r\ntest"
p, err = EnsurePart(xlog, true, strings.NewReader(s), int64(len(s)))
tfail(t, err, ErrBadContentType)
buf, err = io.ReadAll(p.Reader())
tcheck(t, err, "read")
tcompare(t, string(buf), expBody)
tcompare(t, p.MediaType, "APPLICATION")
tcompare(t, p.MediaSubType, "OCTET-STREAM")
// Non-strict but unrecoverable content-type.
s = "content-type: not a content type;;\r\n\r\ntest"
p, err = EnsurePart(xlog, false, strings.NewReader(s), int64(len(s)))
tcheck(t, err, "parsing message with bad but recoverable content-type")
buf, err = io.ReadAll(p.Reader())
tcheck(t, err, "read")
tcompare(t, string(buf), expBody)
tcompare(t, p.MediaType, "APPLICATION")
tcompare(t, p.MediaSubType, "OCTET-STREAM")
// We try to use only the content-type, typically better than application/octet-stream.
s = "content-type: text/html;;\r\n\r\ntest"
p, err = EnsurePart(xlog, false, strings.NewReader(s), int64(len(s)))
tcheck(t, err, "parsing message with bad but recoverable content-type")
buf, err = io.ReadAll(p.Reader())
tcheck(t, err, "read")
tcompare(t, string(buf), expBody)
tcompare(t, p.MediaType, "TEXT")
tcompare(t, p.MediaSubType, "HTML")
// Not recovering multipart, we won't have a boundary.
s = "content-type: multipart/mixed;;\r\n\r\ntest"
p, err = EnsurePart(xlog, false, strings.NewReader(s), int64(len(s)))
tcheck(t, err, "parsing message with bad but recoverable content-type")
buf, err = io.ReadAll(p.Reader())
tcheck(t, err, "read")
tcompare(t, string(buf), expBody)
tcompare(t, p.MediaType, "APPLICATION")
tcompare(t, p.MediaSubType, "OCTET-STREAM")
}
func TestBareCR(t *testing.T) {
s := "content-type: text/html\r\n\r\nbare\rcr\r\n"
expBody := "bare\rcr\r\n"
// Pedantic is like strict.
moxvar.Pedantic = true
p, err := EnsurePart(xlog, false, strings.NewReader(s), int64(len(s)))
tfail(t, err, errBareCR)
_, err = io.ReadAll(p.Reader())
tfail(t, err, errBareCR)
moxvar.Pedantic = false
// Strict.
p, err = EnsurePart(xlog, true, strings.NewReader(s), int64(len(s)))
tfail(t, err, errBareCR)
_, err = io.ReadAll(p.Reader())
tcheck(t, err, "read fallback part without error")
// Non-strict allows bare cr.
p, err = EnsurePart(xlog, false, strings.NewReader(s), int64(len(s)))
tcheck(t, err, "parse")
buf, err := io.ReadAll(p.Reader())
tcheck(t, err, "read")
tcompare(t, string(buf), expBody)
}
var basicMsg = strings.ReplaceAll(`From: <mjl@mox.example>
@ -68,7 +141,7 @@ aGkK
func TestBasic(t *testing.T) {
r := strings.NewReader(basicMsg)
p, err := Parse(r)
p, err := Parse(xlog, true, r)
tcheck(t, err, "new reader")
buf, err := io.ReadAll(p.RawReader())
@ -103,7 +176,7 @@ Hello Joe, do you think we can meet at 3:30 tomorrow?
func TestBasic2(t *testing.T) {
r := strings.NewReader(basicMsg2)
p, err := Parse(r)
p, err := Parse(xlog, true, r)
tcheck(t, err, "new reader")
buf, err := io.ReadAll(p.RawReader())
@ -123,9 +196,9 @@ func TestBasic2(t *testing.T) {
}
r = strings.NewReader(basicMsg2)
p, err = Parse(r)
p, err = Parse(xlog, true, r)
tcheck(t, err, "new reader")
err = p.Walk(nil)
err = p.Walk(xlog, nil)
tcheck(t, err, "walk")
if p.RawLineCount != 2 {
t.Fatalf("basic message, got %d lines, expected 2", p.RawLineCount)
@ -164,25 +237,25 @@ This is the epilogue. It is also to be ignored.
func TestMime(t *testing.T) {
// from ../rfc/2046:1148
r := strings.NewReader(mimeMsg)
p, err := Parse(r)
p, err := Parse(xlog, true, r)
tcheck(t, err, "new reader")
if len(p.bound) == 0 {
t.Fatalf("got no bound, expected bound for mime message")
}
pp, err := p.ParseNextPart()
pp, err := p.ParseNextPart(xlog)
tcheck(t, err, "next part")
buf, err := io.ReadAll(pp.Reader())
tcheck(t, err, "read all")
tcompare(t, string(buf), "This is implicitly typed plain US-ASCII text.\r\nIt does NOT end with a linebreak.")
pp, err = p.ParseNextPart()
pp, err = p.ParseNextPart(xlog)
tcheck(t, err, "next part")
buf, err = io.ReadAll(pp.Reader())
tcheck(t, err, "read all")
tcompare(t, string(buf), "This is explicitly typed plain US-ASCII text.\r\nIt DOES end with a linebreak.\r\n")
_, err = p.ParseNextPart()
_, err = p.ParseNextPart(xlog)
tcompare(t, err, io.EOF)
if len(p.Parts) != 2 {
@ -201,33 +274,38 @@ func TestLongLine(t *testing.T) {
for i := range line {
line[i] = 'a'
}
_, err := Parse(bytes.NewReader(line))
_, err := Parse(xlog, true, bytes.NewReader(line))
tfail(t, err, errLineTooLong)
}
func TestHalfCrLf(t *testing.T) {
parse := func(s string) error {
p, err := Parse(strings.NewReader(s))
func TestBareCrLf(t *testing.T) {
parse := func(strict bool, s string) error {
p, err := Parse(xlog, strict, strings.NewReader(s))
if err != nil {
return err
}
return p.Walk(nil)
return p.Walk(xlog, nil)
}
err := parse("subject: test\ntest\r\n")
err := parse(false, "subject: test\ntest\r\n")
tfail(t, err, errBareLF)
err = parse("\r\ntest\ntest\r\n")
err = parse(false, "\r\ntest\ntest\r\n")
tfail(t, err, errBareLF)
moxvar.Pedantic = true
err = parse("subject: test\rtest\r\n")
err = parse(false, "subject: test\rtest\r\n")
tfail(t, err, errBareCR)
err = parse("\r\ntest\rtest\r\n")
err = parse(false, "\r\ntest\rtest\r\n")
tfail(t, err, errBareCR)
moxvar.Pedantic = false
err = parse("subject: test\rtest\r\n")
err = parse(true, "subject: test\rtest\r\n")
tfail(t, err, errBareCR)
err = parse(true, "\r\ntest\rtest\r\n")
tfail(t, err, errBareCR)
err = parse(false, "subject: test\rtest\r\n")
tcheck(t, err, "header with bare cr")
err = parse("\r\ntest\rtest\r\n")
err = parse(false, "\r\ntest\rtest\r\n")
tcheck(t, err, "body with bare cr")
}
@ -238,25 +316,25 @@ func TestMissingClosingBoundary(t *testing.T) {
test
`, "\n", "\r\n")
msg, err := Parse(strings.NewReader(message))
msg, err := Parse(xlog, false, strings.NewReader(message))
tcheck(t, err, "new reader")
err = walkmsg(&msg)
tfail(t, err, errMissingClosingBoundary)
msg, _ = Parse(strings.NewReader(message))
err = msg.Walk(nil)
msg, _ = Parse(xlog, false, strings.NewReader(message))
err = msg.Walk(xlog, nil)
tfail(t, err, errMissingClosingBoundary)
}
func TestHeaderEOF(t *testing.T) {
message := "header: test"
_, err := Parse(strings.NewReader(message))
_, err := Parse(xlog, false, strings.NewReader(message))
tfail(t, err, errUnexpectedEOF)
}
func TestBodyEOF(t *testing.T) {
message := "header: test\r\n\r\ntest"
msg, err := Parse(strings.NewReader(message))
msg, err := Parse(xlog, true, strings.NewReader(message))
tcheck(t, err, "new reader")
buf, err := io.ReadAll(msg.Reader())
tcheck(t, err, "read body")
@ -287,7 +365,7 @@ test
`, "\n", "\r\n")
msg, err := Parse(strings.NewReader(message))
msg, err := Parse(xlog, false, strings.NewReader(message))
tcheck(t, err, "new reader")
enforceSequential = true
defer func() {
@ -296,8 +374,8 @@ test
err = walkmsg(&msg)
tcheck(t, err, "walkmsg")
msg, _ = Parse(strings.NewReader(message))
err = msg.Walk(nil)
msg, _ = Parse(xlog, false, strings.NewReader(message))
err = msg.Walk(xlog, nil)
tcheck(t, err, "msg.Walk")
}
@ -374,7 +452,7 @@ Content-Transfer-Encoding: Quoted-printable
--unique-boundary-1--
`, "\n", "\r\n")
msg, err := Parse(strings.NewReader(nestedMessage))
msg, err := Parse(xlog, true, strings.NewReader(nestedMessage))
tcheck(t, err, "new reader")
enforceSequential = true
defer func() {
@ -399,8 +477,8 @@ Content-Transfer-Encoding: Quoted-printable
t.Fatalf("got %q, expected %q", buf, exp)
}
msg, _ = Parse(strings.NewReader(nestedMessage))
err = msg.Walk(nil)
msg, _ = Parse(xlog, false, strings.NewReader(nestedMessage))
err = msg.Walk(xlog, nil)
tcheck(t, err, "msg.Walk")
}
@ -440,7 +518,7 @@ func walk(path string) error {
return err
}
defer r.Close()
msg, err := Parse(r)
msg, err := Parse(xlog, false, r)
if err != nil {
return err
}
@ -460,7 +538,7 @@ func walkmsg(msg *Part) error {
}
if msg.MediaType == "MESSAGE" && (msg.MediaSubType == "RFC822" || msg.MediaSubType == "GLOBAL") {
mp, err := Parse(bytes.NewReader(buf))
mp, err := Parse(xlog, false, bytes.NewReader(buf))
if err != nil {
return err
}
@ -488,7 +566,7 @@ func walkmsg(msg *Part) error {
}
for {
pp, err := msg.ParseNextPart()
pp, err := msg.ParseNextPart(xlog)
if err == io.EOF {
return nil
}
@ -507,7 +585,7 @@ func TestEmbedded(t *testing.T) {
tcheck(t, err, "open")
fi, err := f.Stat()
tcheck(t, err, "stat")
_, err = EnsurePart(f, fi.Size())
_, err = EnsurePart(xlog, false, f, fi.Size())
tcheck(t, err, "parse")
}
@ -516,6 +594,6 @@ func TestEmbedded2(t *testing.T) {
tcheck(t, err, "readfile")
buf = bytes.ReplaceAll(buf, []byte("\n"), []byte("\r\n"))
_, err = EnsurePart(bytes.NewReader(buf), int64(len(buf)))
_, err = EnsurePart(xlog, false, bytes.NewReader(buf), int64(len(buf)))
tfail(t, err, nil)
}