mox!

2025-07-12 17:44:35 +03:00 · 2023-01-30 14:27:06 +01:00
commit cb229cb6cf
1256 changed files with 491723 additions and 0 deletions
--- a/message/doc.go
+++ b/message/doc.go
@ -0,0 +1,3 @@
+// Package message provides functions for reading and writing email messages,
+// ensuring they are correctly formatted.
+package message
--- a/message/from.go
+++ b/message/from.go
@ -0,0 +1,43 @@
+package message
+
+import (
+	"fmt"
+	"io"
+	"net/textproto"
+
+	"github.com/mjl-/mox/dns"
+	"github.com/mjl-/mox/smtp"
+)
+
+// From extracts the address in the From-header.
+//
+// An RFC5322 message must have a From header.
+// In theory, multiple addresses may be present. In practice zero or multiple
+// From headers may be present. From returns an error if there is not exactly
+// one address. This address can be used for evaluating a DMARC policy against
+// SPF and DKIM results.
+//
+// On success, the parsed header of the message is returned along with the
+// address. Parse errors are wrapped, so callers can match them with errors.Is
+// and errors.As.
+func From(r io.ReaderAt) (raddr smtp.Address, header textproto.MIMEHeader, rerr error) {
+	// ../rfc/7489:1243
+
+	// todo: only allow utf8 if enabled in session/message?
+
+	p, err := Parse(r)
+	if err != nil {
+		// todo: should we continue with p, perhaps headers can be parsed?
+		return raddr, nil, fmt.Errorf("parsing message: %w", err)
+	}
+	header, err = p.Header()
+	if err != nil {
+		return raddr, nil, fmt.Errorf("parsing message header: %w", err)
+	}
+	from := p.Envelope.From
+	if len(from) != 1 {
+		return raddr, nil, fmt.Errorf("from header has %d addresses, need exactly 1 address", len(from))
+	}
+	d, err := dns.ParseDomain(from[0].Host)
+	if err != nil {
+		return raddr, nil, fmt.Errorf("bad domain in from address: %w", err)
+	}
+	addr := smtp.Address{Localpart: smtp.Localpart(from[0].User), Domain: d}
+	return addr, header, nil
+}
--- a/message/headerwriter.go
+++ b/message/headerwriter.go
@ -0,0 +1,65 @@
+package message
+
+import (
+	"fmt"
+	"strings"
+)
+
+// HeaderWriter helps create headers, folding to the next line when it would
+// become too large. Useful for creating Received and DKIM-Signature headers.
+//
+// Lines are folded with "\r\n\t" when they would exceed 78 bytes. The zero
+// value is ready for use.
+type HeaderWriter struct {
+	b        *strings.Builder
+	lineLen  int  // Number of bytes on the current line, including the leading tab after a fold.
+	nonfirst bool // Whether Add has written a text before, i.e. whether a separator is needed.
+}
+
+// Addf formats the string and calls Add.
+func (w *HeaderWriter) Addf(separator string, format string, args ...any) {
+	w.Add(separator, fmt.Sprintf(format, args...))
+}
+
+// Add adds texts, each separated by separator. Individual elements in text are
+// not wrapped, so a single long text can make a line exceed 78 bytes.
+func (w *HeaderWriter) Add(separator string, texts ...string) {
+	if w.b == nil {
+		w.b = &strings.Builder{}
+	}
+	for _, text := range texts {
+		n := len(text)
+		if w.nonfirst && w.lineLen > 1 && w.lineLen+len(separator)+n > 78 {
+			// Text does not fit on the current line, fold instead of writing the separator.
+			w.b.WriteString("\r\n\t")
+			w.lineLen = 1
+		} else if w.nonfirst && separator != "" {
+			w.b.WriteString(separator)
+			w.lineLen += len(separator)
+		}
+		w.b.WriteString(text)
+		w.lineLen += n
+		w.nonfirst = true
+	}
+}
+
+// AddWrap adds data, folding anywhere in the buffer. E.g. for base64 data.
+func (w *HeaderWriter) AddWrap(buf []byte) {
+	if w.b == nil {
+		w.b = &strings.Builder{}
+	}
+	for len(buf) > 0 {
+		// Room left on the current line. Add does not wrap individual elements, so the
+		// line can already be over the limit. In that case nothing fits and we fold
+		// first.
+		n := 78 - w.lineLen
+		if n < 0 {
+			n = 0
+		}
+		if n > len(buf) {
+			n = len(buf)
+		}
+		w.b.Write(buf[:n])
+		w.lineLen += n
+		buf = buf[n:]
+		if len(buf) > 0 {
+			w.b.WriteString("\r\n\t")
+			w.lineLen = 1
+		}
+	}
+}
+
+// String returns the header in string form, ending with \r\n.
+func (w *HeaderWriter) String() string {
+	if w.b == nil {
+		// Nothing was added yet.
+		return "\r\n"
+	}
+	return w.b.String() + "\r\n"
+}
--- a/message/part.go
+++ b/message/part.go
@ -0,0 +1,777 @@
+package message
+
+// todo: we should be more forgiving when parsing, at least as an option for imported messages, possibly incoming as well, but not for submitted/outgoing messages.
+// todo: allow more invalid content-type values, we now stop parsing on: empty media type (eg "content-type: ; name=..."), empty value for property (eg "charset="), missing quotes for characters that should be quoted (eg boundary containing "=" but without quotes), duplicate properties (two charsets), empty pairs (eg "text/html;;").
+// todo: what should our max line length be? rfc says 1000. messages exceed that. we should enforce 1000 for outgoing messages.
+// todo: should we be forgiving when closing boundary in multipart message is missing? seems like spam messages do this...
+// todo: allow bare \r (without \n)? this does happen in messages.
+// todo: should we allow base64 messages where a line starts with a space? and possibly more whitespace. is happening in messages. coreutils base64 accepts it, encoding/base64 does not.
+// todo: handle comments in headers?
+// todo: should we just always store messages with \n instead of \r\n? \r\n seems easier for use with imap.
+// todo: is a header always \r\n\r\n-separated? or is \r\n enough at the beginning of a file? because what would this mean: "\r\ndata"? data isn't a header.
+// todo: can use a cleanup
+
+import (
+	"bufio"
+	"bytes"
+	"encoding/base64"
+	"errors"
+	"fmt"
+	"io"
+	"mime"
+	"mime/quotedprintable"
+	"net/mail"
+	"net/textproto"
+	"strings"
+	"time"
+
+	"github.com/mjl-/mox/mlog"
+	"github.com/mjl-/mox/smtp"
+)
+
+var xlog = mlog.New("message") // Package logger; parseAddressList logs unparsable addresses through it.
+
+var (
+	ErrBadContentType = errors.New("bad content-type") // Wrapped by newPart when a non-empty Content-Type header does not parse.
+)
+
+var (
+	errNotMultipart           = errors.New("not a multipart message") // ParseNextPart called on a part without boundary.
+	errFirstBoundCloses       = errors.New("first boundary cannot be finishing boundary") // First boundary line of a multipart is a closing boundary.
+	errLineTooLong            = errors.New("line too long") // No line ending found within maxLineLength bytes.
+	errMissingBoundaryParam   = errors.New("missing/empty boundary content-type parameter") // Multipart content-type without usable boundary.
+	errMissingClosingBoundary = errors.New("eof without closing boundary") // boundReader reached eof before a boundary of the parent.
+	errHalfLineSep            = errors.New("invalid CR or LF without the other") // CR not followed by LF, or LF not preceded by CR.
+	errUnexpectedEOF          = errors.New("unexpected eof") // Data ended where a CRLF-terminated line was required.
+)
+
+// If set, during tests, attempts to reparse a part will cause a panic, because sequentially reading parts should not lead to reparsing.
+var enforceSequential bool
+
+// Part represents a whole mail message, or a part of a multipart message. It
+// is designed to handle IMAP requirements efficiently. The exported fields describe the part; the unexported fields hold parsing state.
+type Part struct {
+	BoundaryOffset int64 // Offset in message where bound starts. -1 for top-level message.
+	HeaderOffset   int64 // Offset in message file where header starts.
+	BodyOffset     int64 // Offset in message file where body starts.
+	EndOffset      int64 // Where body of part ends. Set when part is fully read. -1 after parsing, until then.
+	RawLineCount   int64 // Number of lines in raw, undecoded, body of part. Set when part is fully read.
+	DecodedSize    int64 // Number of octets when decoded. If this is a text mediatype, lines ending only in LF are changed to end in CRLF and DecodedSize reflects that.
+
+	MediaType               string            // From Content-Type, upper case. E.g. "TEXT". Can be empty because content-type may be absent. In this case, the part may be treated as TEXT/PLAIN.
+	MediaSubType            string            // From Content-Type, upper case. E.g. "PLAIN".
+	ContentTypeParams       map[string]string // E.g. holds "boundary" for multipart messages. Has lower-case keys, and original case values.
+	ContentID               string
+	ContentDescription      string
+	ContentTransferEncoding string    // In upper case.
+	Envelope                *Envelope // Email message headers. Not for non-message parts.
+
+	Parts []Part // Parts if this is a multipart. Appended to by ParseNextPart.
+
+	// Only for message/rfc822 and message/global. This part may have a buffer as
+	// backing io.ReaderAt, because a message/global can have a non-identity
+	// content-transfer-encoding. This part has a nil parent. Set by Walk.
+	Message *Part
+
+	r               io.ReaderAt
+	header          textproto.MIMEHeader // Parsed header.
+	nextBoundOffset int64                // If >= 0, the offset where the next part header starts. We can set this when a user fully reads each part.
+	lastBoundOffset int64                // Start of header of last/previous part. Used to skip a part if ParseNextPart is called and nextBoundOffset is -1.
+	parent          *Part                // Parent part, for getting bound from, and setting nextBoundOffset when a part has finished reading. Only for subparts, not top-level parts.
+	bound           []byte               // Only set if valid multipart with boundary, includes leading --, excludes \r\n.
+}
+
+// Envelope holds the basic/common message headers as used in IMAP4. parseEnvelope fills it with an unkeyed composite literal, so the order of fields must stay in sync with that function.
+type Envelope struct {
+	Date      time.Time // Zero time if the Date header is missing or invalid.
+	Subject   string
+	From      []Address
+	Sender    []Address
+	ReplyTo   []Address
+	To        []Address
+	CC        []Address
+	BCC       []Address
+	InReplyTo string
+	MessageID string
+}
+
+// Address as used in From and To headers. User and Host are left empty by parseAddressList when the address does not parse as an SMTP address.
+type Address struct {
+	Name string // Free-form name for display in mail applications.
+	User string // Localpart.
+	Host string // Domain in ASCII.
+}
+
+// Parse reads the headers of the mail message and returns a part.
+// A part provides access to decoded and raw contents of a message and its multiple parts.
+func Parse(r io.ReaderAt) (Part, error) {
+	return newPart(r, 0, nil)
+}
+
+// EnsurePart parses and walks the message like Parse followed by Walk, but
+// always returns a part that can be used, also when the returned error is
+// non-nil. On a parse error, the whole message is presented as a single
+// application/octet-stream part ending at size, keeping whatever header
+// information was parsed before the error occurred.
+func EnsurePart(r io.ReaderAt, size int64) (Part, error) {
+	p, err := Parse(r)
+	if err == nil {
+		err = p.Walk()
+	}
+	if err == nil {
+		return p, nil
+	}
+
+	// Fall back to an opaque part. Not carried over from p:
+	//   - BoundaryOffset: irrelevant for top-level message.
+	//   - RawLineCount and DecodedSize: set by reading the body below.
+	//   - Parts: we are not treating this as a multipart message.
+	fallback := Part{
+		HeaderOffset:            p.HeaderOffset,
+		BodyOffset:              p.BodyOffset,
+		EndOffset:               size,
+		MediaType:               "APPLICATION",
+		MediaSubType:            "OCTET-STREAM",
+		ContentTypeParams:       p.ContentTypeParams,
+		ContentID:               p.ContentID,
+		ContentDescription:      p.ContentDescription,
+		ContentTransferEncoding: p.ContentTransferEncoding,
+		Envelope:                p.Envelope,
+	}
+	fallback.SetReaderAt(r)
+	// Reading the body to the end records the sizes on the part. A read error
+	// takes precedence over the parse error.
+	if _, rerr := io.Copy(io.Discard, fallback.Reader()); rerr != nil {
+		err = rerr
+	}
+	return fallback, err
+}
+
+// SetReaderAt installs r as the reader of this part and, recursively, of all
+// parts below it. It panics when r is nil.
+// Message sub parts are left alone, see SetMessageReaderAt for those.
+func (p *Part) SetReaderAt(r io.ReaderAt) {
+	if r == nil {
+		panic("nil reader")
+	}
+	p.r = r
+	for i := 0; i < len(p.Parts); i++ {
+		p.Parts[i].SetReaderAt(r)
+	}
+}
+
+// SetMessageReaderAt reads the decoded body of p into memory and sets it as
+// reader on p.Message, which must be non-nil.
+func (p *Part) SetMessageReaderAt() error {
+	// todo: if p.Message does not contain any non-identity content-transfer-encoding, we should set an offsetReader of p.Message, recursively.
+	data, err := io.ReadAll(p.Reader())
+	if err == nil {
+		p.Message.SetReaderAt(bytes.NewReader(data))
+	}
+	return err
+}
+
+// Walk through message, decoding along the way, and collecting mime part offsets and sizes, and line counts. For a multipart, each sub part is parsed with ParseNextPart and walked recursively. For message/rfc822 and message/global, the decoded body is parsed and walked as a message of its own and stored in p.Message. Any other part is read to the end and its data discarded.
+func (p *Part) Walk() error {
+	if len(p.bound) == 0 { // No boundary: not a (valid) multipart.
+		if p.MediaType == "MESSAGE" && (p.MediaSubType == "RFC822" || p.MediaSubType == "GLOBAL") {
+			// todo: don't read whole submessage in memory...
+			buf, err := io.ReadAll(p.Reader())
+			if err != nil {
+				return err
+			}
+			mp, err := Parse(bytes.NewReader(buf))
+			if err != nil {
+				return fmt.Errorf("parsing embedded message: %w", err)
+			}
+			// todo: if this is a DSN, we should have a lax parser that doesn't fail on unexpected end of file. this is quite common because MTA's can just truncate the original message.
+			if err := mp.Walk(); err != nil {
+				return fmt.Errorf("parsing parts of embedded message: %w", err)
+			}
+			// todo: if mp does not contain any non-identity content-transfer-encoding, we should set an offsetReader of p.r on mp, recursively.
+			p.Message = &mp
+			return nil
+		}
+		_, err := io.Copy(io.Discard, p.Reader()) // Reading to the end lets the readers record offsets, line count and decoded size.
+		return err
+	}
+
+	for {
+		pp, err := p.ParseNextPart()
+		if err == io.EOF { // Closing boundary seen, no more parts.
+			return nil
+		}
+		if err != nil {
+			return err
+		}
+		if err := pp.Walk(); err != nil {
+			return err
+		}
+	}
+}
+
+// String returns a debugging representation of the part.
+func (p *Part) String() string {
+	return fmt.Sprintf("&Part{%s/%s offsets %d/%d/%d/%d lines %d decodedsize %d next %d last %d bound %q parts %v}", p.MediaType, p.MediaSubType, p.BoundaryOffset, p.HeaderOffset, p.BodyOffset, p.EndOffset, p.RawLineCount, p.DecodedSize, p.nextBoundOffset, p.lastBoundOffset, p.bound, p.Parts)
+}
+
+// newPart parses a new part, which can be the top-level message. It reads the boundary line (for sub parts) and the header, and for a multipart also skips the preamble up to its first boundary. The body is not read.
+// offset is the bound offset for parts, and the start of message for top-level messages. parent indicates if this is a top-level message or sub-part.
+// If an error occurs, p's exported values can still be relevant. EnsurePart uses these values.
+func newPart(r io.ReaderAt, offset int64, parent *Part) (p Part, rerr error) {
+	if r == nil {
+		panic("nil reader")
+	}
+	p = Part{
+		BoundaryOffset: -1,
+		EndOffset:      -1,
+		r:              r,
+		parent:         parent,
+	}
+
+	b := &bufAt{r: r, offset: offset}
+
+	if parent != nil {
+		p.BoundaryOffset = offset
+		if line, _, err := b.ReadLine(true); err != nil {
+			return p, err
+		} else if match, finish := checkBound(line, parent.bound); !match {
+			return p, fmt.Errorf("missing bound")
+		} else if finish {
+			return p, fmt.Errorf("new part for closing boundary")
+		}
+	}
+
+	// Collect header, up to and including the empty line that ends it.
+	p.HeaderOffset = b.offset
+	p.BodyOffset = b.offset // Updated below once the header has been read completely.
+	hb := &bytes.Buffer{}
+	for {
+		line, _, err := b.ReadLine(true)
+		if err != nil {
+			return p, err
+		}
+		hb.Write(line)
+		if len(line) == 2 {
+			break // crlf
+		}
+	}
+	p.BodyOffset = b.offset
+
+	h, err := parseHeader(hb)
+	if err != nil {
+		return p, fmt.Errorf("parsing header: %w", err)
+	}
+	p.header = h
+
+	ct := h.Get("Content-Type")
+	mt, params, err := mime.ParseMediaType(ct)
+	if err != nil && ct != "" { // An absent content-type is not an error, the media type just stays empty.
+		return p, fmt.Errorf("%w: %s: %q", ErrBadContentType, err, ct)
+	}
+	if mt != "" {
+		t := strings.SplitN(strings.ToUpper(mt), "/", 2)
+		if len(t) != 2 {
+			return p, fmt.Errorf("bad content-type: %q (content-type %q)", mt, ct)
+		}
+		p.MediaType = t[0]
+		p.MediaSubType = t[1]
+		p.ContentTypeParams = params
+	}
+
+	p.ContentID = h.Get("Content-Id")
+	p.ContentDescription = h.Get("Content-Description")
+	p.ContentTransferEncoding = strings.ToUpper(h.Get("Content-Transfer-Encoding"))
+
+	if parent == nil { // Only top-level messages get an envelope.
+		p.Envelope, err = parseEnvelope(mail.Header(h))
+		if err != nil {
+			return p, err
+		}
+	}
+
+	if p.MediaType == "MULTIPART" {
+		s := params["boundary"]
+		if s == "" {
+			return p, errMissingBoundaryParam
+		}
+		p.bound = append([]byte("--"), s...)
+
+		// Discard preamble, before first boundary.
+		for {
+			line, _, err := b.PeekLine(true)
+			if err != nil {
+				return p, fmt.Errorf("parsing line for part preamble: %w", err)
+			}
+			// Line only needs boundary prefix, not exact match. ../rfc/2046:1103
+			// Well, for compatibility, we require whitespace after the boundary. Because some
+			// software use the same boundary but with text appended for sub parts.
+			if match, finish := checkBound(line, p.bound); match {
+				if finish {
+					return p, errFirstBoundCloses
+				}
+				break
+			}
+			b.ReadLine(true) // Consume the preamble line that was just peeked.
+		}
+		p.nextBoundOffset = b.offset // The boundary line itself is left unread, ParseNextPart starts there.
+		p.lastBoundOffset = b.offset
+	}
+
+	return p, nil
+}
+
+// Header returns the parsed header of this part.
+func (p *Part) Header() (textproto.MIMEHeader, error) {
+	if p.header != nil {
+		return p.header, nil
+	}
+	h, err := parseHeader(p.HeaderReader())
+	p.header = h
+	return h, err
+}
+
+// HeaderReader returns a reader over the raw header section of this part, from
+// HeaderOffset up to BodyOffset, so including the bare CRLF that ends the header.
+func (p *Part) HeaderReader() io.Reader {
+	size := p.BodyOffset - p.HeaderOffset
+	return io.NewSectionReader(p.r, p.HeaderOffset, size)
+}
+
+// parseHeader reads a MIME header from r, with keys in canonical form.
+func parseHeader(r io.Reader) (textproto.MIMEHeader, error) {
+	br := bufio.NewReader(r)
+	tr := textproto.NewReader(br)
+	return tr.ReadMIMEHeader()
+}
+
+// parseEnvelope collects the common message headers from h. Address lists that
+// cannot be parsed are left empty. The returned error is always nil.
+func parseEnvelope(h mail.Header) (*Envelope, error) {
+	// A missing or invalid Date header leaves the date at the zero time.
+	date, _ := h.Date()
+	env := Envelope{
+		Date:      date,
+		Subject:   h.Get("Subject"),
+		From:      parseAddressList(h, "from"),
+		Sender:    parseAddressList(h, "sender"),
+		ReplyTo:   parseAddressList(h, "reply-to"),
+		To:        parseAddressList(h, "to"),
+		CC:        parseAddressList(h, "cc"),
+		BCC:       parseAddressList(h, "bcc"),
+		InReplyTo: h.Get("In-Reply-To"),
+		MessageID: h.Get("Message-Id"),
+	}
+	return &env, nil
+}
+
+// parseAddressList parses header k of h as a list of addresses. It returns nil
+// if the header cannot be parsed as an address list. An address that is not a
+// valid SMTP address is logged and returned with only its name set.
+func parseAddressList(h mail.Header, k string) []Address {
+	parsed, err := h.AddressList(k)
+	if err != nil {
+		return nil
+	}
+	var result []Address
+	for _, ma := range parsed {
+		// todo: parse more fully according to ../rfc/5322:959
+		entry := Address{Name: ma.Name}
+		if sa, err := smtp.ParseAddress(ma.Address); err != nil {
+			// todo: pass a ctx to this function so we can log with cid.
+			xlog.Infox("parsing address", err, mlog.Field("address", ma.Address))
+		} else {
+			entry.User = sa.Localpart.String()
+			entry.Host = sa.Domain.ASCII
+		}
+		result = append(result, entry)
+	}
+	return result
+}
+
+// ParseNextPart parses the next (sub)part of this multipart message and appends it to p.Parts. It returns errNotMultipart if p has no boundary.
+// ParseNextPart returns io.EOF and a nil part when there are no more parts, i.e. when the closing boundary is found. At that point p.EndOffset is set.
+// Only use for initial parsing of message. Once parsed, use p.Parts.
+func (p *Part) ParseNextPart() (*Part, error) {
+	if len(p.bound) == 0 {
+		return nil, errNotMultipart
+	}
+	if p.nextBoundOffset == -1 {
+		if enforceSequential {
+			panic("access not sequential")
+		}
+		// Set nextBoundOffset by fully reading the last part. The boundReader sets it on p when it finds the next boundary.
+		last, err := newPart(p.r, p.lastBoundOffset, p)
+		if err != nil {
+			return nil, err
+		}
+		if _, err := io.Copy(io.Discard, last.RawReader()); err != nil {
+			return nil, err
+		}
+		if p.nextBoundOffset == -1 {
+			return nil, fmt.Errorf("internal error: reading part did not set nextBoundOffset")
+		}
+	}
+	b := &bufAt{r: p.r, offset: p.nextBoundOffset}
+	// todo: should we require a crlf on final closing bound? we don't require it because some message/rfc822 don't have a crlf after their closing boundary, so those messages don't end in crlf.
+	line, crlf, err := b.ReadLine(false)
+	if err != nil {
+		return nil, err
+	}
+	if match, finish := checkBound(line, p.bound); !match {
+		return nil, fmt.Errorf("expected bound, got %q", line)
+	} else if finish {
+		// Read any trailing data, up to the next boundary of the parent or the end of the data.
+		if p.parent != nil {
+			for {
+				line, _, err := b.PeekLine(false)
+				if err != nil {
+					break
+				}
+				if match, _ := checkBound(line, p.parent.bound); match {
+					break
+				}
+				b.ReadLine(false)
+			}
+			if p.parent.lastBoundOffset == p.BoundaryOffset { // Only if p is the part the parent parsed last.
+				p.parent.nextBoundOffset = b.offset
+			}
+		}
+		p.EndOffset = b.offset
+		return nil, io.EOF
+	} else if !crlf {
+		return nil, fmt.Errorf("non-finishing bound without crlf: %w", errUnexpectedEOF)
+	}
+	boundOffset := p.nextBoundOffset
+	p.lastBoundOffset = boundOffset
+	p.nextBoundOffset = -1 // Unknown until the new part has been read fully.
+	np, err := newPart(p.r, boundOffset, p)
+	if err != nil {
+		return nil, err
+	}
+	p.Parts = append(p.Parts, np)
+	return &p.Parts[len(p.Parts)-1], nil
+}
+
+// Reader returns a reader for the body with the content-transfer-encoding
+// decoded. Reading it to the end sets DecodedSize on the part.
+func (p *Part) Reader() io.Reader {
+	raw := p.RawReader()
+	return p.bodyReader(raw)
+}
+
+// bodyReader wraps r, the raw body, in a decoder for the
+// content-transfer-encoding of the part, and in a reader that keeps track of the
+// decoded size. For text parts, that reader also makes all lines end in CRLF.
+func (p *Part) bodyReader(r io.Reader) io.Reader {
+	decoded := newDecoder(p.ContentTransferEncoding, r)
+	if p.MediaType != "TEXT" {
+		return &countReader{p: p, r: decoded}
+	}
+	return &textReader{p: p, r: bufio.NewReader(decoded)}
+}
+
+// countReader is an io.Reader that forwards reads to the underlying reader
+// while counting the bytes returned. When the underlying reader returns eof,
+// the total is stored in p.DecodedSize.
+type countReader struct {
+	p     *Part
+	r     io.Reader
+	count int64
+}
+
+// Read implements io.Reader.
+func (cr *countReader) Read(buf []byte) (int, error) {
+	n, err := cr.r.Read(buf)
+	if n > 0 {
+		cr.count += int64(n)
+	}
+	if err != io.EOF {
+		return n, err
+	}
+	cr.p.DecodedSize = cr.count
+	return n, err
+}
+
+// textReader is an io.Reader that makes sure all lines it returns end in CRLF,
+// by inserting a CR before each LF that is not preceded by one.
+// When the underlying reader returns an error, including eof, p.DecodedSize is
+// set to the number of bytes returned so far.
+type textReader struct {
+	p      *Part
+	r      *bufio.Reader
+	count  int64
+	prevcr bool // If previous byte returned was a CR.
+}
+
+// Read implements io.Reader.
+func (tr *textReader) Read(buf []byte) (int, error) {
+	n := 0
+	for n < len(buf) {
+		c, err := tr.r.ReadByte()
+		if err != nil {
+			tr.count += int64(n)
+			tr.p.DecodedSize = tr.count
+			return n, err
+		}
+		if c == '\n' && !tr.prevcr {
+			// Bare LF. Return a CR now, and push the LF back so it is read again as
+			// next byte, possibly in a next call.
+			tr.r.UnreadByte()
+			c = '\r'
+		}
+		buf[n] = c
+		n++
+		tr.prevcr = c == '\r'
+	}
+	tr.count += int64(n)
+	return n, nil
+}
+
+// newDecoder returns a reader that decodes r according to the
+// content-transfer-encoding cte, which must be in upper case. For any encoding
+// other than BASE64 and QUOTED-PRINTABLE, r itself is returned.
+func newDecoder(cte string, r io.Reader) io.Reader {
+	// ../rfc/2045:775
+	if cte == "BASE64" {
+		return base64.NewDecoder(base64.StdEncoding, r)
+	}
+	if cte == "QUOTED-PRINTABLE" {
+		return quotedprintable.NewReader(r)
+	}
+	return r
+}
+
+// RawReader returns a reader for the body as stored in the message, without
+// decoding, so with e.g. quoted-printable or base64 content intact.
+// If the end of the part is not yet known, reading the part to the end records
+// it, along with the line count. For sub parts, that also helps the parent part
+// find its next part efficiently.
+// RawReader panics if the part has no reader set.
+func (p *Part) RawReader() io.Reader {
+	if p.r == nil {
+		panic("missing reader")
+	}
+	if end := p.EndOffset; end >= 0 {
+		// Part was read before, its extent is known.
+		return io.NewSectionReader(p.r, p.BodyOffset, end-p.BodyOffset)
+	}
+	p.RawLineCount = 0
+	if p.parent != nil {
+		// Sub part, ends at the next boundary of the parent.
+		return &boundReader{p: p, b: &bufAt{r: p.r, offset: p.BodyOffset}, lastnewline: true}
+	}
+	// Top-level message, ends at the end of the data.
+	return &offsetReader{p: p, offset: p.BodyOffset, lastnewline: true}
+}
+
+// bufAt is a buffered reader on an underlying ReaderAt. It reads lines of at most maxLineLength bytes.
+type bufAt struct {
+	offset int64 // Offset in r currently consumed, i.e. ignoring any buffered data.
+
+	r       io.ReaderAt
+	buf     []byte // Buffered data.
+	nbuf    int    // Valid bytes in buf.
+	scratch []byte // Holds the line returned by the last ReadLine/PeekLine, overwritten by the next call.
+}
+
+// todo: lower max line length? at least have a mode where we refuse anything beyond 1000 bytes. ../rfc/5321:3512
+const maxLineLength = 8 * 1024
+
+// ensure makes sure b.nbuf is up to maxLineLength, unless eof is encountered or the buffer already holds a newline. It returns io.EOF only when no data is buffered at all, other read errors are returned as is.
+func (b *bufAt) ensure() error {
+	for _, c := range b.buf[:b.nbuf] {
+		if c == '\n' { // A line ending is already buffered, no need to read more.
+			return nil
+		}
+	}
+	if b.scratch == nil {
+		b.scratch = make([]byte, maxLineLength)
+	}
+	if b.buf == nil {
+		b.buf = make([]byte, maxLineLength)
+	}
+	for b.nbuf < maxLineLength {
+		n, err := b.r.ReadAt(b.buf[b.nbuf:], b.offset+int64(b.nbuf))
+		if n > 0 {
+			b.nbuf += n
+		}
+		if err != nil && err != io.EOF || err == io.EOF && b.nbuf+n == 0 { // For eof, n was already added to b.nbuf above; the sum is only zero if nothing is buffered.
+			return err
+		}
+		if n == 0 || err == io.EOF {
+			break
+		}
+	}
+	return nil
+}
+
+// ReadLine reads and consumes a line until \r\n is found, returning the line including \r\n, with crlf set to true.
+// If a single \r or \n is encountered, ReadLine returns an error. If no \r\n is found before the end of the data, ReadLine returns an error if requirecrlf is set, and otherwise the remaining data with crlf set to false. If no data is left at all, the error is io.EOF. The returned buffer is overwritten by the next call to ReadLine or PeekLine.
+func (b *bufAt) ReadLine(requirecrlf bool) (buf []byte, crlf bool, err error) {
+	return b.line(true, requirecrlf)
+}
+
+func (b *bufAt) PeekLine(requirecrlf bool) (buf []byte, crlf bool, err error) {
+	return b.line(false, requirecrlf)
+}
+
+// line returns the next line from the buffered data, in b.scratch. If consume
+// is set, the returned data is removed from the buffer and the offset advanced.
+//
+// A line ending in \r\n is returned including the \r\n, with crlf set to true.
+// A \r or \n without the other results in errHalfLineSep. If the buffered data
+// has no line ending at all, the result is errLineTooLong when the buffer is
+// full, errUnexpectedEOF when requirecrlf is set, and otherwise all remaining
+// data with crlf set to false.
+func (b *bufAt) line(consume, requirecrlf bool) (buf []byte, crlf bool, err error) {
+	if err := b.ensure(); err != nil {
+		return nil, false, err
+	}
+
+	data := b.buf[:b.nbuf]
+	if sep := bytes.IndexAny(data, "\r\n"); sep >= 0 {
+		// The first line ending byte must be a CR, with an LF directly after it.
+		end := sep + 2
+		if data[sep] != '\r' || end > len(data) || data[sep+1] != '\n' {
+			return nil, false, errHalfLineSep
+		}
+		b.scratch = b.scratch[:end]
+		copy(b.scratch, data[:end])
+		if consume {
+			copy(b.buf, b.buf[end:])
+			b.offset += int64(end)
+			b.nbuf -= end
+		}
+		return b.scratch, true, nil
+	}
+
+	// No line ending in the buffered data.
+	switch {
+	case b.nbuf >= maxLineLength:
+		return nil, false, errLineTooLong
+	case requirecrlf:
+		return nil, false, errUnexpectedEOF
+	}
+	b.scratch = b.scratch[:b.nbuf]
+	copy(b.scratch, data)
+	if consume {
+		b.offset += int64(b.nbuf)
+		b.nbuf = 0
+	}
+	return b.scratch, false, nil
+}
+
+// PeekByte returns the first byte that has not been consumed yet, without
+// consuming it. It returns io.EOF if no data is left.
+func (b *bufAt) PeekByte() (byte, error) {
+	err := b.ensure()
+	switch {
+	case err != nil:
+		return 0, err
+	case b.nbuf == 0:
+		return 0, io.EOF
+	}
+	return b.buf[0], nil
+}
+
+// offsetReader is an io.Reader that reads from the reader of part p, starting
+// at offset, until the end of the data. While reading it counts lines in
+// p.RawLineCount. At eof it sets p.EndOffset.
+type offsetReader struct {
+	p           *Part
+	offset      int64
+	lastnewline bool // If the last byte returned was a newline, or nothing was returned yet. For counting lines.
+}
+
+// Read implements io.Reader.
+func (r *offsetReader) Read(buf []byte) (int, error) {
+	n, err := r.p.r.ReadAt(buf, r.offset)
+	if n > 0 {
+		r.offset += int64(n)
+
+		// Each byte that directly follows a newline starts a new line.
+		for i := 0; i < n; i++ {
+			if r.lastnewline {
+				r.p.RawLineCount++
+			}
+			r.lastnewline = buf[i] == '\n'
+		}
+	}
+	if err == io.EOF {
+		r.p.EndOffset = r.offset
+	}
+	return n, err
+}
+
+var crlf = []byte("\r\n") // Shared by boundReader as the pending line ending; sliced but never written to.
+
+// boundReader is a reader that stops at a closing multipart boundary. More precisely, it stops at any line matching the boundary of the parent of p, and then records the end of p and, if p is the part last parsed by the parent, where the parent's next part starts.
+type boundReader struct {
+	p           *Part
+	b           *bufAt
+	buf         []byte // Data from previous line, to be served first.
+	nbuf        int    // Number of valid bytes in buf.
+	crlf        []byte // Possible crlf, to be returned if we do not yet encounter a boundary.
+	lastnewline bool   // If last char return was a newline. For counting lines.
+}
+
+func (b *boundReader) Read(buf []byte) (count int, rerr error) { // Read implements io.Reader, returning io.EOF when a boundary of the parent is next; the crlf directly before the boundary is not returned.
+	origBuf := buf
+	defer func() {
+		if count > 0 { // Count lines in whatever is returned: each byte directly after a newline starts a new line.
+			for _, c := range origBuf[:count] {
+				if b.lastnewline {
+					b.p.RawLineCount++
+				}
+				b.lastnewline = c == '\n'
+			}
+		}
+	}()
+
+	for {
+		// Read data from earlier line.
+		if b.nbuf > 0 {
+			n := b.nbuf
+			if n > len(buf) {
+				n = len(buf)
+			}
+			copy(buf, b.buf[:n])
+			copy(b.buf, b.buf[n:])
+			buf = buf[n:]
+			b.nbuf -= n
+			count += n
+			if b.nbuf > 0 { // Caller's buffer is full.
+				break
+			}
+		}
+
+		// Look at next line. If it is a boundary, we are done and won't serve the crlf from the last line.
+		line, _, err := b.b.PeekLine(false)
+		if match, _ := checkBound(line, b.p.parent.bound); match {
+			b.p.EndOffset = b.b.offset - int64(len(b.crlf)) // The pending crlf is not part of the body.
+			if b.p.parent.lastBoundOffset == b.p.BoundaryOffset {
+				b.p.parent.nextBoundOffset = b.b.offset
+			} else if enforceSequential {
+				panic("access not sequential")
+			}
+			return count, io.EOF
+		}
+		if err == io.EOF {
+			err = errMissingClosingBoundary
+		}
+		if err != nil && err != io.EOF {
+			return count, err
+		}
+		if len(b.crlf) > 0 { // Next line is not a boundary, so the line ending of the previous line is body data.
+			n := len(b.crlf)
+			if n > len(buf) {
+				n = len(buf)
+			}
+			copy(buf, b.crlf[:n])
+			count += n
+			buf = buf[n:]
+			b.crlf = b.crlf[n:]
+		}
+		if len(buf) == 0 {
+			break
+		}
+		line, _, err = b.b.ReadLine(true)
+		if err != nil {
+			// Could be an unexpected end of the part.
+			return 0, err // NOTE(review): returns 0 even if bytes were already copied into buf during this call - confirm this is intended.
+		}
+		b.crlf = crlf // crlf will be read next time, but not if a boundary follows.
+		n := len(line) - 2
+		line = line[:n]
+		if n > len(buf) {
+			n = len(buf)
+		}
+		copy(buf, line[:n])
+		count += n
+		buf = buf[n:]
+		line = line[n:]
+		if len(line) > 0 { // Keep what did not fit for the next iteration or call.
+			if b.buf == nil {
+				b.buf = make([]byte, maxLineLength)
+			}
+			copy(b.buf, line)
+			b.nbuf = len(line)
+		}
+	}
+	return count, nil
+}
+
+// checkBound checks if line is a boundary line for bound, where bound includes
+// the leading "--". The first result indicates whether line matches: it must
+// start with bound, followed by either "--", nothing, or a space, tab, CR or LF.
+// The second result indicates whether it is a finishing boundary, i.e. bound
+// directly followed by "--".
+func checkBound(line, bound []byte) (bool, bool) {
+	if !bytes.HasPrefix(line, bound) {
+		return false, false
+	}
+	rest := line[len(bound):]
+	switch {
+	case bytes.HasPrefix(rest, []byte("--")):
+		return true, true
+	case len(rest) == 0:
+		return true, false
+	case rest[0] == ' ' || rest[0] == '\t' || rest[0] == '\r' || rest[0] == '\n':
+		return true, false
+	}
+	return false, false
+}
--- a/message/part_test.go
+++ b/message/part_test.go
@ -0,0 +1,501 @@
+package message
+
+import (
+	"bytes"
+	"errors"
+	"io"
+	"log"
+	"os"
+	"path/filepath"
+	"reflect"
+	"strings"
+	"testing"
+)
+
+// tcheck aborts the test with msg as context if err is not nil.
+func tcheck(t *testing.T, err error, msg string) {
+	t.Helper()
+	if err == nil {
+		return
+	}
+	t.Fatalf("%s: %s", msg, err)
+}
+
+// tcompare aborts the test unless got and exp are deeply equal.
+func tcompare(t *testing.T, got, exp any) {
+	t.Helper()
+	if reflect.DeepEqual(got, exp) {
+		return
+	}
+	t.Fatalf("got %q, expected %q", got, exp)
+}
+
+func tfail(t *testing.T, err, expErr error) {
+	t.Helper()
+	if (err == nil) != (expErr == nil) || expErr != nil && !errors.Is(err, expErr) {
+		t.Fatalf("got err %v, expected %v", err, expErr)
+	}
+}
+
+func TestEmptyHeader(t *testing.T) {
+	s := "\r\nx"
+	p, err := EnsurePart(strings.NewReader(s), int64(len(s)))
+	tcheck(t, err, "parse empty headers")
+	buf, err := io.ReadAll(p.Reader())
+	tcheck(t, err, "read")
+	expBody := "x"
+	tcompare(t, string(buf), expBody)
+	tcompare(t, p.MediaType, "")
+	tcompare(t, p.MediaSubType, "")
+}
+
+func TestBadContentType(t *testing.T) {
+	s := "content-type: text/html;;\r\n\r\ntest"
+	p, err := EnsurePart(strings.NewReader(s), int64(len(s)))
+	tfail(t, err, ErrBadContentType)
+	buf, err := io.ReadAll(p.Reader())
+	tcheck(t, err, "read")
+	expBody := "test"
+	tcompare(t, string(buf), expBody)
+	tcompare(t, p.MediaType, "APPLICATION")
+	tcompare(t, p.MediaSubType, "OCTET-STREAM")
+}
+
+// basicMsg is a small single-part text/plain message with a base64-encoded body
+// of a single line. Line endings are converted to crlf.
+var basicMsg = strings.ReplaceAll(`From: <mjl@mox.example>
+Content-Type: text/plain
+Content-Transfer-Encoding: base64
+
+aGkK
+`, "\n", "\r\n")
+
+func TestBasic(t *testing.T) {
+	r := strings.NewReader(basicMsg)
+	p, err := Parse(r)
+	tcheck(t, err, "new reader")
+
+	buf, err := io.ReadAll(p.RawReader())
+	tcheck(t, err, "read raw")
+	expBody := "aGkK\r\n"
+	tcompare(t, string(buf), expBody)
+
+	buf, err = io.ReadAll(p.Reader())
+	tcheck(t, err, "read decoded")
+	tcompare(t, string(buf), "hi\r\n")
+
+	if p.RawLineCount != 1 {
+		t.Fatalf("basic message, got %d lines, expected 1", p.RawLineCount)
+	}
+	if size := p.EndOffset - p.BodyOffset; size != int64(len(expBody)) {
+		t.Fatalf("basic message, got size %d, expected %d", size, len(expBody))
+	}
+}
+
+// basicMsg2 is a single-part TEXT/PLAIN message without content transfer
+// encoding, whose body is one line of text followed by an empty line. Line
+// endings are converted to crlf.
+//
+// From ../rfc/3501:2589
+var basicMsg2 = strings.ReplaceAll(`Date: Mon, 7 Feb 1994 21:52:25 -0800 (PST)
+From: Fred Foobar <foobar@Blurdybloop.example>
+Subject: afternoon meeting
+To: mooch@owatagu.siam.edu.example
+Message-Id: <B27397-0100000@Blurdybloop.example>
+MIME-Version: 1.0
+Content-Type: TEXT/PLAIN; CHARSET=US-ASCII
+
+Hello Joe, do you think we can meet at 3:30 tomorrow?
+
+`, "\n", "\r\n")
+
+func TestBasic2(t *testing.T) {
+	r := strings.NewReader(basicMsg2)
+	p, err := Parse(r)
+	tcheck(t, err, "new reader")
+
+	buf, err := io.ReadAll(p.RawReader())
+	tcheck(t, err, "read raw")
+	expBody := "Hello Joe, do you think we can meet at 3:30 tomorrow?\r\n\r\n"
+	tcompare(t, string(buf), expBody)
+
+	buf, err = io.ReadAll(p.Reader())
+	tcheck(t, err, "read decoded")
+	tcompare(t, string(buf), expBody)
+
+	if p.RawLineCount != 2 {
+		t.Fatalf("basic message, got %d lines, expected 2", p.RawLineCount)
+	}
+	if size := p.EndOffset - p.BodyOffset; size != int64(len(expBody)) {
+		t.Fatalf("basic message, got size %d, expected %d", size, len(expBody))
+	}
+
+	r = strings.NewReader(basicMsg2)
+	p, err = Parse(r)
+	tcheck(t, err, "new reader")
+	err = p.Walk()
+	tcheck(t, err, "walk")
+	if p.RawLineCount != 2 {
+		t.Fatalf("basic message, got %d lines, expected 2", p.RawLineCount)
+	}
+	if size := p.EndOffset - p.BodyOffset; size != int64(len(expBody)) {
+		t.Fatalf("basic message, got size %d, expected %d", size, len(expBody))
+	}
+}
+
+// mimeMsg is a multipart/mixed message with a preamble, two text parts (the
+// first without headers and without trailing linebreak, the second with an
+// explicit content-type and a trailing linebreak) and an epilogue. Line endings
+// are converted to crlf.
+var mimeMsg = strings.ReplaceAll(`From: Nathaniel Borenstein <nsb@bellcore.com>
+To: Ned Freed <ned@innosoft.com>
+Date: Sun, 21 Mar 1993 23:56:48 -0800 (PST)
+Subject: Sample message
+MIME-Version: 1.0
+Content-type: multipart/mixed; boundary="simple boundary"
+
+This is the preamble.  It is to be ignored, though it
+is a handy place for composition agents to include an
+explanatory note to non-MIME conformant readers.
+
+--simple boundary
+
+This is implicitly typed plain US-ASCII text.
+It does NOT end with a linebreak.
+--simple boundary
+Content-type: text/plain; charset=us-ascii
+
+This is explicitly typed plain US-ASCII text.
+It DOES end with a linebreak.
+
+--simple boundary--
+
+This is the epilogue.  It is also to be ignored.
+`, "\n", "\r\n")
+
+// TestMime parses mimeMsg part by part and checks the body and raw line count
+// of both parts, and that no further part follows.
+func TestMime(t *testing.T) {
+	// from ../rfc/2046:1148
+	p, err := Parse(strings.NewReader(mimeMsg))
+	tcheck(t, err, "new reader")
+	if len(p.bound) == 0 {
+		t.Fatalf("got no bound, expected bound for mime message")
+	}
+
+	expBodies := []string{
+		"This is implicitly typed plain US-ASCII text.\r\nIt does NOT end with a linebreak.",
+		"This is explicitly typed plain US-ASCII text.\r\nIt DOES end with a linebreak.\r\n",
+	}
+	for _, exp := range expBodies {
+		pp, err := p.ParseNextPart()
+		tcheck(t, err, "next part")
+		body, err := io.ReadAll(pp.Reader())
+		tcheck(t, err, "read all")
+		tcompare(t, string(body), exp)
+	}
+
+	// After the closing boundary there are no more parts.
+	_, err = p.ParseNextPart()
+	tcompare(t, err, io.EOF)
+
+	if len(p.Parts) != 2 {
+		t.Fatalf("got %d parts, expected 2", len(p.Parts))
+	}
+	for i, ordinal := range []string{"first", "second"} {
+		if p.Parts[i].RawLineCount != 2 {
+			t.Fatalf("got %d lines for %s part, expected 2", p.Parts[i].RawLineCount, ordinal)
+		}
+	}
+}
+
+// TestLongLine checks that a line of more than maxLineLength bytes is rejected
+// with errLineTooLong.
+func TestLongLine(t *testing.T) {
+	tooLong := bytes.Repeat([]byte{'a'}, maxLineLength+1)
+	_, err := Parse(bytes.NewReader(tooLong))
+	tfail(t, err, errLineTooLong)
+}
+
+// TestHalfCrLf checks that a bare cr and a bare lf are both rejected with
+// errHalfLineSep.
+func TestHalfCrLf(t *testing.T) {
+	for _, msg := range []string{"test\rtest", "test\ntest"} {
+		_, err := Parse(strings.NewReader(msg))
+		tfail(t, err, errHalfLineSep)
+	}
+}
+
+// TestMissingClosingBoundary checks that reading a multipart message that ends
+// without its closing boundary results in errMissingClosingBoundary, both when
+// read with walkmsg and with Walk.
+func TestMissingClosingBoundary(t *testing.T) {
+	message := strings.ReplaceAll(`Content-Type: multipart/mixed; boundary=x
+
+--x
+
+test
+`, "\n", "\r\n")
+	msg, err := Parse(strings.NewReader(message))
+	tcheck(t, err, "new reader")
+	err = walkmsg(&msg)
+	tfail(t, err, errMissingClosingBoundary)
+
+	// Parse again for a fresh Part. A parse error must not go unnoticed, Walk would
+	// otherwise run on an incompletely parsed message.
+	msg, err = Parse(strings.NewReader(message))
+	tcheck(t, err, "new reader")
+	err = msg.Walk()
+	tfail(t, err, errMissingClosingBoundary)
+}
+
+// TestHeaderEOF checks that data ending in the middle of a header line is
+// rejected with errUnexpectedEOF.
+func TestHeaderEOF(t *testing.T) {
+	_, err := Parse(strings.NewReader("header: test"))
+	tfail(t, err, errUnexpectedEOF)
+}
+
+func TestBodyEOF(t *testing.T) {
+	message := "header: test\r\n\r\ntest"
+	msg, err := Parse(strings.NewReader(message))
+	tcheck(t, err, "new reader")
+	buf, err := io.ReadAll(msg.Reader())
+	tcheck(t, err, "read body")
+	tcompare(t, string(buf), "test")
+}
+
+func TestWalk(t *testing.T) {
+	var message = strings.ReplaceAll(`Content-Type: multipart/related; boundary="----=_NextPart_afb3ad6f146b12b709deac3e387a3ad7"
+
+------=_NextPart_afb3ad6f146b12b709deac3e387a3ad7
+Content-Type: multipart/alternative; boundary="----=_NextPart_afb3ad6f146b12b709deac3e387a3ad7_alt"
+
+------=_NextPart_afb3ad6f146b12b709deac3e387a3ad7_alt
+Content-Type: text/plain; charset="utf-8"
+Content-Transfer-Encoding: 8bit
+
+test
+
+
+------=_NextPart_afb3ad6f146b12b709deac3e387a3ad7_alt
+Content-Type: text/html; charset="utf-8"
+Content-Transfer-Encoding: 8bit
+
+test
+
+------=_NextPart_afb3ad6f146b12b709deac3e387a3ad7_alt--
+------=_NextPart_afb3ad6f146b12b709deac3e387a3ad7--
+
+`, "\n", "\r\n")
+
+	msg, err := Parse(strings.NewReader(message))
+	tcheck(t, err, "new reader")
+	enforceSequential = true
+	defer func() {
+		enforceSequential = false
+	}()
+	err = walkmsg(&msg)
+	tcheck(t, err, "walkmsg")
+
+	msg, _ = Parse(strings.NewReader(message))
+	err = msg.Walk()
+	tcheck(t, err, "msg.Walk")
+}
+
+// TestNested reads a message with five parts, including a nested multipart and
+// an embedded message/rfc822. It checks the number of parts and the body of the
+// embedded message after walkmsg, and that Walk succeeds on a freshly parsed
+// message.
+func TestNested(t *testing.T) {
+	// From ../rfc/2049:801
+	nestedMessage := strings.ReplaceAll(`MIME-Version: 1.0
+From: Nathaniel Borenstein <nsb@nsb.fv.com>
+To: Ned Freed <ned@innosoft.com>
+Date: Fri, 07 Oct 1994 16:15:05 -0700 (PDT)
+Subject: A multipart example
+Content-Type: multipart/mixed;
+              boundary=unique-boundary-1
+
+This is the preamble area of a multipart message.
+Mail readers that understand multipart format
+should ignore this preamble.
+
+If you are reading this text, you might want to
+consider changing to a mail reader that understands
+how to properly display multipart messages.
+
+--unique-boundary-1
+
+  ... Some text appears here ...
+
+[Note that the blank between the boundary and the start
+ of the text in this part means no header fields were
+ given and this is text in the US-ASCII character set.
+ It could have been done with explicit typing as in the
+ next part.]
+
+--unique-boundary-1
+Content-type: text/plain; charset=US-ASCII
+
+This could have been part of the previous part, but
+illustrates explicit versus implicit typing of body
+parts.
+
+--unique-boundary-1
+Content-Type: multipart/parallel; boundary=unique-boundary-2
+
+--unique-boundary-2
+Content-Type: audio/basic
+Content-Transfer-Encoding: base64
+
+
+--unique-boundary-2
+Content-Type: image/jpeg
+Content-Transfer-Encoding: base64
+
+
+--unique-boundary-2--
+
+--unique-boundary-1
+Content-type: text/enriched
+
+This is <bold><italic>enriched.</italic></bold>
+<smaller>as defined in RFC 1896</smaller>
+
+Isn't it
+<bigger><bigger>cool?</bigger></bigger>
+
+--unique-boundary-1
+Content-Type: message/rfc822
+
+From: (mailbox in US-ASCII)
+To: (address in US-ASCII)
+Subject: (subject in US-ASCII)
+Content-Type: Text/plain; charset=ISO-8859-1
+Content-Transfer-Encoding: Quoted-printable
+
+  ... Additional text in ISO-8859-1 goes here ...
+
+--unique-boundary-1--
+`, "\n", "\r\n")
+
+	msg, err := Parse(strings.NewReader(nestedMessage))
+	tcheck(t, err, "new reader")
+	enforceSequential = true
+	defer func() {
+		enforceSequential = false
+	}()
+	err = walkmsg(&msg)
+	tcheck(t, err, "walkmsg")
+
+	if len(msg.Parts) != 5 {
+		t.Fatalf("got %d parts, expected 5", len(msg.Parts))
+	}
+	// The last part is the message/rfc822, walkmsg has parsed it into Message.
+	sub := msg.Parts[4].Message
+	if sub == nil {
+		t.Fatalf("missing part.Message")
+	}
+	buf, err := io.ReadAll(sub.Reader())
+	if err != nil {
+		t.Fatalf("read message body: %v", err)
+	}
+	exp := "  ... Additional text in ISO-8859-1 goes here ...\r\n"
+	if string(buf) != exp {
+		t.Fatalf("got %q, expected %q", buf, exp)
+	}
+
+	// Parse again for a fresh Part, and check the error instead of discarding it.
+	msg, err = Parse(strings.NewReader(nestedMessage))
+	tcheck(t, err, "new reader")
+	err = msg.Walk()
+	tcheck(t, err, "msg.Walk")
+}
+
+// TestWalkdir parses the messages in the ham and spam training directories and
+// logs how many of them failed to parse. Parse failures do not fail the test.
+func TestWalkdir(t *testing.T) {
+	// Ensure these dirs exist. Developers should bring their own ham/spam example
+	// emails.
+	dirs := []string{"../testdata/train/ham", "../testdata/train/spam"}
+	for _, dir := range dirs {
+		// Fail with the actual cause instead of a later, less clear readdir error.
+		err := os.MkdirAll(dir, 0770)
+		tcheck(t, err, "mkdir")
+	}
+
+	var n, nfail int
+	for _, dir := range dirs {
+		twalkdir(t, dir, &n, &nfail)
+	}
+	log.Printf("parsing messages: %d/%d failed", nfail, n)
+}
+
+func twalkdir(t *testing.T, dir string, n, nfail *int) {
+	names, err := os.ReadDir(dir)
+	tcheck(t, err, "readdir")
+	if len(names) > 1000 {
+		names = names[:1000]
+	}
+	for _, name := range names {
+		p := filepath.Join(dir, name.Name())
+		*n++
+		err := walk(p)
+		if err != nil {
+			*nfail++
+			log.Printf("%s: %v", p, err)
+		}
+	}
+}
+
+// walk opens the file at path, parses it as a message and reads it in full
+// with walkmsg. It returns the first error encountered.
+func walk(path string) error {
+	f, err := os.Open(path)
+	if err != nil {
+		return err
+	}
+	defer f.Close()
+
+	msg, err := Parse(f)
+	if err == nil {
+		err = walkmsg(&msg)
+	}
+	return err
+}
+
+// walkmsg reads msg in full, with enforceSequential enabled for the duration
+// of the call.
+//
+// For a part without boundary, the decoded body is read. If the part is a
+// message/rfc822 or message/global, the body is parsed as message, stored in
+// msg.Message and walked too. The body is then read again through a section of
+// the underlying reader from BodyOffset to EndOffset, and walkmsg panics if
+// that does not result in the same data.
+//
+// For a multipart, all parts are parsed in order and walked recursively.
+//
+// The first error encountered is returned, including errors from walking an
+// embedded message.
+func walkmsg(msg *Part) error {
+	enforceSequential = true
+	defer func() {
+		enforceSequential = false
+	}()
+
+	if len(msg.bound) == 0 {
+		buf, err := io.ReadAll(msg.Reader())
+		if err != nil {
+			return err
+		}
+
+		if msg.MediaType == "MESSAGE" && (msg.MediaSubType == "RFC822" || msg.MediaSubType == "GLOBAL") {
+			mp, err := Parse(bytes.NewReader(buf))
+			if err != nil {
+				return err
+			}
+			msg.Message = &mp
+			// Do not drop errors about the embedded message, they are problems with this
+			// message too.
+			if err := walkmsg(msg.Message); err != nil {
+				return err
+			}
+		}
+
+		size := msg.EndOffset - msg.BodyOffset
+		if size < 0 {
+			log.Printf("msg %v", msg)
+			panic("inconsistent body/end offset")
+		}
+		// Reading the body by its offsets must give the same data as the sequential read above.
+		sr := io.NewSectionReader(msg.r, msg.BodyOffset, size)
+		decsr := msg.bodyReader(sr)
+		buf2, err := io.ReadAll(decsr)
+		if err != nil {
+			return err
+		}
+
+		if !bytes.Equal(buf, buf2) {
+			panic("data mismatch reading sequentially vs via offsets")
+		}
+
+		return nil
+	}
+
+	for {
+		pp, err := msg.ParseNextPart()
+		if err == io.EOF {
+			return nil
+		}
+		if err != nil {
+			return err
+		}
+		if err := walkmsg(pp); err != nil {
+			return err
+		}
+		// The recursive call disabled enforcing on return, enable it again for the next part.
+		enforceSequential = true
+	}
+}
+
+// TestEmbedded checks that a message with an embedded message/rfc822
+// multipart, read from a file, can be parsed in full by EnsurePart.
+func TestEmbedded(t *testing.T) {
+	f, err := os.Open("../testdata/message/message-rfc822-multipart.eml")
+	tcheck(t, err, "open")
+	// Don't leak the file descriptor for the remainder of the test run.
+	defer f.Close()
+	fi, err := f.Stat()
+	tcheck(t, err, "stat")
+	_, err = EnsurePart(f, fi.Size())
+	tcheck(t, err, "parse")
+}
+
+// TestEmbedded2 reads a message from file, converts its line endings to crlf
+// and checks that EnsurePart currently fails on it with errUnexpectedEOF.
+func TestEmbedded2(t *testing.T) {
+	data, err := os.ReadFile("../testdata/message/message-rfc822-multipart2.eml")
+	tcheck(t, err, "readfile")
+	data = bytes.ReplaceAll(data, []byte("\n"), []byte("\r\n"))
+
+	r := bytes.NewReader(data)
+	_, err = EnsurePart(r, r.Size())
+	tfail(t, err, errUnexpectedEOF) // todo: be able to parse this without an error? truncate message/rfc822 in dsn.
+}
--- a/message/readheaders.go
+++ b/message/readheaders.go
@ -0,0 +1,31 @@
+package message
+
+import (
+	"bufio"
+	"bytes"
+	"errors"
+	"io"
+)
+
+// crlf2x is the crlf ending the last header line followed by the empty line
+// that separates the headers from the body.
+var crlf2x = []byte("\r\n\r\n")
+
+// ErrHeaderSeparator is returned by ReadHeaders when the message ends before
+// the empty line that ends the headers.
+var ErrHeaderSeparator = errors.New("no header separator found")
+
+// ReadHeaders reads lines from msg up to and including the empty line that
+// ends the headers, and returns the headers ending with a single crlf, so
+// without the empty line. The body is left unread in msg.
+//
+// ReadHeaders returns ErrHeaderSeparator if msg ends before a header separator
+// is found. Other read errors are returned as is.
+func ReadHeaders(msg *bufio.Reader) ([]byte, error) {
+	var headers []byte
+	for {
+		line, err := msg.ReadBytes('\n')
+		atEOF := err == io.EOF
+		if err != nil && !atEOF {
+			return nil, err
+		}
+		// At EOF, line can still hold a last partial line.
+		headers = append(headers, line...)
+		switch {
+		case bytes.HasSuffix(headers, crlf2x):
+			// Strip the empty line, keep the crlf of the last header line.
+			return headers[:len(headers)-len("\r\n")], nil
+		case atEOF:
+			return nil, ErrHeaderSeparator
+		}
+	}
+}
--- a/message/time.go
+++ b/message/time.go
@ -0,0 +1,4 @@
+package message
+
+// RFC5322Z is a layout, for use with the time package, for timestamps as used
+// in internet mail messages: day, abbreviated month, year, hours and minutes,
+// and a numeric timezone offset. The layout has no day of the week and no
+// seconds.
+const RFC5322Z = "02 Jan 2006 15:04 -0700"
--- a/message/todo.go
+++ b/message/todo.go
@ -0,0 +1,12 @@
+package message
+
+// todo: we should parse headers ourselves
+
+// Link rfc updates about UTF-8 characters in messages.
+// These productions list valid characters in contexts:
+// VCHAR, visible printing: ../rfc/5234:774 ../rfc/6532:236
+// ctext, in comment: ../rfc/5322:602 ../rfc/6532:238
+// atext, in atom: ../rfc/5322:679 ../rfc/6532:240
+// qtext, in quoted string: ../rfc/5322:735 ../rfc/6532:242
+// text, in message body: ../rfc/5322:1001 ../rfc/6532:244
+// dtext, in domain: ../rfc/5322:967 ../rfc/6532:247
--- a/message/writer.go
+++ b/message/writer.go
@ -0,0 +1,55 @@
+package message
+
+import (
+	"io"
+)
+
+// Writer passes all data on to the wrapped Writer, while keeping track of
+// properties of the message written so far: whether the end of the headers has
+// been seen, whether 8-bit data has been seen, and the number of bytes the
+// wrapped Writer has accepted.
+type Writer struct {
+	Writer      io.Writer
+	HaveHeaders bool
+	Has8bit     bool // Whether a byte with the high (8th) bit set has been written, i.e. whether this is 8BITMIME instead of 7BIT.
+	Size        int64
+	tail        [3]byte // Last bytes of earlier writes, for detecting crlfcrlf that spans writes.
+	// todo: should be parsing headers here, as we go
+}
+
+// Write implements io.Writer. It updates HaveHeaders and Has8bit based on buf,
+// writes buf to the wrapped Writer and adds the number of bytes written to Size.
+func (w *Writer) Write(buf []byte) (int, error) {
+	if len(buf) > 0 && !w.HaveHeaders {
+		// at returns the byte at index i in buf. Negative indices refer to the bytes
+		// of earlier writes, kept in tail.
+		at := func(i int) byte {
+			if i >= 0 {
+				return buf[i]
+			}
+			return w.tail[len(w.tail)+i]
+		}
+
+		for i := range buf {
+			if buf[i] != '\n' {
+				continue
+			}
+			if at(i-1) == '\r' && at(i-2) == '\n' && at(i-3) == '\r' {
+				w.HaveHeaders = true
+				break
+			}
+		}
+
+		// Shift the last bytes of buf into tail, for the next write.
+		keep := len(buf)
+		if keep > len(w.tail) {
+			keep = len(w.tail)
+		}
+		copy(w.tail[:], w.tail[keep:])
+		copy(w.tail[len(w.tail)-keep:], buf[len(buf)-keep:])
+	}
+
+	if !w.Has8bit {
+		for _, c := range buf {
+			if c >= 0x80 {
+				w.Has8bit = true
+				break
+			}
+		}
+	}
+
+	written, err := w.Writer.Write(buf)
+	if written > 0 {
+		w.Size += int64(written)
+	}
+	return written, err
+}
--- a/message/writer_test.go
+++ b/message/writer_test.go
@ -0,0 +1,41 @@
+package message
+
+import (
+	"strings"
+	"testing"
+)
+
+// TestMsgWriter checks the detection of the end of the headers by Writer, for
+// each message both when written in a single call and when written one byte at
+// a time.
+func TestMsgWriter(t *testing.T) {
+	tests := []struct {
+		data string
+		want bool
+	}{
+		{"no header", false},
+		{"no header\r\n", false},
+		{"key: value\r\n\r\n", true},
+		{"key: value\r\n\r\nbody", true},
+		{"key: value\n\nbody", false},
+		{"key: value\r\rbody", false},
+		{"\r\n\r\n", true},
+		{"\r\n\r\nbody", true},
+	}
+
+	for _, tc := range tests {
+		// All data in a single write.
+		whole := &Writer{Writer: &strings.Builder{}}
+		if _, err := whole.Write([]byte(tc.data)); err != nil {
+			t.Fatalf("write for message %q: %s", tc.data, err)
+		}
+		if whole.HaveHeaders != tc.want {
+			t.Fatalf("got %v, expected %v, for message %q", whole.HaveHeaders, tc.want, tc.data)
+		}
+
+		// One byte per write, so the separator always spans writes.
+		perByte := &Writer{Writer: &strings.Builder{}}
+		for i := 0; i < len(tc.data); i++ {
+			if _, err := perByte.Write([]byte{tc.data[i]}); err != nil {
+				t.Fatalf("write for message %q: %s", tc.data, err)
+			}
+		}
+		if perByte.HaveHeaders != tc.want {
+			t.Fatalf("got %v, expected %v, for message %q", perByte.HaveHeaders, tc.want, tc.data)
+		}
+	}
+}