webmail: recognize q/b-word-encoded filenames in attachments in messages

according to the rfcs (2231 and 2047), non-ascii filenames in content-type
and content-disposition headers should be encoded like this:

	Content-Type: text/plain; name*=utf-8''hi%E2%98%BA.txt
	Content-Disposition: attachment; filename*=utf-8''hi%E2%98%BA.txt

and that is what the Go standard library mime.ParseMediaType and
mime.FormatMediaType parse and generate.

this is what thunderbird sends:

	Content-Type: text/plain; charset=UTF-8; name="=?UTF-8?B?aGnimLoudHh0?="
	Content-Disposition: attachment; filename*=UTF-8''%68%69%E2%98%BA%2E%74%78%74

(thunderbird will also correctly split long filenames over multiple parameters,
named "filename*0*", "filename*1*", etc.)

this is what gmail sends:

	Content-Type: text/plain; charset="US-ASCII"; name="=?UTF-8?B?aGnimLoudHh0?="
	Content-Disposition: attachment; filename="=?UTF-8?B?aGnimLoudHh0?="

i cannot find where q/b-word encoded values in the "name" and "filename"
parameters are allowed. until we find that, we try decoding them unless in
pedantic mode.

we didn't generate correctly encoded filenames yet; this commit also fixes that.

for issue #82 by mattfbacon, thanks for reporting!
This commit is contained in:
Mechiel Lukkien 2023-10-14 14:14:13 +02:00
parent 3e53343d21
commit a40f5a5eb3
No known key found for this signature in database
4 changed files with 74 additions and 45 deletions

View File

@ -524,9 +524,11 @@ func (w Webmail) MessageSubmit(ctx context.Context, m SubmitMessage) {
header("Content-Type", fmt.Sprintf(`multipart/mixed; boundary="%s"`, mp.Boundary())) header("Content-Type", fmt.Sprintf(`multipart/mixed; boundary="%s"`, mp.Boundary()))
line(xmsgw) line(xmsgw)
ct := mime.FormatMediaType("text/plain", map[string]string{"charset": charset})
textHdr := textproto.MIMEHeader{} textHdr := textproto.MIMEHeader{}
textHdr.Set("Content-Type", "text/plain; charset="+escapeParam(charset)) textHdr.Set("Content-Type", ct)
textHdr.Set("Content-Transfer-Encoding", cte) textHdr.Set("Content-Transfer-Encoding", cte)
textp, err := mp.CreatePart(textHdr) textp, err := mp.CreatePart(textHdr)
xcheckf(ctx, err, "adding text part to message") xcheckf(ctx, err, "adding text part to message")
_, err = textp.Write([]byte(text)) _, err = textp.Write([]byte(text))
@ -534,13 +536,11 @@ func (w Webmail) MessageSubmit(ctx context.Context, m SubmitMessage) {
xaddPart := func(ct, filename string) io.Writer { xaddPart := func(ct, filename string) io.Writer {
ahdr := textproto.MIMEHeader{} ahdr := textproto.MIMEHeader{}
if ct == "" { cd := mime.FormatMediaType("attachment", map[string]string{"filename": filename})
ct = "application/octet-stream"
}
ct += fmt.Sprintf(`; name="%s"`, filename)
ahdr.Set("Content-Type", ct) ahdr.Set("Content-Type", ct)
ahdr.Set("Content-Transfer-Encoding", "base64") ahdr.Set("Content-Transfer-Encoding", "base64")
ahdr.Set("Content-Disposition", fmt.Sprintf(`attachment; filename=%s`, escapeParam(filename))) ahdr.Set("Content-Disposition", cd)
ap, err := mp.CreatePart(ahdr) ap, err := mp.CreatePart(ahdr)
xcheckf(ctx, err, "adding attachment part to message") xcheckf(ctx, err, "adding attachment part to message")
return ap return ap
@ -587,12 +587,21 @@ func (w Webmail) MessageSubmit(ctx context.Context, m SubmitMessage) {
} }
ct := strings.TrimSuffix(t[0], "base64") ct := strings.TrimSuffix(t[0], "base64")
ct = strings.TrimSuffix(ct, ";") ct = strings.TrimSuffix(ct, ";")
if ct == "" {
ct = "application/octet-stream"
}
filename := a.Filename
if filename == "" {
filename = "unnamed.bin"
}
params := map[string]string{"name": filename}
ct = mime.FormatMediaType(ct, params)
// Ensure base64 is valid, then we'll write the original string. // Ensure base64 is valid, then we'll write the original string.
_, err := io.Copy(io.Discard, base64.NewDecoder(base64.StdEncoding, strings.NewReader(t[1]))) _, err := io.Copy(io.Discard, base64.NewDecoder(base64.StdEncoding, strings.NewReader(t[1])))
xcheckuserf(ctx, err, "parsing attachment as base64") xcheckuserf(ctx, err, "parsing attachment as base64")
xaddAttachmentBase64(ct, a.Filename, []byte(t[1])) xaddAttachmentBase64(ct, filename, []byte(t[1]))
} }
if len(m.ForwardAttachments.Paths) > 0 { if len(m.ForwardAttachments.Paths) > 0 {
@ -617,14 +626,16 @@ func (w Webmail) MessageSubmit(ctx context.Context, m SubmitMessage) {
ap = ap.Parts[xp] ap = ap.Parts[xp]
} }
filename := ap.ContentTypeParams["name"] filename := tryDecodeParam(log, ap.ContentTypeParams["name"])
if filename == "" { if filename == "" {
filename = "unnamed.bin" filename = "unnamed.bin"
} }
ct := strings.ToLower(ap.MediaType + "/" + ap.MediaSubType) params := map[string]string{"name": filename}
if pcharset := ap.ContentTypeParams["charset"]; pcharset != "" { if pcharset := ap.ContentTypeParams["charset"]; pcharset != "" {
ct += "; charset=" + escapeParam(pcharset) params["charset"] = pcharset
} }
ct := strings.ToLower(ap.MediaType + "/" + ap.MediaSubType)
ct = mime.FormatMediaType(ct, params)
xaddAttachment(ct, filename, ap.Reader()) xaddAttachment(ct, filename, ap.Reader())
} }
}) })
@ -634,7 +645,8 @@ func (w Webmail) MessageSubmit(ctx context.Context, m SubmitMessage) {
err = mp.Close() err = mp.Close()
xcheckf(ctx, err, "writing mime multipart") xcheckf(ctx, err, "writing mime multipart")
} else { } else {
header("Content-Type", "text/plain; charset="+escapeParam(charset)) ct := mime.FormatMediaType("text/plain", map[string]string{"charset": charset})
header("Content-Type", ct)
header("Content-Transfer-Encoding", cte) header("Content-Transfer-Encoding", cte)
line(xmsgw) line(xmsgw)
xmsgw.Write([]byte(text)) xmsgw.Write([]byte(text))

View File

@ -12,12 +12,46 @@ import (
"github.com/mjl-/mox/message" "github.com/mjl-/mox/message"
"github.com/mjl-/mox/mlog" "github.com/mjl-/mox/mlog"
"github.com/mjl-/mox/moxio" "github.com/mjl-/mox/moxio"
"github.com/mjl-/mox/moxvar"
"github.com/mjl-/mox/smtp" "github.com/mjl-/mox/smtp"
"github.com/mjl-/mox/store" "github.com/mjl-/mox/store"
) )
// todo: we should have all needed information for messageItem in store.Message (perhaps some data in message.Part) for fast access, not having to parse the on-disk message file. // todo: we should have all needed information for messageItem in store.Message (perhaps some data in message.Part) for fast access, not having to parse the on-disk message file.
// tryDecodeParam attempts to q/b-word-decode name, a value taken from the
// Content-Type "name" parameter or the Content-Disposition "filename"
// parameter.
//
// RFC 2231 specifies an encoding for non-ascii values in mime header
// parameters, but in practice senders commonly just q/b-word encode the value
// instead. Thunderbird and gmail.com do this for the Content-Type "name"
// parameter. gmail.com also does it for the Content-Disposition "filename"
// parameter, where Thunderbird uses the RFC 2231-defined encoding. Go's
// mime.ParseMediaType only parses the RFC 2231 mechanism, so for
// standards-compliant headers such as
// "filename*0*=UTF-8''%...; filename*1*=%..." the value we get here is already
// decoded properly.
//
// Decoding is only attempted when name carries a Q/B-word marker: a "=?" prefix
// or a "?=" suffix. This only causes trouble for filenames that really have
// such a prefix/suffix. In pedantic mode, or when decoding fails, name is
// returned unchanged and a debug message is logged.
func tryDecodeParam(log *mlog.Log, name string) string {
	if name == "" {
		return name
	}
	looksEncoded := strings.HasPrefix(name, "=?") || strings.HasSuffix(name, "?=")
	if !looksEncoded {
		return name
	}

	// todo: find where this is allowed. it seems quite common. perhaps we should remove the pedantic check?
	if moxvar.Pedantic {
		log.Debug("attachment contains rfc2047 q/b-word-encoded mime parameter instead of rfc2231-encoded", mlog.Field("name", name))
		return name
	}

	var decoder mime.WordDecoder
	decoded, err := decoder.DecodeHeader(name)
	if err != nil {
		log.Debugx("q/b-word decoding mime parameter", err, mlog.Field("name", name))
		return name
	}
	return decoded
}
// todo: mime.FormatMediaType does not wrap long lines. should do it ourselves, and split header into several parts (if commonly supported).
func messageItem(log *mlog.Log, m store.Message, state *msgState) (MessageItem, error) { func messageItem(log *mlog.Log, m store.Message, state *msgState) (MessageItem, error) {
pm, err := parsedMessage(log, m, state, false, true) pm, err := parsedMessage(log, m, state, false, true)
if err != nil { if err != nil {
@ -212,10 +246,9 @@ func parsedMessage(log *mlog.Log, m store.Message, state *msgState, full, msgite
disp, params, err := mime.ParseMediaType(cp) disp, params, err := mime.ParseMediaType(cp)
log.Check(err, "parsing content-disposition", mlog.Field("cp", cp)) log.Check(err, "parsing content-disposition", mlog.Field("cp", cp))
if strings.EqualFold(disp, "attachment") { if strings.EqualFold(disp, "attachment") {
// todo: should we be decoding these names? i've seen messages with regular q-word style mime-encoding, not the one specified in ../rfc/2231:210 name := tryDecodeParam(log, p.ContentTypeParams["name"])
name := p.ContentTypeParams["name"]
if name == "" { if name == "" {
name = params["filename"] name = tryDecodeParam(log, params["filename"])
} }
pm.attachments = append(pm.attachments, Attachment{path, name, p}) pm.attachments = append(pm.attachments, Attachment{path, name, p})
return return
@ -285,8 +318,8 @@ func parsedMessage(log *mlog.Log, m store.Message, state *msgState, full, msgite
return return
} }
name, ok := p.ContentTypeParams["name"] name := tryDecodeParam(log, p.ContentTypeParams["name"])
if !ok && (full || msgitem) { if name == "" && (full || msgitem) {
// todo: should have this, and perhaps all content-* headers, preparsed in message.Part? // todo: should have this, and perhaps all content-* headers, preparsed in message.Part?
h, err := p.Header() h, err := p.Header()
log.Check(err, "parsing attachment headers", mlog.Field("msgid", m.ID)) log.Check(err, "parsing attachment headers", mlog.Field("msgid", m.ID))
@ -294,7 +327,7 @@ func parsedMessage(log *mlog.Log, m store.Message, state *msgState, full, msgite
if cp != "" { if cp != "" {
_, params, err := mime.ParseMediaType(cp) _, params, err := mime.ParseMediaType(cp)
log.Check(err, "parsing content-disposition", mlog.Field("cp", cp)) log.Check(err, "parsing content-disposition", mlog.Field("cp", cp))
name = params["filename"] name = tryDecodeParam(log, params["filename"])
} }
} }
pm.attachments = append(pm.attachments, Attachment{path, name, p}) pm.attachments = append(pm.attachments, Attachment{path, name, p})

View File

@ -1786,7 +1786,7 @@ func attachmentTypes(log *mlog.Log, m store.Message, state *msgState) (map[Attac
mt := strings.ToLower(a.Part.MediaType + "/" + a.Part.MediaSubType) mt := strings.ToLower(a.Part.MediaType + "/" + a.Part.MediaSubType)
if t, ok := attachmentMimetypes[mt]; ok { if t, ok := attachmentMimetypes[mt]; ok {
types[t] = true types[t] = true
} else if ext := filepath.Ext(a.Part.ContentTypeParams["name"]); ext != "" { } else if ext := filepath.Ext(tryDecodeParam(log, a.Part.ContentTypeParams["name"])); ext != "" {
if t, ok := attachmentExtensions[strings.ToLower(ext)]; ok { if t, ok := attachmentExtensions[strings.ToLower(ext)]; ok {
types[t] = true types[t] = true
} else { } else {

View File

@ -317,25 +317,6 @@ func serveContentFallback(log *mlog.Log, w http.ResponseWriter, r *http.Request,
http.ServeContent(w, r, "", fallbackMtime(log), bytes.NewReader(fallback)) http.ServeContent(w, r, "", fallbackMtime(log), bytes.NewReader(fallback))
} }
// escapeParam formats s for use as a mime content header parameter value, such
// as a content-type charset or a content-disposition filename.
//
// A non-empty value made up solely of ASCII letters, digits, '-', '_' and '.'
// is returned as is. Anything else, including the empty string, is returned as
// a double-quoted string with backslashes and double quotes backslash-escaped.
func escapeParam(s string) string {
	// todo: follow ../rfc/2183?
	needsQuoting := func(r rune) bool {
		switch {
		case r >= 'a' && r <= 'z', r >= 'A' && r <= 'Z', r >= '0' && r <= '9':
			return false
		case r == '-', r == '_', r == '.':
			return false
		}
		return true
	}

	if s != "" && strings.IndexFunc(s, needsQuoting) < 0 {
		return s
	}

	escaped := strings.NewReplacer(`\`, `\\`, `"`, `\"`).Replace(s)
	return `"` + escaped + `"`
}
// Handler returns a handler for the webmail endpoints, customized for the max // Handler returns a handler for the webmail endpoints, customized for the max
// message size coming from the listener. // message size coming from the listener.
func Handler(maxMessageSize int64) func(w http.ResponseWriter, r *http.Request) { func Handler(maxMessageSize int64) func(w http.ResponseWriter, r *http.Request) {
@ -593,19 +574,20 @@ func handle(apiHandler http.Handler, w http.ResponseWriter, r *http.Request) {
subjectSlug = s subjectSlug = s
} }
filename := fmt.Sprintf("email-%d-attachments-%s%s.zip", m.ID, m.Received.Format("20060102-150405"), subjectSlug) filename := fmt.Sprintf("email-%d-attachments-%s%s.zip", m.ID, m.Received.Format("20060102-150405"), subjectSlug)
h.Set("Content-Disposition", fmt.Sprintf(`attachment; filename=%s`, escapeParam(filename))) cd := mime.FormatMediaType("attachment", map[string]string{"filename": filename})
h.Set("Content-Disposition", cd)
zw := zip.NewWriter(w) zw := zip.NewWriter(w)
names := map[string]bool{} names := map[string]bool{}
for _, a := range mi.Attachments { for _, a := range mi.Attachments {
ap := a.Part ap := a.Part
name := ap.ContentTypeParams["name"] name := tryDecodeParam(log, ap.ContentTypeParams["name"])
if name == "" { if name == "" {
// We don't check errors, this is all best-effort. // We don't check errors, this is all best-effort.
h, _ := ap.Header() h, _ := ap.Header()
disposition := h.Get("Content-Disposition") disposition := h.Get("Content-Disposition")
_, params, _ := mime.ParseMediaType(disposition) _, params, _ := mime.ParseMediaType(disposition)
name = params["filename"] name = tryDecodeParam(log, params["filename"])
} }
if name != "" { if name != "" {
name = filepath.Base(name) name = filepath.Base(name)
@ -697,10 +679,11 @@ func handle(apiHandler http.Handler, w http.ResponseWriter, r *http.Request) {
// not, there is not much we could do better... // not, there is not much we could do better...
headers(false, false, false) headers(false, false, false)
ct := "text/plain" ct := "text/plain"
params := map[string]string{}
if charset := p.ContentTypeParams["charset"]; charset != "" { if charset := p.ContentTypeParams["charset"]; charset != "" {
ct += fmt.Sprintf("; charset=%s", escapeParam(charset)) params["charset"] = charset
} }
h.Set("Content-Type", ct) h.Set("Content-Type", mime.FormatMediaType(ct, params))
h.Set("Cache-Control", "no-cache, max-age=0") h.Set("Cache-Control", "no-cache, max-age=0")
_, err := io.Copy(w, &moxio.AtReader{R: msgr}) _, err := io.Copy(w, &moxio.AtReader{R: msgr})
@ -892,18 +875,19 @@ func handle(apiHandler http.Handler, w http.ResponseWriter, r *http.Request) {
h.Set("Content-Type", ct) h.Set("Content-Type", ct)
h.Set("Cache-Control", "no-cache, max-age=0") h.Set("Cache-Control", "no-cache, max-age=0")
if t[1] == "download" { if t[1] == "download" {
name := ap.ContentTypeParams["name"] name := tryDecodeParam(log, ap.ContentTypeParams["name"])
if name == "" { if name == "" {
// We don't check errors, this is all best-effort. // We don't check errors, this is all best-effort.
h, _ := ap.Header() h, _ := ap.Header()
disposition := h.Get("Content-Disposition") disposition := h.Get("Content-Disposition")
_, params, _ := mime.ParseMediaType(disposition) _, params, _ := mime.ParseMediaType(disposition)
name = params["filename"] name = tryDecodeParam(log, params["filename"])
} }
if name == "" { if name == "" {
name = "attachment.bin" name = "attachment.bin"
} }
h.Set("Content-Disposition", fmt.Sprintf(`attachment; filename=%s`, escapeParam(name))) cd := mime.FormatMediaType("attachment", map[string]string{"filename": name})
h.Set("Content-Disposition", cd)
} }
_, err := io.Copy(w, ap.Reader()) _, err := io.Copy(w, ap.Reader())