From a40f5a5eb31ed160b53d30f5573dfe6b5d56129a Mon Sep 17 00:00:00 2001 From: Mechiel Lukkien Date: Sat, 14 Oct 2023 14:14:13 +0200 Subject: [PATCH] webmail: recognize q/b-word-encoded filenames in attachments in messages according to the rfc's (2231, and 2047), non-ascii filenames in content-type and content-disposition headers should be encoded like this: Content-Type: text/plain; name*=utf-8''hi%E2%98%BA.txt Content-Disposition: attachment; filename*=utf-8''hi%E2%98%BA.txt and that is what the Go standard library mime.ParseMediaType and mime.FormatMediaType parse and generate. this is what thunderbird sends: Content-Type: text/plain; charset=UTF-8; name="=?UTF-8?B?aGnimLoudHh0?=" Content-Disposition: attachment; filename*=UTF-8''%68%69%E2%98%BA%2E%74%78%74 (thunderbird will also correctly split long filenames over multiple parameters, named "filename*0*", "filename*1*", etc.) this is what gmail sends: Content-Type: text/plain; charset="US-ASCII"; name="=?UTF-8?B?aGnimLoudHh0?=" Content-Disposition: attachment; filename="=?UTF-8?B?aGnimLoudHh0?=" i cannot find where the q/b-word encoded values in "name" and "filename" are allowed. until that time, we try parsing them unless in pedantic mode. we didn't generate correctly encoded filenames yet, this commit also fixes that. for issue #82 by mattfbacon, thanks for reporting! 
--- webmail/api.go | 34 +++++++++++++++++++++++----------- webmail/message.go | 45 +++++++++++++++++++++++++++++++++++++++------ webmail/view.go | 2 +- webmail/webmail.go | 38 +++++++++++--------------------------- 4 files changed, 74 insertions(+), 45 deletions(-) diff --git a/webmail/api.go b/webmail/api.go index cb38ebf..47a8c5c 100644 --- a/webmail/api.go +++ b/webmail/api.go @@ -524,9 +524,11 @@ func (w Webmail) MessageSubmit(ctx context.Context, m SubmitMessage) { header("Content-Type", fmt.Sprintf(`multipart/mixed; boundary="%s"`, mp.Boundary())) line(xmsgw) + ct := mime.FormatMediaType("text/plain", map[string]string{"charset": charset}) textHdr := textproto.MIMEHeader{} - textHdr.Set("Content-Type", "text/plain; charset="+escapeParam(charset)) + textHdr.Set("Content-Type", ct) textHdr.Set("Content-Transfer-Encoding", cte) + textp, err := mp.CreatePart(textHdr) xcheckf(ctx, err, "adding text part to message") _, err = textp.Write([]byte(text)) @@ -534,13 +536,11 @@ func (w Webmail) MessageSubmit(ctx context.Context, m SubmitMessage) { xaddPart := func(ct, filename string) io.Writer { ahdr := textproto.MIMEHeader{} - if ct == "" { - ct = "application/octet-stream" - } - ct += fmt.Sprintf(`; name="%s"`, filename) + cd := mime.FormatMediaType("attachment", map[string]string{"filename": filename}) + ahdr.Set("Content-Type", ct) ahdr.Set("Content-Transfer-Encoding", "base64") - ahdr.Set("Content-Disposition", fmt.Sprintf(`attachment; filename=%s`, escapeParam(filename))) + ahdr.Set("Content-Disposition", cd) ap, err := mp.CreatePart(ahdr) xcheckf(ctx, err, "adding attachment part to message") return ap @@ -587,12 +587,21 @@ func (w Webmail) MessageSubmit(ctx context.Context, m SubmitMessage) { } ct := strings.TrimSuffix(t[0], "base64") ct = strings.TrimSuffix(ct, ";") + if ct == "" { + ct = "application/octet-stream" + } + filename := a.Filename + if filename == "" { + filename = "unnamed.bin" + } + params := map[string]string{"name": filename} + ct = 
mime.FormatMediaType(ct, params) // Ensure base64 is valid, then we'll write the original string. _, err := io.Copy(io.Discard, base64.NewDecoder(base64.StdEncoding, strings.NewReader(t[1]))) xcheckuserf(ctx, err, "parsing attachment as base64") - xaddAttachmentBase64(ct, a.Filename, []byte(t[1])) + xaddAttachmentBase64(ct, filename, []byte(t[1])) } if len(m.ForwardAttachments.Paths) > 0 { @@ -617,14 +626,16 @@ func (w Webmail) MessageSubmit(ctx context.Context, m SubmitMessage) { ap = ap.Parts[xp] } - filename := ap.ContentTypeParams["name"] + filename := tryDecodeParam(log, ap.ContentTypeParams["name"]) if filename == "" { filename = "unnamed.bin" } - ct := strings.ToLower(ap.MediaType + "/" + ap.MediaSubType) + params := map[string]string{"name": filename} if pcharset := ap.ContentTypeParams["charset"]; pcharset != "" { - ct += "; charset=" + escapeParam(pcharset) + params["charset"] = pcharset } + ct := strings.ToLower(ap.MediaType + "/" + ap.MediaSubType) + ct = mime.FormatMediaType(ct, params) xaddAttachment(ct, filename, ap.Reader()) } }) @@ -634,7 +645,8 @@ func (w Webmail) MessageSubmit(ctx context.Context, m SubmitMessage) { err = mp.Close() xcheckf(ctx, err, "writing mime multipart") } else { - header("Content-Type", "text/plain; charset="+escapeParam(charset)) + ct := mime.FormatMediaType("text/plain", map[string]string{"charset": charset}) + header("Content-Type", ct) header("Content-Transfer-Encoding", cte) line(xmsgw) xmsgw.Write([]byte(text)) diff --git a/webmail/message.go b/webmail/message.go index b089a3f..139a4d9 100644 --- a/webmail/message.go +++ b/webmail/message.go @@ -12,12 +12,46 @@ import ( "github.com/mjl-/mox/message" "github.com/mjl-/mox/mlog" "github.com/mjl-/mox/moxio" + "github.com/mjl-/mox/moxvar" "github.com/mjl-/mox/smtp" "github.com/mjl-/mox/store" ) // todo: we should have all needed information for messageItem in store.Message (perhaps some data in message.Part) for fast access, not having to parse the on-disk message file. 
+// Attempt to q/b-word-decode name, coming from Content-Type "name" field or +// Content-Disposition "filename" field. +// +// RFC 2231 specifies an encoding for non-ascii values in mime header parameters. But +// it appears common practice to instead just q/b-word encode the values. +// Thunderbird and gmail.com do this for the Content-Type "name" parameter. +// gmail.com also does that for the Content-Disposition "filename" parameter, where +// Thunderbird uses the RFC 2231-defined encoding. Go's mime.ParseMediaType parses +// the mechanism specified in RFC 2231 only. The value for "name" we get here would +// already be decoded properly for standards-compliant headers, like +// "filename*0*=UTF-8''%...; filename*1*=%...". We'll look for Q/B-word encoding +// markers ("=?"-prefix or "?="-suffix) and try to decode if present. This would +// only cause trouble for filenames having this prefix/suffix. +func tryDecodeParam(log *mlog.Log, name string) string { + if name == "" || !strings.HasPrefix(name, "=?") && !strings.HasSuffix(name, "?=") { + return name + } + // todo: find where this is allowed. it seems quite common. perhaps we should remove the pedantic check? + if moxvar.Pedantic { + log.Debug("attachment contains rfc2047 q/b-word-encoded mime parameter instead of rfc2231-encoded", mlog.Field("name", name)) + return name + } + dec := mime.WordDecoder{} + s, err := dec.DecodeHeader(name) + if err != nil { + log.Debugx("q/b-word decoding mime parameter", err, mlog.Field("name", name)) + return name + } + return s +} + +// todo: mime.FormatMediaType does not wrap long lines. should do it ourselves, and split header into several parts (if commonly supported). 
+ func messageItem(log *mlog.Log, m store.Message, state *msgState) (MessageItem, error) { pm, err := parsedMessage(log, m, state, false, true) if err != nil { @@ -212,10 +246,9 @@ func parsedMessage(log *mlog.Log, m store.Message, state *msgState, full, msgite disp, params, err := mime.ParseMediaType(cp) log.Check(err, "parsing content-disposition", mlog.Field("cp", cp)) if strings.EqualFold(disp, "attachment") { - // todo: should we be decoding these names? i've seen messages with regular q-word style mime-encoding, not the one specified in ../rfc/2231:210 - name := p.ContentTypeParams["name"] + name := tryDecodeParam(log, p.ContentTypeParams["name"]) if name == "" { - name = params["filename"] + name = tryDecodeParam(log, params["filename"]) } pm.attachments = append(pm.attachments, Attachment{path, name, p}) return @@ -285,8 +318,8 @@ func parsedMessage(log *mlog.Log, m store.Message, state *msgState, full, msgite return } - name, ok := p.ContentTypeParams["name"] - if !ok && (full || msgitem) { + name := tryDecodeParam(log, p.ContentTypeParams["name"]) + if name == "" && (full || msgitem) { // todo: should have this, and perhaps all content-* headers, preparsed in message.Part? 
h, err := p.Header() log.Check(err, "parsing attachment headers", mlog.Field("msgid", m.ID)) @@ -294,7 +327,7 @@ func parsedMessage(log *mlog.Log, m store.Message, state *msgState, full, msgite if cp != "" { _, params, err := mime.ParseMediaType(cp) log.Check(err, "parsing content-disposition", mlog.Field("cp", cp)) - name = params["filename"] + name = tryDecodeParam(log, params["filename"]) } } pm.attachments = append(pm.attachments, Attachment{path, name, p}) diff --git a/webmail/view.go b/webmail/view.go index e81938f..148dd37 100644 --- a/webmail/view.go +++ b/webmail/view.go @@ -1786,7 +1786,7 @@ func attachmentTypes(log *mlog.Log, m store.Message, state *msgState) (map[Attac mt := strings.ToLower(a.Part.MediaType + "/" + a.Part.MediaSubType) if t, ok := attachmentMimetypes[mt]; ok { types[t] = true - } else if ext := filepath.Ext(a.Part.ContentTypeParams["name"]); ext != "" { + } else if ext := filepath.Ext(tryDecodeParam(log, a.Part.ContentTypeParams["name"])); ext != "" { if t, ok := attachmentExtensions[strings.ToLower(ext)]; ok { types[t] = true } else { diff --git a/webmail/webmail.go b/webmail/webmail.go index d564963..c00b4ef 100644 --- a/webmail/webmail.go +++ b/webmail/webmail.go @@ -317,25 +317,6 @@ func serveContentFallback(log *mlog.Log, w http.ResponseWriter, r *http.Request, http.ServeContent(w, r, "", fallbackMtime(log), bytes.NewReader(fallback)) } -// Escape mime content header parameter, such as content-type charset or -// content-disposition filename. -func escapeParam(s string) string { - // todo: follow ../rfc/2183? - - basic := len(s) > 0 - for _, c := range s { - if c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' || c >= '0' && c <= '9' || c == '-' || c == '_' || c == '.' { - continue - } - basic = false - break - } - if basic { - return s - } - return `"` + strings.NewReplacer(`\`, `\\`, `"`, `\"`).Replace(s) + `"` -} - // Handler returns a handler for the webmail endpoints, customized for the max // message size coming from the listener. 
func Handler(maxMessageSize int64) func(w http.ResponseWriter, r *http.Request) { @@ -593,19 +574,20 @@ func handle(apiHandler http.Handler, w http.ResponseWriter, r *http.Request) { subjectSlug = s } filename := fmt.Sprintf("email-%d-attachments-%s%s.zip", m.ID, m.Received.Format("20060102-150405"), subjectSlug) - h.Set("Content-Disposition", fmt.Sprintf(`attachment; filename=%s`, escapeParam(filename))) + cd := mime.FormatMediaType("attachment", map[string]string{"filename": filename}) + h.Set("Content-Disposition", cd) zw := zip.NewWriter(w) names := map[string]bool{} for _, a := range mi.Attachments { ap := a.Part - name := ap.ContentTypeParams["name"] + name := tryDecodeParam(log, ap.ContentTypeParams["name"]) if name == "" { // We don't check errors, this is all best-effort. h, _ := ap.Header() disposition := h.Get("Content-Disposition") _, params, _ := mime.ParseMediaType(disposition) - name = params["filename"] + name = tryDecodeParam(log, params["filename"]) } if name != "" { name = filepath.Base(name) @@ -697,10 +679,11 @@ func handle(apiHandler http.Handler, w http.ResponseWriter, r *http.Request) { // not, there is not much we could do better... headers(false, false, false) ct := "text/plain" + params := map[string]string{} if charset := p.ContentTypeParams["charset"]; charset != "" { - ct += fmt.Sprintf("; charset=%s", escapeParam(charset)) + params["charset"] = charset } - h.Set("Content-Type", ct) + h.Set("Content-Type", mime.FormatMediaType(ct, params)) h.Set("Cache-Control", "no-cache, max-age=0") _, err := io.Copy(w, &moxio.AtReader{R: msgr}) @@ -892,18 +875,19 @@ func handle(apiHandler http.Handler, w http.ResponseWriter, r *http.Request) { h.Set("Content-Type", ct) h.Set("Cache-Control", "no-cache, max-age=0") if t[1] == "download" { - name := ap.ContentTypeParams["name"] + name := tryDecodeParam(log, ap.ContentTypeParams["name"]) if name == "" { // We don't check errors, this is all best-effort. 
h, _ := ap.Header() disposition := h.Get("Content-Disposition") _, params, _ := mime.ParseMediaType(disposition) - name = params["filename"] + name = tryDecodeParam(log, params["filename"]) } if name == "" { name = "attachment.bin" } - h.Set("Content-Disposition", fmt.Sprintf(`attachment; filename=%s`, escapeParam(name))) + cd := mime.FormatMediaType("attachment", map[string]string{"filename": name}) + h.Set("Content-Disposition", cd) } _, err := io.Copy(w, ap.Reader())