add functionality to import zip/tgz with maildirs/mboxes to account page

so users can easily take their email out of somewhere else, and import it into mox.

this goes a little way to give feedback as the import progresses: upload
progress is shown (surprisingly, browsers aren't doing this...), imported
mailboxes/messages are counted (batched) and import issues/warnings are
displayed, all sent over an SSE connection. an import token is stored in
sessionstorage. if you reload the page (e.g. after a connection error), the
browser will reconnect to the running import and show its progress again. and
you can just abort the import before it is finished and committed, and nothing
will have changed.

this also imports flags/keywords from mbox files.
This commit is contained in:
Mechiel Lukkien
2023-02-16 09:57:27 +01:00
parent 23b530ae36
commit 5336032088
32 changed files with 1968 additions and 518 deletions

View File

@ -574,7 +574,7 @@ func (a *Account) WithRLock(fn func()) {
// Must be called with account rlock or wlock.
//
// Caller must broadcast new message.
func (a *Account) DeliverX(log *mlog.Log, tx *bstore.Tx, m *Message, msgFile *os.File, consumeFile, isSent, sync bool) {
func (a *Account) DeliverX(log *mlog.Log, tx *bstore.Tx, m *Message, msgFile *os.File, consumeFile, isSent, sync, notrain bool) {
mb := Mailbox{ID: m.MailboxID}
err := tx.Get(&mb)
xcheckf(err, "get mailbox")
@ -671,6 +671,10 @@ func (a *Account) DeliverX(log *mlog.Log, tx *bstore.Tx, m *Message, msgFile *os
xcheckf(err, "sync directory")
}
if notrain && m.NeedsTraining() {
// If this ever happens, hopefully we'll get bug reports about it.
log.Error("deliver of message that unexpectedly needs training", mlog.Field("messageid", m.ID), mlog.Field("trainedjunk", m.TrainedJunk), mlog.Field("flags", m.Flags))
}
l := []Message{*m}
err = a.RetrainMessages(log, tx, l, false)
xcheckf(err, "training junkfilter")
@ -787,14 +791,14 @@ func (a *Account) Subjectpass(email string) (key string, err error) {
// If subscribe is true, any mailboxes that were created will also be subscribed to.
// Caller must hold account wlock.
// Caller must propagate changes if any.
func (a *Account) MailboxEnsureX(tx *bstore.Tx, name string, subscribe bool) (mb Mailbox, changes []Change) {
func (a *Account) MailboxEnsure(tx *bstore.Tx, name string, subscribe bool) (mb Mailbox, changes []Change, rerr error) {
if norm.NFC.String(name) != name {
panic("mailbox name not normalized")
return Mailbox{}, nil, fmt.Errorf("mailbox name not normalized")
}
// Quick sanity check.
if strings.EqualFold(name, "inbox") && name != "Inbox" {
panic("bad casing for inbox")
return Mailbox{}, nil, fmt.Errorf("bad casing for inbox")
}
elems := strings.Split(name, "/")
@ -803,7 +807,9 @@ func (a *Account) MailboxEnsureX(tx *bstore.Tx, name string, subscribe bool) (mb
return mb.Name == elems[0] || strings.HasPrefix(mb.Name, elems[0]+"/")
})
l, err := q.List()
xcheckf(err, "list mailboxes")
if err != nil {
return Mailbox{}, nil, fmt.Errorf("list mailboxes: %v", err)
}
mailboxes := map[string]Mailbox{}
for _, xmb := range l {
@ -822,24 +828,39 @@ func (a *Account) MailboxEnsureX(tx *bstore.Tx, name string, subscribe bool) (mb
continue
}
uidval, err := a.NextUIDValidity(tx)
xcheckf(err, "next uid validity")
if err != nil {
return Mailbox{}, nil, fmt.Errorf("next uid validity: %v", err)
}
mb = Mailbox{
Name: p,
UIDValidity: uidval,
UIDNext: 1,
}
err = tx.Insert(&mb)
xcheckf(err, "creating new mailbox")
if err != nil {
return Mailbox{}, nil, fmt.Errorf("creating new mailbox: %v", err)
}
change := ChangeAddMailbox{Name: p}
if subscribe {
err := tx.Insert(&Subscription{p})
if err != nil && !errors.Is(err, bstore.ErrUnique) {
xcheckf(err, "subscribing to mailbox")
return Mailbox{}, nil, fmt.Errorf("subscribing to mailbox: %v", err)
}
change.Flags = []string{`\Subscribed`}
}
changes = append(changes, ChangeAddMailbox{Name: p, Flags: []string{`\Subscribed`}})
changes = append(changes, change)
}
return
return mb, changes, nil
}
// MailboxEnsureX is the panicking variant of MailboxEnsure: it passes its
// arguments through unchanged and returns the mailbox and changes, but panics
// with the error when MailboxEnsure returns a non-nil one.
func (a *Account) MailboxEnsureX(tx *bstore.Tx, name string, subscribe bool) (Mailbox, []Change) {
	mb, changes, err := a.MailboxEnsure(tx, name, subscribe)
	if err == nil {
		return mb, changes
	}
	panic(err)
}
// Check if mailbox exists.
@ -1008,7 +1029,7 @@ func (a *Account) DeliverMailbox(log *mlog.Log, mailbox string, m *Message, msgF
m.MailboxOrigID = mb.ID
changes = append(changes, chl...)
a.DeliverX(log, tx, m, msgFile, consumeFile, mb.Sent, true)
a.DeliverX(log, tx, m, msgFile, consumeFile, mb.Sent, true, false)
return nil
})
// todo: if rename succeeded but transaction failed, we should remove the file.

View File

@ -72,13 +72,13 @@ func TestMailbox(t *testing.T) {
tcheck(t, err, "sent mailbox")
msent.MailboxID = mbsent.ID
msent.MailboxOrigID = mbsent.ID
acc.DeliverX(xlog, tx, &msent, msgFile, false, true, true)
acc.DeliverX(xlog, tx, &msent, msgFile, false, true, true, false)
err = tx.Insert(&mbrejects)
tcheck(t, err, "insert rejects mailbox")
mreject.MailboxID = mbrejects.ID
mreject.MailboxOrigID = mbrejects.ID
acc.DeliverX(xlog, tx, &mreject, msgFile, false, false, true)
acc.DeliverX(xlog, tx, &mreject, msgFile, false, false, true, false)
return nil
})

410
store/import.go Normal file
View File

@ -0,0 +1,410 @@
package store
import (
"bufio"
"bytes"
"errors"
"fmt"
"io"
"os"
"path/filepath"
"strconv"
"strings"
"time"
"github.com/mjl-/mox/mlog"
)
// MsgSource is implemented by readers for mailbox file formats.
type MsgSource interface {
// Return next message, or io.EOF when there are no more.
//
// In the implementations in this file (MboxReader and MaildirReader), the
// returned file is a temporary file holding the message contents, which the
// caller must close and remove/consume, and the string describes where the
// message came from: a "<filename>:<lineno>" position for mbox files, or the
// path of the message file for maildirs.
Next() (*Message, *os.File, string, error)
}
// MboxReader reads messages from an mbox file, implementing MsgSource.
type MboxReader struct {
// Called by Next to create the temporary file a message is written to.
createTemp func(pattern string) (*os.File, error)
// Filename as passed to NewMboxReader, only used for reporting positions.
path string
// Line counter reported by Position. Starts at 1 and is incremented for each line read.
line int
// Buffered reader over the mbox data.
r *bufio.Reader
// Whether the last line written to a message was empty. A "From " line only
// starts a new message when it follows an empty line.
prevempty bool
// Set once the initial "From " line of the file has been read.
nonfirst bool
// Used for logging failures to remove a temporary file.
log *mlog.Log
// Set when the end of the input was reached; subsequent calls to Next return io.EOF.
eof bool
fromLine string // "From "-line for this message.
header bool // Now in header section.
}
// NewMboxReader returns a reader that parses mbox data from r. Messages are
// written to temporary files created with createTemp. Filename is only used
// for reporting positions, and log for logging problems removing temporary
// files.
func NewMboxReader(createTemp func(pattern string) (*os.File, error), filename string, r io.Reader, log *mlog.Log) *MboxReader {
	mr := new(MboxReader)
	mr.createTemp = createTemp
	mr.path = filename
	mr.line = 1 // Line numbers in positions start at 1.
	mr.r = bufio.NewReader(r)
	mr.log = log
	return mr
}
// Position formats the reader's current location as "<filename>:<lineno>",
// using the filename given at construction and the current line counter.
func (mr *MboxReader) Position() string {
	return mr.path + ":" + fmt.Sprint(mr.line)
}
// Next returns the next message read from the mbox file. The file is a temporary
// file and must be removed/consumed. The third return value is the position in the
// file.
//
// Message data is written to the temporary file with CRLF line endings and with
// one level of ">From " quoting removed. Flags are taken from the Status,
// X-Status and X-Keywords headers. The received time is taken from the "From "
// line if its date is in a recognized layout, and is left zero otherwise.
//
// When there are no more messages, io.EOF is returned. On other errors, the
// temporary file is closed and removed before returning.
func (mr *MboxReader) Next() (*Message, *os.File, string, error) {
	if mr.eof {
		return nil, nil, "", io.EOF
	}

	from := []byte("From ")

	if !mr.nonfirst {
		mr.header = true
		// First read, we're at the beginning of the file.
		line, err := mr.r.ReadBytes('\n')
		if err == io.EOF {
			return nil, nil, "", io.EOF
		} else if err != nil {
			// Report read failures as such, instead of as a malformed first line.
			return nil, nil, mr.Position(), fmt.Errorf("reading from mbox: %v", err)
		}
		mr.line++
		if !bytes.HasPrefix(line, from) {
			return nil, nil, mr.Position(), fmt.Errorf(`first line does not start with "From "`)
		}
		mr.nonfirst = true
		mr.fromLine = strings.TrimSpace(string(line))
	}

	f, err := mr.createTemp("mboxreader")
	if err != nil {
		return nil, nil, mr.Position(), err
	}
	// Clean up the temporary file on any error return. On success, f is set to nil
	// below, handing ownership to the caller.
	defer func() {
		if f != nil {
			f.Close()
			if err := os.Remove(f.Name()); err != nil {
				mr.log.Errorx("removing temporary message file after mbox read error", err, mlog.Field("path", f.Name()))
			}
		}
	}()

	// mr.fromLine is overwritten with the next message's "From " line when we
	// encounter it, so keep the one for the message we are reading now.
	fromLine := mr.fromLine
	bf := bufio.NewWriter(f)
	var flags Flags
	var size int64
	for {
		line, err := mr.r.ReadBytes('\n')
		if err != nil && err != io.EOF {
			return nil, nil, mr.Position(), fmt.Errorf("reading from mbox: %v", err)
		}
		if len(line) > 0 {
			mr.line++
			// We store data with crlf, adjust any imported messages with bare newlines.
			// The last line of the file may lack a newline entirely, in which case there
			// is nothing to strip and we only add the crlf.
			if !bytes.HasSuffix(line, []byte("\r\n")) {
				line = append(bytes.TrimSuffix(line, []byte("\n")), "\r\n"...)
			}

			if mr.header {
				mboxHeaderFlags(&flags, line)
			}
			if bytes.Equal(line, []byte("\r\n")) {
				mr.header = false
			}

			// Next mail message starts at bare From word.
			if mr.prevempty && bytes.HasPrefix(line, from) {
				mr.fromLine = strings.TrimSpace(string(line))
				mr.header = true
				break
			}
			// Remove one level of quoting from lines like ">From " and ">>From ".
			if bytes.HasPrefix(line, []byte(">")) && bytes.HasPrefix(bytes.TrimLeft(line, ">"), []byte("From ")) {
				line = line[1:]
			}
			n, err := bf.Write(line)
			if err != nil {
				return nil, nil, mr.Position(), fmt.Errorf("writing message to file: %v", err)
			}
			size += int64(n)
			mr.prevempty = bytes.Equal(line, []byte("\r\n"))
		}
		if err == io.EOF {
			mr.eof = true
			break
		}
	}
	if err := bf.Flush(); err != nil {
		return nil, nil, mr.Position(), fmt.Errorf("flush: %v", err)
	}

	m := &Message{Flags: flags, Size: size}

	// The "From " line has the form "From <sender> <date>". Parsing the date is
	// best-effort: if no layout matches, Received stays zero.
	if t := strings.SplitN(fromLine, " ", 3); len(t) == 3 {
		for _, layout := range []string{time.ANSIC, time.UnixDate, time.RubyDate} {
			if tm, err := time.Parse(layout, t[2]); err == nil {
				m.Received = tm
				break
			}
		}
	}

	// Prevent cleanup by defer.
	mf := f
	f = nil

	return m, mf, mr.Position(), nil
}

// mboxHeaderFlags sets message flags based on a single mbox header line, for the
// Status, X-Status and X-Keywords headers. Other lines are ignored.
// See https://doc.dovecot.org/admin_manual/mailbox_formats/mbox/
func mboxHeaderFlags(flags *Flags, line []byte) {
	// value returns the trimmed text after the first colon. It is only called after
	// a prefix containing a colon has matched, so the split always has two parts.
	value := func() string {
		return strings.TrimSpace(strings.SplitN(string(line), ":", 2)[1])
	}

	switch {
	case bytes.HasPrefix(line, []byte("Status:")):
		for _, c := range value() {
			switch c {
			case 'R':
				flags.Seen = true
			}
		}
	case bytes.HasPrefix(line, []byte("X-Status:")):
		for _, c := range value() {
			switch c {
			case 'A':
				flags.Answered = true
			case 'F':
				flags.Flagged = true
			case 'T':
				flags.Draft = true
			case 'D':
				flags.Deleted = true
			}
		}
	case bytes.HasPrefix(line, []byte("X-Keywords:")):
		for _, t := range strings.Split(value(), ",") {
			flagSet(flags, strings.ToLower(strings.TrimSpace(t)))
		}
	}
}
// MaildirReader reads messages from the directories of a maildir, with a Next
// method matching MsgSource.
type MaildirReader struct {
// Called by Next to create the temporary file a message is copied to.
createTemp func(pattern string) (*os.File, error)
// Opened directories to read message files from, presumably the "new" and
// "cur" directories of the maildir.
newf, curf *os.File
f *os.File // File we are currently reading from. We first read newf, then curf.
dir string // Name of directory for f. Can be empty on first call.
// Directory entries read from f that have not been returned yet.
entries []os.DirEntry
// Keywords from the dovecot-keywords file, if it could be read. Indexed by the
// offset of a lower-case maildir flag letter from 'a'.
dovecotKeywords []string
// Used for logging problems parsing keywords and removing temporary files.
log *mlog.Log
}
// NewMaildirReader returns a reader that first returns the messages in directory
// newf, then those in curf. Messages are copied to temporary files created with
// createTemp.
//
// If a "dovecot-keywords" file exists in the parent directory of newf, it is
// parsed for mapping lower-case maildir flag letters to keywords. Problems
// opening the file are ignored, problems parsing it are only logged.
func NewMaildirReader(createTemp func(pattern string) (*os.File, error), newf, curf *os.File, log *mlog.Log) *MaildirReader {
	mr := &MaildirReader{
		createTemp: createTemp,
		newf:       newf,
		curf:       curf,
		f:          newf,
		log:        log,
	}

	// Best-effort parsing of dovecot keywords.
	keywordsPath := filepath.Join(filepath.Dir(newf.Name()), "dovecot-keywords")
	kf, err := os.Open(keywordsPath)
	if err != nil {
		return mr
	}
	defer kf.Close()

	mr.dovecotKeywords, err = ParseDovecotKeywords(kf, log)
	log.Check(err, "parsing dovecot keywords file")
	return mr
}
// Next returns the next message from the maildir: first the messages in newf,
// then those in curf. The returned file is a temporary file holding a copy of
// the message with CRLF line endings, and must be removed/consumed by the
// caller. The third return value is the path of the message file in the maildir.
//
// The received time is taken from the leading number in the filename, and flags
// from the ":2,"-suffix of the filename. Lower-case flag letters are mapped to
// keywords through the dovecot-keywords file, if it was found.
//
// When there are no more messages, io.EOF is returned. On other errors, the
// temporary file is closed and removed before returning.
func (mr *MaildirReader) Next() (*Message, *os.File, string, error) {
	if mr.dir == "" {
		mr.dir = mr.f.Name()
	}

	if len(mr.entries) == 0 {
		// Fetch the next batch of directory entries.
		var err error
		mr.entries, err = mr.f.ReadDir(100)
		if err != nil && err != io.EOF {
			return nil, nil, "", err
		}
		if len(mr.entries) == 0 {
			if mr.f == mr.curf {
				return nil, nil, "", io.EOF
			}
			// Done with newf, continue with curf.
			mr.f = mr.curf
			mr.dir = ""
			return mr.Next()
		}
	}

	p := filepath.Join(mr.dir, mr.entries[0].Name())
	mr.entries = mr.entries[1:]
	sf, err := os.Open(p)
	if err != nil {
		return nil, nil, p, fmt.Errorf("open message in maildir: %s", err)
	}
	defer sf.Close()
	f, err := mr.createTemp("maildirreader")
	if err != nil {
		return nil, nil, p, err
	}
	// Clean up the temporary file on any error return. On success, f is set to nil
	// below, handing ownership to the caller.
	defer func() {
		if f != nil {
			f.Close()
			if err := os.Remove(f.Name()); err != nil {
				mr.log.Errorx("removing temporary message file after maildir read error", err, mlog.Field("path", f.Name()))
			}
		}
	}()

	// Copy data, changing bare \n into \r\n.
	r := bufio.NewReader(sf)
	w := bufio.NewWriter(f)
	var size int64
	for {
		line, err := r.ReadBytes('\n')
		if err != nil && err != io.EOF {
			return nil, nil, p, fmt.Errorf("reading message: %v", err)
		}
		if len(line) > 0 {
			// The last line of the file may lack a newline entirely, in which case there
			// is nothing to strip and we only add the crlf.
			if !bytes.HasSuffix(line, []byte("\r\n")) {
				line = append(bytes.TrimSuffix(line, []byte("\n")), "\r\n"...)
			}

			n, err := w.Write(line)
			if err != nil {
				return nil, nil, p, fmt.Errorf("writing message: %v", err)
			}
			size += int64(n)
		}
		if err == io.EOF {
			break
		}
	}
	if err := w.Flush(); err != nil {
		return nil, nil, p, fmt.Errorf("writing message: %v", err)
	}

	name := filepath.Base(sf.Name())

	// Take received time from filename.
	var received time.Time
	t := strings.SplitN(name, ".", 2)
	if v, err := strconv.ParseInt(t[0], 10, 64); err == nil {
		received = time.Unix(v, 0)
	}

	// Parse flags. See https://cr.yp.to/proto/maildir.html.
	flags := Flags{}
	t = strings.SplitN(name, ":2,", 2)
	if len(t) == 2 {
		for _, c := range t[1] {
			switch c {
			case 'P':
				// Passed, doesn't map to a common IMAP flag.
			case 'R':
				flags.Answered = true
			case 'S':
				flags.Seen = true
			case 'T':
				flags.Deleted = true
			case 'D':
				flags.Draft = true
			case 'F':
				flags.Flagged = true
			default:
				// Lower-case letters index into the dovecot keywords, 'a' being the first.
				if c >= 'a' && c <= 'z' {
					index := int(c - 'a')
					if index >= len(mr.dovecotKeywords) {
						continue
					}
					kw := mr.dovecotKeywords[index]
					switch kw {
					case "$Forwarded", "Forwarded":
						flags.Forwarded = true
					case "$Junk", "Junk":
						flags.Junk = true
					case "$NotJunk", "NotJunk", "NonJunk":
						flags.Notjunk = true
					case "$MDNSent":
						flags.MDNSent = true
					case "$Phishing", "Phishing":
						flags.Phishing = true
					}
					// todo: custom labels, e.g. $label1, JunkRecorded?
				}
			}
		}
	}

	m := &Message{Received: received, Flags: flags, Size: size}

	// Prevent cleanup by defer.
	mf := f
	f = nil

	return m, mf, p, nil
}
// ParseDovecotKeywords parses the contents of a dovecot-keywords file, see
// https://doc.dovecot.org/admin_manual/mailbox_formats/maildir/. Each line holds
// a decimal index and a keyword, separated by a space, for example:
//
//	0 Old
//	1 Junk
//	2 NonJunk
//	3 $Forwarded
//	4 $Junk
//
// The returned slice holds the keywords at their index, and ends after the
// highest index seen. Indices that did not occur are empty strings. Only indices
// 0 up to and including 25 are accepted.
//
// Invalid lines are skipped. If there were any, or if reading failed, a single
// error describing all problems is returned along with the keywords that could
// be parsed.
func ParseDovecotKeywords(r io.Reader, log *mlog.Log) ([]string, error) {
	table := make([]string, 26)
	used := 0 // Number of leading elements of table to return.
	var problems []string

	sc := bufio.NewScanner(r)
	for sc.Scan() {
		text := sc.Text()
		fields := strings.SplitN(text, " ", 2)
		if len(fields) != 2 {
			problems = append(problems, fmt.Sprintf("unexpected dovecot keyword line: %q", text))
			continue
		}

		n, err := strconv.ParseInt(fields[0], 10, 32)
		switch {
		case err != nil:
			problems = append(problems, fmt.Sprintf("unexpected dovecot keyword index: %q", text))
		case n < 0 || n >= int64(len(table)):
			problems = append(problems, fmt.Sprintf("dovecot keyword index too big: %q", text))
		case table[n] != "":
			problems = append(problems, fmt.Sprintf("duplicate dovecot keyword: %q", text))
		default:
			table[n] = fields[1]
			if next := int(n) + 1; next > used {
				used = next
			}
		}
	}
	if err := sc.Err(); err != nil {
		problems = append(problems, fmt.Sprintf("reading dovecot keywords file: %v", err))
	}

	if len(problems) == 0 {
		return table[:used], nil
	}
	return table[:used], errors.New(strings.Join(problems, "; "))
}
// flagSet sets the flag named by word in flags. Word is matched exactly, so
// callers must pass it in lower case. Each name is recognized both with and
// without a single leading "$". Unrecognized words are ignored.
func flagSet(flags *Flags, word string) {
	// Strip at most one leading "$", so we only need to match the bare names.
	bare := word
	if len(bare) > 0 && bare[0] == '$' {
		bare = bare[1:]
	}

	if bare == "forwarded" {
		flags.Forwarded = true
	} else if bare == "junk" {
		flags.Junk = true
	} else if bare == "notjunk" || bare == "nonjunk" {
		flags.Notjunk = true
	} else if bare == "phishing" {
		flags.Phishing = true
	} else if bare == "mdnsent" {
		flags.MDNSent = true
	}
}

98
store/import_test.go Normal file
View File

@ -0,0 +1,98 @@
package store
import (
"io"
"os"
"strings"
"testing"
"github.com/mjl-/mox/mlog"
)
// TestMboxReader reads the test mbox file, expecting exactly two messages
// followed by io.EOF.
func TestMboxReader(t *testing.T) {
	mboxf, err := os.Open("../testdata/importtest.mbox")
	if err != nil {
		t.Fatalf("open mbox: %v", err)
	}
	defer mboxf.Close()

	newTemp := func(pattern string) (*os.File, error) {
		return os.CreateTemp("", pattern)
	}
	mr := NewMboxReader(newTemp, mboxf.Name(), mboxf, mlog.New("mboxreader"))

	for i := 0; i < 2; i++ {
		_, mf, _, err := mr.Next()
		if err != nil {
			t.Fatalf("next mbox message: %v", err)
		}
		// Deferred to the end of the test on purpose, the temporary files of both
		// messages are kept around until then.
		defer mf.Close()
		defer os.Remove(mf.Name())
	}

	if _, _, _, err := mr.Next(); err != io.EOF {
		t.Fatalf("got err %v, expected eof for next mbox message", err)
	}
}
// TestMaildirReader reads the test maildir, expecting exactly two messages
// from its new and cur directories, followed by io.EOF.
func TestMaildirReader(t *testing.T) {
	// todo: rename 1642966915.1.mox to "1642966915.1.mox:2,"? cannot have that name in the git repo because go module (or the proxy) doesn't like it. could also add some flags and test they survive the import.
	newf, err := os.Open("../testdata/importtest.maildir/new")
	if err != nil {
		t.Fatalf("open maildir new: %v", err)
	}
	defer newf.Close()

	curf, err := os.Open("../testdata/importtest.maildir/cur")
	if err != nil {
		t.Fatalf("open maildir cur: %v", err)
	}
	defer curf.Close()

	newTemp := func(pattern string) (*os.File, error) {
		return os.CreateTemp("", pattern)
	}
	mr := NewMaildirReader(newTemp, newf, curf, mlog.New("maildirreader"))

	for i := 0; i < 2; i++ {
		_, mf, _, err := mr.Next()
		if err != nil {
			t.Fatalf("next maildir message: %v", err)
		}
		// Deferred to the end of the test on purpose, the temporary files of both
		// messages are kept around until then.
		defer mf.Close()
		defer os.Remove(mf.Name())
	}

	if _, _, _, err := mr.Next(); err != io.EOF {
		t.Fatalf("got err %v, expected eof for next maildir message", err)
	}
}
// TestParseDovecotKeywords parses a small dovecot-keywords file with consecutive
// indices, and checks the keywords are returned in index order.
func TestParseDovecotKeywords(t *testing.T) {
	const want = "Old,Junk,NonJunk,$Forwarded,$Junk"
	const data = `0 Old
1 Junk
2 NonJunk
3 $Forwarded
4 $Junk
`

	input := strings.NewReader(data)
	keywords, err := ParseDovecotKeywords(input, mlog.New("dovecotkeywords"))
	if err != nil {
		t.Fatalf("parsing dovecot-keywords: %v", err)
	}
	if got := strings.Join(keywords, ","); got != want {
		t.Fatalf("parsing dovecot keywords, got %q, want %q", got, want)
	}
}