implement exporting of all mailboxes/messages as zip/tgz of mbox/maildir

This commit is contained in:
Mechiel Lukkien
2023-02-13 18:04:05 +01:00
parent ad51ffc365
commit 3de6642b3a
6 changed files with 547 additions and 7 deletions

383
store/export.go Normal file
View File

@ -0,0 +1,383 @@
package store
import (
"archive/tar"
"archive/zip"
"bufio"
"bytes"
"fmt"
"io"
"os"
"path/filepath"
"sort"
"time"
"github.com/mjl-/bstore"
"github.com/mjl-/mox/mlog"
)
// Archiver can archive multiple mailboxes and their messages.
//
// It is the destination that ExportMessages writes to. Entries are added one at a
// time, and the size of an entry is passed when it is created.
type Archiver interface {
	// Create adds an entry called name to the archive, with the given size in bytes
	// and modification time, and returns a writer for the contents of that entry.
	Create(name string, size int64, mtime time.Time) (io.Writer, error)

	// Close finishes the archive.
	Close() error
}
// TarArchiver is an Archiver that writes to a tar file.
type TarArchiver struct {
	*tar.Writer
}

// Create adds a file header to the tar file and returns the writer to which
// exactly size bytes of content must be written before the next Create or Close.
//
// A name ending in a slash is stored as a directory by archive/tar. Such entries
// get mode 0700, so the directory can be entered after extraction. All other
// entries are regular files with mode 0600.
func (a TarArchiver) Create(name string, size int64, mtime time.Time) (io.Writer, error) {
	// Regular files are readable and writable by the owner only.
	mode := int64(0600)
	if len(name) > 0 && name[len(name)-1] == '/' {
		// Directories need the search (execute) bit, otherwise the files extracted
		// into them cannot be accessed.
		mode = 0700
	}
	hdr := tar.Header{
		Name:    name,
		Size:    size,
		Mode:    mode,
		ModTime: mtime,
		Format:  tar.FormatPAX,
	}
	if err := a.WriteHeader(&hdr); err != nil {
		return nil, err
	}
	return a, nil
}
// ZipArchiver implements Archiver on top of a zip.Writer.
type ZipArchiver struct {
	*zip.Writer
}

// Create starts a new deflate-compressed entry in the zip file, recording the
// given name, uncompressed size and modification time in its header. The
// returned writer receives the contents of the entry.
func (a ZipArchiver) Create(name string, size int64, mtime time.Time) (io.Writer, error) {
	return a.CreateHeader(&zip.FileHeader{
		Name:               name,
		Method:             zip.Deflate,
		Modified:           mtime,
		UncompressedSize64: uint64(size),
	})
}
// ExportMessages writes messages to archiver. Either in maildir format, or otherwise in
// mbox. If mailboxOpt is empty, all mailboxes are exported, otherwise only the
// named mailbox.
//
// In maildir format, each message becomes a file in the "new" or "cur" directory
// of its mailbox. In mbox format, each mailbox becomes a single "<name>.mbox"
// file. In both formats, "\r\n" line endings are converted to "\n".
//
// Some errors are not fatal and result in skipped messages. If that happens, a
// file "errors.txt" is added to the archive describing the errors. The goal is to
// let users export (hopefully) most messages even in the face of errors.
//
// ExportMessages does not close the archiver, the caller must do that.
func (a *Account) ExportMessages(log *mlog.Log, archiver Archiver, maildir bool, mailboxOpt string) error {
	// Start transaction without closure, we are going to close it early, but don't
	// want to deal with declaring many variables now to be able to assign them in a
	// closure and use them afterwards.
	tx, err := a.DB.Begin(false)
	if err != nil {
		return fmt.Errorf("transaction: %v", err)
	}
	defer func() {
		// Only roll back if we return before the explicit rollback below.
		if tx != nil {
			tx.Rollback()
		}
	}()

	start := time.Now()

	// Set up mailbox names and ids.
	id2name := map[int64]string{}
	name2id := map[string]int64{}

	mailboxes, err := bstore.QueryTx[Mailbox](tx).List()
	if err != nil {
		return fmt.Errorf("query mailboxes: %v", err)
	}
	for _, mb := range mailboxes {
		id2name[mb.ID] = mb.Name
		name2id[mb.Name] = mb.ID
	}

	// A zero mailboxID means all mailboxes are exported.
	var mailboxID int64
	if mailboxOpt != "" {
		var ok bool
		mailboxID, ok = name2id[mailboxOpt]
		if !ok {
			return fmt.Errorf("mailbox not found")
		}
	}

	var names []string
	for _, name := range id2name {
		if mailboxOpt != "" && name != mailboxOpt {
			continue
		}
		names = append(names, name)
	}
	// We need to sort the names because maildirs can create subdirs. Ranging over
	// id2name directly would randomize the directory names, we would create a sub
	// maildir before the parent, and fail with "dir exists" when creating the parent
	// dir.
	sort.Strings(names)

	mailboxOrder := map[int64]int{}
	for i, name := range names {
		mbID := name2id[name]
		mailboxOrder[mbID] = i
	}

	// Fetch all messages. This can take quite a bit of memory if the mailbox is large.
	q := bstore.QueryTx[Message](tx)
	if mailboxID > 0 {
		q.FilterNonzero(Message{MailboxID: mailboxID})
	}
	msgs, err := q.List()
	if err != nil {
		return fmt.Errorf("listing messages: %v", err)
	}

	// Close transaction. We don't want to hold it for too long. We are now at risk
	// that a message is removed while we export, or flags changed. At least the
	// size won't change. If we cannot open the message later on, we'll skip it and add
	// an error message to an errors.txt file in the output archive.
	if err := tx.Rollback(); err != nil {
		return fmt.Errorf("closing transaction: %v", err)
	}
	tx = nil

	// Order the messages by mailbox, received time and finally message ID.
	sort.Slice(msgs, func(i, j int) bool {
		iid := msgs[i].MailboxID
		jid := msgs[j].MailboxID
		if iid != jid {
			return mailboxOrder[iid] < mailboxOrder[jid]
		}
		t := msgs[i].Received.Compare(msgs[j].Received)
		if t != 0 {
			return t < 0
		}
		return msgs[i].ID < msgs[j].ID
	})

	// We keep track of errors reading message files. We continue exporting and add an
	// errors.txt file to the archive. In case of errors, the user can get (hopefully)
	// most of their emails, and see something went wrong. For other errors, like
	// writing to the archiver (e.g. a browser), we abort, because we don't want to
	// continue with useless work.
	var errors string

	var curMailboxID int64 // Used to set curMailbox and finish a previous mbox file.
	var curMailbox string

	// For mbox export, messages of the current mailbox are first collected in a
	// temporary file, because the archiver needs the total size when the entry is
	// created.
	var mboxtmp *os.File
	var mboxwriter *bufio.Writer
	defer func() {
		if mboxtmp != nil {
			mboxtmp.Close()
		}
	}()

	// finishMbox copies the temporary mbox file of the current mailbox, if any, into
	// the archive and closes the temporary file.
	finishMbox := func() error {
		if mboxtmp == nil {
			return nil
		}

		if err := mboxwriter.Flush(); err != nil {
			return fmt.Errorf("flush mbox writer: %v", err)
		}
		fi, err := mboxtmp.Stat()
		if err != nil {
			return fmt.Errorf("stat temporary mbox file: %v", err)
		}
		if _, err := mboxtmp.Seek(0, 0); err != nil {
			return fmt.Errorf("seek to start of temporary mbox file: %v", err)
		}
		w, err := archiver.Create(curMailbox+".mbox", fi.Size(), fi.ModTime())
		if err != nil {
			return fmt.Errorf("add mbox to archive: %v", err)
		}
		if _, err := io.Copy(w, mboxtmp); err != nil {
			return fmt.Errorf("copying temp mbox file to archive: %v", err)
		}
		if err := mboxtmp.Close(); err != nil {
			log.Errorx("closing temporary mbox file", err)
			// Continue, not fatal.
		}
		mboxwriter = nil
		mboxtmp = nil
		return nil
	}

	// exportMessage adds a single message to the archive (maildir), or to the
	// temporary mbox file of the current mailbox (mbox). Problems opening or reading
	// a message file for maildir are recorded in errors and the message is skipped.
	// A returned error is fatal for the export.
	exportMessage := func(m Message) error {
		mp := a.MessagePath(m.ID)
		var mr io.ReadCloser
		if m.Size == int64(len(m.MsgPrefix)) {
			// The whole message is in the prefix, there is no file content to read.
			mr = io.NopCloser(bytes.NewReader(m.MsgPrefix))
		} else {
			mpf, err := os.Open(mp)
			if err != nil {
				errors += fmt.Sprintf("open message file for id %d, path %s: %v (message skipped)\n", m.ID, mp, err)
				return nil
			}
			defer mpf.Close()
			st, err := mpf.Stat()
			if err != nil {
				errors += fmt.Sprintf("stat message file for id %d, path %s: %v (message skipped)\n", m.ID, mp, err)
				return nil
			}
			size := st.Size() + int64(len(m.MsgPrefix))
			if size != m.Size {
				errors += fmt.Sprintf("message size mismatch for message id %d, database has %d, size is %d+%d=%d, using calculated size\n", m.ID, m.Size, len(m.MsgPrefix), st.Size(), size)
			}
			mr = FileMsgReader(m.MsgPrefix, mpf)
		}

		if maildir {
			p := curMailbox
			if m.Flags.Seen {
				p = filepath.Join(p, "cur")
			} else {
				p = filepath.Join(p, "new")
			}
			name := fmt.Sprintf("%d.%d.mox:2,", m.Received.Unix(), m.ID)
			// todo: more flags? forwarded, (non)junk, phishing, mdnsent would be nice. but what is the convention. dovecot-keywords sounds non-standard.
			if m.Flags.Seen {
				name += "S"
			}
			if m.Flags.Answered {
				name += "R"
			}
			if m.Flags.Flagged {
				name += "F"
			}
			if m.Flags.Draft {
				name += "D"
			}
			// Names in archives use forward slashes, regardless of the separator of the
			// operating system we are running on.
			p = filepath.ToSlash(filepath.Join(p, name))

			// We store messages with \r\n, maildir needs without. But we need to know the
			// final size. So first convert, then create file with size, and write from buffer.
			// todo: for large messages, we should go through a temporary file instead of memory.
			var dst bytes.Buffer
			r := bufio.NewReader(mr)
			for {
				line, rerr := r.ReadBytes('\n')
				if rerr != io.EOF && rerr != nil {
					errors += fmt.Sprintf("reading from message for id %d: %v (message skipped)\n", m.ID, rerr)
					return nil
				}
				if len(line) > 0 {
					if bytes.HasSuffix(line, []byte("\r\n")) {
						line = line[:len(line)-1]
						line[len(line)-1] = '\n'
					}
					if _, err := dst.Write(line); err != nil {
						return fmt.Errorf("writing message: %v", err)
					}
				}
				if rerr == io.EOF {
					break
				}
			}
			size := int64(dst.Len())
			w, err := archiver.Create(p, size, m.Received)
			if err != nil {
				return fmt.Errorf("adding message to archive: %v", err)
			}
			if _, err := io.Copy(w, &dst); err != nil {
				return fmt.Errorf("copying message to archive: %v", err)
			}
			return nil
		}

		// todo: should we put status flags in Status or X-Status header inside the message?
		// todo: should we do anything with Content-Length headers? changing the escaping could invalidate those. is anything checking that field?

		mailfrom := "mox"
		if m.MailFrom != "" {
			mailfrom = m.MailFrom
		}
		if _, err := fmt.Fprintf(mboxwriter, "From %s %s\n", mailfrom, m.Received.Format(time.ANSIC)); err != nil {
			return fmt.Errorf("write message line to mbox temp file: %v", err)
		}
		r := bufio.NewReader(mr)
		for {
			line, rerr := r.ReadBytes('\n')
			if rerr != io.EOF && rerr != nil {
				// The mbox file already has part of this message, so we cannot skip it.
				return fmt.Errorf("reading message: %v", rerr)
			}
			if len(line) > 0 {
				if bytes.HasSuffix(line, []byte("\r\n")) {
					line = line[:len(line)-1]
					line[len(line)-1] = '\n'
				}
				// Escape lines that look like a message separator, also if they were
				// already escaped, so the escaping can be reversed.
				if bytes.HasPrefix(bytes.TrimLeft(line, ">"), []byte("From ")) {
					if _, err := fmt.Fprint(mboxwriter, ">"); err != nil {
						return fmt.Errorf("writing escaping >: %v", err)
					}
				}
				if _, err := mboxwriter.Write(line); err != nil {
					return fmt.Errorf("writing line: %v", err)
				}
			}
			if rerr == io.EOF {
				break
			}
		}
		if _, err := fmt.Fprint(mboxwriter, "\n"); err != nil {
			return fmt.Errorf("writing end of message newline: %v", err)
		}

		return nil
	}

	for _, m := range msgs {
		if m.MailboxID != curMailboxID {
			// Messages are sorted by mailbox, so this is the start of the next mailbox.
			if err := finishMbox(); err != nil {
				return err
			}

			curMailbox = id2name[m.MailboxID]
			curMailboxID = m.MailboxID
			if maildir {
				// Create the directories that show this is a maildir.
				if _, err := archiver.Create(curMailbox+"/new/", 0, start); err != nil {
					return fmt.Errorf("adding maildir new directory: %v", err)
				}
				if _, err := archiver.Create(curMailbox+"/cur/", 0, start); err != nil {
					return fmt.Errorf("adding maildir cur directory: %v", err)
				}
				if _, err := archiver.Create(curMailbox+"/tmp/", 0, start); err != nil {
					return fmt.Errorf("adding maildir tmp directory: %v", err)
				}
			} else {
				mboxtmp, err = os.CreateTemp("", "mox-mail-export-mbox")
				if err != nil {
					return fmt.Errorf("creating temp mbox file: %v", err)
				}
				// Remove file immediately, so we are sure we don't leave it around.
				if err := os.Remove(mboxtmp.Name()); err != nil {
					return fmt.Errorf("removing temp file just created: %v", err)
				}
				mboxwriter = bufio.NewWriter(mboxtmp)
			}
		}

		if err := exportMessage(m); err != nil {
			return err
		}
	}
	if err := finishMbox(); err != nil {
		return err
	}

	if errors != "" {
		w, err := archiver.Create("errors.txt", int64(len(errors)), time.Now())
		if err != nil {
			log.Errorx("adding errors.txt to archive", err)
			return err
		}
		if _, err := w.Write([]byte(errors)); err != nil {
			log.Errorx("writing errors.txt to archive", err)
			return err
		}
	}
	return nil
}

96
store/export_test.go Normal file
View File

@ -0,0 +1,96 @@
package store
import (
"archive/tar"
"archive/zip"
"bytes"
"io"
"os"
"testing"
"time"
"github.com/mjl-/mox/mlog"
"github.com/mjl-/mox/mox-"
)
// TestExport exports an account with two messages in all four combinations of
// archive type (zip, tar) and mail format (maildir, mbox), and checks the number
// of entries in each archive.
func TestExport(t *testing.T) {
	// Set up an account, add 2 messages to 2 different mailboxes. Export as tar/zip
	// and maildir/mbox. Check there are 2 files in each archive, and no errors.txt.

	os.RemoveAll("../testdata/store/data")
	mox.ConfigStaticPath = "../testdata/store/mox.conf"
	mox.MustLoadConfig()
	acc, err := OpenAccount("mjl")
	tcheck(t, err, "open account")
	defer acc.Close()
	switchDone := Switchboard()
	defer close(switchDone)

	log := mlog.New("export")

	msgFile, err := os.CreateTemp("", "mox-test-export")
	tcheck(t, err, "create temp")
	defer os.Remove(msgFile.Name()) // To be sure.
	defer msgFile.Close()
	const msg = "test: test\r\n\r\ntest\r\n"
	_, err = msgFile.Write([]byte(msg))
	tcheck(t, err, "write message")

	m := Message{Received: time.Now(), Size: int64(len(msg))}
	err = acc.DeliverMailbox(xlog, "Inbox", &m, msgFile, false)
	tcheck(t, err, "deliver")

	m = Message{Received: time.Now(), Size: int64(len(msg))}
	err = acc.DeliverMailbox(xlog, "Trash", &m, msgFile, true)
	tcheck(t, err, "deliver")

	var maildirZip, maildirTar, mboxZip, mboxTar bytes.Buffer

	// archive exports all mailboxes to archiver and finishes the archive.
	archive := func(archiver Archiver, maildir bool) {
		t.Helper()
		err = acc.ExportMessages(log, archiver, maildir, "")
		tcheck(t, err, "export messages")
		err = archiver.Close()
		tcheck(t, err, "archiver close")
	}

	archive(ZipArchiver{zip.NewWriter(&maildirZip)}, true)
	archive(ZipArchiver{zip.NewWriter(&mboxZip)}, false)
	archive(TarArchiver{tar.NewWriter(&maildirTar)}, true)
	archive(TarArchiver{tar.NewWriter(&mboxTar)}, false)

	// checkZipFiles verifies the zip archive in buf has n entries, none of them
	// errors.txt.
	checkZipFiles := func(kind string, buf *bytes.Buffer, n int) {
		t.Helper()
		r, err := zip.NewReader(bytes.NewReader(buf.Bytes()), int64(buf.Len()))
		if err != nil {
			t.Fatalf("reading %s zip: %v", kind, err)
		}
		for _, f := range r.File {
			if f.Name == "errors.txt" {
				t.Fatalf("%s zip: got errors.txt", kind)
			}
		}
		if len(r.File) != n {
			t.Fatalf("%s zip: got %d files, expected %d", kind, len(r.File), n)
		}
	}

	// checkTarFiles verifies the tar archive read from r has n entries, none of them
	// errors.txt.
	checkTarFiles := func(r io.Reader, n int) {
		t.Helper()
		tr := tar.NewReader(r)
		have := 0
		for {
			h, err := tr.Next()
			if err == io.EOF {
				break
			}
			// Any other error leaves h nil, so check before using it.
			tcheck(t, err, "next tar header")
			have++
			if h.Name == "errors.txt" {
				t.Fatalf("got errors.txt")
			}
			_, err = io.Copy(io.Discard, tr)
			tcheck(t, err, "copy")
		}
		if have != n {
			t.Fatalf("got %d files, expected %d", have, n)
		}
	}

	// Maildir has 3 directories per mailbox, and 1 file per message.
	checkZipFiles("maildir", &maildirZip, 2*3+2)
	checkZipFiles("mbox", &mboxZip, 2)
	checkTarFiles(&maildirTar, 2*3+2)
	checkTarFiles(&mboxTar, 2)
}