mox/imapserver/search.go
Mechiel Lukkien cc5e3165ea
imapserver: implement "inprogress" response code (RFC 9585) for keepalive during long search
For long searches in big mailboxes, without any matches, we would previously
keep working and not say anything. Clients could interpret this silence as a
broken connection at some point. We now send a "we're still searching" untagged
OK responses with code INPROGRESS every 10 seconds while we're still searching,
to prevent the client from closing the connection. We also send how many
messages we've processed, and usually also how many we need to process in grand
total. Clients can use this to show a progress bar.
2025-03-30 10:43:02 +02:00

630 lines
17 KiB
Go

package imapserver
import (
"fmt"
"log/slog"
"net/textproto"
"strings"
"time"
"github.com/mjl-/bstore"
"github.com/mjl-/mox/message"
"github.com/mjl-/mox/store"
"slices"
)
// If last search output was this long ago, we write an untagged inprogress
// response. Changed during tests. ../rfc/9585:109
var inProgressPeriod = time.Duration(10 * time.Second)
// Search returns messages matching criteria specified in parameters.
//
// State: Selected
func (c *conn) cmdxSearch(isUID bool, tag, cmd string, p *parser) {
// Command: ../rfc/9051:3716 ../rfc/4731:31 ../rfc/4466:354 ../rfc/3501:2723
// Examples: ../rfc/9051:3986 ../rfc/4731:153 ../rfc/3501:2975
// Syntax: ../rfc/9051:6918 ../rfc/4466:611 ../rfc/3501:4954
// We will respond with ESEARCH instead of SEARCH if "RETURN" is present or for IMAP4rev2.
var eargs map[string]bool // Options except SAVE. Nil means old-style SEARCH response.
var save bool // For SAVE option. Kept separately for easier handling of MIN/MAX later.
// IMAP4rev2 always returns ESEARCH, even with absent RETURN.
if c.enabled[capIMAP4rev2] {
eargs = map[string]bool{}
}
// ../rfc/9051:6967
if p.take(" RETURN (") {
eargs = map[string]bool{}
for !p.take(")") {
if len(eargs) > 0 || save {
p.xspace()
}
if w, ok := p.takelist("MIN", "MAX", "ALL", "COUNT", "SAVE"); ok {
if w == "SAVE" {
save = true
} else {
eargs[w] = true
}
} else {
// ../rfc/4466:378 ../rfc/9051:3745
xsyntaxErrorf("ESEARCH result option %q not supported", w)
}
}
}
// ../rfc/4731:149 ../rfc/9051:3737
if eargs != nil && len(eargs) == 0 && !save {
eargs["ALL"] = true
}
// If UTF8=ACCEPT is enabled, we should not accept any charset. We are a bit more
// relaxed (reasonable?) and still allow US-ASCII and UTF-8. ../rfc/6855:198
if p.take(" CHARSET ") {
charset := strings.ToUpper(p.xastring())
if charset != "US-ASCII" && charset != "UTF-8" {
// ../rfc/3501:2771 ../rfc/9051:3836
xusercodeErrorf("BADCHARSET", "only US-ASCII and UTF-8 supported")
}
}
p.xspace()
sk := &searchKey{
searchKeys: []searchKey{*p.xsearchKey()},
}
for !p.empty() {
p.xspace()
sk.searchKeys = append(sk.searchKeys, *p.xsearchKey())
}
// Even in case of error, we ensure search result is changed.
if save {
c.searchResult = []store.UID{}
}
// We gather word and not-word searches from the top-level, turn them
// into a WordSearch for a more efficient search.
// todo optimize: also gather them out of AND searches.
var textWords, textNotWords, bodyWords, bodyNotWords []string
n := 0
for _, xsk := range sk.searchKeys {
switch xsk.op {
case "BODY":
bodyWords = append(bodyWords, xsk.astring)
continue
case "TEXT":
textWords = append(textWords, xsk.astring)
continue
case "NOT":
switch xsk.searchKey.op {
case "BODY":
bodyNotWords = append(bodyNotWords, xsk.searchKey.astring)
continue
case "TEXT":
textNotWords = append(textNotWords, xsk.searchKey.astring)
continue
}
}
sk.searchKeys[n] = xsk
n++
}
// We may be left with an empty but non-nil sk.searchKeys, which is important for
// matching.
sk.searchKeys = sk.searchKeys[:n]
var bodySearch, textSearch *store.WordSearch
if len(bodyWords) > 0 || len(bodyNotWords) > 0 {
ws := store.PrepareWordSearch(bodyWords, bodyNotWords)
bodySearch = &ws
}
if len(textWords) > 0 || len(textNotWords) > 0 {
ws := store.PrepareWordSearch(textWords, textNotWords)
textSearch = &ws
}
// Note: we only hold the account rlock for verifying the mailbox at the start.
c.account.RLock()
runlock := c.account.RUnlock
// Note: in a defer because we replace it below.
defer func() {
runlock()
}()
// If we only have a MIN and/or MAX, we can stop processing as soon as we
// have those matches.
var min, max int
if eargs["MIN"] {
min = 1
}
if eargs["MAX"] {
max = 1
}
var expungeIssued bool
var maxModSeq store.ModSeq
// We periodically send an untagged OK with INPROGRESS code while searching, to let
// clients doing slow searches know we're still working.
inProgressLast := time.Now()
// Only respond with tag if it can't be confused as end of response code. ../rfc/9585:122
inProgressTag := "nil"
if !strings.Contains(tag, "]") {
inProgressTag = dquote(tag).pack(c)
}
var uids []store.UID
c.xdbread(func(tx *bstore.Tx) {
c.xmailboxID(tx, c.mailboxID) // Validate.
runlock()
runlock = func() {}
// Normal forward search when we don't have MAX only. We only send an "inprogress"
// goal if we know how many messages we have to check.
forward := eargs == nil || max == 0 || len(eargs) != 1
reverse := max == 1 && (len(eargs) == 1 || min+max == len(eargs))
goal := "nil"
if len(c.uids) > 0 && forward != reverse {
goal = fmt.Sprintf("%d", len(c.uids))
}
var lastIndex = -1
if forward {
for i, uid := range c.uids {
lastIndex = i
if time.Since(inProgressLast) > inProgressPeriod {
c.writelinef("* OK [INPROGRESS (%s %d %s)] still searching", inProgressTag, i, goal)
inProgressLast = time.Now()
}
if match, modseq := c.searchMatch(tx, msgseq(i+1), uid, *sk, bodySearch, textSearch, &expungeIssued); match {
uids = append(uids, uid)
if modseq > maxModSeq {
maxModSeq = modseq
}
if min == 1 && min+max == len(eargs) {
break
}
}
}
}
// And reverse search for MAX if we have only MAX or MAX combined with MIN.
if reverse {
for i := len(c.uids) - 1; i > lastIndex; i-- {
if time.Since(inProgressLast) > inProgressPeriod {
c.writelinef("* OK [INPROGRESS (%s %d %s)] still searching", inProgressTag, len(c.uids)-1-i, goal)
inProgressLast = time.Now()
}
if match, modseq := c.searchMatch(tx, msgseq(i+1), c.uids[i], *sk, bodySearch, textSearch, &expungeIssued); match {
uids = append(uids, c.uids[i])
if modseq > maxModSeq {
maxModSeq = modseq
}
break
}
}
}
})
if eargs == nil {
// In IMAP4rev1, an untagged SEARCH response is required. ../rfc/3501:2728
if len(uids) == 0 {
c.bwritelinef("* SEARCH")
}
// Old-style SEARCH response. We must spell out each number. So we may be splitting
// into multiple responses. ../rfc/9051:6809 ../rfc/3501:4833
for len(uids) > 0 {
n := len(uids)
if n > 100 {
n = 100
}
s := ""
for _, v := range uids[:n] {
if !isUID {
v = store.UID(c.xsequence(v))
}
s += " " + fmt.Sprintf("%d", v)
}
// Since we don't have the max modseq for the possibly partial uid range we're
// writing here within hand reach, we conveniently interpret the ambiguous "for all
// messages being returned" in ../rfc/7162:1107 as meaning over all lines that we
// write. And that clients only commit this value after they have seen the tagged
// end of the command. Appears to be recommended behaviour, ../rfc/7162:2323.
// ../rfc/7162:1077 ../rfc/7162:1101
var modseq string
if sk.hasModseq() {
// ../rfc/7162:2557
modseq = fmt.Sprintf(" (MODSEQ %d)", maxModSeq.Client())
}
c.bwritelinef("* SEARCH%s%s", s, modseq)
uids = uids[n:]
}
} else {
// New-style ESEARCH response syntax: ../rfc/9051:6546 ../rfc/4466:522
if save {
// ../rfc/9051:3784 ../rfc/5182:13
c.searchResult = uids
if sanityChecks {
checkUIDs(c.searchResult)
}
}
// No untagged ESEARCH response if nothing was requested. ../rfc/9051:4160
if len(eargs) > 0 {
// The tag was originally a string, became an astring in IMAP4rev2, better stick to
// string. ../rfc/4466:707 ../rfc/5259:1163 ../rfc/9051:7087
resp := fmt.Sprintf(`* ESEARCH (TAG "%s")`, tag)
if isUID {
resp += " UID"
}
// NOTE: we are converting UIDs to msgseq in the uids slice (if needed) while
// keeping the "uids" name!
if !isUID {
// If searchResult is hanging on to the slice, we need to work on a copy.
if save {
nuids := make([]store.UID, len(uids))
copy(nuids, uids)
uids = nuids
}
for i, uid := range uids {
uids[i] = store.UID(c.xsequence(uid))
}
}
// If no matches, then no MIN/MAX response. ../rfc/4731:98 ../rfc/9051:3758
if eargs["MIN"] && len(uids) > 0 {
resp += fmt.Sprintf(" MIN %d", uids[0])
}
if eargs["MAX"] && len(uids) > 0 {
resp += fmt.Sprintf(" MAX %d", uids[len(uids)-1])
}
if eargs["COUNT"] {
resp += fmt.Sprintf(" COUNT %d", len(uids))
}
if eargs["ALL"] && len(uids) > 0 {
resp += fmt.Sprintf(" ALL %s", compactUIDSet(uids).String())
}
// Interaction between ESEARCH and CONDSTORE: ../rfc/7162:1211 ../rfc/4731:273
// Summary: send the highest modseq of the returned messages.
if sk.hasModseq() && len(uids) > 0 {
resp += fmt.Sprintf(" MODSEQ %d", maxModSeq.Client())
}
c.bwritelinef("%s", resp)
}
}
if expungeIssued {
// ../rfc/9051:5102
c.writeresultf("%s OK [EXPUNGEISSUED] done", tag)
} else {
c.ok(tag, cmd)
}
}
type search struct {
c *conn
tx *bstore.Tx
seq msgseq
uid store.UID
mr *store.MsgReader
m store.Message
p *message.Part
expungeIssued *bool
hasModseq bool
}
func (c *conn) searchMatch(tx *bstore.Tx, seq msgseq, uid store.UID, sk searchKey, bodySearch, textSearch *store.WordSearch, expungeIssued *bool) (bool, store.ModSeq) {
s := search{c: c, tx: tx, seq: seq, uid: uid, expungeIssued: expungeIssued, hasModseq: sk.hasModseq()}
defer func() {
if s.mr != nil {
err := s.mr.Close()
c.xsanity(err, "closing messagereader")
s.mr = nil
}
}()
return s.match(sk, bodySearch, textSearch)
}
func (s *search) match(sk searchKey, bodySearch, textSearch *store.WordSearch) (match bool, modseq store.ModSeq) {
// Instead of littering all the cases in match0 with calls to get modseq, we do it once
// here in case of a match.
defer func() {
if match && s.hasModseq {
if s.m.ID == 0 {
match = s.xensureMessage()
}
modseq = s.m.ModSeq
}
}()
match = s.match0(sk)
if match && bodySearch != nil {
if !s.xensurePart() {
match = false
return
}
var err error
match, err = bodySearch.MatchPart(s.c.log, s.p, false)
xcheckf(err, "search words in bodies")
}
if match && textSearch != nil {
if !s.xensurePart() {
match = false
return
}
var err error
match, err = textSearch.MatchPart(s.c.log, s.p, true)
xcheckf(err, "search words in headers and bodies")
}
return
}
func (s *search) xensureMessage() bool {
if s.m.ID > 0 {
return true
}
q := bstore.QueryTx[store.Message](s.tx)
q.FilterNonzero(store.Message{MailboxID: s.c.mailboxID, UID: s.uid})
m, err := q.Get()
if err == bstore.ErrAbsent || err == nil && m.Expunged {
// ../rfc/2180:607
*s.expungeIssued = true
return false
}
xcheckf(err, "get message")
s.m = m
return true
}
// ensure message, reader and part are loaded. returns whether that was
// successful.
func (s *search) xensurePart() bool {
if s.mr != nil {
return s.p != nil
}
if !s.xensureMessage() {
return false
}
// Closed by searchMatch after all (recursive) search.match calls are finished.
s.mr = s.c.account.MessageReader(s.m)
if s.m.ParsedBuf == nil {
s.c.log.Error("missing parsed message")
return false
}
p, err := s.m.LoadPart(s.mr)
xcheckf(err, "load parsed message")
s.p = &p
return true
}
func (s *search) match0(sk searchKey) bool {
c := s.c
// Difference between sk.searchKeys nil and length 0 is important. Because we take
// out word/notword searches, the list may be empty but non-nil.
if sk.searchKeys != nil {
for _, ssk := range sk.searchKeys {
if !s.match0(ssk) {
return false
}
}
return true
} else if sk.seqSet != nil {
return sk.seqSet.containsSeq(s.seq, c.uids, c.searchResult)
}
filterHeader := func(field, value string) bool {
lower := strings.ToLower(value)
h, err := s.p.Header()
if err != nil {
c.log.Debugx("parsing message header", err, slog.Any("uid", s.uid))
return false
}
for _, v := range h.Values(field) {
if strings.Contains(strings.ToLower(v), lower) {
return true
}
}
return false
}
// We handle ops by groups that need increasing details about the message.
switch sk.op {
case "ALL":
return true
case "NEW":
// We do not implement the RECENT flag, so messages cannot be NEW.
return false
case "OLD":
// We treat all messages as non-recent, so this means all messages.
return true
case "RECENT":
// We do not implement the RECENT flag. All messages are not recent.
return false
case "NOT":
return !s.match0(*sk.searchKey)
case "OR":
return s.match0(*sk.searchKey) || s.match0(*sk.searchKey2)
case "UID":
return sk.uidSet.containsUID(s.uid, c.uids, c.searchResult)
}
// Parsed part.
if !s.xensurePart() {
return false
}
// Parsed message, basic info.
switch sk.op {
case "ANSWERED":
return s.m.Answered
case "DELETED":
return s.m.Deleted
case "FLAGGED":
return s.m.Flagged
case "KEYWORD":
kw := strings.ToLower(sk.atom)
switch kw {
case "$forwarded":
return s.m.Forwarded
case "$junk":
return s.m.Junk
case "$notjunk":
return s.m.Notjunk
case "$phishing":
return s.m.Phishing
case "$mdnsent":
return s.m.MDNSent
default:
return slices.Contains(s.m.Keywords, kw)
}
case "SEEN":
return s.m.Seen
case "UNANSWERED":
return !s.m.Answered
case "UNDELETED":
return !s.m.Deleted
case "UNFLAGGED":
return !s.m.Flagged
case "UNKEYWORD":
kw := strings.ToLower(sk.atom)
switch kw {
case "$forwarded":
return !s.m.Forwarded
case "$junk":
return !s.m.Junk
case "$notjunk":
return !s.m.Notjunk
case "$phishing":
return !s.m.Phishing
case "$mdnsent":
return !s.m.MDNSent
default:
return !slices.Contains(s.m.Keywords, kw)
}
case "UNSEEN":
return !s.m.Seen
case "DRAFT":
return s.m.Draft
case "UNDRAFT":
return !s.m.Draft
case "BEFORE", "ON", "SINCE":
skdt := sk.date.Format("2006-01-02")
rdt := s.m.Received.Format("2006-01-02")
switch sk.op {
case "BEFORE":
return rdt < skdt
case "ON":
return rdt == skdt
case "SINCE":
return rdt >= skdt
}
panic("missing case")
case "LARGER":
return s.m.Size > sk.number
case "SMALLER":
return s.m.Size < sk.number
case "MODSEQ":
// ../rfc/7162:1045
return s.m.ModSeq.Client() >= *sk.clientModseq
case "SAVEDBEFORE", "SAVEDON", "SAVEDSINCE":
// If we don't have a savedate for this message (for messages received before we
// implemented this feature), we use the "internal date" (received timestamp) of
// the message. ../rfc/8514:237
rt := s.m.Received
if s.m.SaveDate != nil {
rt = *s.m.SaveDate
}
skdt := sk.date.Format("2006-01-02")
rdt := rt.Format("2006-01-02")
switch sk.op {
case "SAVEDBEFORE":
return rdt < skdt
case "SAVEDON":
return rdt == skdt
case "SAVEDSINCE":
return rdt >= skdt
}
panic("missing case")
case "SAVEDATESUPPORTED":
// We return whether we have a savedate for this message. We support it on all
// mailboxes, but we only have this metadata from the time we implemented this
// feature.
return s.m.SaveDate != nil
case "OLDER":
// ../rfc/5032:76
seconds := int64(time.Since(s.m.Received) / time.Second)
return seconds >= sk.number
case "YOUNGER":
seconds := int64(time.Since(s.m.Received) / time.Second)
return seconds <= sk.number
}
if s.p == nil {
c.log.Info("missing parsed message, not matching", slog.Any("uid", s.uid))
return false
}
// Parsed message, more info.
switch sk.op {
case "BCC":
return filterHeader("Bcc", sk.astring)
case "BODY", "TEXT":
// We gathered word/notword searches from the top-level, but we can also get them
// nested.
// todo optimize: handle deeper nested word/not-word searches more efficiently.
headerToo := sk.op == "TEXT"
match, err := store.PrepareWordSearch([]string{sk.astring}, nil).MatchPart(s.c.log, s.p, headerToo)
xcheckf(err, "word search")
return match
case "CC":
return filterHeader("Cc", sk.astring)
case "FROM":
return filterHeader("From", sk.astring)
case "SUBJECT":
return filterHeader("Subject", sk.astring)
case "TO":
return filterHeader("To", sk.astring)
case "HEADER":
// ../rfc/9051:3895
lower := strings.ToLower(sk.astring)
h, err := s.p.Header()
if err != nil {
c.log.Errorx("parsing header for search", err, slog.Any("uid", s.uid))
return false
}
k := textproto.CanonicalMIMEHeaderKey(sk.headerField)
for _, v := range h.Values(k) {
if lower == "" || strings.Contains(strings.ToLower(v), lower) {
return true
}
}
return false
case "SENTBEFORE", "SENTON", "SENTSINCE":
if s.p.Envelope == nil || s.p.Envelope.Date.IsZero() {
return false
}
dt := s.p.Envelope.Date.Format("2006-01-02")
skdt := sk.date.Format("2006-01-02")
switch sk.op {
case "SENTBEFORE":
return dt < skdt
case "SENTON":
return dt == skdt
case "SENTSINCE":
return dt > skdt
}
panic("missing case")
}
panic(serverError{fmt.Errorf("missing case for search key op %q", sk.op)})
}