mirror of
https://github.com/mjl-/mox.git
synced 2025-07-15 13:24:38 +03:00
Do not use results from the junk filter if we have fewer than 50 positive (non-junk) classifications to base the decision on
This is useful for new accounts. We don't want to start rejecting incoming messages for having a score near 0.5 merely because there is too little training material, so we err on the side of allowing messages in. The user will mark unwanted messages as junk, which trains the filter. Once enough non-junk has come in, we start the actual filtering. For issue #64 by x8x; I've also seen this concern raised on Matrix.
This commit is contained in:
@ -230,7 +230,6 @@ test email
|
||||
`, "\n", "\r\n")
|
||||
|
||||
ts.run(func(err error, client *smtpclient.Client) {
|
||||
t.Helper()
|
||||
mailFrom := "mjl@mox.example"
|
||||
rcptTo := []string{"private@mox.example", "móx@mox.example"}
|
||||
if err == nil {
|
||||
@ -239,11 +238,10 @@ test email
|
||||
}
|
||||
ts.smtpErr(err, nil)
|
||||
|
||||
ts.checkCount("Inbox", 0) // Not receiving for mjl@ due to msgfrom, and not móx@ due to rcpt to.
|
||||
ts.checkCount("Inbox", 1) // Receiving once. For explicit móx@ recipient, not for mjl@ due to msgfrom, and another again for móx@ due to rcpt to.
|
||||
})
|
||||
|
||||
ts.run(func(err error, client *smtpclient.Client) {
|
||||
t.Helper()
|
||||
mailFrom := "mjl@mox.example"
|
||||
rcptTo := "private@mox.example"
|
||||
if err == nil {
|
||||
@ -251,7 +249,7 @@ test email
|
||||
}
|
||||
ts.smtpErr(err, nil)
|
||||
|
||||
ts.checkCount("Inbox", 1) // Only receiving for móx@mox.example, not mjl@.
|
||||
ts.checkCount("Inbox", 2) // Only receiving 1 new message compared to previous, for móx@mox.example, not mjl@.
|
||||
})
|
||||
|
||||
msg = strings.ReplaceAll(`From: <private@mox.example>
|
||||
|
@ -528,7 +528,7 @@ func analyze(ctx context.Context, log mlog.Log, resolver dns.Resolver, d deliver
|
||||
err := f.Close()
|
||||
log.Check(err, "closing junkfilter")
|
||||
}()
|
||||
contentProb, _, hams, spams, err := f.ClassifyMessageReader(ctx, store.FileMsgReader(d.m.MsgPrefix, d.dataFile), d.m.Size)
|
||||
result, err := f.ClassifyMessageReader(ctx, store.FileMsgReader(d.m.MsgPrefix, d.dataFile), d.m.Size)
|
||||
if err != nil {
|
||||
log.Errorx("testing for spam", err)
|
||||
addReasonText("classify message error: %v", err)
|
||||
@ -587,11 +587,12 @@ func analyze(ctx context.Context, log mlog.Log, resolver dns.Resolver, d deliver
|
||||
reason = reasonJunkContentStrict
|
||||
thresholdRemark = " (stricter due to recipient address not in to/cc header)"
|
||||
}
|
||||
accept = contentProb <= threshold
|
||||
junkSubjectpass = contentProb < threshold-0.2
|
||||
accept = result.Probability <= threshold || (!result.Significant && !suspiciousIPrevFail)
|
||||
junkSubjectpass = result.Probability < threshold-0.2
|
||||
log.Info("content analyzed",
|
||||
slog.Bool("accept", accept),
|
||||
slog.Float64("contentprob", contentProb),
|
||||
slog.Float64("contentprob", result.Probability),
|
||||
slog.Bool("contentsignificant", result.Significant),
|
||||
slog.Bool("subjectpass", junkSubjectpass))
|
||||
|
||||
s := "content: "
|
||||
@ -600,9 +601,12 @@ func analyze(ctx context.Context, log mlog.Log, resolver dns.Resolver, d deliver
|
||||
} else {
|
||||
s += "junk"
|
||||
}
|
||||
s += fmt.Sprintf(", spamscore %.2f, threshold %.2f%s", contentProb, threshold, thresholdRemark)
|
||||
if !result.Significant {
|
||||
s += " (not significant)"
|
||||
}
|
||||
s += fmt.Sprintf(", spamscore %.2f, threshold %.2f%s", result.Probability, threshold, thresholdRemark)
|
||||
s += " (ham words: "
|
||||
for i, w := range hams {
|
||||
for i, w := range result.Hams {
|
||||
if i > 0 {
|
||||
s += ", "
|
||||
}
|
||||
@ -613,7 +617,7 @@ func analyze(ctx context.Context, log mlog.Log, resolver dns.Resolver, d deliver
|
||||
s += fmt.Sprintf("%s %.3f", word, w.Score)
|
||||
}
|
||||
s += "), (spam words: "
|
||||
for i, w := range spams {
|
||||
for i, w := range result.Spams {
|
||||
if i > 0 {
|
||||
s += ", "
|
||||
}
|
||||
|
@ -670,6 +670,8 @@ func TestSpam(t *testing.T) {
|
||||
for i := 0; i < 3; i++ {
|
||||
nm := m
|
||||
tinsertmsg(t, ts.acc, "Inbox", &nm, deliverMessage)
|
||||
nm = m
|
||||
tinsertmsg(t, ts.acc, "mjl2", &nm, deliverMessage)
|
||||
}
|
||||
|
||||
// Delivery from sender with bad reputation should fail.
|
||||
@ -922,16 +924,22 @@ func TestDMARCSent(t *testing.T) {
|
||||
// Update DNS for an SPF pass, and DMARC pass.
|
||||
resolver.TXT["example.org."] = []string{"v=spf1 ip4:127.0.0.10 -all"}
|
||||
|
||||
// Insert spammy messages not related to the test message.
|
||||
// Insert hammy & spammy messages not related to the test message.
|
||||
m := store.Message{
|
||||
MailFrom: "remote@test.example",
|
||||
RcptToLocalpart: smtp.Localpart("mjl"),
|
||||
RcptToDomain: "mox.example",
|
||||
Flags: store.Flags{Seen: true, Junk: true},
|
||||
Flags: store.Flags{Seen: true},
|
||||
Size: int64(len(deliverMessage)),
|
||||
}
|
||||
for i := 0; i < 3; i++ {
|
||||
// We need at least 50 ham messages for the junk filter to become significant. We
|
||||
// offset it with negative messages for mediocre score.
|
||||
for i := 0; i < 50; i++ {
|
||||
nm := m
|
||||
nm.Junk = true
|
||||
tinsertmsg(t, ts.acc, "Archive", &nm, deliverMessage)
|
||||
nm = m
|
||||
nm.Notjunk = true
|
||||
tinsertmsg(t, ts.acc, "Archive", &nm, deliverMessage)
|
||||
}
|
||||
tretrain(t, ts.acc)
|
||||
|
Reference in New Issue
Block a user