Do not use results from the junk filter if we have fewer than 50 positive classifications to base the decision on

This is useful for new accounts. We don't want to start rejecting incoming messages for
having a score near 0.5 because of too little training material, so we err on the
side of allowing messages in. The user will mark the unwanted ones as junk, training the
filter. Once enough non-junk has come in, we'll start the actual filtering.

For issue #64 by x8x; I've also seen this concern raised on Matrix.
This commit is contained in:
Mechiel Lukkien
2025-01-23 22:55:50 +01:00
parent 8fac9f862b
commit 6aa2139a54
6 changed files with 93 additions and 68 deletions

View File

@ -78,16 +78,16 @@ func TestFilter(t *testing.T) {
return
}
prob, _, _, _, err := f.ClassifyMessagePath(ctxbg, filepath.Join(hamdir, hamfiles[0]))
result, err := f.ClassifyMessagePath(ctxbg, filepath.Join(hamdir, hamfiles[0]))
tcheck(t, err, "classify ham message")
if prob > 0.1 {
t.Fatalf("trained ham file has prob %v, expected <= 0.1", prob)
if result.Probability > 0.1 {
t.Fatalf("trained ham file has prob %v, expected <= 0.1", result.Probability)
}
prob, _, _, _, err = f.ClassifyMessagePath(ctxbg, filepath.Join(spamdir, spamfiles[0]))
result, err = f.ClassifyMessagePath(ctxbg, filepath.Join(spamdir, spamfiles[0]))
tcheck(t, err, "classify spam message")
if prob < 0.9 {
t.Fatalf("trained spam file has prob %v, expected > 0.9", prob)
if result.Probability < 0.9 {
t.Fatalf("trained spam file has prob %v, expected > 0.9", result.Probability)
}
err = f.Close()
@ -145,18 +145,18 @@ func TestFilter(t *testing.T) {
// Classify and verify.
_, err = hamf.Seek(0, 0)
tcheck(t, err, "seek ham message")
prob, _, _, _, err = f.ClassifyMessageReader(ctxbg, hamf, hamsize)
result, err = f.ClassifyMessageReader(ctxbg, hamf, hamsize)
tcheck(t, err, "classify ham")
if prob > 0.1 {
t.Fatalf("got prob %v, expected <= 0.1", prob)
if result.Probability > 0.1 {
t.Fatalf("got prob %v, expected <= 0.1", result.Probability)
}
_, err = spamf.Seek(0, 0)
tcheck(t, err, "seek spam message")
prob, _, _, _, err = f.ClassifyMessageReader(ctxbg, spamf, spamsize)
result, err = f.ClassifyMessageReader(ctxbg, spamf, spamsize)
tcheck(t, err, "classify spam")
if prob < 0.9 {
t.Fatalf("got prob %v, expected >= 0.9", prob)
if result.Probability < 0.9 {
t.Fatalf("got prob %v, expected >= 0.9", result.Probability)
}
// Untrain ham & spam.
@ -185,18 +185,18 @@ func TestFilter(t *testing.T) {
// Classify again, should be unknown.
_, err = hamf.Seek(0, 0)
tcheck(t, err, "seek ham message")
prob, _, _, _, err = f.ClassifyMessageReader(ctxbg, hamf, hamsize)
result, err = f.ClassifyMessageReader(ctxbg, hamf, hamsize)
tcheck(t, err, "classify ham")
if math.Abs(prob-0.5) > 0.1 {
t.Fatalf("got prob %v, expected 0.5 +-0.1", prob)
if math.Abs(result.Probability-0.5) > 0.1 {
t.Fatalf("got prob %v, expected 0.5 +-0.1", result.Probability)
}
_, err = spamf.Seek(0, 0)
tcheck(t, err, "seek spam message")
prob, _, _, _, err = f.ClassifyMessageReader(ctxbg, spamf, spamsize)
result, err = f.ClassifyMessageReader(ctxbg, spamf, spamsize)
tcheck(t, err, "classify spam")
if math.Abs(prob-0.5) > 0.1 {
t.Fatalf("got prob %v, expected 0.5 +-0.1", prob)
if math.Abs(result.Probability-0.5) > 0.1 {
t.Fatalf("got prob %v, expected 0.5 +-0.1", result.Probability)
}
err = f.Close()