implement outgoing tls reports

we were already accepting, processing and displaying incoming tls reports. now
we start tracking TLS connection and security-policy-related errors for
outgoing message deliveries as well. we send reports once a day, to the
reporting addresses specified in TLSRPT records (rua) of a policy domain. these
reports are about MTA-STS policies and/or DANE policies, and about
STARTTLS-related failures.

sending reports is enabled by default, but can be disabled through setting
NoOutgoingTLSReports in mox.conf.

only at the end of the implementation process came the realization that the
TLSRPT policy domain for DANE (MX) hosts are separate from the TLSRPT policy
for the recipient domain, and that MTA-STS and DANE TLS/policy results are
typically delivered in separate reports. so MX hosts need their own TLSRPT
policies.

config for the per-host TLSRPT policy should be added to mox.conf for existing
installs, in field HostTLSRPT. it is automatically configured by quickstart for
new installs. with a HostTLSRPT config, the "dns records" and "dns check" admin
pages now suggest the per-host TLSRPT record. by creating that record, you're
requesting TLS reports about your MX host.

gathering all the TLS/policy results is somewhat tricky. the tentacles go
throughout the code. the positive result is that the TLS/policy-related code
had to be cleaned up a bit. for example, the smtpclient TLS modes now reflect
reality better, with independent settings about whether PKIX and/or DANE
verification has to be done, and/or whether verification errors have to be
ignored (e.g. for tls-required: no header). also, cached mtasts policies of
mode "none" are now cleaned up once the MTA-STS DNS record goes away.
This commit is contained in:
Mechiel Lukkien
2023-11-09 17:40:46 +01:00
parent df18ca3c02
commit 893a6f8911
58 changed files with 3246 additions and 504 deletions

View File

@ -1,194 +1,50 @@
// Package tlsrptdb stores reports from "SMTP TLS Reporting" in its database.
package tlsrptdb
import (
"context"
"fmt"
"os"
"path/filepath"
"sync"
"time"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"github.com/mjl-/bstore"
"github.com/mjl-/mox/dns"
"github.com/mjl-/mox/mlog"
"github.com/mjl-/mox/mox-"
"github.com/mjl-/mox/tlsrpt"
)
var (
xlog = mlog.New("tlsrptdb")
DBTypes = []any{TLSReportRecord{}}
DB *bstore.DB
mutex sync.Mutex
ReportDBTypes = []any{TLSReportRecord{}}
ReportDB *bstore.DB
mutex sync.Mutex
metricSession = promauto.NewCounterVec(
prometheus.CounterOpts{
Name: "mox_tlsrptdb_session_total",
Help: "Number of sessions, both success and known result types.",
},
[]string{"type"}, // Known result types, and "success"
)
knownResultTypes = map[tlsrpt.ResultType]struct{}{
tlsrpt.ResultSTARTTLSNotSupported: {},
tlsrpt.ResultCertificateHostMismatch: {},
tlsrpt.ResultCertificateExpired: {},
tlsrpt.ResultTLSAInvalid: {},
tlsrpt.ResultDNSSECInvalid: {},
tlsrpt.ResultDANERequired: {},
tlsrpt.ResultCertificateNotTrusted: {},
tlsrpt.ResultSTSPolicyInvalid: {},
tlsrpt.ResultSTSWebPKIInvalid: {},
tlsrpt.ResultValidationFailure: {},
tlsrpt.ResultSTSPolicyFetch: {},
}
// Accessed directly by tlsrptsend.
ResultDBTypes = []any{TLSResult{}}
ResultDB *bstore.DB
)
// TLSReportRecord is a TLS report as a database record, including information
// about the sender.
//
// todo: should be named just Record, but it would cause a sherpa type name conflict.
type TLSReportRecord struct {
ID int64 `bstore:"typename Record"`
Domain string `bstore:"index"` // Domain to which the TLS report applies.
FromDomain string
MailFrom string
Report tlsrpt.Report
}
func database(ctx context.Context) (rdb *bstore.DB, rerr error) {
mutex.Lock()
defer mutex.Unlock()
if DB == nil {
p := mox.DataDirPath("tlsrpt.db")
os.MkdirAll(filepath.Dir(p), 0770)
db, err := bstore.Open(ctx, p, &bstore.Options{Timeout: 5 * time.Second, Perm: 0660}, DBTypes...)
if err != nil {
return nil, err
}
DB = db
}
return DB, nil
}
// Init opens and possibly initializes the database.
// Init opens and possibly initializes the databases.
func Init() error {
_, err := database(mox.Shutdown)
return err
}
// Close closes the database connection.
func Close() {
mutex.Lock()
defer mutex.Unlock()
if DB != nil {
err := DB.Close()
xlog.Check(err, "closing database")
DB = nil
}
}
// AddReport adds a TLS report to the database.
//
// The report should have come in over SMTP, with a DKIM-validated
// verifiedFromDomain. Using HTTPS for reports is not recommended as there is no
// authentication on the reports origin.
//
// The report is currently required to only cover a single domain in its policy
// domain. Only reports for known domains are added to the database.
//
// Prometheus metrics are updated only for configured domains.
func AddReport(ctx context.Context, verifiedFromDomain dns.Domain, mailFrom string, r *tlsrpt.Report) error {
log := xlog.WithContext(ctx)
db, err := database(ctx)
if err != nil {
if _, err := reportDB(mox.Shutdown); err != nil {
return err
}
if len(r.Policies) == 0 {
return fmt.Errorf("no policies in report")
if _, err := resultDB(mox.Shutdown); err != nil {
return err
}
var reportdom, zerodom dns.Domain
record := TLSReportRecord{0, "", verifiedFromDomain.Name(), mailFrom, *r}
for _, p := range r.Policies {
pp := p.Policy
// Check domain, they must all be the same for now (in future, with DANE, this may
// no longer apply).
d, err := dns.ParseDomain(pp.Domain)
if err != nil {
log.Errorx("invalid domain in tls report", err, mlog.Field("domain", pp.Domain), mlog.Field("mailfrom", mailFrom))
continue
}
if _, ok := mox.Conf.Domain(d); !ok {
log.Info("unknown domain in tls report, not storing", mlog.Field("domain", d), mlog.Field("mailfrom", mailFrom))
return fmt.Errorf("unknown domain")
}
if reportdom != zerodom && d != reportdom {
return fmt.Errorf("multiple domains in report %s and %s", reportdom, d)
}
reportdom = d
metricSession.WithLabelValues("success").Add(float64(p.Summary.TotalSuccessfulSessionCount))
for _, f := range p.FailureDetails {
var result string
if _, ok := knownResultTypes[f.ResultType]; ok {
result = string(f.ResultType)
} else {
result = "other"
}
metricSession.WithLabelValues(result).Add(float64(f.FailedSessionCount))
}
}
record.Domain = reportdom.Name()
return db.Insert(ctx, &record)
return nil
}
// Records returns all TLS reports in the database.
func Records(ctx context.Context) ([]TLSReportRecord, error) {
db, err := database(ctx)
if err != nil {
return nil, err
// Close closes the database connections.
func Close() {
if ResultDB != nil {
err := ResultDB.Close()
xlog.Check(err, "closing result database")
ResultDB = nil
}
mutex.Lock()
defer mutex.Unlock()
if ReportDB != nil {
err := ReportDB.Close()
xlog.Check(err, "closing report database")
ReportDB = nil
}
return bstore.QueryDB[TLSReportRecord](ctx, db).List()
}
// RecordID returns the report for the ID.
func RecordID(ctx context.Context, id int64) (TLSReportRecord, error) {
db, err := database(ctx)
if err != nil {
return TLSReportRecord{}, err
}
e := TLSReportRecord{ID: id}
err = db.Get(ctx, &e)
return e, err
}
// RecordsPeriodDomain returns the reports overlapping start and end, for the given
// domain. If domain is empty, all records match for domain.
func RecordsPeriodDomain(ctx context.Context, start, end time.Time, domain string) ([]TLSReportRecord, error) {
db, err := database(ctx)
if err != nil {
return nil, err
}
q := bstore.QueryDB[TLSReportRecord](ctx, db)
if domain != "" {
q.FilterNonzero(TLSReportRecord{Domain: domain})
}
q.FilterFn(func(r TLSReportRecord) bool {
dr := r.Report.DateRange
return !dr.Start.Before(start) && dr.Start.Before(end) || dr.End.After(start) && !dr.End.After(end)
})
return q.List()
}

171
tlsrptdb/report.go Normal file
View File

@ -0,0 +1,171 @@
// Package tlsrptdb stores reports from "SMTP TLS Reporting" in its database.
package tlsrptdb
import (
"context"
"fmt"
"os"
"path/filepath"
"time"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"github.com/mjl-/bstore"
"github.com/mjl-/mox/dns"
"github.com/mjl-/mox/mlog"
"github.com/mjl-/mox/mox-"
"github.com/mjl-/mox/tlsrpt"
)
var (
metricSession = promauto.NewCounterVec(
prometheus.CounterOpts{
Name: "mox_tlsrptdb_session_total",
Help: "Number of sessions, both success and known result types.",
},
[]string{"type"}, // Known result types, and "success"
)
knownResultTypes = map[tlsrpt.ResultType]struct{}{
tlsrpt.ResultSTARTTLSNotSupported: {},
tlsrpt.ResultCertificateHostMismatch: {},
tlsrpt.ResultCertificateExpired: {},
tlsrpt.ResultTLSAInvalid: {},
tlsrpt.ResultDNSSECInvalid: {},
tlsrpt.ResultDANERequired: {},
tlsrpt.ResultCertificateNotTrusted: {},
tlsrpt.ResultSTSPolicyInvalid: {},
tlsrpt.ResultSTSWebPKIInvalid: {},
tlsrpt.ResultValidationFailure: {},
tlsrpt.ResultSTSPolicyFetch: {},
}
)
// TLSReportRecord is a TLS report as a database record, including information
// about the sender.
//
// todo: should be named just Record, but it would cause a sherpa type name conflict.
type TLSReportRecord struct {
ID int64 `bstore:"typename Record"`
Domain string `bstore:"index"` // Domain to which the TLS report applies.
FromDomain string
MailFrom string
HostReport bool // Report for host TLSRPT record, as opposed to domain TLSRPT record.
Report tlsrpt.Report
}
func reportDB(ctx context.Context) (rdb *bstore.DB, rerr error) {
mutex.Lock()
defer mutex.Unlock()
if ReportDB == nil {
p := mox.DataDirPath("tlsrpt.db")
os.MkdirAll(filepath.Dir(p), 0770)
db, err := bstore.Open(ctx, p, &bstore.Options{Timeout: 5 * time.Second, Perm: 0660}, ReportDBTypes...)
if err != nil {
return nil, err
}
ReportDB = db
}
return ReportDB, nil
}
// AddReport adds a TLS report to the database.
//
// The report should have come in over SMTP, with a DKIM-validated
// verifiedFromDomain. Using HTTPS for reports is not recommended as there is no
// authentication on the reports origin.
//
// The report is currently required to only cover a single domain in its policy
// domain. Only reports for known domains are added to the database.
//
// Prometheus metrics are updated only for configured domains.
func AddReport(ctx context.Context, verifiedFromDomain dns.Domain, mailFrom string, hostReport bool, r *tlsrpt.Report) error {
log := xlog.WithContext(ctx)
db, err := reportDB(ctx)
if err != nil {
return err
}
if len(r.Policies) == 0 {
return fmt.Errorf("no policies in report")
}
var reportdom, zerodom dns.Domain
record := TLSReportRecord{0, "", verifiedFromDomain.Name(), mailFrom, hostReport, *r}
for _, p := range r.Policies {
pp := p.Policy
// Check domain, they must all be the same for now (in future, with DANE, this may
// no longer apply).
d, err := dns.ParseDomain(pp.Domain)
if err != nil {
log.Errorx("invalid domain in tls report", err, mlog.Field("domain", pp.Domain), mlog.Field("mailfrom", mailFrom))
continue
}
if _, ok := mox.Conf.Domain(d); !ok {
log.Info("unknown domain in tls report, not storing", mlog.Field("domain", d), mlog.Field("mailfrom", mailFrom))
return fmt.Errorf("unknown domain")
}
if reportdom != zerodom && d != reportdom {
return fmt.Errorf("multiple domains in report %s and %s", reportdom, d)
}
reportdom = d
metricSession.WithLabelValues("success").Add(float64(p.Summary.TotalSuccessfulSessionCount))
for _, f := range p.FailureDetails {
var result string
if _, ok := knownResultTypes[f.ResultType]; ok {
result = string(f.ResultType)
} else {
result = "other"
}
metricSession.WithLabelValues(result).Add(float64(f.FailedSessionCount))
}
}
record.Domain = reportdom.Name()
return db.Insert(ctx, &record)
}
// Records returns all TLS reports in the database.
func Records(ctx context.Context) ([]TLSReportRecord, error) {
db, err := reportDB(ctx)
if err != nil {
return nil, err
}
return bstore.QueryDB[TLSReportRecord](ctx, db).List()
}
// RecordID returns the report for the ID.
func RecordID(ctx context.Context, id int64) (TLSReportRecord, error) {
db, err := reportDB(ctx)
if err != nil {
return TLSReportRecord{}, err
}
e := TLSReportRecord{ID: id}
err = db.Get(ctx, &e)
return e, err
}
// RecordsPeriodDomain returns the reports overlapping start and end, for the given
// domain. If domain is empty, all records match for domain.
func RecordsPeriodDomain(ctx context.Context, start, end time.Time, domain string) ([]TLSReportRecord, error) {
db, err := reportDB(ctx)
if err != nil {
return nil, err
}
q := bstore.QueryDB[TLSReportRecord](ctx, db)
if domain != "" {
q.FilterNonzero(TLSReportRecord{Domain: domain})
}
q.FilterFn(func(r TLSReportRecord) bool {
dr := r.Report.DateRange
return !dr.Start.Before(start) && dr.Start.Before(end) || dr.End.After(start) && !dr.End.After(end)
})
return q.List()
}

View File

@ -73,6 +73,7 @@ func TestReport(t *testing.T) {
dbpath := mox.DataDirPath("tlsrpt.db")
os.MkdirAll(filepath.Dir(dbpath), 0770)
defer os.Remove(dbpath)
defer os.Remove(mox.DataDirPath("tlsrptresult.db"))
if err := Init(); err != nil {
t.Fatalf("init database: %s", err)
@ -93,7 +94,7 @@ func TestReport(t *testing.T) {
if err != nil {
t.Fatalf("parsing TLSRPT from message %q: %s", file.Name(), err)
}
if err := AddReport(ctxbg, dns.Domain{ASCII: "mox.example"}, "tlsrpt@mox.example", report); err != nil {
if err := AddReport(ctxbg, dns.Domain{ASCII: "mox.example"}, "tlsrpt@mox.example", false, report); err != nil {
t.Fatalf("adding report to database: %s", err)
}
}
@ -101,7 +102,7 @@ func TestReport(t *testing.T) {
report, err := tlsrpt.Parse(strings.NewReader(reportJSON))
if err != nil {
t.Fatalf("parsing report: %v", err)
} else if err := AddReport(ctxbg, dns.Domain{ASCII: "company-y.example"}, "tlsrpt@company-y.example", report); err != nil {
} else if err := AddReport(ctxbg, dns.Domain{ASCII: "company-y.example"}, "tlsrpt@company-y.example", false, report); err != nil {
t.Fatalf("adding report to database: %s", err)
}

161
tlsrptdb/result.go Normal file
View File

@ -0,0 +1,161 @@
package tlsrptdb
import (
"context"
"fmt"
"os"
"path/filepath"
"time"
"github.com/mjl-/bstore"
"github.com/mjl-/mox/dns"
"github.com/mjl-/mox/mox-"
"github.com/mjl-/mox/tlsrpt"
)
// TLSResult is stored in the database to track TLS results per policy domain, day
// and recipient domain. These records will be included in TLS reports.
type TLSResult struct {
ID int64
// Domain with TLSRPT DNS record, with addresses that will receive reports. Either
// a recipient domain (for MTA-STS policies) or an (MX) host (for DANE policies).
// Unicode.
PolicyDomain string `bstore:"unique PolicyDomain+DayUTC+RecipientDomain,nonzero"`
// DayUTC is of the form yyyymmdd.
DayUTC string `bstore:"nonzero"`
// We send per 24h UTC-aligned days. ../rfc/8460:474
// Reports are sent per policy domain. When delivering a message to a recipient
// domain, we can get multiple TLSResults, typically one for MTA-STS, and one or
// more for DANE (one for each MX target, or actually TLSA base domain). We track
// recipient domain so we can display successes/failures for delivery of messages
// to a recipient domain in the admin pages. Unicode.
RecipientDomain string `bstore:"index,nonzero"`
Created time.Time `bstore:"default now"`
Updated time.Time `bstore:"default now"`
IsHost bool // Result is for host (e.g. DANE), not recipient domain (e.g. MTA-STS).
// Whether to send a report. TLS results for delivering messages with TLS reports
// will be recorded, but will not cause a report to be sent.
SendReport bool
// ../rfc/8460:318 says we should not include TLS results for sending a TLS report,
// but presumably that's to prevent mail servers sending a report every day once
// they start.
// Results is updated for each TLS attempt.
Results []tlsrpt.Result
}
func resultDB(ctx context.Context) (rdb *bstore.DB, rerr error) {
mutex.Lock()
defer mutex.Unlock()
if ResultDB == nil {
p := mox.DataDirPath("tlsrptresult.db")
os.MkdirAll(filepath.Dir(p), 0770)
db, err := bstore.Open(ctx, p, &bstore.Options{Timeout: 5 * time.Second, Perm: 0660}, ResultDBTypes...)
if err != nil {
return nil, err
}
ResultDB = db
}
return ResultDB, nil
}
// AddTLSResults adds or merges all tls results for delivering to a policy domain,
// on its UTC day to a recipient domain to the database. Results may cause multiple
// separate reports to be sent.
func AddTLSResults(ctx context.Context, results []TLSResult) error {
db, err := resultDB(ctx)
if err != nil {
return err
}
now := time.Now()
err = db.Write(ctx, func(tx *bstore.Tx) error {
for _, result := range results {
// Ensure all slices are non-nil. We do this now so all readers will marshal to
// compliant with the JSON schema. And also for consistent equality checks when
// merging policies created in different places.
for i, r := range result.Results {
if r.Policy.String == nil {
r.Policy.String = []string{}
}
if r.Policy.MXHost == nil {
r.Policy.MXHost = []string{}
}
if r.FailureDetails == nil {
r.FailureDetails = []tlsrpt.FailureDetails{}
}
result.Results[i] = r
}
q := bstore.QueryTx[TLSResult](tx)
q.FilterNonzero(TLSResult{PolicyDomain: result.PolicyDomain, DayUTC: result.DayUTC, RecipientDomain: result.RecipientDomain})
r, err := q.Get()
if err == bstore.ErrAbsent {
result.ID = 0
if err := tx.Insert(&result); err != nil {
return fmt.Errorf("insert: %w", err)
}
continue
} else if err != nil {
return err
}
report := tlsrpt.Report{Policies: r.Results}
report.Merge(result.Results...)
r.Results = report.Policies
r.IsHost = result.IsHost
if result.SendReport {
r.SendReport = true
}
r.Updated = now
if err := tx.Update(&r); err != nil {
return fmt.Errorf("update: %w", err)
}
}
return nil
})
return err
}
// Results returns all TLS results in the database, for all policy domains each
// with potentially multiple days. Sorted by RecipientDomain and day.
func Results(ctx context.Context) ([]TLSResult, error) {
db, err := resultDB(ctx)
if err != nil {
return nil, err
}
return bstore.QueryDB[TLSResult](ctx, db).SortAsc("PolicyDomain", "DayUTC", "RecipientDomain").List()
}
// ResultsPolicyDomain returns all TLSResults for a policy domain, potentially for
// multiple days.
func ResultsPolicyDomain(ctx context.Context, policyDomain dns.Domain) ([]TLSResult, error) {
db, err := resultDB(ctx)
if err != nil {
return nil, err
}
return bstore.QueryDB[TLSResult](ctx, db).FilterNonzero(TLSResult{PolicyDomain: policyDomain.Name()}).SortAsc("DayUTC", "RecipientDomain").List()
}
// RemoveResultsPolicyDomain removes all TLSResults for the policy domain on the
// day from the database.
func RemoveResultsPolicyDomain(ctx context.Context, policyDomain dns.Domain, dayUTC string) error {
db, err := resultDB(ctx)
if err != nil {
return err
}
_, err = bstore.QueryDB[TLSResult](ctx, db).FilterNonzero(TLSResult{PolicyDomain: policyDomain.Name(), DayUTC: dayUTC}).Delete()
return err
}