smtpserver: add prometheus metric for failing starttls handshakes for incoming deliveries

and add an alerting rule that fires if the failure rate exceeds 10% (e.g. due to an
expired certificate).

the prometheus metric includes a reason label, including potential tls alerts if
remote smtp clients send them (openssl s_client -starttls does).

inspired by issue #237, where incoming connections were aborted by the remote.
such errors show up as "eof" in the metric.
Author: Mechiel Lukkien
Date: 2024-11-29 12:43:21 +01:00
parent 09e7ddba9e
commit afb182cb14
5 changed files with 63 additions and 5 deletions
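The sketch below is not part of this commit; it only illustrates, with invented names, how a counter with a reason label for failed incoming STARTTLS handshakes could be registered and incremented with the Go Prometheus client. The metric name mox_smtpserver_starttls_errors_total and the helper ObserveStartTLSError are assumptions, not taken from the diff; only the idea of a per-reason counter (with values like "eof" or alert-based reasons from FormatAlert) comes from the commit message. The accompanying alerting rule presumably divides the rate of these failures by the rate of all STARTTLS attempts and fires when that ratio stays above 0.10.

package metrics

import (
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"
)

// Hypothetical counter for failed STARTTLS handshakes on incoming deliveries,
// partitioned by failure reason ("eof", "alert-<num>", ...). The name is made
// up for illustration.
var starttlsErrors = promauto.NewCounterVec(
	prometheus.CounterOpts{
		Name: "mox_smtpserver_starttls_errors_total",
		Help: "Failed STARTTLS handshakes on incoming deliveries, per reason.",
	},
	[]string{"reason"},
)

// ObserveStartTLSError records one failed handshake. The reason would be
// derived from the handshake error, e.g. via FormatAlert for TLS alerts sent
// by the remote client, or "eof" for connections aborted by the remote.
func ObserveStartTLSError(reason string) {
	starttlsErrors.WithLabelValues(reason).Inc()
}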


@@ -10,7 +10,8 @@ import (
 	"strings"
 )
 
-func formatAlert(alert uint8) string {
+// FormatAlert formats a TLS alert in the form "alert-<num>" or "alert-<num>-<shortcode>".
+func FormatAlert(alert uint8) string {
 	s := fmt.Sprintf("alert-%d", alert)
 	err := tls.AlertError(alert) // Since go1.21.0
 	// crypto/tls returns messages like "tls: short message" or "tls: alert(321)".
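The hunk above ends before the rest of the function. As a rough sketch only (the shortcode derivation below is a guess, not taken from this commit), the remainder of FormatAlert could turn the crypto/tls message into the "-<shortcode>" suffix roughly like this:

package main

import (
	"crypto/tls"
	"fmt"
	"strings"
)

// formatAlertSketch is a hypothetical, self-contained approximation of the
// exported FormatAlert above; the shortcode derivation is guessed.
func formatAlertSketch(alert uint8) string {
	s := fmt.Sprintf("alert-%d", alert)
	err := tls.AlertError(alert) // Since go1.21.0.
	// crypto/tls returns messages like "tls: short message" or "tls: alert(321)".
	msg := strings.TrimPrefix(err.Error(), "tls: ")
	if strings.HasPrefix(msg, "alert(") {
		// Unknown alert number, no descriptive text to append.
		return s
	}
	return s + "-" + strings.ReplaceAll(msg, " ", "-")
}

func main() {
	fmt.Println(formatAlertSketch(0))   // "alert-0-close-notify"
	fmt.Println(formatAlertSketch(200)) // unknown alert: "alert-200"
}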


@@ -8,6 +8,7 @@ import (
 	"fmt"
 )
 
-func formatAlert(alert uint8) string {
+// FormatAlert formats a TLS alert in the form "alert-<num>".
+func FormatAlert(alert uint8) string {
 	return fmt.Sprintf("alert-%d", alert)
 }


@@ -394,7 +394,7 @@ func TLSFailureDetails(err error) (ResultType, string) {
 		// todo: ideally, crypto/tls would let us check if this is an alert. it could be another uint8-typed error.
 		v := reflect.ValueOf(netErr.Err)
 		if v.Kind() == reflect.Uint8 && v.Type().Name() == "alert" {
-			reasonCode = "tls-remote-" + formatAlert(uint8(v.Uint()))
+			reasonCode = "tls-remote-" + FormatAlert(uint8(v.Uint()))
 		}
 	}
 	return ResultValidationFailure, reasonCode
@@ -429,7 +429,7 @@ func TLSFailureDetails(err error) (ResultType, string) {
 		}
 		v := reflect.ValueOf(err)
 		if v.Kind() == reflect.Uint8 && v.Type().Name() == "alert" {
-			reasonCode = "tls-local-" + formatAlert(uint8(v.Uint()))
+			reasonCode = "tls-local-" + FormatAlert(uint8(v.Uint()))
 		}
 	}
 	return ResultValidationFailure, reasonCode
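For context on the reflection in both hunks: crypto/tls reports handshake failures with its unexported uint8 "alert" type, here either wrapped in a *net.OpError (remote alerts) or returned directly (local alerts), so checking the value's kind and type name is a way to recover the alert number without an exported API, as the todo comment above notes. A minimal, self-contained sketch of the same technique follows; the helper name alertFromTLSError and the example in main are invented for illustration and are not part of this commit.

package main

import (
	"errors"
	"fmt"
	"net"
	"reflect"
)

// alertFromTLSError returns the TLS alert number carried by err, if any,
// using the same reflection trick as TLSFailureDetails above.
func alertFromTLSError(err error) (uint8, bool) {
	// Remote alerts typically arrive wrapped in a *net.OpError; unwrap it first.
	var netErr *net.OpError
	if errors.As(err, &netErr) && netErr.Err != nil {
		err = netErr.Err
	}
	// The crypto/tls alert type is an unexported uint8, so match on kind and type name.
	v := reflect.ValueOf(err)
	if v.Kind() == reflect.Uint8 && v.Type().Name() == "alert" {
		return uint8(v.Uint()), true
	}
	return 0, false
}

func main() {
	// With an error from a failed TLS handshake this would print the alert number.
	if num, ok := alertFromTLSError(fmt.Errorf("not a tls error")); ok {
		fmt.Println("tls alert:", num)
	} else {
		fmt.Println("no tls alert in error")
	}
}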