mirror of
https://github.com/mjl-/mox.git
synced 2025-06-27 21:48:16 +03:00
Add Prometheus metrics for errors when getting certificates through ACME (typically from Let's Encrypt),
and add an alerting rule for those errors. We certainly want a heads-up when there are issues with the certificates.
This commit is contained in:
parent
1277d78cb1
commit
e5e15a3965
@ -42,6 +42,24 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
|
metricMissingServerName = promauto.NewCounter(
|
||||||
|
prometheus.CounterOpts{
|
||||||
|
Name: "mox_autotls_missing_servername_total",
|
||||||
|
Help: "Number of failed TLS connection attempts with missing SNI where no fallback hostname was configured.",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
metricUnknownServerName = promauto.NewCounter(
|
||||||
|
prometheus.CounterOpts{
|
||||||
|
Name: "mox_autotls_unknown_servername_total",
|
||||||
|
Help: "Number of failed TLS connection attempts with an unrecognized SNI name where no fallback hostname was configured.",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
metricCertRequestErrors = promauto.NewCounter(
|
||||||
|
prometheus.CounterOpts{
|
||||||
|
Name: "mox_autotls_cert_request_errors_total",
|
||||||
|
Help: "Number of errors trying to retrieve a certificate for a hostname, possibly ACME verification errors.",
|
||||||
|
},
|
||||||
|
)
|
||||||
metricCertput = promauto.NewCounter(
|
metricCertput = promauto.NewCounter(
|
||||||
prometheus.CounterOpts{
|
prometheus.CounterOpts{
|
||||||
Name: "mox_autotls_certput_total",
|
Name: "mox_autotls_certput_total",
|
||||||
@ -171,7 +189,7 @@ func Load(name, acmeDir, contactEmail, directoryURL string, eabKeyID string, eab
|
|||||||
return a, nil
|
return a, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// logigngGetCertificate is a helper to implement crypto/tls.Config.GetCertificate,
|
// loggingGetCertificate is a helper to implement crypto/tls.Config.GetCertificate,
|
||||||
// optionally falling back to a certificate for fallbackHostname in case SNI is
|
// optionally falling back to a certificate for fallbackHostname in case SNI is
|
||||||
// absent or for an unknown hostname.
|
// absent or for an unknown hostname.
|
||||||
func (m *Manager) loggingGetCertificate(hello *tls.ClientHelloInfo, fallbackHostname dns.Domain, fallbackNoSNI, fallbackUnknownSNI bool) (*tls.Certificate, error) {
|
func (m *Manager) loggingGetCertificate(hello *tls.ClientHelloInfo, fallbackHostname dns.Domain, fallbackNoSNI, fallbackUnknownSNI bool) (*tls.Certificate, error) {
|
||||||
@ -188,6 +206,7 @@ func (m *Manager) loggingGetCertificate(hello *tls.ClientHelloInfo, fallbackHost
|
|||||||
|
|
||||||
// Handle missing SNI to prevent logging an error below.
|
// Handle missing SNI to prevent logging an error below.
|
||||||
if hello.ServerName == "" {
|
if hello.ServerName == "" {
|
||||||
|
metricMissingServerName.Inc()
|
||||||
log.Debug("tls request without sni servername, rejecting", slog.Any("localaddr", hello.Conn.LocalAddr()), slog.Any("supportedprotos", hello.SupportedProtos))
|
log.Debug("tls request without sni servername, rejecting", slog.Any("localaddr", hello.Conn.LocalAddr()), slog.Any("supportedprotos", hello.SupportedProtos))
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
@ -195,6 +214,7 @@ func (m *Manager) loggingGetCertificate(hello *tls.ClientHelloInfo, fallbackHost
|
|||||||
cert, err := m.Manager.GetCertificate(hello)
|
cert, err := m.Manager.GetCertificate(hello)
|
||||||
if err != nil && errors.Is(err, errHostNotAllowed) {
|
if err != nil && errors.Is(err, errHostNotAllowed) {
|
||||||
if !fallbackUnknownSNI {
|
if !fallbackUnknownSNI {
|
||||||
|
metricUnknownServerName.Inc()
|
||||||
log.Debugx("requesting certificate", err, slog.String("host", hello.ServerName))
|
log.Debugx("requesting certificate", err, slog.String("host", hello.ServerName))
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
@ -203,12 +223,14 @@ func (m *Manager) loggingGetCertificate(hello *tls.ClientHelloInfo, fallbackHost
|
|||||||
hello.ServerName = fallbackHostname.ASCII
|
hello.ServerName = fallbackHostname.ASCII
|
||||||
cert, err = m.Manager.GetCertificate(hello)
|
cert, err = m.Manager.GetCertificate(hello)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
metricCertRequestErrors.Inc()
|
||||||
log.Errorx("requesting certificate for fallback hostname", err, slog.String("host", hello.ServerName))
|
log.Errorx("requesting certificate for fallback hostname", err, slog.String("host", hello.ServerName))
|
||||||
} else {
|
} else {
|
||||||
log.Debugx("requesting certificate for fallback hostname", err, slog.String("host", hello.ServerName))
|
log.Debug("using certificate for fallback hostname", slog.String("host", hello.ServerName))
|
||||||
}
|
}
|
||||||
return cert, err
|
return cert, err
|
||||||
} else if err != nil {
|
} else if err != nil {
|
||||||
|
metricCertRequestErrors.Inc()
|
||||||
log.Errorx("requesting certificate", err, slog.String("host", hello.ServerName))
|
log.Errorx("requesting certificate", err, slog.String("host", hello.ServerName))
|
||||||
}
|
}
|
||||||
return cert, err
|
return cert, err
|
||||||
|
@ -8,6 +8,11 @@ groups:
|
|||||||
annotations:
|
annotations:
|
||||||
summary: unhandled panic
|
summary: unhandled panic
|
||||||
|
|
||||||
|
- alert: mox-acme-request-cert-errors
|
||||||
|
expr: increase(mox_autotls_cert_request_errors_total[1h]) > 0
|
||||||
|
annotations:
|
||||||
|
summary: errors requesting tls certificates with acme
|
||||||
|
|
||||||
- alert: mox-ip-on-dns-blocklist
|
- alert: mox-ip-on-dns-blocklist
|
||||||
expr: mox_dnsbl_ips_success < 1
|
expr: mox_dnsbl_ips_success < 1
|
||||||
annotations:
|
annotations:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user