mirror of
https://github.com/mjl-/mox.git
synced 2025-07-14 02:54:37 +03:00
mox!
This commit is contained in:
189
publicsuffix/list.go
Normal file
189
publicsuffix/list.go
Normal file
@ -0,0 +1,189 @@
|
||||
// Package publicsuffix implements a public suffix list to look up the
|
||||
// organizational domain for a given host name. Organizational domains can be
|
||||
// registered, one level below a public suffix (such as a top-level domain).
|
||||
//
|
||||
// Example.com has a public suffix ".com", and example.co.uk has a public
|
||||
// suffix ".co.uk". The organizational domain of sub.example.com is
|
||||
// example.com, and the organizational domain of sub.example.co.uk is
|
||||
// example.co.uk.
|
||||
package publicsuffix
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
|
||||
_ "embed"
|
||||
|
||||
"golang.org/x/net/idna"
|
||||
|
||||
"github.com/mjl-/mox/dns"
|
||||
"github.com/mjl-/mox/mlog"
|
||||
)
|
||||
|
||||
var xlog = mlog.New("publicsuffix")
|
||||
|
||||
// todo: automatically fetch new lists periodically? compare it with the old one. refuse it if it changed too much, especially if it contains far fewer entries than before.
|
||||
|
||||
type (
	// labels is one level of a trie of rules, keyed by a single UTF-8 domain
	// label. Each value holds the labels that may appear to the left of the
	// key in a rule. A rule ends at a level that has the empty string as key.
	labels map[string]labels

	// List is a public suffix list.
	List struct {
		// includes holds the regular rules, excludes holds the exception
		// rules (those written with a leading "!" in the list).
		includes labels
		excludes labels
	}
)
|
||||
|
||||
var publicsuffixList List
|
||||
|
||||
//go:embed public_suffix_list.txt
|
||||
var publicsuffixData []byte
|
||||
|
||||
func init() {
|
||||
l, err := ParseList(bytes.NewReader(publicsuffixData))
|
||||
if err != nil {
|
||||
xlog.Fatalx("parsing public suffix list", err)
|
||||
}
|
||||
publicsuffixList = l
|
||||
}
|
||||
|
||||
// ParseList parses a public suffix list.
|
||||
// Only the "ICANN DOMAINS" are used.
|
||||
func ParseList(r io.Reader) (List, error) {
|
||||
list := List{labels{}, labels{}}
|
||||
br := bufio.NewReader(r)
|
||||
|
||||
// Only use ICANN domains. ../rfc/7489-eid6729
|
||||
var icannDomains bool
|
||||
for {
|
||||
line, err := br.ReadString('\n')
|
||||
if line != "" {
|
||||
line = strings.TrimSpace(line)
|
||||
if strings.HasPrefix(line, "// ===BEGIN ICANN DOMAINS===") {
|
||||
icannDomains = true
|
||||
continue
|
||||
} else if strings.HasPrefix(line, "// ===END ICANN DOMAINS===") {
|
||||
icannDomains = false
|
||||
continue
|
||||
} else if line == "" || strings.HasPrefix(line, "//") || !icannDomains {
|
||||
continue
|
||||
}
|
||||
l := list.includes
|
||||
var t []string
|
||||
oline := line
|
||||
if strings.HasPrefix(line, "!") {
|
||||
line = line[1:]
|
||||
l = list.excludes
|
||||
t = strings.Split(line, ".")
|
||||
if len(t) == 1 {
|
||||
xlog.Print("exclude rule with single label, skipping", mlog.Field("line", oline))
|
||||
continue
|
||||
}
|
||||
} else {
|
||||
t = strings.Split(line, ".")
|
||||
}
|
||||
for i := len(t) - 1; i >= 0; i-- {
|
||||
w := t[i]
|
||||
if w == "" {
|
||||
xlog.Print("empty label in rule, skipping", mlog.Field("line", oline))
|
||||
break
|
||||
}
|
||||
if w != "" && w != "*" {
|
||||
w, err = idna.Lookup.ToUnicode(w)
|
||||
if err != nil {
|
||||
xlog.Printx("invalid label, skipping", err, mlog.Field("line", oline))
|
||||
}
|
||||
}
|
||||
m, ok := l[w]
|
||||
if ok {
|
||||
if _, dup := m[""]; i == 0 && dup {
|
||||
xlog.Print("duplicate rule", mlog.Field("line", oline))
|
||||
}
|
||||
l = m
|
||||
} else {
|
||||
m = labels{}
|
||||
l[w] = m
|
||||
l = m
|
||||
}
|
||||
}
|
||||
l[""] = nil // Mark end.
|
||||
}
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
if err != nil {
|
||||
return List{}, fmt.Errorf("reading public suffix list: %w", err)
|
||||
}
|
||||
}
|
||||
return list, nil
|
||||
}
|
||||
|
||||
// Lookup calls Lookup on the builtin public suffix list, from
|
||||
// https://publicsuffix.org/list/.
|
||||
func Lookup(ctx context.Context, domain dns.Domain) (orgDomain dns.Domain) {
|
||||
return publicsuffixList.Lookup(ctx, domain)
|
||||
}
|
||||
|
||||
// Lookup returns the organizational domain. If domain is an organizational
|
||||
// domain, or higher-level, the same domain is returned.
|
||||
func (l List) Lookup(ctx context.Context, domain dns.Domain) (orgDomain dns.Domain) {
|
||||
log := xlog.WithContext(ctx)
|
||||
defer func() {
|
||||
log.Debug("publicsuffix lookup result", mlog.Field("reqdom", domain), mlog.Field("orgdom", orgDomain))
|
||||
}()
|
||||
|
||||
t := strings.Split(domain.Name(), ".")
|
||||
|
||||
var n int
|
||||
if nexcl, ok := match(l.excludes, t); ok {
|
||||
n = nexcl
|
||||
} else if nincl, ok := match(l.includes, t); ok {
|
||||
n = nincl + 1
|
||||
} else {
|
||||
n = 2
|
||||
}
|
||||
if len(t) < n {
|
||||
return domain
|
||||
}
|
||||
name := strings.Join(t[len(t)-n:], ".")
|
||||
if isASCII(name) {
|
||||
return dns.Domain{ASCII: name}
|
||||
}
|
||||
t = strings.Split(domain.ASCII, ".")
|
||||
ascii := strings.Join(t[len(t)-n:], ".")
|
||||
return dns.Domain{ASCII: ascii, Unicode: name}
|
||||
}
|
||||
|
||||
// isASCII reports whether s consists only of bytes below 0x80. The empty
// string is ASCII.
func isASCII(s string) bool {
	// Every non-ASCII rune, and every invalid byte, has a byte >= 0x80, so
	// scanning bytes is equivalent to scanning runes here.
	for i := 0; i < len(s); i++ {
		if s[i] >= 0x80 {
			return false
		}
	}
	return true
}
|
||||
|
||||
func match(l labels, t []string) (int, bool) {
|
||||
if len(t) == 0 {
|
||||
_, ok := l[""]
|
||||
return 0, ok
|
||||
}
|
||||
s := t[len(t)-1]
|
||||
t = t[:len(t)-1]
|
||||
n := 0
|
||||
if m, mok := l[s]; mok {
|
||||
if nn, sok := match(m, t); sok {
|
||||
n = 1 + nn
|
||||
}
|
||||
}
|
||||
if m, mok := l["*"]; mok {
|
||||
if nn, sok := match(m, t); sok && nn >= n {
|
||||
n = 1 + nn
|
||||
}
|
||||
}
|
||||
_, mok := l[""]
|
||||
return n, n > 0 || mok
|
||||
}
|
79
publicsuffix/list_test.go
Normal file
79
publicsuffix/list_test.go
Normal file
@ -0,0 +1,79 @@
|
||||
package publicsuffix
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/mjl-/mox/dns"
|
||||
)
|
||||
|
||||
func TestList(t *testing.T) {
|
||||
const data = `
|
||||
// ===BEGIN ICANN DOMAINS===
|
||||
com
|
||||
|
||||
*.jp
|
||||
// Hosts in .hokkaido.jp can't set cookies below level 4...
|
||||
*.hokkaido.jp
|
||||
*.tokyo.jp
|
||||
// ...except hosts in pref.hokkaido.jp, which can set cookies at level 3.
|
||||
!pref.hokkaido.jp
|
||||
!metro.tokyo.jp
|
||||
|
||||
bücher.example.com
|
||||
// ===END ICANN DOMAINS===
|
||||
|
||||
ignored.example.com
|
||||
`
|
||||
l, err := ParseList(strings.NewReader(data))
|
||||
if err != nil {
|
||||
t.Fatalf("parsing list: %s", err)
|
||||
}
|
||||
|
||||
test := func(domain, orgDomain string) {
|
||||
t.Helper()
|
||||
|
||||
d, err := dns.ParseDomain(domain)
|
||||
if err != nil {
|
||||
t.Fatalf("idna to unicode %q: %s", domain, err)
|
||||
}
|
||||
od, err := dns.ParseDomain(orgDomain)
|
||||
if err != nil {
|
||||
t.Fatalf("idna to unicode org domain %q: %s", orgDomain, err)
|
||||
}
|
||||
|
||||
r := l.Lookup(context.Background(), d)
|
||||
if r != od {
|
||||
t.Fatalf("got %q, expected %q, for domain %q", r, orgDomain, domain)
|
||||
}
|
||||
}
|
||||
|
||||
test("com", "com")
|
||||
test("foo.com", "foo.com")
|
||||
test("bar.foo.com", "foo.com")
|
||||
test("foo.bar.jp", "foo.bar.jp")
|
||||
test("baz.foo.bar.jp", "foo.bar.jp")
|
||||
test("bar.jp", "bar.jp")
|
||||
test("foo.bar.hokkaido.jp", "foo.bar.hokkaido.jp")
|
||||
test("baz.foo.bar.hokkaido.jp", "foo.bar.hokkaido.jp")
|
||||
test("bar.hokkaido.jp", "bar.hokkaido.jp")
|
||||
test("pref.hokkaido.jp", "pref.hokkaido.jp")
|
||||
test("foo.pref.hokkaido.jp", "pref.hokkaido.jp")
|
||||
test("WwW.EXAMPLE.Com", "example.com")
|
||||
test("bücher.example.com", "bücher.example.com")
|
||||
test("foo.bücher.example.com", "foo.bücher.example.com")
|
||||
test("bar.foo.bücher.example.com", "foo.bücher.example.com")
|
||||
test("xn--bcher-kva.example.com", "bücher.example.com")
|
||||
test("foo.xn--bcher-kva.example.com", "foo.bücher.example.com")
|
||||
test("bar.foo.xn--bcher-kva.example.com", "foo.bücher.example.com")
|
||||
test("x.ignored.example.com", "example.com")
|
||||
|
||||
l, err = ParseList(bytes.NewReader(publicsuffixData))
|
||||
if err != nil {
|
||||
t.Fatalf("parsing public suffix list: %s", err)
|
||||
}
|
||||
|
||||
// todo: add testcases from https://raw.githubusercontent.com/publicsuffix/list/master/tests/test_psl.txt
|
||||
}
|
13825
publicsuffix/public_suffix_list.txt
Normal file
13825
publicsuffix/public_suffix_list.txt
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user