implement decoding charsets (other than ascii and utf-8) while reading textual message parts, and improve search

message.Part now has a ReaderUTF8OrBinary() along with the existing Reader().
the new function returns a reader of decoded content. we now use it in a few
places, including search. we only support the charsets in
golang.org/x/text/encoding/ianaindex.

search has also been changed to not read the entire message into memory.
instead, we allocate one 8k buffer for reading and search in that, and we reuse
the buffer for all messages. this saves quite a few allocations when searching
large mailboxes.
This commit is contained in:
Mechiel Lukkien
2023-07-28 22:15:23 +02:00
parent a31dfc573e
commit 01adad62b2
34 changed files with 157887 additions and 64 deletions

74
vendor/golang.org/x/text/encoding/ianaindex/ascii.go generated vendored Normal file
View File

@ -0,0 +1,74 @@
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ianaindex
import (
"unicode"
"unicode/utf8"
"golang.org/x/text/encoding"
"golang.org/x/text/encoding/internal"
"golang.org/x/text/encoding/internal/identifier"
"golang.org/x/text/transform"
)
// asciiDecoder is a stateless transformer that converts US-ASCII input to
// UTF-8. The embedded NopResetter supplies the Reset method.
type asciiDecoder struct {
	transform.NopResetter
}

// Transform copies ASCII bytes from src to dst unchanged and writes
// unicode.ReplacementChar (UTF-8 encoded) for every source byte above
// unicode.MaxASCII.
//
// It returns the number of bytes written to dst and consumed from src. When
// dst has no room for the next output, processing stops before that source
// byte and err is transform.ErrShortDst. atEOF is not consulted: each source
// byte is handled independently of the ones that follow.
func (d asciiDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	const replacement = unicode.ReplacementChar
	replacementLen := utf8.RuneLen(replacement)

	for nSrc < len(src) {
		b := src[nSrc]
		if b <= unicode.MaxASCII {
			// Plain ASCII: one byte in, one byte out.
			if nDst >= len(dst) {
				return nDst, nSrc, transform.ErrShortDst
			}
			dst[nDst] = b
			nDst++
		} else {
			// Not valid ASCII: emit the replacement character, but only if
			// its full encoding fits in the remaining destination space.
			if nDst+replacementLen > len(dst) {
				return nDst, nSrc, transform.ErrShortDst
			}
			nDst += utf8.EncodeRune(dst[nDst:], replacement)
		}
		nSrc++
	}
	return nDst, nSrc, nil
}
// asciiEncoder is a stateless transformer that passes ASCII bytes through
// and rejects everything else. The embedded NopResetter supplies the Reset
// method.
type asciiEncoder struct {
	transform.NopResetter
}

// Transform copies bytes from src to dst for as long as they are within the
// ASCII range.
//
// It returns the number of bytes written to dst and consumed from src. On the
// first source byte above unicode.MaxASCII it stops and returns
// internal.RepertoireError(encoding.ASCIISub); if dst fills up first it stops
// and returns transform.ErrShortDst. The offending byte is never consumed.
// atEOF is not consulted.
func (d asciiEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	for nSrc < len(src) {
		b := src[nSrc]
		switch {
		case b > unicode.MaxASCII:
			// Checked before the space test so a non-ASCII byte is reported
			// as a repertoire error even when dst is already full.
			return nDst, nSrc, internal.RepertoireError(encoding.ASCIISub)
		case nDst >= len(dst):
			return nDst, nSrc, transform.ErrShortDst
		}
		dst[nDst] = b
		nDst++
		nSrc++
	}
	return nDst, nSrc, nil
}
// asciiEnc is the US-ASCII encoding: it pairs asciiDecoder and asciiEncoder
// and registers them under the name "US-ASCII" with the identifier.ASCII MIB.
var asciiEnc = &internal.Encoding{
	// Keyed fields keep the literal correct if SimpleEncoding's field order
	// ever changes, and satisfy go vet's composite-literal check.
	Encoding: &internal.SimpleEncoding{
		Decoder: asciiDecoder{},
		Encoder: asciiEncoder{},
	},
	Name: "US-ASCII",
	MIB:  identifier.ASCII,
}