update to latest bstore (with support for an index on a []string: Message.DKIMDomains), and cyclic data types (to be used for Message.Part soon); also adds a context.Context to database operations.

This commit is contained in:
Mechiel Lukkien
2023-05-22 14:40:36 +02:00
parent f6ed860ccb
commit e81930ba20
58 changed files with 1970 additions and 1035 deletions
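In brief, what the two headline changes enable, as a sketch (hypothetical Msg type standing in for mox's Message; the bstore signatures reflect this update, so treat them as assumptions):

```go
package main

import (
	"context"
	"log"

	"github.com/mjl-/bstore"
)

// Msg is a hypothetical type standing in for mox's Message.
type Msg struct {
	ID          int64    // Primary key, assigned the next sequence number on insert.
	DKIMDomains []string `bstore:"index"` // New: index on a []string, one key per element.
}

func main() {
	ctx := context.Background()
	// A context.Context is now passed to database operations.
	db, err := bstore.Open(ctx, "testdata/example.db", nil, Msg{})
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	if err := db.Insert(ctx, &Msg{DKIMDomains: []string{"example.org"}}); err != nil {
		log.Fatal(err)
	}

	// FilterIn uses the slice index to match any single element.
	msgs, err := bstore.QueryDB[Msg](ctx, db).FilterIn("DKIMDomains", "example.org").List()
	if err != nil {
		log.Fatal(err)
	}
	log.Printf("%d messages", len(msgs))
}
```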

View File

@ -1,3 +1,4 @@
/cover.out
/cover.html
/testdata/*.db
/testdata/tmp.*.db
/testdata/mail.db

View File

@ -2,7 +2,7 @@ build:
go build ./...
go vet ./...
GOARCH=386 go vet ./...
staticcheck ./...
staticcheck -checks 'all,-ST1012' ./...
./gendoc.sh
fmt:

View File

@ -1,22 +1,42 @@
bstore is a database library for storing and querying Go struct data.
Bstore is a database library for storing and querying Go values.
See https://pkg.go.dev/github.com/mjl-/bstore
Bstore is designed as a small, pure Go library that still provides most of
the common data consistency requirements for modest database use cases. Bstore
aims to make basic use of cgo-based libraries, such as sqlite, unnecessary.
See https://pkg.go.dev/github.com/mjl-/bstore for features, examples and full
documentation.
MIT-licensed
# Comparison
Bstore is designed as a small, pure Go library that still provides most of the
common data consistency requirements for modest database use cases. Bstore aims
to make basic use of cgo-based libraries, such as sqlite, unnecessary. Sqlite
is a great library, but Go applications that require cgo are hard to
# FAQ - Frequently Asked Questions
## Is bstore an ORM?
No. The API for bstore may look like an ORM. But instead of mapping bstore
"queries" (function calls) to an SQL query string, bstore executes them
directly without converting to a query language, and stores the data itself.
## How does bstore store its data?
A bstore database is a single-file BoltDB database. BoltDB provides ACID
properties. Bstore uses a BoltDB "bucket" (key/value store) for each Go type
stored, with multiple subbuckets: one for type definitions, one for the actual
data, and one bucket per index. BoltDB stores data in a B+tree. See format.md
for details.
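The layout described above can be seen by opening the file directly with bbolt; a sketch (example path, not part of this commit):

```go
package main

import (
	"log"

	bolt "go.etcd.io/bbolt"
)

func main() {
	db, err := bolt.Open("testdata/mail.db", 0600, nil)
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()
	err = db.View(func(tx *bolt.Tx) error {
		// One top-level bucket per stored Go type.
		return tx.ForEach(func(name []byte, b *bolt.Bucket) error {
			// Subbuckets: "types", "records", and one "index.<name>" per index.
			return b.ForEach(func(k, v []byte) error {
				if v == nil { // A nil value means k is a subbucket.
					log.Printf("%s/%s", name, k)
				}
				return nil
			})
		})
	})
	if err != nil {
		log.Fatal(err)
	}
}
```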
## How does bstore compare to sqlite?
Sqlite is a great library, but Go applications that require cgo are hard to
cross-compile. With bstore, cross-compiling to most Go-supported platforms
stays trivial. Although bstore is much more limited in many aspects than
sqlite, it also offers some advantages.
stays trivial (though not plan9, unfortunately). Although bstore is much more
limited in many aspects than sqlite, it also offers some advantages. Some
points of comparison:
- Cross-compilation and reproducibility: Trivial with bstore due to pure Go,
much harder with sqlite because of cgo.
- Code complexity: low with bstore (6k lines including comments/docs), high
- Code complexity: low with bstore (7k lines including comments/docs), high
with sqlite.
- Query language: mostly-type-checked function calls in bstore, free-form query
strings only checked at runtime with sqlite.
@ -33,19 +53,3 @@ sqlite, bstore also offers some advantages as well.
WAL or journal files).
- Test coverage: decent coverage but limited real-world for bstore, versus
extremely thoroughly tested and with enormous real-world use.
# FAQ
Q: Is bstore an ORM?
A: No. The API for bstore may look like an ORM. But instead of mapping bstore
"queries" (function calls) to an SQL query string, bstore executes them
directly without converting to a query language.
Q: How does bstore store its data?
A bstore database is a single-file BoltDB database. BoltDB provides ACID
properties. Bstore uses a BoltDB "bucket" (key/value store) for each Go type
stored, with multiple subbuckets: one for type definitions, one for the actual
data, and one bucket per index. BoltDB stores data in a B+tree. See format.md
for details.

View File

@ -24,7 +24,7 @@ func (f field) applyDefault(rv reflect.Value) error {
case kindBytes, kindBinaryMarshal, kindMap:
return nil
case kindSlice, kindStruct:
case kindSlice, kindStruct, kindArray:
return f.Type.applyDefault(rv)
case kindBool, kindInt, kindInt8, kindInt16, kindInt32, kindInt64, kindUint, kindUint8, kindUint16, kindUint32, kindUint64, kindFloat32, kindFloat64, kindString, kindTime:
@ -53,9 +53,9 @@ func (f field) applyDefault(rv reflect.Value) error {
}
// only for recursing. we do not support recursing into maps because it would
// involve more work making values settable. and how sensible it it anyway?
// involve more work making values settable. and how sensible is it anyway?
func (ft fieldType) applyDefault(rv reflect.Value) error {
if ft.Ptr && (rv.IsZero() || rv.IsNil()) {
if ft.Ptr && rv.IsZero() {
return nil
} else if ft.Ptr {
rv = rv.Elem()
@ -64,12 +64,19 @@ func (ft fieldType) applyDefault(rv reflect.Value) error {
case kindSlice:
n := rv.Len()
for i := 0; i < n; i++ {
if err := ft.List.applyDefault(rv.Index(i)); err != nil {
if err := ft.ListElem.applyDefault(rv.Index(i)); err != nil {
return err
}
}
case kindArray:
n := ft.ArrayLength
for i := 0; i < n; i++ {
if err := ft.ListElem.applyDefault(rv.Index(i)); err != nil {
return err
}
}
case kindStruct:
for _, nf := range ft.Fields {
for _, nf := range ft.structFields {
nfv := rv.FieldByIndex(nf.structField.Index)
if err := nf.applyDefault(nfv); err != nil {
return err

vendor/github.com/mjl-/bstore/doc.go (generated, vendored)
View File

@ -1,5 +1,5 @@
/*
Package bstore is a database library for storing and querying Go struct data.
Package bstore is a database library for storing and querying Go values.
Bstore is designed as a small, pure Go library that still provides most of
the common data consistency requirements for modest database use cases. Bstore
@ -9,7 +9,7 @@ Bstore implements autoincrementing primary keys, indices, default values,
enforcement of nonzero, unique and referential integrity constraints, automatic
schema updates and a query API for combining filters/sorting/limits. Queries
are planned and executed using indices for fast execution where possible.
Bstores is designed with the Go type system in mind: you typically don't have to
Bstore is designed with the Go type system in mind: you typically don't have to
write any (un)marshal code for your types.
# Field types
@ -21,7 +21,7 @@ types, but not pointers to pointers:
- uint (as uint32), uint8, uint16, uint32, uint64
- bool, float32, float64, string, []byte
- Maps, with keys and values of any supported type, except keys with pointer types.
- Slices, with elements of any supported type.
- Slices and arrays, with elements of any supported type.
- time.Time
- Types that implement encoding.BinaryMarshaler and encoding.BinaryUnmarshaler, useful
for struct types with state in private fields. Do not change the
@ -32,24 +32,27 @@ Note: int and uint are stored as int32 and uint32, for compatibility of database
files between 32bit and 64bit systems. Where possible, use explicit (u)int32 or
(u)int64 types.
Embedded structs are handled by storing the individual fields of the embedded
struct. The named embedded type is not part of the type schema, and can
currently only be used with UpdateField and UpdateFields, not for filtering.
Cyclic types are supported, but cyclic data is not. Attempting to store cyclic
data will likely result in a stack overflow panic.
Anonymous struct fields are handled by treating each of the anonymous struct's
fields as the type's own fields. The named embedded type is not part of the type
schema, and with a Query it can currently only be used with UpdateField and
UpdateFields, not for filtering.
Bstore embraces the use of Go zero values. Use zero values, possibly pointers,
where you would use NULL values in SQL.
Types that have not yet been implemented: interface values, (fixed length) arrays,
complex numbers.
# Struct tags
The typical Go struct can be stored in the database. The first field of a
struct type is its primary key, and must always be unique. Additional behaviour
can be configured through struct tag "bstore". The values are comma-separated.
Typically one word, but some have multiple space-separated words:
struct type is its primary key and must always be unique. For an integer type,
inserting a zero value automatically replaces it with the next sequence number
by default. Additional behaviour can be configured through struct tag "bstore".
The values are comma-separated. Typically one word, but some have multiple
space-separated words:
- "-" ignores the field entirely.
- "-" ignores the field entirely, not stored.
- "name <fieldname>", use "fieldname" instead of the Go type field name.
- "nonzero", enforces that field values are not the zero value.
- "noauto", only valid for integer types, and only for the primary key. By
@ -57,16 +60,19 @@ Typically one word, but some have multiple space-separated words:
assigned on insert when it is 0. With noauto inserting a 0 value results in an
error. For primary keys of other types inserting the zero value always results
in an error.
- "index" or "index <field1+field2+...> [<name>]", adds an index. In the first
form, the index is on the field on which the tag is specified, and the index
name is the same as the field name. In the second form multiple fields can be
specified, and an optional name. The first field must be the field on which
the tag is specified. The field names are +-separated. The default name for
the second form is the same +-separated string but can be set explicitly to
the second parameter. An index can only be set for basic integer types, bools,
time and strings. Indices are automatically (re)created when registering a
type.
- "unique" or "unique <field1+field2+...> [<name>]", adds an index as with
- "index" or "index <field1>+<field2>+<...> [<name>]", adds an index. In the
first form, the index is on the field on which the tag is specified, and the
index name is the same as the field name. In the second form multiple fields can
be specified, and an optional name. The first field must be the field on which
the tag is specified. The field names are +-separated. The default name for the
second form is the same +-separated string but can be set explicitly with the
second parameter. An index can only be set for basic integer types, bools, time
and strings. A field of slice type can also have an index (but not a unique
index, and only one slice field per index), allowing fast lookup of any single
value in the slice with Query.FilterIn. Indices are automatically (re)created
when registering a type. Fields with a pointer type cannot have an index.
String values used in an index cannot contain a \0.
- "unique" or "unique <field1>+<field2>+<...> [<name>]", adds an index as with
"index" and also enforces a unique constraint. For time.Time the timezone is
ignored for the uniqueness check.
- "ref <type>", enforces that the value exists as primary key for "type".
@ -80,8 +86,8 @@ Typically one word, but some have multiple space-separated words:
Times are parsed as time.RFC3339 otherwise. Supported types: bool
("true"/"false"), integers, floats, strings. Value is not quoted and no escaping
of special characters, like the comma that separates struct tag words, is
possible. Defaults are also replaced on fields in nested structs and
slices, but not in maps.
possible. Defaults are also replaced on fields in nested structs, slices
and arrays, but not in maps.
- "typename <name>", override name of the type. The name of the Go type is
used by default. Can only be present on the first field (primary key).
Useful for doing schema updates.
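A hypothetical pair of types pulling several of these tags together (illustration only, not taken from this diff):

```go
// Group and User are hypothetical types illustrating the tags above.
type Group struct {
	ID   int64  // First field is the primary key; zero values get the next sequence number.
	Name string `bstore:"unique,nonzero"` // Unique index; the empty string is rejected.
}

type User struct {
	ID      int64
	Email   string   `bstore:"unique"`     // Unique index on Email.
	GroupID int64    `bstore:"ref Group"`  // Must exist as a Group primary key.
	Lang    string   `bstore:"default en"` // Zero value replaced with "en" on insert.
	Domains []string `bstore:"index"`      // Index on a slice: one index key per element.
}
```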
@ -89,18 +95,14 @@ Typically one word, but some have multiple space-separated words:
# Schema updates
Before using a Go type, you must register it for use with the open database by
passing a (zero) value of that type to the Open or Register functions. For each
type, a type definition is stored in the database. If a type has an updated
definition since the previous database open, a new type definition is added to
the database automatically and any required modifications are made: Indexes
(re)created, fields added/removed, new nonzero/unique/reference constraints
validated.
passing a (possibly zero) value of that type to the Open or Register functions.
For each type, a type definition is stored in the database. If a type has an
updated definition since the previous database open, a new type definition is
added to the database automatically and any required modifications are made and
checked: Indexes (re)created, fields added/removed, new
nonzero/unique/reference constraints validated.
If data/types cannot be updated automatically (e.g. converting an int field into
a string field), custom data migration code is needed. You may have to keep
track of a data/schema version.
As a special case, you can switch field types between pointer and non-pointer
As a special case, you can change field types between pointer and non-pointer
types. With one exception: changing from pointer to non-pointer where the type
has a field that must be nonzero is not allowed. The on-disk encoding will not be
changed, and nil pointers will turn into zero values, and zero values into nil
@ -110,33 +112,95 @@ Because named embed structs are not part of the type definition, you can
wrap/unwrap fields into a embed/anonymous struct field. No new type definition
is created.
# BoltDB
Some schema conversions are not allowed: in some cases due to architectural
limitations, in others because the constraint checks haven't been implemented
yet, or because the parsing code does not yet know how to parse the old
on-disk values into the updated Go types. If you need a conversion that is not
supported, you will need to write a manual conversion, and you will have to
keep track of whether the update has been executed.
BoltDB is used as underlying storage. Bolt provides ACID transactions, storing
its data in a B+tree. Only a single write transaction can be active at a time,
but otherwise multiple read-only transactions can be active. Do not start a
blocking read-only transaction while holding a writable transaction or vice
versa, this will cause deadlock.
Changes that are allowed:
Bolt uses Go types that are memory mapped to the database file. This means bolt
database files cannot be transferred between machines with different endianness.
Bolt uses explicit widths for its types, so files can be transferred between
32bit and 64bit machines of same endianness.
- From smaller to larger integer types (same signedness).
- Removal of "noauto" on primary keys (always integer types). This updates the
"next sequence" counter automatically to continue after the current maximum
value.
- Adding/removing/modifying an index, including a unique index. When a unique
index is added, the current records are verified to be unique.
- Adding/removing a reference. When a reference is added, the current records
are verified to be valid references.
- Add/remove a nonzero constraint. Existing records are verified.
Conversions that are not currently allowed, but may be in the future:
- Signedness of integer types. With a one-time check that old values fit in the new
type, this could be allowed in the future.
- Conversions between basic types: strings, []byte, integers, floats, boolean.
Checks would have to be added for some of these conversions. For example,
from string to integer: the on-disk string values would have to be valid
integers.
- Types of primary keys cannot be changed, not even from one integer type to a
wider integer type of the same signedness.
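A sketch of where these updates and checks happen: registering types when opening the database (continuing the hypothetical User/Group example above; the ctx parameter follows this commit's context additions):

```go
// Opening registers the types; differences with the stored type
// definitions are applied and validated here, within the rules above.
ctx := context.Background()
db, err := bstore.Open(ctx, "testdata/app.db", nil, User{}, Group{})
if err != nil {
	// For example bstore.ErrZero when a newly added nonzero
	// constraint is violated by existing records.
	log.Fatal(err)
}
defer db.Close()
```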
# BoltDB and storage
BoltDB is used as underlying storage. BoltDB stores key/values in a single
file, in multiple/nested buckets (namespaces) in a B+tree and provides ACID
transactions. Either a single write transaction or multiple read-only
transactions can be active at a time. Do not start a blocking read-only
transaction while holding a writable transaction or vice versa, this will cause
deadlock.
BoltDB returns Go values that are memory mapped to the database file. This
means BoltDB/bstore database files cannot be transferred between machines with
different endianness. BoltDB uses explicit widths for its types, so files can
be transferred between 32bit and 64bit machines of same endianness. While
BoltDB returns read-only memory mapped Go values, bstore only ever returns
parsed/copied regular writable Go values that require no special programmer
attention.
For each Go type opened for a database file, bstore ensures a BoltDB bucket
exists with two subbuckets:
- "types", with type descriptions of the stored records. Each time the database
file is opened with a modified Go type (add/removed/modified
field/type/bstore struct tag), a new type description is automatically added,
identified by sequence number.
- "records", containing all data, with the type's primary key as BoltDB key,
and the encoded remaining fields as value. The encoding starts with a
reference to a type description.
For each index, another subbucket is created, its name starting with "index.".
The stored keys consist of the index fields followed by the primary key, and an
empty value.
# Limitations
Bstore has limitations, not all of which are architectural so may be fixed in
the future.
Bstore does not implement the equivalent of SQL joins, aggregates, and many
other concepts.
Filtering/comparing/sorting on pointer fields is not currently allowed. Pointer
fields cannot have a (unique) index due to the current index format. Using zero
values is recommended instead for now.
Filtering/comparing/sorting on pointer fields is not allowed. Pointer fields
cannot have a (unique) index. Use non-pointer values with the zero value as the
equivalent of a nil pointer.
Integer field types can be expanded to wider types, but not to a different
signedness or a smaller integer (fewer bits). The primary key of a type cannot
currently be changed.
The first field of a stored struct is always the primary key. Autoincrement is
only available for the primary key.
The first field of a struct is always the primary key. Types require an
explicit primary key. Autoincrement is only available for the primary key.
BoltDB opens the database file with a lock. Only one process can have the
database open at a time.
An index stored on disk in BoltDB can consume more disk space than other
database systems would: For each record, the indexed field(s) and primary key
are stored in full. Because bstore uses BoltDB as key/value store, and doesn't
manage disk pages itself, it cannot as efficiently pack an index page with many
records.
Interface values cannot be stored. This would require storing the type along
with the value. Instead, use a type that is a BinaryMarshaler.
Values of builtin type "complex" cannot be stored.
*/
package bstore

View File

@ -63,7 +63,15 @@ func (ft fieldType) equal(ov, v reflect.Value) (r bool) {
return false
}
for i := 0; i < n; i++ {
if !ft.List.equal(ov.Index(i), v.Index(i)) {
if !ft.ListElem.equal(ov.Index(i), v.Index(i)) {
return false
}
}
return true
case kindArray:
n := ft.ArrayLength
for i := 0; i < n; i++ {
if !ft.ListElem.equal(ov.Index(i), v.Index(i)) {
return false
}
}
@ -78,7 +86,7 @@ func (ft fieldType) equal(ov, v reflect.Value) (r bool) {
}
return bytes.Equal(obuf, buf)
case kindStruct:
for _, f := range ft.Fields {
for _, f := range ft.structFields {
fov := ov.FieldByIndex(f.structField.Index)
fv := v.FieldByIndex(f.structField.Index)
if !f.Type.equal(fov, fv) {

View File

@ -10,6 +10,8 @@ import (
bolt "go.etcd.io/bbolt"
)
// todo optimize: do not fetch full record if we can apply the filters with just the values we glean from the index key.
// exec represents the execution of a query plan.
type exec[T any] struct {
q *Query[T]
@ -94,6 +96,13 @@ func (e *exec[T]) nextKey(write, value bool) ([]byte, T, error) {
q := e.q
if q.err == nil {
select {
case <-q.ctxDone:
q.error(q.ctx.Err())
default:
}
}
if q.err != nil {
return nil, zero, q.err
}
@ -424,6 +433,25 @@ func (e *exec[T]) checkFilter(p *pair[T]) (rok bool, rerr error) {
return
}
}
case filterInSlice[T]:
v, err := p.Value(e)
if err != nil {
q.error(err)
return false, err
}
rv := reflect.ValueOf(v)
frv := rv.FieldByIndex(f.field.structField.Index)
n := frv.Len()
var have bool
for i := 0; i < n; i++ {
if f.field.Type.ListElem.equal(frv.Index(i), f.rvalue) {
have = true
break
}
}
if !have {
return
}
case filterCompare[T]:
v, err := p.Value(e)
if err != nil {
@ -531,10 +559,10 @@ func compare(k kind, a, b reflect.Value) int {
}
func (e *exec[T]) sort() {
// todo: We should check whether we actually need to load values. We're just
// always it now for the time being because SortStableFunc isn't going to
// give us a *pair (even though it could because of the slice) so we
// couldn't set/cache the value T during sorting.
// todo: We should check whether we actually need to load values. We're
// always loading it for the time being because SortStableFunc isn't
// going to give us a *pair (even though it could because of the slice)
// so we couldn't set/cache the value T during sorting.
q := e.q
for i := range e.data {
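From the caller's side, the new per-key context check above means a long scan stops once its context is done; a sketch (hypothetical Msg type as before):

```go
// The query checks ctx between keys and surfaces ctx.Err().
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
defer cancel()

msgs, err := bstore.QueryDB[Msg](ctx, db).List()
if err != nil {
	// context.DeadlineExceeded if the timeout passed mid-scan.
	log.Println(err)
}
_ = msgs
```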

View File

@ -13,15 +13,17 @@ import (
// Types returns the types present in the database, regardless of whether they
// are currently registered using Open or Register. Useful for exporting data
// with Keys and Records.
func (db *DB) Types() ([]string, error) {
var types []string
err := db.Read(func(tx *Tx) error {
return tx.btx.ForEach(func(bname []byte, b *bolt.Bucket) error {
// note: we do not track stats for types operations.
func (tx *Tx) Types() ([]string, error) {
if err := tx.ctx.Err(); err != nil {
return nil, err
}
types = append(types, string(bname))
return nil
})
var types []string
err := tx.btx.ForEach(func(bname []byte, b *bolt.Bucket) error {
// note: we do not track stats for types operations.
types = append(types, string(bname))
return nil
})
if err != nil {
return nil, err
@ -31,9 +33,12 @@ func (db *DB) Types() ([]string, error) {
// prepareType prepares typeName for export/introspection with DB.Keys,
// DB.Record, DB.Records. It is different in that it does not require a
// reflect.Type to parse into. It parses to a map, e.g. for export to JSON. The
// returned typeVersion has no structFields set in its fields.
// reflect.Type to parse into. It parses to a map, e.g. for export to JSON.
func (db *DB) prepareType(tx *Tx, typeName string) (map[uint32]*typeVersion, *typeVersion, *bolt.Bucket, []string, error) {
if err := tx.ctx.Err(); err != nil {
return nil, nil, nil, nil, err
}
rb, err := tx.recordsBucket(typeName, 0.5)
if err != nil {
return nil, nil, nil, nil, err
@ -51,6 +56,7 @@ func (db *DB) prepareType(tx *Tx, typeName string) (map[uint32]*typeVersion, *ty
if err != nil {
return err
}
versions[ntv.Version] = ntv
if tv == nil || ntv.Version > tv.Version {
tv = ntv
@ -74,23 +80,28 @@ func (db *DB) prepareType(tx *Tx, typeName string) (map[uint32]*typeVersion, *ty
// Keys returns the parsed primary keys for the type "typeName". The type does
// not have to be registered with Open or Register. For use with Record(s) to
// export data.
func (db *DB) Keys(typeName string, fn func(pk any) error) error {
return db.Read(func(tx *Tx) error {
_, tv, rb, _, err := db.prepareType(tx, typeName)
if err != nil {
return err
func (tx *Tx) Keys(typeName string, fn func(pk any) error) error {
_, tv, rb, _, err := tx.db.prepareType(tx, typeName)
if err != nil {
return err
}
ctxDone := tx.ctx.Done()
v := reflect.New(reflect.TypeOf(tv.Fields[0].Type.zeroKey())).Elem()
return rb.ForEach(func(bk, bv []byte) error {
tx.stats.Records.Cursor++
select {
case <-ctxDone:
return tx.ctx.Err()
default:
}
// todo: do not pass nil parser?
v := reflect.New(reflect.TypeOf(tv.Fields[0].Type.zero(nil))).Elem()
return rb.ForEach(func(bk, bv []byte) error {
tx.stats.Records.Cursor++
if err := parsePK(v, bk); err != nil {
return err
}
return fn(v.Interface())
})
if err := parsePK(v, bk); err != nil {
return err
}
return fn(v.Interface())
})
}
@ -98,108 +109,109 @@ func (db *DB) Keys(typeName string, fn func(pk any) error) error {
// "Fields" is set to the fields of the type. The type does not have to be
// registered with Open or Register. Record parses the data without the Go
// type present. BinaryMarshal fields are returned as bytes.
func (db *DB) Record(typeName, key string, fields *[]string) (map[string]any, error) {
var r map[string]any
err := db.Read(func(tx *Tx) error {
versions, tv, rb, xfields, err := db.prepareType(tx, typeName)
if err != nil {
return err
}
*fields = xfields
func (tx *Tx) Record(typeName, key string, fields *[]string) (map[string]any, error) {
versions, tv, rb, xfields, err := tx.db.prepareType(tx, typeName)
if err != nil {
return nil, err
}
*fields = xfields
var kv any
switch tv.Fields[0].Type.Kind {
case kindBool:
switch key {
case "true":
kv = true
case "false":
kv = false
default:
err = fmt.Errorf("%w: invalid bool %q", ErrParam, key)
}
case kindInt8:
kv, err = strconv.ParseInt(key, 10, 8)
case kindInt16:
kv, err = strconv.ParseInt(key, 10, 16)
case kindInt32:
kv, err = strconv.ParseInt(key, 10, 32)
case kindInt:
kv, err = strconv.ParseInt(key, 10, 32)
case kindInt64:
kv, err = strconv.ParseInt(key, 10, 64)
case kindUint8:
kv, err = strconv.ParseUint(key, 10, 8)
case kindUint16:
kv, err = strconv.ParseUint(key, 10, 16)
case kindUint32:
kv, err = strconv.ParseUint(key, 10, 32)
case kindUint:
kv, err = strconv.ParseUint(key, 10, 32)
case kindUint64:
kv, err = strconv.ParseUint(key, 10, 64)
case kindString:
kv = key
case kindBytes:
kv = []byte(key) // todo: or decode from base64?
var kv any
switch tv.Fields[0].Type.Kind {
case kindBool:
switch key {
case "true":
kv = true
case "false":
kv = false
default:
return fmt.Errorf("internal error: unknown primary key kind %v", tv.Fields[0].Type.Kind)
}
if err != nil {
return err
}
pkv := reflect.ValueOf(kv)
kind, err := typeKind(pkv.Type())
if err != nil {
return err
}
if kind != tv.Fields[0].Type.Kind {
// Convert from various int types above to required type. The ParseInt/ParseUint
// calls already validated that the values fit.
pkt := reflect.TypeOf(tv.Fields[0].Type.zero(nil))
pkv = pkv.Convert(pkt)
}
k, err := packPK(pkv)
if err != nil {
return err
err = fmt.Errorf("%w: invalid bool %q", ErrParam, key)
}
case kindInt8:
kv, err = strconv.ParseInt(key, 10, 8)
case kindInt16:
kv, err = strconv.ParseInt(key, 10, 16)
case kindInt32:
kv, err = strconv.ParseInt(key, 10, 32)
case kindInt:
kv, err = strconv.ParseInt(key, 10, 32)
case kindInt64:
kv, err = strconv.ParseInt(key, 10, 64)
case kindUint8:
kv, err = strconv.ParseUint(key, 10, 8)
case kindUint16:
kv, err = strconv.ParseUint(key, 10, 16)
case kindUint32:
kv, err = strconv.ParseUint(key, 10, 32)
case kindUint:
kv, err = strconv.ParseUint(key, 10, 32)
case kindUint64:
kv, err = strconv.ParseUint(key, 10, 64)
case kindString:
kv = key
case kindBytes:
kv = []byte(key) // todo: or decode from base64?
default:
return nil, fmt.Errorf("internal error: unknown primary key kind %v", tv.Fields[0].Type.Kind)
}
if err != nil {
return nil, err
}
pkv := reflect.ValueOf(kv)
kind, err := typeKind(pkv.Type())
if err != nil {
return nil, err
}
if kind != tv.Fields[0].Type.Kind {
// Convert from various int types above to required type. The ParseInt/ParseUint
// calls already validated that the values fit.
pkt := reflect.TypeOf(tv.Fields[0].Type.zeroKey())
pkv = pkv.Convert(pkt)
}
k, err := packPK(pkv)
if err != nil {
return nil, err
}
tx.stats.Records.Get++
bv := rb.Get(k)
if bv == nil {
return ErrAbsent
}
record, err := parseMap(versions, k, bv)
if err != nil {
return err
}
r = record
return nil
})
return r, err
tx.stats.Records.Get++
bv := rb.Get(k)
if bv == nil {
return nil, ErrAbsent
}
record, err := parseMap(versions, k, bv)
if err != nil {
return nil, err
}
return record, nil
}
// Records calls "fn" for each record of "typeName". Records sets "fields" to
// the fields of the type. The type does not have to be registered with Open or
// Register. Record parses the data without the Go type present. BinaryMarshal
// fields are returned as bytes.
func (db *DB) Records(typeName string, fields *[]string, fn func(map[string]any) error) error {
return db.Read(func(tx *Tx) error {
versions, _, rb, xfields, err := db.prepareType(tx, typeName)
func (tx *Tx) Records(typeName string, fields *[]string, fn func(map[string]any) error) error {
versions, _, rb, xfields, err := tx.db.prepareType(tx, typeName)
if err != nil {
return err
}
*fields = xfields
ctxDone := tx.ctx.Done()
return rb.ForEach(func(bk, bv []byte) error {
tx.stats.Records.Cursor++
select {
case <-ctxDone:
return tx.ctx.Err()
default:
}
record, err := parseMap(versions, bk, bv)
if err != nil {
return err
}
*fields = xfields
return rb.ForEach(func(bk, bv []byte) error {
tx.stats.Records.Cursor++
record, err := parseMap(versions, bk, bv)
if err != nil {
return err
}
return fn(record)
})
return fn(record)
})
}
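With Types, Keys and Records now methods on Tx, a generic export loop looks roughly like this (a sketch; the ctx-taking DB.Read signature is assumed from this commit's context additions):

```go
// Export all records of all types as maps, e.g. for JSON output.
err := db.Read(ctx, func(tx *bstore.Tx) error {
	typeNames, err := tx.Types()
	if err != nil {
		return err
	}
	for _, name := range typeNames {
		var fields []string
		err := tx.Records(name, &fields, func(record map[string]any) error {
			fmt.Println(name, record)
			return nil
		})
		if err != nil {
			return err
		}
	}
	return nil
})
```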
@ -228,7 +240,7 @@ func parseMap(versions map[uint32]*typeVersion, bk, bv []byte) (record map[strin
r := map[string]any{}
v := reflect.New(reflect.TypeOf(tv.Fields[0].Type.zero(p))).Elem()
v := reflect.New(reflect.TypeOf(tv.Fields[0].Type.zeroKey())).Elem()
err := parsePK(v, bk)
if err != nil {
return nil, err
@ -243,12 +255,12 @@ func parseMap(versions map[uint32]*typeVersion, bk, bv []byte) (record map[strin
if fm.Nonzero(i) {
r[f.Name] = f.Type.parseValue(p)
} else {
r[f.Name] = f.Type.zero(p)
r[f.Name] = f.Type.zeroExportValue()
}
}
if len(p.buf) != 0 {
return nil, fmt.Errorf("%w: leftover data after parsing", ErrStore)
return nil, fmt.Errorf("%w: leftover data after parsing (%d %x %q)", ErrStore, len(p.buf), p.buf, p.buf)
}
return r, nil
@ -315,14 +327,21 @@ func (ft fieldType) parseValue(p *parser) any {
var l []any
for i := 0; i < n; i++ {
if fm.Nonzero(i) {
l = append(l, ft.List.parseValue(p))
l = append(l, ft.ListElem.parseValue(p))
} else {
// Always add zero elements, or we would
// change the number of elements in a list.
l = append(l, ft.List.zero(p))
l = append(l, ft.ListElem.zeroExportValue())
}
}
return l
case kindArray:
n := ft.ArrayLength
var l []any
for i := 0; i < n; i++ {
l = append(l, ft.ListElem.parseValue(p))
}
return l
case kindMap:
un := p.Uvarint()
n := p.checkInt(un)
@ -338,19 +357,19 @@ func (ft fieldType) parseValue(p *parser) any {
if fm.Nonzero(i) {
v = ft.MapValue.parseValue(p)
} else {
v = ft.MapValue.zero(p)
v = ft.MapValue.zeroExportValue()
}
m[k] = v
}
return m
case kindStruct:
fm := p.Fieldmap(len(ft.Fields))
fm := p.Fieldmap(len(ft.structFields))
m := map[string]any{}
for i, f := range ft.Fields {
for i, f := range ft.structFields {
if fm.Nonzero(i) {
m[f.Name] = f.Type.parseValue(p)
} else {
m[f.Name] = f.Type.zero(p)
m[f.Name] = f.Type.zeroExportValue()
}
}
return m
@ -359,7 +378,7 @@ func (ft fieldType) parseValue(p *parser) any {
panic("cannot happen")
}
var zerovalues = map[kind]any{
var zeroExportValues = map[kind]any{
kindBytes: []byte(nil),
kindBinaryMarshal: []byte(nil), // We don't have the actual type available, so we just return binary data.
kindBool: false,
@ -380,12 +399,53 @@ var zerovalues = map[kind]any{
kindSlice: []any(nil),
kindMap: map[string]any(nil),
kindStruct: map[string]any(nil),
// kindArray handled in zeroExportValue()
}
func (ft fieldType) zero(p *parser) any {
v, ok := zerovalues[ft.Kind]
// zeroExportValue returns the zero value for a fieldType for use with exporting.
func (ft fieldType) zeroExportValue() any {
if ft.Kind == kindArray {
ev := ft.ListElem.zeroExportValue()
l := make([]any, ft.ArrayLength)
for i := 0; i < ft.ArrayLength; i++ {
l[i] = ev
}
return l
}
v, ok := zeroExportValues[ft.Kind]
if !ok {
p.Errorf("internal error: unhandled zero value for field type %v", ft.Kind)
panic(fmt.Errorf("internal error: unhandled zero value for field type %v", ft.Kind))
}
return v
}
var zeroKeys = map[kind]any{
kindBytes: []byte(nil),
kindBool: false,
kindInt8: int8(0),
kindInt16: int16(0),
kindInt32: int32(0),
kindInt: int(0),
kindInt64: int64(0),
kindUint8: uint8(0),
kindUint16: uint16(0),
kindUint32: uint32(0),
kindUint: uint(0),
kindUint64: uint64(0),
kindString: "",
kindTime: zerotime,
// kindSlice handled in zeroKey()
}
// zeroKey returns the zero value for a fieldType for use with keys.
func (ft fieldType) zeroKey() any {
k := ft.Kind
if k == kindSlice {
k = ft.ListElem.Kind
}
v, ok := zeroKeys[k]
if !ok {
panic(fmt.Errorf("internal error: unhandled zero value for field type %v", ft.Kind))
}
return v
}

View File

@ -17,8 +17,8 @@ version is added to the "types" subbucket. Data is always inserted/updated with
the most recent type version. But the database may still hold data records
referencing older type versions. Bstore decodes a packed data record with the
referenced type version. For storage efficiency: the type version is reused for
many stored records, a self-describing format (like JSON) would duplicate the
field names in each stored record.
many stored records; a self-describing format (like JSON) would duplicate the
field names in each stored record.
# Record storage
@ -51,6 +51,8 @@ more space than the single bit and are stored consecutively after the fieldmap:
the zero value marked in the fieldmap.
- Slices use a uvarint for the number of elements, followed by a bitmap for
nonzero values, followed by the encoded nonzero elements.
- Arrays (fixed length) start with a bitmap for nonzero values, followed by
the encoded nonzero elements.
- Maps use a uvarint for the number of key/value pairs, followed by a
fieldmap for the values (the keys are always present), followed by each
pair: key (always present), value (only if nonzero); key, value; etc.
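A toy version of the fieldmap described above (not bstore's actual packer): one bit per field, most significant bit first, with only the nonzero values encoded after it.

```go
// fieldmapBits: bit i is set when field i is nonzero, meaning its
// encoded value follows the fieldmap in the record.
func fieldmapBits(nonzero []bool) []byte {
	buf := make([]byte, (len(nonzero)+7)/8)
	for i, nz := range nonzero {
		if nz {
			buf[i/8] |= 1 << (7 - i%8) // Most significant bit first.
		}
	}
	return buf
}
```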
@ -71,7 +73,7 @@ unsigned integer, or between string and []byte.
Indexes are stored in subbuckets, named starting with "index." followed by the
index name. Keys are a self-delimiting encodings of the fields that make up the
key, followed by the primary key for the "records" bucket. Values are always
empty in index buckets. For bool and integer types, the same fixed with
empty in index buckets. For bool and integer types, the same fixed width
encoding as for primary keys in the "records" subbucket is used. Strings are
encoded by their bytes (no \0 allowed) followed by a delimiting \0. Unlike
primary keys, an index can cover a field with type time.Time. Times are encoded

vendor/github.com/mjl-/bstore/keys.go (generated, vendored)
View File

@ -10,7 +10,7 @@ import (
/*
The records buckets map a primary key to the record data. The primary key is of
a form that we can scan/range over. So fixed with for integers. For strings and
a form that we can scan/range over. So fixed width for integers. For strings and
bytes they are just their byte representation. We do not store the PK in the
record data. This means we cannot store a time.Time as primary key, because we
cannot have the timezone encoded for comparison reasons.
@ -150,7 +150,12 @@ fields:
if err != nil {
break
}
switch f.Type.Kind {
ft := f.Type
if ft.Kind == kindSlice {
// For an index on a slice, we store each value in the slice in a separate index key.
ft = *ft.ListElem
}
switch ft.Kind {
case kindString:
for i, b := range buf {
if b == 0 {
@ -174,6 +179,8 @@ fields:
take(8)
case kindTime:
take(8 + 4)
default:
err = fmt.Errorf("%w: unhandled kind %v for index key", ErrStore, ft.Kind)
}
}
if err != nil {
@ -203,9 +210,14 @@ fields:
return pk, nil, nil
}
// packKey returns a key to store in an index: first the prefix without pk, then
// the prefix including pk.
func (idx *index) packKey(rv reflect.Value, pk []byte) ([]byte, []byte, error) {
type indexkey struct {
pre []byte // Packed fields excluding PK, a slice of full.
full []byte // Packed fields including PK.
}
// packKey returns keys to store in an index: first the key prefixes without pk, then
// the prefixes including pk.
func (idx *index) packKey(rv reflect.Value, pk []byte) ([]indexkey, error) {
var l []reflect.Value
for _, f := range idx.Fields {
frv := rv.FieldByIndex(f.structField.Index)
@ -215,68 +227,108 @@ func (idx *index) packKey(rv reflect.Value, pk []byte) ([]byte, []byte, error) {
}
// packIndexKeys packs values from l, followed by the pk.
// It returns the key prefix (without pk), and full key with pk.
func packIndexKeys(l []reflect.Value, pk []byte) ([]byte, []byte, error) {
var prek, ik []byte
// It returns the key prefixes (without pk), and full keys with pk.
func packIndexKeys(l []reflect.Value, pk []byte) ([]indexkey, error) {
ikl := []indexkey{{}}
for _, frv := range l {
k, err := typeKind(frv.Type())
bufs, err := packIndexKey(frv)
if err != nil {
return nil, nil, err
return nil, err
}
var buf []byte
switch k {
case kindBool:
buf = []byte{0}
if frv.Bool() {
buf[0] = 1
if len(bufs) == 1 {
for i := range ikl {
ikl[i].full = append(ikl[i].full, bufs[0]...)
}
case kindInt8:
buf = []byte{byte(int8(frv.Int()) + math.MinInt8)}
case kindInt16:
buf = binary.BigEndian.AppendUint16(nil, uint16(int16(frv.Int())+math.MinInt16))
case kindInt32:
buf = binary.BigEndian.AppendUint32(nil, uint32(int32(frv.Int())+math.MinInt32))
case kindInt:
i := frv.Int()
if i < math.MinInt32 || i > math.MaxInt32 {
return nil, nil, fmt.Errorf("%w: int value %d does not fit in int32", ErrParam, i)
} else if len(ikl) == 1 && len(bufs) > 1 {
nikl := make([]indexkey, len(bufs))
for i, buf := range bufs {
nikl[i] = indexkey{full: append(append([]byte{}, ikl[0].full...), buf...)}
}
buf = binary.BigEndian.AppendUint32(nil, uint32(int32(i)+math.MinInt32))
case kindInt64:
buf = binary.BigEndian.AppendUint64(nil, uint64(frv.Int()+math.MinInt64))
case kindUint8:
buf = []byte{byte(frv.Uint())}
case kindUint16:
buf = binary.BigEndian.AppendUint16(nil, uint16(frv.Uint()))
case kindUint32:
buf = binary.BigEndian.AppendUint32(nil, uint32(frv.Uint()))
case kindUint:
i := frv.Uint()
if i > math.MaxUint32 {
return nil, nil, fmt.Errorf("%w: uint value %d does not fit in uint32", ErrParam, i)
}
buf = binary.BigEndian.AppendUint32(nil, uint32(i))
case kindUint64:
buf = binary.BigEndian.AppendUint64(nil, uint64(frv.Uint()))
case kindString:
buf = []byte(frv.String())
for _, c := range buf {
if c == 0 {
return nil, nil, fmt.Errorf("%w: string used as index key cannot have \\0", ErrParam)
}
}
buf = append(buf, 0)
case kindTime:
tm := frv.Interface().(time.Time)
buf = binary.BigEndian.AppendUint64(nil, uint64(tm.Unix()+math.MinInt64))
buf = binary.BigEndian.AppendUint32(buf, uint32(tm.Nanosecond()))
default:
return nil, nil, fmt.Errorf("internal error: bad type %v for index", frv.Type()) // todo: should be caught when making index type
ikl = nikl
} else if len(bufs) == 0 {
return nil, nil
} else {
return nil, fmt.Errorf("%w: multiple index fields that result in multiple values, or no data for index key, %d keys so far, %d new buffers", ErrStore, len(ikl), len(bufs))
}
ik = append(ik, buf...)
}
n := len(ik)
ik = append(ik, pk...)
prek = ik[:n]
return prek, ik, nil
for i := range ikl {
n := len(ikl[i].full)
ikl[i].full = append(ikl[i].full, pk...)
ikl[i].pre = ikl[i].full[:n]
}
return ikl, nil
}
func packIndexKey(frv reflect.Value) ([][]byte, error) {
k, err := typeKind(frv.Type())
if err != nil {
return nil, err
}
var buf []byte
switch k {
case kindBool:
buf = []byte{0}
if frv.Bool() {
buf[0] = 1
}
case kindInt8:
buf = []byte{byte(int8(frv.Int()) + math.MinInt8)}
case kindInt16:
buf = binary.BigEndian.AppendUint16(nil, uint16(int16(frv.Int())+math.MinInt16))
case kindInt32:
buf = binary.BigEndian.AppendUint32(nil, uint32(int32(frv.Int())+math.MinInt32))
case kindInt:
i := frv.Int()
if i < math.MinInt32 || i > math.MaxInt32 {
return nil, fmt.Errorf("%w: int value %d does not fit in int32", ErrParam, i)
}
buf = binary.BigEndian.AppendUint32(nil, uint32(int32(i)+math.MinInt32))
case kindInt64:
buf = binary.BigEndian.AppendUint64(nil, uint64(frv.Int()+math.MinInt64))
case kindUint8:
buf = []byte{byte(frv.Uint())}
case kindUint16:
buf = binary.BigEndian.AppendUint16(nil, uint16(frv.Uint()))
case kindUint32:
buf = binary.BigEndian.AppendUint32(nil, uint32(frv.Uint()))
case kindUint:
i := frv.Uint()
if i > math.MaxUint32 {
return nil, fmt.Errorf("%w: uint value %d does not fit in uint32", ErrParam, i)
}
buf = binary.BigEndian.AppendUint32(nil, uint32(i))
case kindUint64:
buf = binary.BigEndian.AppendUint64(nil, uint64(frv.Uint()))
case kindString:
buf = []byte(frv.String())
for _, c := range buf {
if c == 0 {
return nil, fmt.Errorf("%w: string used as index key cannot have \\0", ErrParam)
}
}
buf = append(buf, 0)
case kindTime:
tm := frv.Interface().(time.Time)
buf = binary.BigEndian.AppendUint64(nil, uint64(tm.Unix()+math.MinInt64))
buf = binary.BigEndian.AppendUint32(buf, uint32(tm.Nanosecond()))
case kindSlice:
n := frv.Len()
bufs := make([][]byte, n)
for i := 0; i < n; i++ {
nbufs, err := packIndexKey(frv.Index(i))
if err != nil {
return nil, fmt.Errorf("packing element from slice field: %w", err)
}
if len(nbufs) != 1 {
return nil, fmt.Errorf("packing element from slice field resulted in multiple buffers (%d)", len(bufs))
}
bufs[i] = nbufs[0]
}
return bufs, nil
default:
return nil, fmt.Errorf("internal error: bad type %v for index", frv.Type()) // todo: should be caught when making index type
}
return [][]byte{buf}, nil
}
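The slice fan-out above can be pictured with a toy version: each element becomes one index key, encoded like a single value and ending with the primary key.

```go
// toySliceIndexKeys sketches index keys for a []string field (not the
// real packIndexKeys). Strings in index keys are their bytes plus a
// delimiting \0, followed by the primary key.
func toySliceIndexKeys(elems []string, pk []byte) [][]byte {
	keys := make([][]byte, 0, len(elems))
	for _, e := range elems {
		k := append([]byte(e), 0)
		k = append(k, pk...)
		keys = append(keys, k)
	}
	return keys
}
```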

View File

@ -12,207 +12,324 @@ func (ft fieldType) isZero(v reflect.Value) bool {
return true
}
if ft.Ptr {
return v.IsNil()
return v.IsZero()
}
switch ft.Kind {
case kindStruct:
for _, f := range ft.Fields {
for _, f := range ft.structFields {
if !f.Type.isZero(v.FieldByIndex(f.structField.Index)) {
return false
}
}
return true
}
// Use standard IsZero otherwise, also for kindBinaryMarshal.
return v.IsZero()
}
// checkNonzero compares ofields and nfields (from previous type schema vs newly
// created type schema) for nonzero struct tags. If an existing field got a
// nonzero struct tag added, we verify that there are indeed no zero values
// in the database. If there are, we return ErrZero.
// We ensure nonzero constraints when opening a database. An updated schema, with
// added nonzero constraints, can mean all records have to be checked. With cyclic
// types, we have to take care not to recurse, and for efficiency we want to only
// check fields/types that are affected. Steps:
//
// - Go through each field of the struct, and recurse into the field types,
// gathering the types and newly nonzero fields.
// - Propagate the need for nonzero checks to types that reference the changed
// types.
// - By now, if there was a new nonzero constraint, the top-level type will be
// marked as needing a check, so we'll read through all records and check all the
// immediate newly nonzero fields of a type, and recurse into fields of types that
// are marked as needing a check.
// nonzeroCheckType is tracked per reflect.Type that has been analysed (always the
// non-pointer type, i.e. a pointer is dereferenced). These types can be cyclic. We
// gather them for all types involved, including map and slice types and basic
// types, but "newlyNonzero" and "fields" will only be set for structs.
type nonzeroCheckType struct {
needsCheck bool
newlyNonzero []field // Fields in this type that have a new nonzero constraint themselves.
fields []field // All fields in a struct type.
// Types that reference this type. Used to propagate needsCheck to the top.
referencedBy map[reflect.Type]struct{}
}
func (ct *nonzeroCheckType) markRefBy(t reflect.Type) {
if t != nil {
ct.referencedBy[t] = struct{}{}
}
}
// checkNonzero compares ofields (optional previous type schema) and nfields (new
// type schema) for nonzero struct tags. If an existing field has a new nonzero
// constraint, we verify that there are indeed no zero values in the existing
// records. If there are, we return ErrZero. checkNonzero looks at (potentially
// cyclic) types referenced by fields.
func (tx *Tx) checkNonzero(st storeType, tv *typeVersion, ofields, nfields []field) error {
// First we gather paths that we need to check, so we can later simply
// execute those steps on all data we need to read.
paths := &follows{}
next:
for _, f := range nfields {
for _, of := range ofields {
if f.Name == of.Name {
err := f.checkNonzeroGather(&of, paths)
if err != nil {
return err
}
continue next
}
}
if err := f.checkNonzeroGather(nil, paths); err != nil {
return err
// Gather all new nonzero constraints on fields.
m := map[reflect.Type]*nonzeroCheckType{}
nonzeroCheckGather(m, st.Type, nil, ofields, nfields)
// Propagate the need for a check on all types due to a referenced type having a
// new nonzero constraint.
// todo: this can probably be done more elegantly, with fewer graph walks...
for t, ct := range m {
if ct.needsCheck {
nonzeroCheckPropagate(m, t, t, ct)
}
}
if len(paths.paths) == 0 {
// Common case, not reading all data.
// If needsCheck wasn't propagated to the top-level, there was no new nonzero
// constraint, and we're not going to read all the data. This is the common case
// when opening a database.
if !m[st.Type].needsCheck {
return nil
}
// Finally actually do the checks.
// todo: if there are only top-level fields to check, and we have an index, we can use the index check this without reading all data.
return tx.checkNonzeroPaths(st, tv, paths.paths)
// Read through all data, and check the new nonzero constraint.
// todo optimize: if there are only top-level fields to check, and we have indices on those fields, we can use the index to check this without reading all data.
return checkNonzeroRecords(tx, st, tv, m)
}
type follow struct {
mapKey, mapValue bool
field field
}
type follows struct {
current []follow
paths [][]follow
}
func (f *follows) push(ff follow) {
f.current = append(f.current, ff)
}
func (f *follows) pop() {
f.current = f.current[:len(f.current)-1]
}
func (f *follows) add() {
f.paths = append(f.paths, append([]follow{}, f.current...))
}
func (f field) checkNonzeroGather(of *field, paths *follows) error {
paths.push(follow{field: f})
defer paths.pop()
if f.Nonzero && (of == nil || !of.Nonzero) {
paths.add()
// Walk down fields, gathering their types (including those they reference), and
// marking needsCheck if any of a type's immediate field has a new nonzero
// constraint. The need for a check is not propagated to referencing types by this
// function.
func nonzeroCheckGather(m map[reflect.Type]*nonzeroCheckType, t, refBy reflect.Type, ofields, nfields []field) {
ct := m[t]
if ct != nil {
// Already gathered, don't recurse, for cyclic types.
ct.markRefBy(refBy)
return
}
if of != nil {
return f.Type.checkNonzeroGather(of.Type, paths)
ct = &nonzeroCheckType{
fields: nfields,
referencedBy: map[reflect.Type]struct{}{},
}
return nil
}
ct.markRefBy(refBy)
m[t] = ct
func (ft fieldType) checkNonzeroGather(oft fieldType, paths *follows) error {
switch ft.Kind {
case kindMap:
paths.push(follow{mapKey: true})
if err := ft.MapKey.checkNonzeroGather(*oft.MapKey, paths); err != nil {
return err
}
paths.pop()
paths.push(follow{mapValue: true})
if err := ft.MapValue.checkNonzeroGather(*oft.MapValue, paths); err != nil {
return err
}
paths.pop()
case kindSlice:
err := ft.List.checkNonzeroGather(*oft.List, paths)
if err != nil {
return err
}
case kindStruct:
next:
for _, ff := range ft.Fields {
for _, off := range oft.Fields {
if ff.Name == off.Name {
err := ff.checkNonzeroGather(&off, paths)
if err != nil {
return err
}
continue next
for _, f := range nfields {
// Check if this field is newly nonzero.
var of *field
for i := range ofields {
if f.Name == ofields[i].Name {
of = &ofields[i]
// Compare with existing field.
if f.Nonzero && !of.Nonzero {
ct.newlyNonzero = append(ct.newlyNonzero, f)
ct.needsCheck = true
}
}
err := ff.checkNonzeroGather(nil, paths)
if err != nil {
return err
break
}
}
// Check if this is a new field entirely, with nonzero constraint.
if of == nil && f.Nonzero {
ct.newlyNonzero = append(ct.newlyNonzero, f)
ct.needsCheck = true
}
// Descend into referenced types, adding references back to this type.
var oft *fieldType
if of != nil {
oft = &of.Type
}
ft := f.structField.Type
nonzeroCheckGatherFieldType(m, ft, t, oft, f.Type)
}
return nil
}
// checkNonzero reads through all records of a type, and checks that the fields
// gather new nonzero constraints for type "t", which is referenced by "refBy" (and
// will be marked as such). Type "t" is described by "nft" and optionally
// previously by "oft".
func nonzeroCheckGatherFieldType(m map[reflect.Type]*nonzeroCheckType, t, refBy reflect.Type, oft *fieldType, nft fieldType) {
// If this is a pointer type, dereference the reflect type.
if nft.Ptr {
t = t.Elem()
}
if nft.Kind == kindStruct {
var fofields []field
if oft != nil {
fofields = oft.structFields
}
nonzeroCheckGather(m, t, refBy, fofields, nft.structFields)
}
// Mark this type as gathered, so we don't process it again if we recurse.
ct := m[t]
if ct != nil {
ct.markRefBy(refBy)
return
}
ct = &nonzeroCheckType{
fields: nft.structFields,
referencedBy: map[reflect.Type]struct{}{},
}
ct.markRefBy(refBy)
m[t] = ct
switch nft.Kind {
case kindMap:
var koft, voft *fieldType
if oft != nil {
koft = oft.MapKey
voft = oft.MapValue
}
nonzeroCheckGatherFieldType(m, t.Key(), t, koft, *nft.MapKey)
nonzeroCheckGatherFieldType(m, t.Elem(), t, voft, *nft.MapValue)
case kindSlice:
var loft *fieldType
if oft != nil {
loft = oft.ListElem
}
nonzeroCheckGatherFieldType(m, t.Elem(), t, loft, *nft.ListElem)
case kindArray:
var loft *fieldType
if oft != nil {
loft = oft.ListElem
}
nonzeroCheckGatherFieldType(m, t.Elem(), t, loft, *nft.ListElem)
}
}
// Propagate that type "t" is affected by a new nonzero constraint and needs to be
// checked. The types referencing "t" are in ct.referencedBy. "origt" is the
// starting type for this propagation.
func nonzeroCheckPropagate(m map[reflect.Type]*nonzeroCheckType, origt, t reflect.Type, ct *nonzeroCheckType) {
for rt := range ct.referencedBy {
if rt == origt {
continue // End recursion.
}
m[rt].needsCheck = true
nonzeroCheckPropagate(m, origt, rt, m[rt])
}
}
// checkNonzeroPaths reads through all records of a type, and checks that the fields
// indicated by paths are nonzero. If not, ErrZero is returned.
func (tx *Tx) checkNonzeroPaths(st storeType, tv *typeVersion, paths [][]follow) error {
func checkNonzeroRecords(tx *Tx, st storeType, tv *typeVersion, m map[reflect.Type]*nonzeroCheckType) error {
rb, err := tx.recordsBucket(st.Current.name, st.Current.fillPercent)
if err != nil {
return err
}
ctxDone := tx.ctx.Done()
return rb.ForEach(func(bk, bv []byte) error {
tx.stats.Records.Cursor++
select {
case <-ctxDone:
return tx.ctx.Err()
default:
}
// todo optimize: instead of parsing the full record, use the fieldmap to see if the value is nonzero.
rv, err := st.parseNew(bk, bv)
if err != nil {
return err
}
// todo optimization: instead of parsing the full record, use the fieldmap to see if the value is nonzero.
for _, path := range paths {
frv := rv.FieldByIndex(path[0].field.structField.Index)
if err := path[0].field.checkNonzero(frv, path[1:]); err != nil {
return err
}
}
return nil
ct := m[st.Type]
return checkNonzeroFields(m, st.Type, ct.newlyNonzero, ct.fields, rv)
})
}
func (f field) checkNonzero(rv reflect.Value, path []follow) error {
if len(path) == 0 {
if !f.Nonzero {
return fmt.Errorf("internal error: checkNonzero: expected field to have Nonzero set")
}
if f.Type.isZero(rv) {
// checkNonzeroFields checks that the newly nonzero fields of a struct value are
// indeed nonzero, and walks down referenced types, checking the constraint.
func checkNonzeroFields(m map[reflect.Type]*nonzeroCheckType, t reflect.Type, newlyNonzero, fields []field, rv reflect.Value) error {
// Check the newly nonzero fields.
for _, f := range newlyNonzero {
frv := rv.FieldByIndex(f.structField.Index)
if f.Type.isZero(frv) {
return fmt.Errorf("%w: field %q", ErrZero, f.Name)
}
return nil
}
return f.Type.checkNonzero(rv, path)
}
func (ft fieldType) checkNonzero(rv reflect.Value, path []follow) error {
switch ft.Kind {
case kindMap:
follow := path[0]
path = path[1:]
key := follow.mapKey
if !key && !follow.mapValue {
return fmt.Errorf("internal error: following map, expected mapKey or mapValue, got %#v", follow)
}
iter := rv.MapRange()
for iter.Next() {
var err error
if key {
err = ft.MapKey.checkNonzero(iter.Key(), path)
} else {
err = ft.MapValue.checkNonzero(iter.Value(), path)
}
if err != nil {
// Descend into referenced types.
for _, f := range fields {
switch f.Type.Kind {
case kindMap, kindSlice, kindStruct, kindArray:
ft := f.structField.Type
if err := checkNonzeroFieldType(m, f.Type, ft, rv.FieldByIndex(f.structField.Index)); err != nil {
return err
}
}
}
return nil
}
// checkNonzeroFieldType walks down a value, and checks that its (struct) types
// don't violate nonzero constraints.
// Does not check whether the value itself is nonzero. If required, that has
// already been checked.
func checkNonzeroFieldType(m map[reflect.Type]*nonzeroCheckType, ft fieldType, t reflect.Type, rv reflect.Value) error {
if ft.Ptr {
t = t.Elem()
}
if !m[t].needsCheck {
return nil
}
if ft.Ptr && rv.IsZero() {
return nil
}
if ft.Ptr {
rv = rv.Elem()
}
unptr := func(t reflect.Type, ptr bool) reflect.Type {
if ptr {
return t.Elem()
}
return t
}
switch ft.Kind {
case kindMap:
kt := t.Key()
vt := t.Elem()
checkKey := m[unptr(kt, ft.MapKey.Ptr)].needsCheck
checkValue := m[unptr(vt, ft.MapValue.Ptr)].needsCheck
iter := rv.MapRange()
for iter.Next() {
if checkKey {
if err := checkNonzeroFieldType(m, *ft.MapKey, kt, iter.Key()); err != nil {
return err
}
}
if checkValue {
if err := checkNonzeroFieldType(m, *ft.MapValue, vt, iter.Value()); err != nil {
return err
}
}
}
case kindSlice:
et := t.Elem()
n := rv.Len()
for i := 0; i < n; i++ {
if err := ft.List.checkNonzero(rv.Index(i), path); err != nil {
if err := checkNonzeroFieldType(m, *ft.ListElem, et, rv.Index(i)); err != nil {
return err
}
}
case kindArray:
et := t.Elem()
n := ft.ArrayLength
for i := 0; i < n; i++ {
if err := checkNonzeroFieldType(m, *ft.ListElem, et, rv.Index(i)); err != nil {
return err
}
}
case kindStruct:
follow := path[0]
path = path[1:]
frv := rv.FieldByIndex(follow.field.structField.Index)
if err := follow.field.checkNonzero(frv, path); err != nil {
ct := m[t]
if err := checkNonzeroFields(m, t, ct.newlyNonzero, ct.fields, rv); err != nil {
return err
}
default:
return fmt.Errorf("internal error: checkNonzero with non-empty path, but kind %v", ft.Kind)
}
return nil
}
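Seen from the caller, these checks run when opening a database with a type that gained a nonzero constraint; a sketch with a hypothetical type:

```go
// Task was previously stored without the tag; reopening with the tag
// added triggers the record scan in checkNonzeroRecords above.
type Task struct {
	ID   int64
	Note string `bstore:"nonzero"`
}

_, err := bstore.Open(ctx, "testdata/tasks.db", nil, Task{})
// err wraps bstore.ErrZero if any stored Task has an empty Note.
```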

View File

@ -17,14 +17,14 @@ type fieldmap struct {
buf []byte // Bitmap, we write the next 0/1 at bit n.
n int // Fields seen so far.
offset int // In final output, we write buf back after finish. Only relevant for packing.
Errorf func(format string, args ...any)
errorf func(format string, args ...any)
}
// add bit to fieldmap indicating if the field is nonzero.
func (f *fieldmap) Field(nonzero bool) {
o := f.n / 8
if f.n >= f.max {
f.Errorf("internal error: too many fields, max %d", f.max)
f.errorf("internal error: too many fields, max %d", f.max)
}
if nonzero {
f.buf[o] |= 1 << (7 - f.n%8)
@ -46,7 +46,7 @@ type packer struct {
popped []*fieldmap // Completed fieldmaps, to be written back during finish.
}
func (p *packer) Errorf(format string, args ...any) {
func (p *packer) errorf(format string, args ...any) {
panic(packErr{fmt.Errorf(format, args...)})
}
@ -54,7 +54,7 @@ func (p *packer) Errorf(format string, args ...any) {
func (p *packer) PushFieldmap(n int) {
p.fieldmaps = append(p.fieldmaps, p.fieldmap)
buf := make([]byte, (n+7)/8)
p.fieldmap = &fieldmap{max: n, buf: buf, offset: p.offset, Errorf: p.Errorf}
p.fieldmap = &fieldmap{max: n, buf: buf, offset: p.offset, errorf: p.errorf}
p.Write(buf) // Updates offset. Write errors cause panic.
}
@ -62,7 +62,7 @@ func (p *packer) PushFieldmap(n int) {
// bytes during finish.
func (p *packer) PopFieldmap() {
if p.fieldmap.n != p.fieldmap.max {
p.Errorf("internal error: fieldmap n %d != max %d", p.fieldmap.n, p.fieldmap.max)
p.errorf("internal error: fieldmap n %d != max %d", p.fieldmap.n, p.fieldmap.max)
}
p.popped = append(p.popped, p.fieldmap)
p.fieldmap = p.fieldmaps[len(p.fieldmaps)-1]
@ -73,7 +73,7 @@ func (p *packer) PopFieldmap() {
// returning the final bytes representation of this record.
func (p *packer) Finish() []byte {
if p.fieldmap != nil {
p.Errorf("internal error: leftover fieldmap during finish")
p.errorf("internal error: leftover fieldmap during finish")
}
buf := p.b.Bytes()
for _, f := range p.popped {
@ -90,7 +90,7 @@ func (p *packer) Field(nonzero bool) {
func (p *packer) Write(buf []byte) (int, error) {
n, err := p.b.Write(buf)
if err != nil {
p.Errorf("write: %w", err)
p.errorf("write: %w", err)
}
if n > 0 {
p.offset += n
@ -149,11 +149,12 @@ func (tv typeVersion) pack(p *packer, rv reflect.Value) {
nrv := rv.FieldByIndex(f.structField.Index)
if f.Type.isZero(nrv) {
if f.Nonzero {
p.Errorf("%w: %q", ErrZero, f.Name)
p.errorf("%w: %q", ErrZero, f.Name)
}
p.Field(false)
// Pretend to pack to get the nonzero checks.
if nrv.IsValid() && (nrv.Kind() != reflect.Ptr || !nrv.IsNil()) {
// todo: we should be able to do nonzero-check without pretending to pack.
if nrv.IsValid() && (nrv.Kind() != reflect.Ptr || !nrv.IsZero()) {
f.Type.pack(&packer{b: &bytes.Buffer{}}, nrv)
}
} else {
@ -176,7 +177,7 @@ func (ft fieldType) pack(p *packer, rv reflect.Value) {
v := rv
buf, err := v.Interface().(encoding.BinaryMarshaler).MarshalBinary()
if err != nil {
p.Errorf("marshalbinary: %w", err)
p.errorf("marshalbinary: %w", err)
}
p.AddBytes(buf)
case kindBool:
@ -192,7 +193,7 @@ func (ft fieldType) pack(p *packer, rv reflect.Value) {
case kindInt:
v := rv.Int()
if v < math.MinInt32 || v > math.MaxInt32 {
p.Errorf("%w: int %d does not fit in int32", ErrParam, v)
p.errorf("%w: int %d does not fit in int32", ErrParam, v)
}
p.Varint(v)
case kindInt8, kindInt16, kindInt32, kindInt64:
@ -202,7 +203,7 @@ func (ft fieldType) pack(p *packer, rv reflect.Value) {
case kindUint:
v := rv.Uint()
if v > math.MaxUint32 {
p.Errorf("%w: uint %d does not fit in uint32", ErrParam, v)
p.errorf("%w: uint %d does not fit in uint32", ErrParam, v)
}
p.Uvarint(v)
case kindFloat32:
@ -214,7 +215,7 @@ func (ft fieldType) pack(p *packer, rv reflect.Value) {
case kindTime:
buf, err := rv.Interface().(time.Time).MarshalBinary()
if err != nil {
p.Errorf("%w: pack time: %s", ErrParam, err)
p.errorf("%w: pack time: %s", ErrParam, err)
}
p.AddBytes(buf)
case kindSlice:
@ -223,15 +224,32 @@ func (ft fieldType) pack(p *packer, rv reflect.Value) {
p.PushFieldmap(n)
for i := 0; i < n; i++ {
nrv := rv.Index(i)
if ft.List.isZero(nrv) {
if ft.ListElem.isZero(nrv) {
p.Field(false)
// Pretend to pack to get the nonzero checks of the element.
if nrv.IsValid() && (nrv.Kind() != reflect.Ptr || !nrv.IsNil()) {
ft.List.pack(&packer{b: &bytes.Buffer{}}, nrv)
if nrv.IsValid() && (nrv.Kind() != reflect.Ptr || !nrv.IsZero()) {
ft.ListElem.pack(&packer{b: &bytes.Buffer{}}, nrv)
}
} else {
p.Field(true)
ft.List.pack(p, nrv)
ft.ListElem.pack(p, nrv)
}
}
p.PopFieldmap()
case kindArray:
n := ft.ArrayLength
p.PushFieldmap(n)
for i := 0; i < n; i++ {
nrv := rv.Index(i)
if ft.ListElem.isZero(nrv) {
p.Field(false)
// Pretend to pack to get the nonzero checks of the element.
if nrv.IsValid() && (nrv.Kind() != reflect.Ptr || !nrv.IsZero()) {
ft.ListElem.pack(&packer{b: &bytes.Buffer{}}, nrv)
}
} else {
p.Field(true)
ft.ListElem.pack(p, nrv)
}
}
p.PopFieldmap()
@ -249,7 +267,7 @@ func (ft fieldType) pack(p *packer, rv reflect.Value) {
if ft.MapValue.isZero(v) {
p.Field(false)
// Pretend to pack to get the nonzero checks of the key type.
if v.IsValid() && (v.Kind() != reflect.Ptr || !v.IsNil()) {
if v.IsValid() && (v.Kind() != reflect.Ptr || !v.IsZero()) {
ft.MapValue.pack(&packer{b: &bytes.Buffer{}}, v)
}
} else {
@ -259,16 +277,16 @@ func (ft fieldType) pack(p *packer, rv reflect.Value) {
}
p.PopFieldmap()
case kindStruct:
p.PushFieldmap(len(ft.Fields))
for _, f := range ft.Fields {
p.PushFieldmap(len(ft.structFields))
for _, f := range ft.structFields {
nrv := rv.FieldByIndex(f.structField.Index)
if f.Type.isZero(nrv) {
if f.Nonzero {
p.Errorf("%w: %q", ErrZero, f.Name)
p.errorf("%w: %q", ErrZero, f.Name)
}
p.Field(false)
// Pretend to pack to get the nonzero checks.
if nrv.IsValid() && (nrv.Kind() != reflect.Ptr || !nrv.IsNil()) {
if nrv.IsValid() && (nrv.Kind() != reflect.Ptr || !nrv.IsZero()) {
f.Type.pack(&packer{b: &bytes.Buffer{}}, nrv)
}
} else {
@ -278,6 +296,6 @@ func (ft fieldType) pack(p *packer, rv reflect.Value) {
}
p.PopFieldmap()
default:
p.Errorf("internal error: unhandled field type") // should be prevented when registering type
p.errorf("internal error: unhandled field type") // should be prevented when registering type
}
}

View File

@ -119,7 +119,7 @@ func (st storeType) parse(rv reflect.Value, buf []byte) (rerr error) {
tv.parse(p, rv)
if len(p.buf) != 0 {
return fmt.Errorf("%w: leftover data after parsing", ErrStore)
return fmt.Errorf("%w: leftover data after parsing (%d, %x %q)", ErrStore, len(p.buf), p.buf, p.buf)
}
return nil
@ -173,7 +173,8 @@ func (tv typeVersion) parse(p *parser, rv reflect.Value) {
// parse a nonzero fieldType.
func (ft fieldType) parse(p *parser, rv reflect.Value) {
// Because we allow schema changes from ptr to nonptr, rv can be a pointer or direct value regardless of ft.Ptr.
// Because we allow schema changes from ptr to nonptr, rv can be a
// pointer or direct value regardless of ft.Ptr.
if rv.Kind() == reflect.Ptr {
nrv := reflect.New(rv.Type().Elem())
rv.Set(nrv)
@ -239,10 +240,18 @@ func (ft fieldType) parse(p *parser, rv reflect.Value) {
slc := reflect.MakeSlice(rv.Type(), n, n)
for i := 0; i < int(n); i++ {
if fm.Nonzero(i) {
ft.List.parse(p, slc.Index(i))
ft.ListElem.parse(p, slc.Index(i))
}
}
rv.Set(slc)
case kindArray:
n := ft.ArrayLength
fm := p.Fieldmap(n)
for i := 0; i < n; i++ {
if fm.Nonzero(i) {
ft.ListElem.parse(p, rv.Index(i))
}
}
case kindMap:
un := p.Uvarint()
n := p.checkInt(un)
@ -259,11 +268,13 @@ func (ft fieldType) parse(p *parser, rv reflect.Value) {
}
rv.Set(mp)
case kindStruct:
fm := p.Fieldmap(len(ft.Fields))
fm := p.Fieldmap(len(ft.structFields))
strct := reflect.New(rv.Type()).Elem()
for i, f := range ft.Fields {
for i, f := range ft.structFields {
if f.structField.Type == nil {
f.Type.skip(p)
if fm.Nonzero(i) {
f.Type.skip(p)
}
continue
}
if fm.Nonzero(i) {
@ -303,7 +314,15 @@ func (ft fieldType) skip(p *parser) {
fm := p.Fieldmap(n)
for i := 0; i < n; i++ {
if fm.Nonzero(i) {
ft.List.skip(p)
ft.ListElem.skip(p)
}
}
case kindArray:
n := ft.ArrayLength
fm := p.Fieldmap(n)
for i := 0; i < n; i++ {
if fm.Nonzero(i) {
ft.ListElem.skip(p)
}
}
case kindMap:
@ -317,8 +336,8 @@ func (ft fieldType) skip(p *parser) {
}
}
case kindStruct:
fm := p.Fieldmap(len(ft.Fields))
for i, f := range ft.Fields {
fm := p.Fieldmap(len(ft.structFields))
for i, f := range ft.structFields {
if fm.Nonzero(i) {
f.Type.skip(p)
}

View File

@ -7,6 +7,12 @@ import (
"sort"
)
// todo: cache query plans? perhaps explicitly through something like a prepared statement. the current plan includes values in keys,start,stop, which would need to be calculated for each execution. should benchmark time spent in planning first.
// todo optimize: handle multiple sorts with multikey indices if they match
// todo optimize: combine multiple filter (not)in/equals calls for same field
// todo optimize: efficiently pack booleans in an index (eg for Message.Flags), and use it to query.
// todo optimize: do multiple range scans if necessary when we can use an index for an equal check with multiple values.
// Plan represents a plan to execute a query, possibly using a simple/quick
// bucket "get" or cursor scan (forward/backward) on either the records or an
// index.
@ -31,9 +37,9 @@ type plan[T any] struct {
startInclusive bool // If the start and stop values are inclusive or exclusive.
stopInclusive bool
// Filter we need to apply on after retrieving the record. If all
// original filters from a query were handled by "keys" above, or by a
// range scan, this field is empty.
// Filter we need to apply after retrieving the record. If all original filters
// from a query were handled by "keys" above, or by a range scan, this field is
// empty.
filters []filter[T]
// Orders we need to apply after first retrieving all records. As with
@ -73,8 +79,7 @@ func (q *Query[T]) selectPlan() (*plan[T], error) {
// filter per field. If there are multiple, we would use the last one.
// That's okay, we'll filter records out when we execute the leftover
// filters. Probably not common.
// This is common for filterEqual and filterIn on
// fields that have a unique index.
// This is common for filterEqual and filterIn on fields that have a unique index.
equalsIn := map[string]*filter[T]{}
for i := range q.xfilters {
ff := &q.xfilters[i]
@ -98,8 +103,8 @@ indices:
}
}
// Calculate all keys that we need to retrieve from the index.
// todo optimization: if there is a sort involving these fields, we could do the sorting before fetching data.
// todo optimization: we can generate the keys on demand, will help when limit is in use: we are not generating all keys.
// todo optimize: if there is a sort involving these fields, we could do the sorting before fetching data.
// todo optimize: we can generate the keys on demand, will help when limit is in use: we are not generating all keys.
var keys [][]byte
var skipFilters []*filter[T] // Filters to remove from the full list because they are handled by querying the index.
for i, f := range idx.Fields {
@ -116,12 +121,15 @@ indices:
}
fekeys := make([][]byte, len(rvalues))
for j, fv := range rvalues {
key, _, err := packIndexKeys([]reflect.Value{fv}, nil)
ikl, err := packIndexKeys([]reflect.Value{fv}, nil)
if err != nil {
q.error(err)
return nil, err
}
fekeys[j] = key
if len(ikl) != 1 {
return nil, fmt.Errorf("internal error: multiple index keys for unique index (%d)", len(ikl))
}
fekeys[j] = ikl[0].pre
}
if i == 0 {
keys = fekeys
@ -148,22 +156,26 @@ indices:
}
// Try all other indices. We treat them all as non-unique indices now.
// We want to use the one with as many "equal" prefix fields as
// possible. Then we hope to use a scan on the remaining, either
// because of a filterCompare, or for an ordering. If there is a limit,
// orderings are preferred over compares.
// We want to use the one with as many "equal" or "inslice" field filters as
// possible. Then we hope to use a scan on the remaining, either because of a
// filterCompare, or for an ordering. If there is a limit, orderings are preferred
// over compares.
equals := map[string]*filter[T]{}
inslices := map[string]*filter[T]{}
for i := range q.xfilters {
ff := &q.xfilters[i]
switch f := (*ff).(type) {
case filterEqual[T]:
equals[f.field.Name] = ff
case filterInSlice[T]:
inslices[f.field.Name] = ff
}
}
// We are going to generate new plans, and keep the new one if it is better than what we have.
// We are going to generate new plans, and keep the new one if it is better than
// what we have so far.
var p *plan[T]
var nequals int
var nexact int
var nrange int
var ordered bool
@ -181,18 +193,27 @@ indices:
}
} else {
packKeys = func(l []reflect.Value) ([]byte, error) {
key, _, err := packIndexKeys(l, nil)
return key, err
ikl, err := packIndexKeys(l, nil)
if err != nil {
return nil, err
}
if len(ikl) != 1 {
return nil, fmt.Errorf("internal error: multiple index keys for exact filters, %v", ikl)
}
return ikl[0].pre, nil
}
}
var neq = 0
var nex = 0
// log.Printf("idx %v", idx)
var skipFilters []*filter[T]
for _, f := range idx.Fields {
if ff, ok := equals[f.Name]; ok {
skipFilters = append(skipFilters, ff)
neq++
if equals[f.Name] != nil && f.Type.Kind != kindSlice {
skipFilters = append(skipFilters, equals[f.Name])
nex++
} else if inslices[f.Name] != nil && f.Type.Kind == kindSlice {
skipFilters = append(skipFilters, inslices[f.Name])
nex++
} else {
break
}
@ -203,8 +224,8 @@ indices:
var nrng int
var order *order
orders := q.xorders
if neq < len(idx.Fields) {
nf := idx.Fields[neq]
if nex < len(idx.Fields) {
nf := idx.Fields[nex]
for i := range q.xfilters {
ff := &q.xfilters[i]
switch f := (*ff).(type) {
@ -230,7 +251,7 @@ indices:
}
// See if it can be used for ordering.
// todo optimization: we could use multiple orders
// todo optimize: we could use multiple orders
if len(orders) > 0 && orders[0].field.Name == nf.Name {
order = &orders[0]
orders = orders[1:]
@ -238,23 +259,29 @@ indices:
}
// See if this is better than what we had.
if !(neq > nequals || (neq == nequals && (nrng > nrange || order != nil && !ordered && (q.xlimit > 0 || nrng == nrange)))) {
// log.Printf("plan not better, neq %d, nrng %d, limit %d, order %v ordered %v", neq, nrng, q.limit, order, ordered)
if !(nex > nexact || (nex == nexact && (nrng > nrange || order != nil && !ordered && (q.xlimit > 0 || nrng == nrange)))) {
// log.Printf("plan not better, nex %d, nrng %d, limit %d, order %v ordered %v", nex, nrng, q.limit, order, ordered)
return nil
}
nequals = neq
nexact = nex
nrange = nrng
ordered = order != nil
// Calculate the prefix key.
var kvalues []reflect.Value
for i := 0; i < neq; i++ {
for i := 0; i < nex; i++ {
f := idx.Fields[i]
kvalues = append(kvalues, (*equals[f.Name]).(filterEqual[T]).rvalue)
var v reflect.Value
if f.Type.Kind != kindSlice {
v = (*equals[f.Name]).(filterEqual[T]).rvalue
} else {
v = (*inslices[f.Name]).(filterInSlice[T]).rvalue
}
kvalues = append(kvalues, v)
}
var key []byte
var err error
if neq > 0 {
if nex > 0 {
key, err = packKeys(kvalues)
if err != nil {
return err

View File

@ -1,6 +1,8 @@
package bstore
import (
"context"
"errors"
"fmt"
"reflect"
)
@ -23,12 +25,14 @@ import (
//
// A Query is not safe for concurrent use.
type Query[T any] struct {
st storeType // Of T.
pkType reflect.Type // Shortcut for st.Current.Fields[0].
xtx *Tx // If nil, a new transaction is automatically created from db. Using a tx goes through tx() once one exists.
xdb *DB // If not nil, xtx was created to execute the operation and is closed when the operation finishes (also on error).
err error // If set, returned by operations. For indicating failed filters, or that an operation has finished.
xfilterIDs *filterIDs[T] // Kept separately from filters because these filters make us use the PK without further index planning.
ctx context.Context
ctxDone <-chan struct{} // ctx.Done(), kept here for fast access.
st storeType // Of T.
pkType reflect.Type // Shortcut for st.Current.Fields[0].
xtx *Tx // If nil, a new transaction is automatically created from db. Using a tx goes through tx() once one exists.
xdb *DB // If not nil, xtx was created to execute the operation and is closed when the operation finishes (also on error).
err error // If set, returned by operations. For indicating failed filters, or that an operation has finished.
xfilterIDs *filterIDs[T] // Kept separately from filters because these filters make us use the PK without further index planning.
xfilters []filter[T]
xorders []order
@ -99,6 +103,14 @@ type filterNotIn[T any] struct {
func (filterNotIn[T]) filter() {}
// For matching one of the values in a field that is a slice of the same type.
type filterInSlice[T any] struct {
field field // Of field type, a slice.
rvalue reflect.Value
}
func (filterInSlice[T]) filter() {}
type compareOp byte
const (
@ -158,23 +170,37 @@ func (p *pair[T]) Value(e *exec[T]) (T, error) {
// QueryDB returns a new Query for type T. When an operation on the query is
// executed, a read-only/writable transaction is created as appropriate for the
// operation.
func QueryDB[T any](db *DB) *Query[T] {
func QueryDB[T any](ctx context.Context, db *DB) *Query[T] {
// We lock db for storeTypes. We keep it locked until Query is done.
db.typesMutex.RLock()
q := &Query[T]{xdb: db}
q.init(db)
q.init(ctx, db)
return q
}
// Query returns a new Query that operates on type T using transaction tx.
// QueryTx returns a new Query that operates on type T using transaction tx.
// The context of the transaction is used for the query.
func QueryTx[T any](tx *Tx) *Query[T] {
// note: Since we are in a transaction, we already hold an rlock on the
// db types.
q := &Query[T]{xtx: tx}
q.init(tx.db)
if tx.err != nil {
q.err = tx.err
return q
}
q.init(tx.ctx, tx.db)
return q
}
func (q *Query[T]) ctxErr() error {
select {
case <-q.ctxDone:
return q.ctx.Err()
default:
return nil
}
}
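// Hedged usage sketch (editor's illustration) of the new context-aware query
// API; the "User" type and error handling are hypothetical.
//
//	ctx := context.Background()
//	db, err := bstore.Open(ctx, "app.db", nil, User{})
//	if err != nil {
//		log.Fatal(err)
//	}
//	// Canceling ctx makes query operations return ctx.Err().
//	users, err := bstore.QueryDB[User](ctx, db).FilterNonzero(User{Name: "mjl"}).List()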
// Stats returns the current statistics for this query. When a query finishes,
// its stats are added to those of its transaction. When a transaction
// finishes, its stats are added to those of its database.
@ -182,7 +208,7 @@ func (q *Query[T]) Stats() Stats {
return q.stats
}
func (q *Query[T]) init(db *DB) {
func (q *Query[T]) init(ctx context.Context, db *DB) {
var v T
t := reflect.TypeOf(v)
if t.Kind() != reflect.Struct {
@ -194,6 +220,11 @@ func (q *Query[T]) init(db *DB) {
q.stats.LastType = q.st.Name
q.pkType = q.st.Current.Fields[0].structField.Type
}
q.ctx = ctx
q.ctxDone = ctx.Done()
if err := q.ctxErr(); q.err == nil && err != nil {
q.err = err
}
}
func (q *Query[T]) tx(write bool) (*Tx, error) {
@ -207,7 +238,7 @@ func (q *Query[T]) tx(write bool) (*Tx, error) {
q.error(err)
return nil, q.err
}
q.xtx = &Tx{db: q.xdb, btx: tx}
q.xtx = &Tx{ctx: q.ctx, db: q.xdb, btx: tx}
if write {
q.stats.Writes++
} else {
@ -308,6 +339,11 @@ func (q *Query[T]) checkErr() bool {
// Probably the result of using a Query zero value.
q.errorf("%w: invalid query, use QueryDB or QueryTx to make a query", ErrParam)
}
if q.err == nil {
if err := q.ctxErr(); err != nil {
q.err = err
}
}
return q.err == nil
}
@ -365,7 +401,10 @@ func (q *Query[T]) foreachKey(write, value bool, fn func(bk []byte, v T) error)
return nil
} else if err != nil {
return err
} else if err := fn(bk, v); err != nil {
} else if err := fn(bk, v); err == StopForEach {
q.error(ErrFinished)
return nil
} else if err != nil {
q.error(err)
return err
}
@ -436,14 +475,14 @@ var convertFieldKinds = map[convertKinds]struct{}{
// Check type of value for field and return a reflect value that can directly be set on the field.
// If the field is a pointer, we allow non-pointers and convert them.
// We require value to be of a type that can be converted without loss of precision to the type of field.
func (q *Query[T]) prepareValue(fname string, ft fieldType, sf reflect.StructField, rv reflect.Value) (reflect.Value, bool) {
func (q *Query[T]) prepareValue(fname string, ft fieldType, st reflect.Type, rv reflect.Value) (reflect.Value, bool) {
if !rv.IsValid() {
q.errorf("%w: invalid value", ErrParam)
return rv, false
}
// Quick check first.
t := rv.Type()
if t == sf.Type {
if t == st {
return rv, true
}
if !ft.Ptr && rv.Kind() == reflect.Ptr {
@ -461,14 +500,14 @@ func (q *Query[T]) prepareValue(fname string, ft fieldType, sf reflect.StructFie
return reflect.Value{}, false
}
if k != ft.Kind {
dt := sf.Type
dt := st
if ft.Ptr {
dt = dt.Elem()
}
rv = rv.Convert(dt)
}
if ft.Ptr && rv.Kind() != reflect.Ptr {
nv := reflect.New(sf.Type.Elem())
nv := reflect.New(st.Elem())
nv.Elem().Set(rv)
rv = nv
}
@ -654,7 +693,7 @@ func (q *Query[T]) filterEqual(fieldName string, values []any, not bool) {
return
}
if len(values) == 1 {
rv, ok := q.prepareValue(ff.Name, ff.Type, ff.structField, reflect.ValueOf(values[0]))
rv, ok := q.prepareValue(ff.Name, ff.Type, ff.structField.Type, reflect.ValueOf(values[0]))
if !ok {
return
}
@ -667,7 +706,7 @@ func (q *Query[T]) filterEqual(fieldName string, values []any, not bool) {
}
rvs := make([]reflect.Value, len(values))
for i, value := range values {
rv, ok := q.prepareValue(ff.Name, ff.Type, ff.structField, reflect.ValueOf(value))
rv, ok := q.prepareValue(ff.Name, ff.Type, ff.structField.Type, reflect.ValueOf(value))
if !ok {
return
}
@ -680,6 +719,42 @@ func (q *Query[T]) filterEqual(fieldName string, values []any, not bool) {
}
}
// FilterIn selects records that have value as one of the elements of the
// slice field fieldName.
//
// If fieldName has an index, it is used to select records.
//
// Note: Value must be a compatible type for comparison with the elements of
// fieldName. Go constant numbers become ints, which are not compatible with uint
// or float types.
func (q *Query[T]) FilterIn(fieldName string, value any) *Query[T] {
if !q.checkErr() {
return q
}
ff, ok := q.lookupField(fieldName)
if !ok {
return q
}
if ff.Type.Ptr {
q.errorf("%w: cannot compare pointer values", ErrParam)
return q
}
if ff.Type.Kind != kindSlice {
q.errorf("%w: field for FilterIn must be a slice", ErrParam)
return q
}
et := ff.Type.ListElem
if et.Ptr {
q.errorf("%w: cannot compare element pointer values", ErrParam)
return q
}
rv, ok := q.prepareValue(ff.Name, *et, ff.structField.Type.Elem(), reflect.ValueOf(value))
if !ok {
return q
}
q.addFilter(filterInSlice[T]{ff, rv})
return q
}
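// Hedged sketch (editor's illustration) of FilterIn with an indexed string
// slice, like the Message.DKIMDomains index this change enables; the type
// shown here is hypothetical.
//
//	type Message struct {
//		ID          int64
//		DKIMDomains []string `bstore:"index"`
//	}
//
//	// Selects messages whose DKIMDomains contains "example.org", using the
//	// index on DKIMDomains to select records.
//	msgs, err := bstore.QueryDB[Message](ctx, db).FilterIn("DKIMDomains", "example.org").List()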
// FilterGreater selects records that have fieldName > value.
//
// Note: Value must be a compatible type for comparison with fieldName. Go
@ -716,7 +791,7 @@ func (q *Query[T]) filterCompare(fieldName string, op compareOp, value reflect.V
q.errorf("%w: cannot compare %s", ErrParam, ff.Type.Kind)
return q
}
rv, ok := q.prepareValue(ff.Name, ff.Type, ff.structField, value)
rv, ok := q.prepareValue(ff.Name, ff.Type, ff.structField.Type, value)
if !ok {
return q
}
@ -831,7 +906,8 @@ func (q *Query[T]) gather(v T, rv reflect.Value) {
}
}
// Err returns if an error is set on the query. Can happen for invalid filters.
// Err returns any error set on the query, e.g. for invalid filters or a
// canceled context.
// Finished queries return ErrFinished.
func (q *Query[T]) Err() error {
q.checkErr()
@ -979,7 +1055,7 @@ next:
if i == 0 {
return 0, fmt.Errorf("%w: cannot update primary key", ErrParam)
}
rv, ok := q.prepareValue(f.Name, f.Type, f.structField, reflect.ValueOf(value))
rv, ok := q.prepareValue(f.Name, f.Type, f.structField.Type, reflect.ValueOf(value))
if !ok {
return 0, q.err
}
@ -991,7 +1067,7 @@ next:
if ef.Name != name {
continue
}
rv, ok := q.prepareValue(ef.Name, ef.Type, ef.structField, reflect.ValueOf(value))
rv, ok := q.prepareValue(ef.Name, ef.Type, ef.structField.Type, reflect.ValueOf(value))
if !ok {
return 0, q.err
}
@ -1051,6 +1127,8 @@ func (q *Query[T]) IDs(idsptr any) (rerr error) {
return nil
}
// todo: should we have an iteration object that we can call Next and NextID on?
// Next fetches the next record, moving the cursor forward.
//
// ErrAbsent is returned if no more records match.
@ -1116,7 +1194,13 @@ func (q *Query[T]) Exists() (exists bool, rerr error) {
return err == nil, err
}
// StopForEach is an error value that, if returned by the function passed to
// Query.ForEach, stops further iterations.
var StopForEach error = errors.New("stop foreach")
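// Hedged usage sketch (editor's illustration) for StopForEach; "handle" is
// hypothetical.
//
//	err := bstore.QueryTx[Message](tx).ForEach(func(m Message) error {
//		if done := handle(m); done {
//			return bstore.StopForEach // ForEach returns nil.
//		}
//		return nil // Returning any other error aborts and is propagated.
//	})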
// ForEach calls fn on each selected record.
// If fn returns StopForEach, ForEach stops iterating (fn is not called again)
// and returns nil.
func (q *Query[T]) ForEach(fn func(value T) error) (rerr error) {
defer q.finish(&rerr)
q.checkNotNext()

View File

@ -2,9 +2,12 @@ package bstore
import (
"bytes"
"context"
"encoding/binary"
"encoding/json"
"errors"
"fmt"
"os"
"reflect"
"sort"
"strconv"
@ -14,10 +17,22 @@ import (
bolt "go.etcd.io/bbolt"
)
// todo: implement changing PK type, eg to wider int. requires rewriting all values, and removing old typeVersions.
// todo: allow schema change between []byte and string?
// todo: allow more schema changes, eg int to string, bool to int or string, int to bool, perhaps even string to int/bool. and between structs and maps. would require rewriting the records.
const (
// First version.
ondiskVersion1 = 1
// With support for cyclic types, adding fieldType.FieldsTypeSeq to
// define/reference types. Only used when a type has a field that references
// another struct type.
ondiskVersion2 = 2
)
var errSchemaCheck = errors.New("schema check")
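// Hedged sketch (editor's illustration) of the development-time schema check
// described below; the variable would normally be set in the environment, and
// the types are hypothetical.
//
//	os.Setenv("bstore_schema_check", "unchanged")
//	if err := db.Register(ctx, Message{}, Mailbox{}); err != nil {
//		// A schema change now results in an error instead of a silent
//		// schema migration.
//	}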
// Register registers the Go types of each value in typeValues for use with the
// database. Each value must be a struct, not a pointer.
//
@ -30,7 +45,11 @@ const (
//
// Register can be called multiple times, with different types. But types that
// reference each other must be registered in the same call to Register.
func (db *DB) Register(typeValues ...any) error {
//
// To help during development, if environment variable "bstore_schema_check" is set
// to "changed", an error is returned if there is no schema change. If it is set to
// "unchanged", an error is returned if there was a schema change.
func (db *DB) Register(ctx context.Context, typeValues ...any) error {
// We will drop/create new indices as needed. For changed indices, we drop
// and recreate. E.g. if an index becomes a unique index, or if a field in
// an index changes. These values map type and index name to their index.
@ -41,7 +60,10 @@ func (db *DB) Register(typeValues ...any) error {
ntypeversions := map[string]*typeVersion{} // New typeversions, through new types or updated versions of existing types.
registered := map[string]*storeType{} // Registered in this call.
return db.Write(func(tx *Tx) error {
checkSchemaChanged := os.Getenv("bstore_schema_check") == "changed"
checkSchemaUnchanged := os.Getenv("bstore_schema_check") == "unchanged"
return db.Write(ctx, func(tx *Tx) error {
for _, t := range typeValues {
rt := reflect.TypeOf(t)
if rt.Kind() != reflect.Struct {
@ -118,6 +140,11 @@ func (db *DB) Register(typeValues ...any) error {
// Decide if we need to add a new typeVersion to the database. I.e. a new type schema.
if st.Current == nil || !st.Current.typeEqual(*tv) {
if checkSchemaUnchanged {
return fmt.Errorf("%w: schema changed but bstore_schema_check=unchanged is set (type %v)", errSchemaCheck, st.Name)
}
checkSchemaChanged = false // After registering types, we check that it is false.
tv.Version = 1
if st.Current != nil {
tv.Version = st.Current.Version + 1
@ -127,6 +154,13 @@ func (db *DB) Register(typeValues ...any) error {
return fmt.Errorf("internal error: packing schema for type %q", tv.name)
}
// Sanity check: parse the typeVersion again, and check that we think it is equal to the typeVersion just written.
if xtv, err := parseSchema(k, v); err != nil {
return fmt.Errorf("%w: parsing generated typeVersion: %v", ErrStore, err)
} else if !xtv.typeEqual(*tv) {
return fmt.Errorf("%w: generated typeVersion not equal to itself after pack and parse", ErrStore)
}
// note: we don't track types bucket operations in stats.
if err := tb.Put(k, v); err != nil {
return fmt.Errorf("storing new schema: %w", err)
@ -202,6 +236,10 @@ func (db *DB) Register(typeValues ...any) error {
registered[st.Name] = &st
}
if checkSchemaChanged {
return fmt.Errorf("%w: schema did not change while bstore_schema_check=changed is set", errSchemaCheck)
}
// Check that referenced types exist, and make links in the referenced types.
for _, st := range registered {
tv := st.Current
@ -237,7 +275,7 @@ func (db *DB) Register(typeValues ...any) error {
// We cannot just recalculate the ReferencedBy, because the whole point is to
// detect types that are missing in this Register.
updateReferencedBy := map[string]struct{}{}
for _, ntv := range ntypeversions {
for ntname, ntv := range ntypeversions {
otv := otypeversions[ntv.name] // Can be nil, on first register.
// Look for references that were added.
@ -251,6 +289,66 @@ func (db *DB) Register(typeValues ...any) error {
if _, ok := registered[name].Current.ReferencedBy[ntv.name]; ok {
return fmt.Errorf("%w: type %q introduces reference to %q but is already marked as ReferencedBy in that type", ErrStore, ntv.name, name)
}
// Verify that the new reference does not violate the foreign key constraint.
var foundField bool
for _, f := range ntv.Fields {
for _, rname := range f.References {
if rname != name {
continue
}
foundField = true
// For newly added references, check they are valid.
b, err := tx.recordsBucket(ntname, ntv.fillPercent)
if err != nil {
return fmt.Errorf("%w: bucket for type %s with field with new reference: %v", ErrStore, ntname, err)
}
rb, err := tx.recordsBucket(name, registered[name].Current.fillPercent)
if err != nil {
return fmt.Errorf("%w: bucket for referenced type %s: %v", ErrStore, name, err)
}
nst := registered[ntname]
rv := reflect.New(nst.Type).Elem()
ctxDone := ctx.Done()
err = b.ForEach(func(bk, bv []byte) error {
tx.stats.Records.Cursor++
select {
case <-ctxDone:
return tx.ctx.Err()
default:
}
if err := nst.parse(rv, bv); err != nil {
return fmt.Errorf("parsing record for %s: %w", ntname, err)
}
frv := rv.FieldByIndex(f.structField.Index)
if frv.IsZero() {
return nil
}
rpk, err := packPK(frv)
if err != nil {
return fmt.Errorf("packing pk for referenced type %s: %w", name, err)
}
tx.stats.Records.Cursor++
if rb.Get(rpk) == nil {
return fmt.Errorf("%w: value %v not in %s", ErrReference, frv.Interface(), name)
}
return nil
})
if err != nil {
return fmt.Errorf("%w: ensuring referential integrity for newly added reference of %s.%s", err, ntname, f.Name)
}
}
}
if !foundField {
return fmt.Errorf("%w: could not find field causing newly referenced type %s in type %s", ErrStore, name, ntname)
}
// note: we are updating the previous tv's ReferencedBy, not tidy but it is safe.
registered[name].Current.ReferencedBy[ntv.name] = struct{}{}
updateReferencedBy[name] = struct{}{}
@ -271,8 +369,10 @@ func (db *DB) Register(typeValues ...any) error {
if _, ok := ntv.references[name]; ok {
continue
}
if _, ok := registered[name].Current.ReferencedBy[ntv.name]; !ok {
return fmt.Errorf("%w: previously referenced type %q not present in %q", ErrStore, ntv.name, name)
if rtv, ok := registered[name]; !ok {
return fmt.Errorf("%w: type %q formerly referenced by %q not yet registered", ErrStore, name, ntv.name)
} else if _, ok := rtv.Current.ReferencedBy[ntv.name]; !ok {
return fmt.Errorf("%w: formerly referenced type %q missing from %q", ErrStore, name, ntv.name)
}
// note: we are updating the previous tv's ReferencedBy, not tidy but it is safe.
delete(registered[name].Current.ReferencedBy, ntv.name)
@ -416,20 +516,29 @@ func (db *DB) Register(typeValues ...any) error {
}
ibkeys := make([][]key, len(idxs))
ctxDone := ctx.Done()
err = rb.ForEach(func(bk, bv []byte) error {
tx.stats.Records.Cursor++
select {
case <-ctxDone:
return tx.ctx.Err()
default:
}
rv := reflect.New(st.Type).Elem()
if err := st.parse(rv, bv); err != nil {
return fmt.Errorf("parsing record for index for %s: %w", name, err)
}
for i, idx := range idxs {
prek, ik, err := idx.packKey(rv, bk)
ikl, err := idx.packKey(rv, bk)
if err != nil {
return fmt.Errorf("creating key for %s.%s: %w", name, idx.Name, err)
}
ibkeys[i] = append(ibkeys[i], key{ik, uint16(len(prek))})
for _, ik := range ikl {
ibkeys[i] = append(ibkeys[i], key{ik.full, uint16(len(ik.pre))})
}
}
return nil
})
@ -447,14 +556,14 @@ func (db *DB) Register(typeValues ...any) error {
prev := keys[i-1]
if bytes.Equal(prev.buf[:prev.pre], k.buf[:k.pre]) {
// Do quite a bit of work to make a helpful error message.
a := reflect.New(reflect.TypeOf(idx.tv.Fields[0].Type.zero(nil))).Elem()
b := reflect.New(reflect.TypeOf(idx.tv.Fields[0].Type.zero(nil))).Elem()
a := reflect.New(reflect.TypeOf(idx.tv.Fields[0].Type.zeroKey())).Elem()
b := reflect.New(reflect.TypeOf(idx.tv.Fields[0].Type.zeroKey())).Elem()
parsePK(a, prev.buf[prev.pre:]) // Ignore error, nothing to do.
parsePK(b, k.buf[k.pre:]) // Ignore error, nothing to do.
var dup []any
_, values, _ := idx.parseKey(k.buf, true)
for i := range values {
x := reflect.New(reflect.TypeOf(idx.Fields[i].Type.zero(nil))).Elem()
x := reflect.New(reflect.TypeOf(idx.Fields[i].Type.zeroKey())).Elem()
parsePK(x, values[i]) // Ignore error, nothing to do.
dup = append(dup, x.Interface())
}
@ -502,8 +611,10 @@ func parseSchema(bk, bv []byte) (*typeVersion, error) {
if tv.Version != version {
return nil, fmt.Errorf("%w: version in schema %d does not match key %d", ErrStore, tv.Version, version)
}
if tv.OndiskVersion != ondiskVersion1 {
return nil, fmt.Errorf("internal error: OndiskVersion %d not supported", tv.OndiskVersion)
switch tv.OndiskVersion {
case ondiskVersion1, ondiskVersion2:
default:
return nil, fmt.Errorf("internal error: OndiskVersion %d not recognized/supported", tv.OndiskVersion)
}
// Fill references, used for comparing/checking schema updates.
@ -514,12 +625,72 @@ func parseSchema(bk, bv []byte) (*typeVersion, error) {
}
}
// Resolve fieldType.structFields, for referencing defined types, used for
// supporting cyclic types. The type itself always implicitly has sequence 1.
seqFields := map[int][]field{1: tv.Fields}
origOndiskVersion := tv.OndiskVersion
for i := range tv.Fields {
if err := tv.resolveStructFields(seqFields, &tv.Fields[i].Type); err != nil {
return nil, fmt.Errorf("%w: resolving struct fields for referencing types: %v", ErrStore, err)
}
}
if tv.OndiskVersion != origOndiskVersion {
return nil, fmt.Errorf("%w: resolving cyclic types changed ondisk version from %d to %d", ErrStore, origOndiskVersion, tv.OndiskVersion)
}
return &tv, nil
}
// Resolve structFields in ft (and recursively), either by setting it to Fields
// (common), or by setting it to the fields of a referenced type in case of a
// cyclic data type.
func (tv *typeVersion) resolveStructFields(seqFields map[int][]field, ft *fieldType) error {
if ft.Kind == kindStruct {
if ft.FieldsTypeSeq < 0 {
var ok bool
ft.structFields, ok = seqFields[-ft.FieldsTypeSeq]
if !ok {
return fmt.Errorf("reference to undefined FieldsTypeSeq %d (n %d)", -ft.FieldsTypeSeq, len(seqFields))
}
if len(ft.DefinitionFields) != 0 {
return fmt.Errorf("reference to FieldsTypeSeq while also defining fields")
}
} else if ft.FieldsTypeSeq > 0 {
if _, ok := seqFields[ft.FieldsTypeSeq]; ok {
return fmt.Errorf("duplicate definition of FieldsTypeSeq %d (n %d)", ft.FieldsTypeSeq, len(seqFields))
}
seqFields[ft.FieldsTypeSeq] = ft.DefinitionFields
ft.structFields = ft.DefinitionFields
}
// note: ondiskVersion1 does not have/use this field, so it defaults to 0.
if ft.FieldsTypeSeq == 0 {
ft.structFields = ft.DefinitionFields
}
for i := range ft.DefinitionFields {
if err := tv.resolveStructFields(seqFields, &ft.DefinitionFields[i].Type); err != nil {
return err
}
}
}
xftl := []*fieldType{ft.MapKey, ft.MapValue, ft.ListElem}
for _, xft := range xftl {
if xft == nil {
continue
}
if err := tv.resolveStructFields(seqFields, xft); err != nil {
return err
}
}
return nil
}
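// Hedged sketch (editor's illustration) of a cyclic type that FieldsTypeSeq
// makes representable, along the lines of the Message.Part use case from the
// commit message:
//
//	type Part struct {
//		BodyOffset int64
//		Parts      []Part // Self-reference, stored as a negative
//		                   // FieldsTypeSeq pointing back at Part's seq.
//	}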
// packSchema returns a key and value to store in the types bucket.
func packSchema(tv *typeVersion) ([]byte, []byte, error) {
if tv.OndiskVersion != ondiskVersion1 {
switch tv.OndiskVersion {
case ondiskVersion1, ondiskVersion2:
default:
return nil, nil, fmt.Errorf("internal error: invalid OndiskVersion %d", tv.OndiskVersion)
}
v, err := json.Marshal(tv)
@ -540,12 +711,17 @@ func gatherTypeVersion(t reflect.Type) (*typeVersion, error) {
}
tv := &typeVersion{
Version: 0, // Set by caller.
OndiskVersion: ondiskVersion1, // Current on-disk format.
OndiskVersion: ondiskVersion2, // When opening a database with ondiskVersion1, we add a new typeVersion.
ReferencedBy: map[string]struct{}{},
name: tname,
fillPercent: 0.5,
}
tv.Fields, tv.embedFields, err = gatherTypeFields(t, true, true, false)
// The type being parsed implicitly has sequence 1. Next struct types will be
// assigned the next value (based on the length of typeSeqs). FieldTypes
// referencing another type are resolved below, after all fields have been
// gathered.
typeSeqs := map[reflect.Type]int{t: 1}
tv.Fields, tv.embedFields, err = gatherTypeFields(typeSeqs, t, true, true, false, true)
if err != nil {
return nil, err
}
@ -562,6 +738,15 @@ func gatherTypeVersion(t reflect.Type) (*typeVersion, error) {
}
}
// Resolve structFields for the typeFields that reference an earlier defined type,
// using the same function as used when loading a type from disk.
seqFields := map[int][]field{1: tv.Fields}
for i := range tv.Fields {
if err := tv.resolveStructFields(seqFields, &tv.Fields[i].Type); err != nil {
return nil, fmt.Errorf("%w: resolving struct fields for referencing types: %v", ErrStore, err)
}
}
// Find indices.
tv.Indices = map[string]*index{}
@ -572,6 +757,7 @@ func gatherTypeVersion(t reflect.Type) (*typeVersion, error) {
}
idx = &index{unique, iname, nil, tv}
tv.Indices[iname] = idx
nslice := 0
for _, f := range fields {
// todo: can we have a unique index on bytes? seems like this should be possible to have max 1 []byte in an index key, only to be used for unique get plans.
if f.Type.Ptr {
@ -579,6 +765,14 @@ func gatherTypeVersion(t reflect.Type) (*typeVersion, error) {
}
switch f.Type.Kind {
case kindBool, kindInt8, kindInt16, kindInt32, kindInt64, kindInt, kindUint8, kindUint16, kindUint32, kindUint64, kindUint, kindString, kindTime:
case kindSlice:
nslice++
if nslice > 1 {
return fmt.Errorf("%w: can only have one slice field in index, for field %q", ErrType, f.Name)
}
if unique {
return fmt.Errorf("%w: can only use slice field %v in field %q as index without unique", ErrType, f.Type.Kind, f.Name)
}
default:
return fmt.Errorf("%w: cannot use type %v in field %q as index/unique", ErrType, f.Type.Kind, f.Name)
}
@ -692,7 +886,7 @@ func gatherTypeVersion(t reflect.Type) (*typeVersion, error) {
// field must not be ignored and be a valid primary key field (eg no pointer).
// topLevel must be true only for the top-level struct fields, not for fields of
// deeper levels. Deeper levels cannot have index/unique constraints.
func gatherTypeFields(t reflect.Type, needFirst, topLevel, inMap bool) ([]field, []embed, error) {
func gatherTypeFields(typeSeqs map[reflect.Type]int, t reflect.Type, needFirst, topLevel, inMap, newSeq bool) ([]field, []embed, error) {
var fields []field
var embedFields []embed
@ -744,7 +938,7 @@ func gatherTypeFields(t reflect.Type, needFirst, topLevel, inMap bool) ([]field,
}
names[name] = struct{}{}
ft, err := gatherFieldType(sf.Type, inMap)
ft, err := gatherFieldType(typeSeqs, sf.Type, inMap, newSeq && !sf.Anonymous)
if err != nil {
return nil, nil, fmt.Errorf("field %q: %w", sf.Name, err)
}
@ -817,11 +1011,13 @@ func gatherTypeFields(t reflect.Type, needFirst, topLevel, inMap bool) ([]field,
}
}
if sf.Anonymous {
// We don't store anonymous/embed fields, unless it is a cyclic type, because then
// we wouldn't have included any of its type's fields.
if sf.Anonymous && ft.FieldsTypeSeq == 0 {
e := embed{name, ft, sf}
embedFields = append(embedFields, e)
} else {
f := field{name, ft, nonzero, tags.List("ref"), defstr, def, sf, nil}
f := field{name, ft, nonzero, tags.List("ref"), defstr, def, sf, false, nil}
fields = append(fields, f)
}
}
@ -842,12 +1038,13 @@ func checkKeyType(t reflect.Type) error {
return fmt.Errorf("%w: type %v not valid for primary key", ErrType, t)
}
func gatherFieldType(t reflect.Type, inMap bool) (fieldType, error) {
func gatherFieldType(typeSeqs map[reflect.Type]int, t reflect.Type, inMap, newSeq bool) (fieldType, error) {
ft := fieldType{}
if t.Kind() == reflect.Ptr {
t = t.Elem()
ft.Ptr = true
}
k, err := typeKind(t)
if err != nil {
return fieldType{}, err
@ -855,32 +1052,52 @@ func gatherFieldType(t reflect.Type, inMap bool) (fieldType, error) {
ft.Kind = k
switch ft.Kind {
case kindSlice:
l, err := gatherFieldType(t.Elem(), inMap)
l, err := gatherFieldType(typeSeqs, t.Elem(), inMap, newSeq)
if err != nil {
return ft, fmt.Errorf("list: %w", err)
return ft, fmt.Errorf("slice: %w", err)
}
ft.List = &l
ft.ListElem = &l
case kindArray:
l, err := gatherFieldType(typeSeqs, t.Elem(), inMap, newSeq)
if err != nil {
return ft, fmt.Errorf("array: %w", err)
}
ft.ListElem = &l
ft.ArrayLength = t.Len()
case kindMap:
kft, err := gatherFieldType(t.Key(), true)
kft, err := gatherFieldType(typeSeqs, t.Key(), true, newSeq)
if err != nil {
return ft, fmt.Errorf("map key: %w", err)
}
if kft.Ptr {
return ft, fmt.Errorf("%w: map key with pointer type not supported", ErrType)
}
vft, err := gatherFieldType(t.Elem(), true)
vft, err := gatherFieldType(typeSeqs, t.Elem(), true, newSeq)
if err != nil {
return ft, fmt.Errorf("map value: %w", err)
}
ft.MapKey = &kft
ft.MapValue = &vft
case kindStruct:
// note: we have no reason to gather embed field beyond top-level
fields, _, err := gatherTypeFields(t, false, false, inMap)
// If this is a known type, track a reference to the earlier defined type. Once the
// type with all Fields is fully parsed, the references will be resolved.
if seq, ok := typeSeqs[t]; ok {
ft.FieldsTypeSeq = -seq
return ft, nil
}
// If we are processing an anonymous (embed) field, we don't assign a new seq,
// because we won't be walking it when resolving again.
seq := len(typeSeqs) + 1
if newSeq {
typeSeqs[t] = seq
ft.FieldsTypeSeq = seq
}
fields, _, err := gatherTypeFields(typeSeqs, t, false, false, inMap, newSeq)
if err != nil {
return fieldType{}, fmt.Errorf("struct: %w", err)
}
ft.Fields = fields
ft.DefinitionFields = fields
}
return ft, nil
}
@ -941,6 +1158,10 @@ tv:
}
func (f *field) prepare(nfields []field, later, mvlater [][]field) {
if f.prepared {
return
}
f.prepared = true
for _, nf := range nfields {
if nf.Name == f.Name {
f.structField = nf.structField
@ -954,26 +1175,26 @@ func (ft fieldType) laterFields() (later, mvlater []field) {
later, _ = ft.MapKey.laterFields()
mvlater, _ = ft.MapValue.laterFields()
return later, mvlater
} else if ft.List != nil {
return ft.List.laterFields()
} else if ft.ListElem != nil {
return ft.ListElem.laterFields()
}
return ft.Fields, nil
return ft.structFields, nil
}
func (ft fieldType) prepare(nft *fieldType, later, mvlater [][]field) {
for i, f := range ft.Fields {
for i, f := range ft.structFields {
nlater, nmvlater, skip := lookupLater(f.Name, later)
if skip {
continue
}
ft.Fields[i].prepare(nft.Fields, nlater, nmvlater)
ft.structFields[i].prepare(nft.structFields, nlater, nmvlater)
}
if ft.MapKey != nil {
ft.MapKey.prepare(nft.MapKey, later, nil)
ft.MapValue.prepare(nft.MapValue, mvlater, nil)
}
if ft.List != nil {
ft.List.prepare(nft.List, later, mvlater)
if ft.ListElem != nil {
ft.ListElem.prepare(nft.ListElem, later, mvlater)
}
}
@ -1032,18 +1253,24 @@ func (ft fieldType) typeEqual(nft fieldType) bool {
if ft.Ptr != nft.Ptr || ft.Kind != nft.Kind {
return false
}
if len(ft.Fields) != len(nft.Fields) {
if ft.FieldsTypeSeq != nft.FieldsTypeSeq {
return false
}
for i, f := range ft.Fields {
if !f.typeEqual(nft.Fields[i]) {
if len(ft.DefinitionFields) != len(nft.DefinitionFields) {
return false
}
for i, f := range ft.DefinitionFields {
if !f.typeEqual(nft.DefinitionFields[i]) {
return false
}
}
if ft.MapKey != nil && (!ft.MapKey.typeEqual(*nft.MapKey) || !ft.MapValue.typeEqual(*nft.MapValue)) {
return false
}
if ft.List != nil && !ft.List.typeEqual(*nft.List) {
if ft.ListElem != nil && !ft.ListElem.typeEqual(*nft.ListElem) {
return false
}
if ft.ArrayLength != nft.ArrayLength {
return false
}
return true
@ -1069,12 +1296,16 @@ func (idx *index) typeEqual(nidx *index) bool {
// into an int32. Indices that need to be recreated (for an int width change) are
// recorded in recreateIndices.
func (tx *Tx) checkTypes(otv, ntv *typeVersion, recreateIndices map[string]struct{}) error {
// Used to track that two nonzero FieldsTypeSeq have been checked, to prevent
// recursing while checking.
checked := map[[2]int]struct{}{}
for _, f := range ntv.Fields {
for _, of := range otv.Fields {
if f.Name != of.Name {
continue
}
increase, err := of.Type.compatible(f.Type)
increase, err := of.Type.compatible(f.Type, checked)
if err != nil {
return fmt.Errorf("%w: field %q: %s", ErrIncompatible, f.Name, err)
}
@ -1099,7 +1330,7 @@ func (tx *Tx) checkTypes(otv, ntv *typeVersion, recreateIndices map[string]struc
// for maps/slices/structs). If not an error is returned. If they are, the first
// return value indicates if this is a field that needs it index recreated
// (currently for ints that are packed with fixed width encoding).
func (ft fieldType) compatible(nft fieldType) (bool, error) {
func (ft fieldType) compatible(nft fieldType, checked map[[2]int]struct{}) (bool, error) {
need := func(incr bool, l ...kind) (bool, error) {
for _, k := range l {
if nft.Kind == k {
@ -1160,10 +1391,10 @@ func (ft fieldType) compatible(nft fieldType) (bool, error) {
if nk != k {
return false, fmt.Errorf("map to %v: %w", nk, ErrIncompatible)
}
if _, err := ft.MapKey.compatible(*nft.MapKey); err != nil {
if _, err := ft.MapKey.compatible(*nft.MapKey, checked); err != nil {
return false, fmt.Errorf("map key: %w", err)
}
if _, err := ft.MapValue.compatible(*nft.MapValue); err != nil {
if _, err := ft.MapValue.compatible(*nft.MapValue, checked); err != nil {
return false, fmt.Errorf("map value: %w", err)
}
return false, nil
@ -1171,18 +1402,41 @@ func (ft fieldType) compatible(nft fieldType) (bool, error) {
if nk != k {
return false, fmt.Errorf("slice to %v: %w", nk, ErrIncompatible)
}
if _, err := ft.List.compatible(*nft.List); err != nil {
return false, fmt.Errorf("list: %w", err)
if _, err := ft.ListElem.compatible(*nft.ListElem, checked); err != nil {
return false, fmt.Errorf("slice: %w", err)
}
return false, nil
case kindArray:
if nk != k {
return false, fmt.Errorf("array to %v: %w", nk, ErrIncompatible)
}
if nft.ArrayLength != ft.ArrayLength {
return false, fmt.Errorf("array size cannot change (from %d to %d)", ft.ArrayLength, nft.ArrayLength)
}
if _, err := ft.ListElem.compatible(*nft.ListElem, checked); err != nil {
return false, fmt.Errorf("array: %w", err)
}
return false, nil
case kindStruct:
if nk != k {
return false, fmt.Errorf("struct to %v: %w", nk, ErrIncompatible)
}
for _, nf := range nft.Fields {
for _, f := range ft.Fields {
// For ondiskVersion2, the seqs are both nonzero, and we must check that we already
// did the check to prevent recursion.
haveSeq := nft.FieldsTypeSeq != 0 || ft.FieldsTypeSeq != 0
if haveSeq {
k := [2]int{nft.FieldsTypeSeq, ft.FieldsTypeSeq}
if _, ok := checked[k]; ok {
return false, nil
}
checked[k] = struct{}{} // Set early to prevent recursion in call below.
}
for _, nf := range nft.structFields {
for _, f := range ft.structFields {
if nf.Name == f.Name {
_, err := f.Type.compatible(nf.Type)
_, err := f.Type.compatible(nf.Type, checked)
if err != nil {
return false, fmt.Errorf("field %q: %w", nf.Name, err)
}
@ -1201,11 +1455,11 @@ func (ft fieldType) hasNonzeroField(stopAtPtr bool) bool {
}
switch ft.Kind {
case kindMap:
return ft.List.hasNonzeroField(true)
case kindSlice:
return ft.MapValue.hasNonzeroField(true)
case kindSlice, kindArray:
return ft.ListElem.hasNonzeroField(true)
case kindStruct:
for _, f := range ft.Fields {
for _, f := range ft.structFields {
if f.Nonzero || f.Type.hasNonzeroField(true) {
return true
}

View File

@ -1,7 +1,9 @@
package bstore
import (
"context"
"encoding"
"encoding/json"
"errors"
"fmt"
"io"
@ -14,6 +16,18 @@ import (
bolt "go.etcd.io/bbolt"
)
/*
- todo: should thoroughly review guarantees, where some of the bstore struct tags are allowed (e.g. top-level fields vs deeper struct fields), check that all features work well when combined (cyclic types, embed structs, default values, nonzero checks, type equality, zero values with fieldmap, skipping values (hidden due to later typeversions) and having different type versions), write more extensive tests.
- todo: write tests for invalid (meta)data inside the boltdb buckets (not for invalid boltdb files). we should detect the error properly, give a reasonable message. we shouldn't panic (nil deref, out of bounds index, consume too much memory). typeVersions, records, indices.
- todo: add benchmarks. is there a standard dataset databases use for benchmarking?
- todo optimize: profile and see if we can optimize for some quick wins.
- todo: should we add a way for ad-hoc data manipulation? e.g. with sql-like queries, e.g. update, delete, insert; and export results of queries to csv.
- todo: should we have a function that returns records in a map? eg Map() that is like List() but maps a key to T (too bad we cannot have a type for the key!).
- todo: better error messages (ordering of description & error; mention typename, fields (path), field types and offending value & type more often)
- todo: should we add types for dates and numerics?
- todo: struct tag for enums? where we check if the values match.
*/
var (
ErrAbsent = errors.New("absent") // If a function can return an ErrAbsent, it can be compared directly, without errors.Is.
ErrZero = errors.New("must be nonzero")
@ -26,6 +40,7 @@ var (
ErrFinished = errors.New("query finished")
ErrStore = errors.New("internal/storage error") // E.g. when buckets disappear, possibly by external users of the underlying BoltDB database.
ErrParam = errors.New("bad parameters")
ErrTxBotched = errors.New("botched transaction") // Set on transactions after failed and aborted write operations.
errTxClosed = errors.New("transaction is closed")
errNestedIndex = errors.New("struct tags index/unique only allowed at top-level structs")
@ -42,7 +57,7 @@ type DB struct {
// needs a wlock.
typesMutex sync.RWMutex
types map[reflect.Type]storeType
typeNames map[string]storeType // Go type name to store type, for checking duplicates.
typeNames map[string]storeType // Type name to store type, for checking duplicates.
statsMutex sync.Mutex
stats Stats
@ -52,7 +67,9 @@ type DB struct {
//
// A Tx is not safe for concurrent use.
type Tx struct {
db *DB // If nil, this transaction is closed.
ctx context.Context // Check before starting operations, query next calls, and during foreach.
err error // If not nil, operations return this error. Set when write operations fail, e.g. insert with constraint violations.
db *DB // If nil, this transaction is closed.
btx *bolt.Tx
bucketCache map[bucketKey]*bolt.Bucket
@ -109,9 +126,9 @@ type typeVersion struct {
type field struct {
Name string
Type fieldType
Nonzero bool
References []string // Referenced fields. Only for the top-level struct fields, not for nested structs.
Default string // As specified in struct tag. Processed version is defaultValue.
Nonzero bool `json:",omitempty"`
References []string `json:",omitempty"` // Referenced fields. Only for the top-level struct fields, not for nested structs.
Default string `json:",omitempty"` // As specified in struct tag. Processed version is defaultValue.
// If not the zero reflect.Value, set this value instead of a zero value on insert.
// This is always a non-pointer value. Only set for the current typeVersion
@ -123,6 +140,9 @@ type field struct {
// if this field is no longer in the type, or if it has been removed and
// added again in later schema versions.
structField reflect.StructField
// Whether this field has been prepared for parsing data into it, i.e.
// structField set if needed.
prepared bool
indices map[string]*index
}
@ -134,79 +154,135 @@ type embed struct {
structField reflect.StructField
}
type kind int
type kind string
func (k kind) MarshalJSON() ([]byte, error) {
return json.Marshal(string(k))
}
func (k *kind) UnmarshalJSON(buf []byte) error {
if string(buf) == "null" {
return nil
}
if len(buf) > 0 && buf[0] == '"' {
var s string
if err := json.Unmarshal(buf, &s); err != nil {
return fmt.Errorf("parsing fieldType.Kind string value %q: %v", buf, err)
}
nk, ok := kindsMap[s]
if !ok {
return fmt.Errorf("unknown fieldType.Kind value %q", s)
}
*k = nk
return nil
}
// In ondiskVersion1, the kinds were integers, starting at 1.
var i int
if err := json.Unmarshal(buf, &i); err != nil {
return fmt.Errorf("parsing fieldType.Kind int value %q: %v", buf, err)
}
if i <= 0 || i-1 >= len(kinds) {
return fmt.Errorf("unknown fieldType.Kind value %d", i)
}
*k = kinds[i-1]
return nil
}
const (
kindInvalid kind = iota
kindBytes
kindBool
kindInt
kindInt8
kindInt16
kindInt32
kindInt64
kindUint
kindUint8
kindUint16
kindUint32
kindUint64
kindFloat32
kindFloat64
kindMap
kindSlice
kindString
kindTime
kindBinaryMarshal
kindStruct
kindBytes kind = "bytes" // 1, etc
kindBool kind = "bool"
kindInt kind = "int"
kindInt8 kind = "int8"
kindInt16 kind = "int16"
kindInt32 kind = "int32"
kindInt64 kind = "int64"
kindUint kind = "uint"
kindUint8 kind = "uint8"
kindUint16 kind = "uint16"
kindUint32 kind = "uint32"
kindUint64 kind = "uint64"
kindFloat32 kind = "float32"
kindFloat64 kind = "float64"
kindMap kind = "map"
kindSlice kind = "slice"
kindString kind = "string"
kindTime kind = "time"
kindBinaryMarshal kind = "binarymarshal"
kindStruct kind = "struct"
kindArray kind = "array"
)
var kindStrings = []string{
"(invalid)",
"bytes",
"bool",
"int",
"int8",
"int16",
"int32",
"int64",
"uint",
"uint8",
"uint16",
"uint32",
"uint64",
"float32",
"float64",
"map",
"slice",
"string",
"time",
"binarymarshal",
"struct",
// In ondiskVersion1, the kinds were integers, starting at 1.
// Do not change the order. Add new values at the end.
var kinds = []kind{
kindBytes,
kindBool,
kindInt,
kindInt8,
kindInt16,
kindInt32,
kindInt64,
kindUint,
kindUint8,
kindUint16,
kindUint32,
kindUint64,
kindFloat32,
kindFloat64,
kindMap,
kindSlice,
kindString,
kindTime,
kindBinaryMarshal,
kindStruct,
kindArray,
}
func (k kind) String() string {
return kindStrings[k]
func makeKindsMap() map[string]kind {
m := map[string]kind{}
for _, k := range kinds {
m[string(k)] = k
}
return m
}
var kindsMap = makeKindsMap()
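// Hedged in-package sketch (editor's illustration) of the compatibility
// behavior above: kinds unmarshal from both ondiskVersion1 integers and the
// newer strings.
//
//	var k kind
//	_ = json.Unmarshal([]byte(`3`), &k)       // kinds[3-1] == kindInt
//	_ = json.Unmarshal([]byte(`"int32"`), &k) // kindsMap["int32"] == kindInt32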
type fieldType struct {
Ptr bool // If type is a pointer.
Kind kind // Type with possible Ptr dereferenced.
Fields []field // For kindStruct.
MapKey, MapValue *fieldType // For kindMap.
List *fieldType // For kindSlice.
}
Ptr bool `json:",omitempty"` // If type is a pointer.
Kind kind // Type with possible Ptr dereferenced.
func (ft fieldType) String() string {
s := ft.Kind.String()
if ft.Ptr {
return s + "ptr"
}
return s
MapKey *fieldType `json:",omitempty"`
MapValue *fieldType `json:",omitempty"` // For kindMap.
ListElem *fieldType `json:"List,omitempty"` // For kindSlice and kindArray. Named List in JSON for compatibility.
ArrayLength int `json:",omitempty"` // For kindArray.
// For kindStruct, the fields of the struct. Only set for the first use of the
// type within a registered type. Code dealing with fields should usually use
// structFields (below) instead; it has the effective fields after resolving
// the type reference.
// Named "Fields" in JSON to stay compatible with ondiskVersion1, named
// DefinitionFields in Go for clarity.
DefinitionFields []field `json:"Fields,omitempty"`
// For struct types, the sequence number of this type (within the registered type).
// Needed for supporting cyclic types. Each struct type is assigned the next
// sequence number. The registered type implicitly has sequence 1. If positive,
// this defines a type (i.e. when it is first encountered analyzing fields
// depth-first). If negative, it references the type with positive seq (when a
// field is encountered of a type that was seen before). New since ondiskVersion2,
// structs in ondiskVersion1 will have zero value 0.
FieldsTypeSeq int `json:",omitempty"`
// Fields after taking cyclic types into account. Set when registering/loading a
// type. Not stored on disk because of potential cyclic data.
structFields []field
}
// Options configure how a database should be opened or initialized.
type Options struct {
Timeout time.Duration // Abort if opening DB takes longer than Timeout.
Timeout time.Duration // Abort if opening DB takes longer than Timeout. If not set, the deadline from the context is used.
Perm fs.FileMode // Permissions for new file if created. If zero, 0600 is used.
MustExist bool // Before opening, check that file exists. If not, io/fs.ErrNotExist is returned.
}
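// Hedged sketch (editor's illustration): opening with a context deadline
// instead of an explicit Timeout, per the Options behavior above.
//
//	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
//	defer cancel()
//	db, err := bstore.Open(ctx, "data/app.db", &bstore.Options{Perm: 0640}, Message{})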
@ -219,10 +295,19 @@ type Options struct {
//
// Only one DB instance can be open for a file at a time. Use opts.Timeout to
// specify a timeout during open to prevent indefinite blocking.
func Open(path string, opts *Options, typeValues ...any) (*DB, error) {
//
// The context is used for opening and initializing the database, not for
// further operations. If the context is canceled while waiting on the database
// file lock, the wait is not aborted; only reaching the deadline/timeout
// aborts it.
//
// See function Register for checks for changed/unchanged schema during open
// based on environment variable "bstore_schema_check".
func Open(ctx context.Context, path string, opts *Options, typeValues ...any) (*DB, error) {
var bopts *bolt.Options
if opts != nil && opts.Timeout > 0 {
bopts = &bolt.Options{Timeout: opts.Timeout}
} else if end, ok := ctx.Deadline(); ok {
bopts = &bolt.Options{Timeout: time.Until(end)}
}
var mode fs.FileMode = 0600
if opts != nil && opts.Perm != 0 {
@ -241,7 +326,7 @@ func Open(path string, opts *Options, typeValues ...any) (*DB, error) {
typeNames := map[string]storeType{}
types := map[reflect.Type]storeType{}
db := &DB{bdb: bdb, typeNames: typeNames, types: types}
if err := db.Register(typeValues...); err != nil {
if err := db.Register(ctx, typeValues...); err != nil {
bdb.Close()
return nil, err
}
@ -272,6 +357,9 @@ func (tx *Tx) Stats() Stats {
// WriteTo writes the entire database to w, not including changes made during this transaction.
func (tx *Tx) WriteTo(w io.Writer) (n int64, err error) {
if err := tx.error(); err != nil {
return 0, err
}
return tx.btx.WriteTo(w)
}
@ -328,58 +416,67 @@ func (tx *Tx) indexBucket(idx *index) (*bolt.Bucket, error) {
// If a type is still referenced by another type, eg through a "ref" struct tag,
// ErrReference is returned.
// If the type does not exist, ErrAbsent is returned.
func (db *DB) Drop(name string) error {
return db.Write(func(tx *Tx) error {
func (db *DB) Drop(ctx context.Context, name string) error {
var st storeType
var ok bool
err := db.Write(ctx, func(tx *Tx) error {
tx.stats.Bucket.Get++
if tx.btx.Bucket([]byte(name)) == nil {
return ErrAbsent
}
if st, ok := db.typeNames[name]; ok && len(st.Current.referencedBy) > 0 {
st, ok = db.typeNames[name]
if ok && len(st.Current.referencedBy) > 0 {
return fmt.Errorf("%w: type is still referenced", ErrReference)
} else if ok {
for ref := range st.Current.references {
var n []*index
for _, idx := range db.typeNames[ref].Current.referencedBy {
if idx.tv != st.Current {
n = append(n, idx)
}
}
db.typeNames[ref].Current.referencedBy = n
}
delete(db.typeNames, name)
delete(db.types, st.Type)
}
tx.stats.Bucket.Delete++
return tx.btx.DeleteBucket([]byte(name))
})
if err != nil {
return err
}
if ok {
for ref := range st.Current.references {
var n []*index
for _, idx := range db.typeNames[ref].Current.referencedBy {
if idx.tv != st.Current {
n = append(n, idx)
}
}
db.typeNames[ref].Current.referencedBy = n
}
delete(db.typeNames, name)
delete(db.types, st.Type)
}
return nil
}
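Callers now pass a context; continuing the earlier sketch (db and ctx as above, type name illustrative):

if err := db.Drop(ctx, "User"); err != nil {
	// ErrAbsent: no such type stored; ErrReference: another type still
	// references it through a "ref" struct tag.
	return err
}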
// Delete calls Delete on a new writable Tx.
func (db *DB) Delete(values ...any) error {
return db.Write(func(tx *Tx) error {
func (db *DB) Delete(ctx context.Context, values ...any) error {
return db.Write(ctx, func(tx *Tx) error {
return tx.Delete(values...)
})
}
// Get calls Get on a new read-only Tx.
func (db *DB) Get(values ...any) error {
return db.Read(func(tx *Tx) error {
func (db *DB) Get(ctx context.Context, values ...any) error {
return db.Read(ctx, func(tx *Tx) error {
return tx.Get(values...)
})
}
// Insert calls Insert on a new writable Tx.
func (db *DB) Insert(values ...any) error {
return db.Write(func(tx *Tx) error {
func (db *DB) Insert(ctx context.Context, values ...any) error {
return db.Write(ctx, func(tx *Tx) error {
return tx.Insert(values...)
})
}
// Update calls Update on a new writable Tx.
func (db *DB) Update(values ...any) error {
return db.Write(func(tx *Tx) error {
func (db *DB) Update(ctx context.Context, values ...any) error {
return db.Write(ctx, func(tx *Tx) error {
return tx.Update(values...)
})
}
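Continuing the sketch, the convenience helpers only gain the leading context argument:

u := User{Name: "alice"}
if err := db.Insert(ctx, &u); err != nil { // fills in the auto-increment u.ID
	return err
}
u.Name = "bob"
if err := db.Update(ctx, &u); err != nil {
	return err
}
got := User{ID: u.ID}
if err := db.Get(ctx, &got); err != nil {
	return err
}
if err := db.Delete(ctx, &got); err != nil {
	return err
}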
@ -401,6 +498,7 @@ var typeKinds = map[reflect.Kind]kind{
reflect.Map: kindMap,
reflect.Slice: kindSlice,
reflect.String: kindString,
reflect.Array: kindArray,
}
func typeKind(t reflect.Type) (kind, error) {
@ -424,7 +522,10 @@ func typeKind(t reflect.Type) (kind, error) {
if t.Kind() == reflect.Struct {
return kindStruct, nil
}
return kind(0), fmt.Errorf("%w: unsupported type %v", ErrType, t)
if t.Kind() == reflect.Ptr {
return "", fmt.Errorf("%w: pointer to pointers not supported: %v", ErrType, t.Elem())
}
return "", fmt.Errorf("%w: unsupported type %v", ErrType, t)
}
func typeName(t reflect.Type) (string, error) {
@ -509,27 +610,39 @@ func (tv typeVersion) keyValue(tx *Tx, rv reflect.Value, insert bool, rb *bolt.B
}
// Read calls function fn with a new read-only transaction, ensuring transaction rollback.
func (db *DB) Read(fn func(*Tx) error) error {
func (db *DB) Read(ctx context.Context, fn func(*Tx) error) error {
if err := ctx.Err(); err != nil {
return err
}
db.typesMutex.RLock()
defer db.typesMutex.RUnlock()
return db.bdb.View(func(btx *bolt.Tx) error {
tx := &Tx{db: db, btx: btx}
tx := &Tx{ctx: ctx, db: db, btx: btx}
tx.stats.Reads++
defer tx.addStats()
return fn(tx)
if err := fn(tx); err != nil {
return err
}
return tx.err
})
}
// Write calls function fn with a new read-write transaction. If fn returns
// nil, the transaction is committed. Otherwise the transaction is rolled back.
func (db *DB) Write(fn func(*Tx) error) error {
func (db *DB) Write(ctx context.Context, fn func(*Tx) error) error {
if err := ctx.Err(); err != nil {
return err
}
db.typesMutex.RLock()
defer db.typesMutex.RUnlock()
return db.bdb.Update(func(btx *bolt.Tx) error {
tx := &Tx{db: db, btx: btx}
tx := &Tx{ctx: ctx, db: db, btx: btx}
tx.stats.Writes++
defer tx.addStats()
return fn(tx)
if err := fn(tx); err != nil {
return err
}
return tx.err
})
}
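For multiple operations that must succeed or fail together, Write runs them in one transaction; a sketch reusing the types from above:

err := db.Write(ctx, func(tx *bstore.Tx) error {
	a := User{Name: "alice"}
	if err := tx.Insert(&a); err != nil {
		return err
	}
	// Returning a non-nil error (or tx.err being set, see tx.go below)
	// rolls the whole transaction back; returning nil commits it.
	return tx.Insert(&User{Name: "bob"})
})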

vendor/github.com/mjl-/bstore/tx.go

@ -2,12 +2,39 @@ package bstore
import (
"bytes"
"context"
"fmt"
"reflect"
bolt "go.etcd.io/bbolt"
)
// Mark a tx as botched, mentioning the last actual error.
// Used when write operations fail. The transaction can be in an inconsistent
// state, e.g. only some of a type's indices may have been updated. We never
// want to commit such transactions.
func (tx *Tx) markError(err *error) {
if *err != nil && tx.err == nil {
tx.err = fmt.Errorf("%w (after %v)", ErrTxBotched, *err)
}
}
// Return any error condition set on the transaction. To be called before
// starting an operation.
func (tx *Tx) error() error {
if tx.err != nil {
return tx.err
}
if tx.db == nil {
return errTxClosed
}
if err := tx.ctx.Err(); err != nil {
tx.err = err
return err
}
return nil
}
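A sketch of the resulting behavior: once a write operation fails partway (e.g. a unique-index violation while updating indices), the transaction is marked botched, later calls fail fast, and Commit rolls back instead of committing. Types and values are illustrative:

err := db.Write(ctx, func(tx *bstore.Tx) error {
	if err := tx.Insert(&User{Name: "dup"}); err != nil {
		// E.g. ErrUnique; tx.err is now set to ErrTxBotched (mentioning
		// the original error), so further Get/Insert/Update/Delete calls
		// on this tx return immediately.
		return err
	}
	return nil
})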
func (tx *Tx) structptr(value any) (reflect.Value, error) {
rv := reflect.ValueOf(value)
if !rv.IsValid() || rv.Kind() != reflect.Ptr || !rv.Elem().IsValid() || rv.Type().Elem().Kind() != reflect.Struct {
@ -42,10 +69,23 @@ func (tx *Tx) updateIndices(tv *typeVersion, pk []byte, ov, v reflect.Value) err
changed := func(idx *index) bool {
for _, f := range idx.Fields {
rofv := ov.FieldByIndex(f.structField.Index)
nofv := v.FieldByIndex(f.structField.Index)
// note: checking the interface values is enough, we only allow comparable types as index fields.
if rofv.Interface() != nofv.Interface() {
ofv := ov.FieldByIndex(f.structField.Index)
nfv := v.FieldByIndex(f.structField.Index)
if f.Type.Kind == kindSlice {
// Index field is a slice type, cannot use direct interface comparison.
on := ofv.Len()
nn := nfv.Len()
if on != nn {
return true
}
for i := 0; i < nn; i++ {
// Slice elements are comparable.
if ofv.Index(i).Interface() != nfv.Index(i).Interface() {
return true
}
}
} else if ofv.Interface() != nfv.Interface() {
// note: checking the interface values is enough.
return true
}
}
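This per-element comparison is what lets an index cover a []string field (the Message.DKIMDomains index mentioned in the commit message). A sketch of such a type and an update that the closure above reports as changed; the type and tag are illustrative:

// Hypothetical type with an index on a slice field; assumes Msg was
// passed to Open so it is registered.
type Msg struct {
	ID          int64
	DKIMDomains []string `bstore:"index"`
}

m := Msg{DKIMDomains: []string{"a.example"}}
if err := db.Insert(ctx, &m); err != nil {
	return err
}
m.DKIMDomains = append(m.DKIMDomains, "b.example")
// Lengths differ, so changed() is true and the index entries are rewritten.
if err := db.Update(ctx, &m); err != nil {
	return err
}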
@ -69,36 +109,40 @@ func (tx *Tx) updateIndices(tv *typeVersion, pk []byte, ov, v reflect.Value) err
return err
}
if remove {
_, ik, err := idx.packKey(ov, pk)
ikl, err := idx.packKey(ov, pk)
if err != nil {
return err
}
tx.stats.Index.Delete++
if sanityChecks {
tx.stats.Index.Get++
if ib.Get(ik) == nil {
return fmt.Errorf("internal error: key missing from index")
for _, ik := range ikl {
tx.stats.Index.Delete++
if sanityChecks {
tx.stats.Index.Get++
if ib.Get(ik.full) == nil {
return fmt.Errorf("%w: key missing from index", ErrStore)
}
}
if err := ib.Delete(ik.full); err != nil {
return fmt.Errorf("%w: removing from index: %s", ErrStore, err)
}
}
if err := ib.Delete(ik); err != nil {
return fmt.Errorf("%w: removing from index: %s", ErrStore, err)
}
}
if add {
prek, ik, err := idx.packKey(v, pk)
ikl, err := idx.packKey(v, pk)
if err != nil {
return err
}
if idx.Unique {
tx.stats.Index.Cursor++
if xk, _ := ib.Cursor().Seek(prek); xk != nil && bytes.HasPrefix(xk, prek) {
return fmt.Errorf("%w: %q", ErrUnique, idx.Name)
for _, ik := range ikl {
if idx.Unique {
tx.stats.Index.Cursor++
if xk, _ := ib.Cursor().Seek(ik.pre); xk != nil && bytes.HasPrefix(xk, ik.pre) {
return fmt.Errorf("%w: %q", ErrUnique, idx.Name)
}
}
}
tx.stats.Index.Put++
if err := ib.Put(ik, []byte{}); err != nil {
return fmt.Errorf("inserting into index: %w", err)
tx.stats.Index.Put++
if err := ib.Put(ik.full, []byte{}); err != nil {
return fmt.Errorf("inserting into index: %w", err)
}
}
}
}
@ -124,7 +168,7 @@ func (tx *Tx) checkReferences(tv *typeVersion, pk []byte, ov, rv reflect.Value)
return err
}
if rb.Get(k) == nil {
return fmt.Errorf("%w: value %v from field %q to %q", ErrReference, frv.Interface(), f.Name, name)
return fmt.Errorf("%w: value %v from %q to %q", ErrReference, frv.Interface(), tv.name+"."+f.Name, name)
}
}
}
@ -143,8 +187,8 @@ func (tx *Tx) addStats() {
//
// ErrAbsent is returned if the record does not exist.
func (tx *Tx) Get(values ...any) error {
if tx.db == nil {
return errTxClosed
if err := tx.error(); err != nil {
return err
}
for _, value := range values {
@ -184,8 +228,8 @@ func (tx *Tx) Get(values ...any) error {
// ErrAbsent is returned if the record does not exist.
// ErrReference is returned if another record still references this record.
func (tx *Tx) Delete(values ...any) error {
if tx.db == nil {
return errTxClosed
if err := tx.error(); err != nil {
return err
}
for _, value := range values {
@ -222,7 +266,7 @@ func (tx *Tx) Delete(values ...any) error {
return nil
}
func (tx *Tx) delete(rb *bolt.Bucket, st storeType, k []byte, rov reflect.Value) error {
func (tx *Tx) delete(rb *bolt.Bucket, st storeType, k []byte, rov reflect.Value) (rerr error) {
// Check that anyone referencing this type does not reference this record.
for _, refBy := range st.Current.referencedBy {
if ib, err := tx.indexBucket(refBy); err != nil {
@ -236,6 +280,7 @@ func (tx *Tx) delete(rb *bolt.Bucket, st storeType, k []byte, rov reflect.Value)
}
// Delete value from indices.
defer tx.markError(&rerr)
if err := tx.updateIndices(st.Current, k, rov, reflect.Value{}); err != nil {
return fmt.Errorf("removing from indices: %w", err)
}
@ -250,8 +295,8 @@ func (tx *Tx) delete(rb *bolt.Bucket, st storeType, k []byte, rov reflect.Value)
//
// ErrAbsent is returned if the record does not exist.
func (tx *Tx) Update(values ...any) error {
if tx.db == nil {
return errTxClosed
if err := tx.error(); err != nil {
return err
}
for _, value := range values {
@ -282,8 +327,8 @@ func (tx *Tx) Update(values ...any) error {
// ErrZero is returned if a nonzero constraint would be violated.
// ErrReference is returned if another record is referenced that does not exist.
func (tx *Tx) Insert(values ...any) error {
if tx.db == nil {
return errTxClosed
if err := tx.error(); err != nil {
return err
}
for _, value := range values {
@ -298,6 +343,7 @@ func (tx *Tx) Insert(values ...any) error {
return err
}
// todo optimize: should track per field whether it (or a child) has a default value, and only applyDefault if so.
if err := st.Current.applyDefault(rv); err != nil {
return err
}
@ -344,7 +390,7 @@ func (tx *Tx) put(st storeType, rv reflect.Value, insert bool) error {
}
}
func (tx *Tx) insert(rb *bolt.Bucket, st storeType, rv, krv reflect.Value, k []byte) error {
func (tx *Tx) insert(rb *bolt.Bucket, st storeType, rv, krv reflect.Value, k []byte) (rerr error) {
v, err := st.pack(rv)
if err != nil {
return err
@ -352,6 +398,7 @@ func (tx *Tx) insert(rb *bolt.Bucket, st storeType, rv, krv reflect.Value, k []b
if err := tx.checkReferences(st.Current, k, reflect.Value{}, rv); err != nil {
return err
}
defer tx.markError(&rerr)
if err := tx.updateIndices(st.Current, k, reflect.Value{}, rv); err != nil {
return fmt.Errorf("updating indices for inserted value: %w", err)
}
@ -363,7 +410,7 @@ func (tx *Tx) insert(rb *bolt.Bucket, st storeType, rv, krv reflect.Value, k []b
return nil
}
func (tx *Tx) update(rb *bolt.Bucket, st storeType, rv, rov reflect.Value, k []byte) error {
func (tx *Tx) update(rb *bolt.Bucket, st storeType, rv, rov reflect.Value, k []byte) (rerr error) {
if st.Current.equal(rov, rv) {
return nil
}
@ -375,6 +422,7 @@ func (tx *Tx) update(rb *bolt.Bucket, st storeType, rv, rov reflect.Value, k []b
if err := tx.checkReferences(st.Current, k, rov, rv); err != nil {
return err
}
defer tx.markError(&rerr)
if err := tx.updateIndices(st.Current, k, rov, rv); err != nil {
return fmt.Errorf("updating indices for updated record: %w", err)
}
@ -391,13 +439,16 @@ func (tx *Tx) update(rb *bolt.Bucket, st storeType, rv, rov reflect.Value, k []b
//
// A writable Tx can be committed or rolled back. A read-only transaction must
// always be rolled back.
func (db *DB) Begin(writable bool) (*Tx, error) {
func (db *DB) Begin(ctx context.Context, writable bool) (*Tx, error) {
if err := ctx.Err(); err != nil {
return nil, err
}
btx, err := db.bdb.Begin(writable)
if err != nil {
return nil, err
}
db.typesMutex.RLock()
tx := &Tx{db: db, btx: btx}
tx := &Tx{ctx: ctx, db: db, btx: btx}
if writable {
tx.stats.Writes++
} else {
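A sketch of a manually managed transaction under the new signature:

tx, err := db.Begin(ctx, true)
if err != nil {
	return err
}
// Rollback after a successful Commit returns an error we can ignore here.
defer tx.Rollback()

if err := tx.Insert(&User{Name: "carol"}); err != nil {
	return err
}
return tx.Commit()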
@ -422,9 +473,14 @@ func (tx *Tx) Rollback() error {
// Commit commits changes made in the transaction to the database.
// Statistics are added to its DB.
// If the commit fails, or the transaction was botched, the transaction is
// rolled back.
func (tx *Tx) Commit() error {
if tx.db == nil {
return errTxClosed
} else if tx.err != nil {
tx.Rollback()
return tx.err
}
tx.addStats()