diff --git a/.gitignore b/.gitignore index 563568ebb..a4393938b 100644 --- a/.gitignore +++ b/.gitignore @@ -26,3 +26,5 @@ cmd/genji/genji # VS Code config .vscode/ + +gopls.log diff --git a/cmd/genji/dbutil/dump.go b/cmd/genji/dbutil/dump.go index dea2ecea1..f69cf5fe3 100644 --- a/cmd/genji/dbutil/dump.go +++ b/cmd/genji/dbutil/dump.go @@ -210,8 +210,12 @@ func dumpSchema(tx *genji.Tx, w io.Writer, tableName string) error { u = " UNIQUE" } - _, err = fmt.Fprintf(w, "CREATE%s INDEX %s ON %s (%s);\n", u, index.Info.IndexName, index.Info.TableName, - index.Info.Path) + var paths []string + for _, path := range index.Info.Paths { + paths = append(paths, path.String()) + } + + _, err = fmt.Fprintf(w, "CREATE%s INDEX %s ON %s (%s);\n", u, index.Info.IndexName, index.Info.TableName, strings.Join(paths, ", ")) if err != nil { return err } diff --git a/cmd/genji/dbutil/dump_test.go b/cmd/genji/dbutil/dump_test.go index cb306dd97..a9e0184f9 100644 --- a/cmd/genji/dbutil/dump_test.go +++ b/cmd/genji/dbutil/dump_test.go @@ -58,22 +58,27 @@ func TestDump(t *testing.T) { require.NoError(t, err) writeToBuf(q + "\n") - q = fmt.Sprintf(`CREATE INDEX idx_a_%s ON %s (a);`, table, table) + q = fmt.Sprintf(`CREATE INDEX idx_%s_a ON %s (a);`, table, table) + err = db.Exec(q) + require.NoError(t, err) + writeToBuf(q + "\n") + + q = fmt.Sprintf(`CREATE INDEX idx_%s_b_c ON %s (b, c);`, table, table) err = db.Exec(q) require.NoError(t, err) writeToBuf(q + "\n") - q = fmt.Sprintf(`INSERT INTO %s VALUES {"a": %d, "b": %d};`, table, 1, 2) + q = fmt.Sprintf(`INSERT INTO %s VALUES {"a": %d, "b": %d, "c": %d};`, table, 1, 2, 3) err = db.Exec(q) require.NoError(t, err) writeToBuf(q + "\n") - q = fmt.Sprintf(`INSERT INTO %s VALUES {"a": %d, "b": %d};`, table, 2, 2) + q = fmt.Sprintf(`INSERT INTO %s VALUES {"a": %d, "b": %d, "c": %d};`, table, 2, 2, 2) err = db.Exec(q) require.NoError(t, err) writeToBuf(q + "\n") - q = fmt.Sprintf(`INSERT INTO %s VALUES {"a": %d, "b": %d};`, table, 3, 2) + q = fmt.Sprintf(`INSERT INTO %s VALUES {"a": %d, "b": %d, "c": %d};`, table, 3, 2, 1) err = db.Exec(q) require.NoError(t, err) writeToBuf(q + "\n") diff --git a/cmd/genji/shell/command.go b/cmd/genji/shell/command.go index f20735806..b4f604d25 100644 --- a/cmd/genji/shell/command.go +++ b/cmd/genji/shell/command.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "io" + "strings" "github.com/genjidb/genji" "github.com/genjidb/genji/cmd/genji/dbutil" @@ -126,7 +127,12 @@ func runIndexesCmd(db *genji.DB, tableName string, w io.Writer) error { return err } - fmt.Fprintf(w, "%s ON %s (%s)\n", index.IndexName, index.TableName, index.Path) + var paths []string + for _, path := range index.Paths { + paths = append(paths, path.String()) + } + + fmt.Fprintf(w, "%s ON %s (%s)\n", index.IndexName, index.TableName, strings.Join(paths, ", ")) return nil }) diff --git a/cmd/genji/shell/command_test.go b/cmd/genji/shell/command_test.go index 7689f5e6d..4f8880eb1 100644 --- a/cmd/genji/shell/command_test.go +++ b/cmd/genji/shell/command_test.go @@ -60,7 +60,7 @@ func TestIndexesCmd(t *testing.T) { want string fails bool }{ - {"All", "", "idx_bar_a ON bar (a)\nidx_foo_a ON foo (a)\nidx_foo_b ON foo (b)\n", false}, + {"All", "", "idx_bar_a_b ON bar (a, b)\nidx_foo_a ON foo (a)\nidx_foo_b ON foo (b)\n", false}, {"With table name", "foo", "idx_foo_a ON foo (a)\nidx_foo_b ON foo (b)\n", false}, {"With nonexistent table name", "baz", "", true}, } @@ -76,7 +76,7 @@ func TestIndexesCmd(t *testing.T) { CREATE INDEX idx_foo_a ON foo (a); CREATE INDEX idx_foo_b ON foo 
(b); CREATE TABLE bar; - CREATE INDEX idx_bar_a ON bar (a); + CREATE INDEX idx_bar_a_b ON bar (a, b); `) require.NoError(t, err) @@ -117,7 +117,7 @@ func TestSaveCommand(t *testing.T) { err = db.Exec(` CREATE TABLE test (a DOUBLE); - CREATE INDEX idx_a ON test (a); + CREATE INDEX idx_a_b ON test (a, b); `) require.NoError(t, err) err = db.Exec("INSERT INTO test (a, b) VALUES (?, ?)", 1, 2) @@ -160,7 +160,7 @@ func TestSaveCommand(t *testing.T) { err = db.View(func(tx *genji.Tx) error { indexes := tx.ListIndexes() require.Len(t, indexes, 1) - require.Equal(t, "idx_a", indexes[0]) + require.Equal(t, "idx_a_b", indexes[0]) + + index, err := tx.GetIndex("idx_a_b") + require.NoError(t, err) + require.Equal(t, []document.ValueType{document.DoubleValue, 0}, index.Info.Types) return nil }) @@ -172,12 +176,12 @@ func TestSaveCommand(t *testing.T) { defer tx.Rollback() - idx, err := tx.GetIndex("idx_a") + idx, err := tx.GetIndex("idx_a_b") require.NoError(t, err) // check that by iterating through the index and finding the previously inserted values var i int - err = idx.AscendGreaterOrEqual(document.Value{Type: document.DoubleValue}, func(v, k []byte) error { + err = idx.AscendGreaterOrEqual([]document.Value{document.NewDoubleValue(0)}, func(v, k []byte) error { i++ return nil }) diff --git a/database/catalog.go b/database/catalog.go index 1b41c14ff..a21299534 100644 --- a/database/catalog.go +++ b/database/catalog.go @@ -325,15 +325,19 @@ func (c *Catalog) ReIndex(tx *Transaction, indexName string) error { func (c *Catalog) buildIndex(tx *Transaction, idx *Index, table *Table) error { return table.Iterate(func(d document.Document) error { - v, err := idx.Info.Path.GetValueFromDocument(d) - if err == document.ErrFieldNotFound { - return nil - } - if err != nil { - return err + var err error + values := make([]document.Value, len(idx.Info.Paths)) + for i, path := range idx.Info.Paths { + values[i], err = path.GetValueFromDocument(d) + if err == document.ErrFieldNotFound { + return nil + } + if err != nil { + return err + } } - err = idx.Set(v, d.(document.Keyer).RawKey()) + err = idx.Set(values, d.(document.Keyer).RawKey()) if err != nil { return stringutil.Errorf("error while building the index: %w", err) } @@ -490,16 +494,25 @@ func (c *catalogCache) AddIndex(tx *Transaction, info *IndexInfo) error { return ErrTableNotFound } - // if the index is created on a field on which we know the type, - // create a typed index. - for _, fc := range ti.FieldConstraints { - if fc.Path.IsEqual(info.Path) { - if fc.Type != 0 { - info.Type = fc.Type - } + // if the index is created on a field on which we know the type, then create a typed index. + // if the given info contains existing types, they are overridden.
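+	// For example (illustrative, mirroring TestSaveCommand in cmd/genji/shell/command_test.go):
+	// with CREATE TABLE test (a DOUBLE) and CREATE INDEX idx_a_b ON test (a, b),
+	// the inferred types end up as [DoubleValue, 0], the 0 meaning that the
+	// second path stays untyped.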
+ info.Types = nil + +OUTER: + for _, path := range info.Paths { + for _, fc := range ti.FieldConstraints { + if fc.Path.IsEqual(path) { + // a constraint may or may not enforce a type + if fc.Type != 0 { + info.Types = append(info.Types, document.ValueType(fc.Type)) + } - break + continue OUTER + } } + + // no type was inferred for that path, add it to the index as untyped + info.Types = append(info.Types, document.ValueType(0)) } c.indexes[info.IndexName] = info diff --git a/database/catalog_test.go b/database/catalog_test.go index cd1a02c02..3b870032a 100644 --- a/database/catalog_test.go +++ b/database/catalog_test.go @@ -115,9 +115,9 @@ func TestCatalogTable(t *testing.T) { err := catalog.CreateTable(tx, "foo", ti) require.NoError(t, err) - err = catalog.CreateIndex(tx, &database.IndexInfo{Path: parsePath(t, "gender"), IndexName: "idx_gender", TableName: "foo"}) + err = catalog.CreateIndex(tx, &database.IndexInfo{Paths: []document.Path{parsePath(t, "gender")}, IndexName: "idx_gender", TableName: "foo"}) require.NoError(t, err) - err = catalog.CreateIndex(tx, &database.IndexInfo{Path: parsePath(t, "city"), IndexName: "idx_city", TableName: "foo", Unique: true}) + err = catalog.CreateIndex(tx, &database.IndexInfo{Paths: []document.Path{parsePath(t, "city")}, IndexName: "idx_city", TableName: "foo", Unique: true}) require.NoError(t, err) return nil @@ -318,7 +318,7 @@ func TestTxCreateIndex(t *testing.T) { update(t, db, func(tx *database.Transaction) error { err := catalog.CreateIndex(tx, &database.IndexInfo{ - IndexName: "idx_a", TableName: "test", Path: parsePath(t, "a"), + IndexName: "idx_a", TableName: "test", Paths: []document.Path{parsePath(t, "a")}, }) require.NoError(t, err) idx, err := tx.GetIndex("idx_a") @@ -326,7 +326,7 @@ func TestTxCreateIndex(t *testing.T) { require.NotNil(t, idx) var i int - err = idx.AscendGreaterOrEqual(document.Value{Type: document.DoubleValue}, func(v, k []byte) error { + err = idx.AscendGreaterOrEqual(values(document.Value{Type: document.DoubleValue}), func(v, k []byte) error { var buf bytes.Buffer err = document.NewValueEncoder(&buf).Encode(document.NewDoubleValue(float64(i))) require.NoError(t, err) @@ -355,12 +355,12 @@ func TestTxCreateIndex(t *testing.T) { update(t, db, func(tx *database.Transaction) error { err := catalog.CreateIndex(tx, &database.IndexInfo{ - IndexName: "idxFoo", TableName: "test", Path: parsePath(t, "foo"), + IndexName: "idxFoo", TableName: "test", Paths: []document.Path{parsePath(t, "foo")}, }) require.NoError(t, err) err = catalog.CreateIndex(tx, &database.IndexInfo{ - IndexName: "idxFoo", TableName: "test", Path: parsePath(t, "foo"), + IndexName: "idxFoo", TableName: "test", Paths: []document.Path{parsePath(t, "foo")}, }) require.Equal(t, database.ErrIndexAlreadyExists, err) return nil @@ -373,7 +373,7 @@ func TestTxCreateIndex(t *testing.T) { catalog := db.Catalog() update(t, db, func(tx *database.Transaction) error { err := catalog.CreateIndex(tx, &database.IndexInfo{ - IndexName: "idxFoo", TableName: "test", Path: parsePath(t, "foo"), + IndexName: "idxFoo", TableName: "test", Paths: []document.Path{parsePath(t, "foo")}, }) if !errors.Is(err, database.ErrTableNotFound) { require.Equal(t, err, database.ErrTableNotFound) @@ -394,7 +394,7 @@ func TestTxCreateIndex(t *testing.T) { update(t, db, func(tx *database.Transaction) error { err := catalog.CreateIndex(tx, &database.IndexInfo{ - TableName: "test", Path: parsePath(t, "foo"), + TableName: "test", Paths: []document.Path{parsePath(t, "foo")}, }) require.NoError(t, err) @@ 
-403,7 +403,7 @@ func TestTxCreateIndex(t *testing.T) { // create another one err = catalog.CreateIndex(tx, &database.IndexInfo{ - TableName: "test", Path: parsePath(t, "foo"), + TableName: "test", Paths: []document.Path{parsePath(t, "foo")}, }) require.NoError(t, err) @@ -424,11 +424,11 @@ func TestTxDropIndex(t *testing.T) { err := catalog.CreateTable(tx, "test", nil) require.NoError(t, err) err = catalog.CreateIndex(tx, &database.IndexInfo{ - IndexName: "idxFoo", TableName: "test", Path: parsePath(t, "foo"), + IndexName: "idxFoo", TableName: "test", Paths: []document.Path{parsePath(t, "foo")}, }) require.NoError(t, err) err = catalog.CreateIndex(tx, &database.IndexInfo{ - IndexName: "idxBar", TableName: "test", Path: parsePath(t, "bar"), + IndexName: "idxBar", TableName: "test", Paths: []document.Path{parsePath(t, "bar")}, }) require.NoError(t, err) return nil @@ -489,13 +489,13 @@ func TestCatalogReIndex(t *testing.T) { err = catalog.CreateIndex(tx, &database.IndexInfo{ IndexName: "a", TableName: "test", - Path: parsePath(t, "a"), + Paths: []document.Path{parsePath(t, "a")}, }) require.NoError(t, err) err = catalog.CreateIndex(tx, &database.IndexInfo{ IndexName: "b", TableName: "test", - Path: parsePath(t, "b"), + Paths: []document.Path{parsePath(t, "b")}, }) require.NoError(t, err) @@ -537,7 +537,7 @@ func TestCatalogReIndex(t *testing.T) { return catalog.CreateIndex(tx, &database.IndexInfo{ IndexName: "b", TableName: "test", - Path: parsePath(t, "b"), + Paths: []document.Path{parsePath(t, "b")}, }) }) @@ -570,7 +570,7 @@ func TestCatalogReIndex(t *testing.T) { require.NoError(t, err) var i int - err = idx.AscendGreaterOrEqual(document.Value{Type: document.DoubleValue}, func(v, k []byte) error { + err = idx.AscendGreaterOrEqual([]document.Value{document.Value{Type: document.DoubleValue}}, func(v, k []byte) error { var buf bytes.Buffer err = document.NewValueEncoder(&buf).Encode(document.NewDoubleValue(float64(i))) require.NoError(t, err) @@ -638,13 +638,13 @@ func TestReIndexAll(t *testing.T) { err = catalog.CreateIndex(tx, &database.IndexInfo{ IndexName: "t1a", TableName: "test1", - Path: parsePath(t, "a"), + Paths: []document.Path{parsePath(t, "a")}, }) require.NoError(t, err) err = catalog.CreateIndex(tx, &database.IndexInfo{ IndexName: "t2a", TableName: "test2", - Path: parsePath(t, "a"), + Paths: []document.Path{parsePath(t, "a")}, }) require.NoError(t, err) @@ -660,7 +660,7 @@ func TestReIndexAll(t *testing.T) { require.NoError(t, err) var i int - err = idx.AscendGreaterOrEqual(document.Value{Type: document.DoubleValue}, func(v, k []byte) error { + err = idx.AscendGreaterOrEqual([]document.Value{document.Value{Type: document.DoubleValue}}, func(v, k []byte) error { var buf bytes.Buffer err = document.NewValueEncoder(&buf).Encode(document.NewDoubleValue(float64(i))) require.NoError(t, err) @@ -676,7 +676,7 @@ func TestReIndexAll(t *testing.T) { require.NoError(t, err) i = 0 - err = idx.AscendGreaterOrEqual(document.Value{Type: document.DoubleValue}, func(v, k []byte) error { + err = idx.AscendGreaterOrEqual([]document.Value{document.Value{Type: document.DoubleValue}}, func(v, k []byte) error { var buf bytes.Buffer err = document.NewValueEncoder(&buf).Encode(document.NewDoubleValue(float64(i))) require.NoError(t, err) @@ -708,5 +708,5 @@ func TestReadOnlyTables(t *testing.T) { doc, err = db.QueryDocument(`CREATE INDEX idx_foo_a ON foo(a); SELECT * FROM __genji_indexes`) require.NoError(t, err) - testutil.RequireDocJSONEq(t, doc, `{"index_name":"idx_foo_a", "path":["a"], 
"table_name":"foo", "unique":false}`) + testutil.RequireDocJSONEq(t, doc, `{"index_name":"idx_foo_a", "paths":[["a"]], "table_name":"foo", "types":[0], "unique":false}`) } diff --git a/database/config.go b/database/config.go index 30dc99679..c97e5917b 100644 --- a/database/config.go +++ b/database/config.go @@ -230,13 +230,13 @@ func (t *tableStore) Replace(tx *Transaction, tableName string, info *TableInfo) type IndexInfo struct { TableName string IndexName string - Path document.Path + Paths []document.Path // If set to true, values will be associated with at most one key. False by default. Unique bool - // If set, the index is typed and only accepts that type - Type document.ValueType + // If set, the index is typed and only accepts values of those types. + Types []document.ValueType } // ToDocument creates a document from an IndexConfig. @@ -246,9 +246,19 @@ func (i *IndexInfo) ToDocument() document.Document { buf.Add("unique", document.NewBoolValue(i.Unique)) buf.Add("index_name", document.NewTextValue(i.IndexName)) buf.Add("table_name", document.NewTextValue(i.TableName)) - buf.Add("path", document.NewArrayValue(pathToArray(i.Path))) - if i.Type != 0 { - buf.Add("type", document.NewIntegerValue(int64(i.Type))) + + vb := document.NewValueBuffer() + for _, path := range i.Paths { + vb.Append(document.NewArrayValue(pathToArray(path))) + } + + buf.Add("paths", document.NewArrayValue(vb)) + if i.Types != nil { + types := make([]document.Value, 0, len(i.Types)) + for _, typ := range i.Types { + types = append(types, document.NewIntegerValue(int64(typ))) + } + buf.Add("types", document.NewArrayValue(document.NewValueBuffer(types...))) } return buf } @@ -273,21 +283,41 @@ func (i *IndexInfo) ScanDocument(d document.Document) error { } i.TableName = string(v.V.(string)) - v, err = d.GetByField("path") + v, err = d.GetByField("paths") if err != nil { return err } - i.Path, err = arrayToPath(v.V.(document.Array)) + + i.Paths = nil + err = v.V.(document.Array).Iterate(func(ii int, pval document.Value) error { + p, err := arrayToPath(pval.V.(document.Array)) + if err != nil { + return err + } + + i.Paths = append(i.Paths, p) + return nil + }) + if err != nil { return err } - v, err = d.GetByField("type") + v, err = d.GetByField("types") if err != nil && err != document.ErrFieldNotFound { return err } + if err == nil { - i.Type = document.ValueType(v.V.(int64)) + i.Types = nil + err = v.V.(document.Array).Iterate(func(ii int, tval document.Value) error { + i.Types = append(i.Types, document.ValueType(tval.V.(int64))) + return nil + }) + + if err != nil { + return err + } } return nil @@ -295,7 +325,17 @@ func (i *IndexInfo) ScanDocument(d document.Document) error { // Clone returns a copy of the index information. 
func (i IndexInfo) Clone() *IndexInfo { - return &i + c := i + + c.Paths = make([]document.Path, len(i.Paths)) + for i, p := range i.Paths { + c.Paths[i] = p.Clone() + } + + c.Types = make([]document.ValueType, len(i.Types)) + copy(c.Types, i.Types) + + return &c } type indexStore struct { @@ -407,7 +447,7 @@ func (i Indexes) GetIndex(name string) *Index { func (i Indexes) GetIndexByPath(p document.Path) *Index { for _, idx := range i { - if idx.Info.Path.IsEqual(p) { + if idx.Info.Paths[0].IsEqual(p) { return idx } } diff --git a/database/config_test.go b/database/config_test.go index c5316493c..0561ef544 100644 --- a/database/config_test.go +++ b/database/config_test.go @@ -163,7 +163,7 @@ func TestIndexStore(t *testing.T) { TableName: "test", IndexName: "idx_test", Unique: true, - Type: document.BoolValue, + Types: []document.ValueType{document.BoolValue}, } err = idxs.Insert(&cfg) diff --git a/database/index.go b/database/index.go index 1f0292aa2..7af517195 100644 --- a/database/index.go +++ b/database/index.go @@ -4,6 +4,7 @@ import ( "bytes" "encoding/binary" "errors" + "io" "github.com/genjidb/genji/document" "github.com/genjidb/genji/engine" @@ -18,10 +19,17 @@ const ( var ( // ErrIndexDuplicateValue is returned when a value is already associated with a key ErrIndexDuplicateValue = errors.New("duplicate value") + + // ErrIndexWrongArity is returned when trying to index more values than what an + // index supports. + ErrIndexWrongArity = errors.New("wrong index arity") ) // An Index associates encoded values with keys. -// It is sorted by value following the lexicographic order. +// +// The association is performed by encoding the values in a binary format that preserves +// ordering when compared lexicographically. For the implementation, see the binarysort +// package and the document.ValueEncoder. type Index struct { Info *IndexInfo @@ -29,8 +37,75 @@ type Index struct { storeName []byte } -// NewIndex creates an index that associates a value with a list of keys. +// indexValueEncoder encodes a field based on its type; if a type is provided, +// the value is encoded as is, without any type information. Otherwise, the +// type is prepended to the value. +type indexValueEncoder struct { + typ document.ValueType + w io.Writer +} + +func (e *indexValueEncoder) EncodeValue(v document.Value) error { + // if the index has no type constraint, encode the value with its type + if e.typ.IsAny() { + // prepend with the type + _, err := e.w.Write([]byte{byte(v.Type)}) + if err != nil { + return err + } + + // marshal the value if it exists; otherwise only the type is written + if v.V != nil { + b, err := v.MarshalBinary() + if err != nil { + return err + } + + _, err = e.w.Write(b) + if err != nil { + return err + } + } + + return nil + } + + if v.Type != e.typ { + if v.Type.IsAny() { + v.Type = e.typ + } else { + // this should never happen, but if it does, something is very wrong + panic("incompatible index type") + } + } + + if v.V == nil { + return nil + } + + // there is a type constraint, so a shorter form can be used as the type is always the same + b, err := v.MarshalBinary() + if err != nil { + return err + } + + _, err = e.w.Write(b) + return err +} + +// NewIndex creates an index that associates values with a list of keys.
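+//
+// A minimal sketch (illustrative only) of creating a composite index over two
+// untyped values and indexing a pair of values under a key:
+//
+//	idx := NewIndex(tx, "idx_foo_a_b", &IndexInfo{
+//		Types: []document.ValueType{document.AnyType, document.AnyType},
+//	})
+//	err := idx.Set([]document.Value{
+//		document.NewIntegerValue(1),
+//		document.NewTextValue("hello"),
+//	}, []byte("key"))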
func NewIndex(tx engine.Transaction, idxName string, opts *IndexInfo) *Index { + if opts == nil { + opts = &IndexInfo{ + Types: []document.ValueType{document.AnyType}, + } + } + + // if no types are provided, it implies that it's an index for single untyped values + if opts.Types == nil { + opts.Types = []document.ValueType{document.AnyType} + } + return &Index{ tx: tx, storeName: append([]byte(indexStorePrefix), idxName...), @@ -40,18 +115,39 @@ func NewIndex(tx engine.Transaction, idxName string, opts *IndexInfo) *Index { var errStop = errors.New("stop") -// Set associates a value with a key. If Unique is set to false, it is +// IsComposite returns true if the index is defined to operate on at least more than one value. +func (idx *Index) IsComposite() bool { + return len(idx.Info.Types) > 1 +} + +// Arity returns how many values the index is operating on. +// For example, an index created with `CREATE INDEX idx_a_b ON foo (a, b)` has an arity of 2. +func (idx *Index) Arity() int { + return len(idx.Info.Types) +} + +// Set associates values with a key. If Unique is set to false, it is // possible to associate multiple keys for the same value // but a key can be associated to only one value. -func (idx *Index) Set(v document.Value, k []byte) error { +func (idx *Index) Set(vs []document.Value, k []byte) error { var err error if len(k) == 0 { return errors.New("cannot index value without a key") } - if idx.Info.Type != 0 && idx.Info.Type != v.Type { - return stringutil.Errorf("cannot index value of type %s in %s index", v.Type, idx.Info.Type) + if len(vs) == 0 { + return errors.New("cannot index without a value") + } + + if len(vs) != idx.Arity() { + return stringutil.Errorf("cannot index %d values on an index of arity %d", len(vs), len(idx.Info.Types)) + } + + for i, typ := range idx.Info.Types { + if !typ.IsAny() && typ != vs[i].Type { + return stringutil.Errorf("cannot index value of type %s in %s index", vs[i].Type, typ) + } } st, err := getOrCreateStore(idx.tx, idx.storeName) @@ -60,7 +156,10 @@ func (idx *Index) Set(v document.Value, k []byte) error { } // encode the value we are going to use as a key - buf, err := idx.EncodeValue(v) + var buf []byte + vb := document.NewValueBuffer(vs...) + buf, err = idx.EncodeValueBuffer(vb) + if err != nil { return err } @@ -105,7 +204,7 @@ func (idx *Index) Set(v document.Value, k []byte) error { } // Delete all the references to the key from the index. 
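+//
+// A sketch (illustrative) of removing the entry of a composite index that was
+// previously set with the same values and key:
+//
+//	err := idx.Delete([]document.Value{
+//		document.NewIntegerValue(10),
+//		document.NewTextValue("foo"),
+//	}, []byte("key"))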
-func (idx *Index) Delete(v document.Value, k []byte) error { +func (idx *Index) Delete(vs []document.Value, k []byte) error { st, err := getOrCreateStore(idx.tx, idx.storeName) if err != nil { return nil @@ -113,11 +212,12 @@ var toDelete []byte var buf []byte - err = idx.iterate(st, v, false, func(item engine.Item) error { + err = idx.iterate(st, vs, false, func(item engine.Item) error { buf, err = item.ValueCopy(buf[:0]) if err != nil { return err } + if bytes.Equal(buf, k) { toDelete = item.Key() return errStop @@ -136,25 +236,91 @@ return engine.ErrKeyNotFound } +type Pivot []document.Value + +// validate panics when the pivot values are unsuitable for the index: +// - having a pivot longer than the index arity +// - having the first pivot element without a value when the subsequent ones do have values +func (pivot Pivot) validate(idx *Index) { + if len(pivot) > idx.Arity() { + panic("cannot iterate with a pivot whose size is superior to the index arity") + } + + if idx.IsComposite() && !pivot.IsAny() { + // it's acceptable for the last pivot element to just have a type and no value + hasValue := true + for _, p := range pivot { + // if the previous pivot element had a value + if hasValue { + hasValue = p.V != nil + } else { + panic("cannot iterate on a composite index with a pivot with both values and nil values") + } + } + } +} + +// IsAny returns true if every value of the pivot is typed with AnyType +func (pivot Pivot) IsAny() bool { + res := true + for _, p := range pivot { + res = res && p.Type.IsAny() && p.V == nil + if !res { + break + } + } + + return res +} + // AscendGreaterOrEqual seeks for the pivot and then goes through all the subsequent key value pairs in increasing order and calls the given function for each pair. // If the given function returns an error, the iteration stops and returns that error. -// If the pivot is empty, starts from the beginning. -func (idx *Index) AscendGreaterOrEqual(pivot document.Value, fn func(val, key []byte) error) error { +// If the pivot is empty, iteration starts from the beginning. +// +// Valid pivots are: +// - zero value pivot +// - iterate on everything +// - n elements pivot (where n is the index arity) with each element having a value and a type +// - iterate starting at the closest index value +// - optionally, the last pivot element can have just a type and no value, which will scope the value of that element to that type +// - less than n elements pivot, with each element having a value and a type +// - iterate starting at the closest index value, using the first known value for missing elements +// - optionally, the last pivot element can have just a type and no value, which will scope the value of that element to that type +// - a single element with a type but nil value: will iterate on everything of that type +// +// Any other variation of a pivot is invalid and will panic. +func (idx *Index) AscendGreaterOrEqual(pivot Pivot, fn func(val, key []byte) error) error { return idx.iterateOnStore(pivot, false, fn) } // DescendLessOrEqual seeks for the pivot and then goes through all the subsequent key value pairs in descreasing order and calls the given function for each pair. // If the given function returns an error, the iteration stops and returns that error. -// If the pivot is empty, starts from the end.
-func (idx *Index) DescendLessOrEqual(pivot document.Value, fn func(val, key []byte) error) error { +// If the pivot is empty, iteration starts from the end. +// +// Valid pivots are: +// - zero value pivot +// - iterate on everything +// - n elements pivot (where n is the index arity) with each element having a value and a type +// - iterate starting at the closest index value +// - optionally, the last pivot element can have just a type and no value, which will scope the value of that element to that type +// - less than n elements pivot, with each element having a value and a type +// - iterate starting at the closest index value, using the last known value for missing elements +// - optionally, the last pivot element can have just a type and no value, which will scope the value of that element to that type +// - a single element with a type but nil value: will iterate on everything of that type +// +// Any other variation of a pivot is invalid and will panic. +func (idx *Index) DescendLessOrEqual(pivot Pivot, fn func(val, key []byte) error) error { return idx.iterateOnStore(pivot, true, fn) } -func (idx *Index) iterateOnStore(pivot document.Value, reverse bool, fn func(val, key []byte) error) error { - // if index and pivot are typed but not of the same type - // return no result - if idx.Info.Type != 0 && pivot.Type != 0 && idx.Info.Type != pivot.Type { - return nil +func (idx *Index) iterateOnStore(pivot Pivot, reverse bool, fn func(val, key []byte) error) error { + pivot.validate(idx) + + // If index and pivot values are typed but not of the same type, return no results. + for i, pv := range pivot { + if !pv.Type.IsAny() && !idx.Info.Types[i].IsAny() && pv.Type != idx.Info.Types[i] { + return nil + } } st, err := idx.tx.GetStore(idx.storeName) @@ -197,21 +363,45 @@ func (idx *Index) Truncate() error { return nil } -// EncodeValue encodes the value we are going to use as a key, -// If the index is typed, encode the value without expecting -// the presence of other types. -// If not, encode so that order is preserved regardless of the type. -func (idx *Index) EncodeValue(v document.Value) ([]byte, error) { - if idx.Info.Type != 0 { - return v.MarshalBinary() +// EncodeValueBuffer encodes the value buffer containing one or +// more values being indexed into a byte array, keeping the +// order of the original values. +// +// The values are marshalled and separated with a document.ArrayValueDelim, +// *without* a trailing document.ArrayEnd, which makes it possible to handle cases +// where only some of the values are being provided and still perform lookups +// (like index_foo_a_b_c and providing only a and b). +// +// See indexValueEncoder for details about how the values themselves are encoded.
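+//
+// As a rough sketch (illustrative; the exact bytes come from each value's
+// MarshalBinary), an untyped index storing the values (1, "a") produces:
+//
+//	[byte(IntegerValue)][binary(1)] ArrayValueDelim [byte(TextValue)][binary("a")]
+//
+// whereas a typed index omits the leading type byte of each value.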
+func (idx *Index) EncodeValueBuffer(vb *document.ValueBuffer) ([]byte, error) { + if vb.Len() > idx.Arity() { + return nil, ErrIndexWrongArity } - var err error var buf bytes.Buffer - err = document.NewValueEncoder(&buf).Encode(v) + + err := vb.Iterate(func(i int, value document.Value) error { + enc := &indexValueEncoder{typ: idx.Info.Types[i], w: &buf} + err := enc.EncodeValue(value) + if err != nil { + return err + } + + // if it's not the last value, append the separator + if i < vb.Len()-1 { + err = buf.WriteByte(document.ArrayValueDelim) + if err != nil { + return err + } + } + + return nil + }) + if err != nil { return nil, err } + return buf.Bytes(), nil } @@ -233,27 +423,48 @@ func getOrCreateStore(tx engine.Transaction, name []byte) (engine.Store, error) return tx.GetStore(name) } -func (idx *Index) iterate(st engine.Store, pivot document.Value, reverse bool, fn func(item engine.Item) error) error { +// buildSeek encodes the pivot values as binary in order to seek into the indexed data. +func (idx *Index) buildSeek(pivot Pivot, reverse bool) ([]byte, error) { var seek []byte var err error - if pivot.V != nil { - seek, err = idx.EncodeValue(pivot) - if err != nil { - return err - } + // if we have a valueless and typeless pivot, we just iterate + if pivot.IsAny() { + return []byte{}, nil + } + + // if the index is without type and the first pivot is valueless but typed, iterate but filter out the types we don't want, + // but just for the first pivot; subsequent pivot values cannot be filtered this way. + if idx.Info.Types[0].IsAny() && !pivot[0].Type.IsAny() && pivot[0].V == nil { + seek = []byte{byte(pivot[0].Type)} if reverse { seek = append(seek, 0xFF) } + + return seek, nil } - if idx.Info.Type == 0 && pivot.Type != 0 && pivot.V == nil { - seek = []byte{byte(pivot.Type)} + vb := document.NewValueBuffer(pivot...) + seek, err = idx.EncodeValueBuffer(vb) - if reverse { - seek = append(seek, 0xFF) - } + if err != nil { + return nil, err + } + + if reverse { + seek = append(seek, 0xFF) + } + + return seek, nil +} + +func (idx *Index) iterate(st engine.Store, pivot Pivot, reverse bool, fn func(item engine.Item) error) error { + var err error + + seek, err := idx.buildSeek(pivot, reverse) + if err != nil { + return err } it := st.Iterator(engine.IteratorOptions{Reverse: reverse}) @@ -262,8 +473,8 @@ for it.Seek(seek); it.Valid(); it.Next() { itm := it.Item() - // if index is untyped and pivot is typed, only iterate on values with the same type as pivot - if idx.Info.Type == 0 && pivot.Type != 0 && itm.Key()[0] != byte(pivot.Type) { + // If the index is untyped and the first pivot element is typed, only iterate on values with the same type as the first pivot + if len(pivot) > 0 && idx.Info.Types[0].IsAny() && !pivot[0].Type.IsAny() && itm.Key()[0] != byte(pivot[0].Type) { return nil } diff --git a/database/index_test.go b/database/index_test.go index d3eed616c..0fc4eea2c 100644 --- a/database/index_test.go +++ b/database/index_test.go @@ -8,22 +8,27 @@ import ( "strconv" "testing" - "github.com/genjidb/genji/binarysort" "github.com/genjidb/genji/database" "github.com/genjidb/genji/document" "github.com/genjidb/genji/engine" "github.com/genjidb/genji/engine/memoryengine" + "github.com/genjidb/genji/testutil" "github.com/stretchr/testify/require" ) -func getIndex(t testing.TB, unique bool) (*database.Index, func()) { +// values is a helper function to avoid having to type []document.Value{} all the time.
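+// For example, values(document.NewIntegerValue(1), document.NewBoolValue(true))
+// is simply []document.Value{document.NewIntegerValue(1), document.NewBoolValue(true)}.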
+func values(vs ...document.Value) []document.Value { + return vs +} + +func getIndex(t testing.TB, unique bool, types ...document.ValueType) (*database.Index, func()) { ng := memoryengine.NewEngine() tx, err := ng.Begin(context.Background(), engine.TxOptions{ Writable: true, }) require.NoError(t, err) - idx := database.NewIndex(tx, "foo", &database.IndexInfo{Unique: unique}) + idx := database.NewIndex(tx, "foo", &database.IndexInfo{Unique: unique, Types: types}) return idx, func() { tx.Rollback() @@ -34,16 +39,40 @@ func TestIndexSet(t *testing.T) { for _, unique := range []bool{true, false} { text := fmt.Sprintf("Unique: %v, ", unique) - t.Run(text+"Set nil key falls", func(t *testing.T) { + t.Run(text+"Set nil key falls (arity=1)", func(t *testing.T) { idx, cleanup := getIndex(t, unique) defer cleanup() - require.Error(t, idx.Set(document.NewBoolValue(true), nil)) + require.Error(t, idx.Set(values(document.NewBoolValue(true)), nil)) }) - t.Run(text+"Set value and key succeeds", func(t *testing.T) { + t.Run(text+"Set value and key succeeds (arity=1)", func(t *testing.T) { idx, cleanup := getIndex(t, unique) defer cleanup() - require.NoError(t, idx.Set(document.NewBoolValue(true), []byte("key"))) + require.NoError(t, idx.Set(values(document.NewBoolValue(true)), []byte("key"))) + }) + + t.Run(text+"Set two values and key succeeds (arity=2)", func(t *testing.T) { + idx, cleanup := getIndex(t, unique, document.AnyType, document.AnyType) + defer cleanup() + require.NoError(t, idx.Set(values(document.NewBoolValue(true), document.NewBoolValue(true)), []byte("key"))) + }) + + t.Run(text+"Set one value fails (arity=1)", func(t *testing.T) { + idx, cleanup := getIndex(t, unique, document.AnyType, document.AnyType) + defer cleanup() + require.Error(t, idx.Set(values(document.NewBoolValue(true)), []byte("key"))) + }) + + t.Run(text+"Set two values fails (arity=1)", func(t *testing.T) { + idx, cleanup := getIndex(t, unique, document.AnyType) + defer cleanup() + require.Error(t, idx.Set(values(document.NewBoolValue(true), document.NewBoolValue(true)), []byte("key"))) + }) + + t.Run(text+"Set three values fails (arity=2)", func(t *testing.T) { + idx, cleanup := getIndex(t, unique, document.AnyType, document.AnyType) + defer cleanup() + require.Error(t, idx.Set(values(document.NewBoolValue(true), document.NewBoolValue(true), document.NewBoolValue(true)), []byte("key"))) }) } @@ -51,19 +80,37 @@ func TestIndexSet(t *testing.T) { idx, cleanup := getIndex(t, true) defer cleanup() - require.NoError(t, idx.Set(document.NewIntegerValue(10), []byte("key"))) - require.NoError(t, idx.Set(document.NewIntegerValue(11), []byte("key"))) - require.Equal(t, database.ErrIndexDuplicateValue, idx.Set(document.NewIntegerValue(10), []byte("key"))) + require.NoError(t, idx.Set(values(document.NewIntegerValue(10)), []byte("key"))) + require.NoError(t, idx.Set(values(document.NewIntegerValue(11)), []byte("key"))) + require.Equal(t, database.ErrIndexDuplicateValue, idx.Set(values(document.NewIntegerValue(10)), []byte("key"))) }) t.Run("Unique: true, Type: integer Duplicate", func(t *testing.T) { - idx, cleanup := getIndex(t, true) - idx.Info.Type = document.IntegerValue + idx, cleanup := getIndex(t, true, document.IntegerValue) defer cleanup() - require.NoError(t, idx.Set(document.NewIntegerValue(10), []byte("key"))) - require.NoError(t, idx.Set(document.NewIntegerValue(11), []byte("key"))) - require.Equal(t, database.ErrIndexDuplicateValue, idx.Set(document.NewIntegerValue(10), []byte("key"))) + require.NoError(t, 
idx.Set(values(document.NewIntegerValue(10)), []byte("key"))) + require.NoError(t, idx.Set(values(document.NewIntegerValue(11)), []byte("key"))) + require.Equal(t, database.ErrIndexDuplicateValue, idx.Set(values(document.NewIntegerValue(10)), []byte("key"))) + }) + + t.Run("Unique: true, Type: (integer, integer) Duplicate,", func(t *testing.T) { + idx, cleanup := getIndex(t, true, document.IntegerValue, document.IntegerValue) + defer cleanup() + + require.NoError(t, idx.Set(values(document.NewIntegerValue(10), document.NewIntegerValue(10)), []byte("key"))) + require.NoError(t, idx.Set(values(document.NewIntegerValue(10), document.NewIntegerValue(11)), []byte("key"))) + require.NoError(t, idx.Set(values(document.NewIntegerValue(11), document.NewIntegerValue(11)), []byte("key"))) + require.Equal(t, database.ErrIndexDuplicateValue, idx.Set(values(document.NewIntegerValue(10), document.NewIntegerValue(10)), []byte("key"))) + }) + + t.Run("Unique: true, Type: (integer, text) Duplicate,", func(t *testing.T) { + idx, cleanup := getIndex(t, true, document.IntegerValue, document.TextValue) + defer cleanup() + + require.NoError(t, idx.Set(values(document.NewIntegerValue(10), document.NewTextValue("foo")), []byte("key"))) + require.NoError(t, idx.Set(values(document.NewIntegerValue(11), document.NewTextValue("foo")), []byte("key"))) + require.Equal(t, database.ErrIndexDuplicateValue, idx.Set(values(document.NewIntegerValue(10), document.NewTextValue("foo")), []byte("key"))) }) } @@ -72,20 +119,58 @@ func TestIndexDelete(t *testing.T) { idx, cleanup := getIndex(t, false) defer cleanup() - require.NoError(t, idx.Set(document.NewDoubleValue(10), []byte("key"))) - require.NoError(t, idx.Set(document.NewIntegerValue(10), []byte("other-key"))) - require.NoError(t, idx.Set(document.NewIntegerValue(11), []byte("yet-another-key"))) - require.NoError(t, idx.Set(document.NewTextValue("hello"), []byte("yet-another-different-key"))) - require.NoError(t, idx.Delete(document.NewDoubleValue(10), []byte("key"))) + require.NoError(t, idx.Set(values(document.NewDoubleValue(10)), []byte("key"))) + require.NoError(t, idx.Set(values(document.NewIntegerValue(10)), []byte("other-key"))) + require.NoError(t, idx.Set(values(document.NewIntegerValue(11)), []byte("yet-another-key"))) + require.NoError(t, idx.Set(values(document.NewTextValue("hello")), []byte("yet-another-different-key"))) + require.NoError(t, idx.Delete(values(document.NewDoubleValue(10)), []byte("key"))) - pivot := document.NewIntegerValue(10) + pivot := values(document.NewIntegerValue(10)) i := 0 err := idx.AscendGreaterOrEqual(pivot, func(v, k []byte) error { if i == 0 { - requireEqualEncoded(t, document.NewIntegerValue(10), v) + requireEqualBinary(t, testutil.MakeArrayValue(t, 10), v) require.Equal(t, "other-key", string(k)) } else if i == 1 { - requireEqualEncoded(t, document.NewIntegerValue(11), v) + requireEqualBinary(t, testutil.MakeArrayValue(t, 11), v) + require.Equal(t, "yet-another-key", string(k)) + } else { + return errors.New("should not reach this point") + } + + i++ + return nil + }) + require.NoError(t, err) + require.Equal(t, 2, i) + }) + + t.Run("Unique: false, Delete valid key succeeds (arity=2)", func(t *testing.T) { + idx, cleanup := getIndex(t, false, document.AnyType, document.AnyType) + defer cleanup() + + require.NoError(t, idx.Set(values(document.NewDoubleValue(10), document.NewDoubleValue(10)), []byte("key"))) + require.NoError(t, idx.Set(values(document.NewIntegerValue(10), document.NewIntegerValue(10)), []byte("other-key"))) + 
require.NoError(t, idx.Set(values(document.NewIntegerValue(11), document.NewIntegerValue(11)), []byte("yet-another-key"))) + require.NoError(t, idx.Set(values(document.NewTextValue("hello"), document.NewTextValue("hello")), []byte("yet-another-different-key"))) + require.NoError(t, idx.Delete(values(document.NewDoubleValue(10), document.NewDoubleValue(10)), []byte("key"))) + + pivot := values(document.NewIntegerValue(10), document.NewIntegerValue(10)) + i := 0 + err := idx.AscendGreaterOrEqual(pivot, func(v, k []byte) error { + if i == 0 { + expected := document.NewArrayValue(document.NewValueBuffer( + document.NewIntegerValue(10), + document.NewIntegerValue(10), + )) + requireEqualBinary(t, expected, v) + require.Equal(t, "other-key", string(k)) + } else if i == 1 { + expected := document.NewArrayValue(document.NewValueBuffer( + document.NewIntegerValue(11), + document.NewIntegerValue(11), + )) + requireEqualBinary(t, expected, v) require.Equal(t, "yet-another-key", string(k)) } else { return errors.New("should not reach this point") @@ -102,19 +187,59 @@ func TestIndexDelete(t *testing.T) { idx, cleanup := getIndex(t, true) defer cleanup() - require.NoError(t, idx.Set(document.NewIntegerValue(10), []byte("key1"))) - require.NoError(t, idx.Set(document.NewDoubleValue(11), []byte("key2"))) - require.NoError(t, idx.Set(document.NewIntegerValue(12), []byte("key3"))) - require.NoError(t, idx.Delete(document.NewDoubleValue(11), []byte("key2"))) + require.NoError(t, idx.Set(values(document.NewIntegerValue(10)), []byte("key1"))) + require.NoError(t, idx.Set(values(document.NewDoubleValue(11)), []byte("key2"))) + require.NoError(t, idx.Set(values(document.NewIntegerValue(12)), []byte("key3"))) + require.NoError(t, idx.Delete(values(document.NewDoubleValue(11)), []byte("key2"))) + + i := 0 + err := idx.AscendGreaterOrEqual(values(document.Value{Type: document.IntegerValue}), func(v, k []byte) error { + switch i { + case 0: + requireEqualBinary(t, testutil.MakeArrayValue(t, 10), v) + require.Equal(t, "key1", string(k)) + case 1: + requireEqualBinary(t, testutil.MakeArrayValue(t, 12), v) + require.Equal(t, "key3", string(k)) + default: + return errors.New("should not reach this point") + } + + i++ + return nil + }) + require.NoError(t, err) + require.Equal(t, 2, i) + }) + + t.Run("Unique: true, Delete valid key succeeds (arity=2)", func(t *testing.T) { + idx, cleanup := getIndex(t, true, document.AnyType, document.AnyType) + defer cleanup() + + require.NoError(t, idx.Set(values(document.NewIntegerValue(10), document.NewIntegerValue(10)), []byte("key1"))) + require.NoError(t, idx.Set(values(document.NewDoubleValue(11), document.NewDoubleValue(11)), []byte("key2"))) + require.NoError(t, idx.Set(values(document.NewIntegerValue(12), document.NewIntegerValue(12)), []byte("key3"))) + require.NoError(t, idx.Delete(values(document.NewDoubleValue(11), document.NewDoubleValue(11)), []byte("key2"))) i := 0 - err := idx.AscendGreaterOrEqual(document.Value{Type: document.IntegerValue}, func(v, k []byte) error { + // this will break until the [v, int] case is supported + // pivot := values(document.NewIntegerValue(0), document.Value{Type: document.IntegerValue}) + pivot := values(document.NewIntegerValue(0), document.NewIntegerValue(0)) + err := idx.AscendGreaterOrEqual(pivot, func(v, k []byte) error { switch i { case 0: - requireEqualEncoded(t, document.NewIntegerValue(10), v) + expected := document.NewArrayValue(document.NewValueBuffer( + document.NewIntegerValue(10), + document.NewIntegerValue(10), + )) + 
requireEqualBinary(t, expected, v) require.Equal(t, "key1", string(k)) case 1: - requireEqualEncoded(t, document.NewIntegerValue(12), v) + expected := document.NewArrayValue(document.NewValueBuffer( + document.NewIntegerValue(12), + document.NewIntegerValue(12), + )) + requireEqualBinary(t, expected, v) require.Equal(t, "key3", string(k)) default: return errors.New("should not reach this point") @@ -134,18 +259,50 @@ func TestIndexDelete(t *testing.T) { idx, cleanup := getIndex(t, unique) defer cleanup() - require.Error(t, idx.Delete(document.NewTextValue("foo"), []byte("foo"))) + require.Error(t, idx.Delete(values(document.NewTextValue("foo")), []byte("foo"))) }) } } -func requireEqualEncoded(t *testing.T, expected document.Value, actual []byte) { +// requireEqualBinary asserts equality assuming that the value is encoded through marshal binary +func requireEqualBinary(t *testing.T, expected document.Value, actual []byte) { t.Helper() - var buf bytes.Buffer - err := document.NewValueEncoder(&buf).Encode(expected) + buf, err := expected.MarshalBinary() require.NoError(t, err) - require.Equal(t, buf.Bytes(), actual) + require.Equal(t, buf[:len(buf)-1], actual) +} + +type encValue struct { + skipType bool + document.Value +} + +func requireIdxEncodedEq(t *testing.T, evs ...encValue) func([]byte) { + t.Helper() + + var buf bytes.Buffer + for i, ev := range evs { + if !ev.skipType { + err := buf.WriteByte(byte(ev.Value.Type)) + require.NoError(t, err) + } + + b, err := ev.Value.MarshalBinary() + require.NoError(t, err) + + _, err = buf.Write(b) + require.NoError(t, err) + + if i < len(evs)-1 { + err = buf.WriteByte(document.ArrayValueDelim) + } + require.NoError(t, err) + } + + return func(actual []byte) { + require.Equal(t, buf.Bytes(), actual) + } } func TestIndexAscendGreaterThan(t *testing.T) { @@ -157,7 +314,7 @@ func TestIndexAscendGreaterThan(t *testing.T) { defer cleanup() i := 0 - err := idx.AscendGreaterOrEqual(document.Value{Type: document.IntegerValue}, func(val, key []byte) error { + err := idx.AscendGreaterOrEqual(values(document.Value{Type: document.IntegerValue}), func(val, key []byte) error { i++ return errors.New("should not iterate") }) @@ -165,160 +322,679 @@ func TestIndexAscendGreaterThan(t *testing.T) { require.Equal(t, 0, i) }) - t.Run(text+"With typed empty pivot, should iterate over all documents of the pivot type in order", func(t *testing.T) { + t.Run(text+"Should iterate through documents in order, ", func(t *testing.T) { + noiseBlob := func(i int) []document.Value { + t.Helper() + return []document.Value{document.NewBlobValue(strconv.AppendInt(nil, int64(i), 10))} + } + noiseInts := func(i int) []document.Value { + t.Helper() + return []document.Value{document.NewIntegerValue(int64(i))} + } + + noCallEq := func(t *testing.T, i uint8, key []byte, val []byte) { + require.Fail(t, "equality test should not be called here") + } + + // the following tests will use that constant to determine how many values needs to be inserted + // with the value and noise generators. 
+ total := 5 + tests := []struct { - name string - val func(i int) document.Value - t document.ValueType - pivot document.Value + name string + // the index type(s) that is being used + indexTypes []document.ValueType + // the pivot, typed or not used to iterate + pivot database.Pivot + // the generator for the values that are being indexed + val func(i int) []document.Value + // the generator for the noise values that are being indexed + noise func(i int) []document.Value + // the function to compare the key/value that the iteration yields + expectedEq func(t *testing.T, i uint8, key []byte, val []byte) + // the total count of iteration that should happen + expectedCount int + mustPanic bool }{ - {"integers", func(i int) document.Value { return document.NewIntegerValue(int64(i)) }, document.IntegerValue, document.Value{Type: document.IntegerValue}}, - {"doubles", func(i int) document.Value { return document.NewDoubleValue(float64(i) + float64(i)/2) }, document.DoubleValue, document.Value{Type: document.DoubleValue}}, - {"texts", func(i int) document.Value { return document.NewTextValue(strconv.Itoa(i)) }, document.TextValue, document.Value{Type: document.TextValue}}, + // integers --------------------------------------------------- + {name: "index=any, vals=integers, pivot=integer", + indexTypes: nil, + pivot: values(document.Value{Type: document.IntegerValue}), + val: func(i int) []document.Value { return values(document.NewIntegerValue(int64(i))) }, + noise: noiseBlob, + expectedEq: func(t *testing.T, i uint8, key []byte, val []byte) { + require.Equal(t, []byte{'a' + i}, key) + requireIdxEncodedEq(t, + encValue{false, document.NewIntegerValue(int64(i))}, + )(val) + }, + expectedCount: 5, + }, + {name: "index=integer, vals=integers, pivot=integer", + indexTypes: []document.ValueType{document.IntegerValue}, + pivot: values(document.Value{Type: document.IntegerValue}), + val: func(i int) []document.Value { return values(document.NewIntegerValue(int64(i))) }, + expectedEq: func(t *testing.T, i uint8, key []byte, val []byte) { + require.Equal(t, []byte{'a' + i}, key) + requireIdxEncodedEq(t, + encValue{true, document.NewIntegerValue(int64(i))}, + )(val) + }, + expectedCount: 5, + }, + {name: "index=any, vals=integers, pivot=integer:2", + indexTypes: nil, + pivot: values(document.NewIntegerValue(2)), + val: func(i int) []document.Value { return values(document.NewIntegerValue(int64(i))) }, + noise: noiseBlob, + expectedEq: func(t *testing.T, i uint8, key []byte, val []byte) { + i += 2 + require.Equal(t, []byte{'a' + i}, key) + requireIdxEncodedEq(t, + encValue{false, document.NewIntegerValue(int64(i))}, + )(val) + }, + expectedCount: 3, + }, + {name: "index=any, vals=integers, pivot=integer:10", + indexTypes: nil, + pivot: values(document.NewIntegerValue(10)), + val: func(i int) []document.Value { return values(document.NewIntegerValue(int64(i))) }, + noise: noiseBlob, + expectedEq: noCallEq, + expectedCount: 0, + }, + {name: "index=integer, vals=integers, pivot=integer:2", + indexTypes: []document.ValueType{document.IntegerValue}, + pivot: values(document.NewIntegerValue(2)), + val: func(i int) []document.Value { return values(document.NewIntegerValue(int64(i))) }, + expectedEq: func(t *testing.T, i uint8, key []byte, val []byte) { + i += 2 + require.Equal(t, []byte{'a' + i}, key) + requireIdxEncodedEq(t, + encValue{true, document.NewIntegerValue(int64(i))}, + )(val) + }, + expectedCount: 3, + }, + {name: "index=integer, vals=integers, pivot=double", + indexTypes: 
[]document.ValueType{document.IntegerValue}, + pivot: values(document.Value{Type: document.DoubleValue}), + val: func(i int) []document.Value { return values(document.NewIntegerValue(int64(i))) }, + expectedEq: noCallEq, + expectedCount: 0, + }, + + // doubles ---------------------------------------------------- + {name: "index=any, vals=doubles, pivot=double", + indexTypes: nil, + pivot: values(document.Value{Type: document.DoubleValue}), + val: func(i int) []document.Value { return values(document.NewDoubleValue(float64(i) + float64(i)/2)) }, + expectedEq: func(t *testing.T, i uint8, key []byte, val []byte) { + require.Equal(t, []byte{'a' + i}, key) + requireIdxEncodedEq(t, + encValue{false, document.NewDoubleValue(float64(i) + float64(i)/2)}, + )(val) + }, + expectedCount: 5, + }, + {name: "index=any, vals=doubles, pivot=double:1.8", + indexTypes: nil, + pivot: values(document.NewDoubleValue(1.8)), + val: func(i int) []document.Value { return values(document.NewDoubleValue(float64(i) + float64(i)/2)) }, + expectedEq: func(t *testing.T, i uint8, key []byte, val []byte) { + i += 2 + require.Equal(t, []byte{'a' + i}, key) + requireIdxEncodedEq(t, + encValue{false, document.NewDoubleValue(float64(i) + float64(i)/2)}, + )(val) + }, + expectedCount: 3, + }, + {name: "index=double, vals=doubles, pivot=double:1.8", + indexTypes: []document.ValueType{document.DoubleValue}, + pivot: values(document.NewDoubleValue(1.8)), + val: func(i int) []document.Value { return values(document.NewDoubleValue(float64(i) + float64(i)/2)) }, + expectedEq: func(t *testing.T, i uint8, key []byte, val []byte) { + i += 2 + require.Equal(t, []byte{'a' + i}, key) + requireIdxEncodedEq(t, + encValue{true, document.NewDoubleValue(float64(i) + float64(i)/2)}, + )(val) + }, + expectedCount: 3, + }, + {name: "index=any, vals=doubles, pivot=double:10.8", + indexTypes: nil, + pivot: values(document.NewDoubleValue(10.8)), + val: func(i int) []document.Value { return values(document.NewDoubleValue(float64(i) + float64(i)/2)) }, + expectedEq: noCallEq, + expectedCount: 0, + }, + + // text ------------------------------------------------------- + {name: "index=any, vals=text pivot=text", + indexTypes: nil, + pivot: values(document.Value{Type: document.TextValue}), + val: func(i int) []document.Value { return values(document.NewTextValue(strconv.Itoa(i))) }, + noise: noiseInts, + expectedEq: func(t *testing.T, i uint8, key []byte, val []byte) { + require.Equal(t, []byte{'a' + i}, key) + requireIdxEncodedEq(t, + encValue{false, document.NewTextValue(strconv.Itoa(int(i)))}, + )(val) + }, + expectedCount: 5, + }, + {name: "index=any, vals=text, pivot=text('2')", + indexTypes: nil, + pivot: values(document.NewTextValue("2")), + val: func(i int) []document.Value { return values(document.NewTextValue(strconv.Itoa(i))) }, + noise: noiseInts, + expectedEq: func(t *testing.T, i uint8, key []byte, val []byte) { + i += 2 + require.Equal(t, []byte{'a' + i}, key) + requireIdxEncodedEq(t, + encValue{false, document.NewTextValue(strconv.Itoa(int(i)))}, + )(val) + }, + expectedCount: 3, + }, + {name: "index=any, vals=text, pivot=text('')", + indexTypes: nil, + pivot: values(document.NewTextValue("")), + val: func(i int) []document.Value { return values(document.NewTextValue(strconv.Itoa(i))) }, + noise: noiseInts, + expectedEq: func(t *testing.T, i uint8, key []byte, val []byte) { + require.Equal(t, []byte{'a' + i}, key) + requireIdxEncodedEq(t, + encValue{false, document.NewTextValue(strconv.Itoa(int(i)))}, + )(val) + }, + expectedCount: 5, + }, 
+ {name: "index=any, vals=text, pivot=text('foo')", + indexTypes: nil, + pivot: values(document.NewTextValue("foo")), + val: func(i int) []document.Value { return values(document.NewTextValue(strconv.Itoa(i))) }, + noise: noiseInts, + expectedEq: noCallEq, + expectedCount: 0, + }, + {name: "index=text, vals=text, pivot=text('2')", + indexTypes: []document.ValueType{document.TextValue}, + pivot: values(document.NewTextValue("2")), + val: func(i int) []document.Value { return values(document.NewTextValue(strconv.Itoa(i))) }, + expectedEq: func(t *testing.T, i uint8, key []byte, val []byte) { + i += 2 + require.Equal(t, []byte{'a' + i}, key) + requireIdxEncodedEq(t, + encValue{true, document.NewTextValue(strconv.Itoa(int(i)))}, + )(val) + }, + expectedCount: 3, + }, + // composite -------------------------------------------------- + // composite indexes can have empty pivot values to iterate on the whole indexed data + {name: "index=[any, untyped], vals=[int, int], pivot=[nil,nil]", + indexTypes: []document.ValueType{document.AnyType, document.AnyType}, + pivot: values(document.Value{}, document.Value{}), + val: func(i int) []document.Value { + return values(document.NewIntegerValue(int64(i)), document.NewIntegerValue(int64(i+1))) + }, + expectedEq: func(t *testing.T, i uint8, key []byte, val []byte) { + requireIdxEncodedEq(t, + encValue{false, document.NewIntegerValue(int64(i))}, + encValue{false, document.NewIntegerValue(int64(i + 1))}, + )(val) + }, + expectedCount: 5, + }, + + // composite indexes must have at least have one value if typed + {name: "index=[any, untyped], vals=[int, int], noise=[blob, blob], pivot=[int, int]", + indexTypes: []document.ValueType{document.AnyType, document.AnyType}, + pivot: values(document.Value{Type: document.IntegerValue}, document.Value{Type: document.IntegerValue}), + val: func(i int) []document.Value { + return values(document.NewIntegerValue(int64(i)), document.NewIntegerValue(int64(i+1))) + }, + expectedEq: noCallEq, + mustPanic: true, + }, + {name: "index=[any, untyped], vals=[int, int], noise=[blob, blob], pivot=[int]", + indexTypes: []document.ValueType{document.AnyType, document.AnyType}, + pivot: values(document.Value{Type: document.IntegerValue}), + val: func(i int) []document.Value { + return values(document.NewIntegerValue(int64(i)), document.NewIntegerValue(int64(i+1))) + }, + expectedEq: func(t *testing.T, i uint8, key []byte, val []byte) { + requireIdxEncodedEq(t, + encValue{false, document.NewIntegerValue(int64(i))}, + encValue{false, document.NewIntegerValue(int64(i + 1))}, + )(val) + }, + expectedCount: 5, + }, + {name: "index=[any, untyped], vals=[int, int], noise=[blob, blob], pivot=[0, int, 0]", + indexTypes: []document.ValueType{0, 0, 0}, + pivot: values(document.NewIntegerValue(0), document.Value{Type: document.IntegerValue}, document.NewIntegerValue(0)), + val: func(i int) []document.Value { + return values(document.NewIntegerValue(int64(i)), document.NewIntegerValue(int64(i+1)), document.NewIntegerValue(int64(i+1))) + }, + expectedEq: noCallEq, + mustPanic: true, + }, + {name: "index=[any, untyped], vals=[int, int], noise=[blob, blob], pivot=[0, int, nil]", + indexTypes: []document.ValueType{0, 0, 0}, + pivot: values(document.NewIntegerValue(0), document.Value{Type: document.IntegerValue}, document.NewIntegerValue(0), document.Value{}), + val: func(i int) []document.Value { + return values(document.NewIntegerValue(int64(i)), document.NewIntegerValue(int64(i+1)), document.NewIntegerValue(int64(i+1))) + }, + expectedEq: noCallEq, + 
mustPanic: true, + }, + {name: "index=[any, untyped], vals=[int, int], noise=[blob, blob], pivot=[int, 0]", + indexTypes: []document.ValueType{document.AnyType, document.AnyType}, + pivot: values(document.Value{Type: document.IntegerValue}, document.NewIntegerValue(0)), + val: func(i int) []document.Value { + return values(document.NewIntegerValue(int64(i)), document.NewIntegerValue(int64(i+1))) + }, + expectedEq: noCallEq, + mustPanic: true, + }, + {name: "index=[any, untyped], vals=[int, int], noise=[blob, blob], pivot=[0, 0]", + indexTypes: []document.ValueType{document.AnyType, document.AnyType}, + pivot: values(document.NewIntegerValue(0), document.NewIntegerValue(0)), + val: func(i int) []document.Value { + return values(document.NewIntegerValue(int64(i)), document.NewIntegerValue(int64(i+1))) + }, + noise: func(i int) []document.Value { + return values(document.NewBlobValue(strconv.AppendInt(nil, int64(i), 10)), document.NewBlobValue(strconv.AppendInt(nil, int64(i), 10))) + }, + expectedEq: func(t *testing.T, i uint8, key []byte, val []byte) { + requireIdxEncodedEq(t, + encValue{false, document.NewIntegerValue(int64(i))}, + encValue{false, document.NewIntegerValue(int64(i + 1))}, + )(val) + }, + expectedCount: 5, + }, + {name: "index=[any, untyped], vals=[int, int], noise=[blob, blob], pivot=[2, 0]", + indexTypes: []document.ValueType{document.AnyType, document.AnyType}, + pivot: values(document.NewIntegerValue(2), document.NewIntegerValue(0)), + val: func(i int) []document.Value { + return values(document.NewIntegerValue(int64(i)), document.NewIntegerValue(int64(i+1))) + }, + noise: func(i int) []document.Value { + return values(document.NewBlobValue(strconv.AppendInt(nil, int64(i), 10)), document.NewBlobValue(strconv.AppendInt(nil, int64(i), 10))) + }, + expectedEq: func(t *testing.T, i uint8, key []byte, val []byte) { + i += 2 + requireIdxEncodedEq(t, + encValue{false, document.NewIntegerValue(int64(i))}, + encValue{false, document.NewIntegerValue(int64(i + 1))}, + )(val) + }, + expectedCount: 3, + }, + {name: "index=[any, untyped], vals=[int, int], noise=[blob, blob], pivot=[2, int]", + indexTypes: []document.ValueType{document.AnyType, document.AnyType}, + pivot: values(document.NewIntegerValue(2), document.Value{Type: document.IntegerValue}), + val: func(i int) []document.Value { + return values(document.NewIntegerValue(int64(i)), document.NewIntegerValue(int64(i+1))) + }, + noise: func(i int) []document.Value { + return values(document.NewBlobValue(strconv.AppendInt(nil, int64(i), 10)), document.NewBlobValue(strconv.AppendInt(nil, int64(i), 10))) + }, + expectedEq: func(t *testing.T, i uint8, key []byte, val []byte) { + i += 2 + requireIdxEncodedEq(t, + encValue{false, document.NewIntegerValue(int64(i))}, + encValue{false, document.NewIntegerValue(int64(i + 1))}, + )(val) + }, + expectedCount: 3, + }, + // pivot [2, int] should filter out [2, not(int)] + {name: "index=[any, untyped], vals=[int, int], noise=[int, blob], pivot=[2, int]", + indexTypes: []document.ValueType{document.AnyType, document.AnyType}, + pivot: values(document.NewIntegerValue(2), document.Value{Type: document.IntegerValue}), + val: func(i int) []document.Value { + return values(document.NewIntegerValue(int64(i)), document.NewIntegerValue(int64(i+1))) + }, + noise: func(i int) []document.Value { + // only [3, not(int)] is greater than [2, int], so it will appear anyway if we don't skip it + if i < 3 { + return values(document.NewIntegerValue(int64(i)), document.NewBoolValue(true)) + } + + return nil + }, + 
expectedEq: func(t *testing.T, i uint8, key []byte, val []byte) { + i += 2 + requireIdxEncodedEq(t, + encValue{false, document.NewIntegerValue(int64(i))}, + encValue{false, document.NewIntegerValue(int64(i + 1))}, + )(val) + }, + expectedCount: 3, + }, + // a more subtle case + {name: "index=[any, untyped], vals=[int, blob], noise=[blob, blob], pivot=[2, 'a']", // pivot is [2, a] but value is [2, c] but that must work anyway + indexTypes: []document.ValueType{document.AnyType, document.AnyType}, + pivot: values(document.NewIntegerValue(2), document.NewBlobValue([]byte{byte('a')})), + val: func(i int) []document.Value { + return values(document.NewIntegerValue(int64(i)), document.NewBlobValue([]byte{byte('a' + uint8(i))})) + }, + noise: func(i int) []document.Value { + return values(document.NewBlobValue(strconv.AppendInt(nil, int64(i), 10)), document.NewBlobValue(strconv.AppendInt(nil, int64(i), 10))) + }, + expectedEq: func(t *testing.T, i uint8, key []byte, val []byte) { + i += 2 + requireIdxEncodedEq(t, + encValue{false, document.NewIntegerValue(int64(i))}, + encValue{false, document.NewBlobValue([]byte{byte('a' + uint8(i))})}, + )(val) + }, + expectedCount: 3, + }, + // partial pivot + {name: "index=[any, untyped], vals=[int, int], noise=[blob, blob], pivot=[0]", + indexTypes: []document.ValueType{document.AnyType, document.AnyType}, + pivot: values(document.NewIntegerValue(0)), + val: func(i int) []document.Value { + return values(document.NewIntegerValue(int64(i)), document.NewIntegerValue(int64(i+1))) + }, + noise: func(i int) []document.Value { + return values(document.NewIntegerValue(int64(i)), document.NewBlobValue(strconv.AppendInt(nil, int64(i), 10))) + }, + expectedEq: func(t *testing.T, i uint8, key []byte, val []byte) { + // let's not try to match, it's not important + }, + expectedCount: 10, + }, + {name: "index=[any, untyped], vals=[int, int], noise=[blob, blob], pivot=[2]", + indexTypes: []document.ValueType{document.AnyType, document.AnyType}, + pivot: values(document.NewIntegerValue(2)), + val: func(i int) []document.Value { + return values(document.NewIntegerValue(int64(i)), document.NewIntegerValue(int64(i+1))) + }, + noise: func(i int) []document.Value { + return values(document.NewIntegerValue(int64(i)), document.NewBlobValue(strconv.AppendInt(nil, int64(i), 10))) + }, + expectedEq: func(t *testing.T, i uint8, key []byte, val []byte) { + // let's not try to match, it's not important + }, + expectedCount: 6, // total * 2 - (noise + val = 2) * 2 + }, + // this is a tricky test, when we have multiple values but they share the first pivot element; + // this is by definition a very implementation dependent test. 
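+ // concretely, both val(i) = [i, i+1] and noise(i) = [i, true] get indexed; with pivot [0, 0] only the very first entry [0, true] sorts before the pivot, so 9 of the 10 indexed entries should be yielded.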
+ {name: "index=[any, untyped], vals=[int, int], noise=int, bool], pivot=[int:0, int:0]", + indexTypes: []document.ValueType{document.AnyType, document.AnyType}, + pivot: values(document.NewIntegerValue(0), document.NewIntegerValue(0)), + val: func(i int) []document.Value { + return values(document.NewIntegerValue(int64(i)), document.NewIntegerValue(int64(i+1))) + }, + noise: func(i int) []document.Value { + return values(document.NewIntegerValue(int64(i)), document.NewBoolValue(true)) + }, + // [0, 0] > [0, true] but [1, true] > [0, 0] so we will see some bools in the results + expectedEq: func(t *testing.T, i uint8, key []byte, val []byte) { + if i%2 == 0 { + i = i / 2 + requireIdxEncodedEq(t, + encValue{false, document.NewIntegerValue(int64(i))}, + encValue{false, document.NewIntegerValue(int64(i + 1))}, + )(val) + } + }, + expectedCount: 9, // 10 elements, but pivot skipped the initial [0, true] + }, + // index typed + {name: "index=[int, int], vals=[int, int], pivot=[0, 0]", + indexTypes: []document.ValueType{document.IntegerValue, document.IntegerValue}, + pivot: values(document.NewIntegerValue(0), document.NewIntegerValue(0)), + val: func(i int) []document.Value { + return values(document.NewIntegerValue(int64(i)), document.NewIntegerValue(int64(i+1))) + }, + expectedEq: func(t *testing.T, i uint8, key []byte, val []byte) { + requireIdxEncodedEq(t, + encValue{true, document.NewIntegerValue(int64(i))}, + encValue{true, document.NewIntegerValue(int64(i + 1))}, + )(val) + }, + expectedCount: 5, + }, + {name: "index=[int, int], vals=[int, int], pivot=[2, 0]", + indexTypes: []document.ValueType{document.IntegerValue, document.IntegerValue}, + pivot: values(document.NewIntegerValue(2), document.NewIntegerValue(0)), + val: func(i int) []document.Value { + return values(document.NewIntegerValue(int64(i)), document.NewIntegerValue(int64(i+1))) + }, + expectedEq: func(t *testing.T, i uint8, key []byte, val []byte) { + i += 2 + requireIdxEncodedEq(t, + encValue{true, document.NewIntegerValue(int64(i))}, + encValue{true, document.NewIntegerValue(int64(i + 1))}, + )(val) + }, + expectedCount: 3, + }, + // a more subtle case + {name: "index=[int, blob], vals=[int, blob], pivot=[2, 'a']", // pivot is [2, a] but value is [2, c] but that must work anyway + indexTypes: []document.ValueType{document.IntegerValue, document.BlobValue}, + pivot: values(document.NewIntegerValue(2), document.NewBlobValue([]byte{byte('a')})), + val: func(i int) []document.Value { + return values(document.NewIntegerValue(int64(i)), document.NewBlobValue([]byte{byte('a' + uint8(i))})) + }, + expectedEq: func(t *testing.T, i uint8, key []byte, val []byte) { + i += 2 + requireIdxEncodedEq(t, + encValue{true, document.NewIntegerValue(int64(i))}, + encValue{true, document.NewBlobValue([]byte{byte('a' + uint8(i))})}, + )(val) + }, + expectedCount: 3, + }, + // partial pivot + {name: "index=[int, int], vals=[int, int], pivot=[0]", + indexTypes: []document.ValueType{document.IntegerValue, document.IntegerValue}, + pivot: values(document.NewIntegerValue(0)), + val: func(i int) []document.Value { + return values(document.NewIntegerValue(int64(i)), document.NewIntegerValue(int64(i+1))) + }, + expectedEq: func(t *testing.T, i uint8, key []byte, val []byte) { + requireIdxEncodedEq(t, + encValue{true, document.NewIntegerValue(int64(i))}, + encValue{true, document.NewIntegerValue(int64(i + 1))}, + )(val) + }, + expectedCount: 5, + }, + {name: "index=[int, int], vals=[int, int], pivot=[2]", + indexTypes: 
[]document.ValueType{document.IntegerValue, document.IntegerValue}, + pivot: values(document.NewIntegerValue(2)), + val: func(i int) []document.Value { + return values(document.NewIntegerValue(int64(i)), document.NewIntegerValue(int64(i+1))) + }, + expectedEq: func(t *testing.T, i uint8, key []byte, val []byte) { + i += 2 + requireIdxEncodedEq(t, + encValue{true, document.NewIntegerValue(int64(i))}, + encValue{true, document.NewIntegerValue(int64(i + 1))}, + )(val) + }, + expectedCount: 3, + }, + + // documents -------------------------------------------------- + {name: "index=[any, any], vals=[doc, int], pivot=[{a:2}, 3]", + indexTypes: []document.ValueType{document.AnyType, document.AnyType}, + pivot: values( + document.NewDocumentValue(testutil.MakeDocument(t, `{"a":2}`)), + document.NewIntegerValue(int64(3)), + ), + val: func(i int) []document.Value { + return values( + document.NewDocumentValue(testutil.MakeDocument(t, `{"a":`+strconv.Itoa(i)+`}`)), + document.NewIntegerValue(int64(i+1)), + ) + }, + expectedEq: func(t *testing.T, i uint8, key []byte, val []byte) { + i += 2 + requireIdxEncodedEq(t, + encValue{false, document.NewDocumentValue(testutil.MakeDocument(t, `{"a":`+strconv.Itoa(int(i))+`}`))}, + encValue{false, document.NewIntegerValue(int64(i + 1))}, + )(val) + }, + expectedCount: 3, + }, + {name: "index=[document, int], vals=[doc, int], pivot=[{a:2}, 3]", + indexTypes: []document.ValueType{document.DocumentValue, document.IntegerValue}, + pivot: values( + document.NewDocumentValue(testutil.MakeDocument(t, `{"a":2}`)), + document.NewIntegerValue(int64(3)), + ), + val: func(i int) []document.Value { + return values( + document.NewDocumentValue(testutil.MakeDocument(t, `{"a":`+strconv.Itoa(i)+`}`)), + document.NewIntegerValue(int64(i+1)), + ) + }, + expectedEq: func(t *testing.T, i uint8, key []byte, val []byte) { + i += 2 + requireIdxEncodedEq(t, + encValue{true, document.NewDocumentValue(testutil.MakeDocument(t, `{"a":`+strconv.Itoa(int(i))+`}`))}, + encValue{true, document.NewIntegerValue(int64(i + 1))}, + )(val) + }, + expectedCount: 3, + }, + + // arrays ----------------------------------------------------- + {name: "index=[any, any], vals=[int[], int], pivot=[]", + indexTypes: []document.ValueType{document.AnyType, document.AnyType}, + pivot: values(), + val: func(i int) []document.Value { + return values( + testutil.MakeArrayValue(t, i, i), + document.NewIntegerValue(int64(i+1)), + ) + }, + expectedEq: func(t *testing.T, i uint8, key []byte, val []byte) { + requireIdxEncodedEq(t, + encValue{false, testutil.MakeArrayValue(t, i, i)}, + encValue{false, document.NewIntegerValue(int64(i + 1))}, + )(val) + }, + expectedCount: 5, + }, + {name: "index=[any, any], vals=[int[], int], pivot=[[2,2], 3]", + indexTypes: []document.ValueType{document.AnyType, document.AnyType}, + pivot: values( + testutil.MakeArrayValue(t, 2, 2), + document.NewIntegerValue(int64(3)), + ), + val: func(i int) []document.Value { + return values( + testutil.MakeArrayValue(t, i, i), + document.NewIntegerValue(int64(i+1)), + ) + }, + expectedEq: func(t *testing.T, i uint8, key []byte, val []byte) { + i += 2 + requireIdxEncodedEq(t, + encValue{false, testutil.MakeArrayValue(t, i, i)}, + encValue{false, document.NewIntegerValue(int64(i + 1))}, + )(val) + }, + expectedCount: 3, + }, + {name: "index=[any, any], vals=[int[], int[]], pivot=[[2,2], [3,3]]", + indexTypes: []document.ValueType{document.AnyType, document.AnyType}, + pivot: values( + testutil.MakeArrayValue(t, 2, 2), + testutil.MakeArrayValue(t, 3, 3), 
+ ), + val: func(i int) []document.Value { + return values( + testutil.MakeArrayValue(t, i, i), + testutil.MakeArrayValue(t, i+1, i+1), + ) + }, + expectedEq: func(t *testing.T, i uint8, key []byte, val []byte) { + i += 2 + requireIdxEncodedEq(t, + encValue{false, testutil.MakeArrayValue(t, i, i)}, + encValue{false, testutil.MakeArrayValue(t, i+1, i+1)}, + )(val) + }, + expectedCount: 3, + }, + {name: "index=[array, any], vals=[int[], int], pivot=[[2,2], 3]", + indexTypes: []document.ValueType{document.ArrayValue, document.AnyType}, + pivot: values( + testutil.MakeArrayValue(t, 2, 2), + document.NewIntegerValue(int64(3)), + ), + val: func(i int) []document.Value { + return values( + testutil.MakeArrayValue(t, i, i), + document.NewIntegerValue(int64(i+1)), + ) + }, + expectedEq: func(t *testing.T, i uint8, key []byte, val []byte) { + i += 2 + requireIdxEncodedEq(t, + encValue{true, testutil.MakeArrayValue(t, i, i)}, + encValue{false, document.NewIntegerValue(int64(i + 1))}, + )(val) + }, + expectedCount: 3, + }, } for _, test := range tests { t.Run(test.name, func(t *testing.T) { - idx, cleanup := getIndex(t, unique) + idx, cleanup := getIndex(t, unique, test.indexTypes...) defer cleanup() - for i := 0; i < 10; i += 2 { + for i := 0; i < total; i++ { require.NoError(t, idx.Set(test.val(i), []byte{'a' + byte(i)})) + if test.noise != nil { + v := test.noise(i) + if v != nil { + require.NoError(t, idx.Set(test.noise(i), []byte{'a' + byte(i)})) + } + } } var i uint8 var count int - err := idx.AscendGreaterOrEqual(test.pivot, func(val, rid []byte) error { - switch test.t { - case document.IntegerValue: - requireEqualEncoded(t, document.NewIntegerValue(int64(i)), val) - case document.DoubleValue: - requireEqualEncoded(t, document.NewDoubleValue(float64(i)+float64(i)/2), val) - case document.TextValue: - requireEqualEncoded(t, document.NewTextValue(strconv.Itoa(int(i))), val) - case document.BoolValue: - requireEqualEncoded(t, document.NewBoolValue(i > 0), val) + fn := func() error { + return idx.AscendGreaterOrEqual(test.pivot, func(val, rid []byte) error { + test.expectedEq(t, i, rid, val) + i++ + count++ + return nil + }) + } + if test.mustPanic { + // let's avoid panicking because expectedEq wasn't defined, which would + // be a false positive. 
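+ // (a nil expectedEq would make the iteration callback panic on its own, so require.Panics could pass for the wrong reason)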
+ if test.expectedEq == nil { + test.expectedEq = func(t *testing.T, i uint8, key, val []byte) {} } - require.Equal(t, []byte{'a' + i}, rid) - - i += 2 - count++ - return nil - }) - require.NoError(t, err) - require.Equal(t, 5, count) + require.Panics(t, func() { _ = fn() }) + } else { + err := fn() + require.NoError(t, err) + require.Equal(t, test.expectedCount, count) + } }) } }) - - t.Run(text+"With pivot, should iterate over some documents in order", func(t *testing.T) { - idx, cleanup := getIndex(t, unique) - defer cleanup() - - for i := byte(0); i < 10; i += 2 { - require.NoError(t, idx.Set(document.NewTextValue(string([]byte{'A' + i})), []byte{'a' + i})) - } - - var i uint8 - var count int - pivot := document.NewTextValue("C") - err := idx.AscendGreaterOrEqual(pivot, func(val, rid []byte) error { - requireEqualEncoded(t, document.NewTextValue(string([]byte{'C' + i})), val) - require.Equal(t, []byte{'c' + i}, rid) - - i += 2 - count++ - return nil - }) - require.NoError(t, err) - require.Equal(t, 4, count) - }) - - t.Run(text+"With no pivot, should iterate over all documents in order, regardless of their type", func(t *testing.T) { - idx, cleanup := getIndex(t, unique) - defer cleanup() - - for i := int64(0); i < 10; i++ { - require.NoError(t, idx.Set(document.NewDoubleValue(float64(i)), []byte{'d', 'a' + byte(i)})) - require.NoError(t, idx.Set(document.NewTextValue(strconv.Itoa(int(i))), []byte{'s', 'a' + byte(i)})) - } - - var doubles, texts int - var count int - err := idx.AscendGreaterOrEqual(document.Value{}, func(val, rid []byte) error { - if count < 10 { - requireEqualEncoded(t, document.NewDoubleValue(float64(doubles)), val) - require.Equal(t, []byte{'d', 'a' + byte(doubles)}, rid) - doubles++ - } else { - requireEqualEncoded(t, document.NewTextValue(strconv.Itoa(int(texts))), val) - require.Equal(t, []byte{'s', 'a' + byte(texts)}, rid) - texts++ - } - count++ - return nil - }) - require.NoError(t, err) - require.Equal(t, 10, doubles) - require.Equal(t, 10, texts) - }) - - t.Run(text+"With no pivot and typed index, should iterate over all documents in order", func(t *testing.T) { - idx, cleanup := getIndex(t, unique) - idx.Info.Type = document.IntegerValue - defer cleanup() - - for i := int64(0); i < 10; i++ { - require.NoError(t, idx.Set(document.NewIntegerValue(i), []byte{'i', 'a' + byte(i)})) - } - - var ints int - err := idx.AscendGreaterOrEqual(document.Value{}, func(val, rid []byte) error { - enc, err := document.NewIntegerValue(int64(ints)).MarshalBinary() - require.NoError(t, err) - require.Equal(t, enc, val) - require.Equal(t, []byte{'i', 'a' + byte(ints)}, rid) - ints++ - - return nil - }) - require.NoError(t, err) - require.Equal(t, 10, ints) - }) } - - t.Run("Unique: false, Must iterate through similar values properly", func(t *testing.T) { - idx, cleanup := getIndex(t, false) - defer cleanup() - - for i := int64(0); i < 100; i++ { - require.NoError(t, idx.Set(document.NewIntegerValue(1), binarysort.AppendInt64(nil, i))) - require.NoError(t, idx.Set(document.NewTextValue("1"), binarysort.AppendInt64(nil, i))) - } - - var doubles, texts int - i := int64(0) - err := idx.AscendGreaterOrEqual(document.Value{Type: document.IntegerValue}, func(val, rid []byte) error { - requireEqualEncoded(t, document.NewIntegerValue(1), val) - require.Equal(t, binarysort.AppendInt64(nil, i), rid) - i++ - doubles++ - return nil - }) - require.NoError(t, err) - - i = 0 - err = idx.AscendGreaterOrEqual(document.Value{Type: document.TextValue}, func(val, rid []byte) error { - 
requireEqualEncoded(t, document.NewTextValue("1"), val) - require.Equal(t, binarysort.AppendInt64(nil, i), rid) - i++ - texts++ - return nil - }) - require.NoError(t, err) - require.Equal(t, 100, doubles) - require.Equal(t, 100, texts) - }) } func TestIndexDescendLessOrEqual(t *testing.T) { @@ -330,7 +1006,7 @@ func TestIndexDescendLessOrEqual(t *testing.T) { defer cleanup() i := 0 - err := idx.DescendLessOrEqual(document.Value{Type: document.IntegerValue}, func(val, key []byte) error { + err := idx.AscendGreaterOrEqual(values(document.Value{Type: document.IntegerValue}), func(val, key []byte) error { i++ return errors.New("should not iterate") }) @@ -338,80 +1014,718 @@ func TestIndexDescendLessOrEqual(t *testing.T) { require.Equal(t, 0, i) }) - t.Run(text+"With empty typed pivot, should iterate over all documents of the same type in reverse order", func(t *testing.T) { - idx, cleanup := getIndex(t, unique) - defer cleanup() - - for i := byte(0); i < 10; i += 2 { - require.NoError(t, idx.Set(document.NewIntegerValue(int64(i)), []byte{'a' + i})) + t.Run(text+"Should iterate through documents in order, ", func(t *testing.T) { + noiseBlob := func(i int) []document.Value { + t.Helper() + return []document.Value{document.NewBlobValue(strconv.AppendInt(nil, int64(i), 10))} + } + noiseInts := func(i int) []document.Value { + t.Helper() + return []document.Value{document.NewIntegerValue(int64(i))} } - var i uint8 = 8 - var count int - err := idx.DescendLessOrEqual(document.Value{Type: document.IntegerValue}, func(val, key []byte) error { - requireEqualEncoded(t, document.NewIntegerValue(int64(i)), val) - require.Equal(t, []byte{'a' + i}, key) + noCallEq := func(t *testing.T, i uint8, key []byte, val []byte) { + require.Fail(t, "equality test should not be called here") + } - i -= 2 - count++ - return nil - }) - require.NoError(t, err) - require.Equal(t, 5, count) - }) + // the following tests will use that constant to determine how many values needs to be inserted + // with the value and noise generators. 
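+ // when a noise generator is provided, each i in [0, total) inserts one val entry and one noise entry, so the index holds up to total*2 entries.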
+ total := 5 - t.Run(text+"With pivot, should iterate over some documents in order", func(t *testing.T) { - idx, cleanup := getIndex(t, unique) - defer cleanup() + tests := []struct { + name string + // the index type(s) that is being used + indexTypes []document.ValueType + // the pivot, typed or not used to iterate + pivot database.Pivot + // the generator for the values that are being indexed + val func(i int) []document.Value + // the generator for the noise values that are being indexed + noise func(i int) []document.Value + // the function to compare the key/value that the iteration yields + expectedEq func(t *testing.T, i uint8, key []byte, val []byte) + // the total count of iteration that should happen + expectedCount int + mustPanic bool + }{ + // integers --------------------------------------------------- + {name: "index=any, vals=integers, pivot=integer", + indexTypes: nil, + pivot: values(document.Value{Type: document.IntegerValue}), + val: func(i int) []document.Value { return values(document.NewIntegerValue(int64(i))) }, + noise: noiseBlob, + expectedEq: func(t *testing.T, i uint8, key []byte, val []byte) { + require.Equal(t, []byte{'a' + i}, key) + requireEqualBinary(t, testutil.MakeArrayValue(t, int64(i)), val) + }, + expectedCount: 5, + }, + {name: "index=integer, vals=integers, pivot=integer", + indexTypes: []document.ValueType{document.IntegerValue}, + pivot: values(document.Value{Type: document.IntegerValue}), + val: func(i int) []document.Value { return values(document.NewIntegerValue(int64(i))) }, + expectedEq: func(t *testing.T, i uint8, key []byte, val []byte) { + require.Equal(t, []byte{'a' + i}, key) + requireIdxEncodedEq(t, + encValue{true, document.NewIntegerValue(int64(i))}, + )(val) + }, + expectedCount: 5, + }, + {name: "index=any, vals=integers, pivot=integer:2", + indexTypes: nil, + pivot: values(document.NewIntegerValue(2)), + val: func(i int) []document.Value { return values(document.NewIntegerValue(int64(i))) }, + noise: noiseBlob, + expectedEq: func(t *testing.T, i uint8, key []byte, val []byte) { + i -= 2 + require.Equal(t, []byte{'a' + i}, key) + requireIdxEncodedEq(t, + encValue{false, document.NewIntegerValue(int64(i))}, + )(val) + }, + expectedCount: 3, + }, + {name: "index=any, vals=integers, pivot=integer:-10", + indexTypes: nil, + pivot: values(document.NewIntegerValue(-10)), + val: func(i int) []document.Value { return values(document.NewIntegerValue(int64(i))) }, + noise: noiseBlob, + expectedEq: noCallEq, + expectedCount: 0, + }, + {name: "index=integer, vals=integers, pivot=integer:2", + indexTypes: []document.ValueType{document.IntegerValue}, + pivot: values(document.NewIntegerValue(2)), + val: func(i int) []document.Value { return values(document.NewIntegerValue(int64(i))) }, + expectedEq: func(t *testing.T, i uint8, key []byte, val []byte) { + i -= 2 + require.Equal(t, []byte{'a' + i}, key) + requireIdxEncodedEq(t, + encValue{true, document.NewIntegerValue(int64(i))}, + )(val) + }, + expectedCount: 3, + }, + {name: "index=integer, vals=integers, pivot=double", + indexTypes: []document.ValueType{document.IntegerValue}, + pivot: values(document.Value{Type: document.DoubleValue}), + val: func(i int) []document.Value { return values(document.NewIntegerValue(int64(i))) }, + expectedEq: noCallEq, + expectedCount: 0, + }, + + // doubles ---------------------------------------------------- + {name: "index=any, vals=doubles, pivot=double", + indexTypes: nil, + pivot: values(document.Value{Type: document.DoubleValue}), + val: func(i int) 
[]document.Value { return values(document.NewDoubleValue(float64(i) + float64(i)/2)) }, + expectedEq: func(t *testing.T, i uint8, key []byte, val []byte) { + require.Equal(t, []byte{'a' + i}, key) + requireIdxEncodedEq(t, + encValue{false, document.NewDoubleValue(float64(i) + float64(i)/2)}, + )(val) + }, + expectedCount: 5, + }, + {name: "index=any, vals=doubles, pivot=double:1.8", + indexTypes: nil, + pivot: values(document.NewDoubleValue(1.8)), + val: func(i int) []document.Value { return values(document.NewDoubleValue(float64(i) + float64(i)/2)) }, + expectedEq: func(t *testing.T, i uint8, key []byte, val []byte) { + i -= 3 + require.Equal(t, []byte{'a' + i}, key) + requireIdxEncodedEq(t, + encValue{false, document.NewDoubleValue(float64(i) + float64(i)/2)}, + )(val) + }, + expectedCount: 2, + }, + {name: "index=double, vals=doubles, pivot=double:1.8", + indexTypes: []document.ValueType{document.DoubleValue}, + pivot: values(document.NewDoubleValue(1.8)), + val: func(i int) []document.Value { return values(document.NewDoubleValue(float64(i) + float64(i)/2)) }, + expectedEq: func(t *testing.T, i uint8, key []byte, val []byte) { + i -= 3 + require.Equal(t, []byte{'a' + i}, key) + requireIdxEncodedEq(t, + encValue{true, document.NewDoubleValue(float64(i) + float64(i)/2)}, + )(val) + }, + expectedCount: 2, + }, + {name: "index=any, vals=doubles, pivot=double:-10.8", + indexTypes: nil, + pivot: values(document.NewDoubleValue(-10.8)), + val: func(i int) []document.Value { return values(document.NewDoubleValue(float64(i) + float64(i)/2)) }, + expectedEq: noCallEq, + expectedCount: 0, + }, + + // text ------------------------------------------------------- + {name: "index=any, vals=text pivot=text", + indexTypes: nil, + pivot: values(document.Value{Type: document.TextValue}), + val: func(i int) []document.Value { return values(document.NewTextValue(strconv.Itoa(i))) }, + noise: noiseInts, + expectedEq: func(t *testing.T, i uint8, key []byte, val []byte) { + require.Equal(t, []byte{'a' + i}, key) + requireIdxEncodedEq(t, + encValue{false, document.NewTextValue(strconv.Itoa(int(i)))}, + )(val) + + }, + expectedCount: 5, + }, + {name: "index=any, vals=text, pivot=text('2')", + indexTypes: nil, + pivot: values(document.NewTextValue("2")), + val: func(i int) []document.Value { return values(document.NewTextValue(strconv.Itoa(i))) }, + noise: noiseInts, + expectedEq: func(t *testing.T, i uint8, key []byte, val []byte) { + i -= 2 + require.Equal(t, []byte{'a' + i}, key) + requireIdxEncodedEq(t, + encValue{false, document.NewTextValue(strconv.Itoa(int(i)))}, + )(val) + }, + expectedCount: 3, + }, + {name: "index=any, vals=text, pivot=text('')", + indexTypes: nil, + pivot: values(document.NewTextValue("")), + val: func(i int) []document.Value { return values(document.NewTextValue(strconv.Itoa(i))) }, + noise: noiseInts, + expectedEq: func(t *testing.T, i uint8, key []byte, val []byte) { + require.Equal(t, []byte{'a' + i}, key) + requireIdxEncodedEq(t, + encValue{false, document.NewTextValue(strconv.Itoa(int(i)))}, + )(val) + }, + expectedCount: 5, + }, + {name: "index=any, vals=text, pivot=text('foo')", + indexTypes: nil, + pivot: values(document.NewTextValue("foo")), + val: func(i int) []document.Value { return values(document.NewTextValue(strconv.Itoa(i))) }, + noise: noiseInts, + expectedEq: func(t *testing.T, i uint8, key []byte, val []byte) { + require.Equal(t, []byte{'a' + i}, key) + requireIdxEncodedEq(t, + encValue{false, document.NewTextValue(strconv.Itoa(int(i)))}, + )(val) + }, + 
expectedCount: 5, + }, + {name: "index=text, vals=text, pivot=text('2')", + indexTypes: []document.ValueType{document.TextValue}, + pivot: values(document.NewTextValue("2")), + val: func(i int) []document.Value { return values(document.NewTextValue(strconv.Itoa(i))) }, + expectedEq: func(t *testing.T, i uint8, key []byte, val []byte) { + i -= 2 + require.Equal(t, []byte{'a' + i}, key) + requireIdxEncodedEq(t, + encValue{true, document.NewTextValue(strconv.Itoa(int(i)))}, + )(val) + }, + expectedCount: 3, + }, + // composite -------------------------------------------------- + // composite indexes can have empty pivot values to iterate on the whole indexed data + {name: "index=[any, untyped], vals=[int, int], pivot=[nil,nil]", + indexTypes: []document.ValueType{document.AnyType, document.AnyType}, + pivot: values(document.Value{}, document.Value{}), + val: func(i int) []document.Value { + return values(document.NewIntegerValue(int64(i)), document.NewIntegerValue(int64(i+1))) + }, + expectedEq: func(t *testing.T, i uint8, key []byte, val []byte) { + requireIdxEncodedEq(t, + encValue{false, document.NewIntegerValue(int64(i))}, + encValue{false, document.NewIntegerValue(int64(i + 1))}, + )(val) + }, + expectedCount: 5, + }, + {name: "index=[any, untyped], vals=[int, int], noise=[blob, blob], pivot=[int]", + indexTypes: []document.ValueType{document.AnyType, document.AnyType}, + pivot: values(document.Value{Type: document.IntegerValue}), + val: func(i int) []document.Value { + return values(document.NewIntegerValue(int64(i)), document.NewIntegerValue(int64(i+1))) + }, + expectedEq: func(t *testing.T, i uint8, key []byte, val []byte) { + requireIdxEncodedEq(t, + encValue{false, document.NewIntegerValue(int64(i))}, + encValue{false, document.NewIntegerValue(int64(i + 1))}, + )(val) + }, + expectedCount: 5, + }, + // composite indexes cannot have values with type past the first element + {name: "index=[any, untyped], vals=[int, int], noise=[blob, blob], pivot=[int, int]", + indexTypes: []document.ValueType{document.AnyType, document.AnyType}, + pivot: values(document.Value{Type: document.IntegerValue}, document.Value{Type: document.IntegerValue}), + val: func(i int) []document.Value { + return values(document.NewIntegerValue(int64(i)), document.NewIntegerValue(int64(i+1))) + }, + mustPanic: true, + }, + {name: "index=[any, untyped], vals=[int, int], noise=[blob, blob], pivot=[0, int, 0]", + indexTypes: []document.ValueType{0, 0, 0}, + pivot: values(document.NewIntegerValue(0), document.Value{Type: document.IntegerValue}, document.NewIntegerValue(0)), + val: func(i int) []document.Value { + return values(document.NewIntegerValue(int64(i)), document.NewIntegerValue(int64(i+1)), document.NewIntegerValue(int64(i+1))) + }, + mustPanic: true, + }, + {name: "index=[any, untyped], vals=[int, int], noise=[blob, blob], pivot=[int, 0]", + indexTypes: []document.ValueType{document.AnyType, document.AnyType}, + pivot: values(document.Value{Type: document.IntegerValue}, document.NewIntegerValue(0)), + val: func(i int) []document.Value { + return values(document.NewIntegerValue(int64(i)), document.NewIntegerValue(int64(i+1))) + }, + mustPanic: true, + }, + {name: "index=[any, untyped], vals=[int, int], noise=[blob, blob], pivot=[0, 0]", + indexTypes: []document.ValueType{document.AnyType, document.AnyType}, + pivot: values(document.NewIntegerValue(0), document.NewIntegerValue(0)), + val: func(i int) []document.Value { + return values(document.NewIntegerValue(int64(i)), document.NewIntegerValue(int64(i+1))) + }, 
+ noise: func(i int) []document.Value { + return values(document.NewBlobValue(strconv.AppendInt(nil, int64(i), 10)), document.NewBlobValue(strconv.AppendInt(nil, int64(i), 10))) + }, + expectedEq: noCallEq, + expectedCount: 0, + }, + {name: "index=[any, untyped], vals=[int, int], noise=[blob, blob], pivot=[5, 5]", + indexTypes: []document.ValueType{document.AnyType, document.AnyType}, + pivot: values(document.NewIntegerValue(5), document.NewIntegerValue(5)), + val: func(i int) []document.Value { + return values(document.NewIntegerValue(int64(i)), document.NewIntegerValue(int64(i+1))) + }, + noise: func(i int) []document.Value { + return values(document.NewBlobValue(strconv.AppendInt(nil, int64(i), 10)), document.NewBlobValue(strconv.AppendInt(nil, int64(i), 10))) + }, + expectedEq: func(t *testing.T, i uint8, key []byte, val []byte) { + requireIdxEncodedEq(t, + encValue{false, document.NewIntegerValue(int64(i))}, + encValue{false, document.NewIntegerValue(int64(i + 1))}, + )(val) + }, + expectedCount: 5, + }, + // [0,1], [1,2], --[2,0]--, [2,3], [3,4], [4,5] + {name: "index=[any, untyped], vals=[int, int], noise=[blob, blob], pivot=[2, 0]", + indexTypes: []document.ValueType{document.AnyType, document.AnyType}, + pivot: values(document.NewIntegerValue(2), document.NewIntegerValue(0)), + val: func(i int) []document.Value { + return values(document.NewIntegerValue(int64(i)), document.NewIntegerValue(int64(i+1))) + }, + noise: func(i int) []document.Value { + return values(document.NewBlobValue(strconv.AppendInt(nil, int64(i), 10)), document.NewBlobValue(strconv.AppendInt(nil, int64(i), 10))) + }, + expectedEq: func(t *testing.T, i uint8, key []byte, val []byte) { + i -= 3 + requireIdxEncodedEq(t, + encValue{false, document.NewIntegerValue(int64(i))}, + encValue{false, document.NewIntegerValue(int64(i + 1))}, + )(val) + }, + expectedCount: 2, + }, + // [0,1], [1,2], [2,3], --[2,int]--, [3,4], [4,5] + {name: "index=[any, untyped], vals=[int, int], noise=[blob, blob], pivot=[2, int]", + indexTypes: []document.ValueType{document.AnyType, document.AnyType}, + pivot: values(document.NewIntegerValue(2), document.Value{Type: document.IntegerValue}), + val: func(i int) []document.Value { + return values(document.NewIntegerValue(int64(i)), document.NewIntegerValue(int64(i+1))) + }, + noise: func(i int) []document.Value { + return values(document.NewBlobValue(strconv.AppendInt(nil, int64(i), 10)), document.NewBlobValue(strconv.AppendInt(nil, int64(i), 10))) + }, + expectedEq: func(t *testing.T, i uint8, key []byte, val []byte) { + i -= 2 + requireIdxEncodedEq(t, + encValue{false, document.NewIntegerValue(int64(i))}, + encValue{false, document.NewIntegerValue(int64(i + 1))}, + )(val) + }, + expectedCount: 3, + }, + // pivot [2, int] should filter out [2, not(int)] + // [0,1], [1,2], [2,3], --[2,int]--, [2, text], [3,4], [3,text], [4,5], [4,text] + {name: "index=[any, untyped], vals=[int, int], noise=[int, text], pivot=[2, int]", + indexTypes: []document.ValueType{document.AnyType, document.AnyType}, + pivot: values(document.NewIntegerValue(2), document.Value{Type: document.IntegerValue}), + val: func(i int) []document.Value { + return values(document.NewIntegerValue(int64(i)), document.NewIntegerValue(int64(i+1))) + }, + noise: func(i int) []document.Value { + if i > 1 { + return values(document.NewIntegerValue(int64(i)), document.NewTextValue("foo")) + } - for i := byte(0); i < 10; i++ { - require.NoError(t, idx.Set(document.NewTextValue(string([]byte{'A' + i})), []byte{'a' + i})) + return nil + }, + 
expectedEq: func(t *testing.T, i uint8, key []byte, val []byte) { + i -= 2 + requireIdxEncodedEq(t, + encValue{false, document.NewIntegerValue(int64(i))}, + encValue{false, document.NewIntegerValue(int64(i + 1))}, + )(val) + }, + expectedCount: 3, + }, + // a more subtle case + {name: "index=[any, untyped], vals=[int, blob], noise=[blob, blob], pivot=[2, 'a']", // pivot is [2, a] but value is [2, c] but that must work anyway + indexTypes: []document.ValueType{document.AnyType, document.AnyType}, + pivot: values(document.NewIntegerValue(2), document.NewBlobValue([]byte{byte('a')})), + val: func(i int) []document.Value { + return values( + document.NewIntegerValue(int64(i)), + document.NewBlobValue([]byte{byte('a' + uint8(i))}), + ) + }, + noise: func(i int) []document.Value { + return values( + document.NewBlobValue(strconv.AppendInt(nil, int64(i), 10)), + document.NewBlobValue(strconv.AppendInt(nil, int64(i), 10)), + ) + }, + expectedEq: func(t *testing.T, i uint8, key []byte, val []byte) { + i -= 3 + requireIdxEncodedEq(t, + encValue{false, document.NewIntegerValue(int64(i))}, + encValue{false, document.NewBlobValue([]byte{byte('a' + uint8(i))})}, + )(val) + }, + expectedCount: 2, + }, + // only one of the indexed value is typed + {name: "index=[any, blob], vals=[int, blob], noise=[blob, blob], pivot=[2, 'a']", // pivot is [2, a] but value is [2, c] but that must work anyway + indexTypes: []document.ValueType{0, document.BlobValue}, + pivot: values(document.NewIntegerValue(2), document.NewBlobValue([]byte{byte('a')})), + val: func(i int) []document.Value { + return values( + document.NewIntegerValue(int64(i)), + document.NewBlobValue([]byte{byte('a' + uint8(i))}), + ) + }, + noise: func(i int) []document.Value { + return values( + document.NewBlobValue(strconv.AppendInt(nil, int64(i), 10)), + document.NewBlobValue(strconv.AppendInt(nil, int64(i), 10)), + ) + }, + expectedEq: func(t *testing.T, i uint8, key []byte, val []byte) { + i -= 3 + requireIdxEncodedEq(t, + encValue{false, document.NewIntegerValue(int64(i))}, + encValue{true, document.NewBlobValue([]byte{byte('a' + uint8(i))})}, + )(val) + }, + expectedCount: 2, + }, + // partial pivot + {name: "index=[any, untyped], vals=[int, int], noise=[blob, blob], pivot=[0]", + indexTypes: []document.ValueType{document.AnyType, document.AnyType}, + pivot: values(document.NewIntegerValue(0)), + val: func(i int) []document.Value { + return values(document.NewIntegerValue(int64(i)), document.NewIntegerValue(int64(i+1))) + }, + noise: func(i int) []document.Value { + return values(document.NewIntegerValue(int64(i)), document.NewBlobValue(strconv.AppendInt(nil, int64(i), 10))) + }, + expectedEq: func(t *testing.T, i uint8, key []byte, val []byte) { + // let's not try to match, it's not important + }, + expectedCount: 2, // [0] is "equal" to [0, 1] and [0, "1"] + }, + {name: "index=[any, untyped], vals=[int, int], noise=[blob, blob], pivot=[5]", + indexTypes: []document.ValueType{document.AnyType, document.AnyType}, + pivot: values(document.NewIntegerValue(5)), + val: func(i int) []document.Value { + return values(document.NewIntegerValue(int64(i)), document.NewIntegerValue(int64(i+1))) + }, + noise: func(i int) []document.Value { + return values(document.NewIntegerValue(int64(i)), document.NewBlobValue(strconv.AppendInt(nil, int64(i), 10))) + }, + expectedEq: func(t *testing.T, i uint8, key []byte, val []byte) { + // let's not try to match, it's not important + }, + expectedCount: 10, + }, + {name: "index=[any, untyped], vals=[int, int], 
noise=[blob, blob], pivot=[2]", + indexTypes: []document.ValueType{document.AnyType, document.AnyType}, + pivot: values(document.NewIntegerValue(2)), + val: func(i int) []document.Value { + return values(document.NewIntegerValue(int64(i)), document.NewIntegerValue(int64(i+1))) + }, + noise: func(i int) []document.Value { + return values(document.NewIntegerValue(int64(i)), document.NewBlobValue(strconv.AppendInt(nil, int64(i), 10))) + }, + expectedEq: func(t *testing.T, i uint8, key []byte, val []byte) { + // let's not try to match, it's not important + }, + expectedCount: 6, // total * 2 - (noise + val = 2) * 2 + }, + // index typed + {name: "index=[int, int], vals=[int, int], pivot=[0, 0]", + indexTypes: []document.ValueType{document.IntegerValue, document.IntegerValue}, + pivot: values(document.NewIntegerValue(0), document.NewIntegerValue(0)), + val: func(i int) []document.Value { + return values(document.NewIntegerValue(int64(i)), document.NewIntegerValue(int64(i+1))) + }, + expectedEq: noCallEq, + expectedCount: 0, + }, + {name: "index=[int, int], vals=[int, int], pivot=[5, 6]", + indexTypes: []document.ValueType{document.IntegerValue, document.IntegerValue}, + pivot: values(document.NewIntegerValue(5), document.NewIntegerValue(6)), + val: func(i int) []document.Value { + return values(document.NewIntegerValue(int64(i)), document.NewIntegerValue(int64(i+1))) + }, + expectedEq: func(t *testing.T, i uint8, key []byte, val []byte) { + requireIdxEncodedEq(t, + encValue{true, document.NewIntegerValue(int64(i))}, + encValue{true, document.NewIntegerValue(int64(i + 1))}, + )(val) + }, + expectedCount: 5, + }, + {name: "index=[int, int], vals=[int, int], pivot=[2, 0]", + indexTypes: []document.ValueType{document.IntegerValue, document.IntegerValue}, + pivot: values(document.NewIntegerValue(2), document.NewIntegerValue(0)), + val: func(i int) []document.Value { + return values(document.NewIntegerValue(int64(i)), document.NewIntegerValue(int64(i+1))) + }, + expectedEq: func(t *testing.T, i uint8, key []byte, val []byte) { + i -= 3 + requireIdxEncodedEq(t, + encValue{true, document.NewIntegerValue(int64(i))}, + encValue{true, document.NewIntegerValue(int64(i + 1))}, + )(val) + }, + expectedCount: 2, + }, + // a more subtle case + {name: "index=[int, blob], vals=[int, blob], pivot=[2, 'a']", // pivot is [2, a] but value is [2, c] but that must work anyway + indexTypes: []document.ValueType{document.IntegerValue, document.BlobValue}, + pivot: values(document.NewIntegerValue(2), document.NewBlobValue([]byte{byte('a')})), + val: func(i int) []document.Value { + return values(document.NewIntegerValue(int64(i)), document.NewBlobValue([]byte{byte('a' + uint8(i))})) + }, + expectedEq: func(t *testing.T, i uint8, key []byte, val []byte) { + i -= 3 + requireIdxEncodedEq(t, + encValue{true, document.NewIntegerValue(int64(i))}, + encValue{true, document.NewBlobValue([]byte{byte('a' + uint8(i))})}, + )(val) + }, + expectedCount: 2, + }, + // partial pivot + {name: "index=[int, int], vals=[int, int], pivot=[0]", + indexTypes: []document.ValueType{document.IntegerValue, document.IntegerValue}, + pivot: values(document.NewIntegerValue(0)), + val: func(i int) []document.Value { + return values(document.NewIntegerValue(int64(i)), document.NewIntegerValue(int64(i+1))) + }, + expectedEq: func(t *testing.T, i uint8, key []byte, val []byte) { + i -= 4 + requireIdxEncodedEq(t, + encValue{true, document.NewIntegerValue(int64(i))}, + encValue{true, document.NewIntegerValue(int64(i + 1))}, + )(val) + }, + expectedCount: 
1, + }, + // [0,1], [1,2], [2,3], --[2]--, [3,4], [4,5] + {name: "index=[int, int], vals=[int, int], pivot=[2]", + indexTypes: []document.ValueType{document.IntegerValue, document.IntegerValue}, + pivot: values(document.NewIntegerValue(2)), + val: func(i int) []document.Value { + return values(document.NewIntegerValue(int64(i)), document.NewIntegerValue(int64(i+1))) + }, + expectedEq: func(t *testing.T, i uint8, key []byte, val []byte) { + i -= 2 + requireIdxEncodedEq(t, + encValue{true, document.NewIntegerValue(int64(i))}, + encValue{true, document.NewIntegerValue(int64(i + 1))}, + )(val) + }, + expectedCount: 3, + }, + // documents -------------------------------------------------- + {name: "index=[any, any], vals=[doc, int], pivot=[{a:2}, 3]", + indexTypes: []document.ValueType{document.AnyType, document.AnyType}, + pivot: values( + document.NewDocumentValue(testutil.MakeDocument(t, `{"a":2}`)), + document.NewIntegerValue(int64(3)), + ), + val: func(i int) []document.Value { + return values( + document.NewDocumentValue(testutil.MakeDocument(t, `{"a":`+strconv.Itoa(i)+`}`)), + document.NewIntegerValue(int64(i+1)), + ) + }, + expectedEq: func(t *testing.T, i uint8, key []byte, val []byte) { + i -= 2 + requireIdxEncodedEq(t, + encValue{false, document.NewDocumentValue(testutil.MakeDocument(t, `{"a":`+strconv.Itoa(int(i))+`}`))}, + encValue{false, document.NewIntegerValue(int64(i + 1))}, + )(val) + }, + expectedCount: 3, + }, + {name: "index=[document, int], vals=[doc, int], pivot=[{a:2}, 3]", + indexTypes: []document.ValueType{document.DocumentValue, document.IntegerValue}, + pivot: values( + document.NewDocumentValue(testutil.MakeDocument(t, `{"a":2}`)), + document.NewIntegerValue(int64(3)), + ), + val: func(i int) []document.Value { + return values( + document.NewDocumentValue(testutil.MakeDocument(t, `{"a":`+strconv.Itoa(i)+`}`)), + document.NewIntegerValue(int64(i+1)), + ) + }, + expectedEq: func(t *testing.T, i uint8, key []byte, val []byte) { + i -= 2 + requireIdxEncodedEq(t, + encValue{true, document.NewDocumentValue(testutil.MakeDocument(t, `{"a":`+strconv.Itoa(int(i))+`}`))}, + encValue{true, document.NewIntegerValue(int64(i + 1))}, + )(val) + }, + expectedCount: 3, + }, + + // arrays ----------------------------------------------------- + {name: "index=[any, any], vals=[int[], int], pivot=[]", + indexTypes: []document.ValueType{document.AnyType, document.AnyType}, + pivot: values(), + val: func(i int) []document.Value { + return values( + testutil.MakeArrayValue(t, i, i), + document.NewIntegerValue(int64(i+1)), + ) + }, + expectedEq: func(t *testing.T, i uint8, key []byte, val []byte) { + requireIdxEncodedEq(t, + encValue{false, testutil.MakeArrayValue(t, i, i)}, + encValue{false, document.NewIntegerValue(int64(i + 1))}, + )(val) + }, + expectedCount: 5, + }, + {name: "index=[any, any], vals=[int[], int], pivot=[[2,2], 3]", + indexTypes: []document.ValueType{document.AnyType, document.AnyType}, + pivot: values( + testutil.MakeArrayValue(t, 2, 2), + document.NewIntegerValue(int64(3)), + ), + val: func(i int) []document.Value { + return values( + testutil.MakeArrayValue(t, i, i), + document.NewIntegerValue(int64(i+1)), + ) + }, + expectedEq: func(t *testing.T, i uint8, key []byte, val []byte) { + i -= 2 + requireIdxEncodedEq(t, + encValue{false, testutil.MakeArrayValue(t, i, i)}, + encValue{false, document.NewIntegerValue(int64(i + 1))}, + )(val) + }, + expectedCount: 3, + }, + {name: "index=[any, any], vals=[int[], int[]], pivot=[[2,2], [3,3]]", + indexTypes: 
[]document.ValueType{document.AnyType, document.AnyType}, + pivot: values( + testutil.MakeArrayValue(t, 2, 2), + testutil.MakeArrayValue(t, 3, 3), + ), + val: func(i int) []document.Value { + return values( + testutil.MakeArrayValue(t, i, i), + testutil.MakeArrayValue(t, i+1, i+1), + ) + }, + expectedEq: func(t *testing.T, i uint8, key []byte, val []byte) { + i -= 2 + requireIdxEncodedEq(t, + encValue{false, testutil.MakeArrayValue(t, i, i)}, + encValue{false, testutil.MakeArrayValue(t, i+1, i+1)}, + )(val) + }, + expectedCount: 3, + }, + {name: "index=[array, any], vals=[int[], int], pivot=[[2,2], 3]", + indexTypes: []document.ValueType{document.ArrayValue, document.AnyType}, + pivot: values( + testutil.MakeArrayValue(t, 2, 2), + document.NewIntegerValue(int64(3)), + ), + val: func(i int) []document.Value { + return values( + testutil.MakeArrayValue(t, i, i), + document.NewIntegerValue(int64(i+1)), + ) + }, + expectedEq: func(t *testing.T, i uint8, key []byte, val []byte) { + i -= 2 + requireIdxEncodedEq(t, + encValue{true, testutil.MakeArrayValue(t, i, i)}, + encValue{false, document.NewIntegerValue(int64(i + 1))}, + )(val) + }, + expectedCount: 3, + }, } - var i byte = 0 - var count int - pivot := document.NewTextValue("F") - err := idx.DescendLessOrEqual(pivot, func(val, rid []byte) error { - requireEqualEncoded(t, document.NewTextValue(string([]byte{'F' - i})), val) - require.Equal(t, []byte{'f' - i}, rid) + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + idx, cleanup := getIndex(t, unique, test.indexTypes...) + defer cleanup() - i++ - count++ - return nil - }) - require.NoError(t, err) - require.Equal(t, 6, count) - }) + for i := 0; i < total; i++ { + require.NoError(t, idx.Set(test.val(i), []byte{'a' + byte(i)})) + if test.noise != nil { + v := test.noise(i) + if v != nil { + require.NoError(t, idx.Set(test.noise(i), []byte{'a' + byte(i)})) + } + } + } - t.Run(text+"With no pivot, should iterate over all documents in reverse order, regardless of their type", func(t *testing.T) { - idx, cleanup := getIndex(t, unique) - defer cleanup() + var i uint8 + var count int - for i := 0; i < 10; i++ { - require.NoError(t, idx.Set(document.NewIntegerValue(int64(i)), []byte{'i', 'a' + byte(i)})) - require.NoError(t, idx.Set(document.NewTextValue(strconv.Itoa(i)), []byte{'s', 'a' + byte(i)})) + fn := func() error { + t.Helper() + return idx.DescendLessOrEqual(test.pivot, func(val, rid []byte) error { + test.expectedEq(t, uint8(total-1)-i, rid, val) + i++ + count++ + return nil + }) + } + if test.mustPanic { + // let's avoid panicking because expectedEq wasn't defined, which would + // be a false positive. 
+ if test.expectedEq == nil { + test.expectedEq = func(t *testing.T, i uint8, key, val []byte) {} + } + require.Panics(t, func() { + _ = fn() + }) + } else { + err := fn() + require.NoError(t, err) + require.Equal(t, test.expectedCount, count) + } + }) } - - var ints, texts int = 9, 9 - var count int = 20 - err := idx.DescendLessOrEqual(document.Value{}, func(val, rid []byte) error { - if count > 10 { - requireEqualEncoded(t, document.NewTextValue(strconv.Itoa(int(texts))), val) - require.Equal(t, []byte{'s', 'a' + byte(texts)}, rid) - texts-- - } else { - requireEqualEncoded(t, document.NewIntegerValue(int64(ints)), val) - require.Equal(t, []byte{'i', 'a' + byte(ints)}, rid) - ints-- - } - - count-- - return nil - }) - require.NoError(t, err) - require.Equal(t, 0, count) - require.Equal(t, -1, ints) - require.Equal(t, -1, texts) }) } } @@ -428,7 +1742,7 @@ func BenchmarkIndexSet(b *testing.B) { b.StartTimer() for j := 0; j < size; j++ { k := fmt.Sprintf("name-%d", j) - idx.Set(document.NewTextValue(k), []byte(k)) + idx.Set(values(document.NewTextValue(k)), []byte(k)) } b.StopTimer() cleanup() @@ -446,12 +1760,56 @@ func BenchmarkIndexIteration(b *testing.B) { for i := 0; i < size; i++ { k := []byte(fmt.Sprintf("name-%d", i)) - _ = idx.Set(document.NewTextValue(string(k)), k) + _ = idx.Set(values(document.NewTextValue(string(k))), k) + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = idx.AscendGreaterOrEqual(values(document.Value{Type: document.TextValue}), func(_, _ []byte) error { + return nil + }) + } + b.StopTimer() + }) + } +} + +// BenchmarkCompositeIndexSet benchmarks the Set method with 10, 100, 1000 and 10000 successive insertions. +func BenchmarkCompositeIndexSet(b *testing.B) { + for size := 10; size <= 10000; size *= 10 { + b.Run(fmt.Sprintf("%.05d", size), func(b *testing.B) { + b.ResetTimer() + b.StopTimer() + for i := 0; i < b.N; i++ { + idx, cleanup := getIndex(b, false, document.TextValue, document.TextValue) + + b.StartTimer() + for j := 0; j < size; j++ { + k := fmt.Sprintf("name-%d", j) + idx.Set(values(document.NewTextValue(k), document.NewTextValue(k)), []byte(k)) + } + b.StopTimer() + cleanup() + } + }) + } +} + +// BenchmarkCompositeIndexIteration benchmarks the iteration of a cursor with 10, 100, 1000 and 10000 items.
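+// The pivot of two empty text values sorts before every indexed pair, so each b.N iteration is expected to walk the whole index.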
+func BenchmarkCompositeIndexIteration(b *testing.B) { + for size := 10; size <= 10000; size *= 10 { + b.Run(fmt.Sprintf("%.05d", size), func(b *testing.B) { + idx, cleanup := getIndex(b, false, document.AnyType, document.AnyType) + defer cleanup() + + for i := 0; i < size; i++ { + k := []byte(fmt.Sprintf("name-%d", i)) + _ = idx.Set(values(document.NewTextValue(string(k)), document.NewTextValue(string(k))), k) } b.ResetTimer() for i := 0; i < b.N; i++ { - _ = idx.AscendGreaterOrEqual(document.Value{Type: document.TextValue}, func(_, _ []byte) error { + _ = idx.AscendGreaterOrEqual(values(document.NewTextValue(""), document.NewTextValue("")), func(_, _ []byte) error { return nil }) } diff --git a/database/table.go b/database/table.go index f1046ec92..319b6f819 100644 --- a/database/table.go +++ b/database/table.go @@ -92,12 +92,18 @@ func (t *Table) Insert(d document.Document) (document.Document, error) { indexes := t.Indexes() for _, idx := range indexes { - v, err := idx.Info.Path.GetValueFromDocument(fb) - if err != nil { - v = document.NewNullValue() + vs := make([]document.Value, 0, len(idx.Info.Paths)) + + for _, path := range idx.Info.Paths { + v, err := path.GetValueFromDocument(fb) + if err != nil { + v = document.NewNullValue() + } + + vs = append(vs, v) } - err = idx.Set(v, key) + err = idx.Set(vs, key) if err != nil { if err == ErrIndexDuplicateValue { return nil, ErrDuplicateDocument @@ -136,12 +142,21 @@ func (t *Table) Delete(key []byte) error { indexes := t.Indexes() for _, idx := range indexes { - v, err := idx.Info.Path.GetValueFromDocument(d) - if err != nil { - return err + vs := make([]document.Value, 0, len(idx.Info.Paths)) + for _, path := range idx.Info.Paths { + v, err := path.GetValueFromDocument(d) + if err != nil { + if err == document.ErrFieldNotFound { + v = document.NewNullValue() + } else { + return err + } + } + + vs = append(vs, v) } - err = idx.Delete(v, key) + err = idx.Delete(vs, key) if err != nil { return err } @@ -179,12 +194,16 @@ func (t *Table) replace(indexes []*Index, key []byte, d document.Document) error // remove key from indexes for _, idx := range indexes { - v, err := idx.Info.Path.GetValueFromDocument(old) - if err != nil { - v = document.NewNullValue() + vs := make([]document.Value, 0, len(idx.Info.Paths)) + for _, path := range idx.Info.Paths { + v, err := path.GetValueFromDocument(old) + if err != nil { + v = document.NewNullValue() + } + vs = append(vs, v) } - err = idx.Delete(v, key) + err := idx.Delete(vs, key) if err != nil { return err } @@ -207,12 +226,17 @@ func (t *Table) replace(indexes []*Index, key []byte, d document.Document) error // update indexes for _, idx := range indexes { - v, err := idx.Info.Path.GetValueFromDocument(d) - if err != nil { - v = document.NewNullValue() + vs := make([]document.Value, 0, len(idx.Info.Paths)) + for _, path := range idx.Info.Paths { + v, err := path.GetValueFromDocument(d) + if err != nil { + v = document.NewNullValue() + } + + vs = append(vs, v) } - err = idx.Set(v, key) + err = idx.Set(vs, key) if err != nil { if err == ErrIndexDuplicateValue { return ErrDuplicateDocument @@ -350,7 +374,7 @@ func (t *Table) encodeValueToKey(info *TableInfo, v document.Value) ([]byte, err } // if a primary key was defined and the primary is typed, convert the value to the right type. 
- if !pk.Type.IsZero() { + if !pk.Type.IsAny() { v, err = v.CastAs(pk.Type) if err != nil { return nil, err @@ -400,7 +424,7 @@ func (t *Table) iterate(pivot document.Value, reverse bool, fn func(d document.D info := t.Info() // if there is a pivot, convert it to the right type - if !pivot.Type.IsZero() && pivot.V != nil { + if !pivot.Type.IsAny() && pivot.V != nil { var err error seek, err = t.encodeValueToKey(info, pivot) if err != nil { diff --git a/database/table_test.go b/database/table_test.go index a0606e93a..7438c99e6 100644 --- a/database/table_test.go +++ b/database/table_test.go @@ -295,7 +295,7 @@ func TestTableInsert(t *testing.T) { require.NoError(t, err) err = tx.CreateIndex(&database.IndexInfo{ - IndexName: "idxFoo", TableName: "test", Path: parsePath(t, "foo"), + IndexName: "idxFoo", TableName: "test", Paths: []document.Path{parsePath(t, "foo")}, }) require.NoError(t, err) idx, err := tx.GetIndex("idxFoo") @@ -318,7 +318,7 @@ func TestTableInsert(t *testing.T) { require.NoError(t, err) var count int - err = idx.AscendGreaterOrEqual(document.Value{}, func(val, k []byte) error { + err = idx.AscendGreaterOrEqual([]document.Value{document.Value{}}, func(val, k []byte) error { switch count { case 0: // key2, which doesn't countain the field must appear first in the next, @@ -639,18 +639,31 @@ func TestTableReplace(t *testing.T) { _, tx, cleanup := newTestTx(t) defer cleanup() - err := tx.CreateTable("test", nil) + err := tx.CreateTable("test1", nil) + require.NoError(t, err) + + err = tx.CreateTable("test2", nil) require.NoError(t, err) + // simple indexes err = tx.CreateIndex(&database.IndexInfo{ - Path: document.NewPath("a"), + Paths: []document.Path{document.NewPath("a")}, Unique: true, - TableName: "test", + TableName: "test1", IndexName: "idx_foo_a", }) require.NoError(t, err) - tb, err := tx.GetTable("test") + // composite indexes + err = tx.CreateIndex(&database.IndexInfo{ + Paths: []document.Path{document.NewPath("x"), document.NewPath("y")}, + Unique: true, + TableName: "test2", + IndexName: "idx_foo_x_y", + }) + require.NoError(t, err) + + tb, err := tx.GetTable("test1") require.NoError(t, err) // insert two different documents @@ -659,26 +672,63 @@ func TestTableReplace(t *testing.T) { d2, err := tb.Insert(testutil.MakeDocument(t, `{"a": 2, "b": 2}`)) require.NoError(t, err) - before := testutil.GetIndexContent(t, tx, "idx_foo_a") + beforeIdxA := testutil.GetIndexContent(t, tx, "idx_foo_a") - // replace doc 1 without modifying indexed key + // --- a + // replace d1 without modifying indexed key err = tb.Replace(d1.(document.Keyer).RawKey(), testutil.MakeDocument(t, `{"a": 1, "b": 3}`)) require.NoError(t, err) - // index should be the same as before - require.Equal(t, before, testutil.GetIndexContent(t, tx, "idx_foo_a")) - // replace doc 2 and modify indexed key + // indexes should be the same as before + require.Equal(t, beforeIdxA, testutil.GetIndexContent(t, tx, "idx_foo_a")) + + // replace d2 and modify indexed key err = tb.Replace(d2.(document.Keyer).RawKey(), testutil.MakeDocument(t, `{"a": 3, "b": 3}`)) require.NoError(t, err) - // index should be different for doc 2 + + // indexes should be different for d2 got := testutil.GetIndexContent(t, tx, "idx_foo_a") - require.Equal(t, before[0], got[0]) - require.NotEqual(t, before[1], got[1]) + require.Equal(t, beforeIdxA[0], got[0]) + require.NotEqual(t, beforeIdxA[1], got[1]) - // replace doc 1 with duplicate indexed key + // replace d1 with duplicate indexed key err = tb.Replace(d1.(document.Keyer).RawKey(), 
testutil.MakeDocument(t, `{"a": 3, "b": 3}`)) + + // index should be the same as before + require.Equal(t, database.ErrDuplicateDocument, err) + + // --- x, y + tb, err = tx.GetTable("test2") + require.NoError(t, err) + // insert two different documents + dc1, err := tb.Insert(testutil.MakeDocument(t, `{"x": 1, "y": 1, "z": 1}`)) + require.NoError(t, err) + dc2, err := tb.Insert(testutil.MakeDocument(t, `{"x": 2, "y": 2, "z": 2}`)) + require.NoError(t, err) + + beforeIdxXY := testutil.GetIndexContent(t, tx, "idx_foo_x_y") + // replace dc1 without modifying indexed key + err = tb.Replace(dc1.(document.Keyer).RawKey(), testutil.MakeDocument(t, `{"x": 1, "y": 1, "z": 2}`)) + require.NoError(t, err) + + // index should be the same as before + require.Equal(t, beforeIdxXY, testutil.GetIndexContent(t, tx, "idx_foo_x_y")) + + // replace dc2 and modify indexed key + err = tb.Replace(dc2.(document.Keyer).RawKey(), testutil.MakeDocument(t, `{"x": 3, "y": 3, "z": 3}`)) + require.NoError(t, err) + + // indexes should be different for d2 + got = testutil.GetIndexContent(t, tx, "idx_foo_x_y") + require.Equal(t, beforeIdxXY[0], got[0]) + require.NotEqual(t, beforeIdxXY[1], got[1]) + + // replace dc2 with duplicate indexed key + err = tb.Replace(dc1.(document.Keyer).RawKey(), testutil.MakeDocument(t, `{"x": 3, "y": 3, "z": 3}`)) + // index should be the same as before require.Equal(t, database.ErrDuplicateDocument, err) + }) } @@ -739,21 +789,28 @@ func TestTableIndexes(t *testing.T) { Unique: true, IndexName: "idx1a", TableName: "test1", - Path: parsePath(t, "a"), + Paths: []document.Path{parsePath(t, "a")}, }) require.NoError(t, err) err = tx.CreateIndex(&database.IndexInfo{ Unique: false, IndexName: "idx1b", TableName: "test1", - Path: parsePath(t, "b"), + Paths: []document.Path{parsePath(t, "b")}, + }) + require.NoError(t, err) + err = tx.CreateIndex(&database.IndexInfo{ + Unique: false, + IndexName: "idx1ab", + TableName: "test1", + Paths: []document.Path{parsePath(t, "a"), parsePath(t, "b")}, }) require.NoError(t, err) err = tx.CreateIndex(&database.IndexInfo{ Unique: false, - IndexName: "ifx2a", + IndexName: "idx2a", TableName: "test2", - Path: parsePath(t, "a"), + Paths: []document.Path{parsePath(t, "a")}, }) require.NoError(t, err) @@ -762,7 +819,7 @@ func TestTableIndexes(t *testing.T) { m := tb.Indexes() require.NoError(t, err) - require.Len(t, m, 2) + require.Len(t, m, 3) }) } diff --git a/document/array.go b/document/array.go index ec4ae8d57..312c23a72 100644 --- a/document/array.go +++ b/document/array.go @@ -96,6 +96,10 @@ func (vb *ValueBuffer) GetByIndex(i int) (Value, error) { // Len returns the length the of array func (vb *ValueBuffer) Len() int { + if vb == nil { + return 0 + } + return len(vb.Values) } @@ -246,6 +250,46 @@ func (vb *ValueBuffer) UnmarshalJSON(data []byte) error { return nil } +func (vb *ValueBuffer) Types() []ValueType { + types := make([]ValueType, len(vb.Values)) + + for i, v := range vb.Values { + types[i] = v.Type + } + + return types +} + +// IsEqual compares two ValueBuffer and returns true if and only if +// both each values and types are respectively equal. 
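+// Lengths, element types and element values must all match; nil or empty buffers are considered equal to each other.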
+func (vb *ValueBuffer) IsEqual(other *ValueBuffer) bool { + if vb.Len() != other.Len() { + return false + } + + // empty buffers are always equal eh + if vb.Len() == 0 && other.Len() == 0 { + return true + } + + otherTypes := other.Types() + types := vb.Types() + + for i, typ := range types { + if typ != otherTypes[i] { + return false + } + } + + for i, v := range vb.Values { + if eq, err := v.IsEqual(other.Values[i]); err != nil || !eq { + return false + } + } + + return true +} + type sortableArray struct { vb *ValueBuffer err error diff --git a/document/document.go b/document/document.go index 76d096e5e..386d63383 100644 --- a/document/document.go +++ b/document/document.go @@ -558,6 +558,12 @@ func (p Path) GetValueFromArray(a Array) (Value, error) { return p[1:].getValueFromValue(v) } +func (p Path) Clone() Path { + c := make(Path, len(p)) + copy(c, p) + return c +} + func (p Path) getValueFromValue(v Value) (Value, error) { switch v.Type { case DocumentValue: diff --git a/document/value.go b/document/value.go index 647bb3f16..f9d37aa17 100644 --- a/document/value.go +++ b/document/value.go @@ -37,6 +37,9 @@ type ValueType uint8 // These types are separated by family so that when // new types are introduced we don't need to modify them. const ( + // denote the absence of type + AnyType ValueType = 0x0 + NullValue ValueType = 0x80 BoolValue ValueType = 0x81 @@ -88,9 +91,9 @@ func (t ValueType) IsNumber() bool { return t == IntegerValue || t == DoubleValue } -// IsZero returns whether this is a valid type. -func (t ValueType) IsZero() bool { - return t == 0 +// IsAny returns whether this is type is Any or a real type +func (t ValueType) IsAny() bool { + return t == AnyType } // A Value stores encoded data alongside its type. diff --git a/document/value_encoding.go b/document/value_encoding.go index c8b9d47cf..a5bad3b2a 100644 --- a/document/value_encoding.go +++ b/document/value_encoding.go @@ -8,8 +8,12 @@ import ( ) const ( - arrayValueDelim = 0x1f - arrayEnd = 0x1e + // ArrayValueDelim is a separator used when encoding document.Array in + // binary reprsentation + ArrayValueDelim = 0x1f + // ArrayEnd is the final separator used when encoding document.Array in + // binary reprsentation. + ArrayEnd = 0x1e documentValueDelim = 0x1c documentEnd = 0x1d ) @@ -83,7 +87,7 @@ func (ve *ValueEncoder) appendValue(v Value) error { func (ve *ValueEncoder) appendArray(a Array) error { err := a.Iterate(func(i int, value Value) error { if i > 0 { - err := ve.append(arrayValueDelim) + err := ve.append(ArrayValueDelim) if err != nil { return err } @@ -95,7 +99,7 @@ func (ve *ValueEncoder) appendArray(a Array) error { return err } - return ve.append(arrayEnd) + return ve.append(ArrayEnd) } // appendDocument encodes a document into a sort-ordered binary representation. 
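For illustration, a minimal standalone sketch (not part of the patch) of how the document package additions above fit together. It assumes only the APIs visible in this diff: AnyType, ValueType.IsAny, NewValueBuffer, ValueBuffer.Types and ValueBuffer.IsEqual.

    package main

    import (
        "fmt"

        "github.com/genjidb/genji/document"
    )

    func main() {
        // A zero Value carries no concrete type: its ValueType is AnyType,
        // which is what IsAny (formerly IsZero) reports.
        var v document.Value
        fmt.Println(v.Type.IsAny()) // true

        // ValueBuffer.Types lists the element types in order, which is how
        // composite index boundaries can be checked against IndexInfo.Types.
        a := document.NewValueBuffer(document.NewIntegerValue(1), document.NewDoubleValue(2))
        b := document.NewValueBuffer(document.NewIntegerValue(1), document.NewDoubleValue(2))
        fmt.Println(a.Types()) // element types of the buffer, in order

        // IsEqual is true only when lengths, element types and element
        // values all match.
        fmt.Println(a.IsEqual(b)) // true
    }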
@@ -239,8 +243,8 @@ func decodeArray(data []byte) (Array, int, error) { var vb ValueBuffer var readCount int - for len(data) > 0 && data[0] != arrayEnd { - v, i, err := decodeValueUntil(data, arrayValueDelim, arrayEnd) + for len(data) > 0 && data[0] != ArrayEnd { + v, i, err := decodeValueUntil(data, ArrayValueDelim, ArrayEnd) if err != nil { return nil, i, err } @@ -248,7 +252,7 @@ func decodeArray(data []byte) (Array, int, error) { vb.Append(v) // skip the delimiter - if data[i] == arrayValueDelim { + if data[i] == ArrayValueDelim { i++ } diff --git a/expr/comparison.go b/expr/comparison.go index 1bfda6cf0..997cacb69 100644 --- a/expr/comparison.go +++ b/expr/comparison.go @@ -146,6 +146,12 @@ func IsComparisonOperator(op Operator) bool { return false } +// IsEqualOperator returns true if e is the = operator +func IsEqualOperator(op Operator) bool { + _, ok := op.(*EqOperator) + return ok +} + // IsAndOperator reports if e is the AND operator. func IsAndOperator(op Operator) bool { _, ok := op.(*AndOp) @@ -164,6 +170,12 @@ func IsInOperator(e Expr) bool { return ok } +// IsNotInOperator reports if e is the NOT IN operator. +func IsNotInOperator(e Expr) bool { + _, ok := e.(*NotInOperator) + return ok +} + type InOperator struct { *simpleOperator } diff --git a/planner/explain_test.go b/planner/explain_test.go index e76125b4c..32c839eff 100644 --- a/planner/explain_test.go +++ b/planner/explain_test.go @@ -13,28 +13,29 @@ func TestExplainStmt(t *testing.T) { fails bool expected string }{ - // {"EXPLAIN SELECT 1 + 1", false, `"project(1 + 1)"`}, - // {"EXPLAIN SELECT * FROM noexist", true, ``}, - // {"EXPLAIN SELECT * FROM test", false, `"seqScan(test)"`}, - // {"EXPLAIN SELECT *, a FROM test", false, `"seqScan(test) | project(*, a)"`}, - // {"EXPLAIN SELECT a + 1 FROM test", false, `"seqScan(test) | project(a + 1)"`}, - // {"EXPLAIN SELECT a + 1 FROM test WHERE c > 10", false, `"seqScan(test) | filter(c > 10) | project(a + 1)"`}, - // {"EXPLAIN SELECT a + 1 FROM test WHERE c > 10 AND d > 20", false, `"seqScan(test) | filter(c > 10) | filter(d > 20) | project(a + 1)"`}, - // {"EXPLAIN SELECT a + 1 FROM test WHERE c > 10 OR d > 20", false, `"seqScan(test) | filter(c > 10 OR d > 20) | project(a + 1)"`}, - // {"EXPLAIN SELECT a + 1 FROM test WHERE c IN [1 + 1, 2 + 2]", false, `"seqScan(test) | filter(c IN [2, 4]) | project(a + 1)"`}, + {"EXPLAIN SELECT 1 + 1", false, `"project(1 + 1)"`}, + {"EXPLAIN SELECT * FROM noexist", true, ``}, + {"EXPLAIN SELECT * FROM test", false, `"seqScan(test)"`}, + {"EXPLAIN SELECT *, a FROM test", false, `"seqScan(test) | project(*, a)"`}, + {"EXPLAIN SELECT a + 1 FROM test", false, `"seqScan(test) | project(a + 1)"`}, + {"EXPLAIN SELECT a + 1 FROM test WHERE c > 10", false, `"seqScan(test) | filter(c > 10) | project(a + 1)"`}, + {"EXPLAIN SELECT a + 1 FROM test WHERE c > 10 AND d > 20", false, `"seqScan(test) | filter(c > 10) | filter(d > 20) | project(a + 1)"`}, + {"EXPLAIN SELECT a + 1 FROM test WHERE c > 10 OR d > 20", false, `"seqScan(test) | filter(c > 10 OR d > 20) | project(a + 1)"`}, + {"EXPLAIN SELECT a + 1 FROM test WHERE c IN [1 + 1, 2 + 2]", false, `"seqScan(test) | filter(c IN [2, 4]) | project(a + 1)"`}, {"EXPLAIN SELECT a + 1 FROM test WHERE a > 10", false, `"indexScan(\"idx_a\", [10, -1, true]) | project(a + 1)"`}, - // {"EXPLAIN SELECT a + 1 FROM test WHERE a > 10 AND b > 20 AND c > 30", false, `"indexScan(\"idx_b\", [20, -1, true]) | filter(a > 10) | filter(c > 30) | project(a + 1)"`}, - // {"EXPLAIN SELECT a + 1 FROM test WHERE c > 30 ORDER 
BY d LIMIT 10 OFFSET 20", false, `"seqScan(test) | filter(c > 30) | project(a + 1) | sort(d) | skip(20) | take(10)"`}, - // {"EXPLAIN SELECT a + 1 FROM test WHERE c > 30 ORDER BY d DESC LIMIT 10 OFFSET 20", false, `"seqScan(test) | filter(c > 30) | project(a + 1) | sortReverse(d) | skip(20) | take(10)"`}, - // // {"EXPLAIN SELECT a + 1 FROM test WHERE c > 30 ORDER BY a DESC LIMIT 10 OFFSET 20", false, `"indexScanReverse(\"idx_a\") | filter(c > 30) | project(a + 1) | skip(20) | take(10)"`}, - // {"EXPLAIN SELECT a + 1 FROM test WHERE c > 30 ORDER BY a DESC LIMIT 10 OFFSET 20", false, `"seqScan(test) | filter(c > 30) | project(a + 1) | sortReverse(a) | skip(20) | take(10)"`}, - // {"EXPLAIN SELECT a + 1 FROM test WHERE c > 30 GROUP BY a + 1 ORDER BY a DESC LIMIT 10 OFFSET 20", false, `"seqScan(test) | filter(c > 30) | groupBy(a + 1) | hashAggregate() | project(a + 1) | sortReverse(a) | skip(20) | take(10)"`}, - // {"EXPLAIN UPDATE test SET a = 10", false, `"seqScan(test) | set(a, 10) | tableReplace('test')"`}, - // {"EXPLAIN UPDATE test SET a = 10 WHERE c > 10", false, `"seqScan(test) | filter(c > 10) | set(a, 10) | tableReplace('test')"`}, - // {"EXPLAIN UPDATE test SET a = 10 WHERE a > 10", false, `"indexScan(\"idx_a\", [10, -1, true]) | set(a, 10) | tableReplace('test')"`}, - // {"EXPLAIN DELETE FROM test", false, `"seqScan(test) | tableDelete('test')"`}, - // {"EXPLAIN DELETE FROM test WHERE c > 10", false, `"seqScan(test) | filter(c > 10) | tableDelete('test')"`}, - // {"EXPLAIN DELETE FROM test WHERE a > 10", false, `"indexScan(\"idx_a\", [10, -1, true]) | tableDelete('test')"`}, + {"EXPLAIN SELECT a + 1 FROM test WHERE x = 10 AND y > 5", false, `"indexScan(\"idx_x_y\", [[10, 5], -1, true]) | project(a + 1)"`}, + {"EXPLAIN SELECT a + 1 FROM test WHERE a > 10 AND b > 20 AND c > 30", false, `"indexScan(\"idx_b\", [20, -1, true]) | filter(a > 10) | filter(c > 30) | project(a + 1)"`}, + {"EXPLAIN SELECT a + 1 FROM test WHERE c > 30 ORDER BY d LIMIT 10 OFFSET 20", false, `"seqScan(test) | filter(c > 30) | project(a + 1) | sort(d) | skip(20) | take(10)"`}, + {"EXPLAIN SELECT a + 1 FROM test WHERE c > 30 ORDER BY d DESC LIMIT 10 OFFSET 20", false, `"seqScan(test) | filter(c > 30) | project(a + 1) | sortReverse(d) | skip(20) | take(10)"`}, + // {"EXPLAIN SELECT a + 1 FROM test WHERE c > 30 ORDER BY a DESC LIMIT 10 OFFSET 20", false, `"indexScanReverse(\"idx_a\") | filter(c > 30) | project(a + 1) | skip(20) | take(10)"`}, + {"EXPLAIN SELECT a + 1 FROM test WHERE c > 30 ORDER BY a DESC LIMIT 10 OFFSET 20", false, `"seqScan(test) | filter(c > 30) | project(a + 1) | sortReverse(a) | skip(20) | take(10)"`}, + {"EXPLAIN SELECT a + 1 FROM test WHERE c > 30 GROUP BY a + 1 ORDER BY a DESC LIMIT 10 OFFSET 20", false, `"seqScan(test) | filter(c > 30) | groupBy(a + 1) | hashAggregate() | project(a + 1) | sortReverse(a) | skip(20) | take(10)"`}, + {"EXPLAIN UPDATE test SET a = 10", false, `"seqScan(test) | set(a, 10) | tableReplace('test')"`}, + {"EXPLAIN UPDATE test SET a = 10 WHERE c > 10", false, `"seqScan(test) | filter(c > 10) | set(a, 10) | tableReplace('test')"`}, + {"EXPLAIN UPDATE test SET a = 10 WHERE a > 10", false, `"indexScan(\"idx_a\", [10, -1, true]) | set(a, 10) | tableReplace('test')"`}, + {"EXPLAIN DELETE FROM test", false, `"seqScan(test) | tableDelete('test')"`}, + {"EXPLAIN DELETE FROM test WHERE c > 10", false, `"seqScan(test) | filter(c > 10) | tableDelete('test')"`}, + {"EXPLAIN DELETE FROM test WHERE a > 10", false, `"indexScan(\"idx_a\", [10, -1, true]) | tableDelete('test')"`}, } 
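As a companion to the expectations above, a hedged sketch of observing the new composite plan through the public API. The db.Query/Result.Iterate shapes are assumed from their use elsewhere in the codebase, and the expected plan string is copied from the test table above rather than from a real run.

    package main

    import (
        "fmt"

        "github.com/genjidb/genji"
        "github.com/genjidb/genji/document"
    )

    func main() {
        db, err := genji.Open(":memory:")
        if err != nil {
            panic(err)
        }
        defer db.Close()

        err = db.Exec(`
            CREATE TABLE test;
            CREATE INDEX idx_x_y ON test (x, y);
        `)
        if err != nil {
            panic(err)
        }

        // With x constrained by = and y by >, the planner can use the
        // composite index idx_x_y; per the test above the plan reads:
        //   indexScan("idx_x_y", [[10, 5], -1, true]) | project(a + 1)
        res, err := db.Query("EXPLAIN SELECT a + 1 FROM test WHERE x = 10 AND y > 5")
        if err != nil {
            panic(err)
        }
        defer res.Close()

        err = res.Iterate(func(d document.Document) error {
            fmt.Println(d) // the returned document carries the plan string
            return nil
        })
        if err != nil {
            panic(err)
        }
    }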
for _, test := range tests { @@ -48,6 +49,7 @@ func TestExplainStmt(t *testing.T) { err = db.Exec(` CREATE INDEX idx_a ON test (a); CREATE UNIQUE INDEX idx_b ON test (b); + CREATE INDEX idx_x_y ON test (x, y); `) require.NoError(t, err) diff --git a/planner/optimizer.go b/planner/optimizer.go index 0fcc2aabc..8312df3e7 100644 --- a/planner/optimizer.go +++ b/planner/optimizer.go @@ -382,16 +382,24 @@ func isProjectionUnique(indexes database.Indexes, po *stream.ProjectOperator, pk return true } -// UseIndexBasedOnFilterNodeRule scans the tree for the first filter node whose condition is an -// operator that satisfies the following criterias: +type filterNode struct { + path document.Path + v document.Value + f *stream.FilterOperator +} + +// UseIndexBasedOnFilterNodeRule scans the tree for filter nodes whose conditions are +// operators that satisfies the following criterias: // - is a comparison operator // - one of its operands is a path expression that is indexed // - the other operand is a literal value or a parameter -// If found, it will replace the input node by an indexInputNode using this index. +// +// If one or many are found, it will replace the input node by an indexInputNode using this index, +// removing the now irrelevant filter nodes. +// // TODO(asdine): add support for ORDER BY +// TODO(jh): clarify cost code in composite indexes case func UseIndexBasedOnFilterNodeRule(s *stream.Stream, tx *database.Transaction, params []expr.Param) (*stream.Stream, error) { - n := s.Op - // first we lookup for the seq scan node. // Here we will assume that at this point // if there is one it has to be the @@ -413,20 +421,191 @@ func UseIndexBasedOnFilterNodeRule(s *stream.Stream, tx *database.Transaction, p indexes := t.Indexes() var candidates []*candidate + var filterNodes []filterNode - // look for all selection nodes that satisfy our requirements - for n != nil { + // then we collect all usable filter nodes, in order to see what index (or PK) can be + // used to replace them. + for n := s.Op; n != nil; n = n.GetPrev() { if f, ok := n.(*stream.FilterOperator); ok { - candidate, err := getCandidateFromfilterNode(f, st.TableName, info, indexes) - if err != nil { - return nil, err + if f.E == nil { + continue + } + + op, ok := f.E.(expr.Operator) + if !ok { + continue + } + + if !expr.OperatorIsIndexCompatible(op) { + continue + } + + // determine if the operator could benefit from an index + ok, path, e := operatorCanUseIndex(op) + if !ok { + continue + } + + ev, ok := e.(expr.LiteralValue) + if !ok { + continue + } + + v := document.Value(ev) + + filterNodes = append(filterNodes, filterNode{path: path, v: v, f: f}) + + // check for primary keys scan while iterating on the filter nodes + if pk := info.GetPrimaryKey(); pk != nil && pk.Path.IsEqual(path) { + // if both types are different, don't select this scanner + v, ok, err := operandCanUseIndex(pk.Type, pk.Path, info.FieldConstraints, v) + if err != nil { + return nil, err + } + + if !ok { + continue + } else { + cd := candidate{ + filterOps: []*stream.FilterOperator{f}, + isPk: true, + priority: 3, + } + + ranges, err := getRangesFromOp(op, v) + if err != nil { + return nil, err + } + + cd.newOp = stream.PkScan(st.TableName, ranges...) 
+ cd.cost = ranges.Cost() + + candidates = append(candidates, &cd) + } } - if candidate != nil { - candidates = append(candidates, candidate) + } + } + + findByPath := func(path document.Path) *filterNode { + for _, fno := range filterNodes { + if fno.path.IsEqual(path) { + return &fno } } - n = n.GetPrev() + return nil + } + + isNodeEq := func(fno *filterNode) bool { + op := fno.f.E.(expr.Operator) + return expr.IsEqualOperator(op) || expr.IsInOperator(op) + } + isNodeComp := func(fno *filterNode) bool { + op := fno.f.E.(expr.Operator) + return expr.IsComparisonOperator(op) + } + + // iterate on all indexes for that table, checking for each of them if its paths are matching + // the filter nodes of the given query. The resulting nodes are ordered like the index paths. +outer: + for _, idx := range indexes { + // order filter nodes by how the index paths order them; if absent, nil in still inserted + found := make([]*filterNode, len(idx.Info.Paths)) + for i, path := range idx.Info.Paths { + fno := findByPath(path) + + if fno != nil { + // mark this path from the index as found + found[i] = fno + } + } + + // Iterate on all the nodes for the given index, checking for each of its path, their is a corresponding node. + // It's possible for an index to be selected if not all of its paths are covered by the nodes, if and only if + // those are contiguous, relatively to the paths, i.e: + // - given idx_foo_abc(a, b, c) + // - given a query SELECT ... WHERE a = 1 AND b > 2 + // - the paths a and b are contiguous in the index definition, this index can be used + // - given a query SELECT ... WHERE a = 1 AND c > 2 + // - the paths a and c are not contiguous in the index definition, this index cannot be used for both values + // but it will be used with a and c with a normal filter node. + var fops []*stream.FilterOperator + var usableFilterNodes []*filterNode + contiguous := true + for i, fno := range found { + if contiguous { + if fno == nil { + contiguous = false + continue + } + + // is looking ahead at the next node possible? + if i+1 < len(found) { + // is there another node found after this one? + if found[i+1] != nil { + // current one must be an eq node then + if !isNodeEq(fno) { + continue outer + } + } else { + // the next node is the last one found, so the current one can also be a comparison and not just eq + if !isNodeComp(fno) { + continue outer + } + } + } else { + // that's the last filter node, it can be a comparison, + if !isNodeComp(fno) { + continue outer + } + } + + // what the index says this node type must be + typ := idx.Info.Types[i] + + fno.v, ok, err = operandCanUseIndex(typ, fno.path, info.FieldConstraints, fno.v) + if err != nil { + return nil, err + } + if !ok { + continue outer + } + } else { + // if on the index idx_abc(a,b,c), a is found, b isn't but c is + // then idx_abc is valid but just with a, c will use a filter node instead + continue + } + + usableFilterNodes = append(usableFilterNodes, fno) + fops = append(fops, fno.f) + } + + // no nodes for the index has been found + if found[0] == nil { + continue outer + } + + cd := candidate{ + filterOps: fops, + isIndex: true, + } + + // there are probably less values to iterate on if the index is unique + if idx.Info.Unique { + cd.priority = 2 + } else { + cd.priority = 1 + } + + ranges, err := getRangesFromFilterNodes(usableFilterNodes) + if err != nil { + return nil, err + } + + cd.newOp = stream.IndexScan(idx.Info.IndexName, ranges...) 
+ cd.cost = ranges.Cost() + + candidates = append(candidates, &cd) } // determine which index is the most interesting and replace it in the tree. @@ -444,15 +623,27 @@ func UseIndexBasedOnFilterNodeRule(s *stream.Stream, tx *database.Transaction, p continue } - if currentCost < cost { + // With the current cost be computing on ranges, it's a bit hard to know what's best in + // between indexes. So, before looking at the cost, we look at how many filter ops would + // be replaced. + if len(selectedCandidate.filterOps) < len(candidate.filterOps) { selectedCandidate = candidates[i] cost = currentCost - } + continue + } else if len(selectedCandidate.filterOps) == len(candidate.filterOps) { + if currentCost < cost { + selectedCandidate = candidates[i] + cost = currentCost + continue + } - // if the cost is the same and the candidate's related index has a higher priority, - // select it. - if currentCost == cost && selectedCandidate.priority < candidate.priority { - selectedCandidate = candidates[i] + // if the cost is the same and the candidate's related index has a higher priority, + // select it. + if currentCost == cost { + if selectedCandidate.priority < candidate.priority { + selectedCandidate = candidates[i] + } + } } } @@ -461,7 +652,9 @@ func UseIndexBasedOnFilterNodeRule(s *stream.Stream, tx *database.Transaction, p } // remove the selection node from the tree - s.Remove(selectedCandidate.filterOp) + for _, f := range selectedCandidate.filterOps { + s.Remove(f) + } // we replace the seq scan node by the selected index scan node stream.InsertBefore(s.First(), selectedCandidate.newOp) @@ -472,9 +665,9 @@ func UseIndexBasedOnFilterNodeRule(s *stream.Stream, tx *database.Transaction, p } type candidate struct { - // filter operator to remove and replace by either an indexScan + // filter operators to remove and replace by either an indexScan // or pkScan operators. - filterOp *stream.FilterOperator + filterOps []*stream.FilterOperator // the candidate indexScan or pkScan operator newOp stream.Operator // the cost of the candidate @@ -488,91 +681,6 @@ type candidate struct { priority int } -// getCandidateFromfilterNode analyses f and determines if it can be replaced by an indexScan or pkScan operator. 
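To make the "contiguous prefix" rule described in the comments above easier to follow, here is a deliberately simplified, standalone illustration. It is not the optimizer code (which works on document.Path, typed operands and filter nodes); paths are plain strings and operators plain tokens, purely to show the shape of the rule.

    package main

    import "fmt"

    // usablePrefix reports how many leading paths of a composite index can be
    // answered by the filters, following the rule sketched above:
    //   - matched paths must form a contiguous prefix of the index definition,
    //   - every matched path except the last matched one must use = (or IN),
    //   - the last matched path may use any comparison operator.
    // ok is false when a range operator appears before the last matched path,
    // in which case the index is not selected at all.
    func usablePrefix(indexPaths []string, filters map[string]string) (int, bool) {
        n := 0
        for _, p := range indexPaths {
            if _, found := filters[p]; !found {
                break // gap: the remaining paths keep their filter nodes
            }
            n++
        }
        for i := 0; i < n-1; i++ {
            if op := filters[indexPaths[i]]; op != "=" && op != "IN" {
                return 0, false
            }
        }
        return n, true
    }

    func main() {
        idx := []string{"a", "b", "c"} // think idx_foo_a_b_c(a, b, c)

        n, ok := usablePrefix(idx, map[string]string{"a": "=", "b": ">"})
        fmt.Println(n, ok) // 2 true: WHERE a = 1 AND b > 2 uses the index on (a, b)

        n, ok = usablePrefix(idx, map[string]string{"a": "=", "c": ">"})
        fmt.Println(n, ok) // 1 true: a and c are not contiguous, only a is used

        n, ok = usablePrefix(idx, map[string]string{"a": ">", "b": "="})
        fmt.Println(n, ok) // 0 false: a range operator before the last matched path
    }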
-func getCandidateFromfilterNode(f *stream.FilterOperator, tableName string, info *database.TableInfo, indexes database.Indexes) (*candidate, error) { - if f.E == nil { - return nil, nil - } - - // the root of the condition must be an operator - op, ok := f.E.(expr.Operator) - if !ok { - return nil, nil - } - - // determine if the operator can read from the index - if !expr.OperatorIsIndexCompatible(op) { - return nil, nil - } - - // determine if the operator can benefit from an index - ok, path, e := operatorCanUseIndex(op) - if !ok { - return nil, nil - } - - // analyse the other operand to make sure it's a literal - ev, ok := e.(expr.LiteralValue) - if !ok { - return nil, nil - } - v := document.Value(ev) - - // now, we look if an index exists for that path - cd := candidate{ - filterOp: f, - } - - // we'll start with checking if the path is the primary key of the table - if pk := info.GetPrimaryKey(); pk != nil && pk.Path.IsEqual(path) { - // check if the operand can be used and convert it when possible - v, ok, err := operandCanUseIndex(pk.Type, pk.Path, info.FieldConstraints, v) - if err != nil || !ok { - return nil, err - } - - cd.isPk = true - cd.priority = 3 - - ranges, err := getRangesFromOp(op, v) - if err != nil { - return nil, err - } - - cd.newOp = stream.PkScan(tableName, ranges...) - cd.cost = ranges.Cost() - return &cd, nil - } - - // if not, check if an index exists for that path - if idx := indexes.GetIndexByPath(document.Path(path)); idx != nil { - // check if the operand can be used and convert it when possible - v, ok, err := operandCanUseIndex(idx.Info.Type, idx.Info.Path, info.FieldConstraints, v) - if err != nil || !ok { - return nil, err - } - - cd.isIndex = true - if idx.Info.Unique { - cd.priority = 2 - } else { - cd.priority = 1 - } - - ranges, err := getRangesFromOp(op, v) - if err != nil { - return nil, err - } - - cd.newOp = stream.IndexScan(idx.Info.IndexName, ranges...) - cd.cost = ranges.Cost() - - return &cd, nil - } - - return nil, nil -} - func operatorCanUseIndex(op expr.Operator) (bool, document.Path, expr.Expr) { lf, leftIsField := op.LeftHand().(expr.Path) rf, rightIsField := op.RightHand().(expr.Path) @@ -620,7 +728,7 @@ func operandCanUseIndex(indexType document.ValueType, path document.Path, fc dat } // if the index is not typed, any operand can work - if indexType.IsZero() { + if indexType.IsAny() { return converted, true, nil } @@ -628,38 +736,135 @@ func operandCanUseIndex(indexType document.ValueType, path document.Path, fc dat return converted, indexType == converted.Type, nil } -func getRangesFromOp(op expr.Operator, v document.Value) (stream.Ranges, error) { - var ranges stream.Ranges +func getRangesFromFilterNodes(fnodes []*filterNode) (stream.IndexRanges, error) { + var ranges stream.IndexRanges + vb := document.NewValueBuffer() + // store IN operands with their position (in the index paths) as a key + inOperands := make(map[int]document.Array) + + for i, fno := range fnodes { + op := fno.f.E.(expr.Operator) + v := fno.v + + switch op.(type) { + case *expr.EqOperator, *expr.GtOperator, *expr.GteOperator, *expr.LtOperator, *expr.LteOperator: + vb = vb.Append(v) + case *expr.InOperator: + // mark where the IN operator values are supposed to go is in the buffer + // and what are the value needed to generate the ranges. + // operatorCanUseIndex made sure v is an array. 
+ inOperands[i] = v.V.(document.Array) + + // placeholder for when we'll explode the IN operands in multiple ranges + vb = vb.Append(document.Value{}) + default: + panic(stringutil.Sprintf("unknown operator %#v", op)) + } + } + + if len(inOperands) > 1 { + // TODO FEATURE https://github.com/genjidb/genji/issues/392 + panic("unsupported operation: multiple IN operators on a composite index") + } + + // a small helper func to create a range based on an operator type + buildRange := func(op expr.Operator, vb *document.ValueBuffer) stream.IndexRange { + var rng stream.IndexRange + + switch op.(type) { + case *expr.EqOperator, *expr.InOperator: + rng.Exact = true + rng.Min = vb + case *expr.GtOperator: + rng.Exclusive = true + rng.Min = vb + case *expr.GteOperator: + rng.Min = vb + case *expr.LtOperator: + rng.Exclusive = true + rng.Max = vb + case *expr.LteOperator: + rng.Max = vb + } + + return rng + } + + // explode the IN operator values in multiple ranges + for pos, operands := range inOperands { + err := operands.Iterate(func(j int, value document.Value) error { + newVB := document.NewValueBuffer() + err := newVB.Copy(vb) + if err != nil { + return err + } + + // insert IN operand at the right position, replacing the placeholder value + newVB.Values[pos] = value + + // the last node is the only one that can be a comparison operator, so + // it's the one setting the range behaviour + last := fnodes[len(fnodes)-1] + op := last.f.E.(expr.Operator) + + rng := buildRange(op, newVB) + + ranges = ranges.Append(rng) + return nil + }) + + if err != nil { + return nil, err + } + } + + // Were there any IN operators requiring multiple ranges? + // If yes, we're done here. + if len(ranges) > 0 { + return ranges, nil + } + + // the last node is the only one that can be a comparison operator, so + // it's the one setting the range behaviour + last := fnodes[len(fnodes)-1] + op := last.f.E.(expr.Operator) + rng := buildRange(op, vb) + + return stream.IndexRanges{rng}, nil +} + +func getRangesFromOp(op expr.Operator, v document.Value) (stream.ValueRanges, error) { + var ranges stream.ValueRanges switch op.(type) { case *expr.EqOperator: - ranges = ranges.Append(stream.Range{ + ranges = ranges.Append(stream.ValueRange{ Min: v, Exact: true, }) case *expr.GtOperator: - ranges = ranges.Append(stream.Range{ + ranges = ranges.Append(stream.ValueRange{ Min: v, Exclusive: true, }) case *expr.GteOperator: - ranges = ranges.Append(stream.Range{ + ranges = ranges.Append(stream.ValueRange{ Min: v, }) case *expr.LtOperator: - ranges = ranges.Append(stream.Range{ + ranges = ranges.Append(stream.ValueRange{ Max: v, Exclusive: true, }) case *expr.LteOperator: - ranges = ranges.Append(stream.Range{ + ranges = ranges.Append(stream.ValueRange{ Max: v, }) case *expr.InOperator: - // opCanUseIndex made sure e is an array. + // operatorCanUseIndex made sure e is an array. 
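A simplified sketch of the IN-operand explosion performed by getRangesFromFilterNodes above, using plain ints instead of document.Value/ValueBuffer. Given an index on (a, b) and filters a IN [1, 2] AND b = 3, each IN operand yields its own range; the last filter node then decides whether each range is Exact, exclusive, a Min or a Max (see buildRange above), and multiple IN operators on one composite index are deliberately unsupported for now.

    package main

    import "fmt"

    // explodeIN copies the boundary prefix once per IN operand and substitutes
    // the operand at the placeholder position, producing one boundary per value.
    func explodeIN(prefix []int, inPos int, inValues []int) [][]int {
        var ranges [][]int
        for _, v := range inValues {
            r := make([]int, len(prefix))
            copy(r, prefix)
            r[inPos] = v // substitute the placeholder at the IN position
            ranges = append(ranges, r)
        }
        return ranges
    }

    func main() {
        // filters: a IN [1, 2] AND b = 3, on an index over (a, b).
        // Position 0 holds a placeholder (0) for the IN operand, position 1
        // holds the literal 3 from b = 3.
        prefix := []int{0, 3}
        fmt.Println(explodeIN(prefix, 0, []int{1, 2})) // [[1 3] [2 3]]
    }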
a := v.V.(document.Array) err := a.Iterate(func(i int, value document.Value) error { - ranges = ranges.Append(stream.Range{ + ranges = ranges.Append(stream.ValueRange{ Min: value, Exact: true, }) diff --git a/planner/optimizer_test.go b/planner/optimizer_test.go index 2dacce5ae..7a85c3534 100644 --- a/planner/optimizer_test.go +++ b/planner/optimizer_test.go @@ -300,7 +300,8 @@ func TestRemoveUnnecessaryDedupNodeRule(t *testing.T) { } } -func TestUseIndexBasedOnSelectionNodeRule(t *testing.T) { +func TestUseIndexBasedOnSelectionNodeRule_Simple(t *testing.T) { + newVB := document.NewValueBuffer tests := []struct { name string root, expected *st.Stream @@ -313,22 +314,22 @@ func TestUseIndexBasedOnSelectionNodeRule(t *testing.T) { { "FROM foo WHERE a = 1", st.New(st.SeqScan("foo")).Pipe(st.Filter(parser.MustParseExpr("a = 1"))), - st.New(st.IndexScan("idx_foo_a", st.Range{Min: document.NewIntegerValue(1), Exact: true})), + st.New(st.IndexScan("idx_foo_a", st.IndexRange{Min: newVB(document.NewIntegerValue(1)), Exact: true})), }, { "FROM foo WHERE a = 1 AND b = 2", st.New(st.SeqScan("foo")). Pipe(st.Filter(parser.MustParseExpr("a = 1"))). Pipe(st.Filter(parser.MustParseExpr("b = 2"))), - st.New(st.IndexScan("idx_foo_b", st.Range{Min: document.NewIntegerValue(2), Exact: true})). - Pipe(st.Filter(parser.MustParseExpr("a = 1"))), + st.New(st.IndexScan("idx_foo_a", st.IndexRange{Min: newVB(document.NewIntegerValue(1)), Exact: true})). + Pipe(st.Filter(parser.MustParseExpr("b = 2"))), }, { "FROM foo WHERE c = 3 AND b = 2", st.New(st.SeqScan("foo")). Pipe(st.Filter(parser.MustParseExpr("c = 3"))). Pipe(st.Filter(parser.MustParseExpr("b = 2"))), - st.New(st.IndexScan("idx_foo_c", st.Range{Min: document.NewIntegerValue(3), Exact: true})). + st.New(st.IndexScan("idx_foo_c", st.IndexRange{Min: newVB(document.NewIntegerValue(3)), Exact: true})). Pipe(st.Filter(parser.MustParseExpr("b = 2"))), }, { @@ -336,7 +337,7 @@ func TestUseIndexBasedOnSelectionNodeRule(t *testing.T) { st.New(st.SeqScan("foo")). Pipe(st.Filter(parser.MustParseExpr("c > 3"))). Pipe(st.Filter(parser.MustParseExpr("b = 2"))), - st.New(st.IndexScan("idx_foo_b", st.Range{Min: document.NewIntegerValue(2), Exact: true})). + st.New(st.IndexScan("idx_foo_b", st.IndexRange{Min: newVB(document.NewIntegerValue(2)), Exact: true})). Pipe(st.Filter(parser.MustParseExpr("c > 3"))), }, { @@ -345,7 +346,7 @@ func TestUseIndexBasedOnSelectionNodeRule(t *testing.T) { Pipe(st.Filter(parser.MustParseExpr("c = 3"))). Pipe(st.Filter(parser.MustParseExpr("b = 2"))). Pipe(st.Project(parser.MustParseExpr("a"))), - st.New(st.IndexScan("idx_foo_c", st.Range{Min: document.NewIntegerValue(3), Exact: true})). + st.New(st.IndexScan("idx_foo_c", st.IndexRange{Min: newVB(document.NewIntegerValue(3)), Exact: true})). Pipe(st.Filter(parser.MustParseExpr("b = 2"))). Pipe(st.Project(parser.MustParseExpr("a"))), }, @@ -355,7 +356,7 @@ func TestUseIndexBasedOnSelectionNodeRule(t *testing.T) { Pipe(st.Filter(parser.MustParseExpr("c = 'hello'"))). Pipe(st.Filter(parser.MustParseExpr("b = 2"))). Pipe(st.Project(parser.MustParseExpr("a"))), - st.New(st.IndexScan("idx_foo_b", st.Range{Min: document.NewIntegerValue(2), Exact: true})). + st.New(st.IndexScan("idx_foo_b", st.IndexRange{Min: newVB(document.NewIntegerValue(2)), Exact: true})). Pipe(st.Filter(parser.MustParseExpr("c = 'hello'"))). 
Pipe(st.Project(parser.MustParseExpr("a"))), }, @@ -378,7 +379,7 @@ func TestUseIndexBasedOnSelectionNodeRule(t *testing.T) { expr.ArrayValue(document.NewValueBuffer(document.NewIntegerValue(1), document.NewIntegerValue(2))), ), )), - st.New(st.IndexScan("idx_foo_a", st.Range{Min: document.NewIntegerValue(1), Exact: true}, st.Range{Min: document.NewIntegerValue(2), Exact: true})), + st.New(st.IndexScan("idx_foo_a", st.IndexRange{Min: newVB(document.NewIntegerValue(1)), Exact: true}, st.IndexRange{Min: newVB(document.NewIntegerValue(2)), Exact: true})), }, { "FROM foo WHERE 1 IN a", @@ -388,19 +389,19 @@ func TestUseIndexBasedOnSelectionNodeRule(t *testing.T) { { "FROM foo WHERE a >= 10", st.New(st.SeqScan("foo")).Pipe(st.Filter(parser.MustParseExpr("a >= 10"))), - st.New(st.IndexScan("idx_foo_a", st.Range{Min: document.NewIntegerValue(10)})), + st.New(st.IndexScan("idx_foo_a", st.IndexRange{Min: newVB(document.NewIntegerValue(10))})), }, { "FROM foo WHERE k = 1", st.New(st.SeqScan("foo")).Pipe(st.Filter(parser.MustParseExpr("k = 1"))), - st.New(st.PkScan("foo", st.Range{Min: document.NewIntegerValue(1), Exact: true})), + st.New(st.PkScan("foo", st.ValueRange{Min: document.NewIntegerValue(1), Exact: true})), }, { "FROM foo WHERE k = 1 AND b = 2", st.New(st.SeqScan("foo")). Pipe(st.Filter(parser.MustParseExpr("k = 1"))). Pipe(st.Filter(parser.MustParseExpr("b = 2"))), - st.New(st.PkScan("foo", st.Range{Min: document.NewIntegerValue(1), Exact: true})). + st.New(st.PkScan("foo", st.ValueRange{Min: document.NewIntegerValue(1), Exact: true})). Pipe(st.Filter(parser.MustParseExpr("b = 2"))), }, { @@ -408,7 +409,7 @@ func TestUseIndexBasedOnSelectionNodeRule(t *testing.T) { st.New(st.SeqScan("foo")). Pipe(st.Filter(parser.MustParseExpr("a = 1"))). Pipe(st.Filter(parser.MustParseExpr("2 = k"))), - st.New(st.PkScan("foo", st.Range{Min: document.NewIntegerValue(2), Exact: true})). + st.New(st.PkScan("foo", st.ValueRange{Min: document.NewIntegerValue(2), Exact: true})). Pipe(st.Filter(parser.MustParseExpr("a = 1"))), }, { @@ -416,7 +417,7 @@ func TestUseIndexBasedOnSelectionNodeRule(t *testing.T) { st.New(st.SeqScan("foo")). Pipe(st.Filter(parser.MustParseExpr("a = 1"))). Pipe(st.Filter(parser.MustParseExpr("k < 2"))), - st.New(st.IndexScan("idx_foo_a", st.Range{Min: document.NewIntegerValue(1), Exact: true})). + st.New(st.IndexScan("idx_foo_a", st.IndexRange{Min: newVB(document.NewIntegerValue(1)), Exact: true})). Pipe(st.Filter(parser.MustParseExpr("k < 2"))), }, { @@ -424,7 +425,7 @@ func TestUseIndexBasedOnSelectionNodeRule(t *testing.T) { st.New(st.SeqScan("foo")). Pipe(st.Filter(parser.MustParseExpr("a = 1"))). Pipe(st.Filter(parser.MustParseExpr("k = 'hello'"))), - st.New(st.IndexScan("idx_foo_a", st.Range{Min: document.NewIntegerValue(1), Exact: true})). + st.New(st.IndexScan("idx_foo_a", st.IndexRange{Min: newVB(document.NewIntegerValue(1)), Exact: true})). 
Pipe(st.Filter(parser.MustParseExpr("k = 'hello'"))), }, { // c is an INT, 1.1 cannot be converted to int without precision loss, don't use the index @@ -475,22 +476,22 @@ func TestUseIndexBasedOnSelectionNodeRule(t *testing.T) { { "FROM foo WHERE k = [1, 1]", st.New(st.SeqScan("foo")).Pipe(st.Filter(parser.MustParseExpr("k = [1, 1]"))), - st.New(st.PkScan("foo", st.Range{Min: document.NewArrayValue(testutil.MakeArray(t, `[1, 1]`)), Exact: true})), + st.New(st.PkScan("foo", st.ValueRange{Min: document.NewArrayValue(testutil.MakeArray(t, `[1, 1]`)), Exact: true})), }, { // constraint on k[0] INT should not modify the operand "FROM foo WHERE k = [1.5, 1.5]", st.New(st.SeqScan("foo")).Pipe(st.Filter(parser.MustParseExpr("k = [1.5, 1.5]"))), - st.New(st.PkScan("foo", st.Range{Min: document.NewArrayValue(testutil.MakeArray(t, `[1.5, 1.5]`)), Exact: true})), + st.New(st.PkScan("foo", st.ValueRange{Min: document.NewArrayValue(testutil.MakeArray(t, `[1.5, 1.5]`)), Exact: true})), }, { "FROM foo WHERE a = [1, 1]", st.New(st.SeqScan("foo")).Pipe(st.Filter(parser.MustParseExpr("a = [1, 1]"))), - st.New(st.IndexScan("idx_foo_a", st.Range{Min: document.NewArrayValue(testutil.MakeArray(t, `[1, 1]`)), Exact: true})), + st.New(st.IndexScan("idx_foo_a", st.IndexRange{Min: newVB(document.NewArrayValue(testutil.MakeArray(t, `[1, 1]`))), Exact: true})), }, { // constraint on a[0] DOUBLE should modify the operand because it's lossless "FROM foo WHERE a = [1, 1.5]", st.New(st.SeqScan("foo")).Pipe(st.Filter(parser.MustParseExpr("a = [1, 1.5]"))), - st.New(st.IndexScan("idx_foo_a", st.Range{Min: document.NewArrayValue(testutil.MakeArray(t, `[1.0, 1.5]`)), Exact: true})), + st.New(st.IndexScan("idx_foo_a", st.IndexRange{Min: newVB(document.NewArrayValue(testutil.MakeArray(t, `[1.0, 1.5]`))), Exact: true})), }, } @@ -530,3 +531,317 @@ func TestUseIndexBasedOnSelectionNodeRule(t *testing.T) { } }) } + +func TestUseIndexBasedOnSelectionNodeRule_Composite(t *testing.T) { + newVB := document.NewValueBuffer + tests := []struct { + name string + root, expected *st.Stream + }{ + { + "FROM foo WHERE a = 1 AND d = 2", + st.New(st.SeqScan("foo")). + Pipe(st.Filter(parser.MustParseExpr("a = 1"))). + Pipe(st.Filter(parser.MustParseExpr("d = 2"))), + st.New(st.IndexScan("idx_foo_a_d", st.IndexRange{Min: testutil.MakeValueBuffer(t, `[1, 2]`), Exact: true})), + }, + { + "FROM foo WHERE a = 1 AND d > 2", + st.New(st.SeqScan("foo")). + Pipe(st.Filter(parser.MustParseExpr("a = 1"))). + Pipe(st.Filter(parser.MustParseExpr("d > 2"))), + st.New(st.IndexScan("idx_foo_a_d", st.IndexRange{Min: testutil.MakeValueBuffer(t, `[1, 2]`), Exclusive: true})), + }, + { + "FROM foo WHERE a = 1 AND d < 2", + st.New(st.SeqScan("foo")). + Pipe(st.Filter(parser.MustParseExpr("a = 1"))). + Pipe(st.Filter(parser.MustParseExpr("d < 2"))), + st.New(st.IndexScan("idx_foo_a_d", st.IndexRange{Max: testutil.MakeValueBuffer(t, `[1, 2]`), Exclusive: true})), + }, + { + "FROM foo WHERE a = 1 AND d <= 2", + st.New(st.SeqScan("foo")). + Pipe(st.Filter(parser.MustParseExpr("a = 1"))). + Pipe(st.Filter(parser.MustParseExpr("d <= 2"))), + st.New(st.IndexScan("idx_foo_a_d", st.IndexRange{Max: testutil.MakeValueBuffer(t, `[1, 2]`)})), + }, + { + "FROM foo WHERE a = 1 AND d >= 2", + st.New(st.SeqScan("foo")). + Pipe(st.Filter(parser.MustParseExpr("a = 1"))). 
+ Pipe(st.Filter(parser.MustParseExpr("d >= 2"))), + st.New(st.IndexScan("idx_foo_a_d", st.IndexRange{Min: testutil.MakeValueBuffer(t, `[1, 2]`)})), + }, + { + "FROM foo WHERE a > 1 AND d > 2", + st.New(st.SeqScan("foo")). + Pipe(st.Filter(parser.MustParseExpr("a > 1"))). + Pipe(st.Filter(parser.MustParseExpr("d > 2"))), + st.New(st.IndexScan("idx_foo_a", st.IndexRange{Min: testutil.MakeValueBuffer(t, `[1]`), Exclusive: true})). + Pipe(st.Filter(parser.MustParseExpr("d > 2"))), + }, + { + "FROM foo WHERE a = 1 AND b = 2 AND c = 3", + st.New(st.SeqScan("foo")). + Pipe(st.Filter(parser.MustParseExpr("a = 1"))). + Pipe(st.Filter(parser.MustParseExpr("b = 2"))). + Pipe(st.Filter(parser.MustParseExpr("c = 3"))), + st.New(st.IndexScan("idx_foo_a_b_c", st.IndexRange{Min: testutil.MakeValueBuffer(t, `[1, 2, 3]`), Exact: true})), + }, + { + "FROM foo WHERE a = 1 AND b = 2", // c is omitted, but it can still use idx_foo_a_b_c + st.New(st.SeqScan("foo")). + Pipe(st.Filter(parser.MustParseExpr("a = 1"))). + Pipe(st.Filter(parser.MustParseExpr("b = 2"))), + st.New(st.IndexScan("idx_foo_a_b_c", st.IndexRange{Min: testutil.MakeValueBuffer(t, `[1, 2]`), Exact: true})), + }, + { + "FROM foo WHERE a = 1 AND b > 2", // c is omitted, but it can still use idx_foo_a_b_c, with > b + st.New(st.SeqScan("foo")). + Pipe(st.Filter(parser.MustParseExpr("a = 1"))). + Pipe(st.Filter(parser.MustParseExpr("b > 2"))), + st.New(st.IndexScan("idx_foo_a_b_c", st.IndexRange{Min: testutil.MakeValueBuffer(t, `[1, 2]`), Exclusive: true})), + }, + { + "FROM foo WHERE a = 1 AND b < 2", // c is omitted, but it can still use idx_foo_a_b_c, with > b + st.New(st.SeqScan("foo")). + Pipe(st.Filter(parser.MustParseExpr("a = 1"))). + Pipe(st.Filter(parser.MustParseExpr("b < 2"))), + st.New(st.IndexScan("idx_foo_a_b_c", st.IndexRange{Max: testutil.MakeValueBuffer(t, `[1, 2]`), Exclusive: true})), + }, + { + "FROM foo WHERE a = 1 AND b = 2 and k = 3", // c is omitted, but it can still use idx_foo_a_b_c + st.New(st.SeqScan("foo")). + Pipe(st.Filter(parser.MustParseExpr("a = 1"))). + Pipe(st.Filter(parser.MustParseExpr("b = 2"))). + Pipe(st.Filter(parser.MustParseExpr("k = 3"))), + st.New(st.IndexScan("idx_foo_a_b_c", st.IndexRange{Min: testutil.MakeValueBuffer(t, `[1, 2]`), Exact: true})). + Pipe(st.Filter(parser.MustParseExpr("k = 3"))), + }, + // If a path is missing from the query, we can still the index, with paths after the missing one are + // using filter nodes rather than the index. + { + "FROM foo WHERE x = 1 AND z = 2", + st.New(st.SeqScan("foo")). + Pipe(st.Filter(parser.MustParseExpr("x = 1"))). + Pipe(st.Filter(parser.MustParseExpr("z = 2"))), + st.New(st.IndexScan("idx_foo_x_y_z", st.IndexRange{Min: newVB(document.NewIntegerValue(1)), Exact: true})). + Pipe(st.Filter(parser.MustParseExpr("z = 2"))), + }, + { + "FROM foo WHERE a = 1 AND c = 2", + st.New(st.SeqScan("foo")). + Pipe(st.Filter(parser.MustParseExpr("a = 1"))). + Pipe(st.Filter(parser.MustParseExpr("c = 2"))), + // c will be picked because it's a unique index and thus has a lower cost + st.New(st.IndexScan("idx_foo_c", st.IndexRange{Min: newVB(document.NewIntegerValue(2)), Exact: true})). + Pipe(st.Filter(parser.MustParseExpr("a = 1"))), + }, + { + "FROM foo WHERE b = 1 AND c = 2", + st.New(st.SeqScan("foo")). + Pipe(st.Filter(parser.MustParseExpr("b = 1"))). 
+ Pipe(st.Filter(parser.MustParseExpr("c = 2"))), + // c will be picked because it's a unique index and thus has a lower cost + st.New(st.IndexScan("idx_foo_c", st.IndexRange{Min: newVB(document.NewIntegerValue(2)), Exact: true})). + Pipe(st.Filter(parser.MustParseExpr("b = 1"))), + }, + { + "FROM foo WHERE a = 1 AND b = 2 AND c = 'a'", // c is from the wrong type and will prevent the index to be picked + st.New(st.SeqScan("foo")). + Pipe(st.Filter(parser.MustParseExpr("a = 1"))). + Pipe(st.Filter(parser.MustParseExpr("b = 2"))). + Pipe(st.Filter(parser.MustParseExpr("c = 'a'"))), + st.New(st.IndexScan("idx_foo_a", st.IndexRange{Min: newVB(document.NewIntegerValue(1)), Exact: true})). + Pipe(st.Filter(parser.MustParseExpr("b = 2"))). + Pipe(st.Filter(parser.MustParseExpr("c = 'a'"))), + }, + + { + "FROM foo WHERE a IN [1, 2] AND d = 4", + st.New(st.SeqScan("foo")). + Pipe(st.Filter( + expr.In( + parser.MustParseExpr("a"), + expr.ArrayValue(document.NewValueBuffer(document.NewIntegerValue(1), document.NewIntegerValue(2))), + ), + )). + Pipe(st.Filter(parser.MustParseExpr("d = 4"))), + st.New(st.IndexScan("idx_foo_a_d", + st.IndexRange{Min: testutil.MakeValueBuffer(t, `[1, 4]`), Exact: true}, + st.IndexRange{Min: testutil.MakeValueBuffer(t, `[2, 4]`), Exact: true}, + )), + }, + { + "FROM foo WHERE a IN [1, 2] AND b = 3 AND c = 4", + st.New(st.SeqScan("foo")). + Pipe(st.Filter( + expr.In( + parser.MustParseExpr("a"), + expr.ArrayValue(document.NewValueBuffer(document.NewIntegerValue(1), document.NewIntegerValue(2))), + ), + )). + Pipe(st.Filter(parser.MustParseExpr("b = 3"))). + Pipe(st.Filter(parser.MustParseExpr("c = 4"))), + st.New(st.IndexScan("idx_foo_a_b_c", + st.IndexRange{Min: testutil.MakeValueBuffer(t, `[1, 3, 4]`), Exact: true}, + st.IndexRange{Min: testutil.MakeValueBuffer(t, `[2, 3, 4]`), Exact: true}, + )), + }, + { + "FROM foo WHERE a IN [1, 2] AND b = 3 AND c > 4", + st.New(st.SeqScan("foo")). + Pipe(st.Filter( + expr.In( + parser.MustParseExpr("a"), + expr.ArrayValue(document.NewValueBuffer(document.NewIntegerValue(1), document.NewIntegerValue(2))), + ), + )). + Pipe(st.Filter(parser.MustParseExpr("b = 3"))). + Pipe(st.Filter(parser.MustParseExpr("c > 4"))), + st.New(st.IndexScan("idx_foo_a_b_c", + st.IndexRange{Min: testutil.MakeValueBuffer(t, `[1, 3, 4]`), Exclusive: true}, + st.IndexRange{Min: testutil.MakeValueBuffer(t, `[2, 3, 4]`), Exclusive: true}, + )), + }, + { + "FROM foo WHERE a IN [1, 2] AND b = 3 AND c < 4", + st.New(st.SeqScan("foo")). + Pipe(st.Filter( + expr.In( + parser.MustParseExpr("a"), + expr.ArrayValue(document.NewValueBuffer(document.NewIntegerValue(1), document.NewIntegerValue(2))), + ), + )). + Pipe(st.Filter(parser.MustParseExpr("b = 3"))). + Pipe(st.Filter(parser.MustParseExpr("c < 4"))), + st.New(st.IndexScan("idx_foo_a_b_c", + st.IndexRange{Max: testutil.MakeValueBuffer(t, `[1, 3, 4]`), Exclusive: true}, + st.IndexRange{Max: testutil.MakeValueBuffer(t, `[2, 3, 4]`), Exclusive: true}, + )), + }, + // { + // "FROM foo WHERE a IN [1, 2] AND b IN [3, 4] AND c > 5", + // st.New(st.SeqScan("foo")). + // Pipe(st.Filter( + // expr.In( + // parser.MustParseExpr("a"), + // expr.ArrayValue(document.NewValueBuffer(document.NewIntegerValue(1), document.NewIntegerValue(2))), + // ), + // )). + // Pipe(st.Filter( + // expr.In( + // parser.MustParseExpr("b"), + // expr.ArrayValue(document.NewValueBuffer(document.NewIntegerValue(3), document.NewIntegerValue(4))), + // ), + // )). 
+ // Pipe(st.Filter(parser.MustParseExpr("c < 5"))), + // st.New(st.IndexScan("idx_foo_a_b_c", + // st.IndexRange{Max: testutil.MakeValueBuffer(t, `[1, 3, 5]`), Exclusive: true}, + // st.IndexRange{Max: testutil.MakeValueBuffer(t, `[2, 3, 5]`), Exclusive: true}, + // st.IndexRange{Max: testutil.MakeValueBuffer(t, `[1, 4, 5]`), Exclusive: true}, + // st.IndexRange{Max: testutil.MakeValueBuffer(t, `[2, 4, 5]`), Exclusive: true}, + // )), + // }, + { + "FROM foo WHERE 1 IN a AND d = 2", + st.New(st.SeqScan("foo")). + Pipe(st.Filter(parser.MustParseExpr("1 IN a"))). + Pipe(st.Filter(parser.MustParseExpr("d = 4"))), + st.New(st.SeqScan("foo")). + Pipe(st.Filter(parser.MustParseExpr("1 IN a"))). + Pipe(st.Filter(parser.MustParseExpr("d = 4"))), + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + db, err := genji.Open(":memory:") + require.NoError(t, err) + defer db.Close() + + tx, err := db.Begin(true) + require.NoError(t, err) + defer tx.Rollback() + + err = tx.Exec(` + CREATE TABLE foo (k INT PRIMARY KEY, c INT); + CREATE INDEX idx_foo_a ON foo(a); + CREATE INDEX idx_foo_b ON foo(b); + CREATE UNIQUE INDEX idx_foo_c ON foo(c); + CREATE INDEX idx_foo_a_d ON foo(a, d); + CREATE INDEX idx_foo_a_b_c ON foo(a, b, c); + CREATE INDEX idx_foo_x_y_z ON foo(x, y, z); + INSERT INTO foo (k, a, b, c, d) VALUES + (1, 1, 1, 1, 1), + (2, 2, 2, 2, 2), + (3, 3, 3, 3, 3) + `) + require.NoError(t, err) + + res, err := planner.UseIndexBasedOnFilterNodeRule(test.root, tx.Transaction, nil) + require.NoError(t, err) + require.Equal(t, test.expected.String(), res.String()) + }) + } + + t.Run("array indexes", func(t *testing.T) { + tests := []struct { + name string + root, expected *st.Stream + }{ + { + "FROM foo WHERE a = [1, 1] AND b = [2, 2]", + st.New(st.SeqScan("foo")). + Pipe(st.Filter(parser.MustParseExpr("a = [1, 1]"))). + Pipe(st.Filter(parser.MustParseExpr("b = [2, 2]"))), + st.New(st.IndexScan("idx_foo_a_b", st.IndexRange{ + Min: testutil.MakeValueBuffer(t, `[[1, 1], [2, 2]]`), + Exact: true})), + }, + { + "FROM foo WHERE a = [1, 1] AND b > [2, 2]", + st.New(st.SeqScan("foo")). + Pipe(st.Filter(parser.MustParseExpr("a = [1, 1]"))). 
+ Pipe(st.Filter(parser.MustParseExpr("b > [2, 2]"))), + st.New(st.IndexScan("idx_foo_a_b", st.IndexRange{ + Min: testutil.MakeValueBuffer(t, `[[1, 1], [2, 2]]`), + Exclusive: true})), + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + db, err := genji.Open(":memory:") + require.NoError(t, err) + defer db.Close() + + tx, err := db.Begin(true) + require.NoError(t, err) + defer tx.Rollback() + + err = tx.Exec(` + CREATE TABLE foo ( + k ARRAY PRIMARY KEY, + a ARRAY + ); + CREATE INDEX idx_foo_a_b ON foo(a, b); + CREATE INDEX idx_foo_a0 ON foo(a[0]); + INSERT INTO foo (k, a, b) VALUES + ([1, 1], [1, 1], [1, 1]), + ([2, 2], [2, 2], [2, 2]), + ([3, 3], [3, 3], [3, 3]) + `) + require.NoError(t, err) + + res, err := planner.PrecalculateExprRule(test.root, tx.Transaction, nil) + require.NoError(t, err) + + res, err = planner.UseIndexBasedOnFilterNodeRule(res, tx.Transaction, nil) + require.NoError(t, err) + require.Equal(t, test.expected.String(), res.String()) + }) + } + }) +} diff --git a/query/create.go b/query/create.go index 8d674dc4a..812e610dc 100644 --- a/query/create.go +++ b/query/create.go @@ -32,9 +32,9 @@ func (stmt CreateTableStmt) Run(tx *database.Transaction, args []expr.Param) (Re if fc.IsUnique { err = tx.CreateIndex(&database.IndexInfo{ TableName: stmt.TableName, - Path: fc.Path, + Paths: []document.Path{fc.Path}, Unique: true, - Type: fc.Type, + Types: []document.ValueType{fc.Type}, }) if err != nil { return res, err @@ -50,7 +50,7 @@ func (stmt CreateTableStmt) Run(tx *database.Transaction, args []expr.Param) (Re type CreateIndexStmt struct { IndexName string TableName string - Path document.Path + Paths []document.Path IfNotExists bool Unique bool } @@ -69,7 +69,7 @@ func (stmt CreateIndexStmt) Run(tx *database.Transaction, args []expr.Param) (Re Unique: stmt.Unique, IndexName: stmt.IndexName, TableName: stmt.TableName, - Path: stmt.Path, + Paths: stmt.Paths, }) if stmt.IfNotExists && err == database.ErrIndexAlreadyExists { err = nil diff --git a/query/create_test.go b/query/create_test.go index 0e49cc692..6e522ae5e 100644 --- a/query/create_test.go +++ b/query/create_test.go @@ -288,17 +288,17 @@ func TestCreateTable(t *testing.T) { idx, err := tx.GetIndex("__genji_autoindex_test_1") require.NoError(t, err) - require.Equal(t, document.IntegerValue, idx.Info.Type) + require.Equal(t, document.IntegerValue, idx.Info.Types[0]) require.True(t, idx.Info.Unique) idx, err = tx.GetIndex("__genji_autoindex_test_2") require.NoError(t, err) - require.Equal(t, document.DoubleValue, idx.Info.Type) + require.Equal(t, document.DoubleValue, idx.Info.Types[0]) require.True(t, idx.Info.Unique) idx, err = tx.GetIndex("__genji_autoindex_test_3") require.NoError(t, err) - require.Zero(t, idx.Info.Type) + require.Zero(t, idx.Info.Types[0]) require.True(t, idx.Info.Unique) return nil }) @@ -319,7 +319,8 @@ func TestCreateIndex(t *testing.T) { {"No name", "CREATE UNIQUE INDEX ON test (foo[1])", false}, {"No name if not exists", "CREATE UNIQUE INDEX IF NOT EXISTS ON test (foo[1])", true}, {"No fields", "CREATE INDEX idx ON test", true}, - {"More than 1 field", "CREATE INDEX idx ON test (foo, bar)", true}, + {"Composite (2)", "CREATE INDEX idx ON test (foo, bar)", false}, + {"Composite (4)", "CREATE INDEX idx ON test (foo, bar, baz, baf)", false}, } for _, test := range tests { diff --git a/query/reindex_test.go b/query/reindex_test.go index 0f0548ea6..a4a0721ed 100644 --- a/query/reindex_test.go +++ b/query/reindex_test.go @@ -4,7 +4,6 @@ import ( "testing" 
"github.com/genjidb/genji" - "github.com/genjidb/genji/document" "github.com/stretchr/testify/require" ) @@ -82,7 +81,7 @@ func TestReIndex(t *testing.T) { } i := 0 - err = idx.AscendGreaterOrEqual(document.Value{}, func(val []byte, key []byte) error { + err = idx.AscendGreaterOrEqual(nil, func(val []byte, key []byte) error { i++ return nil }) diff --git a/query/select_test.go b/query/select_test.go index 6408b302d..7ef99c903 100644 --- a/query/select_test.go +++ b/query/select_test.go @@ -41,6 +41,7 @@ func TestSelectStmt(t *testing.T) { {"With eq op", "SELECT * FROM test WHERE size = 10", false, `[{"k":1,"color":"red","size":10,"shape":"square"},{"k":2,"color":"blue","size":10,"weight":100}]`, nil}, {"With neq op", "SELECT * FROM test WHERE color != 'red'", false, `[{"k":2,"color":"blue","size":10,"weight":100}]`, nil}, {"With gt op", "SELECT * FROM test WHERE size > 10", false, `[]`, nil}, + {"With gt bis", "SELECT * FROM test WHERE size > 9", false, `[{"k":1,"color":"red","size":10,"shape":"square"},{"k":2,"color":"blue","size":10,"weight":100}]`, nil}, {"With lt op", "SELECT * FROM test WHERE size < 15", false, `[{"k":1,"color":"red","size":10,"shape":"square"},{"k":2,"color":"blue","size":10,"weight":100}]`, nil}, {"With lte op", "SELECT * FROM test WHERE color <= 'salmon' ORDER BY k ASC", false, `[{"k":1,"color":"red","size":10,"shape":"square"},{"k":2,"color":"blue","size":10,"weight":100}]`, nil}, {"With add op", "SELECT size + 10 AS s FROM test ORDER BY k", false, `[{"s":20},{"s":20},{"s":null}]`, nil}, diff --git a/sql/parser/create.go b/sql/parser/create.go index a22de8b5d..8ba2ba84d 100644 --- a/sql/parser/create.go +++ b/sql/parser/create.go @@ -71,7 +71,7 @@ func (p *Parser) parseFieldDefinition(fc *database.FieldConstraint) (err error) return err } - if fc.Type == 0 && fc.DefaultValue.Type.IsZero() && !fc.IsNotNull && !fc.IsPrimaryKey && !fc.IsUnique { + if fc.Type.IsAny() && fc.DefaultValue.Type.IsAny() && !fc.IsNotNull && !fc.IsPrimaryKey && !fc.IsUnique { tok, pos, lit := p.ScanIgnoreWhitespace() return newParseError(scanner.Tokstr(tok, lit), []string{"CONSTRAINT", "TYPE"}, pos) } @@ -331,11 +331,7 @@ func (p *Parser) parseCreateIndexStatement(unique bool) (query.CreateIndexStmt, return stmt, newParseError(scanner.Tokstr(tok, lit), []string{"("}, pos) } - if len(paths) != 1 { - return stmt, &ParseError{Message: "indexes on more than one path are not supported"} - } - - stmt.Path = paths[0] + stmt.Paths = paths return stmt, nil } diff --git a/sql/parser/create_test.go b/sql/parser/create_test.go index 1114f893d..d73d30926 100644 --- a/sql/parser/create_test.go +++ b/sql/parser/create_test.go @@ -264,13 +264,22 @@ func TestParserCreateIndex(t *testing.T) { expected query.Statement errored bool }{ - {"Basic", "CREATE INDEX idx ON test (foo)", query.CreateIndexStmt{IndexName: "idx", TableName: "test", Path: document.Path(parsePath(t, "foo"))}, false}, - {"If not exists", "CREATE INDEX IF NOT EXISTS idx ON test (foo.bar[1])", query.CreateIndexStmt{IndexName: "idx", TableName: "test", Path: document.Path(parsePath(t, "foo.bar[1]")), IfNotExists: true}, false}, - {"Unique", "CREATE UNIQUE INDEX IF NOT EXISTS idx ON test (foo[3].baz)", query.CreateIndexStmt{IndexName: "idx", TableName: "test", Path: document.Path(parsePath(t, "foo[3].baz")), IfNotExists: true, Unique: true}, false}, - {"No name", "CREATE UNIQUE INDEX ON test (foo[3].baz)", query.CreateIndexStmt{TableName: "test", Path: document.Path(parsePath(t, "foo[3].baz")), Unique: true}, false}, + {"Basic", "CREATE INDEX 
idx ON test (foo)", query.CreateIndexStmt{IndexName: "idx", TableName: "test", Paths: []document.Path{document.Path(parsePath(t, "foo"))}}, false}, + {"If not exists", "CREATE INDEX IF NOT EXISTS idx ON test (foo.bar[1])", query.CreateIndexStmt{IndexName: "idx", TableName: "test", Paths: []document.Path{document.Path(parsePath(t, "foo.bar[1]"))}, IfNotExists: true}, false}, + {"Unique", "CREATE UNIQUE INDEX IF NOT EXISTS idx ON test (foo[3].baz)", query.CreateIndexStmt{IndexName: "idx", TableName: "test", Paths: []document.Path{document.Path(parsePath(t, "foo[3].baz"))}, IfNotExists: true, Unique: true}, false}, + {"No name", "CREATE UNIQUE INDEX ON test (foo[3].baz)", query.CreateIndexStmt{TableName: "test", Paths: []document.Path{document.Path(parsePath(t, "foo[3].baz"))}, Unique: true}, false}, {"No name with IF NOT EXISTS", "CREATE UNIQUE INDEX IF NOT EXISTS ON test (foo[3].baz)", nil, true}, + {"More than 1 path", "CREATE INDEX idx ON test (foo, bar)", + query.CreateIndexStmt(query.CreateIndexStmt{ + IndexName: "idx", + TableName: "test", + Paths: []document.Path{ + document.Path(parsePath(t, "foo")), + document.Path(parsePath(t, "bar")), + }, + }), + false}, {"No fields", "CREATE INDEX idx ON test", nil, true}, - {"More than 1 path", "CREATE INDEX idx ON test (foo, bar)", nil, true}, } for _, test := range tests { diff --git a/stream/iterator.go b/stream/iterator.go index 82e5a4132..7dbf124b6 100644 --- a/stream/iterator.go +++ b/stream/iterator.go @@ -5,6 +5,7 @@ import ( "strconv" "strings" + "github.com/genjidb/genji/database" "github.com/genjidb/genji/document" "github.com/genjidb/genji/expr" "github.com/genjidb/genji/stringutil" @@ -151,17 +152,17 @@ func (it *SeqScanOperator) String() string { type PkScanOperator struct { baseOperator TableName string - Ranges Ranges + Ranges ValueRanges Reverse bool } // PkScan creates an iterator that iterates over each document of the given table. -func PkScan(tableName string, ranges ...Range) *PkScanOperator { +func PkScan(tableName string, ranges ...ValueRange) *PkScanOperator { return &PkScanOperator{TableName: tableName, Ranges: ranges} } // PkScanReverse creates an iterator that iterates over each document of the given table in reverse order. -func PkScanReverse(tableName string, ranges ...Range) *PkScanOperator { +func PkScanReverse(tableName string, ranges ...ValueRange) *PkScanOperator { return &PkScanOperator{TableName: tableName, Ranges: ranges, Reverse: true} } @@ -233,7 +234,7 @@ func (it *PkScanOperator) Iterate(in *expr.Environment, fn func(out *expr.Enviro } var encEnd []byte - if !end.Type.IsZero() && end.V != nil { + if !end.Type.IsAny() && end.V != nil { encEnd, err = table.EncodeValue(end) if err != nil { return err @@ -275,18 +276,22 @@ func (it *PkScanOperator) Iterate(in *expr.Environment, fn func(out *expr.Enviro type IndexScanOperator struct { baseOperator + // IndexName references the index that will be used to perform the scan IndexName string - Ranges Ranges - Reverse bool + // Ranges defines the boundaries of the scan, each corresponding to one value of the group of values + // being indexed in the case of a composite index. + Ranges IndexRanges + // Reverse indicates the direction used to traverse the index. + Reverse bool } // IndexScan creates an iterator that iterates over each document of the given table. 
-func IndexScan(name string, ranges ...Range) *IndexScanOperator { +func IndexScan(name string, ranges ...IndexRange) *IndexScanOperator { return &IndexScanOperator{IndexName: name, Ranges: ranges} } // IndexScanReverse creates an iterator that iterates over each document of the given table in reverse order. -func IndexScanReverse(name string, ranges ...Range) *IndexScanOperator { +func IndexScanReverse(name string, ranges ...IndexRange) *IndexScanOperator { return &IndexScanOperator{IndexName: name, Ranges: ranges, Reverse: true} } @@ -327,12 +332,12 @@ func (it *IndexScanOperator) Iterate(in *expr.Environment, fn func(out *expr.Env return err } - err = it.Ranges.Encode(index, in) + err = it.Ranges.EncodeBuffer(index, in) if err != nil { return err } - var iterator func(pivot document.Value, fn func(val, key []byte) error) error + var iterator func(pivot database.Pivot, fn func(val, key []byte) error) error if !it.Reverse { iterator = index.AscendGreaterOrEqual @@ -342,7 +347,7 @@ func (it *IndexScanOperator) Iterate(in *expr.Environment, fn func(out *expr.Env // if there are no ranges use a simpler and faster iteration function if len(it.Ranges) == 0 { - return iterator(document.Value{}, func(val, key []byte) error { + return iterator(nil, func(val, key []byte) error { d, err := table.GetDocument(key) if err != nil { return err @@ -354,7 +359,7 @@ func (it *IndexScanOperator) Iterate(in *expr.Environment, fn func(out *expr.Env } for _, rng := range it.Ranges { - var start, end document.Value + var start, end *document.ValueBuffer if !it.Reverse { start = rng.Min end = rng.Max @@ -364,19 +369,25 @@ func (it *IndexScanOperator) Iterate(in *expr.Environment, fn func(out *expr.Env } var encEnd []byte - if !end.Type.IsZero() && end.V != nil { - encEnd, err = index.EncodeValue(end) + if end.Len() > 0 { + encEnd, err = index.EncodeValueBuffer(end) if err != nil { return err } } - err = iterator(start, func(val, key []byte) error { + var pivot database.Pivot + if start != nil { + pivot = start.Values + } + + err = iterator(pivot, func(val, key []byte) error { if !rng.IsInRange(val) { // if we reached the end of our range, we can stop iterating. if encEnd == nil { return nil } + cmp := bytes.Compare(val, encEnd) if !it.Reverse && cmp > 0 { return ErrStreamClosed @@ -395,233 +406,16 @@ func (it *IndexScanOperator) Iterate(in *expr.Environment, fn func(out *expr.Env newEnv.SetDocument(d) return fn(&newEnv) }) + if err == ErrStreamClosed { err = nil } if err != nil { return err } - } - - return nil -} - -type Range struct { - Min, Max document.Value - // Exclude Min and Max from the results. - // By default, min and max are inclusive. - // Exclusive and Exact cannot be set to true at the same time. - Exclusive bool - // Used to match an exact value equal to Min. - // If set to true, Max will be ignored for comparison - // and for determining the global upper bound. 
- Exact bool - - encodedMin, encodedMax []byte - rangeType document.ValueType -} - -func (r *Range) encode(encoder ValueEncoder, env *expr.Environment) error { - var err error - - // first we evaluate Min and Max - if !r.Min.Type.IsZero() { - r.encodedMin, err = encoder.EncodeValue(r.Min) - if err != nil { - return err - } - r.rangeType = r.Min.Type - } - if !r.Max.Type.IsZero() { - r.encodedMax, err = encoder.EncodeValue(r.Max) - if err != nil { - return err - } - if !r.rangeType.IsZero() && r.rangeType != r.Max.Type { - panic("range contain values of different types") - } - - r.rangeType = r.Max.Type - } - // ensure boundaries are typed - if r.Min.Type.IsZero() { - r.Min.Type = r.rangeType - } - if r.Max.Type.IsZero() { - r.Max.Type = r.rangeType - } - - if r.Exclusive && r.Exact { - panic("exclusive and exact cannot both be true") + // } } return nil } - -func (r *Range) String() string { - if r.Exact { - return stringutil.Sprintf("%v", r.Min) - } - - if r.Min.Type.IsZero() { - r.Min = document.NewIntegerValue(-1) - } - if r.Max.Type.IsZero() { - r.Max = document.NewIntegerValue(-1) - } - - if r.Exclusive { - return stringutil.Sprintf("[%v, %v, true]", r.Min, r.Max) - } - - return stringutil.Sprintf("[%v, %v]", r.Min, r.Max) -} - -func (r *Range) IsEqual(other *Range) bool { - if r.Exact != other.Exact { - return false - } - - if r.rangeType != other.rangeType { - return false - } - - if r.Exclusive != other.Exclusive { - return false - } - - if r.Min.Type != other.Min.Type { - return false - } - ok, err := r.Min.IsEqual(other.Min) - if err != nil || !ok { - return false - } - - if r.Max.Type != other.Max.Type { - return false - } - ok, err = r.Max.IsEqual(other.Max) - if err != nil || !ok { - return false - } - - return true -} - -type Ranges []Range - -// Append rng to r and return the new slice. -// Duplicate ranges are ignored. -func (r Ranges) Append(rng Range) Ranges { - // ensure we don't keep duplicate ranges - isDuplicate := false - for _, e := range r { - if e.IsEqual(&rng) { - isDuplicate = true - break - } - } - - if isDuplicate { - return r - } - - return append(r, rng) -} - -type ValueEncoder interface { - EncodeValue(v document.Value) ([]byte, error) -} - -// Encode each range using the given value encoder. -func (r Ranges) Encode(encoder ValueEncoder, env *expr.Environment) error { - for i := range r { - err := r[i].encode(encoder, env) - if err != nil { - return err - } - } - - return nil -} - -func (r Ranges) String() string { - var sb strings.Builder - - for i, rr := range r { - if i > 0 { - sb.WriteString(", ") - } - - sb.WriteString(rr.String()) - } - - return sb.String() -} - -// Cost is a best effort function to determine the cost of -// a range lookup. 
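Primary-key scans keep single-value boundaries, now under the ValueRange name. A minimal sketch mirroring the PkScan String() test below (illustrative only):

package main

import (
	"fmt"

	"github.com/genjidb/genji/document"
	"github.com/genjidb/genji/stream"
)

func main() {
	// ValueRange replaces the former stream.Range for single-value boundaries.
	op := stream.PkScan("test", stream.ValueRange{
		Min: document.NewIntegerValue(1),
		Max: document.NewIntegerValue(2),
	})
	fmt.Println(op.String()) // pkScan("test", [1, 2])
}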
-func (r Ranges) Cost() int { - var cost int - - for _, rng := range r { - // if we are looking for an exact value - // increment by 1 - if rng.Exact { - cost++ - continue - } - - // if there are two boundaries, increment by 50 - if !rng.Min.Type.IsZero() && !rng.Max.Type.IsZero() { - cost += 50 - } - - // if there is only one boundary, increment by 100 - if (!rng.Min.Type.IsZero() && rng.Max.Type.IsZero()) || (rng.Min.Type.IsZero() && !rng.Max.Type.IsZero()) { - cost += 100 - continue - } - - // if there are no boundaries, increment by 200 - cost += 200 - } - - return cost -} - -func (r *Range) IsInRange(value []byte) bool { - // by default, we consider the value within range - cmpMin, cmpMax := 1, -1 - - // we compare with the lower bound and see if it matches - if r.encodedMin != nil { - cmpMin = bytes.Compare(value, r.encodedMin) - } - - // if exact is true the value has to be equal to the lower bound. - if r.Exact { - return cmpMin == 0 - } - - // if exclusive and the value is equal to the lower bound - // we can ignore it - if r.Exclusive && cmpMin == 0 { - return false - } - - // the value is bigger than the lower bound, - // see if it matches the upper bound. - if r.encodedMax != nil { - cmpMax = bytes.Compare(value, r.encodedMax) - } - - // if boundaries are strict, ignore values equal to the max - if r.Exclusive && cmpMax == 0 { - return false - } - - return cmpMax <= 0 -} diff --git a/stream/iterator_test.go b/stream/iterator_test.go index 8162fddc5..44a8a3335 100644 --- a/stream/iterator_test.go +++ b/stream/iterator_test.go @@ -124,7 +124,7 @@ func TestPkScan(t *testing.T) { tests := []struct { name string docsInTable, expected testutil.Docs - ranges stream.Ranges + ranges stream.ValueRanges reverse bool fails bool }{ @@ -139,7 +139,7 @@ func TestPkScan(t *testing.T) { "max:2", testutil.MakeDocuments(t, `{"a": 1}`, `{"a": 2}`), testutil.MakeDocuments(t, `{"a": 1}`, `{"a": 2}`), - stream.Ranges{ + stream.ValueRanges{ {Max: document.NewIntegerValue(2)}, }, false, false, @@ -148,7 +148,7 @@ func TestPkScan(t *testing.T) { "max:1", testutil.MakeDocuments(t, `{"a": 1}`, `{"a": 2}`), testutil.MakeDocuments(t, `{"a": 1}`), - stream.Ranges{ + stream.ValueRanges{ {Max: document.NewIntegerValue(1)}, }, false, false, @@ -157,7 +157,7 @@ func TestPkScan(t *testing.T) { "min", testutil.MakeDocuments(t, `{"a": 1}`, `{"a": 2}`), testutil.MakeDocuments(t, `{"a": 1}`, `{"a": 2}`), - stream.Ranges{ + stream.ValueRanges{ {Min: document.NewIntegerValue(1)}, }, false, false, @@ -166,7 +166,7 @@ func TestPkScan(t *testing.T) { "min/max", testutil.MakeDocuments(t, `{"a": 1}`, `{"a": 2}`), testutil.MakeDocuments(t, `{"a": 1}`, `{"a": 2}`), - stream.Ranges{ + stream.ValueRanges{ {Min: document.NewIntegerValue(1), Max: document.NewIntegerValue(2)}, }, false, false, @@ -181,7 +181,7 @@ func TestPkScan(t *testing.T) { "reverse/max", testutil.MakeDocuments(t, `{"a": 1}`, `{"a": 2}`), testutil.MakeDocuments(t, `{"a": 2}`, `{"a": 1}`), - stream.Ranges{ + stream.ValueRanges{ {Max: document.NewIntegerValue(2)}, }, true, false, @@ -190,7 +190,7 @@ func TestPkScan(t *testing.T) { "reverse/min", testutil.MakeDocuments(t, `{"a": 1}`, `{"a": 2}`), testutil.MakeDocuments(t, `{"a": 2}`, `{"a": 1}`), - stream.Ranges{ + stream.ValueRanges{ {Min: document.NewIntegerValue(1)}, }, true, false, @@ -199,7 +199,7 @@ func TestPkScan(t *testing.T) { "reverse/min/max", testutil.MakeDocuments(t, `{"a": 1}`, `{"a": 2}`), testutil.MakeDocuments(t, `{"a": 2}`, `{"a": 1}`), - stream.Ranges{ + stream.ValueRanges{ {Min: 
document.NewIntegerValue(1), Max: document.NewIntegerValue(2)}, }, true, false, @@ -258,14 +258,14 @@ func TestPkScan(t *testing.T) { } t.Run("String", func(t *testing.T) { - require.Equal(t, `pkScan("test", [1, 2])`, stream.PkScan("test", stream.Range{ + require.Equal(t, `pkScan("test", [1, 2])`, stream.PkScan("test", stream.ValueRange{ Min: document.NewIntegerValue(1), Max: document.NewIntegerValue(2), }).String()) op := stream.PkScan("test", - stream.Range{Min: document.NewIntegerValue(1), Max: document.NewIntegerValue(2), Exclusive: true}, - stream.Range{Min: document.NewIntegerValue(10), Exact: true}, - stream.Range{Min: document.NewIntegerValue(100)}, + stream.ValueRange{Min: document.NewIntegerValue(1), Max: document.NewIntegerValue(2), Exclusive: true}, + stream.ValueRange{Min: document.NewIntegerValue(10), Exact: true}, + stream.ValueRange{Min: document.NewIntegerValue(100)}, ) op.Reverse = true @@ -274,98 +274,302 @@ func TestPkScan(t *testing.T) { } func TestIndexScan(t *testing.T) { + newVB := document.NewValueBuffer tests := []struct { name string + indexOn string docsInTable, expected testutil.Docs - ranges stream.Ranges + ranges stream.IndexRanges reverse bool fails bool }{ - {name: "empty"}, + {name: "empty", indexOn: "a"}, { - "no range", + "no range", "a", testutil.MakeDocuments(t, `{"a": 1}`, `{"a": 2}`), testutil.MakeDocuments(t, `{"a": 1}`, `{"a": 2}`), nil, false, false, }, { - "max:2", + "no range", "a, b", + testutil.MakeDocuments(t, `{"a": 1, "b": 2}`, `{"a": 2, "b": 3}`), + testutil.MakeDocuments(t, `{"a": 1, "b": 2}`, `{"a": 2, "b": 3}`), + nil, false, false, + }, + { + "max:2", "a", testutil.MakeDocuments(t, `{"a": 1}`, `{"a": 2}`), testutil.MakeDocuments(t, `{"a": 1}`, `{"a": 2}`), - stream.Ranges{ - {Max: document.NewIntegerValue(2)}, + stream.IndexRanges{ + {Max: newVB(document.NewIntegerValue(2))}, }, false, false, }, { - "max:1", + "max:[2, 2]", "a, b", + testutil.MakeDocuments(t, `{"a": 1, "b": 2}`, `{"a": 2, "b": 2}`), + testutil.MakeDocuments(t, `{"a": 1, "b": 2}`, `{"a": 2, "b": 2}`), + stream.IndexRanges{ + {Max: newVB( + document.NewIntegerValue(2), + document.NewIntegerValue(2), + )}, + }, + false, false, + }, + { + "max:1", "a", testutil.MakeDocuments(t, `{"a": 1}`, `{"a": 2}`), testutil.MakeDocuments(t, `{"a": 1}`), - stream.Ranges{ - {Max: document.NewIntegerValue(1)}, + stream.IndexRanges{ + {Max: newVB(document.NewIntegerValue(1))}, }, false, false, }, { - "min", + "max:[1, 2]", "a, b", + testutil.MakeDocuments(t, `{"a": 1, "b": 2}`, `{"a": 2, "b": 2}`), + testutil.MakeDocuments(t, `{"a": 1, "b": 2}`), + stream.IndexRanges{ + {Max: newVB( + document.NewIntegerValue(1), + document.NewIntegerValue(2), + )}, + }, + false, false, + }, + { + "min", "a", testutil.MakeDocuments(t, `{"a": 1}`, `{"a": 2}`), testutil.MakeDocuments(t, `{"a": 1}`, `{"a": 2}`), - stream.Ranges{ - {Min: document.NewIntegerValue(1)}, + stream.IndexRanges{ + {Min: newVB(document.NewIntegerValue(1))}, }, false, false, }, { - "min/max", + "min:[2, 1]", "a, b", + testutil.MakeDocuments(t, `{"a": 1, "b": 2}`, `{"a": 2, "b": 2}`), + testutil.MakeDocuments(t, `{"a": 2, "b": 2}`), + stream.IndexRanges{ + { + Min: newVB(document.NewIntegerValue(2), document.NewIntegerValue(1)), + }, + }, + false, false, + }, + { + "min/max", "a", testutil.MakeDocuments(t, `{"a": 1}`, `{"a": 2}`), testutil.MakeDocuments(t, `{"a": 1}`, `{"a": 2}`), - stream.Ranges{ - {Min: document.NewIntegerValue(1), Max: document.NewIntegerValue(2)}, + stream.IndexRanges{ + { + Min: newVB(document.NewIntegerValue(1)), 
+ Max: newVB(document.NewIntegerValue(2)), + }, }, false, false, }, { - "reverse/no range", + "min:[1, 1], max:[2,2]", "a, b", + testutil.MakeDocuments(t, `{"a": 1, "b": 2}`, `{"a": 2, "b": 2}`), + testutil.MakeDocuments(t, `{"a": 1, "b": 2}`, `{"a": 2, "b": 2}`), + stream.IndexRanges{ + { + Min: newVB(document.NewIntegerValue(1), document.NewIntegerValue(1)), + Max: newVB(document.NewIntegerValue(2), document.NewIntegerValue(2)), + }, + }, + false, false, + }, + { + "min:[1, 1], max:[2,2] bis", "a, b", + testutil.MakeDocuments(t, `{"a": 1, "b": 3}`, `{"a": 2, "b": 2}`), + testutil.MakeDocuments(t, `{"a": 1, "b": 3}`, `{"a": 2, "b": 2}`), // [1, 3] < [2, 2] + stream.IndexRanges{ + { + Min: newVB(document.NewIntegerValue(1), document.NewIntegerValue(1)), + Max: newVB(document.NewIntegerValue(2), document.NewIntegerValue(2)), + }, + }, + false, false, + }, + { + "reverse/no range", "a", testutil.MakeDocuments(t, `{"a": 1}`, `{"a": 2}`), testutil.MakeDocuments(t, `{"a": 2}`, `{"a": 1}`), nil, true, false, }, { - "reverse/max", + "reverse/max", "a", testutil.MakeDocuments(t, `{"a": 1}`, `{"a": 2}`), testutil.MakeDocuments(t, `{"a": 2}`, `{"a": 1}`), - stream.Ranges{ - {Max: document.NewIntegerValue(2)}, + stream.IndexRanges{ + {Max: newVB(document.NewIntegerValue(2))}, }, true, false, }, { - "reverse/min", + "reverse/max", "a, b", + testutil.MakeDocuments(t, `{"a": 1, "b": 1}`, `{"a": 2, "b": 2}`), + testutil.MakeDocuments(t, `{"a": 2, "b": 2}`, `{"a": 1, "b": 1}`), + stream.IndexRanges{ + { + Max: newVB(document.NewIntegerValue(2), document.NewIntegerValue(2)), + }, + }, + true, false, + }, + { + "reverse/min", "a", testutil.MakeDocuments(t, `{"a": 1}`, `{"a": 2}`), testutil.MakeDocuments(t, `{"a": 2}`, `{"a": 1}`), - stream.Ranges{ - {Min: document.NewIntegerValue(1)}, + stream.IndexRanges{ + {Min: newVB(document.NewIntegerValue(1))}, }, true, false, }, { - "reverse/min/max", + "reverse/min neg", "a", + testutil.MakeDocuments(t, `{"a": 1}`, `{"a": -2}`), + testutil.MakeDocuments(t, `{"a": 1}`), + stream.IndexRanges{ + {Min: newVB(document.NewIntegerValue(1))}, + }, + true, false, + }, + { + "reverse/min", "a, b", + testutil.MakeDocuments(t, `{"a": 1, "b": 1}`, `{"a": 2, "b": 2}`), + testutil.MakeDocuments(t, `{"a": 2, "b": 2}`, `{"a": 1, "b": 1}`), + stream.IndexRanges{ + { + Min: newVB(document.NewIntegerValue(1), document.NewIntegerValue(1)), + }, + }, + true, false, + }, + { + "reverse/min/max", "a", testutil.MakeDocuments(t, `{"a": 1}`, `{"a": 2}`), testutil.MakeDocuments(t, `{"a": 2}`, `{"a": 1}`), - stream.Ranges{ - {Min: document.NewIntegerValue(1), Max: document.NewIntegerValue(2)}, + stream.IndexRanges{ + { + Min: newVB(document.NewIntegerValue(1)), + Max: newVB(document.NewIntegerValue(2)), + }, + }, + true, false, + }, + { + "reverse/min/max", "a, b", + testutil.MakeDocuments(t, `{"a": 1, "b": 1}`, `{"a": 2, "b": 2}`), + testutil.MakeDocuments(t, `{"a": 2, "b": 2}`, `{"a": 1, "b": 1}`), + stream.IndexRanges{ + { + Min: newVB(document.NewIntegerValue(1), document.NewIntegerValue(1)), + Max: newVB(document.NewIntegerValue(2), document.NewIntegerValue(2)), + }, + }, + true, false, + }, + { + "max:[1]", "a, b", + testutil.MakeDocuments(t, `{"a": 1, "b": 1}`, `{"a": 2, "b": 2}`, `{"a": 1, "b": 9223372036854775807}`), + testutil.MakeDocuments(t, `{"a": 1, "b": 1}`, `{"a": 1, "b": 9223372036854775807}`), + stream.IndexRanges{ + { + IndexArity: 2, + Max: newVB(document.NewIntegerValue(1)), + }, + }, + false, false, + }, + { + "reverse max:[1]", "a, b", + testutil.MakeDocuments(t, `{"a": 
1, "b": 1}`, `{"a": 2, "b": 2}`, `{"a": 1, "b": 9223372036854775807}`), + testutil.MakeDocuments(t, `{"a": 1, "b": 9223372036854775807}`, `{"a": 1, "b": 1}`), + stream.IndexRanges{ + { + Max: newVB(document.NewIntegerValue(1)), + Exclusive: false, + Exact: false, + IndexArity: 2, + }, + }, + true, false, + }, + { + "max:[1, 2]", "a, b, c", + testutil.MakeDocuments(t, `{"a": 1, "b": 2, "c": 1}`, `{"a": 2, "b": 2, "c": 2}`, `{"a": 1, "b": 2, "c": 9223372036854775807}`), + testutil.MakeDocuments(t, `{"a": 1, "b": 2, "c": 1}`, `{"a": 1, "b": 2, "c": 9223372036854775807}`), + stream.IndexRanges{ + {IndexArity: 3, Max: newVB(document.NewIntegerValue(1), document.NewIntegerValue(2))}, + }, + false, false, + }, + { + "min:[1]", "a, b", + testutil.MakeDocuments(t, `{"a": 1, "b": -2}`, `{"a": -2, "b": 2}`, `{"a": 1, "b": 1}`), + testutil.MakeDocuments(t, `{"a": 1, "b": -2}`, `{"a": 1, "b": 1}`), + stream.IndexRanges{ + {Min: newVB(document.NewIntegerValue(1))}, + }, + false, false, + }, + { + "min:[1]", "a, b, c", + testutil.MakeDocuments(t, `{"a": 1, "b": -2, "c": 0}`, `{"a": -2, "b": 2, "c": 1}`, `{"a": 1, "b": 1, "c": 2}`), + testutil.MakeDocuments(t, `{"a": 1, "b": -2, "c": 0}`, `{"a": 1, "b": 1, "c": 2}`), + stream.IndexRanges{ + {Min: newVB(document.NewIntegerValue(1))}, + }, + false, false, + }, + { + "reverse min:[1]", "a, b", + testutil.MakeDocuments(t, `{"a": 1, "b": -2}`, `{"a": -2, "b": 2}`, `{"a": 1, "b": 1}`), + testutil.MakeDocuments(t, `{"a": 1, "b": 1}`, `{"a": 1, "b": -2}`), + stream.IndexRanges{ + {Min: newVB(document.NewIntegerValue(1))}, + }, + true, false, + }, + { + "min:[1], max[2]", "a, b", + testutil.MakeDocuments(t, `{"a": 1, "b": -2}`, `{"a": -2, "b": 2}`, `{"a": 2, "b": 42}`, `{"a": 3, "b": -1}`), + testutil.MakeDocuments(t, `{"a": 1, "b": -2}`, `{"a": 2, "b": 42}`), + stream.IndexRanges{ + { + IndexArity: 2, + Min: newVB(document.NewIntegerValue(1)), + Max: newVB(document.NewIntegerValue(2)), + }, + }, + false, false, + }, + { + "reverse min:[1], max[2]", "a, b", + testutil.MakeDocuments(t, `{"a": 1, "b": -2}`, `{"a": -2, "b": 2}`, `{"a": 2, "b": 42}`, `{"a": 3, "b": -1}`), + testutil.MakeDocuments(t, `{"a": 2, "b": 42}`, `{"a": 1, "b": -2}`), + stream.IndexRanges{ + { + IndexArity: 2, + Min: newVB(document.NewIntegerValue(1)), + Max: newVB(document.NewIntegerValue(2)), + }, }, true, false, }, } for _, test := range tests { - t.Run(test.name, func(t *testing.T) { + t.Run(test.name+"index on "+test.indexOn, func(t *testing.T) { db, err := genji.Open(":memory:") require.NoError(t, err) defer db.Close() - err = db.Exec("CREATE TABLE test (a INTEGER); CREATE INDEX idx_test_a ON test(a)") + err = db.Exec("CREATE TABLE test (a INTEGER, b INTEGER, c INTEGER); CREATE INDEX idx_test_a ON test(" + test.indexOn + ")") require.NoError(t, err) for _, doc := range test.docsInTable { @@ -411,15 +615,32 @@ func TestIndexScan(t *testing.T) { } t.Run("String", func(t *testing.T) { - require.Equal(t, `indexScan("idx_test_a", [1, 2])`, stream.IndexScan("idx_test_a", stream.Range{ - Min: document.NewIntegerValue(1), Max: document.NewIntegerValue(2), - }).String()) + t.Run("idx_test_a", func(t *testing.T) { + require.Equal(t, `indexScan("idx_test_a", [1, 2])`, stream.IndexScan("idx_test_a", stream.IndexRange{ + Min: newVB(document.NewIntegerValue(1)), Max: newVB(document.NewIntegerValue(2)), + }).String()) - op := stream.IndexScan("idx_test_a", stream.Range{ - Min: document.NewIntegerValue(1), Max: document.NewIntegerValue(2), + op := stream.IndexScan("idx_test_a", stream.IndexRange{ + Min: 
newVB(document.NewIntegerValue(1)), Max: newVB(document.NewIntegerValue(2)), + }) + op.Reverse = true + + require.Equal(t, `indexScanReverse("idx_test_a", [1, 2])`, op.String()) }) - op.Reverse = true - require.Equal(t, `indexScanReverse("idx_test_a", [1, 2])`, op.String()) + t.Run("idx_test_a_b", func(t *testing.T) { + require.Equal(t, `indexScan("idx_test_a_b", [[1, 1], [2, 2]])`, stream.IndexScan("idx_test_a_b", stream.IndexRange{ + Min: newVB(document.NewIntegerValue(1), document.NewIntegerValue(1)), + Max: newVB(document.NewIntegerValue(2), document.NewIntegerValue(2)), + }).String()) + + op := stream.IndexScan("idx_test_a_b", stream.IndexRange{ + Min: newVB(document.NewIntegerValue(1), document.NewIntegerValue(1)), + Max: newVB(document.NewIntegerValue(2), document.NewIntegerValue(2)), + }) + op.Reverse = true + + require.Equal(t, `indexScanReverse("idx_test_a_b", [[1, 1], [2, 2]])`, op.String()) + }) }) } diff --git a/stream/range.go b/stream/range.go new file mode 100644 index 000000000..479117321 --- /dev/null +++ b/stream/range.go @@ -0,0 +1,501 @@ +package stream + +import ( + "bytes" + "strings" + + "github.com/genjidb/genji/document" + "github.com/genjidb/genji/expr" + "github.com/genjidb/genji/stringutil" +) + +type Costable interface { + Cost() int +} + +type ValueRange struct { + Min, Max document.Value + // Exclude Min and Max from the results. + // By default, min and max are inclusive. + // Exclusive and Exact cannot be set to true at the same time. + Exclusive bool + // Used to match an exact value equal to Min. + // If set to true, Max will be ignored for comparison + // and for determining the global upper bound. + Exact bool + + encodedMin, encodedMax []byte + rangeType document.ValueType +} + +func (r *ValueRange) encode(encoder ValueEncoder, env *expr.Environment) error { + var err error + + // first we evaluate Min and Max + if !r.Min.Type.IsAny() { + r.encodedMin, err = encoder.EncodeValue(r.Min) + if err != nil { + return err + } + r.rangeType = r.Min.Type + } + if !r.Max.Type.IsAny() { + r.encodedMax, err = encoder.EncodeValue(r.Max) + if err != nil { + return err + } + if !r.rangeType.IsAny() && r.rangeType != r.Max.Type { + panic("range contain values of different types") + } + + r.rangeType = r.Max.Type + } + + // ensure boundaries are typed + if r.Min.Type.IsAny() { + r.Min.Type = r.rangeType + } + if r.Max.Type.IsAny() { + r.Max.Type = r.rangeType + } + + if r.Exclusive && r.Exact { + panic("exclusive and exact cannot both be true") + } + + return nil +} + +func (r *ValueRange) String() string { + if r.Exact { + return stringutil.Sprintf("%v", r.Min) + } + + if r.Min.Type.IsAny() { + r.Min = document.NewIntegerValue(-1) + } + if r.Max.Type.IsAny() { + r.Max = document.NewIntegerValue(-1) + } + + if r.Exclusive { + return stringutil.Sprintf("[%v, %v, true]", r.Min, r.Max) + } + + return stringutil.Sprintf("[%v, %v]", r.Min, r.Max) +} + +func (r *ValueRange) IsEqual(other *ValueRange) bool { + if r.Exact != other.Exact { + return false + } + + if r.rangeType != other.rangeType { + return false + } + + if r.Exclusive != other.Exclusive { + return false + } + + if r.Min.Type != other.Min.Type { + return false + } + ok, err := r.Min.IsEqual(other.Min) + if err != nil || !ok { + return false + } + + if r.Max.Type != other.Max.Type { + return false + } + ok, err = r.Max.IsEqual(other.Max) + if err != nil || !ok { + return false + } + + return true +} + +type ValueRanges []ValueRange + +// Append rng to r and return the new slice. +// Duplicate ranges are ignored. 
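A short sketch of the deduplication performed by ValueRanges.Append defined just below (illustrative only; assumes two ranges with identical boundaries compare equal via IsEqual):

package main

import (
	"fmt"

	"github.com/genjidb/genji/document"
	"github.com/genjidb/genji/stream"
)

func main() {
	rng := stream.ValueRange{
		Min: document.NewIntegerValue(1),
		Max: document.NewIntegerValue(2),
	}

	var ranges stream.ValueRanges
	ranges = ranges.Append(rng)
	ranges = ranges.Append(rng) // identical range: Append drops the duplicate

	fmt.Println(len(ranges)) // 1
}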
+func (r ValueRanges) Append(rng ValueRange) ValueRanges { + // ensure we don't keep duplicate ranges + isDuplicate := false + for _, e := range r { + if e.IsEqual(&rng) { + isDuplicate = true + break + } + } + + if isDuplicate { + return r + } + + return append(r, rng) +} + +type ValueEncoder interface { + EncodeValue(v document.Value) ([]byte, error) +} + +// Encode each range using the given value encoder. +func (r ValueRanges) Encode(encoder ValueEncoder, env *expr.Environment) error { + for i := range r { + err := r[i].encode(encoder, env) + if err != nil { + return err + } + } + + return nil +} + +func (r ValueRanges) String() string { + var sb strings.Builder + + for i, rr := range r { + if i > 0 { + sb.WriteString(", ") + } + + sb.WriteString(rr.String()) + } + + return sb.String() +} + +// Cost is a best effort function to determine the cost of +// a range lookup. +func (r ValueRanges) Cost() int { + var cost int + + for _, rng := range r { + // if we are looking for an exact value + // increment by 1 + if rng.Exact { + cost++ + continue + } + + // if there are two boundaries, increment by 50 + if !rng.Min.Type.IsAny() && !rng.Max.Type.IsAny() { + cost += 50 + } + + // if there is only one boundary, increment by 100 + if (!rng.Min.Type.IsAny() && rng.Max.Type.IsAny()) || (rng.Min.Type.IsAny() && !rng.Max.Type.IsAny()) { + cost += 100 + continue + } + + // if there are no boundaries, increment by 200 + cost += 200 + } + + return cost +} + +func (r *ValueRange) IsInRange(value []byte) bool { + // by default, we consider the value within range + cmpMin, cmpMax := 1, -1 + + // we compare with the lower bound and see if it matches + if r.encodedMin != nil { + cmpMin = bytes.Compare(value, r.encodedMin) + } + + // if exact is true the value has to be equal to the lower bound. + if r.Exact { + return cmpMin == 0 + } + + // if exclusive and the value is equal to the lower bound + // we can ignore it + if r.Exclusive && cmpMin == 0 { + return false + } + + // the value is bigger than the lower bound, + // see if it matches the upper bound. + if r.encodedMax != nil { + cmpMax = bytes.Compare(value, r.encodedMax) + } + + // if boundaries are strict, ignore values equal to the max + if r.Exclusive && cmpMax == 0 { + return false + } + + return cmpMax <= 0 +} + +// IndexRange represents a range to select indexed values after or before +// a given boundary. Because indexes can be composites, IndexRange boundaries +// are composite as well. +type IndexRange struct { + Min, Max *document.ValueBuffer + + // Exclude Min and Max from the results. + // By default, min and max are inclusive. + // Exclusive and Exact cannot be set to true at the same time. + Exclusive bool + // Used to match an exact value equal to Min. + // If set to true, Max will be ignored for comparison + // and for determining the global upper bound. + Exact bool + + // IndexArity is the underlying index arity, which can be greater + // than the boundaries of this range. 
+ IndexArity int + + encodedMin, encodedMax []byte + rangeTypes []document.ValueType +} + +func (r *IndexRange) encode(encoder ValueBufferEncoder, env *expr.Environment) error { + var err error + + // first we evaluate Min and Max + if r.Min.Len() > 0 { + r.encodedMin, err = encoder.EncodeValueBuffer(r.Min) + if err != nil { + return err + } + r.rangeTypes = r.Min.Types() + } + + if r.Max.Len() > 0 { + r.encodedMax, err = encoder.EncodeValueBuffer(r.Max) + if err != nil { + return err + } + + if len(r.rangeTypes) > 0 { + maxTypes := r.Max.Types() + + if len(maxTypes) != len(r.rangeTypes) { + panic("range types for max and min differ in size") + } + + for i, typ := range maxTypes { + if typ != r.rangeTypes[i] { + panic("range contain values of different types") + } + } + } + + r.rangeTypes = r.Max.Types() + } + + // Ensure boundaries are typed, at least with the first type + if r.Max.Len() == 0 && r.Min.Len() > 0 { + v, err := r.Min.GetByIndex(0) + if err != nil { + return err + } + + r.Max = document.NewValueBuffer(document.Value{Type: v.Type}) + } + + if r.Min.Len() == 0 && r.Max.Len() > 0 { + v, err := r.Max.GetByIndex(0) + if err != nil { + return err + } + + r.Min = document.NewValueBuffer(document.Value{Type: v.Type}) + } + + if r.Exclusive && r.Exact { + panic("exclusive and exact cannot both be true") + } + + return nil +} + +func (r *IndexRange) String() string { + format := func(vb *document.ValueBuffer) string { + switch vb.Len() { + case 0: + return "-1" + case 1: + return vb.Values[0].String() + default: + b, err := vb.MarshalJSON() + if err != nil { + return "err" + } + + return string(b) + } + } + + if r.Exact { + return stringutil.Sprintf("%v", format(r.Min)) + } + + if r.Exclusive { + return stringutil.Sprintf("[%v, %v, true]", format(r.Min), format(r.Max)) + } + + return stringutil.Sprintf("[%v, %v]", format(r.Min), format(r.Max)) +} + +func (r *IndexRange) IsEqual(other *IndexRange) bool { + if r.Exact != other.Exact { + return false + } + + for i, typ := range r.rangeTypes { + if typ != other.rangeTypes[i] { + return false + } + } + + if r.Exclusive != other.Exclusive { + return false + } + + if r.Min.Len() != other.Min.Len() { + return false + } + + if r.Max.Len() != other.Max.Len() { + return false + } + + if !r.Min.IsEqual(other.Min) { + return false + } + + if !r.Max.IsEqual(other.Max) { + return false + } + + return true +} + +type IndexRanges []IndexRange + +// Append rng to r and return the new slice. +// Duplicate ranges are ignored. +func (r IndexRanges) Append(rng IndexRange) IndexRanges { + // ensure we don't keep duplicate ranges + isDuplicate := false + for _, e := range r { + if e.IsEqual(&rng) { + isDuplicate = true + break + } + } + + if isDuplicate { + return r + } + + return append(r, rng) +} + +type ValueBufferEncoder interface { + EncodeValueBuffer(vb *document.ValueBuffer) ([]byte, error) +} + +// Encode each range using the given value encoder. +func (r IndexRanges) EncodeBuffer(encoder ValueBufferEncoder, env *expr.Environment) error { + for i := range r { + err := r[i].encode(encoder, env) + if err != nil { + return err + } + } + + return nil +} + +func (r IndexRanges) String() string { + var sb strings.Builder + + for i, rr := range r { + if i > 0 { + sb.WriteString(", ") + } + + sb.WriteString(rr.String()) + } + + return sb.String() +} + +// Cost is a best effort function to determine the cost of +// a range lookup. 
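The cost heuristic described above can be summarized with a standalone sketch. costOf is a hypothetical helper written only for illustration; the constants mirror IndexRanges.Cost as defined below:

package main

import "fmt"

// costOf mirrors the best-effort heuristic used by IndexRanges.Cost in this diff:
// an exact match costs 1, two boundaries 50, a single boundary 100, no boundary 200.
func costOf(exact, hasMin, hasMax bool) int {
	switch {
	case exact:
		return 1
	case hasMin && hasMax:
		return 50
	case hasMin || hasMax:
		return 100
	default:
		return 200
	}
}

func main() {
	fmt.Println(costOf(true, true, false))   // 1: exact lookup
	fmt.Println(costOf(false, true, true))   // 50: bounded on both sides
	fmt.Println(costOf(false, true, false))  // 100: half-open range
	fmt.Println(costOf(false, false, false)) // 200: full index scan
}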
+func (r IndexRanges) Cost() int { + var cost int + + for _, rng := range r { + // if we are looking for an exact value + // increment by 1 + if rng.Exact { + cost++ + continue + } + + // if there are two boundaries, increment by 50 + if rng.Min.Len() > 0 && rng.Max.Len() > 0 { + cost += 50 + continue + } + + // if there is only one boundary, increment by 100 + if rng.Min.Len() > 0 || rng.Max.Len() > 0 { + cost += 100 + continue + } + + // if there are no boundaries, increment by 200 + cost += 200 + } + + return cost +} + +func (r *IndexRange) IsInRange(value []byte) bool { + // by default, we consider the value within range + cmpMin, cmpMax := 1, -1 + + // we compare with the lower bound and see if it matches + if r.encodedMin != nil { + cmpMin = bytes.Compare(value, r.encodedMin) + } + + // if exact is true the value has to be equal to the lower bound. + if r.Exact { + return cmpMin == 0 + } + + // if exclusive and the value is equal to the lower bound + // we can ignore it + if r.Exclusive && cmpMin == 0 { + return false + } + + // the value is bigger than the lower bound, + // see if it matches the upper bound. + if r.encodedMax != nil { + if r.Max.Len() < r.IndexArity { + cmpMax = bytes.Compare(value[:len(r.encodedMax)], r.encodedMax) + } else { + cmpMax = bytes.Compare(value, r.encodedMax) + } + } + + // if boundaries are strict, ignore values equal to the max + if r.Exclusive && cmpMax == 0 { + return false + } + + return cmpMin >= 0 && cmpMax <= 0 +} diff --git a/testutil/document.go b/testutil/document.go index 0c6bbb6c2..d2af6ed9e 100644 --- a/testutil/document.go +++ b/testutil/document.go @@ -11,12 +11,25 @@ import ( ) // MakeValue turns v into a document.Value. -func MakeValue(t testing.TB, v interface{}) *document.Value { +func MakeValue(t testing.TB, v interface{}) document.Value { t.Helper() vv, err := document.NewValue(v) require.NoError(t, err) - return &vv + return vv +} + +func MakeArrayValue(t testing.TB, vs ...interface{}) document.Value { + t.Helper() + + vvs := []document.Value{} + for _, v := range vs { + vvs = append(vvs, MakeValue(t, v)) + } + + vb := document.NewValueBuffer(vvs...) + + return document.NewArrayValue(vb) } // MakeDocument creates a document from a json string. @@ -51,6 +64,17 @@ func MakeArray(t testing.TB, jsonArray string) document.Array { return &vb } +func MakeValueBuffer(t testing.TB, jsonArray string) *document.ValueBuffer { + t.Helper() + + var vb document.ValueBuffer + + err := vb.UnmarshalJSON([]byte(jsonArray)) + require.NoError(t, err) + + return &vb +} + type Docs []document.Document func (docs Docs) RequireEqual(t testing.TB, others Docs) { diff --git a/testutil/index.go b/testutil/index.go index 6bd251e93..855789796 100644 --- a/testutil/index.go +++ b/testutil/index.go @@ -16,7 +16,7 @@ func GetIndexContent(t testing.TB, tx *database.Transaction, indexName string) [ require.NoError(t, err) var content []KV - err = idx.AscendGreaterOrEqual(document.Value{}, func(val, key []byte) error { + err = idx.AscendGreaterOrEqual([]document.Value{{}}, func(val, key []byte) error { content = append(content, KV{ Key: append([]byte{}, val...), Value: append([]byte{}, key...),
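At the SQL level, the composite indexes exercised by these tests are created and populated as follows; a minimal sketch using genji.Open and db.Exec exactly as the tests in this diff do:

package main

import (
	"fmt"

	"github.com/genjidb/genji"
)

func main() {
	db, err := genji.Open(":memory:")
	if err != nil {
		panic(err)
	}
	defer db.Close()

	// Composite index on two paths, matching the statements used in the tests above.
	err = db.Exec(`
		CREATE TABLE test (a INTEGER, b INTEGER, c INTEGER);
		CREATE INDEX idx_test_a_b ON test (a, b);
		INSERT INTO test (a, b, c) VALUES (1, 2, 3);
	`)
	if err != nil {
		panic(err)
	}

	fmt.Println("composite index idx_test_a_b created on test (a, b)")
}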