From 58baee921b3709da6cdda596be33270494454a29 Mon Sep 17 00:00:00 2001 From: mirito333 Date: Wed, 11 Sep 2024 11:56:34 +0900 Subject: [PATCH 1/7] feat: add usearch --- Makefile | 20 ++ go.mod | 1 + go.sum | 2 + internal/core/algorithm/usearch/option.go | 164 ++++++++++ internal/core/algorithm/usearch/usearch.go | 249 +++++++++++++++ .../core/algorithm/usearch/usearch_test.go | 295 ++++++++++++++++++ internal/errors/usearch.go | 32 ++ versions/USEARCH_VERSION | 1 + 8 files changed, 764 insertions(+) create mode 100644 internal/core/algorithm/usearch/option.go create mode 100644 internal/core/algorithm/usearch/usearch.go create mode 100644 internal/core/algorithm/usearch/usearch_test.go create mode 100644 internal/errors/usearch.go create mode 100644 versions/USEARCH_VERSION diff --git a/Makefile b/Makefile index 42b9302bfc..3207c28e7f 100644 --- a/Makefile +++ b/Makefile @@ -85,6 +85,7 @@ BUF_VERSION := $(eval BUF_VERSION := $(shell cat versions/BUF_VERS CMAKE_VERSION := $(eval CMAKE_VERSION := $(shell cat versions/CMAKE_VERSION))$(CMAKE_VERSION) DOCKER_VERSION := $(eval DOCKER_VERSION := $(shell cat versions/DOCKER_VERSION))$(DOCKER_VERSION) FAISS_VERSION := $(eval FAISS_VERSION := $(shell cat versions/FAISS_VERSION))$(FAISS_VERSION) +USEARCH_VERSION := $(eval USEARCH_VERSION := $(shell cat versions/USEARCH_VERSION))$(USEARCH_VERSION) GOLANGCILINT_VERSION := $(eval GOLANGCILINT_VERSION := $(shell cat versions/GOLANGCILINT_VERSION))$(GOLANGCILINT_VERSION) GO_VERSION := $(eval GO_VERSION := $(shell cat versions/GO_VERSION))$(GO_VERSION) HDF5_VERSION := $(eval HDF5_VERSION := $(shell cat versions/HDF5_VERSION))$(HDF5_VERSION) @@ -603,6 +604,11 @@ version/ngt: version/faiss: @echo $(FAISS_VERSION) +.PHONY: version/usearch +## print usearch version +version/usearch: + @echo $(USEARCH_VERSION) + .PHONY: version/docker ## print Kubernetes version version/docker: @@ -677,6 +683,20 @@ $(LIB_PATH)/libfaiss.a: rm -rf $(TEMP_DIR)/v$(FAISS_VERSION).tar.gz 
$(TEMP_DIR)/faiss-$(FAISS_VERSION) ldconfig +.PHONY: usearch/install +## install usearch +usearch/install: +ifeq ($(OS),linux) + curl -sSL https://github.com/unum-cloud/usearch/releases/download/v$(USEARCH_VERSION)/usearch_$(OS)_$(GOARCH)_$(USEARCH_VERSION).deb -o usearch_$(OS)_$(USEARCH_VERSION).deb + dpkg -i usearch_$(OS)_$(USEARCH_VERSION).deb + rm usearch_$(OS)_$(USEARCH_VERSION).deb +else ifeq ($(OS),macos) + curl -sSL https://github.com/unum-cloud/usearch/releases/download/v$(USEARCH_VERSION)/usearch_macos_$(GOARCH)_$(USEARCH_VERSION).zip -o usearch_macos_$(OS)_$(USEARCH_VERSION).zip + unzip usearch_macos_$(OS)_$(USEARCH_VERSION).zip + sudo mv libusearch_c.dylib /usr/local/lib && sudo mv usearch.h /usr/local/include + rm -rf usearch_macos_$(OS)_$(USEARCH_VERSION).zip +endif + .PHONY: cmake/install ## install CMAKE cmake/install: diff --git a/go.mod b/go.mod index 3e63824046..fea8003d5d 100644 --- a/go.mod +++ b/go.mod @@ -380,6 +380,7 @@ require ( github.com/quasilyte/go-ruleguard/dsl v0.3.22 github.com/scylladb/gocqlx v0.0.0-00010101000000-000000000000 github.com/stretchr/testify v1.9.0 + github.com/unum-cloud/usearch/golang v0.0.0-20240828190432-b9a9758a06e1 github.com/zeebo/xxh3 v1.0.2 go.etcd.io/bbolt v1.3.8 go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.53.0 diff --git a/go.sum b/go.sum index 309a52862a..3295469f7c 100644 --- a/go.sum +++ b/go.sum @@ -635,6 +635,8 @@ github.com/tidwall/gjson v1.14.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vl github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM= github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= github.com/tidwall/sjson v1.2.5/go.mod h1:Fvgq9kS/6ociJEDnK0Fk1cpYF4FIW6ZF7LAe+6jwd28= +github.com/unum-cloud/usearch/golang v0.0.0-20240828190432-b9a9758a06e1 h1:hILse+Dt0Sk6RfyG19Ld48kcdTOnHx2F6dm3QH1X4Mw= +github.com/unum-cloud/usearch/golang v0.0.0-20240828190432-b9a9758a06e1/go.mod 
h1:NxBpQibuBBeA/V8RGbrNzVAv4OyWWL5yNao7mVz656k= github.com/urfave/cli/v2 v2.4.0/go.mod h1:NX9W0zmTvedE5oDoOMs2RTC8RvdK98NTYZE5LbaEYPg= github.com/xlab/treeprint v1.2.0 h1:HzHnuAF1plUN2zGlAFHbSQP2qJ0ZAD3XF5XD7OesXRQ= github.com/xlab/treeprint v1.2.0/go.mod h1:gj5Gd3gPdKtR1ikdDK6fnFLdmIS0X30kTTuNd/WEJu0= diff --git a/internal/core/algorithm/usearch/option.go b/internal/core/algorithm/usearch/option.go new file mode 100644 index 0000000000..ffaffedfcb --- /dev/null +++ b/internal/core/algorithm/usearch/option.go @@ -0,0 +1,164 @@ +// +// Copyright (C) 2019-2024 vdaas.org vald team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// You may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +// Package usearch provides implementation of Go API for https://github.com/unum-cloud/usearch +package usearch + +import ( + "strconv" + "strings" + + "github.com/kpango/fastime" + core "github.com/unum-cloud/usearch/golang" + "github.com/vdaas/vald/internal/core/algorithm" + "github.com/vdaas/vald/internal/errors" +) + +// Option represents the functional option for usearch. +type Option func(*usearch) error + +var defaultOptions = []Option{ + WithIndexPath("/tmp/usearch-" + strconv.FormatInt(fastime.UnixNanoNow(), 10)), + WithQuantizationType("F32"), + WithMetricType("cosine"), + WithDimension(64), + WithConnectivity(0), + WithExpansionAdd(0), + WithExpansionSearch(0), + WithMulti(false), +} + +// WithIndexPath represents the option to set the index path for usearch. 
+func WithIndexPath(path string) Option { + return func(u *usearch) error { + if len(path) == 0 { + return errors.NewErrIgnoredOption("indexPath") + } + u.idxPath = path + return nil + } +} + +// WithQuantizationType represents the option to set the quantizationType for usearch. +func WithQuantizationType(quantizationType string) Option { + return func(u *usearch) error { + switch quantizationType{ + case "BF16": + u.quantizationType = core.BF16 + case "F16": + u.quantizationType = core.F16 + case "F32": + u.quantizationType = core.F32 + case "F64": + u.quantizationType = core.F64 + case "I8": + u.quantizationType = core.I8 + case "B1": + u.quantizationType = core.B1 + default: + err := errors.NewUsearchError("unsupported QuantizationType") + return errors.NewErrCriticalOption("QuantizationType", quantizationType, err) + } + return nil + } +} + +// WithMetricType represents the option to set the metricType for usearch. +func WithMetricType(metricType string) Option { + return func(u *usearch) error { + switch strings.NewReplacer("-", "", "_", "", " ", "").Replace(strings.ToLower(metricType)) { + case "l2sq": + u.metricType = core.L2sq + case "ip": + u.metricType = core.InnerProduct + case "cosine": + u.metricType = core.Cosine + case "haversine": + u.metricType = core.Haversine + case "divergence": + u.metricType = core.Divergence + case "pearson": + u.metricType = core.Pearson + case "hamming": + u.metricType = core.Hamming + case "tanimoto": + u.metricType = core.Tanimoto + case "sorensen": + u.metricType = core.Sorensen + default: + err := errors.NewUsearchError("unsupported MetricType") + return errors.NewErrCriticalOption("MetricType", metricType, err) + } + return nil + } +} + +// WithDimension represents the option to set the dimension for usearch. 
+func WithDimension(dim int) Option { + return func(u *usearch) error { + if dim > algorithm.MaximumVectorDimensionSize || dim < algorithm.MinimumVectorDimensionSize { + err := errors.ErrInvalidDimensionSize(dim, algorithm.MaximumVectorDimensionSize) + return errors.NewErrCriticalOption("dimension", dim, err) + } + + u.dimension = uint(dim) + return nil + } +} + +// WithConnectivity represents the option to set the connectivity for usearch. +func WithConnectivity(connectivity int) Option { + return func(u *usearch) error { + if connectivity < 0 { + return errors.NewErrInvalidOption("Connectivity", connectivity) + } + + u.connectivity = uint(connectivity) + return nil + } +} + +// WithExpansionAdd represents the option to set the expansion add for usearch. +func WithExpansionAdd(expansionAdd int) Option { + return func(u *usearch) error { + if expansionAdd < 0 { + return errors.NewErrInvalidOption("Expansion Add", expansionAdd) + } + + u.expansionAdd = uint(expansionAdd) + return nil + } +} + +// WithExpansionSearch represents the option to set the expansion search for usearch. +func WithExpansionSearch(expansionSearch int) Option { + return func(u *usearch) error { + if expansionSearch < 0 { + return errors.NewErrInvalidOption("Expansion Search", expansionSearch) + } + + u.expansionSearch = uint(expansionSearch) + return nil + } +} + +// WithMulti represents the option to set the multi for usearch. +func WithMulti(multi bool) Option { + return func(u *usearch) error { + u.multi = multi + return nil + } +} diff --git a/internal/core/algorithm/usearch/usearch.go b/internal/core/algorithm/usearch/usearch.go new file mode 100644 index 0000000000..601615127c --- /dev/null +++ b/internal/core/algorithm/usearch/usearch.go @@ -0,0 +1,249 @@ +// +// Copyright (C) 2019-2024 vdaas.org vald team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// You may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +// Package usearch provides Go API implementation for USearch library. https://github.com/unum-cloud/usearch +package usearch + +import ( + "sync" + + core "github.com/unum-cloud/usearch/golang" + "github.com/vdaas/vald/internal/core/algorithm" + "github.com/vdaas/vald/internal/errors" +) + +type ( + // Usearch is the core interface for interacting with usearch index. + Usearch interface { + // SaveIndex saves the USearch index to storage. + SaveIndex() error + + // SaveIndexWithPath saves the USearch index to the specified path. + SaveIndexWithPath(path string) error + + // GetIndicesSize returns the number of vectors in index. + GetIndicesSize() (indicesSize int, err error) + + // Reserve reserves memory for the given number of vectors. + Reserve(vectorCount int) error + + // Add adds a vector to the USearch index under the given key. + Add(key uint64, vec []float32) error + + // Search performs a nearest neighbor search and returns the results. + Search(q []float32, k int) ([]algorithm.SearchResult, error) + + // GetObject returns the vector data stored under the given key as []float32. + GetObject(key core.Key, count int) ([]float32, error) + + // Remove removes vectors from the index by key. + Remove(key uint64) error + + // Close frees the resources used by the USearch index. 
+ Close() error + } + + usearch struct { + // index struct + index *core.Index + + // config + quantizationType core.Quantization + metricType core.Metric + dimension uint + connectivity uint + expansionAdd uint + expansionSearch uint + multi bool + + idxPath string + mu *sync.RWMutex + } +) + +// New initializes a new USearch instance with the provided options. +func New(opts ...Option) (Usearch, error) { + return gen(false, opts...) +} + +func Load(opts ...Option) (Usearch, error) { + return gen(true, opts...) +} + +func gen(isLoad bool, opts ...Option) (Usearch, error) { + var ( + u = new(usearch) + err error + ) + u.mu = new(sync.RWMutex) + + for _, opt := range append(defaultOptions, opts...) { + if err = opt(u); err != nil { + return nil, errors.NewUsearchError("usarch option error :" + err.Error()) + } + } + + if isLoad { + conf := core.DefaultConfig(uint(u.dimension)) + u.index, err = core.NewIndex(conf) + if err != nil { + return nil, errors.NewUsearchError("usearch new index error for load index") + } + + err = u.index.Load(u.idxPath) + if err != nil { + return nil, errors.NewUsearchError("usearch load index error") + } + } else { + options := core.DefaultConfig(u.dimension) + options.Quantization = u.quantizationType + options.Metric = u.metricType + options.Dimensions = u.dimension + options.Connectivity = u.connectivity + options.ExpansionAdd = u.expansionAdd + options.ExpansionSearch = u.expansionSearch + options.Multi = u.multi + + u.index, err = core.NewIndex(options) + if err != nil { + return nil, errors.NewUsearchError("usearch create index error") + } + } + + return u, nil +} + +// SaveIndex stores usearch index to storage. +func (u *usearch) SaveIndex() error { + u.mu.Lock() + defer u.mu.Unlock() + + err := u.index.Save(u.idxPath) + if err != nil { + return errors.NewUsearchError("usarch save index error") + } + return nil +} + +// SaveIndexWithPath stores usearch index to specified storage. 
+func (u *usearch) SaveIndexWithPath(idxPath string) error { + u.mu.Lock() + defer u.mu.Unlock() + + err := u.index.Save(idxPath) + if err != nil { + return errors.NewUsearchError("usarch save index with path error") + } + return nil +} + +// GetIndicesSize returns the number of vectors in index. +func (u *usearch) GetIndicesSize() (indicesSize int, err error) { + u.mu.Lock() + defer u.mu.Unlock() + size, err := u.index.Len() + if err != nil { + return -1, errors.NewUsearchError("failed to usearch_size") + } + return int(size), err +} + +// Add adds a vector to the index under the given key. +func (u *usearch) Add(key core.Key, vec []float32) error { + if len(vec) != int(u.dimension) { + return errors.New("inconsistent dimensions") + } + + u.mu.Lock() + err := u.index.Add(key, vec) + defer u.mu.Unlock() + if err != nil { + return errors.NewUsearchError("failed to usearch_add") + } + return nil +} + +// Reserve reserves memory for the given number of vectors. +func (u *usearch) Reserve(vectorCount int) error { + u.mu.Lock() + err := u.index.Reserve(uint(vectorCount)) + defer u.mu.Unlock() + if err != nil { + return errors.NewUsearchError("failed to usearch_reserve") + } + return nil +} + +// Search returns search result as []algorithm.SearchResult. +func (u *usearch) Search(q []float32, k int) ([]algorithm.SearchResult, error) { + if len(q) != int(u.dimension) { + return nil, errors.ErrIncompatibleDimensionSize(len(q), int(u.dimension)) + } + u.mu.Lock() + I, D, err := u.index.Search(q, uint(k)) + u.mu.Unlock() + if err != nil { + return nil, errors.NewUsearchError("failed to usearch_search") + } + + if len(I) == 0 || len(D) == 0 { + return nil, errors.ErrEmptySearchResult + } + + result := make([]algorithm.SearchResult, min(len(I), k)) + for i := range result { + result[i] = algorithm.SearchResult{ID: uint32(I[i]), Distance: D[i], Error: nil} + } + return result, nil +} + +// GetObject returns the vector data stored under the given key as []float32. 
+func (u *usearch) GetObject(key core.Key, count int) ([]float32, error) { + u.mu.RLock() + vectors, err := u.index.Get(key, uint(count)) + u.mu.RUnlock() + if err != nil { + return nil, errors.NewUsearchError("failed to usearch_get") + } + // ASK: 何か適切なerrorがある? + if vectors == nil { + return nil, nil + } + + return vectors, nil +} + +// Remove removes from usearch index. +func (u *usearch) Remove(key core.Key) error { + u.mu.Lock() + err := u.index.Remove(key) + defer u.mu.Unlock() + if err != nil { + return errors.NewUsearchError("failed to usearch_remove") + } + + return nil +} + +// Close frees the resources associated with the USearch index. +func (u *usearch) Close() error { + err := u.index.Destroy() + if err != nil { + return errors.NewUsearchError("failed to usearch_free") + } + u.index = nil + return nil +} diff --git a/internal/core/algorithm/usearch/usearch_test.go b/internal/core/algorithm/usearch/usearch_test.go new file mode 100644 index 0000000000..378044034e --- /dev/null +++ b/internal/core/algorithm/usearch/usearch_test.go @@ -0,0 +1,295 @@ +// +// Copyright (C) 2019-2024 vdaas.org vald team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// You may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +// Package usearch provides Go API implementation for USearch library. 
https://github.com/unum-cloud/usearch +package usearch + +import ( + "os" + "path/filepath" + "testing" + + "github.com/vdaas/vald/internal/core/algorithm" + "github.com/vdaas/vald/internal/errors" + "github.com/vdaas/vald/internal/log" + "github.com/vdaas/vald/internal/log/logger" + "github.com/vdaas/vald/internal/test/comparator" + "github.com/vdaas/vald/internal/test/goleak" +) + +var ( + usearchComparator = []comparator.Option{ + comparator.AllowUnexported(usearch{}), + comparator.RWMutexComparer, + comparator.ErrorComparer, + comparator.AtomicUint64Comparator, + } + + searchResultComparator = []comparator.Option{ + comparator.CompareField("Distance", comparator.Comparer(func(s1, s2 float32) bool { + if s1 == 0 { // if vec1 is same as vec2, the distance should be same + return s2 == 0 + } + // by setting non-zero value in test case, it will only check if both got/want is non-zero + return s1 != 0 && s2 != 0 + })), + } + + defaultAfterFunc = func(t *testing.T, u Usearch) error { + t.Helper() + + if u == nil { + return nil + } + + u.Close() + return nil + } +) + +func idxTempDir(t *testing.T) string { + t.Helper() + return filepath.Join(t.TempDir(), "index") +} + +func TestMain(m *testing.M) { + log.Init(log.WithLoggerType(logger.NOP.String())) + os.Exit(m.Run()) +} + +func Test_usearch_Search(t *testing.T) { + type args struct { + q []float32 + k int + } + type fields struct { + idxPath string + quantizationType string + metricType string + dimension int + connectivity int + expansionAdd int + expansionSearch int + multi bool + } + type want struct { + want []algorithm.SearchResult + err error + } + type test struct { + name string + args args + fields fields + createFunc func(t *testing.T, fields fields) (Usearch, error) + want want + checkFunc func(want, []algorithm.SearchResult, Usearch, error) error + beforeFunc func(args) + afterFunc func(*testing.T, Usearch) error + } + defaultCreateFunc := func(t *testing.T, fields fields) (Usearch, error) { + t.Helper() 
+ + return New( + WithIndexPath(fields.idxPath), + WithQuantizationType(fields.quantizationType), + WithMetricType(fields.metricType), + WithDimension(fields.dimension), + WithConnectivity(fields.connectivity), + WithExpansionAdd(fields.expansionAdd), + WithExpansionSearch(fields.expansionSearch), + WithMulti(fields.multi), + ) + } + defaultCheckFunc := func(w want, got []algorithm.SearchResult, n Usearch, err error) error { + if !errors.Is(err, w.err) { + return errors.Errorf("got_error: \"%#v\",\n\t\t\t\twant: \"%#v\"", err, w.err) + } + if diff := comparator.Diff(got, w.want, searchResultComparator...); diff != "" { + return errors.Errorf("diff: %s", diff) + } + + return nil + } + insertCreateFunc := func(t *testing.T, fields fields, vecs [][]float32, poolSize uint32) (Usearch, error) { // create func with insert/index + t.Helper() + + u, err := defaultCreateFunc(t, fields) + if err != nil { + return nil, err + } + + err = u.Reserve(int(poolSize)) + if err != nil { + return nil, err + } + + for i := range poolSize { + if err := u.Add(uint64(i+1), vecs[i]); err != nil { + t.Error(err) + return nil, err + } + } + + return u, nil + } + tests := []test{ + // object type uint8 + { + name: "return vector id after the same vector inserted", + args: args{ + q: []float32{0, 1, 2, 3, 4, 5, 6, 7, 8}, + k: 5, + }, + fields: fields{ + idxPath: idxTempDir(t), + quantizationType: "F32", + metricType: "cosine", + dimension: 9, + connectivity: 0, + expansionAdd: 0, + expansionSearch: 0, + multi: false, + }, + createFunc: func(t *testing.T, fields fields) (Usearch, error) { + t.Helper() + vec := []float32{0, 1, 2, 3, 4, 5, 6, 7, 8} + + return insertCreateFunc(t, fields, [][]float32{vec}, 1) + }, + want: want{ + want: []algorithm.SearchResult{ + {ID: uint32(1), Distance: 0}, + }, + }, + }, + { + name: "resturn vector id after the nearby vector inserted", + args: args{ + q: []float32{1, 2, 3, 4, 5, 6, 7, 8, 9}, + k: 5, + }, + fields: fields{ + idxPath: idxTempDir(t), + 
quantizationType: "F32", + metricType: "cosine", + dimension: 9, + connectivity: 0, + expansionAdd: 0, + expansionSearch: 0, + multi: false, + }, + createFunc: func(t *testing.T, fields fields) (Usearch, error) { + t.Helper() + iv := []float32{0, 1, 2, 3, 4, 5, 6, 7, 8} + + return insertCreateFunc(t, fields, [][]float32{iv}, 1) + }, + want: want{ + want: []algorithm.SearchResult{ + {ID: uint32(1), Distance: 1}, + }, + }, + }, + { + name: "return nothing if the search dimension is less than the inserted vector", + args: args{ + q: []float32{0, 1, 2, 3, 4, 5, 6, 7}, + k: 5, + }, + fields: fields{ + idxPath: idxTempDir(t), + quantizationType: "F32", + metricType: "cosine", + dimension: 9, + connectivity: 0, + expansionAdd: 0, + expansionSearch: 0, + multi: false, + }, + createFunc: func(t *testing.T, fields fields) (Usearch, error) { + t.Helper() + vec := []float32{0, 1, 2, 3, 4, 5, 6, 7, 8} + + return insertCreateFunc(t, fields, [][]float32{vec}, 1) + }, + want: want{ + err: errors.New("incompatible dimension size detected\trequested: 8,\tconfigured: 9"), + }, + }, + { + name: "return nothing if the search dimension is more than the inserted vector", + args: args{ + q: []float32{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, + k: 5, + }, + fields: fields{ + idxPath: idxTempDir(t), + quantizationType: "F32", + metricType: "cosine", + dimension: 9, + connectivity: 0, + expansionAdd: 0, + expansionSearch: 0, + multi: false, + }, + createFunc: func(t *testing.T, fields fields) (Usearch, error) { + t.Helper() + vec := []float32{0, 1, 2, 3, 4, 5, 6, 7, 8} + + return insertCreateFunc(t, fields, [][]float32{vec}, 1) + }, + want: want{ + err: errors.New("incompatible dimension size detected\trequested: 10,\tconfigured: 9"), + }, + }, + } + + for _, tc := range tests { + test := tc + t.Run(test.name, func(tt *testing.T) { + tt.Parallel() + + defer goleak.VerifyNone(tt, goleak.IgnoreCurrent()) + if test.beforeFunc != nil { + test.beforeFunc(test.args) + } + if test.afterFunc == nil { + 
test.afterFunc = defaultAfterFunc + } + checkFunc := test.checkFunc + if test.checkFunc == nil { + checkFunc = defaultCheckFunc + } + if test.createFunc == nil { + test.createFunc = defaultCreateFunc + } + + u, err := test.createFunc(tt, test.fields) + if err != nil { + tt.Fatal(err) + } + + got, err := u.Search(test.args.q, test.args.k) + if err := checkFunc(test.want, got, u, err); err != nil { + tt.Errorf("error = %v", err) + } + + if err := test.afterFunc(tt, u); err != nil { + tt.Error(err) + } + }) + } +} diff --git a/internal/errors/usearch.go b/internal/errors/usearch.go new file mode 100644 index 0000000000..4b065a0f35 --- /dev/null +++ b/internal/errors/usearch.go @@ -0,0 +1,32 @@ +// +// Copyright (C) 2019-2024 vdaas.org vald team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// You may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +// Package errors provides error types and function +package errors + +type UsearchError struct { + Msg string +} + +func NewUsearchError(msg string) error { + return UsearchError{ + Msg: msg, + } +} + +func (u UsearchError) Error() string { + return u.Msg +} diff --git a/versions/USEARCH_VERSION b/versions/USEARCH_VERSION new file mode 100644 index 0000000000..d91346fd9e --- /dev/null +++ b/versions/USEARCH_VERSION @@ -0,0 +1 @@ +2.15.1 \ No newline at end of file From a3792b67949a41ff215450d8602b781c62d1c8bc Mon Sep 17 00:00:00 2001 From: "deepsource-autofix[bot]" <62050782+deepsource-autofix[bot]@users.noreply.github.com> Date: Wed, 11 Sep 2024 02:58:30 +0000 Subject: [PATCH 2/7] style: format code with Gofumpt and Prettier This commit fixes the style issues introduced in 58baee9 according to the output from Gofumpt and Prettier. Details: https://github.com/vdaas/vald/pull/2608 --- internal/core/algorithm/usearch/option.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/core/algorithm/usearch/option.go b/internal/core/algorithm/usearch/option.go index ffaffedfcb..aa0f53a9cb 100644 --- a/internal/core/algorithm/usearch/option.go +++ b/internal/core/algorithm/usearch/option.go @@ -55,7 +55,7 @@ func WithIndexPath(path string) Option { // WithQuantizationType represents the option to set the quantizationType for usearch. 
func WithQuantizationType(quantizationType string) Option { return func(u *usearch) error { - switch quantizationType{ + switch quantizationType { case "BF16": u.quantizationType = core.BF16 case "F16": From 938cc12aadea2b6ab028a9891ec63204a8819c0f Mon Sep 17 00:00:00 2001 From: mirito333 Date: Thu, 12 Sep 2024 10:55:41 +0900 Subject: [PATCH 3/7] feat: impl usearch istallation cmd for ci/base container --- dockers/ci/base/Dockerfile | 1 + hack/actions/gen/main.go | 3 ++- hack/docker/gen/main.go | 7 ++++--- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/dockers/ci/base/Dockerfile b/dockers/ci/base/Dockerfile index 09fccb1e35..3ee2d9b83c 100644 --- a/dockers/ci/base/Dockerfile +++ b/dockers/ci/base/Dockerfile @@ -120,6 +120,7 @@ RUN --mount=type=bind,target=.,rw \ && make telepresence/install \ && make ngt/install \ && make faiss/install \ + && make usearch/install \ && rm -rf ${GOPATH}/src/github.com/${ORG}/${REPO}/* # skipcq: DOK-DL3002 USER root:root diff --git a/hack/actions/gen/main.go b/hack/actions/gen/main.go index d5be92145f..b828885289 100644 --- a/hack/actions/gen/main.go +++ b/hack/actions/gen/main.go @@ -313,6 +313,7 @@ const ( rustVersionPath = versionsPath + "/RUST_VERSION" faissVersionPath = versionsPath + "/FAISS_VERSION" ngtVersionPath = versionsPath + "/NGT_VERSION" + usearchVersionPath = versionsPath + "/USEARCH_VERSION" makefilePath = "Makefile" makefileDirPath = "Makefile.d/**" @@ -646,7 +647,7 @@ func main() { append(ngtBuildDeps, append(faissBuildDeps, devContainerDeps...)...)...)...), - Preprocess: append(ciContainerPreprocess, ngtPreprocess, faissPreprocess), + Preprocess: append(ciContainerPreprocess, ngtPreprocess, faissPreprocess,), Entrypoints: []string{"/bin/bash"}, }, "vald-dev-container": { diff --git a/hack/docker/gen/main.go b/hack/docker/gen/main.go index 7750270efc..3c65ac4c26 100644 --- a/hack/docker/gen/main.go +++ b/hack/docker/gen/main.go @@ -241,8 +241,9 @@ const ( agentInernalPackage = "pkg/agent/internal" - 
ngtPreprocess = "make ngt/install" - faissPreprocess = "make faiss/install" + ngtPreprocess = "make ngt/install" + faissPreprocess = "make faiss/install" + usearchPreprocess = "make usearch/install" helmOperatorRootdir = "/opt/helm" helmOperatorWatchFile = helmOperatorRootdir + "/watches.yaml" @@ -645,7 +646,7 @@ func main() { append(ngtBuildDeps, append(faissBuildDeps, devContainerDeps...)...)...)...), - Preprocess: append(ciContainerPreprocess, ngtPreprocess, faissPreprocess), + Preprocess: append(ciContainerPreprocess, ngtPreprocess, faissPreprocess, usearchPreprocess), Entrypoints: []string{"/bin/bash"}, }, "vald-dev-container": { From b3ca54f30172bf2fa5a2a2c138b9be040b3a628d Mon Sep 17 00:00:00 2001 From: "deepsource-autofix[bot]" <62050782+deepsource-autofix[bot]@users.noreply.github.com> Date: Thu, 12 Sep 2024 01:56:07 +0000 Subject: [PATCH 4/7] style: format code with Gofumpt and Prettier This commit fixes the style issues introduced in 938cc12 according to the output from Gofumpt and Prettier. 
Details: https://github.com/vdaas/vald/pull/2608 --- hack/actions/gen/main.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hack/actions/gen/main.go b/hack/actions/gen/main.go index b828885289..269f6f1973 100644 --- a/hack/actions/gen/main.go +++ b/hack/actions/gen/main.go @@ -647,7 +647,7 @@ func main() { append(ngtBuildDeps, append(faissBuildDeps, devContainerDeps...)...)...)...), - Preprocess: append(ciContainerPreprocess, ngtPreprocess, faissPreprocess,), + Preprocess: append(ciContainerPreprocess, ngtPreprocess, faissPreprocess), Entrypoints: []string{"/bin/bash"}, }, "vald-dev-container": { From 045f0243bbf22107f21b182327e74a5fced347c9 Mon Sep 17 00:00:00 2001 From: mirito333 Date: Thu, 12 Sep 2024 11:41:45 +0900 Subject: [PATCH 5/7] add: multiple vector test --- internal/core/algorithm/usearch/usearch.go | 8 +- .../core/algorithm/usearch/usearch_test.go | 90 ++++++++++++++++++- 2 files changed, 92 insertions(+), 6 deletions(-) diff --git a/internal/core/algorithm/usearch/usearch.go b/internal/core/algorithm/usearch/usearch.go index 601615127c..0d8c647f63 100644 --- a/internal/core/algorithm/usearch/usearch.go +++ b/internal/core/algorithm/usearch/usearch.go @@ -18,6 +18,7 @@ package usearch import ( + "strconv" "sync" core "github.com/unum-cloud/usearch/golang" @@ -202,7 +203,6 @@ func (u *usearch) Search(q []float32, k int) ([]algorithm.SearchResult, error) { if len(I) == 0 || len(D) == 0 { return nil, errors.ErrEmptySearchResult } - result := make([]algorithm.SearchResult, min(len(I), k)) for i := range result { result[i] = algorithm.SearchResult{ID: uint32(I[i]), Distance: D[i], Error: nil} @@ -218,9 +218,11 @@ func (u *usearch) GetObject(key core.Key, count int) ([]float32, error) { if err != nil { return nil, errors.NewUsearchError("failed to usearch_get") } - // ASK: 何か適切なerrorがある? 
+ if vectors == nil { - return nil, nil + return nil, errors.ErrObjectNotFound( + errors.NewUsearchError("failed to usearch_get"), strconv.Itoa(int(key)), + ) } return vectors, nil diff --git a/internal/core/algorithm/usearch/usearch_test.go b/internal/core/algorithm/usearch/usearch_test.go index 378044034e..8ec0f3941e 100644 --- a/internal/core/algorithm/usearch/usearch_test.go +++ b/internal/core/algorithm/usearch/usearch_test.go @@ -18,6 +18,7 @@ package usearch import ( + "math" "os" "path/filepath" "testing" @@ -136,8 +137,8 @@ func Test_usearch_Search(t *testing.T) { return nil, err } - for i := range poolSize { - if err := u.Add(uint64(i+1), vecs[i]); err != nil { + for i, v := range vecs { + if err := u.Add(uint64(i+1), v); err != nil { t.Error(err) return nil, err } @@ -146,7 +147,6 @@ func Test_usearch_Search(t *testing.T) { return u, nil } tests := []test{ - // object type uint8 { name: "return vector id after the same vector inserted", args: args{ @@ -203,6 +203,90 @@ func Test_usearch_Search(t *testing.T) { }, }, }, + { + name: "return limited result after insert 10 vectors with limited size 3", + args: args{ + q: []float32{1, 2, 3, 4, 5, 6, 7, 8, 9}, + k: 3, + }, + fields: fields{ + idxPath: idxTempDir(t), + quantizationType: "F32", + metricType: "cosine", + dimension: 9, + connectivity: 0, + expansionAdd: 0, + expansionSearch: 0, + multi: false, + }, + createFunc: func(t *testing.T, fields fields) (Usearch, error) { + t.Helper() + ivs := [][]float32{ // insert 10 vec + {0, 1, 2, 3, 4, 5, 6, 7, 8}, + {2, 3, 4, 5, 6, 7, 8, 9, 10}, + {0, 1, 2, 3, 4, 5, 6, 7, 8}, + {2, 3, 4, 5, 6, 7, 8, 9, 10}, + {0, 1, 2, 3, 4, 5, 6, 7, 8}, + {2, 3, 4, 5, 6, 7, 8, 9, 10}, + {0, 1, 2, 3, 4, 5, 6, 7, 8}, + {2, 3, 4, 5, 6, 7, 8, 9, 10}, + {2, 3, 4, 5, 6, 7, 8, 9, 10}, + {2, 3, 4, 5, 6, 7, 8, 9, math.MaxFloat32}, + } + + return insertCreateFunc(t, fields, ivs, 10) + }, + want: want{ + want: []algorithm.SearchResult{ + {ID: uint32(10), Distance: 3}, + {ID: uint32(9), 
Distance: 3}, + {ID: uint32(8), Distance: 3}, + }, + }, + }, + { + name: "return most accurate result after insert 10 vectors with limited size 5", + args: args{ + q: []float32{1, 2, 3, 4, 5, 6, 7, 8, 9}, + k: 5, + }, + fields: fields{ + idxPath: idxTempDir(t), + quantizationType: "F32", + metricType: "cosine", + dimension: 9, + connectivity: 0, + expansionAdd: 0, + expansionSearch: 0, + multi: false, + }, + createFunc: func(t *testing.T, fields fields) (Usearch, error) { + t.Helper() + ivs := [][]float32{ + {0, 1, 2, 3, 4, 5, 6, 7, 8}, // vec id 1 + {2, 3, 4, 5, 6, 7, 8, 9, 10}, // vec id 2 + {0, 1, 2, 3, 4, 5, 6, 7, 8}, // vec id 3 + {2, 3, 4, 5, 6, 7, 8, 9, 10}, // vec id 4 + {0, 1, 2, 3, 4, 5, 6, 7, 8}, // vec id 5 + {2, 3, 4, 5, 6, 7, 8, 9, 10}, // vec id 6 + {2, 3, 4, 5, 6, 7, 8, 9, 9.04}, // vec id 7 + {2, 3, 4, 5, 6, 7, 8, 9, 9.03}, // vec id 8 + {1, 2, 3, 4, 5, 6, 7, 8, 9.01}, // vec id 9 + {1, 2, 3, 4, 5, 6, 7, 8, 9.02}, // vec id 10 + } + + return insertCreateFunc(t, fields, ivs, 10) + }, + want: want{ + want: []algorithm.SearchResult{ + {ID: uint32(9), Distance: 2.384185791015625e-07}, + {ID: uint32(10), Distance: 5.364418029785156e-07}, + {ID: uint32(6), Distance: 3}, + {ID: uint32(4), Distance: 3}, + {ID: uint32(2), Distance: 3}, + }, + }, + }, { name: "return nothing if the search dimension is less than the inserted vector", args: args{ From c7e967823c9ab161a722087ce9d2d407ff4433ce Mon Sep 17 00:00:00 2001 From: mirito333 Date: Thu, 12 Sep 2024 12:15:27 +0900 Subject: [PATCH 6/7] fix: add ldconfig to Makefile --- Makefile | 2 ++ hack/docker/gen/main.go | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 3207c28e7f..644937332c 100644 --- a/Makefile +++ b/Makefile @@ -690,11 +690,13 @@ ifeq ($(OS),linux) curl -sSL https://github.com/unum-cloud/usearch/releases/download/v$(USEARCH_VERSION)/usearch_$(OS)_$(GOARCH)_$(USEARCH_VERSION).deb -o usearch_$(OS)_$(USEARCH_VERSION).deb dpkg -i
usearch_$(OS)_$(USEARCH_VERSION).deb rm usearch_$(OS)_$(USEARCH_VERSION).deb + ldconfig else ifeq ($(OS),macos) curl -sSL https://github.com/unum-cloud/usearch/releases/download/v$(USEARCH_VERSION)/usearch_macos_$(GOARCH)_$(USEARCH_VERSION).zip -o usearch_macos_$(OS)_$(USEARCH_VERSION).zip unzip usearch_macos_$(OS)_$(USEARCH_VERSION).zip sudo mv libusearch_c.dylib /usr/local/lib && sudo mv usearch.h /usr/local/include rm -rf usearch_macos_$(OS)_$(USEARCH_VERSION).zip + ldconfig endif .PHONY: cmake/install diff --git a/hack/docker/gen/main.go b/hack/docker/gen/main.go index 3c65ac4c26..3bc6c1bb9e 100644 --- a/hack/docker/gen/main.go +++ b/hack/docker/gen/main.go @@ -664,7 +664,8 @@ func main() { Preprocess: append(devContainerPreprocess, append(ciContainerPreprocess, ngtPreprocess, - faissPreprocess)...), + faissPreprocess, + usearchPreprocess)...), }, "vald-buildbase": { AppName: "buildbase", From 43a16e5c381239417eb4b91eee77a87ddc03ffce Mon Sep 17 00:00:00 2001 From: mirito333 Date: Thu, 12 Sep 2024 14:35:52 +0900 Subject: [PATCH 7/7] refactor: convert switch to map --- internal/core/algorithm/usearch/option.go | 60 ++++++++++------------- 1 file changed, 26 insertions(+), 34 deletions(-) diff --git a/internal/core/algorithm/usearch/option.go b/internal/core/algorithm/usearch/option.go index aa0f53a9cb..d4bf0061c8 100644 --- a/internal/core/algorithm/usearch/option.go +++ b/internal/core/algorithm/usearch/option.go @@ -55,20 +55,17 @@ func WithIndexPath(path string) Option { // WithQuantizationType represents the option to set the quantizationType for usearch.
func WithQuantizationType(quantizationType string) Option { return func(u *usearch) error { - switch quantizationType { - case "BF16": - u.quantizationType = core.BF16 - case "F16": - u.quantizationType = core.F16 - case "F32": - u.quantizationType = core.F32 - case "F64": - u.quantizationType = core.F64 - case "I8": - u.quantizationType = core.I8 - case "B1": - u.quantizationType = core.B1 - default: + quantizationTypeMap := map[string]core.Quantization{ + "BF16": core.BF16, + "F16": core.F16, + "F32": core.F32, + "F64": core.F64, + "I8": core.I8, + "B1": core.B1, + } + if quantizationType, ok := quantizationTypeMap[quantizationType]; ok { + u.quantizationType = quantizationType + } else { err := errors.NewUsearchError("unsupported QuantizationType") return errors.NewErrCriticalOption("QuantizationType", quantizationType, err) } @@ -79,26 +76,21 @@ func WithQuantizationType(quantizationType string) Option { // WithMetricType represents the option to set the metricType for usearch. func WithMetricType(metricType string) Option { return func(u *usearch) error { - switch strings.NewReplacer("-", "", "_", "", " ", "").Replace(strings.ToLower(metricType)) { - case "l2sq": - u.metricType = core.L2sq - case "ip": - u.metricType = core.InnerProduct - case "cosine": - u.metricType = core.Cosine - case "haversine": - u.metricType = core.Haversine - case "divergence": - u.metricType = core.Divergence - case "pearson": - u.metricType = core.Pearson - case "hamming": - u.metricType = core.Hamming - case "tanimoto": - u.metricType = core.Tanimoto - case "sorensen": - u.metricType = core.Sorensen - default: + metricTypeMap := map[string]core.Metric{ + "l2sq": core.L2sq, + "ip": core.InnerProduct, + "cosine": core.Cosine, + "haversine": core.Haversine, + "divergence": core.Divergence, + "pearson": core.Pearson, + "hamming": core.Hamming, + "tanimoto": core.Tanimoto, + "sorensen": core.Sorensen, + } + normalizedMetricType := strings.NewReplacer("-", "", "_", "", " ", 
"").Replace(strings.ToLower(metricType)) + if metricType, ok := metricTypeMap[normalizedMetricType]; ok { + u.metricType = metricType + } else { err := errors.NewUsearchError("unsupported MetricType") return errors.NewErrCriticalOption("MetricType", metricType, err) }