From c540362c4288a444c90c75c6d823768fe0c19d49 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Mon, 26 Aug 2024 23:26:03 -0700 Subject: [PATCH 1/5] Improve: Mixed-precision kernels for `i8` --- include/usearch/index_plugins.hpp | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/include/usearch/index_plugins.hpp b/include/usearch/index_plugins.hpp index c7c75d95..c71e89d4 100644 --- a/include/usearch/index_plugins.hpp +++ b/include/usearch/index_plugins.hpp @@ -1419,7 +1419,10 @@ template struct metric_ } }; -struct cos_i8_t { +/** + * @brief Cosine (Angular) distance for signed 8-bit integers using 16-bit intermediates. + */ +struct metric_cos_i8_t { using scalar_t = i8_t; using result_t = f32_t; @@ -1445,7 +1448,11 @@ struct cos_i8_t { } }; -struct l2sq_i8_t { +/** + * @brief Squared Euclidean (L2) distance for signed 8-bit integers using 16-bit intermediates. + * Square root is avoided at the end, as it won't affect the ordering. + */ +struct metric_l2sq_i8_t { using scalar_t = i8_t; using result_t = f32_t; @@ -1775,7 +1782,7 @@ class metric_punned_t { case metric_kind_t::cos_k: { switch (scalar_kind_) { case scalar_kind_t::bf16_k: metric_ptr_ = (uptr_t)&equidimensional_>; break; - case scalar_kind_t::i8_k: metric_ptr_ = (uptr_t)&equidimensional_>; break; + case scalar_kind_t::i8_k: metric_ptr_ = (uptr_t)&equidimensional_; break; case scalar_kind_t::f16_k: metric_ptr_ = (uptr_t)&equidimensional_>; break; case scalar_kind_t::f32_k: metric_ptr_ = (uptr_t)&equidimensional_>; break; case scalar_kind_t::f64_k: metric_ptr_ = (uptr_t)&equidimensional_>; break; @@ -1786,7 +1793,7 @@ class metric_punned_t { case metric_kind_t::l2sq_k: { switch (scalar_kind_) { case scalar_kind_t::bf16_k: metric_ptr_ = (uptr_t)&equidimensional_>; break; - case scalar_kind_t::i8_k: metric_ptr_ = (uptr_t)&equidimensional_>; break; + case scalar_kind_t::i8_k: metric_ptr_ = (uptr_t)&equidimensional_; break; case scalar_kind_t::f16_k: metric_ptr_ = (uptr_t)&equidimensional_>; break; case scalar_kind_t::f32_k: metric_ptr_ = (uptr_t)&equidimensional_>; break; case scalar_kind_t::f64_k: metric_ptr_ = (uptr_t)&equidimensional_>; break; From 3f24e1d6989b90c75c30060100a911c2fa781c98 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Mon, 26 Aug 2024 23:26:23 -0700 Subject: [PATCH 2/5] Docs: Annotate helper functions --- .vscode/settings.json | 2 ++ include/usearch/index_plugins.hpp | 56 ++++++++++++++++++++++++++++--- 2 files changed, 54 insertions(+), 4 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index 3b5d8520..56e46896 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -153,6 +153,7 @@ "HNSW", "hnswlib", "ibin", + "ivdep", "jaccard", "Jemalloc", "Kullback", @@ -175,6 +176,7 @@ "Println", "pytest", "Quickstart", + "relock", "repr", "rtype", "SIMD", diff --git a/include/usearch/index_plugins.hpp b/include/usearch/index_plugins.hpp index c71e89d4..c41afbce 100644 --- a/include/usearch/index_plugins.hpp +++ b/include/usearch/index_plugins.hpp @@ -139,6 +139,9 @@ enum class scalar_kind_t : std::uint8_t { i8_k = 23, }; +/** + * @brief Maps a scalar type to its corresponding scalar_kind_t enumeration value. + */ template scalar_kind_t scalar_kind() noexcept { if (std::is_same()) return scalar_kind_t::b1x8_k; @@ -175,22 +178,43 @@ template scalar_kind_t scalar_kind() noexcept { return scalar_kind_t::unknown_k; } +/** + * @brief Converts an angle from degrees to radians. + */ template at angle_to_radians(at angle) noexcept { return angle * at(3.14159265358979323846) / at(180); } +/** + * @brief Readability helper to compute the square of a given value. + */ template at square(at value) noexcept { return value * value; } +/** + * @brief Clamps a value between a lower and upper bound using a custom comparator. Similar to `std::clamp`. + * https://en.cppreference.com/w/cpp/algorithm/clamp + */ template inline at clamp(at v, at lo, at hi, compare_at comp) noexcept { return comp(v, lo) ? lo : comp(hi, v) ? hi : v; } + +/** + * @brief Clamps a value between a lower and upper bound. Similar to `std::clamp`. + * https://en.cppreference.com/w/cpp/algorithm/clamp + */ template inline at clamp(at v, at lo, at hi) noexcept { return usearch::clamp(v, lo, hi, std::less{}); } -inline bool str_equals(char const* begin, std::size_t len, char const* other_begin) noexcept { - std::size_t other_len = std::strlen(other_begin); - return len == other_len && std::strncmp(begin, other_begin, len) == 0; +/** + * @brief Compares two strings for equality, given a length for the first string. + */ +inline bool str_equals(char const* first_begin, std::size_t first_len, char const* second_begin) noexcept { + std::size_t second_len = std::strlen(second_begin); + return first_len == second_len && std::strncmp(first_begin, second_begin, first_len) == 0; } +/** + * @brief Returns the number of bits required to represent a scalar type. + */ inline std::size_t bits_per_scalar(scalar_kind_t scalar_kind) noexcept { switch (scalar_kind) { case scalar_kind_t::uuid_k: return 128; @@ -213,6 +237,10 @@ inline std::size_t bits_per_scalar(scalar_kind_t scalar_kind) noexcept { } } +/** + * @brief Returns the number of bits in a scalar word for a given scalar type. + * Equivalent to `bits_per_scalar` for types that are not bit-packed. + */ inline std::size_t bits_per_scalar_word(scalar_kind_t scalar_kind) noexcept { switch (scalar_kind) { case scalar_kind_t::uuid_k: return 128; @@ -235,6 +263,9 @@ inline std::size_t bits_per_scalar_word(scalar_kind_t scalar_kind) noexcept { } } +/** + * @brief Returns the string name of a given scalar type. + */ inline char const* scalar_kind_name(scalar_kind_t scalar_kind) noexcept { switch (scalar_kind) { case scalar_kind_t::uuid_k: return "uuid"; @@ -257,6 +288,9 @@ inline char const* scalar_kind_name(scalar_kind_t scalar_kind) noexcept { } } +/** + * @brief Returns the string name of a given distance metric. + */ inline char const* metric_kind_name(metric_kind_t metric) noexcept { switch (metric) { case metric_kind_t::unknown_k: return "unknown"; @@ -273,6 +307,10 @@ inline char const* metric_kind_name(metric_kind_t metric) noexcept { default: return ""; } } + +/** + * @brief Parses a string to identify the corresponding `scalar_kind_t` enumeration value. + */ inline expected_gt scalar_kind_from_name(char const* name, std::size_t len) { expected_gt parsed; if (str_equals(name, len, "f32")) @@ -292,10 +330,16 @@ inline expected_gt scalar_kind_from_name(char const* name, std::s return parsed; } +/** + * @brief Parses a string to identify the corresponding `scalar_kind_t` enumeration value. + */ inline expected_gt scalar_kind_from_name(char const* name) { return scalar_kind_from_name(name, std::strlen(name)); } +/** + * @brief Parses a string to identify the corresponding `metric_kind_t` enumeration value. + */ inline expected_gt metric_from_name(char const* name, std::size_t len) { expected_gt parsed; if (str_equals(name, len, "l2sq") || str_equals(name, len, "euclidean_sq")) { @@ -321,6 +365,10 @@ inline expected_gt metric_from_name(char const* name, std::size_t "tanimoto, sorensen"); return parsed; } + +/** + * @brief Parses a string to identify the corresponding `metric_kind_t` enumeration value. + */ inline expected_gt metric_from_name(char const* name) { return metric_from_name(name, std::strlen(name)); } @@ -417,7 +465,7 @@ class f16_bits_t { inline f16_bits_t(float v) noexcept : uint16_(f32_to_f16(v)) {} inline f16_bits_t(double v) noexcept : uint16_(f32_to_f16(static_cast(v))) {} - inline bool operator<(const f16_bits_t& other) const noexcept { return float(*this) < float(other); } + inline bool operator<(f16_bits_t const& other) const noexcept { return float(*this) < float(other); } inline f16_bits_t operator+(f16_bits_t other) const noexcept { return {float(*this) + float(other)}; } inline f16_bits_t operator-(f16_bits_t other) const noexcept { return {float(*this) - float(other)}; } From 84e487fb7436375e29de857c6b9d9951cc591164 Mon Sep 17 00:00:00 2001 From: Abe Tomoaki Date: Wed, 28 Aug 2024 23:54:17 +0900 Subject: [PATCH 3/5] Docs: Update JavaScript code example (#478) 1. Add `require('node:assert')` Fix the following error: ``` ReferenceError: assert is not defined ``` 2. Change metric to `l2sq` Fix the following error: ``` AssertionError [ERR_ASSERTION]: Expected values to be loosely deep-equal: Float32Array(1) [ -0.00017893314361572266 ] should loosely deep-equal Float32Array(1) [ 0 ] ``` Reference:. https://github.com/unum-cloud/usearch/blob/242be104de770c12ed1fc938215530b944000b42/javascript/usearch.test.js#L49-L61 --- javascript/README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/javascript/README.md b/javascript/README.md index d02c3fd7..cb153a33 100644 --- a/javascript/README.md +++ b/javascript/README.md @@ -21,8 +21,9 @@ wasmer install unum/usearch Create an index, add vectors, and perform searches with ease: ```js +const assert = require('node:assert'); const usearch = require('usearch'); -const index = new usearch.Index({ metric: 'cos', connectivity: 16, dimensions: 3 }); +const index = new usearch.Index({ metric: 'l2sq', connectivity: 16, dimensions: 3 }); index.add(42n, new Float32Array([0.2, 0.6, 0.4])); const results = index.search(new Float32Array([0.2, 0.6, 0.4]), 10); From 9952834027b906f3b971a6cf4af3a3fedde4a322 Mon Sep 17 00:00:00 2001 From: Abe Tomoaki Date: Wed, 28 Aug 2024 23:55:24 +0900 Subject: [PATCH 4/5] Improve: Use `const` instead of `var` in JS test (#479) The test code used `const`, so we unified it. Also, it is better to use `const` than `var`. --- javascript/usearch.test.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/javascript/usearch.test.js b/javascript/usearch.test.js index d3673508..688a5847 100644 --- a/javascript/usearch.test.js +++ b/javascript/usearch.test.js @@ -47,13 +47,13 @@ test('Batch operations', () => { }); test("Expected results", () => { - var index = new usearch.Index({ + const index = new usearch.Index({ metric: "l2sq", connectivity: 16, dimensions: 3, }); index.add(42n, new Float32Array([0.2, 0.6, 0.4])); - var results = index.search(new Float32Array([0.2, 0.6, 0.4]), 10); + const results = index.search(new Float32Array([0.2, 0.6, 0.4]), 10); assert.equal(index.size(), 1); assert.deepEqual(results.keys, new BigUint64Array([42n])); From 50a6608bfd2312174bc9af90dfa54a7b9861b329 Mon Sep 17 00:00:00 2001 From: brkp Date: Wed, 28 Aug 2024 20:47:53 +0200 Subject: [PATCH 5/5] Make: Rust Windows build (#472) Closes #477 Co-authored-by: T-T <7758904+CCNut@users.noreply.github.com> Signed-off-by: brkp --- build.rs | 8 +++++++- rust/lib.cpp | 6 +++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/build.rs b/build.rs index 0a1c96f3..7007508e 100644 --- a/build.rs +++ b/build.rs @@ -80,7 +80,13 @@ fn main() { .flag_if_supported("/std:c++17") .flag_if_supported("/O2") .flag_if_supported("/fp:fast") - .flag_if_supported("/W1"); // Reduce warnings verbosity + .flag_if_supported("/W1") // Reduce warnings verbosity + .flag_if_supported("/EHsc") + .flag_if_supported("/MD") + .flag_if_supported("/permissive-") + .flag_if_supported("/sdl-") + .define("_ALLOW_RUNTIME_LIBRARY_MISMATCH", None) + .define("_ALLOW_POINTER_TO_CONST_MISMATCH", None); } let mut result = build.try_compile("usearch"); diff --git a/rust/lib.cpp b/rust/lib.cpp index 87ee1d2c..9014decc 100644 --- a/rust/lib.cpp +++ b/rust/lib.cpp @@ -66,7 +66,7 @@ NativeIndex::NativeIndex(std::unique_ptr index) : index_(std::move(inde auto make_predicate(uptr_t metric, uptr_t metric_state) { return [=](vector_key_t key) { auto func = reinterpret_cast(metric); - auto state = reinterpret_cast(metric_state); + auto state = static_cast(metric_state); return func(key, state); }; } @@ -104,8 +104,8 @@ void NativeIndex::change_expansion_search(size_t n) const { index_->change_expan void NativeIndex::change_metric(uptr_t metric, uptr_t state) const { index_->change_metric(metric_punned_t::stateful( // - reinterpret_cast(metric), // - reinterpret_cast(state), // + static_cast(metric), // + static_cast(state), // index_->metric().metric_kind(), // index_->scalar_kind())); }