-
Notifications
You must be signed in to change notification settings - Fork 434
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add halfvec indexing and queries to periodic pgvector performance tests (#8057)
## Problem halfvec data type was introduced in pgvector 0.7.0 and is popular because it allows smaller vectors, smaller indexes and potentially better performance. So far we have not tested halfvec in our periodic performance tests. This PR adds halfvec indexing and halfvec queries to the test.
- Loading branch information
1 parent
8189219
commit 4621003
Showing
7 changed files
with
82 additions
and
27 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
-- Builds halfvec_test_table: a copy of the documents table whose embeddings are | ||
-- stored as halfvec, plus an HNSW index over them. | ||
-- Dropping first lets the script be re-run from scratch. | ||
DROP TABLE IF EXISTS halfvec_test_table; | ||
|
||
-- Same columns as selected from documents below; embeddings are halfvec with 1536 dimensions. | ||
CREATE TABLE halfvec_test_table ( | ||
_id text NOT NULL, | ||
title text, | ||
text text, | ||
embeddings halfvec(1536), | ||
PRIMARY KEY (_id) | ||
); | ||
|
||
-- Copy every row from documents, casting each embedding to halfvec. | ||
INSERT INTO halfvec_test_table (_id, title, text, embeddings) | ||
SELECT _id, title, text, embeddings::halfvec | ||
FROM documents; | ||
|
||
-- The index is created after the bulk load, so it is built once over the full data set. | ||
-- halfvec_cosine_ops is the cosine-distance operator class (the <=> operator in pgvector). | ||
CREATE INDEX documents_half_precision_hnsw_idx ON halfvec_test_table USING hnsw (embeddings halfvec_cosine_ops) WITH (m = 64, ef_construction = 128); |
13 changes: 13 additions & 0 deletions
13
test_runner/performance/pgvector/pgbench_custom_script_pgvector_halfvec_queries.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
-- run with pooled connection | ||
-- pgbench -T 300 -c 100 -j20 -f pgbench_custom_script_pgvector_halfvec_queries.sql "postgresql://neondb_owner:<secret>@ep-floral-thunder-w1gzhaxi-pooler.eu-west-1.aws.neon.build/neondb?sslmode=require" | ||
|
||
-- Nearest-neighbour benchmark query against halfvec_test_table. | ||
-- query_vector picks one stored embedding to search for: TABLESAMPLE SYSTEM (1) | ||
-- samples about 1% of the table's pages and LIMIT 1 keeps a single row from that sample. | ||
-- The main query returns the 30 rows closest to that vector; <=> is pgvector's | ||
-- cosine-distance operator. Ordering by the distance alias (not by column position) | ||
-- keeps the query correct if the select list is ever reordered. | ||
WITH query_vector (embedding) AS ( | ||
    SELECT "embeddings" AS embedding | ||
    FROM halfvec_test_table | ||
    TABLESAMPLE SYSTEM (1) | ||
    LIMIT 1 | ||
) | ||
SELECT title, "embeddings" <=> (SELECT embedding FROM query_vector) AS distance | ||
FROM halfvec_test_table | ||
ORDER BY distance | ||
LIMIT 30; |
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
import pytest | ||
from fixtures.compare_fixtures import PgCompare | ||
|
||
from performance.test_perf_pgbench import PgBenchLoadType, get_durations_matrix, run_test_pgbench | ||
|
||
|
||
# The following test runs on an existing database that has the pgvector extension installed | ||
# and a table with 1 million embedding vectors loaded and indexed with HNSW. | ||
# | ||
# Run this pgbench test against an existing remote Postgres cluster with the necessary setup. | ||
# The test is parametrized over every duration returned by get_durations_matrix(). | ||
@pytest.mark.parametrize("duration", get_durations_matrix()) | ||
@pytest.mark.remote_cluster | ||
def test_pgbench_remote_pgvector_hnsw(remote_compare: PgCompare, duration: int): | ||
# NOTE(review): the literal 1 is presumably the pgbench scale factor - confirm against run_test_pgbench. | ||
run_test_pgbench(remote_compare, 1, duration, PgBenchLoadType.PGVECTOR_HNSW) |
|
||
|
||
# The following test runs on an existing database that has the pgvector extension installed | ||
# and a table with 1 million embedding vectors stored as halfvec (half precision) and indexed with HNSW. | ||
# | ||
# Run this pgbench test against an existing remote Postgres cluster with the necessary setup. | ||
# The test is parametrized over every duration returned by get_durations_matrix(). | ||
@pytest.mark.parametrize("duration", get_durations_matrix()) | ||
@pytest.mark.remote_cluster | ||
def test_pgbench_remote_pgvector_halfvec(remote_compare: PgCompare, duration: int): | ||
# NOTE(review): the literal 1 is presumably the pgbench scale factor - confirm against run_test_pgbench. | ||
run_test_pgbench(remote_compare, 1, duration, PgBenchLoadType.PGVECTOR_HALFVEC) |
4621003
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
3304 tests run: 3153 passed, 0 failed, 151 skipped (full report)
Flaky tests (1)
Postgres 16
test_subscriber_restart
: releaseCode coverage* (full report)
functions
:31.5% (6635 of 21064 functions)
lines
:48.6% (51636 of 106301 lines)
* collected from Rust tests only
4621003 at 2024-06-14T17:58:17.700Z :recycle: