first draft for hnsw queries in scheduled perf test
Bodobolero committed May 22, 2024
1 parent 00d66e8 commit 97d6f50
Showing 3 changed files with 278 additions and 3 deletions.
241 changes: 238 additions & 3 deletions .github/workflows/benchmarking.yml
@@ -38,6 +38,11 @@ on:
description: 'AWS-RDS and AWS-AURORA normally only run on Saturday. Set this to true to run them on every workflow_dispatch'
required: false
default: false
run_only_pgvector_tests:
type: boolean
description: 'Run pgvector tests but no other tests. If not set, pgvector tests will not be run'
required: false
default: false

defaults:
run:
@@ -50,6 +55,7 @@ concurrency:

jobs:
bench:
if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null }}
env:
TEST_PG_BENCH_DURATIONS_MATRIX: "300"
TEST_PG_BENCH_SCALES_MATRIX: "10,100"
@@ -120,6 +126,7 @@ jobs:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}

generate-matrices:
if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null }}
# Create matrices for the benchmarking jobs, so we run benchmarks on rds only once a week (on Saturday)
#
# Available platforms:
@@ -197,6 +204,7 @@ jobs:
echo "matrix=$(echo "$matrix" | jq --compact-output '.')" >> $GITHUB_OUTPUT
pgbench-compare:
if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null }}
needs: [ generate-matrices ]

strategy:
@@ -343,6 +351,233 @@ jobs:
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}

pgbench-pgvector:
if: ${{ github.event.inputs.run_only_pgvector_tests == 'true' || github.event.inputs.run_only_pgvector_tests == null }}

strategy:
fail-fast: false
matrix: ${{fromJson(needs.generate-matrices.outputs.pgbench-compare-matrix)}}

env:
TEST_PG_BENCH_DURATIONS_MATRIX: "300"
TEST_PG_BENCH_SCALES_MATRIX: "1"
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
DEFAULT_PG_VERSION: 16
TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: remote
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
PLATFORM: ${{ matrix.platform }}

runs-on: [ self-hosted, us-east-2, x64 ]
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:pinned
options: --init

# Increase timeout to 8h, default timeout is 6h
timeout-minutes: 480

steps:
- uses: actions/checkout@v4

- name: Download Neon artifact
uses: ./.github/actions/download
with:
name: neon-${{ runner.os }}-release-artifact
path: /tmp/neon/
prefix: latest

- name: Add Postgres binaries to PATH
run: |
${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin/pgbench --version
echo "${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin" >> $GITHUB_PATH
- name: Create Neon Project
if: contains(fromJson('["neon-captest-new", "neon-captest-freetier", "neonvm-captest-new", "neonvm-captest-freetier"]'), matrix.platform)
id: create-neon-project
uses: ./.github/actions/neon-project-create
with:
region_id: ${{ github.event.inputs.region_id || 'aws-us-east-2' }}
postgres_version: ${{ env.DEFAULT_PG_VERSION }}
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
compute_units: ${{ (matrix.platform == 'neon-captest-freetier' && '[0.25, 0.25]') || '[1, 1]' }}
provisioner: ${{ (contains(matrix.platform, 'neonvm-') && 'k8s-neonvm') || 'k8s-pod' }}

- name: Set up Connection String
id: set-up-connstr
run: |
case "${PLATFORM}" in
neon-captest-reuse)
CONNSTR=${{ secrets.BENCHMARK_CAPTEST_CONNSTR }}
;;
neonvm-captest-sharding-reuse)
CONNSTR=${{ secrets.BENCHMARK_CAPTEST_SHARDING_CONNSTR }}
;;
neon-captest-new | neon-captest-freetier | neonvm-captest-new | neonvm-captest-freetier)
CONNSTR=${{ steps.create-neon-project.outputs.dsn }}
;;
rds-aurora)
CONNSTR=${{ secrets.BENCHMARK_RDS_AURORA_CONNSTR }}
;;
rds-postgres)
CONNSTR=${{ secrets.BENCHMARK_RDS_POSTGRES_CONNSTR }}
;;
*)
echo >&2 "Unknown PLATFORM=${PLATFORM}"
exit 1
;;
esac
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
QUERIES=("SELECT version()")
if [[ "${PLATFORM}" = "neon"* ]]; then
QUERIES+=("SHOW neon.tenant_id")
QUERIES+=("SHOW neon.timeline_id")
fi
for q in "${QUERIES[@]}"; do
psql ${CONNSTR} -c "${q}"
done
- name: Benchmark init
uses: ./.github/actions/run-python-test-set
with:
build_type: ${{ env.BUILD_TYPE }}
test_selection: performance
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_init
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"

- name: Benchmark simple-update
uses: ./.github/actions/run-python-test-set
with:
build_type: ${{ env.BUILD_TYPE }}
test_selection: performance
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_simple_update
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"

- name: Benchmark select-only
uses: ./.github/actions/run-python-test-set
with:
build_type: ${{ env.BUILD_TYPE }}
test_selection: performance
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_select_only
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"

- name: Delete Neon Project
if: ${{ steps.create-neon-project.outputs.project_id && always() }}
uses: ./.github/actions/neon-project-delete
with:
project_id: ${{ steps.create-neon-project.outputs.project_id }}
api_key: ${{ secrets.NEON_STAGING_API_KEY }}

- name: Create Allure report
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report-generate

- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@v1
with:
channel-id: "C033QLM5P7D" # dev-staging-stream
slack-message: "Periodic perf testing ${{ matrix.platform }}: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}

pgbench-compare:
if: ${{ github.event.inputs.run_only_pgvector_tests == 'true' || github.event.inputs.run_only_pgvector_tests == null }}

strategy:
fail-fast: false

env:
TEST_PG_BENCH_DURATIONS_MATRIX: "60m"
TEST_PG_BENCH_SCALES_MATRIX: "1"
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
DEFAULT_PG_VERSION: 16
TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: remote
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
PLATFORM: "neon-captest-pgvector"

runs-on: [ self-hosted, us-east-2, x64 ]
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:pinned
options: --init

# Increase timeout to 8h, default timeout is 6h
timeout-minutes: 480

steps:
- uses: actions/checkout@v4

- name: Download Neon artifact
uses: ./.github/actions/download
with:
name: neon-${{ runner.os }}-release-artifact
path: /tmp/neon/
prefix: latest

- name: Add Postgres binaries to PATH
run: |
${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin/pgbench --version
echo "${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin" >> $GITHUB_PATH
- name: Set up Connection String
id: set-up-connstr
run: |
CONNSTR=${{ secrets.BENCHMARK_PGVECTOR_CONNSTR }}
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
QUERIES=("SELECT version()")
QUERIES+=("SHOW neon.tenant_id")
QUERIES+=("SHOW neon.timeline_id")
for q in "${QUERIES[@]}"; do
psql ${CONNSTR} -c "${q}"
done
- name: Benchmark pgvector hnsw queries
uses: ./.github/actions/run-python-test-set
with:
build_type: ${{ env.BUILD_TYPE }}
test_selection: performance
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_pgvector
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"

- name: Create Allure report
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report-generate

- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@v1
with:
channel-id: "C033QLM5P7D" # dev-staging-stream
slack-message: "Periodic perf testing neon-captest-pgvector: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}

clickbench-compare:
# ClickBench DB for rds-aurora and rds-postgres is deployed to the same clusters
# we use for performance testing in pgbench-compare.
@@ -351,7 +586,7 @@ jobs:
#
# *_CLICKBENCH_CONNSTR: Genuine ClickBench DB with ~100M rows
# *_CLICKBENCH_10M_CONNSTR: DB with the first 10M rows of ClickBench DB
if: ${{ !cancelled() }}
if: ${{ !cancelled() && (github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null) }}
needs: [ generate-matrices, pgbench-compare ]

strategy:
@@ -455,7 +690,7 @@ jobs:
# We might change it after https://github.com/neondatabase/neon/issues/2900.
#
# *_TPCH_S10_CONNSTR: DB generated with scale factor 10 (~10 GB)
if: ${{ !cancelled() }}
if: ${{ !cancelled() && (github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null) }}
needs: [ generate-matrices, clickbench-compare ]

strategy:
@@ -557,7 +792,7 @@ jobs:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}

user-examples-compare:
if: ${{ !cancelled() }}
if: ${{ !cancelled() && (github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null) }}
needs: [ generate-matrices, tpch-compare ]

strategy:
10 changes: 10 additions & 0 deletions test_runner/performance/pgvector/pgbench_custom_script_pgvector_hsnw_queries.sql
@@ -0,0 +1,10 @@
with x (x) as (
select "embeddings" as x
from hnsw_test_table
TABLESAMPLE SYSTEM (1)
LIMIT 1
)
SELECT title, "embeddings" <=> (select x from x) as distance
FROM hnsw_test_table
ORDER BY 2
LIMIT 30;
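
The script above picks one random embedding from the table (TABLESAMPLE SYSTEM (1) with LIMIT 1) and then returns the 30 nearest rows by cosine distance, using pgvector's <=> operator so the HNSW index can serve the search. Below is a minimal sketch of the schema such a script assumes — the table and "embeddings" column names come from the query, while the vector dimension and index build parameters are illustrative assumptions, not taken from this commit:

-- Sketch of the assumed setup; dimension 1536 and the HNSW build parameters are hypothetical.
CREATE EXTENSION IF NOT EXISTS vector;

CREATE TABLE hnsw_test_table (
    id           bigserial PRIMARY KEY,
    title        text,
    "embeddings" vector(1536)
);

-- HNSW index with cosine-distance ops, matching the <=> operator used by the query.
CREATE INDEX ON hnsw_test_table
    USING hnsw ("embeddings" vector_cosine_ops)
    WITH (m = 16, ef_construction = 64);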
30 changes: 30 additions & 0 deletions test_runner/performance/test_perf_pgbench.py
@@ -17,6 +17,7 @@ class PgBenchLoadType(enum.Enum):
INIT = "init"
SIMPLE_UPDATE = "simple-update"
SELECT_ONLY = "select-only"
PGVECTOR_HNSW = "pgvector-hnsw"


def utc_now_timestamp() -> int:
@@ -132,6 +133,26 @@ def run_test_pgbench(env: PgCompare, scale: int, duration: int, workload_type: P
password=password,
)

if workload_type == PgBenchLoadType.PGVECTOR_HNSW:
# Run pgvector HNSW queries workload with a custom pgbench script (100 clients, 20 threads)
run_pgbench(
env,
"pgvector-hnsw",
[
"pgbench",
"-f",
"test_runner/performance/pgvector/pgbench_custom_script_pgvector_hsnw_queries.sql",
"-c100",
"-j20",
f"-T{duration}",
"-P2",
"--protocol=prepared",
"--progress-timestamp",
connstr,
],
password=password,
)

env.report_size()


@@ -201,3 +222,12 @@ def test_pgbench_remote_simple_update(remote_compare: PgCompare, scale: int, dur
@pytest.mark.remote_cluster
def test_pgbench_remote_select_only(remote_compare: PgCompare, scale: int, duration: int):
run_test_pgbench(remote_compare, scale, duration, PgBenchLoadType.SELECT_ONLY)

# The following test runs on an existing database that has the pgvector extension installed
# and a table with 1 million embedding vectors loaded and indexed with HNSW.
#
# Run this pgbench test against an existing remote Postgres cluster with the necessary setup.
@pytest.mark.parametrize("duration", get_durations_matrix())
@pytest.mark.remote_cluster
def test_pgbench_remote_pgvector(remote_compare: PgCompare, duration: int):
run_test_pgbench(remote_compare, 1, duration, PgBenchLoadType.PGVECTOR_HNSW)
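
Query latency in this benchmark (and therefore the transaction rate pgbench reports) depends on pgvector's query-time search width. A hedged example, assuming the installed pgvector version exposes the hnsw.ef_search setting (default 40); the value below is illustrative only:

-- Larger ef_search scans more index candidates per query: better recall, higher latency.
SET hnsw.ef_search = 100;
-- Re-running the custom script's query after this SET shows the latency/recall trade-off.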
