Merge Qdrant master into update.redisearch #20

Open
wants to merge 107 commits into base: update.redisearch
Commits (107)
e28cc02
Updated Weaviate Docker image url (auto PR by bot) (#109)
weaviate-git-bot Apr 9, 2024
f4436e4
pgvector improvements (#98)
ankane Apr 11, 2024
beaddb3
[pre-commit.ci] pre-commit suggestions (#47)
pre-commit-ci[bot] Apr 11, 2024
2ffe5e2
refactoring: Standardize format of search params in engine configs (#…
tellet-q Apr 15, 2024
5f2121e
refactor: Nested search params in ES config (#120)
KShivendu Apr 15, 2024
b7ec57e
refactor: Fix and simplify benchmark processing notebook (#125)
KShivendu Apr 16, 2024
5343849
feat: Add sparse vectors benchmark support for Qdrant (#114)
KShivendu Apr 17, 2024
d3fd49f
fix: Remove mc (#127)
KShivendu Apr 17, 2024
04bbb7c
fix: Manual benchmarks (#128)
KShivendu Apr 17, 2024
a8bcf78
feat: Add mmap support for reading sparse vectors to avoid OOM error …
KShivendu Apr 17, 2024
73bb7d1
fix: Avoid reading all mmaped sparse vectors into memory (#130)
KShivendu Apr 17, 2024
a294cce
fix: Use smaller sparse dataset for faster iteration (#132)
KShivendu Apr 18, 2024
1508944
feat: Force docker image removal (#131)
KShivendu Apr 18, 2024
e6049a4
feat: Use both RssAnon and VmRSS in CI benchmarks (#133)
KShivendu Apr 18, 2024
455b590
Automate running benchmarks for all engines (#134)
tellet-q Apr 18, 2024
a3a9b6b
feat: Add H&M filter dataset to CI benchmarks (#140)
KShivendu May 19, 2024
9598214
feat: Add BQ to CI benchmarks (#148)
KShivendu May 31, 2024
e026701
feat: Add DBpedia OpenAI embedding dataset with 100k vectors (#150)
KShivendu Jun 3, 2024
df382ae
Fix github triggers (#154)
tellet-q Jun 5, 2024
884fda3
Weaviate version 1.25.1 (#143)
filipecosta90 Jun 5, 2024
b66b4c5
feat: Add 15m timeout for CI benchmarks (#157)
KShivendu Jun 11, 2024
8a1c664
fix: Rename nodes in cluster mode (#161)
KShivendu Jun 11, 2024
7afc142
up qdrant version
generall Jun 12, 2024
a7d5d68
more configs
generall Jun 12, 2024
bdedf0c
fix: Typo in func name (#164)
KShivendu Jun 17, 2024
2f4a143
add convertor
generall Jun 21, 2024
2a8a8ed
timeout at 30 mins
generall Jun 21, 2024
fd6bea4
set ports instead of network_mode (#145)
ekorman Jul 8, 2024
dd9a4c0
Use efficient filtering for opensearch (#167)
igniting Jul 29, 2024
9db1d83
Fix opensearch query parser (#172)
igniting Aug 5, 2024
7140d3c
[pre-commit.ci] pre-commit suggestions (#169)
pre-commit-ci[bot] Aug 5, 2024
d82c2dc
Updated milvus from 2.3.1 to 2.4.1 (#144)
filipecosta90 Aug 5, 2024
2397444
feat: Add debug logs (#166)
KShivendu Aug 5, 2024
9ed570c
fix after upgrading milvus (#175)
KShivendu Aug 5, 2024
d0f2b18
fix: Unbound variable error (#178)
KShivendu Aug 6, 2024
940c206
Only copy search and upload in first step (#179)
KShivendu Aug 6, 2024
5cfdb0b
Fail CI if any benches fail (#180)
KShivendu Aug 7, 2024
d14e6e8
fix CI (#181)
KShivendu Aug 7, 2024
a4ffca4
Allow CI to force clear previously running resources if required (#182)
KShivendu Aug 7, 2024
4ffc3ce
Bump qdrant versions to 1.11.0 (#184)
tellet-q Aug 13, 2024
a11ebc0
Add continuous benchmark for tenants (#183)
tellet-q Aug 15, 2024
82ec5a8
Split Ci benchmarks into 2 jobs (#186)
tellet-q Aug 27, 2024
868caed
Increase CONTAINER_MEM_LIMIT to 160mb (#191)
tellet-q Sep 4, 2024
d3113bd
fix: pgvector and_subfilter (#193)
SebanDan Sep 17, 2024
66ef760
docs: update process-benchmarks.ipynb (#185)
eltociear Sep 17, 2024
bad876f
Disable workflow for investigation (#197)
tellet-q Sep 18, 2024
c334516
Enable workflow again
tellet-q Sep 18, 2024
108873e
Merge pull request #198 from qdrant/ci/enable-workflow
timvisee Sep 18, 2024
794cfa0
Disable cron (#201)
tellet-q Sep 19, 2024
6e0f4b3
Enable cron (#203)
tellet-q Sep 20, 2024
6bab477
Improve volumes and logging (#202)
tellet-q Sep 24, 2024
5cea6f1
Add benchmark on collection load time (#204)
tellet-q Sep 27, 2024
793f3d0
Report dataset's info in slack (#211)
tellet-q Oct 9, 2024
ea53db4
Update text (#212)
tellet-q Oct 9, 2024
a0d672c
Add benchmark on parallel upload and search (#215)
tellet-q Dec 2, 2024
a26483b
Allow parallel optimizations in Qdrant after uploading (#208)
timvisee Jan 2, 2025
8c37878
Only run runLoadTimeBenchmark once per day
tellet-q Jan 3, 2025
bb482e1
Add comment
tellet-q Jan 3, 2025
f4cb791
Split into 2 and introduce concurrency groups
tellet-q Jan 3, 2025
6bc2439
Update name
tellet-q Jan 3, 2025
5ec825e
Merge pull request #218 from qdrant/ci/reduce-snapshots-bench-runs
generall Jan 3, 2025
781e2f6
Add volume to persist datasets
tellet-q Jan 9, 2025
f562af5
Add volume to persist datasets
tellet-q Jan 9, 2025
740f178
Debug
tellet-q Jan 9, 2025
b889ca3
Update datasets.json during benches
tellet-q Jan 9, 2025
6ac0f49
Revert debug
tellet-q Jan 9, 2025
53c23d0
Add a workflow to remove datasets volume
tellet-q Jan 9, 2025
ffe5c4b
Revert debug
tellet-q Jan 9, 2025
2b45d53
Rename file
tellet-q Jan 9, 2025
2f27285
Merge pull request #219 from qdrant/feat/keep-datasets
generall Jan 10, 2025
c805233
Add ServerAliveInterval and ServerAliveCountMax to rsync and more (#220)
tellet-q Jan 13, 2025
eba0f51
[pre-commit.ci] pre-commit suggestions (#210)
pre-commit-ci[bot] Jan 14, 2025
c8afe7d
fix: weaviate-client version constraint (#199) (#200)
LukasWestholt Jan 15, 2025
532c948
ci/fix-poetry-install (#221)
tellet-q Jan 15, 2025
d12429f
Fix 403 (#222)
tellet-q Jan 16, 2025
71ba85a
Fix job's name in notification (#224)
tellet-q Jan 17, 2025
d6ba3ab
Update qdrant-single-node-bq-rps.json
generall Jan 17, 2025
099079e
Update qdrant-single-node-bq-rps.json
generall Jan 17, 2025
d406db7
Update qdrant-single-node-bq-rps.json
generall Jan 17, 2025
493fb5b
Add Compare Versions Workflow (#225)
tellet-q Feb 25, 2025
8564a25
Complete compare versions' workflow (#226)
tellet-q Mar 13, 2025
475a39f
Cancel manual workflow early (#227)
tellet-q Mar 13, 2025
6a73ea1
Use query_points (#230)
tellet-q Apr 1, 2025
a7e51fa
Fix sparse vector name in query (#232)
tellet-q Apr 1, 2025
9849594
Run ci with payload (#231)
tellet-q Apr 1, 2025
4c2e17b
Use find instead of ls to search files (#234)
tellet-q Apr 7, 2025
a4eff0e
Run benchmarks with feature flags for dev (#237)
tellet-q Apr 10, 2025
0b5393e
Fix fetching upload results (#239)
tellet-q Apr 10, 2025
61a1bf5
Ensure search in current dir only (#240)
tellet-q Apr 14, 2025
000073b
Introduce workflow input `feature_flags_all` for manual benchmarks (#…
tellet-q Apr 14, 2025
74c756e
Fix descriptions
tellet-q Apr 14, 2025
1fe9d86
Apply suggestions from code review
timvisee Apr 14, 2025
6f10563
Merge pull request #243 from qdrant/fix-descriptions
timvisee Apr 14, 2025
799aa4f
Use boolean input type in workflow input for toggling all feature fla…
timvisee Apr 14, 2025
b0b5e52
Enable indexing while uploading
xzfc Apr 9, 2025
cc4db55
Merge pull request #238 from qdrant/enable-index-while-uploading
xzfc Apr 17, 2025
74b6d89
Reduce ci code duplication (#245)
tellet-q Apr 29, 2025
ce8fe71
Add CPU stats into monitoring (#244)
tellet-q Apr 29, 2025
1ae9ca7
Improve README docs and examples (#217)
KShivendu Apr 30, 2025
9bb99ab
Improve GH outputs (#246)
tellet-q Apr 30, 2025
fec61b5
add changes for running cohere wiki benchmark
generall May 4, 2025
fe35b85
rescore with prefetch option
generall May 4, 2025
4848c22
upd deps
May 4, 2025
534220a
upd params
generall May 5, 2025
f7833d0
upd params
generall May 5, 2025
323a928
upd test config
May 5, 2025
d5729aa
Merge update.redisearch branch into sync-qdrant-master
fcostaoliveira May 6, 2025
38 changes: 38 additions & 0 deletions .github/workflows/actions/run-engine-benchmark/action.yaml
@@ -0,0 +1,38 @@
name: Run Engine Benchmark
description: "Run benchmark with specified params"
inputs:
  engine:
    description: "engine (i.e qdrant-default)"
    required: true
  dataset:
    description: "dataset (i.e random-100)"
    required: true
  compose_file:
    description: "path to docker compose"
    required: true

runs:
  using: "composite"
  steps:
    - name: Install poetry
      shell: bash
      run: pip install poetry
    - uses: actions/setup-python@v5
      with:
        python-version: "3.10"
        cache: "poetry"
    - name: Install deps
      shell: bash
      run: poetry install
    - uses: hoverkraft-tech/[email protected]
      with:
        compose-file: "${{ inputs.compose_file }}"
    - name: Execution
      shell: bash
      run: |
        engine="${{ inputs.engine }}"
        if [[ "$engine" == *"elasticsearch"* || "$engine" == *"opensearch"* ]]; then
          ./tools/wait_for_green_status.sh
        fi
        source $(poetry env info -p)/bin/activate
        poetry run python3 run.py --engines "${{ inputs.engine }}" --datasets "${{ inputs.dataset }}"
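
For orientation, a workflow job would call this composite action by its in-repository path, the same way the continuous-benchmark workflow further down calls send-slack-msg. A minimal sketch, with a hypothetical job name, the example engine/dataset values from the input descriptions above, and a placeholder compose-file path (none of these values come from the repository's configs):

jobs:
  runSingleBenchmark:   # hypothetical job name
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      # Invoke the composite action by its path inside this repository.
      - uses: ./.github/workflows/actions/run-engine-benchmark
        with:
          engine: "qdrant-default"     # example value from the input description
          dataset: "random-100"        # example value from the input description
          compose_file: "path/to/docker-compose.yaml"   # illustrative placeholder path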
72 changes: 72 additions & 0 deletions .github/workflows/actions/send-slack-msg/action.yaml
@@ -0,0 +1,72 @@
name: Send Notification
description: "Send a notification to Slack"
inputs:
  bench_name:
    description: "name of the failed job (i.e runBenchmark)"
    required: true
  job_status:
    description: "status of the job (i.e failed)"
    required: true
  failed_outputs:
    description: "details of the failed job"
    required: false
    default: "{}"
  qdrant_version:
    description: "version of Qdrant used in the benchmark"
    required: false
    default: "unknown"
  engine_name:
    description: "name of the engine used in the benchmark"
    required: false
    default: "unknown"
  dataset:
    description: "name of the dataset used in the benchmark"
    required: false
    default: "unknown"

runs:
  using: "composite"
  steps:
    - uses: slackapi/[email protected]
      with:
        payload: |
          {
            "text": "CI benchmarks (${{ inputs.bench_name }}) run status: ${{ inputs.job_status }}",
            "blocks": [
              {
                "type": "section",
                "text": {
                  "type": "mrkdwn",
                  "text": "CI benchmarks (${{ inputs.bench_name }}) failed because of *${{ inputs.failed_outputs }}*."
                }
              },
              {
                "type": "section",
                "text": {
                  "type": "mrkdwn",
                  "text": "Qdrant version: *${{ inputs.qdrant_version }}*."
                }
              },
              {
                "type": "section",
                "text": {
                  "type": "mrkdwn",
                  "text": "Engine: *${{ inputs.engine_name }}*."
                }
              },
              {
                "type": "section",
                "text": {
                  "type": "mrkdwn",
                  "text": "Dataset: *${{ inputs.dataset }}*."
                }
              },
              {
                "type": "section",
                "text": {
                  "type": "mrkdwn",
                  "text": "View the results <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|here>"
                }
              }
            ]
          }
52 changes: 52 additions & 0 deletions .github/workflows/clean-datasets.yaml
@@ -0,0 +1,52 @@
name: Clean Datasets

on:
  repository_dispatch:
  workflow_dispatch:
  schedule:
    # Run every month on the 1st day at 3 am
    - cron: "0 3 1 * *"

concurrency:
  group: continuous-benchmark

# This removes the ci-datasets volume from the client machine.
# The next run of Continuous Benchmark will create the volume again and download all the datasets.
jobs:
  removeDatasetsVolume:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - uses: webfactory/[email protected]
        with:
          ssh-private-key: ${{ secrets.SSH_PRIVATE_KEY }}
      - name: Benches
        id: benches
        run: |
          export HCLOUD_TOKEN=${{ secrets.HCLOUD_TOKEN }}

          set +e

          timeout 10m bash -x tools/run_client_remove_volume.sh

          set -e
      - name: Send Notification
        if: failure()
        uses: slackapi/[email protected]
        with:
          payload: |
            {
              "text": "Failed to remove the datasets volume (removeDatasetsVolume), run status: ${{ job.status }}",
              "blocks": [
                {
                  "type": "section",
                  "text": {
                    "type": "mrkdwn",
                    "text": "View the results <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|here>"
                  }
                }
              ]
            }
        env:
          SLACK_WEBHOOK_URL: ${{ secrets.CI_ALERTS_CHANNEL_WEBHOOK_URL }}
          SLACK_WEBHOOK_TYPE: INCOMING_WEBHOOK
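
The removal itself is delegated to tools/run_client_remove_volume.sh, which is not part of this diff. As a rough sketch of the intent only (the `ci-datasets` volume name comes from the comment above; the remote-host handling and everything else here are assumptions, not the repository's actual script):

#!/usr/bin/env bash
# Hypothetical sketch: drop the ci-datasets Docker volume on the benchmark client machine,
# so the next Continuous Benchmark run recreates it and re-downloads all the datasets.
set -euo pipefail

CLIENT_HOST="${CLIENT_HOST:?set CLIENT_HOST to the benchmark client host}"   # assumed variable

# `|| true` keeps the sketch from failing if the volume does not exist.
ssh "$CLIENT_HOST" 'docker volume rm ci-datasets || true'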
86 changes: 86 additions & 0 deletions .github/workflows/continuous-benchmark-2.yaml
@@ -0,0 +1,86 @@
name: Continuous Benchmark 2

on:
  repository_dispatch:
  workflow_dispatch:
  schedule:
    # Run every day at midnight
    - cron: "0 0 * * *"

# Restrict to only running this workflow one at a time.
# Any new runs will be queued until the previous run is complete.
# Any existing pending runs will be cancelled and replaced with the current run.
concurrency:
  group: continuous-benchmark

jobs:
  # Schedule this benchmark to run once a day for the sake of saving on S3 costs.
  runLoadTimeBenchmark:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - uses: webfactory/[email protected]
        with:
          ssh-private-key: ${{ secrets.SSH_PRIVATE_KEY }}
      - name: Benches
        id: benches
        run: |
          export HCLOUD_TOKEN=${{ secrets.HCLOUD_TOKEN }}
          export POSTGRES_PASSWORD=${{ secrets.POSTGRES_PASSWORD }}
          export POSTGRES_HOST=${{ secrets.POSTGRES_HOST }}
          export SERVER_NAME="benchmark-server-3"
          bash -x tools/setup_ci.sh

          set +e

          # Benchmark collection load time
          export BENCHMARK_STRATEGY="collection-reload"

          declare -A DATASET_TO_ENGINE
          declare -A DATASET_TO_URL
          DATASET_TO_ENGINE["all-payloads-default"]="qdrant-continuous-benchmark-snapshot"
          DATASET_TO_ENGINE["all-payloads-on-disk"]="qdrant-continuous-benchmark-snapshot"
          DATASET_TO_ENGINE["all-payloads-default-sparse"]="qdrant-continuous-benchmark-snapshot"
          DATASET_TO_ENGINE["all-payloads-on-disk-sparse"]="qdrant-continuous-benchmark-snapshot"

          export STORAGE_URL="https://storage.googleapis.com/qdrant-benchmark-snapshots/all-payloads"
          DATASET_TO_URL["all-payloads-default"]="${STORAGE_URL}/benchmark-all-payloads-500k-768-default.snapshot"
          DATASET_TO_URL["all-payloads-on-disk"]="${STORAGE_URL}/benchmark-all-payloads-500k-768-on-disk.snapshot"
          DATASET_TO_URL["all-payloads-default-sparse"]="${STORAGE_URL}/benchmark-all-payloads-500k-sparse-default.snapshot"
          DATASET_TO_URL["all-payloads-on-disk-sparse"]="${STORAGE_URL}/benchmark-all-payloads-500k-sparse-on-disk.snapshot"

          set +e

          for dataset in "${!DATASET_TO_ENGINE[@]}"; do
            export ENGINE_NAME=${DATASET_TO_ENGINE[$dataset]}
            export DATASETS=$dataset
            export SNAPSHOT_URL=${DATASET_TO_URL[$dataset]}

            # Benchmark the dev branch:
            export QDRANT_VERSION=ghcr/dev
            export QDRANT__FEATURE_FLAGS__ALL=true
            timeout 30m bash -x tools/run_ci.sh

            # Benchmark the master branch:
            export QDRANT_VERSION=docker/master
            export QDRANT__FEATURE_FLAGS__ALL=false
            timeout 30m bash -x tools/run_ci.sh
          done

          set -e
      - name: Fail job if any of the benches failed
        if: steps.benches.outputs.failed == 'error' || steps.benches.outputs.failed == 'timeout'
        run: exit 1
      - name: Send slack message
        uses: ./.github/workflows/actions/send-slack-msg
        if: failure() || cancelled()
        with:
          bench_name: "runLoadTimeBenchmark"
          job_status: ${{ job.status }}
          failed_outputs: ${{ steps.benches.outputs.failed }}
          qdrant_version: ${{ steps.benches.outputs.qdrant_version }}
          engine_name: ${{ steps.benches.outputs.engine_name }}
          dataset: ${{ steps.benches.outputs.dataset }}
        env:
          SLACK_WEBHOOK_URL: ${{ secrets.CI_ALERTS_CHANNEL_WEBHOOK_URL }}
          SLACK_WEBHOOK_TYPE: INCOMING_WEBHOOK
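
The "Fail job if any of the benches failed" step reads steps.benches.outputs.failed, so tools/run_ci.sh (or the code around it) is expected to write that output. A minimal sketch of the producing side, assuming the timeout case is distinguished from other failures via the exit code of `timeout`; the `error`/`timeout` values and output names come from the workflow above, but the writing logic itself is an assumption, not the repository's script:

# Inside the "Benches" step, after each benchmark invocation:
timeout 30m bash -x tools/run_ci.sh
rc=$?
if [ "$rc" -eq 124 ]; then
  # `timeout` exits with 124 when the 30-minute limit is hit.
  echo "failed=timeout" >> "$GITHUB_OUTPUT"
elif [ "$rc" -ne 0 ]; then
  echo "failed=error" >> "$GITHUB_OUTPUT"
fi
# Context for the Slack notification, read back as steps.benches.outputs.*:
echo "engine_name=$ENGINE_NAME" >> "$GITHUB_OUTPUT"
echo "dataset=$DATASETS" >> "$GITHUB_OUTPUT"
echo "qdrant_version=$QDRANT_VERSION" >> "$GITHUB_OUTPUT"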