Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
e38efb9
feat: add diskann index
richyreachy Apr 22, 2026
43c9db5
fix: add libaio dependency
richyreachy Apr 23, 2026
043456d
Merge branch 'main' into feat/diskann_index
richyreachy Apr 23, 2026
dae465c
fix: fix initialization
richyreachy Apr 23, 2026
1202e42
Merge branch 'feat/diskann_index' of github.com:richyreachy/zvec into…
richyreachy Apr 23, 2026
d9503d3
Merge branch 'main' into feat/diskann_index
richyreachy Apr 23, 2026
c24cd82
refactor: fix ut
richyreachy Apr 24, 2026
87a1b8c
Merge branch 'feat/diskann_index' of github.com:richyreachy/zvec into…
richyreachy Apr 24, 2026
4e668da
fix: fix seed value
richyreachy Apr 25, 2026
fe60376
fix: fix seed value
richyreachy Apr 26, 2026
dc3d0a8
fix: update ut
richyreachy Apr 27, 2026
071d275
Merge branch 'main' into feat/diskann_index
richyreachy Apr 27, 2026
78eea04
fix: fix ut
richyreachy Apr 27, 2026
41f893d
Merge branch 'main' into feat/diskann_index
richyreachy Apr 28, 2026
4cc1ecb
fix: fix according to pr comments
richyreachy Apr 28, 2026
35192e7
Merge branch 'feat/diskann_index' of github.com:richyreachy/zvec into…
richyreachy Apr 28, 2026
53f9ef9
fix: fix on comments
richyreachy Apr 29, 2026
f3ee4ca
fix: fix yaml
richyreachy Apr 29, 2026
fd71a06
feat: support dynamic load
richyreachy Apr 29, 2026
17570d7
feat: add dynamic load
richyreachy Apr 29, 2026
9ac2c3f
Merge branch 'main' into feat/diskann_index
richyreachy Apr 29, 2026
b9a19d1
fix: plugin
richyreachy Apr 29, 2026
9712bd5
fix: plugin
richyreachy Apr 29, 2026
e7bceb4
fix: plugin
richyreachy Apr 29, 2026
9c91f11
fix: fix yaml
richyreachy Apr 29, 2026
1c486f3
fix: plugin
richyreachy Apr 29, 2026
56fcc95
fix: plugin
richyreachy Apr 29, 2026
32954a1
fix: fix plugin
richyreachy Apr 29, 2026
c0b2a8f
Revert "fix: fix plugin"
richyreachy Apr 29, 2026
916c870
Merge branch 'main' into feat/diskann_dynamic_load
richyreachy Apr 29, 2026
f78ff5d
fix: fix ut
richyreachy Apr 29, 2026
7fec86c
Merge branch 'feat/diskann_index' into feat/diskann_dynamic_load
richyreachy Apr 29, 2026
ac1bad7
fix: fix windows build
richyreachy Apr 29, 2026
bf6365d
Merge branch 'main' into feat/diskann_dynamic_load
richyreachy Apr 29, 2026
a2fa6f2
fix: fix build
richyreachy Apr 30, 2026
9d0a63b
feat: merge with main
richyreachy May 7, 2026
7e07c0b
feat: merge with main
richyreachy May 7, 2026
1e23806
fix: fix buffer storage
richyreachy May 7, 2026
9b0e226
Merge branch 'feat/diskann_index' into feat/diskann_dynamic_load
richyreachy May 7, 2026
5d5634a
refactor: use silent invokation
richyreachy May 7, 2026
2b561f9
Merge branch 'main' into feat/diskann_dynamic_load
richyreachy May 7, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion .github/workflows/03-macos-linux-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,14 @@ jobs:
sudo apt-get install -y clang libomp-dev
shell: bash

# Installs the libaio development package (libaio-dev) through apt.
# The `if:` guard limits this step to Linux runners with X64 architecture.
# --no-install-recommends keeps apt from pulling in optional packages.
- name: Install AIO
if: runner.os == 'Linux' && runner.arch == 'X64'
run: |
sudo apt-get update
sudo apt-get install -y --no-install-recommends \
libaio-dev
shell: bash

- name: Print CPU info
if: runner.os == 'Linux'
run: lscpu
Expand Down Expand Up @@ -89,7 +97,7 @@ jobs:
pytest \
scikit-build-core \
setuptools_scm
shell: bash
shell: bash

- name: Build from source
run: |
Expand Down
8 changes: 8 additions & 0 deletions .github/workflows/clang_tidy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,14 @@ jobs:
sudo apt-get update
sudo apt-get install -y clang-tidy=1:18.0-59~exp2 cmake ninja-build libomp-dev
# Installs the libaio development package (libaio-dev) through apt.
# The `if:` guard limits this step to Linux runners with X64 architecture.
# NOTE(review): the preceding step already runs `apt-get update`; the second
# update here looks redundant - confirm before removing.
- name: Install AIO
if: runner.os == 'Linux' && runner.arch == 'X64'
run: |
sudo apt-get update
sudo apt-get install -y --no-install-recommends \
libaio-dev
shell: bash

- name: Configure CMake and export compile commands
run: |
cmake -S . -B build -G Ninja \
Expand Down
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,6 @@
[submodule "thirdparty/RaBitQ-Library/RaBitQ-Library-0.1"]
path = thirdparty/RaBitQ-Library/RaBitQ-Library-0.1
url = https://github.com/VectorDB-NTU/RaBitQ-Library.git
[submodule "thirdparty/aio/libaio-0.3"]
path = thirdparty/aio/libaio-0.3
url = https://github.com/yugabyte/libaio.git
23 changes: 23 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,17 @@ else()
endif()
message(STATUS "RABITQ_ARCH_FLAG: ${RABITQ_ARCH_FLAG}")

# DiskAnn platform gate (the backend requires libaio).
# DISKANN_SUPPORTED is turned ON only for non-Android, non-iOS Linux builds
# whose CMAKE_SYSTEM_PROCESSOR matches the x86 family regex below. Note that
# the regex accepts 32-bit x86 (i686/i386) in addition to x86_64.
# The result is also exported to the compiler as -DDISKANN_SUPPORTED=<0|1>
# for everything configured from this directory downwards.
set(DISKANN_SUPPORTED OFF)
if(CMAKE_SYSTEM_NAME STREQUAL "Linux"
   AND CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|i686|i386"
   AND NOT ANDROID
   AND NOT IOS)
  set(DISKANN_SUPPORTED ON)
endif()

if(DISKANN_SUPPORTED)
  add_definitions(-DDISKANN_SUPPORTED=1)
else()
  add_definitions(-DDISKANN_SUPPORTED=0)
  message(STATUS "DiskAnn support disabled - only supported on Linux x86_64")
endif()
message(STATUS "DISKANN_SUPPORTED: ${DISKANN_SUPPORTED}")

option(USE_OSS_MIRROR "Use OSS mirror for faster third-party downloads" OFF)
if(DEFINED ENV{USE_OSS_MIRROR} AND NOT "$ENV{USE_OSS_MIRROR}" STREQUAL "")
set(USE_OSS_MIRROR "$ENV{USE_OSS_MIRROR}" CACHE BOOL "Use OSS mirror for faster third-party downloads" FORCE)
Expand Down Expand Up @@ -145,4 +156,16 @@ if(BUILD_PYTHON_BINDINGS)

message(STATUS "Zvec install path: ${ZVEC_PY_INSTALL_DIR}")
install(TARGETS _zvec LIBRARY DESTINATION ${ZVEC_PY_INSTALL_DIR})

# Package the DiskAnn plugin with the Python extension.
#
# DiskAnn is delivered as a shared module (libzvec_diskann_plugin.so) that is
# loaded at runtime, implicitly, the first time a DiskAnn index is created;
# callers never invoke a load function themselves. The Python extension looks
# for the module in the directory that holds _zvec.so (see the $ORIGIN rpath
# in src/binding/python/CMakeLists.txt), so the wheel has to install both
# files into the same location.
#
# The core_knn_diskann target is only defined where DiskAnn can be built
# (currently Linux x86_64 with libaio), hence the TARGET guard.
if(TARGET core_knn_diskann)
  install(
    TARGETS core_knn_diskann
    LIBRARY DESTINATION ${ZVEC_PY_INSTALL_DIR}
  )
endif()
endif()
14 changes: 14 additions & 0 deletions python/tests/detail/fixture_helper.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
import pytest
import logging
import platform

# True only when the tests run on Linux and the interpreter reports an
# x86-family machine string; DiskAnn test cases are skipped otherwise.
DISKANN_SUPPORTED = (
    platform.system() == "Linux"
    and platform.machine() in {"x86_64", "AMD64", "i686", "i386"}
)

from typing import Any, Generator
from zvec.typing import DataType, StatusCode, MetricType, QuantizeType
Expand Down Expand Up @@ -97,6 +105,12 @@ def full_schema_new(request) -> CollectionSchema:
else:
nullable, has_index, vector_index = True, False, HnswIndexParam()

# Skip DiskAnn tests on unsupported platforms
from zvec.model.param import DiskAnnIndexParam

if isinstance(vector_index, DiskAnnIndexParam) and not DISKANN_SUPPORTED:
pytest.skip("DiskAnn only supported on Linux x86_64")

scalar_index_param = None
vector_index_param = None
if has_index:
Expand Down
96 changes: 80 additions & 16 deletions python/tests/detail/test_collection_recall.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,10 @@
HnswIndexParam,
FlatIndexParam,
IVFIndexParam,
DiskAnnIndexParam,
HnswQueryParam,
IVFQueryParam,
DiskAnnQueryParam,
)

from zvec.model.schema import FieldSchema, VectorSchema
Expand Down Expand Up @@ -179,10 +181,24 @@ def get_ground_truth_map(collection, test_docs, query_vectors_map, metric_type,
for field_name, query_vectors in query_vectors_map.items():
ground_truth_map[field_name] = {}

# Support per-field metric type: metric_type can be a dict mapping
# field_name -> MetricType, or a single MetricType applied to all fields.
if isinstance(metric_type, dict):
field_metric = metric_type.get(field_name, MetricType.IP)
else:
field_metric = metric_type

for i, query_vector in enumerate(query_vectors):
# Get the ground truth for this query
relevant_doc_ids_scores = get_ground_truth_for_vector_query(
collection, query_vector, field_name, test_docs, i, metric_type, k, True
collection,
query_vector,
field_name,
test_docs,
i,
field_metric,
k,
True,
)
ground_truth_map[field_name][i] = relevant_doc_ids_scores

Expand Down Expand Up @@ -292,6 +308,7 @@ class TestRecall:
[
(True, True, HnswIndexParam()),
(False, True, IVFIndexParam()),
(False, True, DiskAnnIndexParam()),
(False, True, FlatIndexParam()), # ——ok
(
True,
Expand Down Expand Up @@ -371,6 +388,19 @@ class TestRecall:
use_soar=False,
),
),
(
True,
True,
DiskAnnIndexParam(
metric_type=MetricType.IP,
max_degree=32,
),
),
(
True,
True,
DiskAnnIndexParam(metric_type=MetricType.L2, max_degree=32),
),
],
indirect=True,
)
Expand All @@ -388,10 +418,16 @@ def test_recall_with_single_vector_valid_500(
):
full_schema_params = request.getfixturevalue("full_schema_new")

# Build per-field metric type map so ground truth uses each field's
# actual index metric (fields may fall back to HnswIndexParam/IP).
field_metric_map = {}
for vector_para in full_schema_params.vectors:
if vector_para.name == "vector_fp32_field":
metric_type = vector_para.index_param.metric_type
break
if vector_para.index_param is not None:
field_metric_map[vector_para.name] = vector_para.index_param.metric_type
else:
field_metric_map[vector_para.name] = MetricType.IP

metric_type = field_metric_map.get("vector_fp32_field", MetricType.IP)

multiple_docs = [
generate_doc_recall(i, full_collection_new.schema) for i in range(doc_num)
Expand Down Expand Up @@ -438,9 +474,13 @@ def test_recall_with_single_vector_valid_500(
multiple_docs[i].vectors[field_name] for i in range(query_num)
]

# Get ground truth mapping
# Get ground truth mapping (pass per-field metric map)
ground_truth_map = get_ground_truth_map(
full_collection_new, multiple_docs, query_vectors_map, metric_type, top_k
full_collection_new,
multiple_docs,
query_vectors_map,
field_metric_map,
top_k,
)

# Validate ground truth mapping structure
Expand Down Expand Up @@ -479,8 +519,8 @@ def test_recall_with_single_vector_valid_500(

print("(recall_at_k_stats:\n")
print(recall_at_k_stats)
print("metric_type:")
print(metric_type)
print("field_metric_map:")
print(field_metric_map)
# Print Recall@K statistics
print(f"Recall@{top_k} using Ground Truth:")
for field_name, stats in recall_at_k_stats.items():
Expand Down Expand Up @@ -552,7 +592,21 @@ def test_recall_with_single_vector_valid_500(
use_soar=True,
),
),
# (True, True, IVFIndexParam(metric_type=MetricType.COSINE, n_list=150, n_iters=15, use_soar=False, )),
(
True,
True,
DiskAnnIndexParam(metric_type=MetricType.IP, max_degree=32),
),
(
True,
True,
DiskAnnIndexParam(metric_type=MetricType.L2, max_degree=32),
),
(
True,
True,
DiskAnnIndexParam(metric_type=MetricType.COSINE, max_degree=32),
),
],
indirect=True,
)
Expand All @@ -571,10 +625,16 @@ def test_recall_with_single_vector_valid_2000(
):
full_schema_params = request.getfixturevalue("full_schema_new")

# Build per-field metric type map so ground truth uses each field's
# actual index metric (fields may fall back to HnswIndexParam/IP).
field_metric_map = {}
for vector_para in full_schema_params.vectors:
if vector_para.name == "vector_fp32_field":
metric_type = vector_para.index_param.metric_type
break
if vector_para.index_param is not None:
field_metric_map[vector_para.name] = vector_para.index_param.metric_type
else:
field_metric_map[vector_para.name] = MetricType.IP

metric_type = field_metric_map.get("vector_fp32_field", MetricType.IP)

multiple_docs = [
generate_doc_recall(i, full_collection_new.schema) for i in range(doc_num)
Expand Down Expand Up @@ -621,9 +681,13 @@ def test_recall_with_single_vector_valid_2000(
multiple_docs[i].vectors[field_name] for i in range(query_num)
]

# Get ground truth mapping
# Get ground truth mapping (pass per-field metric map)
ground_truth_map = get_ground_truth_map(
full_collection_new, multiple_docs, query_vectors_map, metric_type, top_k
full_collection_new,
multiple_docs,
query_vectors_map,
field_metric_map,
top_k,
)

# Validate ground truth mapping structure
Expand Down Expand Up @@ -662,8 +726,8 @@ def test_recall_with_single_vector_valid_2000(

print("(recall_at_k_stats:\n")
print(recall_at_k_stats)
print("metric_type:")
print(metric_type)
print("field_metric_map:")
print(field_metric_map)
# Print Recall@K statistics
print(f"Recall@{top_k} using Ground Truth:")
for field_name, stats in recall_at_k_stats.items():
Expand Down
4 changes: 4 additions & 0 deletions python/zvec/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@
AddColumnOption,
AlterColumnOption,
CollectionOption,
DiskAnnIndexParam,
DiskAnnQueryParam,
FlatIndexParam,
HnswIndexParam,
HnswQueryParam,
Expand Down Expand Up @@ -116,6 +118,8 @@
"HnswRabitqIndexParam",
"FlatIndexParam",
"IVFIndexParam",
"DiskAnnIndexParam",
"DiskAnnQueryParam",
"CollectionOption",
"IndexOption",
"OptimizeOption",
Expand Down
4 changes: 4 additions & 0 deletions python/zvec/__init__.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ from .model.param import (
AddColumnOption,
AlterColumnOption,
CollectionOption,
DiskAnnIndexParam,
DiskAnnQueryParam,
FlatIndexParam,
HnswIndexParam,
HnswQueryParam,
Expand Down Expand Up @@ -53,6 +55,8 @@ __all__: list = [
"DataType",
"DenseEmbeddingFunction",
"DenseEmbeddingFunction",
"DiskAnnIndexParam",
"DiskAnnQueryParam",
"Doc",
"FieldSchema",
"FlatIndexParam",
Expand Down
4 changes: 4 additions & 0 deletions python/zvec/model/param/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
AddColumnOption,
AlterColumnOption,
CollectionOption,
DiskAnnIndexParam,
DiskAnnQueryParam,
FlatIndexParam,
HnswIndexParam,
HnswQueryParam,
Expand All @@ -35,6 +37,8 @@
"AddColumnOption",
"AlterColumnOption",
"CollectionOption",
"DiskAnnIndexParam",
"DiskAnnQueryParam",
"FlatIndexParam",
"HnswIndexParam",
"HnswQueryParam",
Expand Down
3 changes: 2 additions & 1 deletion src/ailego/algorithm/kmeans.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ class Kmc2CentroidsGenerator {
auto *centroids = owner->mutable_centroids();

std::mt19937 mt((std::random_device())());

std::uniform_real_distribution<float> dist(0.0, 1.0);

ContainerType benches(cache.dimension());
Expand Down Expand Up @@ -1216,4 +1217,4 @@ using NibbleInnerProductKmeans =
LloydCluster<T, TPool, TContext, NibbleVectorArray<T>>;

} // namespace ailego
} // namespace zvec
} // namespace zvec
2 changes: 1 addition & 1 deletion src/ailego/algorithm/lloyd_cluster.h
Original file line number Diff line number Diff line change
Expand Up @@ -361,4 +361,4 @@ class LloydCluster {
};

} // namespace ailego
} // namespace zvec
} // namespace zvec
5 changes: 5 additions & 0 deletions src/binding/c/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,11 @@ set_target_properties(zvec_c_api PROPERTIES
VISIBILITY_INLINES_HIDDEN ON
)

# Windows only: compile zvec_c_api with ZVEC_BUILD_SHARED defined so that
# c_api.h selects __declspec(dllexport). The definition is PRIVATE, so it is
# not propagated to targets that link against zvec_c_api.
if(WIN32 OR MSVC)
  target_compile_definitions(
    zvec_c_api
    PRIVATE ZVEC_BUILD_SHARED
  )
endif()

find_package(Threads REQUIRED)

# Static linking of C++ standard library is handled in platform-specific sections
Expand Down
Loading