Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
e38efb9
feat: add diskann index
richyreachy Apr 22, 2026
43c9db5
fix: add libaio dependency
richyreachy Apr 23, 2026
043456d
Merge branch 'main' into feat/diskann_index
richyreachy Apr 23, 2026
dae465c
fix: fix initialization
richyreachy Apr 23, 2026
1202e42
Merge branch 'feat/diskann_index' of github.com:richyreachy/zvec into…
richyreachy Apr 23, 2026
d9503d3
Merge branch 'main' into feat/diskann_index
richyreachy Apr 23, 2026
c24cd82
refactor: fix ut
richyreachy Apr 24, 2026
87a1b8c
Merge branch 'feat/diskann_index' of github.com:richyreachy/zvec into…
richyreachy Apr 24, 2026
4e668da
fix: fix seed value
richyreachy Apr 25, 2026
fe60376
fix: fix seed value
richyreachy Apr 26, 2026
dc3d0a8
fix: update ut
richyreachy Apr 27, 2026
071d275
Merge branch 'main' into feat/diskann_index
richyreachy Apr 27, 2026
78eea04
fix: fix ut
richyreachy Apr 27, 2026
41f893d
Merge branch 'main' into feat/diskann_index
richyreachy Apr 28, 2026
4cc1ecb
fix: fix according to pr comments
richyreachy Apr 28, 2026
35192e7
Merge branch 'feat/diskann_index' of github.com:richyreachy/zvec into…
richyreachy Apr 28, 2026
53f9ef9
fix: fix on comments
richyreachy Apr 29, 2026
f3ee4ca
fix: fix yaml
richyreachy Apr 29, 2026
9ac2c3f
Merge branch 'main' into feat/diskann_index
richyreachy Apr 29, 2026
f78ff5d
fix: fix ut
richyreachy Apr 29, 2026
9d0a63b
feat: merge with main
richyreachy May 7, 2026
1e23806
fix: fix buffer storage
richyreachy May 7, 2026
12ae920
Merge branch 'main' into feat/diskann_index
richyreachy May 7, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion .github/workflows/03-macos-linux-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,14 @@ jobs:
sudo apt-get install -y clang libomp-dev
shell: bash

- name: Install AIO
if: runner.os == 'Linux' && runner.arch == 'X64'
run: |
sudo apt-get update
sudo apt-get install -y --no-install-recommends \
libaio-dev
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

如果用户的环境没有装libaio-dev,会发生什么?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

现在默认使用是需要安装libaio,可以通过配置的方式进行区分,千问的建议是通过linux安装包的方式安装libaio库:

Installation

zvec requires the libaio system library on Linux.

On Ubuntu/Debian:

sudo apt-get install libaio1 libaio-dev
pip install zvec

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

如果没有安装,会发生什么?这里预期的行为应该是 如果用户不安装aio,不影响除diskann的其他功能使用

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

在新的PR里做了调整:#378

shell: bash

- name: Print CPU info
if: runner.os == 'Linux'
run: lscpu
Expand Down Expand Up @@ -89,7 +97,7 @@ jobs:
pytest \
scikit-build-core \
setuptools_scm
shell: bash
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

把bash加回去吧,统一一点,并且如果后续这里是多行命令,在非bash为默认shell的环境下可能会出问题

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

shell: bash

- name: Build from source
run: |
Expand Down
8 changes: 8 additions & 0 deletions .github/workflows/clang_tidy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,14 @@ jobs:
sudo apt-get update
sudo apt-get install -y clang-tidy=1:18.0-59~exp2 cmake ninja-build libomp-dev

- name: Install AIO
  # Installs the libaio development package; the step is restricted to
  # Linux runners with an X64 architecture by the condition below.
  if: runner.os == 'Linux' && runner.arch == 'X64'
  shell: bash
  run: |
    sudo apt-get update
    sudo apt-get install -y --no-install-recommends libaio-dev

- name: Configure CMake and export compile commands
run: |
cmake -S . -B build -G Ninja \
Expand Down
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,6 @@
[submodule "thirdparty/RaBitQ-Library/RaBitQ-Library-0.1"]
path = thirdparty/RaBitQ-Library/RaBitQ-Library-0.1
url = https://github.com/VectorDB-NTU/RaBitQ-Library.git
[submodule "thirdparty/aio/libaio-0.3"]
path = thirdparty/aio/libaio-0.3
url = https://github.com/yugabyte/libaio.git
11 changes: 11 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,17 @@ else()
endif()
message(STATUS "RABITQ_ARCH_FLAG: ${RABITQ_ARCH_FLAG}")

# DiskAnn support (requires libaio).
# Enabled only when the target system is Linux, the processor string matches
# x86_64 / i686 / i386, and the build is not targeting Android or iOS.
# The result is stored in DISKANN_SUPPORTED (ON/OFF) and mirrored to the
# compiler as the DISKANN_SUPPORTED=1/0 definition.
set(DISKANN_SUPPORTED OFF)
if(CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|i686|i386")
  if(NOT ANDROID AND NOT IOS)
    set(DISKANN_SUPPORTED ON)
  endif()
endif()

if(DISKANN_SUPPORTED)
  add_definitions(-DDISKANN_SUPPORTED=1)
else()
  add_definitions(-DDISKANN_SUPPORTED=0)
  message(STATUS "DiskAnn support disabled - only supported on Linux x86_64")
endif()
message(STATUS "DISKANN_SUPPORTED: ${DISKANN_SUPPORTED}")

option(USE_OSS_MIRROR "Use OSS mirror for faster third-party downloads" OFF)
if(DEFINED ENV{USE_OSS_MIRROR} AND NOT "$ENV{USE_OSS_MIRROR}" STREQUAL "")
set(USE_OSS_MIRROR "$ENV{USE_OSS_MIRROR}" CACHE BOOL "Use OSS mirror for faster third-party downloads" FORCE)
Expand Down
14 changes: 14 additions & 0 deletions python/tests/detail/fixture_helper.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
import pytest
import logging
import platform

# True only when the interpreter reports a Linux system on one of the listed
# x86-family machine strings; used to skip DiskAnn tests elsewhere.
DISKANN_SUPPORTED = (
    platform.system() == "Linux"
    and platform.machine() in {"x86_64", "AMD64", "i686", "i386"}
)

from typing import Any, Generator
from zvec.typing import DataType, StatusCode, MetricType, QuantizeType
Expand Down Expand Up @@ -97,6 +105,12 @@ def full_schema_new(request) -> CollectionSchema:
else:
nullable, has_index, vector_index = True, False, HnswIndexParam()

# Skip DiskAnn tests on unsupported platforms
from zvec.model.param import DiskAnnIndexParam

if isinstance(vector_index, DiskAnnIndexParam) and not DISKANN_SUPPORTED:
pytest.skip("DiskAnn only supported on Linux x86_64")

scalar_index_param = None
vector_index_param = None
if has_index:
Expand Down
96 changes: 80 additions & 16 deletions python/tests/detail/test_collection_recall.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,10 @@
HnswIndexParam,
FlatIndexParam,
IVFIndexParam,
DiskAnnIndexParam,
HnswQueryParam,
IVFQueryParam,
DiskAnnQueryParam,
)

from zvec.model.schema import FieldSchema, VectorSchema
Expand Down Expand Up @@ -179,10 +181,24 @@ def get_ground_truth_map(collection, test_docs, query_vectors_map, metric_type,
for field_name, query_vectors in query_vectors_map.items():
ground_truth_map[field_name] = {}

# Support per-field metric type: metric_type can be a dict mapping
# field_name -> MetricType, or a single MetricType applied to all fields.
if isinstance(metric_type, dict):
field_metric = metric_type.get(field_name, MetricType.IP)
else:
field_metric = metric_type

for i, query_vector in enumerate(query_vectors):
# Get the ground truth for this query
relevant_doc_ids_scores = get_ground_truth_for_vector_query(
collection, query_vector, field_name, test_docs, i, metric_type, k, True
collection,
query_vector,
field_name,
test_docs,
i,
field_metric,
k,
True,
)
ground_truth_map[field_name][i] = relevant_doc_ids_scores

Expand Down Expand Up @@ -292,6 +308,7 @@ class TestRecall:
[
(True, True, HnswIndexParam()),
(False, True, IVFIndexParam()),
(False, True, DiskAnnIndexParam()),
(False, True, FlatIndexParam()), # ——ok
(
True,
Expand Down Expand Up @@ -371,6 +388,19 @@ class TestRecall:
use_soar=False,
),
),
(
True,
True,
DiskAnnIndexParam(
metric_type=MetricType.IP,
max_degree=32,
),
),
(
True,
True,
DiskAnnIndexParam(metric_type=MetricType.L2, max_degree=32),
),
],
indirect=True,
)
Expand All @@ -388,10 +418,16 @@ def test_recall_with_single_vector_valid_500(
):
full_schema_params = request.getfixturevalue("full_schema_new")

# Build per-field metric type map so ground truth uses each field's
# actual index metric (fields may fall back to HnswIndexParam/IP).
field_metric_map = {}
for vector_para in full_schema_params.vectors:
if vector_para.name == "vector_fp32_field":
metric_type = vector_para.index_param.metric_type
break
if vector_para.index_param is not None:
field_metric_map[vector_para.name] = vector_para.index_param.metric_type
else:
field_metric_map[vector_para.name] = MetricType.IP

metric_type = field_metric_map.get("vector_fp32_field", MetricType.IP)

multiple_docs = [
generate_doc_recall(i, full_collection_new.schema) for i in range(doc_num)
Expand Down Expand Up @@ -438,9 +474,13 @@ def test_recall_with_single_vector_valid_500(
multiple_docs[i].vectors[field_name] for i in range(query_num)
]

# Get ground truth mapping
# Get ground truth mapping (pass per-field metric map)
ground_truth_map = get_ground_truth_map(
full_collection_new, multiple_docs, query_vectors_map, metric_type, top_k
full_collection_new,
multiple_docs,
query_vectors_map,
field_metric_map,
top_k,
)

# Validate ground truth mapping structure
Expand Down Expand Up @@ -479,8 +519,8 @@ def test_recall_with_single_vector_valid_500(

print("(recall_at_k_stats:\n")
print(recall_at_k_stats)
print("metric_type:")
print(metric_type)
print("field_metric_map:")
print(field_metric_map)
# Print Recall@K statistics
print(f"Recall@{top_k} using Ground Truth:")
for field_name, stats in recall_at_k_stats.items():
Expand Down Expand Up @@ -552,7 +592,21 @@ def test_recall_with_single_vector_valid_500(
use_soar=True,
),
),
# (True, True, IVFIndexParam(metric_type=MetricType.COSINE, n_list=150, n_iters=15, use_soar=False, )),
(
True,
True,
DiskAnnIndexParam(metric_type=MetricType.IP, max_degree=32),
),
(
True,
True,
DiskAnnIndexParam(metric_type=MetricType.L2, max_degree=32),
),
(
True,
True,
DiskAnnIndexParam(metric_type=MetricType.COSINE, max_degree=32),
),
],
indirect=True,
)
Expand All @@ -571,10 +625,16 @@ def test_recall_with_single_vector_valid_2000(
):
full_schema_params = request.getfixturevalue("full_schema_new")

# Build per-field metric type map so ground truth uses each field's
# actual index metric (fields may fall back to HnswIndexParam/IP).
field_metric_map = {}
for vector_para in full_schema_params.vectors:
if vector_para.name == "vector_fp32_field":
metric_type = vector_para.index_param.metric_type
break
if vector_para.index_param is not None:
field_metric_map[vector_para.name] = vector_para.index_param.metric_type
else:
field_metric_map[vector_para.name] = MetricType.IP

metric_type = field_metric_map.get("vector_fp32_field", MetricType.IP)

multiple_docs = [
generate_doc_recall(i, full_collection_new.schema) for i in range(doc_num)
Expand Down Expand Up @@ -621,9 +681,13 @@ def test_recall_with_single_vector_valid_2000(
multiple_docs[i].vectors[field_name] for i in range(query_num)
]

# Get ground truth mapping
# Get ground truth mapping (pass per-field metric map)
ground_truth_map = get_ground_truth_map(
full_collection_new, multiple_docs, query_vectors_map, metric_type, top_k
full_collection_new,
multiple_docs,
query_vectors_map,
field_metric_map,
top_k,
)

# Validate ground truth mapping structure
Expand Down Expand Up @@ -662,8 +726,8 @@ def test_recall_with_single_vector_valid_2000(

print("(recall_at_k_stats:\n")
print(recall_at_k_stats)
print("metric_type:")
print(metric_type)
print("field_metric_map:")
print(field_metric_map)
# Print Recall@K statistics
print(f"Recall@{top_k} using Ground Truth:")
for field_name, stats in recall_at_k_stats.items():
Expand Down
4 changes: 4 additions & 0 deletions python/zvec/model/param/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
AddColumnOption,
AlterColumnOption,
CollectionOption,
DiskAnnIndexParam,
DiskAnnQueryParam,
FlatIndexParam,
HnswIndexParam,
HnswQueryParam,
Expand All @@ -35,6 +37,8 @@
"AddColumnOption",
"AlterColumnOption",
"CollectionOption",
"DiskAnnIndexParam",
"DiskAnnQueryParam",
"FlatIndexParam",
"HnswIndexParam",
"HnswQueryParam",
Expand Down
3 changes: 2 additions & 1 deletion src/ailego/algorithm/kmeans.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ class Kmc2CentroidsGenerator {
auto *centroids = owner->mutable_centroids();

std::mt19937 mt((std::random_device())());

std::uniform_real_distribution<float> dist(0.0, 1.0);

ContainerType benches(cache.dimension());
Expand Down Expand Up @@ -1216,4 +1217,4 @@ using NibbleInnerProductKmeans =
LloydCluster<T, TPool, TContext, NibbleVectorArray<T>>;

} // namespace ailego
} // namespace zvec
} // namespace zvec
2 changes: 1 addition & 1 deletion src/ailego/algorithm/lloyd_cluster.h
Original file line number Diff line number Diff line change
Expand Up @@ -361,4 +361,4 @@ class LloydCluster {
};

} // namespace ailego
} // namespace zvec
} // namespace zvec
5 changes: 5 additions & 0 deletions src/binding/c/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,11 @@ set_target_properties(zvec_c_api PROPERTIES
VISIBILITY_INLINES_HIDDEN ON
)

# Windows builds: compile zvec_c_api with ZVEC_BUILD_SHARED defined (PRIVATE,
# so it does not propagate to consumers). Per the original note, this makes
# c_api.h use __declspec(dllexport).
if(WIN32 OR MSVC)
  target_compile_definitions(zvec_c_api
    PRIVATE
      ZVEC_BUILD_SHARED
  )
endif()

find_package(Threads REQUIRED)

# Static linking of C++ standard library is handled in platform-specific sections
Expand Down
65 changes: 45 additions & 20 deletions src/binding/python/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,26 +19,51 @@ set(SRC_LISTS
pybind11_add_module(_zvec ${SRC_LISTS})

if (CMAKE_SYSTEM_NAME STREQUAL "Linux")
target_link_libraries(_zvec PRIVATE
-Wl,--whole-archive
$<TARGET_FILE:core_knn_flat_static>
$<TARGET_FILE:core_knn_flat_sparse_static>
$<TARGET_FILE:core_knn_hnsw_static>
$<TARGET_FILE:core_knn_hnsw_rabitq_static>
$<TARGET_FILE:core_knn_hnsw_sparse_static>
$<TARGET_FILE:core_knn_ivf_static>
$<TARGET_FILE:core_knn_vamana_static>
$<TARGET_FILE:core_knn_cluster_static>
$<TARGET_FILE:core_mix_reducer_static>
$<TARGET_FILE:core_metric_static>
$<TARGET_FILE:core_utility_static>
$<TARGET_FILE:core_quantizer_static>
-Wl,--no-whole-archive
zvec_db
)
target_link_options(_zvec PRIVATE
"LINKER:--version-script=${CMAKE_CURRENT_SOURCE_DIR}/exports.map"
)
if (CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|arm64|arm")
target_link_libraries(_zvec PRIVATE
-Wl,--whole-archive
$<TARGET_FILE:core_knn_flat_static>
$<TARGET_FILE:core_knn_flat_sparse_static>
$<TARGET_FILE:core_knn_hnsw_static>
$<TARGET_FILE:core_knn_hnsw_rabitq_static>
$<TARGET_FILE:core_knn_hnsw_sparse_static>
$<TARGET_FILE:core_knn_ivf_static>
$<TARGET_FILE:core_knn_vamana_static>
$<TARGET_FILE:core_knn_cluster_static>
$<TARGET_FILE:core_mix_reducer_static>
$<TARGET_FILE:core_metric_static>
$<TARGET_FILE:core_utility_static>
$<TARGET_FILE:core_quantizer_static>
-Wl,--no-whole-archive
zvec_db
)
target_link_options(_zvec PRIVATE
"LINKER:--version-script=${CMAKE_CURRENT_SOURCE_DIR}/exports.map"
)
else ()
target_link_libraries(_zvec PRIVATE
-Wl,--whole-archive
$<TARGET_FILE:core_knn_flat_static>
$<TARGET_FILE:core_knn_flat_sparse_static>
$<TARGET_FILE:core_knn_hnsw_static>
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

代码有重复,可以调整一下

$<TARGET_FILE:core_knn_hnsw_rabitq_static>
$<TARGET_FILE:core_knn_hnsw_sparse_static>
$<TARGET_FILE:core_knn_ivf_static>
$<TARGET_FILE:core_knn_vamana_static>
$<TARGET_FILE:core_knn_diskann_static>
$<TARGET_FILE:core_knn_cluster_static>
$<TARGET_FILE:core_mix_reducer_static>
$<TARGET_FILE:core_metric_static>
$<TARGET_FILE:core_utility_static>
$<TARGET_FILE:core_quantizer_static>
-Wl,--no-whole-archive
zvec_db
aio
)
target_link_options(_zvec PRIVATE
"LINKER:--version-script=${CMAKE_CURRENT_SOURCE_DIR}/exports.map"
)
endif()
elseif (APPLE)
target_link_libraries(_zvec PRIVATE
-Wl,-force_load,$<TARGET_FILE:core_knn_flat_static>
Expand Down
Loading
Loading