Skip to content

IVF Index Support in SVS #156

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ usr/
wheelhouse/

# Bundled test data
/data/
/data/temp

# Misc tool related files
*.swp
Expand Down
6 changes: 6 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,12 @@ include("cmake/fmt.cmake")
include("cmake/spdlog.cmake")
include("cmake/toml.cmake")

# IVF requires Intel(R) MKL support.
# When the experimental IVF build is enabled, pull in the MKL setup script and
# advertise MKL availability to every consumer of the SVS target.
if(SVS_EXPERIMENTAL_BUILD_IVF)
    include("cmake/mkl.cmake")
    # Use a compile *definition* rather than a raw "-D" compile option so that CMake
    # emits the correct flag spelling for whichever compiler is in use.
    target_compile_definitions(${SVS_LIB} INTERFACE SVS_HAVE_MKL=1)
endif()

add_library(svs_x86_options_base INTERFACE)
add_library(svs::x86_options_base ALIAS svs_x86_options_base)
if(CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)")
Expand Down
2 changes: 1 addition & 1 deletion THIRD-PARTY-PROGRAMS
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ Please also refer to the file .github/CONTRIBUTING.md, which clarifies licensing
external contributions to this project including patches, pull requests, etc.

--------------------------------------------------------------------------------
7. MKL (cmake/mkl.cmake, https://www.intel.com/content/www/us/en/developer/tools/oneapi/onemkl.html)
7. Intel(R) MKL (cmake/mkl.cmake, https://www.intel.com/content/www/us/en/developer/tools/oneapi/onemkl.html)

Copyright (c) Intel Corporation, All rights reserved.

Expand Down
11 changes: 11 additions & 0 deletions benchmark/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,17 @@ set(SHARED_LIBRARY_FILES
src/inverted/memory/executables/memory_test.cpp
)

# ivf
# The IVF benchmark sources are only added to the shared benchmark library when the
# experimental IVF build option is enabled.
if (SVS_EXPERIMENTAL_BUILD_IVF)
list(APPEND SHARED_LIBRARY_FILES
src/ivf/uncompressed.cpp
src/ivf/search.cpp
src/ivf/build.cpp
src/ivf/test.cpp
)
endif()


add_library(svs_benchmark_library SHARED ${SHARED_LIBRARY_FILES})
target_include_directories(svs_benchmark_library PUBLIC ${CMAKE_CURRENT_LIST_DIR}/include)

Expand Down
280 changes: 280 additions & 0 deletions benchmark/include/svs-benchmark/ivf/build.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,280 @@
/*
* Copyright 2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

// svs-benchmark
#include "svs-benchmark/benchmark.h"
#include "svs-benchmark/build.h"
#include "svs-benchmark/datasets.h"
#include "svs-benchmark/index_traits.h"
#include "svs-benchmark/ivf/search.h"
#include "svs-benchmark/search.h"

// svs
#include "svs/orchestrators/ivf.h"

// stl
#include <filesystem>
#include <memory>
#include <optional>
#include <span>
#include <string_view>
#include <vector>

namespace svsbenchmark::ivf {

/// Empty tag type used to select the static IVF benchmark at compile time.
struct StaticBenchmark {};

// `BuildJob` is defined further down in this header; only its name is needed here.
struct BuildJob;

/// Trait mapping a benchmark tag type to the job type it operates on.
/// Only specializations are defined; the primary template is left incomplete.
template <typename Tag> struct AssociatedJob;

/// The static benchmark operates on `BuildJob`.
template <> struct AssociatedJob<StaticBenchmark> {
    using type = BuildJob;
};

/// Convenience alias for `AssociatedJob<Tag>::type`.
template <typename Tag> using associated_job_t = typename AssociatedJob<Tag>::type;

/// Return the name associated with the static IVF build benchmark.
/// The tag argument is only used for overload selection.
constexpr std::string_view benchmark_name(StaticBenchmark /*tag*/) {
    constexpr std::string_view name{"ivf_static_build"};
    return name;
}

// Entry-point for registering the static index building executable.
// Returns an owning pointer to the `Benchmark` object. Only the declaration lives in
// this header; the definition is provided elsewhere.
std::unique_ptr<Benchmark> static_workflow();

// Shared struct between the static and dynamic paths.
//
// Bundles everything needed to describe an IVF index build: the dataset, the input
// files, the element/distance types, and the build parameters. It also provides
// TOML (de)serialization helpers that derived job types extend with their own fields.
struct BuildJobBase {
  public:
    // A descriptive name for this workload.
    std::string description_;

    // The dataset to load
    Dataset dataset_;

    // Paths
    std::filesystem::path data_;
    std::filesystem::path queries_;

    // The number of queries (taken from queries) to use in the training set.
    size_t queries_in_training_set_;

    // Dataset Parameters
    svs::DataType data_type_;
    svs::DataType query_type_;
    svs::DistanceType distance_;
    Extent ndims_;

    // Build Parameters
    svs::index::ivf::IVFBuildParameters build_parameters_;
    size_t num_threads_;

  public:
    ///// Constructor
    // Member-wise constructor. Arguments taken by value are sinks and are moved into
    // the corresponding members.
    BuildJobBase(
        std::string_view description,
        svsbenchmark::Dataset dataset,
        std::filesystem::path data,
        std::filesystem::path queries,
        size_t queries_in_training_set,
        svs::DataType data_type,
        svs::DataType query_type,
        svs::DistanceType distance,
        size_t ndims,
        const svs::index::ivf::IVFBuildParameters& build_parameters,
        size_t num_threads
    )
        : description_{description}
        // `dataset` is a by-value sink just like `data` and `queries`: move it rather
        // than making a second copy.
        , dataset_{std::move(dataset)}
        , data_{std::move(data)}
        , queries_{std::move(queries)}
        , queries_in_training_set_{queries_in_training_set}
        , data_type_{data_type}
        , query_type_{query_type}
        , distance_{distance}
        , ndims_{ndims}
        , build_parameters_{build_parameters}
        , num_threads_{num_threads} {}

    // Compatibility with `ExpectedResults`.
    const svs::index::ivf::IVFBuildParameters& get_build_parameters() const {
        return build_parameters_;
    }
    svs::DistanceType get_distance() const { return distance_; }

    // Return an example BuildJob that can be used to generate sample config files.
    // All values are placeholders; the file names are not expected to exist.
    static BuildJobBase example() {
        return BuildJobBase(
            "example index build",
            Dataset::example(),
            "data.fvecs",
            "queries.fvecs",
            5000,
            svs::DataType::float32,
            svs::DataType::float32,
            svs::DistanceType::L2,
            svs::Dynamic,
            svs::index::ivf::IVFBuildParameters(128, 10000, 10, false, 0.1),
            8
        );
    }

    // Serialize every member of the base job into a table tagged with the given
    // `schema` and `version`. Derived jobs pass their own schema/version and append
    // their extra fields to the returned table.
    svs::lib::SaveTable
    to_toml(std::string_view schema, const svs::lib::Version& version) const {
        return svs::lib::SaveTable(
            schema,
            version,
            {SVS_LIST_SAVE_(description),
             SVS_LIST_SAVE_(dataset),
             SVS_LIST_SAVE_(data),
             SVS_LIST_SAVE_(queries),
             SVS_LIST_SAVE_(queries_in_training_set),
             SVS_LIST_SAVE_(data_type),
             SVS_LIST_SAVE_(query_type),
             SVS_LIST_SAVE_(distance),
             SVS_LIST_SAVE_(ndims),
             SVS_LIST_SAVE_(build_parameters),
             SVS_LIST_SAVE_(num_threads)}
        );
    }

    // Reconstruct a base job from a table. The optional `root` is forwarded to the
    // dataset loader and to `extract_filename` for the "data" and "queries" entries
    // (presumably to resolve relative paths - confirm against `extract_filename`).
    static BuildJobBase from_toml(
        const svs::lib::ContextFreeLoadTable& table,
        const std::optional<std::filesystem::path>& root
    ) {
        namespace lib = svs::lib;
        return BuildJobBase(
            SVS_LOAD_MEMBER_AT_(table, description),
            SVS_LOAD_MEMBER_AT_(table, dataset, root),
            svsbenchmark::extract_filename(table, "data", root),
            svsbenchmark::extract_filename(table, "queries", root),
            SVS_LOAD_MEMBER_AT_(table, queries_in_training_set),
            SVS_LOAD_MEMBER_AT_(table, data_type),
            SVS_LOAD_MEMBER_AT_(table, query_type),
            SVS_LOAD_MEMBER_AT_(table, distance),
            SVS_LOAD_MEMBER_AT_(table, ndims),
            SVS_LOAD_MEMBER_AT_(table, build_parameters),
            SVS_LOAD_MEMBER_AT_(table, num_threads)
        );
    }
};

// Parsed setup for a static index build job.
struct BuildJob : public BuildJobBase {
public:
// Paths
std::filesystem::path groundtruth_;
// Preset search parameters
std::vector<svs::index::ivf::IVFSearchParameters> preset_parameters_;
// Post-build validation parameters.
svsbenchmark::search::SearchParameters search_parameters_;
// Directory to save the built index.
// An empty optional implies no saving.
std::optional<std::filesystem::path> save_directory_;

public:
template <typename... Args>
BuildJob(
std::filesystem::path groundtruth,
std::vector<svs::index::ivf::IVFSearchParameters> preset_parameters,
svsbenchmark::search::SearchParameters search_parameters,
std::optional<std::filesystem::path> save_directory,
Args&&... args
)
: BuildJobBase(std::forward<Args>(args)...)
, groundtruth_{std::move(groundtruth)}
, preset_parameters_{std::move(preset_parameters)}
, search_parameters_{std::move(search_parameters)}
, save_directory_{std::move(save_directory)} {}

// Return an example BuildJob that can be used to generate sample config files.
static BuildJob example() {
return BuildJob(
"groundtruth.ivecs", // groundtruth
{{10, 1.0}, {10, 4.0}, {50, 1.0}}, // preset_parameters
svsbenchmark::search::SearchParameters::example(), // search_parameters
std::nullopt, // save_directory
BuildJobBase::example() // base-class
);
}

// Compatibility with abstract search-space.
std::vector<svs::index::ivf::IVFSearchParameters> get_search_configs() const {
return preset_parameters_;
}
const svsbenchmark::search::SearchParameters& get_search_parameters() const {
return search_parameters_;
}

template <typename F>
auto invoke(F&& f, const Checkpoint& SVS_UNUSED(checkpoint)) const {
return f(dataset_, query_type_, data_type_, distance_, ndims_, *this);
}

// Save the index if the `save_directory` field is non-empty.
template <typename Index> void maybe_save_index(Index& index) const {
if (!save_directory_) {
return;
}
const auto& root = save_directory_.value();
svs::lib::save_to_disk(index, root / "clustering");
}

static constexpr svs::lib::Version save_version = svs::lib::Version(0, 0, 0);
static constexpr std::string_view serialization_schema = "benchmark_ivf_build_job";

// Save the BuildJob to a TOML table.
svs::lib::SaveTable save() const {
// Get a base table.
auto table = BuildJobBase::to_toml(serialization_schema, save_version);

// Append the extra information needed by the static BuildJob.
SVS_INSERT_SAVE_(table, groundtruth);
SVS_INSERT_SAVE_(table, preset_parameters);
SVS_INSERT_SAVE_(table, search_parameters);
table.insert("save_directory", svs::lib::save(save_directory_.value_or("")));
return table;
}

// Load a BuildJob from a TOML table.
static BuildJob load(
const svs::lib::ContextFreeLoadTable& table,
const std::optional<std::filesystem::path>& root,
svsbenchmark::SaveDirectoryChecker& checker
) {
return BuildJob(
svsbenchmark::extract_filename(table, "groundtruth", root),
SVS_LOAD_MEMBER_AT_(table, preset_parameters),
SVS_LOAD_MEMBER_AT_(table, search_parameters),
checker.extract(table.unwrap(), "save_directory"),
BuildJobBase::from_toml(table, root)
);
}
};

// Dispatchers
// Dispatcher type for the static build benchmark. The argument types after the first
// mirror, in order, what `BuildJob::invoke` passes to its callable: dataset, query
// type, data type, distance, dimensionality, and the job itself.
// NOTE(review): the leading `toml::table` is presumably the dispatch return type -
// confirm against `svs::lib::Dispatcher`.
using StaticBuildDispatcher = svs::lib::Dispatcher<
toml::table,
svsbenchmark::Dataset,
svs::DataType,
svs::DataType,
svs::DistanceType,
Extent,
const BuildJob&>;

} // namespace svsbenchmark::ivf
39 changes: 39 additions & 0 deletions benchmark/include/svs-benchmark/ivf/common.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/*
* Copyright 2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

// svs-benchmark
#include "svs-benchmark/benchmark.h"
#include "svs-benchmark/search.h"

// svs
#include "svs/core/distance.h"
#include "svs/index/ivf/common.h"

// stl
#include <initializer_list>
#include <vector>

namespace svsbenchmark::ivf {

// Test Routines
// Fixed validation search parameters for use by the test routines.
// NOTE(review): the arguments are presumably the number of neighbors (10) and the
// target recalls (0.5, 0.8, 0.9) - confirm against `search::SearchParameters`.
SVS_BENCHMARK_FOR_TESTS_ONLY inline search::SearchParameters test_search_parameters() {
return search::SearchParameters{10, {0.5, 0.8, 0.9}};
}

// Fixed list of IVF search configurations for use by the test routines, built from
// the value pairs (10, 1.0) and (50, 1.0).
// NOTE(review): the meaning of each pair is defined by the `IVFSearchParameters`
// constructor, which is not visible here - confirm before changing the values or the
// brace nesting.
SVS_BENCHMARK_FOR_TESTS_ONLY inline std::vector<svs::index::ivf::IVFSearchParameters>
test_search_configs() {
return std::vector<svs::index::ivf::IVFSearchParameters>({{{10, 1.0}, {50, 1.0}}});
}

} // namespace svsbenchmark::ivf
Loading
Loading