Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 47 additions & 3 deletions src/VecSim/algorithms/hnsw/hnsw.h
Original file line number Diff line number Diff line change
Expand Up @@ -123,9 +123,14 @@ class HNSWIndex : public VecSimIndexAbstract<DataType, DistType>,
mutable VisitedNodesHandlerPool visitedNodesHandlerPool;
mutable std::shared_mutex indexDataGuard;

public:
// Search statistics, reported through debugInfo(). They are `mutable` so that const
// methods (e.g. newBatchIterator) can update them; every visible access uses
// std::memory_order_relaxed.
// NOTE(review): the increments in processCandidate / processCandidate_RangeSearch show no
// running_query guard - confirm whether insert-time graph traversals are counted as well.

// Bumped once by topKQuery, rangeQuery and
// HNSWIndex_Single / HNSWIndex_Multi::newBatchIterator.
mutable std::atomic_size_t num_searches;
// Bumped once each time a node with at least one link has its neighbors scanned
// (processCandidate, processCandidate_RangeSearch, HNSW_BatchIterator::scanGraphInternal).
mutable std::atomic_size_t num_visited_nodes;
// Number of candidates whose distance was computed inside greedySearchLevel while
// running_query was set; calls made with running_query == false add nothing.
mutable std::atomic_size_t num_visited_nodes_higher_levels;

protected:
#ifdef BUILD_TESTS
#include "VecSim/algorithms/hnsw/hnsw_base_tests_friends.h"

#include "hnsw_serializer_declarations.h"
#endif

Expand Down Expand Up @@ -531,6 +536,8 @@ void HNSWIndex<DataType, DistType>::processCandidate(
ElementLevelData &node_level = getElementLevelData(cur_element, layer);
linkListSize num_links = node_level.getNumLinks();
if (num_links > 0) {
// Increment visited nodes counter - we're visiting this node's neighbors
num_visited_nodes.fetch_add(1, std::memory_order_relaxed);

const char *cur_data, *next_data;
// Pre-fetch first candidate tag address.
Expand Down Expand Up @@ -616,6 +623,8 @@ void HNSWIndex<DataType, DistType>::processCandidate_RangeSearch(
linkListSize num_links = node_level.getNumLinks();

if (num_links > 0) {
// Increment visited nodes counter - we're visiting this node's neighbors
num_visited_nodes.fetch_add(1, std::memory_order_relaxed);

const char *cur_data, *next_data;
// Pre-fetch first candidate tag address.
Expand Down Expand Up @@ -1209,6 +1218,7 @@ void HNSWIndex<DataType, DistType>::greedySearchLevel(const void *vector_data, s
// Don't allow choosing a deleted node as an entry point upon searching for neighbors
// candidates (that is, we're NOT running a query, but inserting a new vector).
idType bestNonDeletedCand = bestCand;
size_t visited_count = 0;

do {
if (running_query && VECSIM_TIMEOUT(timeoutCtx)) {
Expand All @@ -1228,6 +1238,9 @@ void HNSWIndex<DataType, DistType>::greedySearchLevel(const void *vector_data, s
if (isInProcess(candidate)) {
continue;
}
if (running_query) {
visited_count++;
}
DistType d = this->calcDistance(vector_data, getDataByInternalId(candidate));
if (d < curDist) {
curDist = d;
Expand All @@ -1245,6 +1258,9 @@ void HNSWIndex<DataType, DistType>::greedySearchLevel(const void *vector_data, s
} while (changed);
if (!running_query) {
bestCand = bestNonDeletedCand;
} else {
// Update the counter for higher level visited nodes
num_visited_nodes_higher_levels.fetch_add(visited_count, std::memory_order_relaxed);
}
}

Expand Down Expand Up @@ -1617,7 +1633,8 @@ HNSWIndex<DataType, DistType>::HNSWIndex(const HNSWParams *params,
size_t random_seed)
: VecSimIndexAbstract<DataType, DistType>(abstractInitParams, components),
VecSimIndexTombstone(), maxElements(0), graphDataBlocks(this->allocator),
idToMetaData(this->allocator), visitedNodesHandlerPool(0, this->allocator) {
idToMetaData(this->allocator), visitedNodesHandlerPool(0, this->allocator), num_searches(0),
num_visited_nodes(0), num_visited_nodes_higher_levels(0) {

M = params->M ? params->M : HNSW_DEFAULT_M;
M0 = M * 2;
Expand Down Expand Up @@ -1963,6 +1980,9 @@ VecSimQueryReply *HNSWIndex<DataType, DistType>::topKQuery(const void *query_dat
return rep;
}

// Increment search counter
num_searches.fetch_add(1, std::memory_order_relaxed);

auto processed_query_ptr = this->preprocessQuery(query_data);
const void *processed_query = processed_query_ptr.get();
void *timeoutCtx = nullptr;
Expand Down Expand Up @@ -2077,6 +2097,10 @@ VecSimQueryReply *HNSWIndex<DataType, DistType>::rangeQuery(const void *query_da
if (curElementCount == 0) {
return rep;
}

// Increment search counter
num_searches.fetch_add(1, std::memory_order_relaxed);

auto processed_query_ptr = this->preprocessQuery(query_data);
const void *processed_query = processed_query_ptr.get();
void *timeoutCtx = nullptr;
Expand Down Expand Up @@ -2120,6 +2144,10 @@ VecSimIndexDebugInfo HNSWIndex<DataType, DistType>::debugInfo() const {
info.hnswInfo.entrypoint = ep_id != INVALID_ID ? getExternalLabel(ep_id) : INVALID_LABEL;
info.hnswInfo.visitedNodesPoolSize = this->visitedNodesHandlerPool.getPoolSize();
info.hnswInfo.numberOfMarkedDeletedNodes = this->getNumMarkedDeleted();
info.hnswInfo.num_searches = this->num_searches.load(std::memory_order_relaxed);
info.hnswInfo.num_visited_nodes = this->num_visited_nodes.load(std::memory_order_relaxed);
info.hnswInfo.num_visited_nodes_higher_levels =
this->num_visited_nodes_higher_levels.load(std::memory_order_relaxed);
return info;
}

Expand All @@ -2135,7 +2163,7 @@ template <typename DataType, typename DistType>
VecSimDebugInfoIterator *HNSWIndex<DataType, DistType>::debugInfoIterator() const {
VecSimIndexDebugInfo info = this->debugInfo();
// For readability. Update this number when needed.
size_t numberOfInfoFields = 17;
size_t numberOfInfoFields = 20;
auto *infoIterator = new VecSimDebugInfoIterator(numberOfInfoFields, this->allocator);

infoIterator->addInfoField(
Expand Down Expand Up @@ -2186,6 +2214,22 @@ VecSimDebugInfoIterator *HNSWIndex<DataType, DistType>::debugInfoIterator() cons
.fieldType = INFOFIELD_UINT64,
.fieldValue = {FieldValue{.uintegerValue = info.hnswInfo.numberOfMarkedDeletedNodes}}});

infoIterator->addInfoField(
VecSim_InfoField{.fieldName = VecSimCommonStrings::NUM_SEARCHES,
.fieldType = INFOFIELD_UINT64,
.fieldValue = {FieldValue{.uintegerValue = info.hnswInfo.num_searches}}});

infoIterator->addInfoField(VecSim_InfoField{
.fieldName = VecSimCommonStrings::NUM_VISITED_NODES,
.fieldType = INFOFIELD_UINT64,
.fieldValue = {FieldValue{.uintegerValue = info.hnswInfo.num_visited_nodes}}});

infoIterator->addInfoField(
VecSim_InfoField{.fieldName = VecSimCommonStrings::NUM_VISITED_NODES_HIGHER_LEVELS,
.fieldType = INFOFIELD_UINT64,
.fieldValue = {FieldValue{
.uintegerValue = info.hnswInfo.num_visited_nodes_higher_levels}}});

return infoIterator;
}

Expand Down
2 changes: 2 additions & 0 deletions src/VecSim/algorithms/hnsw/hnsw_batch_iterator.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,8 @@ VecSimQueryReply_Code HNSW_BatchIterator<DataType, DistType>::scanGraphInternal(
this->index->lockNodeLinks(node_graph_data);
ElementLevelData &node_level_data = this->index->getElementLevelData(node_graph_data, 0);
if (node_level_data.numLinks > 0) {
// Increment visited nodes counter - we're visiting this node's neighbors
this->index->num_visited_nodes.fetch_add(1, std::memory_order_relaxed);

// Pre-fetch first candidate tag address.
__builtin_prefetch(visited_list->getElementsTags() + node_level_data.links[0]);
Expand Down
1 change: 1 addition & 0 deletions src/VecSim/algorithms/hnsw/hnsw_multi.h
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,7 @@ HNSWIndex_Multi<DataType, DistType>::newBatchIterator(const void *queryBlob,
VecSimQueryParams *queryParams) const {
// force_copy == true.
auto queryBlobCopy = this->preprocessQuery(queryBlob, true);
this->num_searches.fetch_add(1, std::memory_order_relaxed);

// take ownership of the blob copy and pass it to the batch iterator.
auto *queryBlobCopyPtr = queryBlobCopy.release();
Expand Down
2 changes: 2 additions & 0 deletions src/VecSim/algorithms/hnsw/hnsw_serializer.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ class HNSWSerializer : public Serializer {
};

explicit HNSWSerializer(EncodingVersion version = EncodingVersion::V4);
virtual ~HNSWSerializer() = default;

static EncodingVersion ReadVersion(std::ifstream &input);

Expand All @@ -35,6 +36,7 @@ class HNSWSerializer : public Serializer {

protected:
EncodingVersion m_version;
virtual void saveIndexIMP(std::ofstream &output) = 0;

private:
void saveIndexFields(std::ofstream &output) const = 0;
Expand Down
3 changes: 2 additions & 1 deletion src/VecSim/algorithms/hnsw/hnsw_serializer_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ HNSWIndex<DataType, DistType>::HNSWIndex(std::ifstream &input, const HNSWParams
HNSWSerializer::EncodingVersion version)
: VecSimIndexAbstract<DataType, DistType>(abstractInitParams, components),
HNSWSerializer(version), epsilon(params->epsilon), graphDataBlocks(this->allocator),
idToMetaData(this->allocator), visitedNodesHandlerPool(0, this->allocator),
// Zero all three search-statistics counters, matching the params-based constructor.
// A std::atomic left out of this list is default-constructed, which does not guarantee
// a zero value before C++20, so debugInfo() could otherwise report garbage for
// num_visited_nodes_higher_levels on a loaded index.
num_searches(0), num_visited_nodes(0), num_visited_nodes_higher_levels(0) {

this->restoreIndexFields(input);
this->fieldsValidation();
Expand Down
1 change: 1 addition & 0 deletions src/VecSim/algorithms/hnsw/hnsw_single.h
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,7 @@ HNSWIndex_Single<DataType, DistType>::newBatchIterator(const void *queryBlob,
VecSimQueryParams *queryParams) const {
// force_copy == true.
auto queryBlobCopy = this->preprocessQuery(queryBlob, true);
this->num_searches.fetch_add(1, std::memory_order_relaxed);

// take ownership of the blob copy and pass it to the batch iterator.
auto *queryBlobCopyPtr = queryBlobCopy.release();
Expand Down
4 changes: 4 additions & 0 deletions src/VecSim/utils/vec_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,10 @@ const char *VecSimCommonStrings::EPSILON_STRING = "EPSILON";
const char *VecSimCommonStrings::HNSW_MAX_LEVEL = "MAX_LEVEL";
const char *VecSimCommonStrings::HNSW_ENTRYPOINT = "ENTRYPOINT";
const char *VecSimCommonStrings::NUM_MARKED_DELETED = "NUMBER_OF_MARKED_DELETED";
// Field names of the HNSW search-statistics entries emitted by
// HNSWIndex::debugInfoIterator().
const char *VecSimCommonStrings::NUM_SEARCHES = "NUM_SEARCHES";
const char *VecSimCommonStrings::NUM_VISITED_NODES = "NUM_VISITED_NODES";
const char *VecSimCommonStrings::NUM_VISITED_NODES_HIGHER_LEVELS =
"NUM_VISITED_NODES_HIGHER_LEVELS";

const char *VecSimCommonStrings::SVS_SEARCH_WS_STRING = "SEARCH_WINDOW_SIZE";
const char *VecSimCommonStrings::SVS_CONSTRUCTION_WS_STRING = "CONSTRUCTION_WINDOW_SIZE";
Expand Down
3 changes: 3 additions & 0 deletions src/VecSim/utils/vec_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,9 @@ struct VecSimCommonStrings {
static const char *HNSW_MAX_LEVEL;
static const char *HNSW_ENTRYPOINT;
static const char *NUM_MARKED_DELETED;
// Names of the HNSW search-statistics debug-info fields (defined in vec_utils.cpp).
static const char *NUM_SEARCHES;
static const char *NUM_VISITED_NODES;
static const char *NUM_VISITED_NODES_HIGHER_LEVELS;
// static const char *HNSW_VISITED_NODES_POOL_SIZE_STRING;

static const char *SVS_SEARCH_WS_STRING;
Expand Down
7 changes: 5 additions & 2 deletions src/VecSim/vec_sim_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -340,8 +340,11 @@ typedef struct {
double epsilon; // Epsilon parameter for HNSW graph accuracy/latency for range search.
size_t max_level; // Number of graph levels.
size_t entrypoint; // Entrypoint vector label.
size_t visitedNodesPoolSize; // The max number of parallel graph scans so far.
size_t numberOfMarkedDeletedNodes; // The number of nodes that are marked as deleted.
size_t visitedNodesPoolSize; // The max number of parallel graph scans so far.
size_t numberOfMarkedDeletedNodes; // The number of nodes that are marked as deleted.
size_t num_searches; // Total number of searches performed.
size_t num_visited_nodes; // Total number of nodes visited during searches.
size_t num_visited_nodes_higher_levels; // Total number of nodes visited in higher levels (> 0).
} hnswInfoStruct;

typedef struct {
Expand Down
45 changes: 45 additions & 0 deletions tests/benchmark/bm_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,11 +94,39 @@ void BM_VecSimCommon<index_type_t>::TopK_HNSW(benchmark::State &st, unsigned sho
size_t k = st.range(1);
std::atomic_int correct = 0;
size_t iter = 0;

// Get initial metrics
auto index = GET_INDEX(INDEX_HNSW + index_offset);
VecSimIndexDebugInfo info_before = VecSimIndex_DebugInfo(index);
size_t num_searches_before = info_before.hnswInfo.num_searches;
size_t num_visited_nodes_before = info_before.hnswInfo.num_visited_nodes;
size_t num_visited_nodes_higher_levels_before =
info_before.hnswInfo.num_visited_nodes_higher_levels;

for (auto _ : st) {
RunTopK_HNSW(st, ef, iter, k, correct, index_offset);
iter++;
}

// Get final metrics
VecSimIndexDebugInfo info_after = VecSimIndex_DebugInfo(index);
size_t num_searches_after = info_after.hnswInfo.num_searches;
size_t num_visited_nodes_after = info_after.hnswInfo.num_visited_nodes;
size_t num_visited_nodes_higher_levels_after =
info_after.hnswInfo.num_visited_nodes_higher_levels;

// Calculate deltas
size_t total_searches = num_searches_after - num_searches_before;
size_t total_visited_nodes = num_visited_nodes_after - num_visited_nodes_before;
size_t total_visited_nodes_higher_levels =
num_visited_nodes_higher_levels_after - num_visited_nodes_higher_levels_before;

st.counters["Recall"] = (float)correct / (float)(k * iter);
st.counters["Avg_visited_nodes_level_0"] =
total_searches > 0 ? (double)total_visited_nodes / (double)total_searches : 0.0;
st.counters["Avg_visited_nodes_higher_levels"] =
total_searches > 0 ? (double)total_visited_nodes_higher_levels / (double)total_searches
: 0.0;
}

template <typename index_type_t>
Expand All @@ -112,6 +140,12 @@ void BM_VecSimCommon<index_type_t>::TopK_Tiered(benchmark::State &st, unsigned s
size_t total_iters = 50;
VecSimQueryReply *all_results[total_iters];

// Get initial metrics from the backend HNSW index
auto hnsw_index = GET_INDEX(INDEX_HNSW + index_offset);
VecSimIndexDebugInfo info_before = VecSimIndex_DebugInfo(hnsw_index);
size_t num_searches_before = info_before.hnswInfo.num_searches;
size_t num_visited_nodes_before = info_before.hnswInfo.num_visited_nodes;

auto parallel_knn_search = [](AsyncJob *job) {
auto *search_job = reinterpret_cast<tieredIndexMock::SearchJobMock *>(job);
HNSWRuntimeParams hnswRuntimeParams = {.efRuntime = search_job->ef};
Expand All @@ -134,6 +168,15 @@ void BM_VecSimCommon<index_type_t>::TopK_Tiered(benchmark::State &st, unsigned s
}
}

// Get final metrics
VecSimIndexDebugInfo info_after = VecSimIndex_DebugInfo(hnsw_index);
size_t num_searches_after = info_after.hnswInfo.num_searches;
size_t num_visited_nodes_after = info_after.hnswInfo.num_visited_nodes;

// Calculate deltas
size_t total_searches = num_searches_after - num_searches_before;
size_t total_visited_nodes = num_visited_nodes_after - num_visited_nodes_before;

// Measure recall
for (iter = 0; iter < total_iters; iter++) {
auto bf_results =
Expand All @@ -147,6 +190,8 @@ void BM_VecSimCommon<index_type_t>::TopK_Tiered(benchmark::State &st, unsigned s

st.counters["Recall"] = (float)correct / (float)(k * iter);
st.counters["num_threads"] = (double)BM_VecSimGeneral::mock_thread_pool->thread_pool_size;
st.counters["Avg_visited_nodes_level_0"] =
total_searches > 0 ? (double)total_visited_nodes / (double)total_searches : 0.0;
}

#define REGISTER_TopK_BF(BM_CLASS, BM_FUNC) \
Expand Down
36 changes: 29 additions & 7 deletions tests/benchmark/bm_vecsim_basics.h
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,14 @@ void BM_VecSimBasics<index_type_t>::Range_HNSW(benchmark::State &st) {
HNSWRuntimeParams hnswRuntimeParams = {.epsilon = epsilon};
auto query_params = BM_VecSimGeneral::CreateQueryParams(hnswRuntimeParams);

// Get initial metrics
auto index = GET_INDEX(INDEX_HNSW);
VecSimIndexDebugInfo info_before = VecSimIndex_DebugInfo(index);
size_t num_searches_before = info_before.hnswInfo.num_searches;
size_t num_visited_nodes_before = info_before.hnswInfo.num_visited_nodes;
size_t num_visited_nodes_higher_levels_before =
info_before.hnswInfo.num_visited_nodes_higher_levels;

for (auto _ : st) {
auto hnsw_results = VecSimIndex_RangeQuery(
GET_INDEX(INDEX_HNSW), QUERIES[iter % N_QUERIES].data(), radius, &query_params, BY_ID);
Expand All @@ -307,8 +315,27 @@ void BM_VecSimBasics<index_type_t>::Range_HNSW(benchmark::State &st) {
iter++;
st.ResumeTiming();
}

// Get final metrics
VecSimIndexDebugInfo info_after = VecSimIndex_DebugInfo(index);
size_t num_searches_after = info_after.hnswInfo.num_searches;
size_t num_visited_nodes_after = info_after.hnswInfo.num_visited_nodes;
size_t num_visited_nodes_higher_levels_after =
info_after.hnswInfo.num_visited_nodes_higher_levels;

// Calculate deltas
size_t total_searches = num_searches_after - num_searches_before;
size_t total_visited_nodes = num_visited_nodes_after - num_visited_nodes_before;
size_t total_visited_nodes_higher_levels =
num_visited_nodes_higher_levels_after - num_visited_nodes_higher_levels_before;

st.counters["Avg. results number"] = (double)total_res / iter;
st.counters["Recall"] = (float)total_res / total_res_bf;
st.counters["Avg_visited_nodes_level_0"] =
total_searches > 0 ? (double)total_visited_nodes / (double)total_searches : 0.0;
st.counters["Avg_visited_nodes_higher_levels"] =
total_searches > 0 ? (double)total_visited_nodes_higher_levels / (double)total_searches
: 0.0;
}

template <typename index_type_t>
Expand Down Expand Up @@ -338,15 +365,9 @@ void BM_VecSimBasics<index_type_t>::UpdateAtBlockSize(benchmark::State &st) {
assert(VecSimIndex_IndexSize(index) % BM_VecSimGeneral::block_size == overhead);
assert(VecSimIndex_IndexSize(index) == N_VECTORS + added_vec_count);

std::cout << "Added " << added_vec_count << " vectors to reach block size boundary."
<< std::endl;
std::cout << "Index size is now " << VecSimIndex_IndexSize(index) << std::endl;
std::cout << "Last label is " << curr_label - 1 << std::endl;

// Benchmark loop: repeatedly delete/add same vector to trigger grow-shrink cycles
labelType label_to_update = curr_label - 1;
size_t index_cap = index->indexMetaDataCapacity();
std::cout << "index_cap after adding vectors " << index_cap << std::endl;
assert(index_cap == initial_index_cap + BM_VecSimGeneral::block_size);

for (auto _ : st) {
Expand All @@ -370,9 +391,10 @@ void BM_VecSimBasics<index_type_t>::UpdateAtBlockSize(benchmark::State &st) {
assert(index->indexMetaDataCapacity() == index_cap);
}
assert(VecSimIndex_IndexSize(index) == N_VECTORS + added_vec_count);
// Benchmark counters store their value as a double, so convert directly to double:
// going through float (24-bit mantissa) would round large counts and capacities.
st.counters["vectors_added"] = static_cast<double>(added_vec_count);
// Subtract after converting so that an unexpected shrink shows up as a negative number
// instead of wrapping around in unsigned arithmetic.
st.counters["index_cap_change"] =
    static_cast<double>(index_cap) - static_cast<double>(initial_index_cap);

// Clean-up all the new vectors to restore the index size to its original value.

size_t new_label_count = index->indexLabelCount();
for (size_t label = initial_label_count; label < new_label_count; label++) {
// If index is tiered HNSW, remove directly from the underline HNSW.
Expand Down
Loading
Loading