Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cmake/san.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,4 @@ if (USE_ASAN OR USE_MSAN)
set(LLVM_CXX_FLAGS "-stdlib=libc++ -I${MSAN_PREFIX}/include -I${MSAN_PREFIX}/include/c++/v1")
set(LLVM_LD_FLAGS "-stdlib=libc++ -Wl,-rpath=${MSAN_PREFIX}/lib -L${MSAN_PREFIX}/lib -lc++abi")
endif()
endif()
endif()
4 changes: 2 additions & 2 deletions src/VecSim/algorithms/brute_force/brute_force_multi.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,8 @@ class BruteForceIndex_Multi : public BruteForceIndex<DataType, DistType> {
const char *data = reinterpret_cast<const char *>(this->getDataByInternalId(id));

// Create a vector with the full data (including any metadata like norms)
std::vector<char> vec(this->getDataSize());
memcpy(vec.data(), data, this->getDataSize());
std::vector<char> vec(this->getStoredDataSize());
memcpy(vec.data(), data, this->getStoredDataSize());
vectors_output.push_back(std::move(vec));
}

Expand Down
4 changes: 2 additions & 2 deletions src/VecSim/algorithms/brute_force/brute_force_single.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,8 @@ class BruteForceIndex_Single : public BruteForceIndex<DataType, DistType> {
const char *data = reinterpret_cast<const char *>(this->getDataByInternalId(id));

// Create a vector with the full data (including any metadata like norms)
std::vector<char> vec(this->getDataSize());
memcpy(vec.data(), data, this->getDataSize());
std::vector<char> vec(this->getStoredDataSize());
memcpy(vec.data(), data, this->getStoredDataSize());
vectors_output.push_back(std::move(vec));

return vectors_output;
Expand Down
2 changes: 1 addition & 1 deletion src/VecSim/algorithms/hnsw/hnsw.h
Original file line number Diff line number Diff line change
Expand Up @@ -1182,7 +1182,7 @@ void HNSWIndex<DataType, DistType>::SwapLastIdWithDeletedId(idType element_inter
memcpy((void *)element, last_element, this->elementGraphDataSize);

auto data = getDataByInternalId(element_internal_id);
memcpy((void *)data, last_element_data, this->dataSize);
memcpy((void *)data, last_element_data, this->getStoredDataSize());

this->idToMetaData[element_internal_id] = this->idToMetaData[curElementCount];

Expand Down
4 changes: 2 additions & 2 deletions src/VecSim/algorithms/hnsw/hnsw_multi.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,8 @@ class HNSWIndex_Multi : public HNSWIndex<DataType, DistType> {
const char *data = this->getDataByInternalId(id);

// Create a vector with the full data (including any metadata like norms)
std::vector<char> vec(this->dataSize);
memcpy(vec.data(), data, this->dataSize);
std::vector<char> vec(this->getStoredDataSize());
memcpy(vec.data(), data, this->getStoredDataSize());
vectors_output.push_back(std::move(vec));
}

Expand Down
4 changes: 2 additions & 2 deletions src/VecSim/algorithms/hnsw/hnsw_single.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,8 @@ class HNSWIndex_Single : public HNSWIndex<DataType, DistType> {
const char *data = this->getDataByInternalId(id);

// Create a vector with the full data (including any metadata like norms)
std::vector<char> vec(this->dataSize);
memcpy(vec.data(), data, this->dataSize);
std::vector<char> vec(this->getStoredDataSize());
memcpy(vec.data(), data, this->getStoredDataSize());
vectors_output.push_back(std::move(vec));

return vectors_output;
Expand Down
8 changes: 7 additions & 1 deletion src/VecSim/algorithms/hnsw/hnsw_tiered.h
Original file line number Diff line number Diff line change
Expand Up @@ -179,11 +179,17 @@ class TieredHNSWIndex : public VecSimTieredIndex<DataType, DistType> {

~TieredHNSW_BatchIterator();

// Forwards the query-blob request to the flat iterator and returns its answer.
const void *getQueryBlob() const override {
    return flat_iterator->getQueryBlob();
}

VecSimQueryReply *getNextResults(size_t n_res, VecSimQueryReply_Order order) override;

bool isDepleted() override;

void reset() override;

#ifdef BUILD_TESTS
// Exposes the underlying hnsw_iterator pointer (this accessor sits in a BUILD_TESTS-only section).
VecSimBatchIterator *getHNSWIterator() {
    return hnsw_iterator;
}
#endif
};

public:
Expand Down Expand Up @@ -542,7 +548,7 @@ void TieredHNSWIndex<DataType, DistType>::executeInsertJob(HNSWInsertJob *job) {
HNSWIndex<DataType, DistType> *hnsw_index = this->getHNSWIndex();
// Copy the vector blob from the flat buffer, so we can release the flat lock while we are
// indexing the vector into HNSW index.
size_t data_size = this->frontendIndex->getDataSize();
size_t data_size = this->frontendIndex->getStoredDataSize();
auto blob_copy = this->getAllocator()->allocate_unique(data_size);
// Assuming the size of the blob stored in the frontend index matches the size of the blob
// stored in the HNSW index.
Expand Down
3 changes: 2 additions & 1 deletion src/VecSim/algorithms/svs/svs.h
Original file line number Diff line number Diff line change
Expand Up @@ -182,12 +182,13 @@ class SVSIndex : public VecSimIndexAbstract<svs_details::vecsim_dt<DataType>, fl
return MemoryUtils::unique_blob{const_cast<void *>(original_data), [](void *) {}};
}

const auto data_size = this->getDataSize() * n;
const auto data_size = this->getStoredDataSize() * n;

auto processed_blob =
MemoryUtils::unique_blob{this->allocator->allocate(data_size),
[this](void *ptr) { this->allocator->free_allocation(ptr); }};
// Assuming the original data size equals the processed data size
assert(this->getInputBlobSize() == this->getStoredDataSize());
memcpy(processed_blob.get(), original_data, data_size);
// Preprocess each vector in place
for (size_t i = 0; i < n; i++) {
Expand Down
4 changes: 2 additions & 2 deletions src/VecSim/algorithms/svs/svs_serializer_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ void SVSIndex<MetricType, DataType, isMulti, QuantBits, ResidualBits, IsLeanVec>
// Note: this->metric corresponds to MetricType template parameter
writeBinaryPOD(output, this->dim);
writeBinaryPOD(output, this->vecType); // DataType template parameter (as VecSimType enum)
writeBinaryPOD(output, this->dataSize);
writeBinaryPOD(output, this->getStoredDataSize());
writeBinaryPOD(output, this->metric); // MetricType template parameter (as VecSimMetric enum)
writeBinaryPOD(output, this->blockSize);
writeBinaryPOD(output, this->isMulti);
Expand Down Expand Up @@ -128,7 +128,7 @@ bool SVSIndex<MetricType, DataType, isMulti, QuantBits, ResidualBits,

compareField(input, this->dim, "dim");
compareField(input, this->vecType, "vecType");
compareField(input, this->dataSize, "dataSize");
compareField(input, this->getStoredDataSize(), "dataSize");
compareField(input, this->metric, "metric");
compareField(input, this->blockSize, "blockSize");
compareField(input, this->isMulti, "isMulti");
Expand Down
2 changes: 1 addition & 1 deletion src/VecSim/batch_iterator.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ struct VecSimBatchIterator : public VecsimBaseObject {
: VecsimBaseObject(allocator), query_vector(query_vector), returned_results_count(0),
timeoutCtx(tctx) {};

inline const void *getQueryBlob() const { return query_vector; }
virtual inline const void *getQueryBlob() const { return query_vector; }

// Returns the timeout context pointer stored in this iterator.
inline void *getTimeoutCtx() const {
    return timeoutCtx;
}

Expand Down
28 changes: 9 additions & 19 deletions src/VecSim/index_factories/brute_force_factory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include "VecSim/algorithms/brute_force/brute_force_single.h"
#include "VecSim/algorithms/brute_force/brute_force_multi.h"
#include "VecSim/index_factories/components/components_factory.h"
#include "VecSim/index_factories/factory_utils.h"
#include "VecSim/types/bfloat16.h"
#include "VecSim/types/float16.h"

Expand All @@ -32,31 +33,19 @@ inline VecSimIndex *NewIndex_ChooseMultiOrSingle(const BFParams *params,
BruteForceIndex_Single<DataType, DistType>(params, abstractInitParams, components);
}

// Builds the AbstractIndexInitParams for a brute-force index from the `bfParams`
// member of `params`. A new allocator is created for every call, and the log
// context is taken from `params->logCtx`.
static AbstractIndexInitParams NewAbstractInitParams(const VecSimParams *params) {
    const BFParams &bf = params->algoParams.bfParams;

    // Per-vector data size as computed by VecSimParams_GetDataSize for this type/dim/metric.
    const size_t vectorDataSize = VecSimParams_GetDataSize(bf.type, bf.dim, bf.metric);

    return {.allocator = VecSimAllocator::newVecsimAllocator(),
            .dim = bf.dim,
            .vecType = bf.type,
            .dataSize = vectorDataSize,
            .metric = bf.metric,
            .blockSize = bf.blockSize,
            .multi = bf.multi,
            .logCtx = params->logCtx};
}

VecSimIndex *NewIndex(const VecSimParams *params, bool is_normalized) {
const BFParams *bfParams = &params->algoParams.bfParams;
AbstractIndexInitParams abstractInitParams = NewAbstractInitParams(params);
AbstractIndexInitParams abstractInitParams =
VecSimFactory::NewAbstractInitParams(bfParams, params->logCtx, is_normalized);
return NewIndex(bfParams, abstractInitParams, is_normalized);
}

VecSimIndex *NewIndex(const BFParams *bfparams, const AbstractIndexInitParams &abstractInitParams,
bool is_normalized) {

assert(is_normalized ||
abstractInitParams.inputBlobSize == bfparams->dim * VecSimType_sizeof(bfparams->type));
assert(!is_normalized ||
abstractInitParams.inputBlobSize != bfparams->dim * VecSimType_sizeof(bfparams->type));
if (bfparams->type == VecSimType_FLOAT32) {
IndexComponents<float, float> indexComponents = CreateIndexComponents<float, float>(
abstractInitParams.allocator, bfparams->metric, bfparams->dim, is_normalized);
Expand Down Expand Up @@ -140,6 +129,7 @@ size_t EstimateInitialSize(const BFParams *params, bool is_normalized) {

size_t EstimateElementSize(const BFParams *params) {
// counting the vector size + idToLabel entry + LabelToIds entry (map reservation)
return params->dim * VecSimType_sizeof(params->type) + sizeof(labelType) + sizeof(void *);
return VecSimParams_GetStoredDataSize(params->type, params->dim, params->metric) +
sizeof(labelType) + sizeof(void *);
}
}; // namespace BruteForceFactory
40 changes: 40 additions & 0 deletions src/VecSim/index_factories/factory_utils.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/*
* Copyright (c) 2006-Present, Redis Ltd.
* All rights reserved.
*
* Licensed under your choice of the Redis Source Available License 2.0
* (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
* GNU Affero General Public License v3 (AGPLv3).
*/
#pragma once

#include "VecSim/vec_sim_index.h"

namespace VecSimFactory {
/**
 * Builds the AbstractIndexInitParams used by the index factories from an
 * algorithm-specific parameters struct.
 *
 * @tparam IndexParams           algorithm parameters type; it must expose the `type`, `dim`,
 *                               `metric`, `blockSize` and `multi` members read below.
 * @param algo_params            algorithm-specific parameters to read from.
 * @param logCtx                 logging context, stored as-is in the result.
 * @param is_input_preprocessed  true when the input vectors are already processed
 *                               (for example, normalized).
 * @return the init params, holding a newly created allocator and both blob sizes.
 */
template <typename IndexParams>
static AbstractIndexInitParams NewAbstractInitParams(const IndexParams *algo_params, void *logCtx,
                                                     bool is_input_preprocessed) {
    const size_t storedSize =
        VecSimParams_GetStoredDataSize(algo_params->type, algo_params->dim, algo_params->metric);

    // Input blob size:
    //  - preprocessed input (e.g. already normalized): same as the stored data size;
    //  - otherwise: the original vector size, dim * VecSimType_sizeof(type).
    size_t blobSize = storedSize;
    if (!is_input_preprocessed) {
        blobSize = algo_params->dim * VecSimType_sizeof(algo_params->type);
    }

    return {.allocator = VecSimAllocator::newVecsimAllocator(),
            .dim = algo_params->dim,
            .vecType = algo_params->type,
            .storedDataSize = storedSize,
            .metric = algo_params->metric,
            .blockSize = algo_params->blockSize,
            .multi = algo_params->multi,
            .logCtx = logCtx,
            .inputBlobSize = blobSize};
}
} // namespace VecSimFactory
27 changes: 7 additions & 20 deletions src/VecSim/index_factories/hnsw_factory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include "VecSim/algorithms/hnsw/hnsw_multi.h"
#include "VecSim/index_factories/hnsw_factory.h"
#include "VecSim/index_factories/components/components_factory.h"
#include "VecSim/index_factories/factory_utils.h"
#include "VecSim/algorithms/hnsw/hnsw.h"
#include "VecSim/types/bfloat16.h"
#include "VecSim/types/float16.h"
Expand All @@ -33,26 +34,10 @@ NewIndex_ChooseMultiOrSingle(const HNSWParams *params,
HNSWIndex_Single<DataType, DistType>(params, abstractInitParams, components);
}

// Builds the AbstractIndexInitParams for an HNSW index from the `hnswParams`
// member of `params`. A new allocator is created for every call, and the log
// context is taken from `params->logCtx`.
static AbstractIndexInitParams NewAbstractInitParams(const VecSimParams *params) {
    const HNSWParams &hnsw = params->algoParams.hnswParams;

    // Per-vector data size as computed by VecSimParams_GetDataSize for this type/dim/metric.
    const size_t vectorDataSize = VecSimParams_GetDataSize(hnsw.type, hnsw.dim, hnsw.metric);

    return {.allocator = VecSimAllocator::newVecsimAllocator(),
            .dim = hnsw.dim,
            .vecType = hnsw.type,
            .dataSize = vectorDataSize,
            .metric = hnsw.metric,
            .blockSize = hnsw.blockSize,
            .multi = hnsw.multi,
            .logCtx = params->logCtx};
}

VecSimIndex *NewIndex(const VecSimParams *params, bool is_normalized) {
const HNSWParams *hnswParams = &params->algoParams.hnswParams;
AbstractIndexInitParams abstractInitParams = NewAbstractInitParams(params);
AbstractIndexInitParams abstractInitParams =
VecSimFactory::NewAbstractInitParams(hnswParams, params->logCtx, is_normalized);

if (hnswParams->type == VecSimType_FLOAT32) {
IndexComponents<float, float> indexComponents = CreateIndexComponents<float, float>(
Expand Down Expand Up @@ -141,7 +126,8 @@ size_t EstimateElementSize(const HNSWParams *params) {
size_t elementGraphDataSize = sizeof(ElementGraphData) + sizeof(idType) * M * 2;

size_t size_total_data_per_element =
elementGraphDataSize + params->dim * VecSimType_sizeof(params->type);
elementGraphDataSize +
VecSimParams_GetStoredDataSize(params->type, params->dim, params->metric);

// when reserving space for new labels in the lookup hash table, each entry is a pointer to a
// label node (bucket).
Expand Down Expand Up @@ -220,7 +206,8 @@ VecSimIndex *NewIndex(const std::string &location, bool is_normalized) {
VecSimParams vecsimParams = {.algo = VecSimAlgo_HNSWLIB,
.algoParams = {.hnswParams = HNSWParams{params}}};

AbstractIndexInitParams abstractInitParams = NewAbstractInitParams(&vecsimParams);
AbstractIndexInitParams abstractInitParams =
VecSimFactory::NewAbstractInitParams(&params, vecsimParams.logCtx, is_normalized);
if (params.type == VecSimType_FLOAT32) {
IndexComponents<float, float> indexComponents = CreateIndexComponents<float, float>(
abstractInitParams.allocator, params.metric, abstractInitParams.dim, is_normalized);
Expand Down
16 changes: 3 additions & 13 deletions src/VecSim/index_factories/svs_factory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,29 +14,19 @@
#include "VecSim/vec_sim_index.h"
#include "VecSim/algorithms/svs/svs.h"
#include "VecSim/index_factories/components/components_factory.h"
#include "VecSim/index_factories/factory_utils.h"

namespace SVSFactory {

namespace {
// Builds the AbstractIndexInitParams for an SVS index from the `svsParams`
// member of `params`. A new allocator is created for every call, and the log
// context is taken from `params->logCtx`.
AbstractIndexInitParams NewAbstractInitParams(const VecSimParams *params) {
    const auto *svs = &params->algoParams.svsParams;

    // Per-vector data size as computed by VecSimParams_GetDataSize for this type/dim/metric.
    const size_t vectorDataSize = VecSimParams_GetDataSize(svs->type, svs->dim, svs->metric);

    AbstractIndexInitParams initParams = {.allocator = VecSimAllocator::newVecsimAllocator(),
                                          .dim = svs->dim,
                                          .vecType = svs->type,
                                          .dataSize = vectorDataSize,
                                          .metric = svs->metric,
                                          .blockSize = svs->blockSize,
                                          .multi = svs->multi,
                                          .logCtx = params->logCtx};
    return initParams;
}

// NewIndexImpl() is a chain of template helper functions that create a new SVS index.
template <typename MetricType, typename DataType, size_t QuantBits, size_t ResidualBits,
bool IsLeanVec>
VecSimIndex *NewIndexImpl(const VecSimParams *params, bool is_normalized) {
auto abstractInitParams = NewAbstractInitParams(params);
auto &svsParams = params->algoParams.svsParams;
auto abstractInitParams =
VecSimFactory::NewAbstractInitParams(&svsParams, params->logCtx, is_normalized);
auto preprocessors = CreatePreprocessorsContainer<svs_details::vecsim_dt<DataType>>(
abstractInitParams.allocator, svsParams.metric, svsParams.dim, is_normalized, 0);
IndexComponents<svs_details::vecsim_dt<DataType>, float> components = {
Expand Down
30 changes: 8 additions & 22 deletions src/VecSim/index_factories/tiered_factory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,17 +48,10 @@ inline VecSimIndex *NewIndex(const TieredIndexParams *params) {

BFParams bf_params = NewBFParams(params);

std::shared_ptr<VecSimAllocator> flat_allocator = VecSimAllocator::newVecsimAllocator();
size_t dataSize = VecSimParams_GetDataSize(bf_params.type, bf_params.dim, bf_params.metric);

AbstractIndexInitParams abstractInitParams = {.allocator = flat_allocator,
.dim = bf_params.dim,
.vecType = bf_params.type,
.dataSize = dataSize,
.metric = bf_params.metric,
.blockSize = bf_params.blockSize,
.multi = bf_params.multi,
.logCtx = params->primaryIndexParams->logCtx};
AbstractIndexInitParams abstractInitParams =
VecSimFactory::NewAbstractInitParams(&bf_params, params->primaryIndexParams->logCtx, false);
assert(hnsw_index->getInputBlobSize() == abstractInitParams.storedDataSize);
assert(hnsw_index->getStoredDataSize() == abstractInitParams.storedDataSize);
auto frontendIndex = static_cast<BruteForceIndex<DataType, DistType> *>(
BruteForceFactory::NewIndex(&bf_params, abstractInitParams, false));

Expand Down Expand Up @@ -144,17 +137,10 @@ inline VecSimIndex *NewIndex(const TieredIndexParams *params) {

auto bf_params = NewBFParams(params);

std::shared_ptr<VecSimAllocator> flat_allocator = VecSimAllocator::newVecsimAllocator();
size_t dataSize = VecSimParams_GetDataSize(bf_params.type, bf_params.dim, bf_params.metric);

AbstractIndexInitParams abstractInitParams = {.allocator = flat_allocator,
.dim = bf_params.dim,
.vecType = bf_params.type,
.dataSize = dataSize,
.metric = bf_params.metric,
.blockSize = bf_params.blockSize,
.multi = bf_params.multi,
.logCtx = params->primaryIndexParams->logCtx};
AbstractIndexInitParams abstractInitParams =
VecSimFactory::NewAbstractInitParams(&bf_params, params->primaryIndexParams->logCtx, false);
assert(svs_index->getInputBlobSize() == abstractInitParams.storedDataSize);
assert(svs_index->getStoredDataSize() == abstractInitParams.storedDataSize);
auto frontendIndex = static_cast<BruteForceIndex<DataType, float> *>(
BruteForceFactory::NewIndex(&bf_params, abstractInitParams, false));

Expand Down
Loading
Loading