@@ -112,7 +112,7 @@ class TieredHNSWIndex : public VecSimTieredIndex<DataType, DistType> {
112
112
// ownership (we do it right after we update the HNSW global data and receive the new state).
113
113
template <bool releaseFlatGuard>
114
114
void insertVectorToHNSW (HNSWIndex<DataType, DistType> *hnsw_index, labelType label,
115
- const void *blob);
115
+ DataType *blob);
116
116
117
117
#ifdef BUILD_TESTS
118
118
#include " VecSim/algorithms/hnsw/hnsw_tiered_tests_friends.h"
@@ -366,7 +366,7 @@ void TieredHNSWIndex<DataType, DistType>::updateInsertJobInternalId(idType prev_
366
366
template <typename DataType, typename DistType>
367
367
template <bool releaseFlatGuard>
368
368
void TieredHNSWIndex<DataType, DistType>::insertVectorToHNSW(
369
- HNSWIndex<DataType, DistType> *hnsw_index, labelType label, const void *blob) {
369
+ HNSWIndex<DataType, DistType> *hnsw_index, labelType label, DataType *blob) {
370
370
// Acquire the index data lock, so we know what is the exact index size at this time. Acquire
371
371
// the main r/w lock first to avoid deadlocks.
372
372
AddVectorCtx state = {0 };
@@ -430,6 +430,9 @@ void TieredHNSWIndex<DataType, DistType>::insertVectorToHNSW(
430
430
/* ******************* Job's callbacks **********************************/
431
431
template <typename DataType, typename DistType>
432
432
void TieredHNSWIndex<DataType, DistType>::executeInsertJob(HNSWInsertJob *job) {
433
+ // Note: this method has not yet been tested with overwriting scenarios, where the job may
434
+ // have been invalidated before it is executed (TODO in the future).
435
+ HNSWIndex<DataType, DistType> *hnsw_index = this ->getHNSWIndex ();
433
436
// Note that accessing the job fields should occur with flat index guard held (here and later).
434
437
this ->flatIndexGuard .lock_shared ();
435
438
if (job->id == INVALID_JOB_ID) {
@@ -438,7 +441,6 @@ void TieredHNSWIndex<DataType, DistType>::executeInsertJob(HNSWInsertJob *job) {
438
441
return ;
439
442
}
440
443
441
- HNSWIndex<DataType, DistType> *hnsw_index = this ->getHNSWIndex ();
442
444
// Copy the vector blob from the flat buffer, so we can release the flat lock while we are
443
445
// indexing the vector into HNSW index.
444
446
DataType blob_copy[this ->frontendIndex ->getDim ()];
@@ -593,35 +595,18 @@ size_t TieredHNSWIndex<DataType, DistType>::indexLabelCount() const {
593
595
template <typename DataType, typename DistType>
594
596
int TieredHNSWIndex<DataType, DistType>::addVector(const void *blob, labelType label,
595
597
void *auxiliaryCtx) {
596
- int ret = 1 ;
597
- auto hnsw_index = this ->getHNSWIndex ();
598
- if (this ->getWriteMode () == VecSim_WriteInPlace) {
599
- this ->mainIndexGuard .lock ();
600
- // Internally, we may overwrite (delete the previous vector stored under this label), and
601
- // may need to increase the capacity when we append the new vector afterwards.
602
- ret = hnsw_index->addVector (blob, label);
603
- this ->mainIndexGuard .unlock ();
598
+
599
+ if (this ->getWriteMode () == VecSim_WriteInPlace ||
600
+ this ->frontendIndex ->indexSize () >= this ->flatBufferLimit ) {
601
+ auto hnsw_index = this ->getHNSWIndex ();
602
+ // Insert vector directly to HNSW (since flat buffer guard was not held, no need to release
603
+ // it internally).
604
+ this ->insertVectorToHNSW <false >(hnsw_index, label, (DataType *)blob);
604
605
this ->UpdateIndexMemory (this ->memoryCtx , this ->getAllocationSize ());
605
- return ret;
606
- }
607
- if (this ->frontendIndex ->indexSize () >= this ->flatBufferLimit ) {
608
- // Handle overwrite situation.
609
- if (!this ->backendIndex ->isMultiValue ()) {
610
- // This will do nothing (and return 0) if this label doesn't exist. Otherwise, it may
611
- // remove vector from the flat buffer and/or the HNSW index.
612
- ret -= this ->deleteVector (label);
613
- }
614
- if (this ->frontendIndex ->indexSize () >= this ->flatBufferLimit ) {
615
- // We didn't remove a vector from flat buffer due to overwrite, insert the new vector
616
- // directly to HNSW. Since flat buffer guard was not held, no need to release it
617
- // internally.
618
- this ->insertVectorToHNSW <false >(hnsw_index, label, blob);
619
- this ->UpdateIndexMemory (this ->memoryCtx , this ->getAllocationSize ());
620
- return ret;
621
- }
622
- // Otherwise, we fall back to the "regular" insertion into the flat buffer
623
- // (since it is not full anymore after removing the previous vector stored under the label).
606
+ return 1 ;
624
607
}
608
+
609
+ /* Note: this currently doesn't support overwriting (it assumes that the label doesn't exist)! */
625
610
this ->flatIndexGuard .lock ();
626
611
idType new_flat_id = this ->frontendIndex ->indexSize ();
627
612
if (this ->frontendIndex ->isLabelExists (label) && !this ->frontendIndex ->isMultiValue ()) {
@@ -713,7 +698,9 @@ int TieredHNSWIndex<DataType, DistType>::deleteVector(labelType label) {
713
698
// Note that we may remove the same vector that has been removed from the flat index, if it was
714
699
// being ingested at that time.
715
700
if (this ->getWriteMode () == VecSim_WriteAsync) {
701
+ this ->mainIndexGuard .lock_shared ();
716
702
num_deleted_vectors += this ->deleteLabelFromHNSW (label);
703
+ this ->mainIndexGuard .unlock_shared ();
717
704
// Apply ready swap jobs if number of deleted vectors reached the threshold
718
705
// (under exclusive lock of the main index guard).
719
706
this ->executeReadySwapJobs ();
0 commit comments