@@ -405,23 +405,30 @@ void do_update_impl(
405
405
do_write_impl (std::move (kvs), root_folder, bucket_name, s3_client, std::forward<KeyBucketizer>(bucketizer));
406
406
}
407
407
408
- inline auto default_prefix_handler () {
408
+ inline PrefixHandler default_prefix_handler () {
409
409
return [](const std::string& prefix, const std::string& key_type_dir, const KeyDescriptor& key_descriptor, KeyType) {
410
410
return !prefix.empty () ? fmt::format (" {}/{}*{}" , key_type_dir, key_descriptor, prefix) : key_type_dir;
411
411
};
412
412
}
413
413
414
- template <class KeyBucketizer , class PrefixHandler >
415
- bool do_iterate_type_impl (
416
- KeyType key_type,
417
- const IterateTypePredicate& visitor,
414
/**
 * Plain value bundle describing where the keys of one key type are located.
 *
 * It carries three pieces of data: a key prefix, the directory of the key
 * type, and the number of leading characters that precede the key itself
 * in an object name.
 */
struct PathInfo {
    /// Prefix string for the keys of interest.
    std::string key_prefix_;
    /// Directory that holds the keys of the key type.
    std::string key_type_dir_;
    /// Count of characters in an object name that come before the key.
    size_t path_to_key_size_;

    /// Takes the strings by value and moves them into the members.
    PathInfo(std::string prefix, std::string key_type_dir, size_t path_to_key_size) :
        key_prefix_{std::move(prefix)},
        key_type_dir_{std::move(key_type_dir)},
        path_to_key_size_{path_to_key_size} {}
};
424
+
425
+ template <class KeyBucketizer >
426
+ PathInfo calculate_path_info (
418
427
const std::string& root_folder,
419
- const std::string& bucket_name,
420
- const S3ClientInterface& s3_client,
421
- KeyBucketizer&& bucketizer,
422
- PrefixHandler&& prefix_handler = default_prefix_handler(),
423
- const std::string& prefix = std::string{}) {
424
- ARCTICDB_SAMPLE (S3StorageIterateType, 0 )
428
+ KeyType key_type,
429
+ const PrefixHandler& prefix_handler,
430
+ const std::string& prefix,
431
+ KeyBucketizer&& bucketizer) {
425
432
auto key_type_dir = key_type_folder (root_folder, key_type);
426
433
const auto path_to_key_size = key_type_dir.size () + 1 + bucketizer.bucketize_length (key_type);
427
434
// if prefix is empty, add / to avoid matching both 'log' and 'logc' when key_type_dir is {root_folder}/log
@@ -438,19 +445,36 @@ bool do_iterate_type_impl(
438
445
: IndexDescriptorImpl::Type::TIMESTAMP,
439
446
FormatType::TOKENIZED);
440
447
auto key_prefix = prefix_handler (prefix, key_type_dir, key_descriptor, key_type);
441
- ARCTICDB_RUNTIME_DEBUG (log ::storage (), " Searching for objects in bucket {} with prefix {}" , bucket_name,
442
- key_prefix);
448
+
449
+ return {key_prefix, key_type_dir, path_to_key_size};
450
+ }
451
+
452
+ template <class KeyBucketizer >
453
+ bool do_iterate_type_impl (
454
+ KeyType key_type,
455
+ const IterateTypePredicate& visitor,
456
+ const std::string& root_folder,
457
+ const std::string& bucket_name,
458
+ const S3ClientInterface& s3_client,
459
+ KeyBucketizer&& bucketizer,
460
+ const PrefixHandler& prefix_handler = default_prefix_handler(),
461
+ const std::string& prefix = std::string{}) {
462
+ ARCTICDB_SAMPLE (S3StorageIterateType, 0 )
463
+
464
+ auto path_info = calculate_path_info (root_folder, key_type, prefix_handler, prefix, std::move (bucketizer));
465
+ ARCTICDB_RUNTIME_DEBUG (log ::storage (), " Iterating over objects in bucket {} with prefix {}" , bucket_name,
466
+ path_info.key_prefix_ );
443
467
444
468
auto continuation_token = std::optional<std::string>();
445
469
do {
446
- auto list_objects_result = s3_client.list_objects (key_prefix , bucket_name, continuation_token);
470
+ auto list_objects_result = s3_client.list_objects (path_info. key_prefix_ , bucket_name, continuation_token);
447
471
if (list_objects_result.is_success ()) {
448
472
auto & output = list_objects_result.get_output ();
449
473
450
474
ARCTICDB_RUNTIME_DEBUG (log ::storage (), " Received object list" );
451
475
452
476
for (auto & s3_object_name : output.s3_object_names ) {
453
- auto key = s3_object_name.substr (path_to_key_size );
477
+ auto key = s3_object_name.substr (path_info. path_to_key_size_ );
454
478
ARCTICDB_TRACE (log ::version (), " Got object_list: {}, key: {}" , s3_object_name, key);
455
479
auto k = variant_key_from_bytes (
456
480
reinterpret_cast <uint8_t *>(key.data ()),
@@ -474,13 +498,55 @@ bool do_iterate_type_impl(
474
498
error.GetMessage ().c_str ());
475
499
// We don't raise on expected errors like NoSuchKey because we want to return an empty list
476
500
// instead of raising.
477
- raise_if_unexpected_error (error, key_prefix );
501
+ raise_if_unexpected_error (error, path_info. key_prefix_ );
478
502
return false ;
479
503
}
480
504
} while (continuation_token.has_value ());
481
505
return false ;
482
506
}
483
507
508
+ template <class KeyBucketizer >
509
+ ObjectSizes do_calculate_sizes_for_type_impl (
510
+ KeyType key_type,
511
+ const std::string& root_folder,
512
+ const std::string& bucket_name,
513
+ const S3ClientInterface& s3_client,
514
+ KeyBucketizer&& bucketizer,
515
+ const PrefixHandler& prefix_handler = default_prefix_handler(),
516
+ const std::string& prefix = std::string{}) {
517
+ ARCTICDB_SAMPLE (S3StorageCalculateSizesForType, 0 )
518
+
519
+ auto path_info = calculate_path_info (root_folder, key_type, prefix_handler, prefix, std::move (bucketizer));
520
+ ARCTICDB_RUNTIME_DEBUG (log ::storage (), " Calculating sizes for objects in bucket {} with prefix {}" , bucket_name,
521
+ path_info.key_prefix_ );
522
+
523
+ auto continuation_token = std::optional<std::string>();
524
+ ObjectSizes res{key_type};
525
+ do {
526
+ auto list_objects_result = s3_client.list_objects (path_info.key_prefix_ , bucket_name, continuation_token);
527
+ if (list_objects_result.is_success ()) {
528
+ auto & output = list_objects_result.get_output ();
529
+
530
+ ARCTICDB_RUNTIME_DEBUG (log ::storage (), " Received object list" );
531
+
532
+ for (auto & s3_object_size : output.s3_object_sizes ) {
533
+ res.count_ += 1 ;
534
+ res.compressed_size_bytes_ += s3_object_size;
535
+ }
536
+ continuation_token = output.next_continuation_token ;
537
+ } else {
538
+ const auto & error = list_objects_result.get_error ();
539
+ log ::storage ().warn (" Failed to iterate key type with key '{}' {}: {}" ,
540
+ key_type,
541
+ error.GetExceptionName ().c_str (),
542
+ error.GetMessage ().c_str ());
543
+ raise_if_unexpected_error (error, path_info.key_prefix_ );
544
+ }
545
+ } while (continuation_token.has_value ());
546
+
547
+ return res;
548
+ }
549
+
484
550
template <class KeyBucketizer >
485
551
bool do_key_exists_impl (
486
552
const VariantKey& key,
0 commit comments