Skip to content

Commit af3a911

Browse files
committed
Add shard scaling factor
1 parent e3f88a5 commit af3a911

File tree

8 files changed

+184
-48
lines changed

8 files changed

+184
-48
lines changed

quickwit/quickwit-common/src/shared_consts.rs

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,8 +69,14 @@ pub const SPLIT_FIELDS_FILE_NAME: &str = "split_fields";
6969
/// More or less the indexing throughput of a core
7070
/// i.e. PIPELINE_THROUGHPUT / PIPELINE_FULL_CAPACITY
7171
pub const DEFAULT_SHARD_THROUGHPUT_LIMIT: ByteSize = ByteSize::mib(5);
72-
///
72+
/// Large enough to absorb small bursts but should remain defensive against unbalanced shards
7373
pub const DEFAULT_SHARD_BURST_LIMIT: ByteSize = ByteSize::mib(50);
7474

75+
/// Maximum factor that avoids oscillating between scale up and scale down
76+
pub const MAX_SHARD_SCALE_UP_FACTOR: f32 = 2.0;
77+
/// A high value to allow quick scale up by default
78+
pub const DEFAULT_SHARD_SCALE_UP_FACTOR: f32 = 1.5;
79+
const _: () = assert!(DEFAULT_SHARD_SCALE_UP_FACTOR < MAX_SHARD_SCALE_UP_FACTOR);
80+
7581
// (Just a reexport).
7682
pub use bytesize::MIB;

quickwit/quickwit-config/src/cluster_config/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ pub struct ClusterConfig {
2424
pub default_index_root_uri: Uri,
2525
pub replication_factor: usize,
2626
pub shard_throughput_limit: ByteSize,
27+
pub shard_scaling_factor: f32,
2728
}
2829

2930
impl ClusterConfig {
@@ -35,6 +36,7 @@ impl ClusterConfig {
3536
default_index_root_uri: Uri::for_test("ram:///indexes"),
3637
replication_factor: 1,
3738
shard_throughput_limit: quickwit_common::shared_consts::DEFAULT_SHARD_THROUGHPUT_LIMIT,
39+
shard_scaling_factor: 1.01,
3840
}
3941
}
4042
}

quickwit/quickwit-config/src/node_config/mod.rs

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,10 @@ use anyhow::{bail, ensure};
2525
use bytesize::ByteSize;
2626
use http::HeaderMap;
2727
use quickwit_common::net::HostAddr;
28-
use quickwit_common::shared_consts::{DEFAULT_SHARD_BURST_LIMIT, DEFAULT_SHARD_THROUGHPUT_LIMIT};
28+
use quickwit_common::shared_consts::{
29+
DEFAULT_SHARD_BURST_LIMIT, DEFAULT_SHARD_SCALE_UP_FACTOR, DEFAULT_SHARD_THROUGHPUT_LIMIT,
30+
MAX_SHARD_SCALE_UP_FACTOR,
31+
};
2932
use quickwit_common::uri::Uri;
3033
use quickwit_proto::indexing::CpuCapacity;
3134
use quickwit_proto::types::NodeId;
@@ -39,7 +42,7 @@ use crate::{ConfigFormat, MetastoreConfigs};
3942

4043
pub const DEFAULT_QW_CONFIG_PATH: &str = "config/quickwit.yaml";
4144

42-
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
45+
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
4346
#[serde(deny_unknown_fields)]
4447
pub struct RestConfig {
4548
pub listen_addr: SocketAddr,
@@ -48,7 +51,7 @@ pub struct RestConfig {
4851
pub extra_headers: HeaderMap,
4952
}
5053

51-
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
54+
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
5255
#[serde(deny_unknown_fields)]
5356
pub struct GrpcConfig {
5457
#[serde(default = "GrpcConfig::default_max_message_size")]
@@ -78,7 +81,7 @@ impl Default for GrpcConfig {
7881
}
7982
}
8083

81-
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
84+
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
8285
#[serde(deny_unknown_fields)]
8386
pub struct IndexerConfig {
8487
#[serde(default = "IndexerConfig::default_split_store_max_num_bytes")]
@@ -175,7 +178,7 @@ impl Default for IndexerConfig {
175178
}
176179
}
177180

178-
#[derive(Debug, Clone, Copy, Eq, PartialEq, Serialize, Deserialize)]
181+
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
179182
#[serde(deny_unknown_fields)]
180183
pub struct SplitCacheLimits {
181184
pub max_num_bytes: ByteSize,
@@ -201,7 +204,7 @@ impl SplitCacheLimits {
201204
}
202205
}
203206

204-
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
207+
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
205208
#[serde(deny_unknown_fields, default)]
206209
pub struct SearcherConfig {
207210
pub aggregation_memory_limit: ByteSize,
@@ -236,7 +239,7 @@ pub struct SearcherConfig {
236239
/// This policy is inspired by this guidance. It does not track instantaneous throughput, but
237240
/// computes an overall timeout using the following formula:
238241
/// `timeout_offset + num_bytes_get_request / min_throughtput`
239-
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
242+
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
240243
pub struct StorageTimeoutPolicy {
241244
pub min_throughtput_bytes_per_secs: u64,
242245
pub timeout_millis: u64,
@@ -320,7 +323,7 @@ impl SearcherConfig {
320323
}
321324
}
322325

323-
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
326+
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
324327
#[serde(deny_unknown_fields, default)]
325328
pub struct IngestApiConfig {
326329
/// Maximum memory space taken by the ingest WAL
@@ -333,6 +336,8 @@ pub struct IngestApiConfig {
333336
pub shard_throughput_limit: ByteSize,
334337
/// [hidden] Burst limit for each shard, sized to absorb small ingestion bursts
335338
pub shard_burst_limit: ByteSize,
339+
/// [hidden] new_shard_count = ceil(old_shard_count * shard_scale_up_factor)
340+
pub shard_scale_up_factor: f32,
336341
}
337342

338343
impl Default for IngestApiConfig {
@@ -344,6 +349,7 @@ impl Default for IngestApiConfig {
344349
content_length_limit: ByteSize::mib(10),
345350
shard_throughput_limit: DEFAULT_SHARD_THROUGHPUT_LIMIT,
346351
shard_burst_limit: DEFAULT_SHARD_BURST_LIMIT,
352+
shard_scale_up_factor: DEFAULT_SHARD_SCALE_UP_FACTOR,
347353
}
348354
}
349355
}
@@ -404,11 +410,18 @@ impl IngestApiConfig {
404410
self.shard_burst_limit,
405411
estimated_persist_size,
406412
);
413+
ensure!(
414+
self.shard_scale_up_factor > 1.0
415+
&& self.shard_scale_up_factor <= MAX_SHARD_SCALE_UP_FACTOR,
416+
"shard_scale_up_factor ({:?}) must be in the (1,{}) interval",
417+
self.shard_scale_up_factor,
418+
MAX_SHARD_SCALE_UP_FACTOR,
419+
);
407420
Ok(())
408421
}
409422
}
410423

411-
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
424+
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
412425
#[serde(deny_unknown_fields)]
413426
pub struct JaegerConfig {
414427
/// Enables the gRPC endpoint that allows the Jaeger Query Service to connect and retrieve

quickwit/quickwit-control-plane/src/control_plane.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,7 @@ impl ControlPlane {
167167
ingester_pool.clone(),
168168
replication_factor,
169169
shard_throughput_limit_mib,
170+
cluster_config.shard_scaling_factor,
170171
);
171172

172173
let readiness_tx = readiness_tx.clone();

quickwit/quickwit-control-plane/src/ingest/ingest_controller.rs

Lines changed: 34 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,7 @@ impl IngestController {
285285
ingester_pool: IngesterPool,
286286
replication_factor: usize,
287287
max_shard_ingestion_throughput_mib_per_sec: f32,
288+
shard_scaling_factor: f32,
288289
) -> Self {
289290
IngestController {
290291
metastore,
@@ -294,6 +295,7 @@ impl IngestController {
294295
stats: IngestControllerStats::default(),
295296
scaling_arbiter: ScalingArbiter::with_max_shard_ingestion_throughput_mib_per_sec(
296297
max_shard_ingestion_throughput_mib_per_sec,
298+
shard_scaling_factor,
297299
),
298300
}
299301
}
@@ -396,12 +398,13 @@ impl IngestController {
396398
};
397399

398400
match scaling_mode {
399-
ScalingMode::Up => {
401+
ScalingMode::Up(shards) => {
400402
self.try_scale_up_shards(
401403
local_shards_update.source_uid,
402404
shard_stats,
403405
model,
404406
progress,
407+
shards,
405408
)
406409
.await?;
407410
}
@@ -670,18 +673,19 @@ impl IngestController {
670673
shard_stats: ShardStats,
671674
model: &mut ControlPlaneModel,
672675
progress: &Progress,
676+
shards_to_create: usize,
673677
) -> MetastoreResult<()> {
674678
if !model
675-
.acquire_scaling_permits(&source_uid, ScalingMode::Up)
679+
.acquire_scaling_permits(&source_uid, ScalingMode::Up(shards_to_create))
676680
.unwrap_or(false)
677681
{
678682
return Ok(());
679683
}
680-
let new_num_open_shards = shard_stats.num_open_shards + 1;
681-
let new_shard_source_uids: HashMap<SourceUid, usize> =
682-
HashMap::from_iter([(source_uid.clone(), 1)]);
684+
let new_num_open_shards = shard_stats.num_open_shards + shards_to_create;
685+
let new_shards_per_source: HashMap<SourceUid, usize> =
686+
HashMap::from_iter([(source_uid.clone(), shards_to_create)]);
683687
let successful_source_uids_res = self
684-
.try_open_shards(new_shard_source_uids, model, &Default::default(), progress)
688+
.try_open_shards(new_shards_per_source, model, &Default::default(), progress)
685689
.await;
686690

687691
match successful_source_uids_res {
@@ -691,7 +695,7 @@ impl IngestController {
691695
if successful_source_uids.is_empty() {
692696
// We did not manage to create any of the requested shards.
693697
// We can release our permits.
694-
model.release_scaling_permits(&source_uid, ScalingMode::Up);
698+
model.release_scaling_permits(&source_uid, ScalingMode::Up(shards_to_create));
695699
warn!(
696700
index_uid=%source_uid.index_uid,
697701
source_id=%source_uid.source_id,
@@ -715,7 +719,7 @@ impl IngestController {
715719
source_id=%source_uid.source_id,
716720
"scaling up number of shards to {new_num_open_shards} failed: {metastore_error:?}"
717721
);
718-
model.release_scaling_permits(&source_uid, ScalingMode::Up);
722+
model.release_scaling_permits(&source_uid, ScalingMode::Up(shards_to_create));
719723
Err(metastore_error)
720724
}
721725
}
@@ -739,12 +743,12 @@ impl IngestController {
739743
/// The number of successfully opened shards per source is returned.
740744
async fn try_open_shards(
741745
&mut self,
742-
source_uids: HashMap<SourceUid, usize>,
746+
shards_per_source: HashMap<SourceUid, usize>,
743747
model: &mut ControlPlaneModel,
744748
unavailable_leaders: &FnvHashSet<NodeId>,
745749
progress: &Progress,
746750
) -> MetastoreResult<HashMap<SourceUid, usize>> {
747-
let num_shards: usize = source_uids.values().sum();
751+
let num_shards: usize = shards_per_source.values().sum();
748752

749753
if num_shards == 0 {
750754
return Ok(HashMap::new());
@@ -756,7 +760,7 @@ impl IngestController {
756760
return Ok(HashMap::new());
757761
};
758762

759-
let source_uids_with_multiplicity = source_uids
763+
let source_uids_with_multiplicity = shards_per_source
760764
.iter()
761765
.flat_map(|(source_uid, count)| std::iter::repeat(source_uid).take(*count));
762766

@@ -1347,6 +1351,7 @@ mod tests {
13471351
ingester_pool.clone(),
13481352
replication_factor,
13491353
TEST_SHARD_THROUGHPUT_LIMIT_MIB,
1354+
1.001,
13501355
);
13511356

13521357
let mut model = ControlPlaneModel::default();
@@ -1532,6 +1537,7 @@ mod tests {
15321537
ingester_pool,
15331538
replication_factor,
15341539
TEST_SHARD_THROUGHPUT_LIMIT_MIB,
1540+
1.001,
15351541
);
15361542

15371543
let mut model = ControlPlaneModel::default();
@@ -1574,6 +1580,7 @@ mod tests {
15741580
ingester_pool,
15751581
replication_factor,
15761582
TEST_SHARD_THROUGHPUT_LIMIT_MIB,
1583+
1.001,
15771584
);
15781585
let mut model = ControlPlaneModel::default();
15791586

@@ -1624,6 +1631,7 @@ mod tests {
16241631
ingester_pool.clone(),
16251632
replication_factor,
16261633
TEST_SHARD_THROUGHPUT_LIMIT_MIB,
1634+
1.001,
16271635
);
16281636

16291637
let mut model = ControlPlaneModel::default();
@@ -1806,6 +1814,7 @@ mod tests {
18061814
ingester_pool.clone(),
18071815
replication_factor,
18081816
TEST_SHARD_THROUGHPUT_LIMIT_MIB,
1817+
1.001,
18091818
);
18101819

18111820
let ingester_id_0 = NodeId::from("test-ingester-0");
@@ -2029,6 +2038,7 @@ mod tests {
20292038
ingester_pool.clone(),
20302039
replication_factor,
20312040
TEST_SHARD_THROUGHPUT_LIMIT_MIB,
2041+
1.001,
20322042
);
20332043

20342044
let index_uid = IndexUid::for_test("test-index", 0);
@@ -2166,6 +2176,7 @@ mod tests {
21662176
ingester_pool.clone(),
21672177
replication_factor,
21682178
TEST_SHARD_THROUGHPUT_LIMIT_MIB,
2179+
1.001,
21692180
);
21702181

21712182
let index_uid = IndexUid::for_test("test-index", 0);
@@ -2363,6 +2374,7 @@ mod tests {
23632374
ingester_pool.clone(),
23642375
replication_factor,
23652376
TEST_SHARD_THROUGHPUT_LIMIT_MIB,
2377+
1.001,
23662378
);
23672379

23682380
let index_uid = IndexUid::for_test("test-index", 0);
@@ -2487,6 +2499,7 @@ mod tests {
24872499
ingester_pool.clone(),
24882500
replication_factor,
24892501
TEST_SHARD_THROUGHPUT_LIMIT_MIB,
2502+
1.001,
24902503
);
24912504

24922505
let index_uid = IndexUid::for_test("test-index", 0);
@@ -2510,9 +2523,9 @@ mod tests {
25102523

25112524
let progress = Progress::default();
25122525

2513-
// Test could not find leader.
2526+
// Test could not find a leader because there is no ingester in the pool.
25142527
controller
2515-
.try_scale_up_shards(source_uid.clone(), shard_stats, &mut model, &progress)
2528+
.try_scale_up_shards(source_uid.clone(), shard_stats, &mut model, &progress, 1)
25162529
.await
25172530
.unwrap();
25182531

@@ -2564,21 +2577,21 @@ mod tests {
25642577

25652578
// Test failed to open shards.
25662579
controller
2567-
.try_scale_up_shards(source_uid.clone(), shard_stats, &mut model, &progress)
2580+
.try_scale_up_shards(source_uid.clone(), shard_stats, &mut model, &progress, 1)
25682581
.await
25692582
.unwrap();
25702583
assert_eq!(model.all_shards().count(), 0);
25712584

25722585
// Test failed to init shards.
25732586
controller
2574-
.try_scale_up_shards(source_uid.clone(), shard_stats, &mut model, &progress)
2587+
.try_scale_up_shards(source_uid.clone(), shard_stats, &mut model, &progress, 1)
25752588
.await
25762589
.unwrap_err();
25772590
assert_eq!(model.all_shards().count(), 0);
25782591

25792592
// Test successfully opened shard.
25802593
controller
2581-
.try_scale_up_shards(source_uid.clone(), shard_stats, &mut model, &progress)
2594+
.try_scale_up_shards(source_uid.clone(), shard_stats, &mut model, &progress, 1)
25822595
.await
25832596
.unwrap();
25842597
assert_eq!(
@@ -2598,6 +2611,7 @@ mod tests {
25982611
ingester_pool.clone(),
25992612
replication_factor,
26002613
TEST_SHARD_THROUGHPUT_LIMIT_MIB,
2614+
1.001,
26012615
);
26022616

26032617
let index_uid = IndexUid::for_test("test-index", 0);
@@ -2824,6 +2838,7 @@ mod tests {
28242838
ingester_pool.clone(),
28252839
replication_factor,
28262840
TEST_SHARD_THROUGHPUT_LIMIT_MIB,
2841+
1.001,
28272842
);
28282843

28292844
let index_uid = IndexUid::for_test("test-index", 0);
@@ -2907,6 +2922,7 @@ mod tests {
29072922
ingester_pool,
29082923
replication_factor,
29092924
TEST_SHARD_THROUGHPUT_LIMIT_MIB,
2925+
1.001,
29102926
);
29112927

29122928
let mut model = ControlPlaneModel::default();
@@ -2982,6 +2998,7 @@ mod tests {
29822998
ingester_pool.clone(),
29832999
replication_factor,
29843000
TEST_SHARD_THROUGHPUT_LIMIT_MIB,
3001+
1.001,
29853002
);
29863003

29873004
let closed_shards = controller.close_shards(Vec::new()).await;
@@ -3138,6 +3155,7 @@ mod tests {
31383155
ingester_pool.clone(),
31393156
replication_factor,
31403157
TEST_SHARD_THROUGHPUT_LIMIT_MIB,
3158+
1.001,
31413159
);
31423160

31433161
let mut model = ControlPlaneModel::default();

0 commit comments

Comments
 (0)