Skip to content

Commit faf4f05

Browse files
committedAug 25, 2022
[State Sync] Add backpressure to execute-commit pipeline.
1 parent 9092a25 commit faf4f05

File tree

16 files changed

+163
-94
lines changed

16 files changed

+163
-94
lines changed
 

‎config/src/config/state_sync_config.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -152,8 +152,8 @@ impl Default for DataStreamingServiceConfig {
152152
Self {
153153
global_summary_refresh_interval_ms: 50,
154154
max_concurrent_requests: 3,
155-
max_concurrent_state_requests: 4,
156-
max_data_stream_channel_sizes: 1000,
155+
max_concurrent_state_requests: 6,
156+
max_data_stream_channel_sizes: 500,
157157
max_request_retry: 3,
158158
max_notification_id_mappings: 300,
159159
progress_check_interval_ms: 100,

‎state-sync/aptos-data-client/src/lib.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,7 @@ pub enum ResponseError {
161161
/// This trait provides a simple feedback mechanism for users of the Data Client
162162
/// to alert it to bad responses so that the peers responsible for providing this
163163
/// data can be penalized.
164-
pub trait ResponseCallback: fmt::Debug + Send + 'static {
164+
pub trait ResponseCallback: fmt::Debug + Send + Sync + 'static {
165165
// TODO(philiphayes): ideally this would take a `self: Box<Self>`, i.e.,
166166
// consume the callback, which better communicates that you should only report
167167
// an error once. however, the current state-sync-v2 code makes this difficult...

‎state-sync/state-sync-v2/data-streaming-service/src/data_stream.rs

+24-21
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@ use aptos_data_client::{
2424
use aptos_id_generator::{IdGenerator, U64IdGenerator};
2525
use aptos_infallible::Mutex;
2626
use aptos_logger::prelude::*;
27-
use channel::{aptos_channel, message_queues::QueueStyle};
28-
use futures::{stream::FusedStream, Stream};
27+
use futures::channel::mpsc;
28+
use futures::{stream::FusedStream, SinkExt, Stream};
2929
use std::{
3030
collections::{BTreeMap, VecDeque},
3131
pin::Pin,
@@ -79,7 +79,7 @@ pub struct DataStream<T> {
7979
notifications_to_responses: BTreeMap<NotificationId, ResponseContext>,
8080

8181
// The channel on which to send data notifications when they are ready.
82-
notification_sender: channel::aptos_channel::Sender<(), DataNotification>,
82+
notification_sender: mpsc::Sender<DataNotification>,
8383

8484
// A unique notification ID generator
8585
notification_id_generator: Arc<U64IdGenerator>,
@@ -108,11 +108,8 @@ impl<T: AptosDataClient + Send + Clone + 'static> DataStream<T> {
108108
advertised_data: &AdvertisedData,
109109
) -> Result<(Self, DataStreamListener), Error> {
110110
// Create a new data stream listener
111-
let (notification_sender, notification_receiver) = aptos_channel::new(
112-
QueueStyle::FIFO, // If the stream overflows, drop the new messages
113-
config.max_data_stream_channel_sizes as usize,
114-
None,
115-
);
111+
let (notification_sender, notification_receiver) =
112+
mpsc::channel(config.max_data_stream_channel_sizes as usize);
116113
let data_stream_listener = DataStreamListener::new(data_stream_id, notification_receiver);
117114

118115
// Create a new stream engine
@@ -276,8 +273,13 @@ impl<T: AptosDataClient + Send + Clone + 'static> DataStream<T> {
276273
pending_client_response
277274
}
278275

279-
fn send_data_notification(&mut self, data_notification: DataNotification) -> Result<(), Error> {
280-
if let Err(error) = self.notification_sender.push((), data_notification) {
276+
// TODO(joshlind): this function shouldn't be blocking when trying to send! If there are
277+
// multiple streams, a single blocked stream could cause them all to block.
278+
async fn send_data_notification(
279+
&mut self,
280+
data_notification: DataNotification,
281+
) -> Result<(), Error> {
282+
if let Err(error) = self.notification_sender.send(data_notification).await {
281283
let error = Error::UnexpectedErrorEncountered(error.to_string());
282284
warn!(
283285
(LogSchema::new(LogEntry::StreamNotification)
@@ -298,7 +300,7 @@ impl<T: AptosDataClient + Send + Clone + 'static> DataStream<T> {
298300
self.send_failure
299301
}
300302

301-
fn send_end_of_stream_notification(&mut self) -> Result<(), Error> {
303+
async fn send_end_of_stream_notification(&mut self) -> Result<(), Error> {
302304
// Create end of stream notification
303305
let notification_id = self.notification_id_generator.next();
304306
let data_notification = DataNotification {
@@ -314,12 +316,12 @@ impl<T: AptosDataClient + Send + Clone + 'static> DataStream<T> {
314316
.message("Sent the end of stream notification"))
315317
);
316318
self.stream_end_notification_id = Some(notification_id);
317-
self.send_data_notification(data_notification)
319+
self.send_data_notification(data_notification).await
318320
}
319321

320322
/// Processes any data client responses that have been received. Note: the
321323
/// responses must be processed in FIFO order.
322-
pub fn process_data_responses(
324+
pub async fn process_data_responses(
323325
&mut self,
324326
global_data_summary: GlobalDataSummary,
325327
) -> Result<(), Error> {
@@ -328,25 +330,26 @@ impl<T: AptosDataClient + Send + Clone + 'static> DataStream<T> {
328330
|| self.send_failure
329331
{
330332
if !self.send_failure && self.stream_end_notification_id.is_none() {
331-
self.send_end_of_stream_notification()?;
333+
self.send_end_of_stream_notification().await?;
332334
}
333335
return Ok(()); // There's nothing left to do
334336
}
335337

336338
// Process any ready data responses
337339
for _ in 0..self.get_max_concurrent_requests() {
338340
if let Some(pending_response) = self.pop_pending_response_queue() {
339-
let mut pending_response = pending_response.lock();
340341
let client_response = pending_response
342+
.lock()
341343
.client_response
342344
.take()
343345
.expect("The client response should be ready!");
344-
let client_request = &pending_response.client_request;
346+
let client_request = &pending_response.lock().client_request.clone();
345347

346348
match client_response {
347349
Ok(client_response) => {
348350
if sanity_check_client_response(client_request, &client_response) {
349-
self.send_data_notification_to_client(client_request, client_response)?;
351+
self.send_data_notification_to_client(client_request, client_response)
352+
.await?;
350353
} else {
351354
self.handle_sanity_check_failure(
352355
client_request,
@@ -457,7 +460,7 @@ impl<T: AptosDataClient + Send + Clone + 'static> DataStream<T> {
457460
}
458461

459462
/// Sends a data notification to the client along the stream
460-
fn send_data_notification_to_client(
463+
async fn send_data_notification_to_client(
461464
&mut self,
462465
data_client_request: &DataClientRequest,
463466
data_client_response: Response<ResponsePayload>,
@@ -487,7 +490,7 @@ impl<T: AptosDataClient + Send + Clone + 'static> DataStream<T> {
487490
notification_id
488491
)))
489492
);
490-
self.send_data_notification(data_notification)?;
493+
self.send_data_notification(data_notification).await?;
491494

492495
// Reset the failure count. We've sent a notification and can move on.
493496
self.request_failure_count = 0;
@@ -606,7 +609,7 @@ impl<T> Drop for DataStream<T> {
606609
#[derive(Debug)]
607610
pub struct DataStreamListener {
608611
pub data_stream_id: DataStreamId,
609-
notification_receiver: channel::aptos_channel::Receiver<(), DataNotification>,
612+
notification_receiver: mpsc::Receiver<DataNotification>,
610613

611614
/// Stores the number of consecutive timeouts encountered when listening to this stream
612615
pub num_consecutive_timeouts: u64,
@@ -615,7 +618,7 @@ pub struct DataStreamListener {
615618
impl DataStreamListener {
616619
pub fn new(
617620
data_stream_id: DataStreamId,
618-
notification_receiver: channel::aptos_channel::Receiver<(), DataNotification>,
621+
notification_receiver: mpsc::Receiver<DataNotification>,
619622
) -> Self {
620623
Self {
621624
data_stream_id,

‎state-sync/state-sync-v2/data-streaming-service/src/streaming_service.rs

+9-7
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ impl<T: AptosDataClient + Send + Clone + 'static> DataStreamingService<T> {
8484
self.refresh_global_data_summary();
8585
}
8686
_ = progress_check_interval.select_next_some() => {
87-
self.check_progress_of_all_data_streams();
87+
self.check_progress_of_all_data_streams().await;
8888
}
8989
}
9090
}
@@ -255,11 +255,11 @@ impl<T: AptosDataClient + Send + Clone + 'static> DataStreamingService<T> {
255255
}
256256

257257
/// Ensures that all existing data streams are making progress
258-
fn check_progress_of_all_data_streams(&mut self) {
258+
async fn check_progress_of_all_data_streams(&mut self) {
259259
// Drive the progress of each stream
260260
let data_stream_ids = self.get_all_data_stream_ids();
261261
for data_stream_id in &data_stream_ids {
262-
if let Err(error) = self.update_progress_of_data_stream(data_stream_id) {
262+
if let Err(error) = self.update_progress_of_data_stream(data_stream_id).await {
263263
if matches!(error, Error::NoDataToFetch(_)) {
264264
sample!(
265265
SampleRate::Duration(Duration::from_secs(NO_DATA_TO_FETCH_LOG_FREQ_SECS)),
@@ -287,7 +287,7 @@ impl<T: AptosDataClient + Send + Clone + 'static> DataStreamingService<T> {
287287

288288
/// Ensures that a data stream has in-flight data requests and handles
289289
/// any new responses that have arrived since we last checked.
290-
fn update_progress_of_data_stream(
290+
async fn update_progress_of_data_stream(
291291
&mut self,
292292
data_stream_id: &DataStreamId,
293293
) -> Result<(), Error> {
@@ -324,7 +324,9 @@ impl<T: AptosDataClient + Send + Clone + 'static> DataStreamingService<T> {
324324
);
325325
} else {
326326
// Process any data client requests that have received responses
327-
data_stream.process_data_responses(global_data_summary)?;
327+
data_stream
328+
.process_data_responses(global_data_summary)
329+
.await?;
328330
}
329331

330332
Ok(())
@@ -420,7 +422,7 @@ mod streaming_service_tests {
420422
// should detect the dropped listeners and remove the streams).
421423
let timeout_deadline = Instant::now().add(Duration::from_secs(MAX_STREAM_WAIT_SECS));
422424
while Instant::now() < timeout_deadline {
423-
streaming_service.check_progress_of_all_data_streams();
425+
streaming_service.check_progress_of_all_data_streams().await;
424426
if streaming_service.get_all_data_stream_ids().is_empty() {
425427
return; // All streams were dropped!
426428
}
@@ -516,7 +518,7 @@ mod streaming_service_tests {
516518
let timeout_deadline =
517519
Instant::now().add(Duration::from_secs(MAX_STREAM_WAIT_SECS));
518520
while Instant::now() < timeout_deadline {
519-
streaming_service.check_progress_of_all_data_streams();
521+
streaming_service.check_progress_of_all_data_streams().await;
520522
if let Ok(data_notification) = timeout(
521523
Duration::from_secs(1),
522524
data_stream_listener.select_next_some(),

‎state-sync/state-sync-v2/data-streaming-service/src/tests/data_stream.rs

+16
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ async fn test_stream_blocked() {
7171
// Process the data responses and force a data re-fetch
7272
data_stream
7373
.process_data_responses(global_data_summary.clone())
74+
.await
7475
.unwrap();
7576

7677
// If we're sent a data notification, verify it's an end of stream notification!
@@ -130,6 +131,7 @@ async fn test_stream_garbage_collection() {
130131
// Process the data response
131132
data_stream
132133
.process_data_responses(global_data_summary.clone())
134+
.await
133135
.unwrap();
134136

135137
// Process the data response
@@ -199,6 +201,7 @@ async fn test_stream_data_error() {
199201
// Process the responses and verify the data client request was resent to the network
200202
data_stream
201203
.process_data_responses(global_data_summary)
204+
.await
202205
.unwrap();
203206
assert_none!(stream_listener.select_next_some().now_or_never());
204207
verify_client_request_resubmitted(&mut data_stream, client_request);
@@ -236,6 +239,7 @@ async fn test_stream_invalid_response() {
236239
// Process the responses and verify the data client request was resent to the network
237240
data_stream
238241
.process_data_responses(global_data_summary)
242+
.await
239243
.unwrap();
240244
assert_none!(stream_listener.select_next_some().now_or_never());
241245
verify_client_request_resubmitted(&mut data_stream, client_request);
@@ -270,13 +274,15 @@ async fn test_epoch_stream_out_of_order_responses() {
270274
set_epoch_ending_response_in_queue(&mut data_stream, 1);
271275
data_stream
272276
.process_data_responses(global_data_summary.clone())
277+
.await
273278
.unwrap();
274279
assert_none!(stream_listener.select_next_some().now_or_never());
275280

276281
// Set a response for the first request and verify two notifications
277282
set_epoch_ending_response_in_queue(&mut data_stream, 0);
278283
data_stream
279284
.process_data_responses(global_data_summary.clone())
285+
.await
280286
.unwrap();
281287
for _ in 0..2 {
282288
verify_epoch_ending_notification(
@@ -292,6 +298,7 @@ async fn test_epoch_stream_out_of_order_responses() {
292298
set_epoch_ending_response_in_queue(&mut data_stream, 2);
293299
data_stream
294300
.process_data_responses(global_data_summary.clone())
301+
.await
295302
.unwrap();
296303
verify_epoch_ending_notification(
297304
&mut stream_listener,
@@ -305,6 +312,7 @@ async fn test_epoch_stream_out_of_order_responses() {
305312
set_epoch_ending_response_in_queue(&mut data_stream, 2);
306313
data_stream
307314
.process_data_responses(global_data_summary.clone())
315+
.await
308316
.unwrap();
309317
for _ in 0..3 {
310318
verify_epoch_ending_notification(
@@ -342,6 +350,7 @@ async fn test_state_stream_out_of_order_responses() {
342350
set_num_state_values_response_in_queue(&mut data_stream, 0);
343351
data_stream
344352
.process_data_responses(global_data_summary.clone())
353+
.await
345354
.unwrap();
346355

347356
// Verify at least six requests have been made
@@ -355,13 +364,15 @@ async fn test_state_stream_out_of_order_responses() {
355364
set_state_value_response_in_queue(&mut data_stream, 1);
356365
data_stream
357366
.process_data_responses(global_data_summary.clone())
367+
.await
358368
.unwrap();
359369
assert_none!(stream_listener.select_next_some().now_or_never());
360370

361371
// Set a response for the first request and verify two notifications
362372
set_state_value_response_in_queue(&mut data_stream, 0);
363373
data_stream
364374
.process_data_responses(global_data_summary.clone())
375+
.await
365376
.unwrap();
366377
for _ in 0..2 {
367378
let data_notification = get_data_notification(&mut stream_listener).await.unwrap();
@@ -377,6 +388,7 @@ async fn test_state_stream_out_of_order_responses() {
377388
set_state_value_response_in_queue(&mut data_stream, 2);
378389
data_stream
379390
.process_data_responses(global_data_summary.clone())
391+
.await
380392
.unwrap();
381393
let data_notification = get_data_notification(&mut stream_listener).await.unwrap();
382394
assert_matches!(
@@ -390,6 +402,7 @@ async fn test_state_stream_out_of_order_responses() {
390402
set_state_value_response_in_queue(&mut data_stream, 2);
391403
data_stream
392404
.process_data_responses(global_data_summary.clone())
405+
.await
393406
.unwrap();
394407
for _ in 0..3 {
395408
let data_notification = get_data_notification(&mut stream_listener).await.unwrap();
@@ -430,6 +443,7 @@ async fn test_stream_listener_dropped() {
430443
set_epoch_ending_response_in_queue(&mut data_stream, 0);
431444
data_stream
432445
.process_data_responses(global_data_summary.clone())
446+
.await
433447
.unwrap();
434448
verify_epoch_ending_notification(
435449
&mut stream_listener,
@@ -449,6 +463,7 @@ async fn test_stream_listener_dropped() {
449463
set_epoch_ending_response_in_queue(&mut data_stream, 0);
450464
data_stream
451465
.process_data_responses(global_data_summary.clone())
466+
.await
452467
.unwrap_err();
453468
let (_, sent_notifications) = data_stream.get_sent_requests_and_notifications();
454469
assert_eq!(sent_notifications.len(), 2);
@@ -457,6 +472,7 @@ async fn test_stream_listener_dropped() {
457472
set_epoch_ending_response_in_queue(&mut data_stream, 0);
458473
data_stream
459474
.process_data_responses(global_data_summary.clone())
475+
.await
460476
.unwrap();
461477
let (_, sent_notifications) = data_stream.get_sent_requests_and_notifications();
462478
assert_eq!(sent_notifications.len(), 2);

‎state-sync/state-sync-v2/data-streaming-service/src/tests/streaming_client.rs

+3-7
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@ use crate::{
1515
},
1616
tests::utils::{create_ledger_info, initialize_logger},
1717
};
18-
use channel::{aptos_channel, message_queues::QueueStyle};
1918
use claim::assert_ok;
19+
use futures::channel::mpsc;
2020
use futures::{executor::block_on, FutureExt, StreamExt};
2121
use std::thread::JoinHandle;
2222

@@ -284,12 +284,8 @@ fn spawn_service_and_respond_with_error(
284284
}
285285

286286
/// Creates and returns a new data stream sender and listener pair.
287-
fn new_data_stream_sender_listener() -> (
288-
channel::aptos_channel::Sender<(), DataNotification>,
289-
DataStreamListener,
290-
) {
291-
let (notification_sender, notification_receiver) =
292-
aptos_channel::new(QueueStyle::KLAST, 1, None);
287+
fn new_data_stream_sender_listener() -> (mpsc::Sender<DataNotification>, DataStreamListener) {
288+
let (notification_sender, notification_receiver) = mpsc::channel(100);
293289
let data_stream_listener = DataStreamListener::new(0, notification_receiver);
294290

295291
(notification_sender, data_stream_listener)

‎state-sync/state-sync-v2/state-sync-driver/Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ edition = "2018"
1111

1212
[dependencies]
1313
anyhow = "1.0.52"
14+
async-trait = "0.1.53"
1415
bcs = "0.1.3"
1516
futures = "0.3.21"
1617
once_cell = "1.10.0"

‎state-sync/state-sync-v2/state-sync-driver/src/bootstrapper.rs

+16-12
Original file line numberDiff line numberDiff line change
@@ -1096,12 +1096,14 @@ impl<
10961096
let num_transaction_outputs = transaction_outputs_with_proof
10971097
.transactions_and_outputs
10981098
.len();
1099-
self.storage_synchronizer.apply_transaction_outputs(
1100-
notification_id,
1101-
transaction_outputs_with_proof,
1102-
proof_ledger_info,
1103-
end_of_epoch_ledger_info,
1104-
)?;
1099+
self.storage_synchronizer
1100+
.apply_transaction_outputs(
1101+
notification_id,
1102+
transaction_outputs_with_proof,
1103+
proof_ledger_info,
1104+
end_of_epoch_ledger_info,
1105+
)
1106+
.await?;
11051107
num_transaction_outputs
11061108
} else {
11071109
self.reset_active_stream(Some(NotificationAndFeedback::new(
@@ -1117,12 +1119,14 @@ impl<
11171119
BootstrappingMode::ExecuteTransactionsFromGenesis => {
11181120
if let Some(transaction_list_with_proof) = transaction_list_with_proof {
11191121
let num_transactions = transaction_list_with_proof.transactions.len();
1120-
self.storage_synchronizer.execute_transactions(
1121-
notification_id,
1122-
transaction_list_with_proof,
1123-
proof_ledger_info,
1124-
end_of_epoch_ledger_info,
1125-
)?;
1122+
self.storage_synchronizer
1123+
.execute_transactions(
1124+
notification_id,
1125+
transaction_list_with_proof,
1126+
proof_ledger_info,
1127+
end_of_epoch_ledger_info,
1128+
)
1129+
.await?;
11261130
num_transactions
11271131
} else {
11281132
self.reset_active_stream(Some(NotificationAndFeedback::new(

‎state-sync/state-sync-v2/state-sync-driver/src/continuous_syncer.rs

+16-12
Original file line numberDiff line numberDiff line change
@@ -251,12 +251,14 @@ impl<
251251
let num_transaction_outputs = transaction_outputs_with_proof
252252
.transactions_and_outputs
253253
.len();
254-
self.storage_synchronizer.apply_transaction_outputs(
255-
notification_id,
256-
transaction_outputs_with_proof,
257-
ledger_info_with_signatures.clone(),
258-
None,
259-
)?;
254+
self.storage_synchronizer
255+
.apply_transaction_outputs(
256+
notification_id,
257+
transaction_outputs_with_proof,
258+
ledger_info_with_signatures.clone(),
259+
None,
260+
)
261+
.await?;
260262
num_transaction_outputs
261263
} else {
262264
self.reset_active_stream(Some(NotificationAndFeedback::new(
@@ -272,12 +274,14 @@ impl<
272274
ContinuousSyncingMode::ExecuteTransactions => {
273275
if let Some(transaction_list_with_proof) = transaction_list_with_proof {
274276
let num_transactions = transaction_list_with_proof.transactions.len();
275-
self.storage_synchronizer.execute_transactions(
276-
notification_id,
277-
transaction_list_with_proof,
278-
ledger_info_with_signatures.clone(),
279-
None,
280-
)?;
277+
self.storage_synchronizer
278+
.execute_transactions(
279+
notification_id,
280+
transaction_list_with_proof,
281+
ledger_info_with_signatures.clone(),
282+
None,
283+
)
284+
.await?;
281285
num_transactions
282286
} else {
283287
self.reset_active_stream(Some(NotificationAndFeedback::new(

‎state-sync/state-sync-v2/state-sync-driver/src/metrics.rs

+20-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@
22
// SPDX-License-Identifier: Apache-2.0
33

44
use aptos_metrics_core::{
5-
register_int_counter_vec, register_int_gauge_vec, IntCounterVec, IntGaugeVec,
5+
register_histogram_vec, register_int_counter_vec, register_int_gauge_vec, HistogramTimer,
6+
HistogramVec, IntCounterVec, IntGaugeVec,
67
};
78
use once_cell::sync::Lazy;
89

@@ -11,6 +12,9 @@ pub const DRIVER_CLIENT_NOTIFICATION: &str = "driver_client_notification";
1112
pub const DRIVER_CONSENSUS_COMMIT_NOTIFICATION: &str = "driver_consensus_commit_notification";
1213
pub const DRIVER_CONSENSUS_SYNC_NOTIFICATION: &str = "driver_consensus_sync_notification";
1314
pub const STORAGE_SYNCHRONIZER_PENDING_DATA: &str = "storage_synchronizer_pending_data";
15+
pub const STORAGE_SYNCHRONIZER_APPLY_CHUNK: &str = "apply_chunk";
16+
pub const STORAGE_SYNCHRONIZER_EXECUTE_CHUNK: &str = "execute_chunk";
17+
pub const STORAGE_SYNCHRONIZER_COMMIT_CHUNK: &str = "commit_chunk";
1418

1519
/// An enum representing the component currently executing
1620
pub enum ExecutingComponent {
@@ -122,6 +126,16 @@ pub static STORAGE_SYNCHRONIZER_GAUGES: Lazy<IntGaugeVec> = Lazy::new(|| {
122126
.unwrap()
123127
});
124128

129+
/// Counter for tracking storage synchronizer latencies
130+
pub static STORAGE_SYNCHRONIZER_LATENCIES: Lazy<HistogramVec> = Lazy::new(|| {
131+
register_histogram_vec!(
132+
"aptos_state_sync_storage_synchronizer_latencies",
133+
"Counters related to the storage synchronizer latencies",
134+
&["label"]
135+
)
136+
.unwrap()
137+
});
138+
125139
/// Gauges for the storage synchronizer operations
126140
pub static STORAGE_SYNCHRONIZER_OPERATIONS: Lazy<IntGaugeVec> = Lazy::new(|| {
127141
register_int_gauge_vec!(
@@ -163,3 +177,8 @@ pub fn set_epoch_state_gauge(epoch: &str, validator_address: &str, validator_wei
163177
.with_label_values(&[epoch, validator_address, validator_weight])
164178
.set(1);
165179
}
180+
181+
/// Starts the timer for the provided histogram and label
182+
pub fn start_timer(histogram: &Lazy<HistogramVec>, label: &str) -> HistogramTimer {
183+
histogram.with_label_values(&[label]).start_timer()
184+
}

‎state-sync/state-sync-v2/state-sync-driver/src/storage_synchronizer.rs

+28-10
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
// Copyright (c) Aptos
22
// SPDX-License-Identifier: Apache-2.0
33

4+
use crate::metrics;
45
use crate::{
56
error::Error,
67
logging::{LogEntry, LogSchema},
78
metadata_storage::MetadataStorageInterface,
8-
metrics,
99
notification_handlers::{
1010
CommitNotification, CommittedTransactions, ErrorNotification, MempoolNotificationHandler,
1111
},
@@ -21,6 +21,7 @@ use aptos_types::{
2121
Transaction, TransactionListWithProof, TransactionOutput, TransactionOutputListWithProof,
2222
},
2323
};
24+
use async_trait::async_trait;
2425
use data_streaming_service::data_notification::NotificationId;
2526
use event_notifications::EventSubscriptionService;
2627
use executor_types::ChunkExecutorTrait;
@@ -41,11 +42,12 @@ use tokio::{
4142

4243
/// Synchronizes the storage of the node by verifying and storing new data
4344
/// (e.g., transactions and outputs).
45+
#[async_trait]
4446
pub trait StorageSynchronizerInterface {
4547
/// Applies a batch of transaction outputs.
4648
///
4749
/// Note: this assumes that the ledger infos have already been verified.
48-
fn apply_transaction_outputs(
50+
async fn apply_transaction_outputs(
4951
&mut self,
5052
notification_id: NotificationId,
5153
output_list_with_proof: TransactionOutputListWithProof,
@@ -56,7 +58,7 @@ pub trait StorageSynchronizerInterface {
5658
/// Executes a batch of transactions.
5759
///
5860
/// Note: this assumes that the ledger infos have already been verified.
59-
fn execute_transactions(
61+
async fn execute_transactions(
6062
&mut self,
6163
notification_id: NotificationId,
6264
transaction_list_with_proof: TransactionListWithProof,
@@ -227,8 +229,8 @@ impl<
227229
}
228230

229231
/// Notifies the executor of new data chunks
230-
fn notify_executor(&mut self, storage_data_chunk: StorageDataChunk) -> Result<(), Error> {
231-
if let Err(error) = self.executor_notifier.try_send(storage_data_chunk) {
232+
async fn notify_executor(&mut self, storage_data_chunk: StorageDataChunk) -> Result<(), Error> {
233+
if let Err(error) = self.executor_notifier.send(storage_data_chunk).await {
232234
Err(Error::UnexpectedError(format!(
233235
"Failed to send storage data chunk to executor: {:?}",
234236
error
@@ -240,12 +242,13 @@ impl<
240242
}
241243
}
242244

245+
#[async_trait]
243246
impl<
244247
ChunkExecutor: ChunkExecutorTrait + 'static,
245248
MetadataStorage: MetadataStorageInterface + Clone + Send + Sync + 'static,
246249
> StorageSynchronizerInterface for StorageSynchronizer<ChunkExecutor, MetadataStorage>
247250
{
248-
fn apply_transaction_outputs(
251+
async fn apply_transaction_outputs(
249252
&mut self,
250253
notification_id: NotificationId,
251254
output_list_with_proof: TransactionOutputListWithProof,
@@ -258,10 +261,10 @@ impl<
258261
target_ledger_info,
259262
end_of_epoch_ledger_info,
260263
);
261-
self.notify_executor(storage_data_chunk)
264+
self.notify_executor(storage_data_chunk).await
262265
}
263266

264-
fn execute_transactions(
267+
async fn execute_transactions(
265268
&mut self,
266269
notification_id: NotificationId,
267270
transaction_list_with_proof: TransactionListWithProof,
@@ -274,7 +277,7 @@ impl<
274277
target_ledger_info,
275278
end_of_epoch_ledger_info,
276279
);
277-
self.notify_executor(storage_data_chunk)
280+
self.notify_executor(storage_data_chunk).await
278281
}
279282

280283
fn initialize_state_synchronizer(
@@ -387,6 +390,10 @@ fn spawn_executor<ChunkExecutor: ChunkExecutorTrait + 'static>(
387390
target_ledger_info,
388391
end_of_epoch_ledger_info,
389392
) => {
393+
let timer = metrics::start_timer(
394+
&metrics::STORAGE_SYNCHRONIZER_LATENCIES,
395+
metrics::STORAGE_SYNCHRONIZER_EXECUTE_CHUNK,
396+
);
390397
let num_transactions = transactions_with_proof.transactions.len();
391398
let result = chunk_executor.execute_chunk(
392399
transactions_with_proof,
@@ -407,6 +414,7 @@ fn spawn_executor<ChunkExecutor: ChunkExecutorTrait + 'static>(
407414
num_transactions as u64,
408415
);
409416
}
417+
drop(timer);
410418
(notification_id, result)
411419
}
412420
StorageDataChunk::TransactionOutputs(
@@ -415,6 +423,10 @@ fn spawn_executor<ChunkExecutor: ChunkExecutorTrait + 'static>(
415423
target_ledger_info,
416424
end_of_epoch_ledger_info,
417425
) => {
426+
let timer = metrics::start_timer(
427+
&metrics::STORAGE_SYNCHRONIZER_LATENCIES,
428+
metrics::STORAGE_SYNCHRONIZER_APPLY_CHUNK,
429+
);
418430
let num_outputs = outputs_with_proof.transactions_and_outputs.len();
419431
let result = chunk_executor.apply_chunk(
420432
outputs_with_proof,
@@ -435,6 +447,7 @@ fn spawn_executor<ChunkExecutor: ChunkExecutorTrait + 'static>(
435447
num_outputs as u64,
436448
);
437449
}
450+
drop(timer);
438451
(notification_id, result)
439452
}
440453
storage_data_chunk => {
@@ -448,7 +461,7 @@ fn spawn_executor<ChunkExecutor: ChunkExecutorTrait + 'static>(
448461
// Notify the committer of new executed chunks
449462
match result {
450463
Ok(()) => {
451-
if let Err(error) = committer_notifier.try_send(notification_id) {
464+
if let Err(error) = committer_notifier.send(notification_id).await {
452465
let error = format!("Failed to notify the committer! Error: {:?}", error);
453466
send_storage_synchronizer_error(
454467
error_notification_sender.clone(),
@@ -498,6 +511,10 @@ fn spawn_committer<
498511
let committer = async move {
499512
while let Some(notification_id) = committer_listener.next().await {
500513
// Commit the executed chunk
514+
let timer = metrics::start_timer(
515+
&metrics::STORAGE_SYNCHRONIZER_LATENCIES,
516+
metrics::STORAGE_SYNCHRONIZER_COMMIT_CHUNK,
517+
);
501518
match chunk_executor.commit_chunk() {
502519
Ok(notification) => {
503520
// Log the event and update the metrics
@@ -543,6 +560,7 @@ fn spawn_committer<
543560
.await;
544561
}
545562
};
563+
drop(timer);
546564
decrement_pending_data_chunks(pending_transaction_chunks.clone());
547565
}
548566
};

‎state-sync/state-sync-v2/state-sync-driver/src/tests/bootstrapper.rs

+11-11
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ use data_streaming_service::{
3131
data_notification::{DataNotification, DataPayload},
3232
streaming_client::NotificationFeedback,
3333
};
34-
use futures::{channel::oneshot, FutureExt};
34+
use futures::{channel::oneshot, FutureExt, SinkExt};
3535
use mockall::{predicate::eq, Sequence};
3636
use std::sync::Arc;
3737

@@ -211,7 +211,7 @@ async fn test_data_stream_state_values() {
211211
// Create the mock streaming client
212212
let mut mock_streaming_client = create_mock_streaming_client();
213213
let mut expectation_sequence = Sequence::new();
214-
let (notification_sender_1, data_stream_listener_1) = create_data_stream_listener();
214+
let (mut notification_sender_1, data_stream_listener_1) = create_data_stream_listener();
215215
let (_notification_sender_2, data_stream_listener_2) = create_data_stream_listener();
216216
let data_stream_id_1 = data_stream_listener_1.data_stream_id;
217217
for data_stream_listener in [data_stream_listener_1, data_stream_listener_2] {
@@ -257,7 +257,7 @@ async fn test_data_stream_state_values() {
257257
notification_id,
258258
data_payload: DataPayload::TransactionOutputsWithProof(create_output_list_with_proof()),
259259
};
260-
notification_sender_1.push((), data_notification).unwrap();
260+
notification_sender_1.send(data_notification).await.unwrap();
261261

262262
// Drive progress again and ensure we get a verification error
263263
let error = drive_progress(&mut bootstrapper, &global_data_summary, false)
@@ -286,7 +286,7 @@ async fn test_data_stream_transactions() {
286286
// Create the mock streaming client
287287
let mut mock_streaming_client = create_mock_streaming_client();
288288
let mut expectation_sequence = Sequence::new();
289-
let (notification_sender_1, data_stream_listener_1) = create_data_stream_listener();
289+
let (mut notification_sender_1, data_stream_listener_1) = create_data_stream_listener();
290290
let (_notification_sender_2, data_stream_listener_2) = create_data_stream_listener();
291291
let data_stream_id_1 = data_stream_listener_1.data_stream_id;
292292
for data_stream_listener in [data_stream_listener_1, data_stream_listener_2] {
@@ -328,7 +328,7 @@ async fn test_data_stream_transactions() {
328328
notification_id,
329329
data_payload: DataPayload::TransactionsWithProof(create_transaction_list_with_proof()),
330330
};
331-
notification_sender_1.push((), data_notification).unwrap();
331+
notification_sender_1.send(data_notification).await.unwrap();
332332

333333
// Drive progress again and ensure we get a verification error
334334
let error = drive_progress(&mut bootstrapper, &global_data_summary, false)
@@ -357,7 +357,7 @@ async fn test_data_stream_transaction_outputs() {
357357
// Create the mock streaming client
358358
let mut mock_streaming_client = create_mock_streaming_client();
359359
let mut expectation_sequence = Sequence::new();
360-
let (notification_sender_1, data_stream_listener_1) = create_data_stream_listener();
360+
let (mut notification_sender_1, data_stream_listener_1) = create_data_stream_listener();
361361
let (_notification_sender_2, data_stream_listener_2) = create_data_stream_listener();
362362
let data_stream_id_1 = data_stream_listener_1.data_stream_id;
363363
for data_stream_listener in [data_stream_listener_1, data_stream_listener_2] {
@@ -401,7 +401,7 @@ async fn test_data_stream_transaction_outputs() {
401401
TransactionOutputListWithProof::new_empty(),
402402
),
403403
};
404-
notification_sender_1.push((), data_notification).unwrap();
404+
notification_sender_1.send(data_notification).await.unwrap();
405405

406406
// Drive progress again and ensure we get a verification error
407407
let error = drive_progress(&mut bootstrapper, &global_data_summary, false)
@@ -530,7 +530,7 @@ async fn test_snapshot_sync_existing_state() {
530530
// Create the mock streaming client
531531
let mut mock_streaming_client = create_mock_streaming_client();
532532
let mut expectation_sequence = Sequence::new();
533-
let (notification_sender_1, data_stream_listener_1) = create_data_stream_listener();
533+
let (mut notification_sender_1, data_stream_listener_1) = create_data_stream_listener();
534534
let (_notification_sender_2, data_stream_listener_2) = create_data_stream_listener();
535535
let data_stream_id_1 = data_stream_listener_1.data_stream_id;
536536
mock_streaming_client
@@ -604,7 +604,7 @@ async fn test_snapshot_sync_existing_state() {
604604
notification_id,
605605
data_payload: DataPayload::TransactionOutputsWithProof(create_output_list_with_proof()),
606606
};
607-
notification_sender_1.push((), data_notification).unwrap();
607+
notification_sender_1.send(data_notification).await.unwrap();
608608

609609
// Drive progress again and ensure we get an invalid payload error
610610
let error = drive_progress(&mut bootstrapper, &global_data_summary, false)
@@ -831,7 +831,7 @@ async fn test_waypoint_mismatch() {
831831

832832
// Create the mock streaming client
833833
let mut mock_streaming_client = create_mock_streaming_client();
834-
let (notification_sender, data_stream_listener) = create_data_stream_listener();
834+
let (mut notification_sender, data_stream_listener) = create_data_stream_listener();
835835
let data_stream_id = data_stream_listener.data_stream_id;
836836
mock_streaming_client
837837
.expect_get_all_epoch_ending_ledger_infos()
@@ -870,7 +870,7 @@ async fn test_waypoint_mismatch() {
870870
notification_id,
871871
data_payload: DataPayload::EpochEndingLedgerInfos(invalid_ledger_info),
872872
};
873-
notification_sender.push((), data_notification).unwrap();
873+
notification_sender.send(data_notification).await.unwrap();
874874

875875
// Drive progress again and ensure we get a verification error
876876
let error = drive_progress(&mut bootstrapper, &global_data_summary, false)

‎state-sync/state-sync-v2/state-sync-driver/src/tests/continuous_syncer.rs

+5-4
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ use data_streaming_service::{
2727
data_notification::{DataNotification, DataPayload},
2828
streaming_client::NotificationFeedback,
2929
};
30+
use futures::SinkExt;
3031
use mockall::{predicate::eq, Sequence};
3132
use std::sync::Arc;
3233
use storage_service_types::Epoch;
@@ -128,7 +129,7 @@ async fn test_data_stream_transactions_with_target() {
128129
// Create the mock streaming client
129130
let mut mock_streaming_client = create_mock_streaming_client();
130131
let mut expectation_sequence = Sequence::new();
131-
let (notification_sender_1, data_stream_listener_1) = create_data_stream_listener();
132+
let (mut notification_sender_1, data_stream_listener_1) = create_data_stream_listener();
132133
let (_notification_sender_2, data_stream_listener_2) = create_data_stream_listener();
133134
let data_stream_id_1 = data_stream_listener_1.data_stream_id;
134135
for data_stream_listener in [data_stream_listener_1, data_stream_listener_2] {
@@ -182,7 +183,7 @@ async fn test_data_stream_transactions_with_target() {
182183
TransactionOutputListWithProof::new_empty(),
183184
),
184185
};
185-
notification_sender_1.push((), data_notification).unwrap();
186+
notification_sender_1.send(data_notification).await.unwrap();
186187

187188
// Drive progress again and ensure we get a verification error
188189
let error = continuous_syncer
@@ -213,7 +214,7 @@ async fn test_data_stream_transaction_outputs() {
213214
// Create the mock streaming client
214215
let mut mock_streaming_client = create_mock_streaming_client();
215216
let mut expectation_sequence = Sequence::new();
216-
let (notification_sender_1, data_stream_listener_1) = create_data_stream_listener();
217+
let (mut notification_sender_1, data_stream_listener_1) = create_data_stream_listener();
217218
let (_notification_sender_2, data_stream_listener_2) = create_data_stream_listener();
218219
let data_stream_id_1 = data_stream_listener_1.data_stream_id;
219220
for data_stream_listener in [data_stream_listener_1, data_stream_listener_2] {
@@ -266,7 +267,7 @@ async fn test_data_stream_transaction_outputs() {
266267
transaction_output_with_proof,
267268
),
268269
};
269-
notification_sender_1.push((), data_notification).unwrap();
270+
notification_sender_1.send(data_notification).await.unwrap();
270271

271272
// Drive progress again and ensure we get a verification error
272273
let error = continuous_syncer

‎state-sync/state-sync-v2/state-sync-driver/src/tests/mocks.rs

+3-2
Original file line numberDiff line numberDiff line change
@@ -422,16 +422,17 @@ mock! {
422422
// This automatically creates a MockStorageSynchronizer.
423423
mock! {
424424
pub StorageSynchronizer {}
425+
#[async_trait]
425426
impl StorageSynchronizerInterface for StorageSynchronizer {
426-
fn apply_transaction_outputs(
427+
async fn apply_transaction_outputs(
427428
&mut self,
428429
notification_id: NotificationId,
429430
output_list_with_proof: TransactionOutputListWithProof,
430431
target_ledger_info: LedgerInfoWithSignatures,
431432
end_of_epoch_ledger_info: Option<LedgerInfoWithSignatures>,
432433
) -> Result<(), crate::error::Error>;
433434

434-
fn execute_transactions(
435+
async fn execute_transactions(
435436
&mut self,
436437
notification_id: NotificationId,
437438
transaction_list_with_proof: TransactionListWithProof,

‎state-sync/state-sync-v2/state-sync-driver/src/tests/storage_synchronizer.rs

+5
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ async fn test_apply_transaction_outputs() {
7878
create_epoch_ending_ledger_info(),
7979
None,
8080
)
81+
.await
8182
.unwrap();
8283

8384
// Verify we get a mempool and event notification. Also verify that there's no pending data.
@@ -113,6 +114,7 @@ async fn test_apply_transaction_outputs_error() {
113114
create_epoch_ending_ledger_info(),
114115
None,
115116
)
117+
.await
116118
.unwrap();
117119

118120
// Verify we get an error notification and that there's no pending data
@@ -145,6 +147,7 @@ async fn test_commit_chunk_error() {
145147
create_epoch_ending_ledger_info(),
146148
None,
147149
)
150+
.await
148151
.unwrap();
149152

150153
// Verify we get an error notification and that there's no pending data
@@ -191,6 +194,7 @@ async fn test_execute_transactions() {
191194
create_epoch_ending_ledger_info(),
192195
None,
193196
)
197+
.await
194198
.unwrap();
195199

196200
// Verify we get a mempool and event notification. Also verify that there's no pending data.
@@ -226,6 +230,7 @@ async fn test_execute_transactions_error() {
226230
create_epoch_ending_ledger_info(),
227231
None,
228232
)
233+
.await
229234
.unwrap();
230235

231236
// Verify we get an error notification and that there's no pending data

‎state-sync/state-sync-v2/state-sync-driver/src/tests/utils.rs

+3-4
Original file line numberDiff line numberDiff line change
@@ -30,11 +30,11 @@ use aptos_types::{
3030
waypoint::Waypoint,
3131
write_set::WriteSet,
3232
};
33-
use channel::{aptos_channel, aptos_channel::Sender, message_queues::QueueStyle};
3433
use data_streaming_service::{
3534
data_notification::DataNotification, data_stream::DataStreamListener, streaming_client::Epoch,
3635
};
3736
use event_notifications::EventNotificationListener;
37+
use futures::channel::mpsc;
3838
use futures::StreamExt;
3939
use mempool_notifications::{CommittedTransaction, MempoolNotificationListener};
4040
use move_deps::move_core_types::language_storage::TypeTag;
@@ -43,9 +43,8 @@ use rand::Rng;
4343
use storage_service_types::responses::CompleteDataRange;
4444

4545
/// Creates a new data stream listener and notification sender pair
46-
pub fn create_data_stream_listener() -> (Sender<(), DataNotification>, DataStreamListener) {
47-
let (notification_sender, notification_receiver) =
48-
aptos_channel::new(QueueStyle::KLAST, 100, None);
46+
pub fn create_data_stream_listener() -> (mpsc::Sender<DataNotification>, DataStreamListener) {
47+
let (notification_sender, notification_receiver) = mpsc::channel(100);
4948
let data_stream_listener = DataStreamListener::new(create_random_u64(), notification_receiver);
5049

5150
(notification_sender, data_stream_listener)

0 commit comments

Comments
 (0)
Please sign in to comment.