@@ -32,12 +32,13 @@ use diesel::{
32
32
} ;
33
33
use graph:: {
34
34
constraint_violation,
35
- futures03:: future:: select_all,
35
+ futures03:: { future:: select_all, FutureExt as _ } ,
36
36
prelude:: {
37
37
info, lazy_static, o, warn, BlockNumber , BlockPtr , CheapClone , Logger , StoreError , ENV_VARS ,
38
38
} ,
39
39
schema:: EntityType ,
40
40
slog:: { debug, error} ,
41
+ tokio,
41
42
} ;
42
43
use itertools:: Itertools ;
43
44
@@ -687,6 +688,21 @@ impl CopyProgress {
687
688
}
688
689
}
689
690
691
+ enum WorkerResult { // What a future tracked by `Workers` resolves to
692
+ Ok ( CopyTableWorker ) , // the background task returned; the worker (and the `conn` it carries) is handed back to the control loop
693
+ Err ( StoreError ) , // awaiting the background task failed; `CopyTableWorker::run` reports this as a panic of the copy worker
694
+ Wake , // the periodic waker future fired; carries no worker
695
+ }
696
+
697
+ impl From < Result < CopyTableWorker , StoreError > > for WorkerResult { // lets `CopyTableWorker::run` finish with `.into()`
698
+ fn from ( result : Result < CopyTableWorker , StoreError > ) -> Self { // variant-for-variant mapping; never yields `Wake`
699
+ match result {
700
+ Ok ( worker) => WorkerResult :: Ok ( worker) ,
701
+ Err ( e) => WorkerResult :: Err ( e) ,
702
+ }
703
+ }
704
+ }
705
+
690
706
/// A helper to run copying of one table. We need to thread `conn` and
691
707
/// `table` from the control loop to the background worker and back again to
692
708
/// the control loop. This worker facilitates that
@@ -705,18 +721,15 @@ impl CopyTableWorker {
705
721
}
706
722
}
707
723
708
- async fn run (
709
- mut self ,
710
- logger : Logger ,
711
- progress : Arc < CopyProgress > ,
712
- ) -> Result < Self , StoreError > {
724
+ async fn run ( mut self , logger : Logger , progress : Arc < CopyProgress > ) -> WorkerResult { // same body as before; only the return type changes to WorkerResult
713
725
let object = self . table . dst . object . cheap_clone ( ) ; // cloned up front: `self` is moved into the closure below, but the name is still needed for the error message
714
726
graph:: spawn_blocking_allow_panic ( move || { // NOTE(review): presumably runs the closure on a blocking thread; confirm in `graph`
715
727
self . result = self . run_inner ( logger, & progress) ; // the copy outcome is stored on the worker itself rather than returned directly
716
728
self // hand the worker back to whoever awaits this future
717
729
} )
718
730
. await
719
731
. map_err ( |e| constraint_violation ! ( "copy worker for {} panicked: {}" , object, e) ) // an error from awaiting the task is reported as a panic of this worker
732
+ . into ( ) // Result<CopyTableWorker, StoreError> -> WorkerResult via the `From` impl
720
733
}
721
734
722
735
fn run_inner ( & mut self , logger : Logger , progress : & CopyProgress ) -> Result < Status , StoreError > {
@@ -812,6 +825,57 @@ impl CopyTableWorker {
812
825
}
813
826
}
814
827
828
+ /// Manages the futures of the workers that are copying data. In addition
829
+ /// to the copy workers it keeps a waker future that resolves periodically
830
+ /// so that the control loop gets a chance to create more workers if there
831
+ /// are database connections available
832
+ struct Workers {
833
+ /// The futures that are currently running. Between calls to `select` this
834
+ /// includes a waker future besides the copy workers; `len` and `has_work` rely on that
835
+ futures : Vec < Pin < Box < dyn Future < Output = WorkerResult > > > > ,
836
+ }
837
+
838
+ impl Workers {
839
+ fn new ( ) -> Self { // starts with no copy workers, only the waker
840
+ Self {
841
+ futures : vec ! [ Self :: waker( ) ] ,
842
+ }
843
+ }
844
+
845
+ fn add ( & mut self , worker : Pin < Box < dyn Future < Output = WorkerResult > > > ) { // registers one more future to be polled by `select`
846
+ self . futures . push ( worker) ;
847
+ }
848
+
849
+ fn has_work ( & self ) -> bool { // true when something besides the waker is pending
850
+ self . futures . len ( ) > 1 // the waker accounts for one entry
851
+ }
852
+
853
+ async fn select ( & mut self ) -> WorkerResult { // waits for the first future to finish and returns its result; the others stay pending
854
+ use WorkerResult :: * ;
855
+
856
+ let futures = std:: mem:: take ( & mut self . futures ) ; // `select_all` takes ownership; `self.futures` is empty until it is restored below
857
+ let ( result, _idx, remaining) = select_all ( futures) . await ; // `select_all` panics on an empty list; the waker keeps the list non-empty. NOTE(review): if this future is dropped while awaiting, `self.futures` stays empty and `len` would underflow; confirm callers always await to completion
858
+ self . futures = remaining;
859
+ match result {
860
+ Ok ( _) | Err ( _) => { /* nothing to do */ } // a copy worker finished or failed; the waker is still in `remaining`
861
+ Wake => { // the waker was the future that completed, so it is no longer in `remaining`
862
+ self . futures . push ( Self :: waker ( ) ) ; // re-arm it so the list contains a waker again
863
+ }
864
+ }
865
+ result
866
+ }
867
+
868
+ fn waker ( ) -> Pin < Box < dyn Future < Output = WorkerResult > > > { // builds a future that resolves to `Wake` after a delay
869
+ let sleep = tokio:: time:: sleep ( ENV_VARS . store . batch_target_duration ) ; // the wake-up interval is the configured `batch_target_duration`
870
+ Box :: pin ( sleep. map ( |( ) | WorkerResult :: Wake ) )
871
+ }
872
+
873
+ /// Return the number of copy workers, i.e. all tracked futures except the waker
874
+ fn len ( & self ) -> usize {
875
+ self . futures . len ( ) - 1 // assumes the waker is present, which holds between calls to `select`
876
+ }
877
+ }
878
+
815
879
/// A helper for copying subgraphs
816
880
pub struct Connection {
817
881
/// The connection pool for the shard that will contain the destination
@@ -926,7 +990,7 @@ impl Connection {
926
990
& mut self ,
927
991
state : & mut CopyState ,
928
992
progress : & Arc < CopyProgress > ,
929
- ) -> Option < Pin < Box < dyn Future < Output = Result < CopyTableWorker , StoreError > > > > > {
993
+ ) -> Option < Pin < Box < dyn Future < Output = WorkerResult > > > > {
930
994
let Some ( conn) = self . conn . take ( ) else {
931
995
return None ;
932
996
} ;
@@ -947,7 +1011,7 @@ impl Connection {
947
1011
& mut self ,
948
1012
state : & mut CopyState ,
949
1013
progress : & Arc < CopyProgress > ,
950
- ) -> Option < Pin < Box < dyn Future < Output = Result < CopyTableWorker , StoreError > > > > > {
1014
+ ) -> Option < Pin < Box < dyn Future < Output = WorkerResult > > > > {
951
1015
// It's important that we get the connection before the table since
952
1016
// we remove the table from the state and could drop it otherwise
953
1017
let Some ( conn) = self
@@ -989,19 +1053,15 @@ impl Connection {
989
1053
990
1054
/// Wait for all workers to finish. This is called when a worker has
991
1055
/// failed with an error that forces us to abort copying
992
- async fn cancel_workers (
993
- & mut self ,
994
- progress : Arc < CopyProgress > ,
995
- mut workers : Vec < Pin < Box < dyn Future < Output = Result < CopyTableWorker , StoreError > > > > > ,
996
- ) {
1056
+ async fn cancel_workers ( & mut self , progress : Arc < CopyProgress > , mut workers : Workers ) {
997
1057
progress. cancel ( ) ;
998
1058
error ! (
999
1059
self . logger,
1000
1060
"copying encountered an error; waiting for all workers to finish"
1001
1061
) ;
1002
- while ! workers. is_empty ( ) {
1003
- let ( result , _ , remaining ) = select_all ( workers ) . await ;
1004
- workers = remaining ;
1062
+ while workers. has_work ( ) {
1063
+ use WorkerResult :: * ;
1064
+ let result = workers . select ( ) . await ;
1005
1065
match result {
1006
1066
Ok ( worker) => {
1007
1067
self . conn = Some ( worker. conn ) ;
@@ -1010,6 +1070,7 @@ impl Connection {
1010
1070
/* Ignore; we had an error previously */
1011
1071
error ! ( self . logger, "copy worker panicked: {}" , e) ;
1012
1072
}
1073
+ Wake => { /* Ignore; this is just a waker */ }
1013
1074
}
1014
1075
}
1015
1076
}
@@ -1031,14 +1092,14 @@ impl Connection {
1031
1092
//
1032
1093
// The loop has to be very careful about terminating early so that
1033
1094
// we do not ever leave the loop with `self.conn == None`
1034
- let mut workers = Vec :: new ( ) ;
1035
- while !state. unfinished . is_empty ( ) || ! workers. is_empty ( ) {
1095
+ let mut workers = Workers :: new ( ) ;
1096
+ while !state. unfinished . is_empty ( ) || workers. has_work ( ) {
1036
1097
// We usually add at least one job here, except if we are out of
1037
1098
// tables to copy. In that case, we go through the `while` loop
1038
1099
// every time one of the tables we are currently copying
1039
1100
// finishes
1040
1101
if let Some ( worker) = self . default_worker ( & mut state, & progress) {
1041
- workers. push ( worker) ;
1102
+ workers. add ( worker) ;
1042
1103
}
1043
1104
loop {
1044
1105
if workers. len ( ) >= self . workers {
@@ -1047,24 +1108,24 @@ impl Connection {
1047
1108
let Some ( worker) = self . extra_worker ( & mut state, & progress) else {
1048
1109
break ;
1049
1110
} ;
1050
- workers. push ( worker) ;
1111
+ workers. add ( worker) ;
1051
1112
}
1052
1113
1053
1114
self . assert_progress ( workers. len ( ) , & state) ?;
1054
- let ( result, _idx, remaining) = select_all ( workers) . await ;
1055
- workers = remaining;
1115
+ let result = workers. select ( ) . await ;
1056
1116
1057
1117
// Analyze `result` and take another trip through the loop if
1058
1118
// everything is ok; wait for pending workers and return if
1059
1119
// there was an error or if copying was cancelled.
1120
+ use WorkerResult as W ;
1060
1121
match result {
1061
- Err ( e) => {
1122
+ W :: Err ( e) => {
1062
1123
// This is a panic in the background task. We need to
1063
1124
// cancel all other tasks and return the error
1064
1125
self . cancel_workers ( progress, workers) . await ;
1065
1126
return Err ( e) ;
1066
1127
}
1067
- Ok ( worker) => {
1128
+ W :: Ok ( worker) => {
1068
1129
// Put the connection back into self.conn so that we can use it
1069
1130
// in the next iteration.
1070
1131
self . conn = Some ( worker. conn ) ;
@@ -1090,6 +1151,10 @@ impl Connection {
1090
1151
}
1091
1152
}
1092
1153
}
1154
+ W :: Wake => {
1155
+ // nothing to do, just try to create more workers by
1156
+ // going through the loop again
1157
+ }
1093
1158
} ;
1094
1159
}
1095
1160
debug_assert ! ( self . conn. is_some( ) ) ;
0 commit comments