@@ -579,8 +579,13 @@ leader(info, {Status, Node, InfoList}, State0)
579579  when  Status  =:=  nodedown  orelse 
580580       Status  =:=  nodeup  -> 
581581    handle_node_status_change (Node , Status , InfoList , ? FUNCTION_NAME , State0 );
582- leader (info , {update_peer , PeerId , Update }, State0 ) -> 
583-     State  =  update_peer (PeerId , Update , State0 ),
582+ leader (info , {unsuspend_peer , PeerId }, State0 ) -> 
583+     State  =  case  ra_server :peer_status (PeerId , State0 # state .server_state ) of 
584+                 suspended  ->
585+                     update_peer (PeerId , #{status  =>  normal }, State0 );
586+                 _  ->
587+                     State0 
588+             end ,
584589    {keep_state , State , []};
585590leader (_ , tick_timeout , State0 ) -> 
586591    {State1 , RpcEffs } =  make_rpcs (State0 ),
@@ -1393,13 +1398,15 @@ handle_effects(RaftState, Effects0, EvtType, State0, Actions0) ->
13931398    {State , lists :reverse (Actions )}.
13941399
13951400handle_effect (_RaftState , {send_rpc , To , Rpc }, _ ,
1396-               # state {conf  =  Conf } =  State0 , Actions ) -> 
1401+               # state {conf  =  Conf ,
1402+                      server_state  =  SS } =  State0 , Actions ) -> 
13971403    %  fully qualified use only so that we can mock it for testing
13981404    %  TODO: review / refactor to remove the mod call here
1405+     PeerStatus  =  ra_server :peer_status (To , SS ),
13991406    case  ? MODULE :send_rpc (To , Rpc , State0 ) of 
14001407        ok  ->
14011408            {State0 , Actions };
1402-         nosuspend  ->
1409+         nosuspend  when   PeerStatus   ==   normal   ->
14031410            % % update peer status to suspended and spawn a process
14041411            % % to send the rpc without nosuspend so that it will block until
14051412            % % the data can get through
@@ -1410,11 +1417,13 @@ handle_effect(_RaftState, {send_rpc, To, Rpc}, _,
14101417                                 % % the peer status back to normal
14111418                                 ok  =  gen_statem :cast (To , Rpc ),
14121419                                 incr_counter (Conf , ? C_RA_SRV_MSGS_SENT , 1 ),
1413-                                  Self  !  {update_peer , To , #{ status   =>   normal } }
1420+                                  Self  !  {unsuspend_peer , To }
14141421                         end ),
1415-             ? DEBUG (" ~ts : temporarily suspending peer ~w  due to full distribution buffer"  ,
1416-                    [log_id (State0 ), To ]),
1422+             %   ?DEBUG("~ts: temporarily suspending peer ~w due to full distribution buffer ~W ",
1423+             %          [log_id(State0), To, Rpc, 5 ]),
14171424            {update_peer (To , #{status  =>  suspended }, State0 ), Actions };
1425+         nosuspend  ->
1426+             {State0 , Actions };
14181427        noconnect  ->
14191428            % % for noconnects just allow it to pipeline and catch up later
14201429            {State0 , Actions }
@@ -1976,6 +1985,8 @@ send_snapshots(Id, Term, {_, ToNode} = To, ChunkSize,
19761985            Result  =  read_chunks_and_send_rpc (RPC , To , ReadState , 1 ,
19771986                                              ChunkSize , InstallTimeout ,
19781987                                              SnapState ),
1988+             ? DEBUG (" ~ts : sending snapshot to ~w  completed"  ,
1989+                    [LogId , To ]),
19791990            ok  =  gen_statem :cast (Id , {To , Result })
19801991    end .
19811992
0 commit comments