Skip to content

Commit b18b42d

Browse files
committed
Add support + mission for new leader election algorithm
Part of stellar/stellar-core#4387. This change adds support for testing the new leader election algorithm by generating configs that use auto quorum set configuration where possible. In doing so, it switches many tests over to auto quorum set configuration. This change also adds a new set of missions that blend nodes running the old and new leader election algorithms to assess the impact of nodes using these different algorithms simultaneously. The good news is that, in running the tests, I did not see much increase in timeouts with either majority (no ledger had more than 1 timeout).
1 parent 6c1084c commit b18b42d

8 files changed

+232
-27
lines changed

doc/missions.md

+9
Original file line numberDiff line numberDiff line change
@@ -146,3 +146,12 @@ Run a network with a mix of fast and slow nodes. This mission allows to test int
146146
## MissionMaxTPSMixed
147147

148148
Stress test a network of simulated Tier1 topology with a mix of classic and soroban traffic and report maximum achieved throughput.
149+
150+
## MissionMixedNominationLeaderElectionWithOldMajority
151+
152+
Run a network with a mix of nodes running the old and new nomination leader election algorithms. The majority of the nodes (2 of 3) run the old algorithm.
153+
154+
155+
## MissionMixedNominationLeaderElectionWithNewMajority
156+
157+
Run a network with a mix of nodes running the old and new nomination leader election algorithms. The majority of the nodes (2 of 3) run the new algorithm.

src/FSLibrary/FSLibrary.fsproj

+1
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@
5656
<Compile Include="MissionHistoryTestnetPerformance.fs" />
5757
<Compile Include="MissionMixedImageLoadGeneration.fs" />
5858
<Compile Include="MissionMixedImageNetworkSurvey.fs" />
59+
<Compile Include="MissionMixedNominationLeaderElection.fs" />
5960
<Compile Include="MissionVersionMixConsensus.fs" />
6061
<Compile Include="MissionSorobanLoadGeneration.fs" />
6162
<Compile Include="MissionSorobanConfigUpgrades.fs" />
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
// Copyright 2024 Stellar Development Foundation and contributors. Licensed
2+
// under the Apache License, Version 2.0. See the COPYING file at the root
3+
// of this distribution or at http://www.apache.org/licenses/LICENSE-2.0
4+
5+
// This mission runs a network with a mix of nodes running the old and new
6+
// nomination leader election algorithms.
7+
8+
module MissionMixedNominationLeaderElection
9+
10+
open StellarCoreHTTP
11+
open StellarCorePeer
12+
open StellarCoreSet
13+
open StellarFormation
14+
open StellarMissionContext
15+
open StellarStatefulSets
16+
open StellarSupercluster
17+
18+
// Runs a small network that mixes the two nomination leader election
// algorithms and checks that it stays in sync.
//
// `oldCount` nodes are placed in a core set with
// `forceOldStyleLeaderElection = true`; the remaining nodes (out of
// `totalNodeCount`) are placed in a core set with `requireAutoQset = true`.
// The mission waits for sync, upgrades the protocol to the latest version,
// lets the network close 60 more ledgers, and then verifies consistency.
//
// Fails with an exception if `oldCount` is outside `0..totalNodeCount`, since
// that would give one of the core sets a negative node count.
let mixedNominationAlgorithm (oldCount: int) (context: MissionContext) =
    // Total number of nodes across both core sets.
    let totalNodeCount = 3

    if oldCount < 0 || oldCount > totalNodeCount then
        failwithf "oldCount must be between 0 and %d, but was %d" totalNodeCount oldCount

    let oldNodeCount = oldCount
    let newNodeCount = totalNodeCount - oldCount

    let oldName = "core-old-leader-election"
    let newName = "core-new-leader-election"

    let oldCoreSet =
        MakeLiveCoreSet
            oldName
            { CoreSetOptions.GetDefault context.image with
                  nodeCount = oldNodeCount
                  invariantChecks = AllInvariantsExceptBucketConsistencyChecks
                  accelerateTime = false
                  dumpDatabase = false
                  forceOldStyleLeaderElection = true }

    let newCoreSet =
        MakeLiveCoreSet
            newName
            { CoreSetOptions.GetDefault context.image with
                  nodeCount = newNodeCount
                  invariantChecks = AllInvariantsExceptBucketConsistencyChecks
                  accelerateTime = false
                  dumpDatabase = false
                  requireAutoQset = true }

    let coreSets = [ oldCoreSet; newCoreSet ]

    context.Execute
        coreSets
        None
        (fun (formation: StellarFormation) ->
            formation.WaitUntilSynced coreSets

            // NOTE(review): progress is observed through peer 0 of the old
            // core set, so `oldCount = 0` leaves that set empty — confirm
            // whether that case should be rejected as well.
            let peer = formation.NetworkCfg.GetPeer oldCoreSet 0
            peer.WaitForFewLedgers(3)
            formation.UpgradeProtocolToLatest coreSets
            peer.WaitForLatestProtocol()
            peer.WaitForFewLedgers(60) // About 5 minutes

            // Check everything is still in sync
            formation.CheckNoErrorsAndPairwiseConsistency()
            formation.EnsureAllNodesInSync coreSets)
62+
63+
// Mixed leader election mission with 2 nodes on the old algorithm.
let mixedNominationLeaderElectionWithOldMajority (context: MissionContext) =
    context |> mixedNominationAlgorithm 2
64+
65+
// Mixed leader election mission with 1 node on the old algorithm.
let mixedNominationLeaderElectionWithNewMajority (context: MissionContext) =
    context |> mixedNominationAlgorithm 1

src/FSLibrary/MissionProtocolUpgradeWithLoad.fs

+5-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,11 @@ let protocolUpgradeWithLoad (context: MissionContext) =
1818
"core"
1919
{ CoreSetOptions.GetDefault context.image with
2020
invariantChecks = AllInvariantsExceptBucketConsistencyChecks
21-
dumpDatabase = false }
21+
dumpDatabase = false
22+
// Set `requireAutoQset` to `true` as an extra check that this
23+
// mission uses the application-specific nomination leader
24+
// election protocol.
25+
requireAutoQset = true }
2226

2327
let context =
2428
{ context.WithSmallLoadgenOptions with

src/FSLibrary/StellarCoreCfg.fs

+114-18
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,10 @@
55
module StellarCoreCfg
66

77
open FSharp.Data
8+
open Logging
89
open Nett
10+
open System
11+
open System.Collections
912
open System.Text.RegularExpressions
1013
open StellarCoreSet
1114
open StellarNetworkCfg
@@ -141,6 +144,7 @@ type StellarCoreCfg =
141144
networkPassphrase: NetworkPassphrase
142145
nodeSeed: KeyPair
143146
nodeIsValidator: bool
147+
homeDomain: string option
144148
runStandalone: bool
145149
image: string
146150
preferredPeers: PeerDnsName list
@@ -156,6 +160,7 @@ type StellarCoreCfg =
156160
unsafeQuorum: bool
157161
failureSafety: int
158162
quorumSet: QuorumSet
163+
forceOldStyleLeaderElection: bool
159164
historyNodes: Map<PeerShortName, PeerDnsName>
160165
historyGetCommands: Map<PeerShortName, string>
161166
localHistory: bool
@@ -208,6 +213,10 @@ type StellarCoreCfg =
208213
t.Add("COMMANDS", logLevelCommands) |> ignore
209214
t.Add("CATCHUP_COMPLETE", self.catchupMode = CatchupComplete) |> ignore
210215

216+
match self.homeDomain with
217+
| None -> ()
218+
| Some hd -> t.Add("NODE_HOME_DOMAIN", hd) |> ignore
219+
211220
match self.network.missionContext.peerReadingCapacity, self.network.missionContext.peerFloodCapacity with
212221
| None, None -> ()
213222
| Some read, Some flood ->
@@ -306,6 +315,9 @@ type StellarCoreCfg =
306315
t.Add("QUORUM_INTERSECTION_CHECKER", false) |> ignore
307316
t.Add("MANUAL_CLOSE", self.manualClose) |> ignore
308317

318+
if self.forceOldStyleLeaderElection then
319+
t.Add("FORCE_OLD_STYLE_LEADER_ELECTION", true) |> ignore
320+
309321
let invList =
310322
match self.invariantChecks with
311323
| AllInvariants -> [ ".*" ]
@@ -321,7 +333,7 @@ type StellarCoreCfg =
321333
| Some duration -> t.Add("ARTIFICIALLY_SET_SURVEY_PHASE_DURATION_FOR_TESTING", duration) |> ignore
322334

323335
// Add tables (and subtables, recursively) for qsets.
324-
let rec addQsetAt (label: string) (qs: QuorumSet) =
336+
let rec addExplicitQsetAt (label: string) (qs: ExplicitQuorumSet) =
325337
let validators : string array =
326338
Map.toArray qs.validators
327339
|> Array.map (fun (n: PeerShortName, k: KeyPair) -> sprintf "%s %s" k.Address n.StringName)
@@ -333,11 +345,54 @@ type StellarCoreCfg =
333345
| None -> ()
334346
| Some (pct) -> innerTab.Add("THRESHOLD_PERCENT", pct) |> ignore
335347

336-
Array.iteri (fun (i: int) (qs: QuorumSet) -> addQsetAt (sprintf "%s.sub%d" label i) qs) qs.innerQuorumSets
348+
Array.iteri
349+
(fun (i: int) (qs: ExplicitQuorumSet) -> addExplicitQsetAt (sprintf "%s.sub%d" label i) qs)
350+
qs.innerQuorumSets
351+
352+
// Builds a TOML table for one home domain: HOME_DOMAIN holds its name and
// QUALITY holds the upper-cased string form of its quality.
let homeDomainToTable (homeDomain: HomeDomain) =
    let qualityLabel = String.map Char.ToUpper (homeDomain.quality.ToString())
    let table = Toml.Create()
    table.Add("HOME_DOMAIN", homeDomain.name) |> ignore
    table.Add("QUALITY", qualityLabel) |> ignore
    table
360+
361+
// Builds a TOML table for one validator: NAME, HOME_DOMAIN and PUBLIC_KEY are
// always set. HISTORY is set from `self.historyGetCommands` when the validator
// has an entry there; otherwise from the "get" entry of `remoteHist` for the
// validator's DNS name in `self.historyNodes`; it is omitted when the
// validator appears in neither map.
let autoValidatorToTable (autoValidator: AutoValidator) =
362+
let ret = Toml.Create()
363+
ret.Add("NAME", autoValidator.name.StringName) |> ignore
364+
ret.Add("HOME_DOMAIN", autoValidator.homeDomain) |> ignore
365+
ret.Add("PUBLIC_KEY", autoValidator.keys.Address) |> ignore
366+
367+
match Map.tryFind autoValidator.name self.historyGetCommands with
368+
| Some cmd -> ret.Add("HISTORY", cmd) |> ignore
369+
| None ->
370+
match Map.tryFind autoValidator.name self.historyNodes with
371+
| Some dnsName -> ret.Add("HISTORY", Map.find "get" (remoteHist dnsName)) |> ignore
372+
| None -> ()
337373

338-
addQsetAt "QUORUM_SET" self.quorumSet
374+
ret
339375

340376
let localTab = t.Add("HISTORY", Toml.Create(), TomlObjectFactory.RequireTomlObject()).Added
377+
378+
match self.quorumSet with
379+
| ExplicitQuorumSet qs ->
380+
addExplicitQsetAt "QUORUM_SET" qs
381+
382+
for historyNode in self.historyNodes do
383+
localTab.Add(historyNode.Key.StringName, remoteHist historyNode.Value) |> ignore
384+
385+
for historyGetCommand in self.historyGetCommands do
386+
localTab.Add(historyGetCommand.Key.StringName, getHist historyGetCommand.Value)
387+
|> ignore
388+
| AutoQuorumSet qs ->
389+
let homeDomainsTab = t.Add("HOME_DOMAINS", ([]: IDictionary list)).Added
390+
List.iter (fun hd -> homeDomainsTab.Add(homeDomainToTable hd) |> ignore) qs.homeDomains
391+
let validatorsTab = t.Add("VALIDATORS", ([]: IDictionary list)).Added
392+
// Filter out local node
393+
let validators = List.filter (fun (v: AutoValidator) -> v.keys <> self.nodeSeed) qs.validators
394+
List.iter (fun v -> validatorsTab.Add(autoValidatorToTable v) |> ignore) validators
395+
341396
// When simulateApplyWeight = Some _, stellar-core sets MODE_STORES_HISTORY
342397
// which is used for simulations that only test consensus.
343398
// In such cases, we should not pass put and mkdir commands.
@@ -351,12 +406,6 @@ type StellarCoreCfg =
351406
)
352407
|> ignore
353408

354-
for historyNode in self.historyNodes do
355-
localTab.Add(historyNode.Key.StringName, remoteHist historyNode.Value) |> ignore
356-
357-
for historyGetCommand in self.historyGetCommands do
358-
localTab.Add(historyGetCommand.Key.StringName, getHist historyGetCommand.Value)
359-
|> ignore
360409

361410
t
362411

@@ -425,17 +474,60 @@ type NetworkCfg with
425474
self.CoreSetList |> List.map processCoreSet |> List.concat |> Map.ofList
426475

427476
member self.QuorumSet(o: CoreSetOptions) : QuorumSet =
428-
let ofNameKeyList (nks: (PeerShortName * KeyPair) array) (threshold: int option) : QuorumSet =
429-
{ thresholdPercent = threshold
430-
validators = Map.ofArray nks
431-
innerQuorumSets = [||] }
477+
// Wraps the given (name, key) pairs in a flat explicit quorum set with the
// given threshold percentage and no inner quorum sets.
let toExplicitQSet (nks: (PeerShortName * KeyPair) array) (threshold: int option) : QuorumSet =
    LogInfo "Using explicit quorum set configuration"

    let flatQSet : ExplicitQuorumSet =
        { thresholdPercent = threshold
          validators = nks |> Map.ofArray
          innerQuorumSets = Array.empty }

    ExplicitQuorumSet flatQSet
484+
485+
// Builds an auto quorum set in which every given (name, key) pair becomes a
// validator of the single, high-quality home domain `homeDomain`.
let toAutoQSet (nks: (PeerShortName * KeyPair) list) (homeDomain: string) =
    LogInfo "Using auto quorum set configuration"

    let toValidator (peerName: PeerShortName, keyPair) =
        { name = peerName; homeDomain = homeDomain; keys = keyPair }

    AutoQuorumSet
        { homeDomains = [ { name = homeDomain; quality = High } ]
          validators = nks |> List.map toValidator }
493+
494+
// Turn an array of (PeerShortName, KeyPair) pairs into a simple flat
// QuorumSet. Auto quorum set configuration is chosen whenever a home domain
// is set and there are at least 3 validators; otherwise an explicit quorum
// set is produced, unless `requireAutoQset` is set, in which case this fails.
let simpleQuorum (nks: (PeerShortName * KeyPair) array) =
    match o.homeDomain, o.requireAutoQset with
    // There are enough validators to use auto quorum set config
    | Some hd, _ when nks.Length >= 3 -> toAutoQSet (List.ofArray nks) hd
    | Some _, true -> failwith "Auto quorum set configuration requires at least 3 validators"
    | None, true -> failwith "Auto quorum set configuration requires a home domain"
    // Fall back on manual quorum set configuration
    | _, false -> toExplicitQSet nks None
513+
514+
// Fails when auto quorum set configuration is required but the quorum mode
// named by `mode` cannot provide it; does nothing otherwise.
let checkAutoQSetIncompatability (mode: string) =
    match o.requireAutoQset with
    | true -> failwithf "Auto quorum set configuration is incompatible with %s" mode
    | false -> ()
432519

433520
match o.quorumSet with
434-
| AllPeersQuorum -> ofNameKeyList (self.GetNameKeyListAll()) None
435-
| CoreSetQuorum (ns) -> ofNameKeyList (self.GetNameKeyList [ ns ]) None
436-
| CoreSetQuorumList (q) -> ofNameKeyList (self.GetNameKeyList q) None
437-
| CoreSetQuorumListWithThreshold (q, t) -> ofNameKeyList (self.GetNameKeyList q) (Some(t))
438-
| ExplicitQuorum (e) -> e
521+
| AllPeersQuorum -> simpleQuorum (self.GetNameKeyListAll())
522+
| CoreSetQuorum (ns) -> simpleQuorum (self.GetNameKeyList [ ns ])
523+
| CoreSetQuorumList (q) -> simpleQuorum (self.GetNameKeyList q)
524+
| CoreSetQuorumListWithThreshold (q, t) ->
525+
checkAutoQSetIncompatability "CoreSetQuorumListWithThreshold"
526+
toExplicitQSet (self.GetNameKeyList q) (Some(t))
527+
| ExplicitQuorum (e) ->
528+
checkAutoQSetIncompatability "ExplicitQuorum"
529+
ExplicitQuorumSet e
530+
| AutoQuorum q -> AutoQuorumSet q
439531

440532
member self.HistoryNodes(o: CoreSetOptions) : Map<PeerShortName, PeerDnsName> =
441533
match o.historyNodes, o.quorumSet with
@@ -471,6 +563,7 @@ type NetworkCfg with
471563
networkPassphrase = self.networkPassphrase
472564
nodeSeed = KeyPair.Random()
473565
nodeIsValidator = false
566+
homeDomain = None
474567
runStandalone = false
475568
image = opts.image
476569
preferredPeers = self.PreferredPeers opts
@@ -486,6 +579,7 @@ type NetworkCfg with
486579
unsafeQuorum = opts.unsafeQuorum
487580
failureSafety = 0
488581
quorumSet = self.QuorumSet opts
582+
forceOldStyleLeaderElection = opts.forceOldStyleLeaderElection
489583
historyNodes = self.HistoryNodes opts
490584
historyGetCommands = opts.historyGetCommands
491585
localHistory = opts.localHistory
@@ -503,6 +597,7 @@ type NetworkCfg with
503597
networkPassphrase = self.networkPassphrase
504598
nodeSeed = c.keys.[i]
505599
nodeIsValidator = c.options.validate
600+
homeDomain = c.options.homeDomain
506601
runStandalone = false
507602
image = c.options.image
508603
preferredPeers =
@@ -524,6 +619,7 @@ type NetworkCfg with
524619
unsafeQuorum = c.options.unsafeQuorum
525620
failureSafety = 0
526621
quorumSet = self.QuorumSet c.options
622+
forceOldStyleLeaderElection = c.options.forceOldStyleLeaderElection
527623
historyNodes = self.HistoryNodes c.options
528624
historyGetCommands = c.options.historyGetCommands
529625
localHistory = c.options.localHistory

0 commit comments

Comments
 (0)