Skip to content

Commit e3315fa

Browse files
author
Antoni Nowakowski
committed
Merge branch 'main' into tendermint-main-fix-byzzfuzz-partitions
2 parents 9d61ae4 + 386ebe4 commit e3315fa

File tree

8 files changed

+84
-13
lines changed

8 files changed

+84
-13
lines changed

simulator/src/main/java/byzzbench/simulator/BaseScenario.java

+9
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,15 @@ public synchronized Node getNode(String nodeId) {
166166
@Override
167167
public final void setupScenario() {
168168
this.setup();
169+
170+
// sample f replicas to be faulty at start
171+
List<String> replicaIds = new ArrayList<>(this.getReplicas().keySet());
172+
Collections.shuffle(replicaIds);
173+
int f = this.maxFaultyReplicas();
174+
for (int i = 0; i < f; i++) {
175+
this.markReplicaFaulty(replicaIds.get(i));
176+
}
177+
169178
this.getClients().values().forEach(Client::initialize);
170179
this.getNodes().values().forEach(Node::initialize);
171180
this.scheduler.initializeScenario(this);

simulator/src/main/java/byzzbench/simulator/Timekeeper.java

+5
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,11 @@ public void onEventDropped(Event event) {
7373
// nothing to do
7474
}
7575

76+
@Override
77+
public void onEventRequeued(Event event) {
78+
// nothing to do
79+
}
80+
7681
@Override
7782
public void onEventDelivered(Event event) {
7883
// check if it was a timeout

simulator/src/main/java/byzzbench/simulator/faults/factories/ByzzFuzzScenarioFaultFactory.java

+2-1
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ public List<Fault> generateFaults(FaultContext input) {
3737
int d = scheduler.getNumRoundsWithNetworkFaults();
3838
int r = scheduler.getNumRoundsWithFaults();
3939
Set<String> replicaIds = scenario.getReplicas().keySet();
40+
Set<String> faultyReplicaIds = scenario.getFaultyReplicaIds();
4041

4142
// Create network faults
4243
for (int i = 1; i <= d; i++) {
@@ -49,7 +50,7 @@ public List<Fault> generateFaults(FaultContext input) {
4950
// Create process faults
5051
for (int i = 1; i <= c; i++) {
5152
int round = rand.nextInt(r) + 1;
52-
String sender = replicaIds.stream().skip(rand.nextInt(replicaIds.size())).findFirst().orElseThrow();
53+
String sender = input.getScenario().getFaultyReplicaIds().stream().skip(rand.nextInt(faultyReplicaIds.size())).findFirst().orElseThrow();
5354
Set<String> recipientIds = SetSubsets.getRandomNonEmptySubset(replicaIds);
5455

5556
// generate process fault

simulator/src/main/java/byzzbench/simulator/scheduler/BaseScheduler.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ public List<ClientRequestEvent> getQueuedClientRequestEvents(Scenario scenario)
118118
* @param messageEvents The list of queued message events
119119
* @return The next message event
120120
*/
121-
public Event getNextMessageEvent(List<Event> messageEvents) {
121+
public <T extends Event> T getNextMessageEvent(List<T> messageEvents) {
122122
switch (config.getScheduler().getExecutionMode()) {
123123
case SYNC -> {
124124
return messageEvents.stream().min(Comparator.comparing(Event::getEventId)).orElseThrow();

simulator/src/main/java/byzzbench/simulator/scheduler/RandomScheduler.java

+16-10
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
import java.util.List;
1717
import java.util.Optional;
1818
import java.util.Random;
19-
import java.util.stream.Collectors;
19+
import java.util.SortedSet;
2020

2121
/**
2222
* A scheduler that randomly selects events to deliver, drop, mutate or timeout.
@@ -26,11 +26,14 @@
2626
public class RandomScheduler extends BaseScheduler {
2727
private final Random random = new Random();
2828

29-
3029
public RandomScheduler(ByzzBenchConfig config, MessageMutatorService messageMutatorService) {
3130
super(config, messageMutatorService);
3231
}
3332

33+
public <T> T getRandomElement(List<T> list) {
34+
return list.get(random.nextInt(list.size()));
35+
}
36+
3437
@Override
3538
public String getId() {
3639
return "Random";
@@ -53,21 +56,24 @@ public synchronized Optional<EventDecision> scheduleNext(Scenario scenario) thro
5356
}
5457

5558
List<TimeoutEvent> timeoutEvents = this.getQueuedTimeoutEvents(scenario);
56-
List<Event> clientRequestEvents = availableEvents.stream().filter(ClientRequestEvent.class::isInstance).collect(Collectors.toList());
57-
List<Event> messageEvents = availableEvents.stream().filter(MessageEvent.class::isInstance).collect(Collectors.toList());
59+
List<Event> clientRequestEvents = availableEvents.stream().filter(ClientRequestEvent.class::isInstance).toList();
60+
List<MessageEvent> messageEvents = availableEvents.stream().filter(MessageEvent.class::isInstance).map(MessageEvent.class::cast).toList();
61+
62+
SortedSet<String> faultyReplicaIds = scenario.getFaultyReplicaIds();
63+
List<MessageEvent> mutateableMessageEvents = messageEvents.stream().filter(msg -> faultyReplicaIds.contains(msg.getSenderId())).toList();
5864

5965
int timeoutWeight = timeoutEvents.size() * this.deliverTimeoutWeight();
6066
int deliverMessageWeight = messageEvents.size() * this.deliverMessageWeight();
6167
int deliverClientRequestWeight = clientRequestEvents.size() * this.deliverClientRequestWeight();
6268
int dropMessageWeight = (messageEvents.size() * this.dropMessageWeight(scenario));
63-
int mutateMessageWeight = (messageEvents.size() * this.mutateMessageWeight(scenario));
69+
int mutateMessageWeight = (mutateableMessageEvents.size() * this.mutateMessageWeight(scenario));
6470
int dieRoll = random.nextInt(timeoutWeight + deliverMessageWeight
6571
+ deliverClientRequestWeight + dropMessageWeight + mutateMessageWeight);
6672

6773
// check if we should trigger a timeout
6874
dieRoll -= timeoutWeight;
6975
if (dieRoll < 0) {
70-
Event timeout = timeoutEvents.get(random.nextInt(timeoutEvents.size()));
76+
Event timeout = getRandomElement(timeoutEvents);
7177
scenario.getTransport().deliverEvent(timeout.getEventId());
7278
EventDecision decision = new EventDecision(EventDecision.DecisionType.DELIVERED, timeout.getEventId());
7379
return Optional.of(decision);
@@ -85,7 +91,7 @@ public synchronized Optional<EventDecision> scheduleNext(Scenario scenario) thro
8591
// check if we should target delivering a request from a client to a replica
8692
dieRoll -= deliverClientRequestWeight;
8793
if (dieRoll < 0) {
88-
Event request = clientRequestEvents.get(random.nextInt(clientRequestEvents.size()));
94+
Event request = getRandomElement(clientRequestEvents);
8995
scenario.getTransport().deliverEvent(request.getEventId());
9096
EventDecision decision = new EventDecision(EventDecision.DecisionType.DELIVERED, request.getEventId());
9197
return Optional.of(decision);
@@ -94,7 +100,7 @@ public synchronized Optional<EventDecision> scheduleNext(Scenario scenario) thro
94100
// check if we should drop a message sent between nodes
95101
dieRoll -= dropMessageWeight;
96102
if (dieRoll < 0) {
97-
Event message = messageEvents.get(random.nextInt(messageEvents.size()));
103+
Event message = getRandomElement(messageEvents);
98104
scenario.getTransport().dropEvent(message.getEventId());
99105
if(!(message instanceof GossipMessage)) {
100106
EventDecision decision = new EventDecision(EventDecision.DecisionType.DROPPED, message.getEventId());
@@ -105,7 +111,7 @@ public synchronized Optional<EventDecision> scheduleNext(Scenario scenario) thro
105111
// check if we should mutate-and-deliver a message sent between nodes
106112
dieRoll -= mutateMessageWeight;
107113
if (dieRoll < 0) {
108-
Event message = messageEvents.get(random.nextInt(messageEvents.size()));
114+
Event message = getRandomElement(mutateableMessageEvents);
109115
List<MessageMutationFault> mutators = this.getMessageMutatorService().getMutatorsForEvent(message);
110116

111117
if (mutators.isEmpty()) {
@@ -117,7 +123,7 @@ public synchronized Optional<EventDecision> scheduleNext(Scenario scenario) thro
117123
}
118124
scenario.getTransport().applyMutation(
119125
message.getEventId(),
120-
mutators.get(random.nextInt(mutators.size())));
126+
getRandomElement(mutators));
121127
scenario.getTransport().deliverEvent(message.getEventId());
122128

123129
EventDecision decision = new EventDecision(EventDecision.DecisionType.MUTATED_AND_DELIVERED, message.getEventId());

simulator/src/main/java/byzzbench/simulator/scheduler/twins/TwinsScheduler.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ public String getId() {
4747
@Override
4848
public void initializeScenario(Scenario scenario) {
4949
// Get the IDs of the replicas
50-
List<String> replicaIds = scenario.getReplicas().keySet().stream().sorted().toList();
50+
List<String> replicaIds = scenario.getFaultyReplicaIds().stream().toList();
5151

5252
if (replicaIds.size() < numReplicas) {
5353
throw new IllegalArgumentException("Not enough replicas to create " + numReplicas + " twins");

simulator/src/main/java/byzzbench/simulator/transport/Transport.java

+43
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,8 @@ public class Transport {
7575
*/
7676
@JsonIgnore
7777
private final List<TransportObserver> observers = new ArrayList<>();
78+
@Getter
79+
private boolean isGlobalStabilizationTime = false;
7880

7981
/**
8082
* Adds an observer to the transport layer.
@@ -276,13 +278,19 @@ public synchronized void deliverEvent(long eventId) throws Exception {
276278
* @param eventId The ID of the message to drop.
277279
*/
278280
public synchronized void dropEvent(long eventId) {
281+
// Check if it is GST - no more dropping
282+
if (this.isGlobalStabilizationTime) {
283+
throw new IllegalStateException("Cannot drop events during GST");
284+
}
285+
279286
// check if event is a message
280287
Event e = events.get(eventId);
281288

282289
if (e.getStatus() != Event.Status.QUEUED) {
283290
return;
284291
// throw new IllegalArgumentException("Event not found or not in QUEUED state");
285292
}
293+
286294
e.setStatus(Event.Status.DROPPED);
287295
this.observers.forEach(o -> o.onEventDropped(e));
288296
log.info("Dropped: " + e);
@@ -329,6 +337,13 @@ public synchronized void applyMutation(long eventId, Fault fault) {
329337
"Event %d is not a message - cannot mutate it.", eventId));
330338
}
331339

340+
// check if sender is faulty
341+
if (!this.scenario.isFaultyReplica(m.getSenderId())) {
342+
throw new IllegalArgumentException(
343+
String.format("Cannot mutate message: sender %s is not marked as faulty", m.getSenderId())
344+
);
345+
}
346+
332347
// create input for the fault
333348
FaultContext input = new FaultContext(this.scenario, e);
334349

@@ -494,4 +509,32 @@ public synchronized List<Fault> getEnabledNetworkFaults() {
494509
public synchronized Fault getNetworkFault(String faultId) {
495510
return this.networkFaults.get(faultId);
496511
}
512+
513+
/**
514+
* Simulates GST event, according to the partial-synchrony model:
515+
* <ul>
516+
* <li>All dropped messages are re-queued</li>
517+
* <li>Prevents further dropping of messages</li>
518+
* <li>All network partitions are healed</li>
519+
* <li>Prevents further network partitions</li>
520+
* </ul>
521+
*/
522+
public void globalStabilizationTime() {
523+
this.isGlobalStabilizationTime = true;
524+
525+
// re-queue all dropped messages
526+
this.events.values().stream()
527+
.filter(e -> e.getStatus() == Event.Status.DROPPED)
528+
.forEach(e -> {
529+
e.setStatus(Event.Status.QUEUED);
530+
this.observers.forEach(o -> o.onEventRequeued(e));
531+
});
532+
533+
// clear all network faults
534+
// XXX: Is this the right thing to do?
535+
this.networkFaults.clear();
536+
537+
// heal all partitions
538+
this.router.resetPartitions();
539+
}
497540
}

simulator/src/main/java/byzzbench/simulator/transport/TransportObserver.java

+7
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,13 @@ public interface TransportObserver {
1717
*/
1818
void onEventDropped(Event event);
1919

20+
/**
21+
* Called when the status of an event changes from {@link Event.Status#DROPPED} to {@link Event.Status#QUEUED}.
22+
*
23+
* @param event The event that was re-queued after being previously dropped.
24+
*/
25+
void onEventRequeued(Event event);
26+
2027
/**
2128
* Called when the status of an event changes to {@link Event.Status#DELIVERED}.
2229
*

0 commit comments

Comments
 (0)