Skip to content

Commit 666bb5e

Browse files
committed
Merge remote-tracking branch 'origin/main' into hBFT-fixed
2 parents 31b9d97 + 81fba32 commit 666bb5e

File tree

6 files changed

+77
-12
lines changed

6 files changed

+77
-12
lines changed

simulator/src/main/java/byzzbench/simulator/BaseScenario.java

+10-1
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,16 @@ public synchronized Node getNode(String nodeId) {
179179
@Override
180180
public final void setupScenario() {
181181
this.setup();
182-
//this.getClients().values().forEach(Client::initialize);
182+
183+
// sample f replicas to be faulty at start
184+
List<String> replicaIds = new ArrayList<>(this.getReplicas().keySet());
185+
Collections.shuffle(replicaIds);
186+
int f = this.maxFaultyReplicas();
187+
for (int i = 0; i < f; i++) {
188+
this.markReplicaFaulty(replicaIds.get(i));
189+
}
190+
191+
this.getClients().values().forEach(Client::initialize);
183192
this.getNodes().values().forEach(Node::initialize);
184193
this.scheduler.initializeScenario(this);
185194
}

simulator/src/main/java/byzzbench/simulator/Timekeeper.java

+5
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,11 @@ public void onEventDropped(Event event) {
7373
// nothing to do
7474
}
7575

76+
@Override
77+
public void onEventRequeued(Event event) {
78+
// nothing to do
79+
}
80+
7681
@Override
7782
public void onEventDelivered(Event event) {
7883
// check if it was a timeout

simulator/src/main/java/byzzbench/simulator/faults/factories/ByzzFuzzScenarioFaultFactory.java

+2-1
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ public List<Fault> generateFaults(FaultContext input) {
3737
int d = scheduler.getNumRoundsWithNetworkFaults();
3838
int r = scheduler.getNumRoundsWithFaults();
3939
Set<String> replicaIds = scenario.getReplicas().keySet();
40+
Set<String> faultyReplicaIds = scenario.getFaultyReplicaIds();
4041

4142
// Create network faults
4243
for (int i = 1; i <= d; i++) {
@@ -49,7 +50,7 @@ public List<Fault> generateFaults(FaultContext input) {
4950
// Create process faults
5051
for (int i = 1; i <= c; i++) {
5152
int round = rand.nextInt(r) + 1;
52-
String sender = replicaIds.stream().skip(rand.nextInt(replicaIds.size())).findFirst().orElseThrow();
53+
String sender = input.getScenario().getFaultyReplicaIds().stream().skip(rand.nextInt(faultyReplicaIds.size())).findFirst().orElseThrow();
5354
Set<String> recipientIds = SetSubsets.getRandomNonEmptySubset(replicaIds);
5455

5556
// generate process fault

simulator/src/main/java/byzzbench/simulator/transport/Transport.java

+43
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,8 @@ public class Transport {
7777
*/
7878
@JsonIgnore
7979
private final List<TransportObserver> observers = new ArrayList<>();
80+
@Getter
81+
private boolean isGlobalStabilizationTime = false;
8082

8183
/**
8284
* Adds an observer to the transport layer.
@@ -350,6 +352,11 @@ public synchronized void deliverEvent(long eventId) throws Exception {
350352
* @param eventId The ID of the message to drop.
351353
*/
352354
public synchronized void dropEvent(long eventId) {
355+
// Check if it is GST - no more dropping
356+
if (this.isGlobalStabilizationTime) {
357+
throw new IllegalStateException("Cannot drop events during GST");
358+
}
359+
353360
// check if event is a message
354361
Event e = events.get(eventId);
355362
if (e instanceof TimeoutEvent) {
@@ -359,6 +366,7 @@ public synchronized void dropEvent(long eventId) {
359366
if (e.getStatus() != Event.Status.QUEUED) {
360367
throw new IllegalArgumentException("Event not found or not in QUEUED state");
361368
}
369+
362370
e.setStatus(Event.Status.DROPPED);
363371
this.observers.forEach(o -> o.onEventDropped(e));
364372
//log.info("Dropped: " + e);
@@ -404,6 +412,13 @@ public synchronized void applyMutation(long eventId, Fault fault) {
404412
"Event %d is not a message - cannot mutate it.", eventId));
405413
}
406414

415+
// check if sender is faulty
416+
if (!this.scenario.isFaultyReplica(m.getSenderId())) {
417+
throw new IllegalArgumentException(
418+
String.format("Cannot mutate message: sender %s is not marked as faulty", m.getSenderId())
419+
);
420+
}
421+
407422
// create input for the fault
408423
FaultContext input = new FaultContext(this.scenario, e);
409424

@@ -647,4 +662,32 @@ public synchronized List<Fault> getEnabledNetworkFaults() {
647662
public synchronized Fault getNetworkFault(String faultId) {
648663
return this.networkFaults.get(faultId);
649664
}
665+
666+
/**
667+
* Simulates GST event, according to the partial-synchrony model:
668+
* <ul>
669+
* <li>All dropped messages are re-queued</li>
670+
* <li>Prevents further dropping of messages</li>
671+
* <li>All network partitions are healed</li>
672+
* <li>Prevents further network partitions</li>
673+
* </ul>
674+
*/
675+
public void globalStabilizationTime() {
676+
this.isGlobalStabilizationTime = true;
677+
678+
// re-queue all dropped messages
679+
this.events.values().stream()
680+
.filter(e -> e.getStatus() == Event.Status.DROPPED)
681+
.forEach(e -> {
682+
e.setStatus(Event.Status.QUEUED);
683+
this.observers.forEach(o -> o.onEventRequeued(e));
684+
});
685+
686+
// clear all network faults
687+
// XXX: Is this the right thing to do?
688+
this.networkFaults.clear();
689+
690+
// heal all partitions
691+
this.router.resetPartitions();
692+
}
650693
}

simulator/src/main/java/byzzbench/simulator/transport/TransportObserver.java

+7
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,13 @@ public interface TransportObserver {
1717
*/
1818
void onEventDropped(Event event);
1919

20+
/**
21+
* Called when the status of an event changes from {@link Event.Status#DROPPED} to {@link Event.Status#QUEUED}.
22+
*
23+
* @param event The event that was re-queued after being previously dropped.
24+
*/
25+
void onEventRequeued(Event event);
26+
2027
/**
2128
* Called when the status of an event changes to {@link Event.Status#DELIVERED}.
2229
*

simulator/src/main/resources/application.yml

+10-10
Original file line numberDiff line numberDiff line change
@@ -22,20 +22,20 @@ byzzbench:
2222
outputSchedules: buggy # which schedules to write to file? one of 'all', 'buggy' or 'none'
2323

2424
scheduler:
25-
id: "byzzfuzz" # The ID of the scheduler to use
25+
id: "random" # The ID of the scheduler to use
2626
executionMode: sync # async (default, any message delivered) or sync (communication-closure hypothesis, FIFO)
27-
maxDropMessages: 0 # Maximum number of messages to drop per scenario
28-
maxMutateMessages: 0 # Maximum number of messages to mutate per scenario
29-
deliverTimeoutWeight: 1 # The weight for scheduler to trigger a timeout
27+
maxDropMessages: 1 # Maximum number of messages to drop per scenario
28+
maxMutateMessages: 1 # Maximum number of messages to mutate per scenario
29+
deliverTimeoutWeight: 25 # The weight for scheduler to trigger a timeout
3030
deliverMessageWeight: 99 # The weight for scheduler to deliver a message
3131
deliverClientRequestWeight: 99 # The weight for scheduler to deliver a client request to a replica
32-
dropMessageWeight: 100 # The weight for scheduler to drop a message
33-
mutateMessageWeight: 100 # The weight for scheduler to mutate a message
32+
dropMessageWeight: 0 # The weight for scheduler to drop a message
33+
mutateMessageWeight: 50 # The weight for scheduler to mutate a message
3434
params: # additional parameters for the scheduler
3535
# ByzzFuzz
3636
numRoundsWithProcessFaults: 1
37-
numRoundsWithNetworkFaults: 1
38-
numRoundsWithFaults: 2
37+
numRoundsWithNetworkFaults: 0
38+
numRoundsWithFaults: 8
3939
# Twins
4040
numReplicas: 1
4141
numTwinsPerReplica: 2
@@ -47,6 +47,6 @@ byzzbench:
4747
scheduledFaults:
4848
factories: [ ]
4949
termination: # Success condition for the scenario - terminates it when all conditions are met
50-
minEvents: 100 # check if N events have been scheduled. If so, can terminate
51-
minRounds: 2 # check if N rounds have elapsed. If so, can terminate
50+
minEvents: 400 # check if N events have been scheduled. If so, can terminate
51+
minRounds: 8 # check if N rounds have elapsed. If so, can terminate
5252
samplingFrequency: 30 # check if should terminate every N scheduler decisions

0 commit comments

Comments
 (0)