Skip to content

Commit 9ea27b7

Browse files
authored
Merge pull request #147 from joaomlneto/sample-f-on-startup
Sample set of faulty replicas on startup
2 parents 499989b + b5b60b0 commit 9ea27b7

File tree

5 files changed

+59
-1
lines changed

5 files changed

+59
-1
lines changed

simulator/src/main/java/byzzbench/simulator/BaseScenario.java

+9
Original file line number | Diff line number | Diff line change
@@ -166,6 +166,15 @@ public synchronized Node getNode(String nodeId) {
166166
@Override
167167
public final void setupScenario() {
168168
this.setup();
169+
170+
// sample f replicas to be faulty at start
171+
List<String> replicaIds = new ArrayList<>(this.getReplicas().keySet());
172+
Collections.shuffle(replicaIds);
173+
int f = this.maxFaultyReplicas();
174+
for (int i = 0; i < f; i++) {
175+
this.markReplicaFaulty(replicaIds.get(i));
176+
}
177+
169178
this.getClients().values().forEach(Client::initialize);
170179
this.getNodes().values().forEach(Node::initialize);
171180
this.scheduler.initializeScenario(this);

simulator/src/main/java/byzzbench/simulator/Timekeeper.java

+5
Original file line number | Diff line number | Diff line change
@@ -73,6 +73,11 @@ public void onEventDropped(Event event) {
7373
// nothing to do
7474
}
7575

76+
/**
 * Callback for an event that was re-queued; this class takes no action on it.
 *
 * @param event the re-queued event (unused)
 */
@Override
77+
public void onEventRequeued(Event event) {
78+
// nothing to do
79+
}
80+
7681
@Override
7782
public void onEventDelivered(Event event) {
7883
// check if it was a timeout

simulator/src/main/java/byzzbench/simulator/faults/factories/ByzzFuzzScenarioFaultFactory.java

+2 -1
Original file line number | Diff line number | Diff line change
@@ -37,6 +37,7 @@ public List<Fault> generateFaults(FaultContext input) {
3737
int d = scheduler.getNumRoundsWithNetworkFaults();
3838
int r = scheduler.getNumRoundsWithFaults();
3939
Set<String> replicaIds = scenario.getReplicas().keySet();
40+
Set<String> faultyReplicaIds = scenario.getFaultyReplicaIds();
4041

4142
// Create network faults
4243
for (int i = 1; i <= d; i++) {
@@ -49,7 +50,7 @@ public List<Fault> generateFaults(FaultContext input) {
4950
// Create process faults
5051
for (int i = 1; i <= c; i++) {
5152
int round = rand.nextInt(r) + 1;
52-
String sender = replicaIds.stream().skip(rand.nextInt(replicaIds.size())).findFirst().orElseThrow();
53+
String sender = input.getScenario().getFaultyReplicaIds().stream().skip(rand.nextInt(faultyReplicaIds.size())).findFirst().orElseThrow();
5354
Set<String> recipientIds = SetSubsets.getRandomNonEmptySubset(replicaIds);
5455

5556
// generate process fault

simulator/src/main/java/byzzbench/simulator/transport/Transport.java

+36
Original file line number | Diff line number | Diff line change
@@ -75,6 +75,8 @@ public class Transport {
7575
*/
7676
@JsonIgnore
7777
private final List<TransportObserver> observers = new ArrayList<>();
78+
/**
 * Whether global stabilization time (GST) has been reached. Starts as {@code false}
 * and is set to {@code true} by {@code globalStabilizationTime()}; while it is
 * {@code true}, {@code dropEvent(long)} throws instead of dropping the event.
 */
@Getter
79+
private boolean isGlobalStabilizationTime = false;
7880

7981
/**
8082
* Adds an observer to the transport layer.
@@ -275,12 +277,18 @@ public synchronized void deliverEvent(long eventId) throws Exception {
275277
* @param eventId The ID of the message to drop.
276278
*/
277279
public synchronized void dropEvent(long eventId) {
280+
// Check if it is GST - no more dropping
281+
if (this.isGlobalStabilizationTime) {
282+
throw new IllegalStateException("Cannot drop events during GST");
283+
}
284+
278285
// check if event is a message
279286
Event e = events.get(eventId);
280287

281288
if (e.getStatus() != Event.Status.QUEUED) {
282289
throw new IllegalArgumentException("Event not found or not in QUEUED state");
283290
}
291+
284292
e.setStatus(Event.Status.DROPPED);
285293
this.observers.forEach(o -> o.onEventDropped(e));
286294
log.info("Dropped: " + e);
@@ -491,4 +499,32 @@ public synchronized List<Fault> getEnabledNetworkFaults() {
491499
public synchronized Fault getNetworkFault(String faultId) {
492500
return this.networkFaults.get(faultId);
493501
}
502+
503+
/**
504+
* Simulates GST event, according to the partial-synchrony model:
505+
* <ul>
506+
* <li>All dropped messages are re-queued</li>
507+
* <li>Prevents further dropping of messages</li>
508+
* <li>All network partitions are healed</li>
509+
* <li>Prevents further network partitions</li>
510+
* </ul>
511+
*/
512+
public void globalStabilizationTime() {
513+
this.isGlobalStabilizationTime = true;
514+
515+
// re-queue all dropped messages
516+
this.events.values().stream()
517+
.filter(e -> e.getStatus() == Event.Status.DROPPED)
518+
.forEach(e -> {
519+
e.setStatus(Event.Status.QUEUED);
520+
this.observers.forEach(o -> o.onEventRequeued(e));
521+
});
522+
523+
// clear all network faults
524+
// XXX: Is this the right thing to do?
525+
this.networkFaults.clear();
526+
527+
// heal all partitions
528+
this.router.resetPartitions();
529+
}
494530
}

simulator/src/main/java/byzzbench/simulator/transport/TransportObserver.java

+7
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,13 @@ public interface TransportObserver {
1717
*/
1818
void onEventDropped(Event event);
1919

20+
/**
21+
* Called when the status of an event changes from {@link Event.Status#DROPPED} to {@link Event.Status#QUEUED}.
22+
*
23+
* @param event The event that was re-queued after being previously dropped.
24+
*/
25+
void onEventRequeued(Event event);
26+
2027
/**
2128
* Called when the status of an event changes to {@link Event.Status#DELIVERED}.
2229
*

0 commit comments

Comments
 (0)