Skip to content

Commit 33a540a

Browse files
committed Jan 24, 2025
Fixes all small bugs that were found after applying mutations
1 parent 5d271cf commit 33a540a

File tree

2 files changed

+55
-92
lines changed

2 files changed

+55
-92
lines changed
 

‎simulator/src/main/java/byzzbench/simulator/protocols/Zyzzyva/ZyzzyvaReplica.java

+53-90
Original file line number | Diff line number | Diff line change
@@ -88,18 +88,6 @@ public ZyzzyvaReplica(String replicaId,
8888
);
8989
// we set this as a null commit certificate for view changes etc. this is the first instance the system is stable
9090
this.getMessageLog().setMaxCC(startCC);
91-
92-
this.setRequestTimeoutId(this.setTimeout(
93-
"requestTimeout",
94-
() -> {
95-
log.warning("Replica " + this.getId() + " didn't receive next request in time, init view change");
96-
IHateThePrimaryMessage ihtpm = new IHateThePrimaryMessage(this.getViewNumber());
97-
ihtpm.sign(this.getId());
98-
this.broadcastMessage(ihtpm);
99-
this.handleIHateThePrimaryMessage(this.getId(), ihtpm);
100-
},
101-
Duration.ofSeconds(300)
102-
));
10391
}
10492

10593
@Override
@@ -272,7 +260,7 @@ public void handleClientRequest(String clientId, Serializable request) {
272260
this.calculateHistory(this.getHighestSequenceNumber() + 1, digest),
273261
// digest
274262
digest);
275-
log.info("Replica " + this.getId() + " ordering request with sequence number " + orm.getSequenceNumber());
263+
log.info("Replica " + this.getId() + " ordering request with sequence number " + orm.getSequenceNumber() + " and operation " + requestMessage.getOperation());
276264
orm.sign(this.getId());
277265
OrderedRequestMessageWrapper ormw = new OrderedRequestMessageWrapper(orm, requestMessage);
278266
this.broadcastMessage(ormw);
@@ -366,12 +354,19 @@ public void handleOrderedRequestMessageWrapper(String sender, OrderedRequestMess
366354
log.warning("Failed to clear forward to primary timeout, possibly because it's been triggered");
367355
} catch (NullPointerException ignored) {
368356
}
369-
SpeculativeResponseWrapper response = this.executeOrderedRequest(ormw);
357+
SpeculativeResponseWrapper srw = this.executeOrderedRequest(ormw);
370358
if (ormw.getRequestMessage().getClientId().equals("Noop")) {
371359
log.info("Received a noop");
372360
return;
373361
}
374-
this.sendReplyToClient(ormw.getRequestMessage().getClientId(), response);
362+
this.sendReplyToClient(ormw.getRequestMessage().getClientId(), srw);
363+
364+
// checkpointing
365+
if (ormw.getOrderedRequest().getSequenceNumber() % this.getCP_INTERVAL() == 0) {
366+
log.info("Replica " + this.getId() + " going to checkpoint");
367+
this.broadcastMessage(srw.getSpecResponse());
368+
this.handleSpeculativeResponse(this.getId(), srw.getSpecResponse());
369+
}
375370
}
376371

377372
/**
@@ -394,13 +389,6 @@ public SpeculativeResponseWrapper executeOrderedRequest(OrderedRequestMessageWra
394389
// updates the request cache
395390
this.getMessageLog().putResponseCache(clientId, ormw.getRequestMessage(), srw);
396391

397-
// checkpointing
398-
if (ormw.getOrderedRequest().getSequenceNumber() % this.getCP_INTERVAL() == 0) {
399-
log.info("Checkpointing");
400-
this.broadcastMessage(srw.getSpecResponse());
401-
this.handleSpeculativeResponse(this.getId(), srw.getSpecResponse());
402-
}
403-
404392
return srw;
405393
}
406394

@@ -756,6 +744,7 @@ private void handleCommitMessage(String sender, CommitMessage commitMessage) {
756744
log.warning("Received invalid commit certificate from " + sender);
757745
return;
758746
}
747+
log.info("Received a commit certificate from " + sender + " with sequence number " + cc.getSequenceNumber());
759748
// commit the operations
760749
this.handleCommitCertificate(cc);
761750
LocalCommitMessage lcm = new LocalCommitMessage(
@@ -764,6 +753,7 @@ private void handleCommitMessage(String sender, CommitMessage commitMessage) {
764753
cc.getHistory(),
765754
this.getId(),
766755
sender);
756+
log.info("Replica " + this.getId() + " sending a local commit message to " + sender);
767757
lcm.sign(this.getId());
768758
this.sendMessage(lcm, sender);
769759
}
@@ -801,6 +791,7 @@ private void handleCommitCertificate(CommitCertificate cc) {
801791
this.getMessageLog().setMaxCC(cc);
802792
if (this.getCommitLog().getHighestSequenceNumber() > this.getMessageLog().getMaxCC().getSequenceNumber()) {
803793
log.warning("Replica " + this.getId() + " has a higher sequence number in the commit log than the maxCC");
794+
throw new IllegalStateException("Replica " + this.getId() + " has a higher sequence number in the commit log than the maxCC");
804795
}
805796
}
806797

@@ -849,7 +840,7 @@ private boolean isValidCommitCertificate(CommitCertificate cc) {
849840

850841
// if the currentCC is not null and the sequence number is the same, we return true, this means we commit again?
851842
if (currentCC != null && cc.getSequenceNumber() == currentCC.getSequenceNumber()) {
852-
log.warning("Replica " + this.getId() +" received a commit certificate with the same sequence number");
843+
log.warning("Replica " + this.getId() + " received a commit certificate with the same sequence number ");
853844
return false;
854845
}
855846

@@ -994,21 +985,6 @@ private void commitToViewChange() {
994985
log.warning("MaxCC is null");
995986
}
996987

997-
998-
// creates the CCs
999-
// if (this.getMessageLog().getMaxCC() != null && this.getMessageLog().getMaxCC().getViewNumber() == this.getViewNumber()) {
1000-
// cc = this.getMessageLog().getMaxCC();
1001-
// }
1002-
// } else if (this.getMessageLog()
1003-
// .getViewConfirmMessages()
1004-
// .getOrDefault(this.getViewNumber(), new ArrayList<>())
1005-
// .size() >= this.faultsTolerated + 1) {
1006-
// cc = new ArrayList<>(this.getMessageLog().getViewConfirmMessages().get(this.getViewNumber()));
1007-
// } else {
1008-
// if(this.getMessageLog().getNewViewMessages().isEmpty()) {log.warning("New view messages is empty"); return;}
1009-
// cc = this.getMessageLog().getNewViewMessages().sequencedValues().getLast();
1010-
// }
1011-
1012988
if (this.getMessageLog().getCheckpointMessages().get(this.getMessageLog().getLastCheckpoint()) == null) {
1013989
log.warning("Checkpoint messages is null");
1014990
}
@@ -1032,7 +1008,7 @@ private void commitToViewChange() {
10321008
this.broadcastMessage(vcmw);
10331009
this.handleViewChangeMessageWrapper(this.getId(), vcmw);
10341010
}
1035-
1011+
log.info("Replica " + this.getId() + " committed to a view change with maxCC " + cc.getSequenceNumber() + ", last checkpoint " + this.getMessageLog().getLastCheckpoint() + " and highest sequence number " + this.getHighestSequenceNumber());
10361012
this.setDisgruntled(true);
10371013
}
10381014

@@ -1450,31 +1426,7 @@ public void handleViewConfirmMessage(String sender, ViewConfirmMessage vcm) {
14501426
private void reconcileLocalHistoryViewChange(Collection<ViewChangeMessage> viewChangeMessages, SortedMap<Long, OrderedRequestMessageWrapper> calculatedHistory) {
14511427
long latestStableCheckpoint = viewChangeMessages.stream().map(ViewChangeMessage::getStableCheckpoint).max(Long::compareTo).orElse(-1L);
14521428
CommitCertificate maxCC = viewChangeMessages.stream().map(ViewChangeMessage::getCommitCertificate).max(Comparator.comparingLong(CommitCertificate::getSequenceNumber)).get();
1453-
// // when we perform a view change directly after a checkpoint
1454-
// // our maxCC is equal to this
1455-
// if (calculatedHistory.isEmpty()) {
1456-
// log.info("Calculated history is empty probably because we committed right before the view change");
1457-
// if (maxCC.getSequenceNumber() != latestStableCheckpoint) {
1458-
// throw new IllegalStateException("MaxCC sequence number is not equal to the latest stable checkpoint when calculated history is empty");
1459-
// }
1460-
// // we are up to speed, nothing to reconcile
1461-
// if (latestStableCheckpoint == this.getHighestSequenceNumber()) return;
1462-
// // to create a CC, we need 2f + 1 replicas or more to agree
1463-
// // to create new view message we need 2f + 1 replicas, so our maxCC will always equal the last cc.
1464-
// // we therefore roll back to the last CC, since we've already commmitted
1465-
// /// TODO: change this then
1466-
// if (latestStableCheckpoint < this.getHighestSequenceNumber() && this.getMessageLog().getLastCheckpoint() == latestStableCheckpoint) {
1467-
// rollbackToCheckpoint(latestStableCheckpoint, maxCC);
1468-
// return;
1469-
// }
1470-
// // we catch up to the checkpoint
1471-
// if (latestStableCheckpoint > this.getHighestSequenceNumber()) {
1472-
// this.catchUpToCheckpoint(latestStableCheckpoint, calculatedHistory, maxCC);
1473-
// return;
1474-
// }
1475-
// throw new IllegalStateException("Something went wrong in empty reconciliation");
1476-
// }
1477-
1429+
log.info("Replica " + this.getId() + " reconciling local history with maxCC " + maxCC.getSequenceNumber() + ", latest stable checkpoint " + latestStableCheckpoint + " and highest sequence number " + this.getHighestSequenceNumber());
14781430
if (calculatedHistory.isEmpty()) {
14791431
// nothing to reconcile
14801432
if (latestStableCheckpoint == this.getHighestSequenceNumber()) return;
@@ -1509,24 +1461,49 @@ else if (this.getHighestSequenceNumber() == latestStableCheckpoint) {
15091461
// if we have a higher commit certificate, we set it
15101462
this.handleCommitCertificate(maxCC);
15111463
}
1512-
// max-l > min-s and histories diverge
1464+
// max-l > min-s
15131465
else {
1514-
long lastHistoryKey = this.getHistory().getLastKey();
1466+
long lastHistoryKey;
1467+
try {
1468+
lastHistoryKey = this.getHistory().getLastKey();
1469+
} catch (NoSuchElementException e) {
1470+
log.warning("No last history key found");
1471+
return;
1472+
}
1473+
15151474
long lastHistory = this.getHistory().get(lastHistoryKey);
15161475
if (calculatedHistory.get(lastHistoryKey).getOrderedRequest().getHistory() == lastHistory) {
15171476
// handle the last commit certificate (make sure everything is committed)
15181477
// execute from max-l + 1
1478+
15191479
for (long i = this.getHighestSequenceNumber() + 1; i <= calculatedHistory.sequencedKeySet().getLast(); i++) {
15201480
this.executeOrderedRequest(calculatedHistory.get(i));
15211481
}
15221482
if (this.getMessageLog().getLastCheckpoint() < latestStableCheckpoint) {
15231483
this.getMessageLog().setLastCheckpoint(latestStableCheckpoint);
15241484
}
1525-
this.handleCommitCertificate(maxCC);
1485+
1486+
if (this.getMessageLog().getMaxCC().getSequenceNumber() < maxCC.getSequenceNumber()) {
1487+
this.handleCommitCertificate(maxCC);
1488+
}
15261489
}
15271490
// histories diverge and we roll back
15281491
else {
1529-
this.rollbackToCheckpoint(latestStableCheckpoint, calculatedHistory, maxCC);
1492+
if (this.getMessageLog().getMaxCC().getSequenceNumber() > maxCC.getSequenceNumber()) {
1493+
log.info("Diverging histories, rolling back to checkpoint");
1494+
this.getMessageLog().getOrderedMessages().clear();
1495+
this.getHistory().clear();
1496+
this.getMessageLog().getRequestCache().clear();
1497+
for (OrderedRequestMessageWrapper ormw : calculatedHistory.sequencedValues()) {
1498+
if (ormw.getOrderedRequest().getSequenceNumber() <= this.getMessageLog().getMaxCC().getSequenceNumber()) {
1499+
continue;
1500+
}
1501+
this.executeOrderedRequest(ormw);
1502+
}
1503+
1504+
} else {
1505+
this.rollbackToCheckpoint(latestStableCheckpoint, calculatedHistory, maxCC);
1506+
}
15301507
}
15311508
}
15321509

@@ -1544,6 +1521,7 @@ else if (this.getHighestSequenceNumber() == latestStableCheckpoint) {
15441521
* @param maxCC - the maxCC received from the view change messages
15451522
*/
15461523
private void catchUpToCheckpoint(long latestStableCheckpoint, SortedMap<Long, OrderedRequestMessageWrapper> calculatedHistory, CommitCertificate maxCC) {
1524+
log.info("Replica " + this.getId() + " catching up to checkpoint " + latestStableCheckpoint);
15471525
// set the latest checkpoint
15481526
this.getMessageLog().setLastCheckpoint(latestStableCheckpoint);
15491527
// clear the history
@@ -1570,23 +1548,7 @@ private void catchUpToCheckpoint(long latestStableCheckpoint, SortedMap<Long, Or
15701548
* @param maxCC - the maxCC received from the view change messages
15711549
*/
15721550
private void rollbackToCheckpoint(long latestStableCheckpoint, SortedMap<Long, OrderedRequestMessageWrapper> calculatedHistory, CommitCertificate maxCC) {
1573-
// // removes the checkpoint responses
1574-
// this.getMessageLog().getSpeculativeResponsesCheckpoint().clear();
1575-
// // puts the orm corresponding to the latest stable checkpoint back into the ordered messages
1576-
// OrderedRequestMessageWrapper ccRequest = this.getMessageLog().getOrderedMessages().get(latestStableCheckpoint);
1577-
// if (ccRequest == null) {
1578-
// log.warning("Couldn't find the checkpoint request");
1579-
// }
1580-
// this.getMessageLog().getOrderedMessages().clear();
1581-
// this.getMessageLog().putOrderedRequestMessageWrapper(ccRequest);
1582-
// this.getHistory().clear();
1583-
// if (maxCC.getSequenceNumber() != latestStableCheckpoint) {
1584-
// /// TODO: this doesn't seem right
1585-
// throw new IllegalStateException("MaxCC sequence number is not equal to the latest stable checkpoint when calculated history is empty");
1586-
// }
1587-
// this.getHistory().add(latestStableCheckpoint, maxCC.getHistory());
1588-
// this.setHighestSequenceNumber(latestStableCheckpoint);
1589-
1551+
log.info("Replica " + this.getId() + " rolling back to checkpoint " + latestStableCheckpoint);
15901552
// remove everything in the message logs
15911553
this.getMessageLog().getSpeculativeResponsesCheckpoint().clear();
15921554
this.getMessageLog().getOrderedMessages().clear();
@@ -1665,6 +1627,10 @@ private void handleNewViewMessage(String sender, NewViewMessage nvm) {
16651627
if(!isValidNewViewMessage(nvm)) {
16661628
return;
16671629
}
1630+
if (this.getMessageLog().getNewViewMessages().containsKey(nvm.getFutureViewNumber())) {
1631+
log.info("Received a new view message for a view number that we've already received");
1632+
return;
1633+
}
16681634

16691635
// add the new view message to the message log
16701636
this.getMessageLog().putNewViewMessage(nvm);
@@ -1724,11 +1690,6 @@ private void beginNewView(ViewConfirmMessage vcm) {
17241690
this.getMessageLog().getIHateThePrimaries().getOrDefault(this.getViewNumber(), new TreeMap<>()).clear();
17251691
this.getMessageLog().getFillHoleMessages().clear();
17261692

1727-
// this.getHistory().clear();
1728-
// this.getHistory().add(vcm.getLastKnownSequenceNumber(), vcm.getHistory());
1729-
/// TODO: See if this is higher than what we have so far, because it might mess with the ordering and cause a replica to skip.
1730-
// this.setHighestSequenceNumber(vcm.getLastKnownSequenceNumber());
1731-
17321693
// sets the view number and primary
17331694
log.info("Replica " +
17341695
this.getId() +
@@ -1813,6 +1774,7 @@ private void checkIfCommitCheckpoint(long sequenceNumber) {
18131774
* @param sequenceNumber - the sequence number to create the checkpoint for
18141775
*/
18151776
private void createCheckpoint(long sequenceNumber) {
1777+
log.info("Replica " + this.getId() + " created stable checkpoint for sequence number " + sequenceNumber);
18161778
// set the last checkpoint
18171779
this.getMessageLog().setLastCheckpoint(sequenceNumber);
18181780

@@ -1914,6 +1876,7 @@ private void checkIfCheckpoint(long sequenceNumber) {
19141876
);
19151877
cm.sign(this.getId());
19161878
this.broadcastMessage(cm);
1879+
log.info("Replica " + this.getId() + " sent a checkpoint message for sequence number " + sequenceNumber);
19171880
this.handleCheckpointMessage(this.getId(), cm);
19181881
}
19191882
}

‎simulator/src/main/resources/application.yml

+2-2
Original file line number | Diff line number | Diff line change
@@ -17,12 +17,12 @@ springdoc:
1717
# ByzzBench configuration
1818
byzzbench:
1919
autostart: false # Whether to start running scenarios automatically on startup
20-
numScenarios: 5000
20+
numScenarios: 100
2121
#outputPath: /tmp/byzzbench # The path to write the output to
2222
outputSchedules: buggy # which schedules to write to file? one of 'all', 'buggy' or 'none'
2323

2424
scheduler:
25-
id: "random" # The ID of the scheduler to use
25+
id: "byzzfuzz" # The ID of the scheduler to use
2626
executionMode: sync # async (default, any message delivered) or sync (communication-closure hypothesis, FIFO)
2727
maxDropMessages: 0 # Maximum number of messages to drop per scenario
2828
maxMutateMessages: 0 # Maximum number of messages to mutate per scenario

0 commit comments

Comments
 (0)
Please sign in to comment.