From 2df7b1cd3779922cc36e3a2ee9161fd341ed629e Mon Sep 17 00:00:00 2001 From: mattl-netflix <63665634+mattl-netflix@users.noreply.github.com> Date: Tue, 31 Aug 2021 12:02:15 -0700 Subject: [PATCH] Throw in case of gossip mismatch when determining replace_ip in assigned token case (#972) * CASS-1752 Throw in case of gossip mismatch when determining replace_ip in assigned token case * CASS-1752 Toggle throw-on-mismatch with a property. * Add test of two-node mismatch case --- .../netflix/priam/config/IConfiguration.java | 4 ++ .../priam/config/PriamConfiguration.java | 5 ++ .../priam/identity/token/TokenRetriever.java | 6 +++ .../token/AssignedTokenRetrieverTest.java | 52 +++++++++++++++++++ .../token/TokenRetrieverUtilsTest.java | 36 +++++++++++++ 5 files changed, 103 insertions(+) diff --git a/priam/src/main/java/com/netflix/priam/config/IConfiguration.java b/priam/src/main/java/com/netflix/priam/config/IConfiguration.java index 096206866..dd6a8009d 100644 --- a/priam/src/main/java/com/netflix/priam/config/IConfiguration.java +++ b/priam/src/main/java/com/netflix/priam/config/IConfiguration.java @@ -1140,6 +1140,10 @@ default boolean skipIngressUnlessIPIsPublic() { return false; } + default boolean permitDirectTokenAssignmentWithGossipMismatch() { + return false; + } + /** * Escape hatch for getting any arbitrary property by key This is useful so we don't have to * keep adding methods to this interface for every single configuration option ever. Also diff --git a/priam/src/main/java/com/netflix/priam/config/PriamConfiguration.java b/priam/src/main/java/com/netflix/priam/config/PriamConfiguration.java index 4c8e22afd..7b96715ac 100644 --- a/priam/src/main/java/com/netflix/priam/config/PriamConfiguration.java +++ b/priam/src/main/java/com/netflix/priam/config/PriamConfiguration.java @@ -774,4 +774,9 @@ public BackupsToCompress getBackupsToCompress() { return BackupsToCompress.valueOf( config.get("priam.backupsToCompress", BackupsToCompress.ALL.name())); } + + @Override + public boolean permitDirectTokenAssignmentWithGossipMismatch() { + return config.get(PRIAM_PRE + ".permitDirectTokenAssignmentWithGossipMismatch", false); + } } diff --git a/priam/src/main/java/com/netflix/priam/identity/token/TokenRetriever.java b/priam/src/main/java/com/netflix/priam/identity/token/TokenRetriever.java index a2280f004..5db933ef8 100644 --- a/priam/src/main/java/com/netflix/priam/identity/token/TokenRetriever.java +++ b/priam/src/main/java/com/netflix/priam/identity/token/TokenRetriever.java @@ -187,6 +187,12 @@ private String getReplacedIpForAssignedToken( "Priam found that the token is not alive according to Cassandra and we should start Cassandra in replace mode with replace ip: " + inferredIp); } + } else if (inferredTokenOwnership.getTokenInformationStatus() + == TokenRetrieverUtils.InferredTokenOwnership.TokenInformationStatus + .MISMATCH + && !config.permitDirectTokenAssignmentWithGossipMismatch()) { + throw new TokenRetrieverUtils.GossipParseException( + "We saw inconsistent results from gossip. Throwing to buy time for it to settle."); } return ipToReplace; } diff --git a/priam/src/test/java/com/netflix/priam/identity/token/AssignedTokenRetrieverTest.java b/priam/src/test/java/com/netflix/priam/identity/token/AssignedTokenRetrieverTest.java index e77f88d6d..94c02cb79 100644 --- a/priam/src/test/java/com/netflix/priam/identity/token/AssignedTokenRetrieverTest.java +++ b/priam/src/test/java/com/netflix/priam/identity/token/AssignedTokenRetrieverTest.java @@ -206,6 +206,56 @@ public void grabAssignedTokenThrowWhenGossipAgreesPreviousTokenOwnerIsLive( factory, membership, config, instanceInfo, tokenRetriever)); } + @Test + public void grabAssignedTokenThrowToBuyTimeWhenGossipDisagreesOnPreviousTokenOwner( + @Mocked IPriamInstanceFactory factory, + @Mocked IConfiguration config, + @Mocked IMembership membership, + @Mocked Sleeper sleeper, + @Mocked ITokenManager tokenManager, + @Mocked InstanceInfo instanceInfo, + @Mocked TokenRetrieverUtils retrievalUtils) { + List liveHosts = newPriamInstances(); + Collections.shuffle(liveHosts); + + TokenRetrieverUtils.InferredTokenOwnership inferredTokenOwnership = + new TokenRetrieverUtils.InferredTokenOwnership(); + inferredTokenOwnership.setTokenInformationStatus( + TokenRetrieverUtils.InferredTokenOwnership.TokenInformationStatus.MISMATCH); + inferredTokenOwnership.setTokenInformation( + new TokenRetrieverUtils.TokenInformation(liveHosts.get(0).getHostIP(), false)); + + new Expectations() { + { + config.getAppName(); + result = APP; + + factory.getAllIds(DEAD_APP); + result = ImmutableSet.of(); + factory.getAllIds(APP); + result = ImmutableSet.copyOf(liveHosts); + + instanceInfo.getInstanceId(); + result = liveHosts.get(0).getInstanceId(); + + TokenRetrieverUtils.inferTokenOwnerFromGossip( + ImmutableSet.copyOf(liveHosts), + liveHosts.get(0).getToken(), + liveHosts.get(0).getDC()); + result = inferredTokenOwnership; + } + }; + + ITokenRetriever tokenRetriever = + new TokenRetriever( + factory, membership, config, instanceInfo, sleeper, tokenManager); + Assertions.assertThrows( + TokenRetrieverUtils.GossipParseException.class, + () -> + new InstanceIdentity( + factory, membership, config, instanceInfo, tokenRetriever)); + } + @Test public void grabAssignedTokenStartDbInBootstrapModeWhenGossipDisagreesOnPreviousTokenOwner( @Mocked IPriamInstanceFactory factory, @@ -230,6 +280,8 @@ public void grabAssignedTokenStartDbInBootstrapModeWhenGossipDisagreesOnPrevious { config.getAppName(); result = APP; + config.permitDirectTokenAssignmentWithGossipMismatch(); + result = true; factory.getAllIds(DEAD_APP); result = ImmutableSet.of(); diff --git a/priam/src/test/java/com/netflix/priam/identity/token/TokenRetrieverUtilsTest.java b/priam/src/test/java/com/netflix/priam/identity/token/TokenRetrieverUtilsTest.java index 7d20e90f6..54a5f4d9f 100644 --- a/priam/src/test/java/com/netflix/priam/identity/token/TokenRetrieverUtilsTest.java +++ b/priam/src/test/java/com/netflix/priam/identity/token/TokenRetrieverUtilsTest.java @@ -6,8 +6,10 @@ import com.google.common.collect.ImmutableSet; import com.netflix.priam.identity.PriamInstance; import com.netflix.priam.utils.SystemUtils; +import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.function.Function; import java.util.stream.Collectors; import java.util.stream.IntStream; import mockit.Expectations; @@ -111,6 +113,40 @@ public void testRetrieveTokenOwnerWhenGossipDisagrees(@Mocked SystemUtils system Assert.assertTrue(inferredTokenOwnership.getTokenInformation().isLive()); } + @Test + public void testRetrieveTokenOwnerWhenGossipDisagrees_2Nodes(@Mocked SystemUtils systemUtils) { + ImmutableSet myInstances = + ImmutableSet.copyOf(instances.stream().limit(3).collect(Collectors.toList())); + List myLiveInstances = liveInstances.stream().limit(3).collect(Collectors.toList()); + Map myTokenToEndpointMap = + IntStream.range(0, 3) + .mapToObj(String::valueOf) + .collect( + Collectors.toMap( + Function.identity(), (i) -> tokenToEndpointMap.get(i))); + Map alteredMap = new HashMap<>(myTokenToEndpointMap); + alteredMap.put("1", "1.2.3.4"); + + new Expectations() { + { + SystemUtils.getDataFromUrl(String.format(STATUS_URL_FORMAT, "fakeHost-0")); + result = getStatus(myLiveInstances, myTokenToEndpointMap); + minTimes = 0; + + SystemUtils.getDataFromUrl(String.format(STATUS_URL_FORMAT, "fakeHost-2")); + result = getStatus(myLiveInstances, alteredMap); + minTimes = 0; + } + }; + + TokenRetrieverUtils.InferredTokenOwnership inferredTokenOwnership = + TokenRetrieverUtils.inferTokenOwnerFromGossip(myInstances, "1", "us-east"); + Assert.assertEquals( + TokenRetrieverUtils.InferredTokenOwnership.TokenInformationStatus.MISMATCH, + inferredTokenOwnership.getTokenInformationStatus()); + Assert.assertTrue(inferredTokenOwnership.getTokenInformation().isLive()); + } + @Test public void testRetrieveTokenOwnerWhenAllHostsInGossipReturnsNull( @Mocked SystemUtils systemUtils) throws Exception {