Skip to content

Commit 2604ce8

Browse files
committed
xds: XdsNR should be subscribing to clusters with XdsDepManager
This is missing behavior defined in gRFC A74: > As per gRFC A31, the ConfigSelector gives each RPC a ref to the > cluster that was selected for it to ensure that the cluster is not > removed from the xds_cluster_manager LB policy config before the RPC > is done with its LB picks. These cluster refs will also hold a > subscription for the cluster from the XdsDependencyManager, so that > the XdsDependencyManager will not stop watching the cluster resource > until the cluster is removed from the xds_cluster_manager LB policy > config. Without the logic, RPCs can race and see the error: > INTERNAL: CdsLb for cluster0: Unable to find non-dynamic root cluster Fixes #12152. This fixes the regression introduced in 297ab05
1 parent 1c43098 commit 2604ce8

File tree

1 file changed

+25
-9
lines changed

1 file changed

+25
-9
lines changed

xds/src/main/java/io/grpc/xds/XdsNameResolver.java

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -548,7 +548,7 @@ public void run() {
548548
if (clusterRefs.get(cluster).refCount.get() != 0) {
549549
throw new AssertionError();
550550
}
551-
clusterRefs.remove(cluster);
551+
clusterRefs.remove(cluster).close();
552552
if (resolveState.lastConfigOrStatus.hasValue()) {
553553
updateResolutionResult(resolveState.lastConfigOrStatus.getValue());
554554
} else {
@@ -793,9 +793,13 @@ private void updateRoutes(
793793
clusterRefs.get(cluster).refCount.incrementAndGet();
794794
} else {
795795
if (clusterNameMap.containsKey(cluster)) {
796+
assert cluster.startsWith("cluster:");
797+
XdsConfig.Subscription subscription =
798+
xdsDependencyManager.subscribeToCluster(cluster.substring("cluster:".length()));
796799
clusterRefs.put(
797800
cluster,
798-
ClusterRefState.forCluster(new AtomicInteger(1), clusterNameMap.get(cluster)));
801+
ClusterRefState.forCluster(
802+
new AtomicInteger(1), clusterNameMap.get(cluster), subscription));
799803
}
800804
if (rlsPluginConfigMap.containsKey(cluster)) {
801805
clusterRefs.put(
@@ -826,7 +830,7 @@ private void updateRoutes(
826830
for (String cluster : deletedClusters) {
827831
int count = clusterRefs.get(cluster).refCount.decrementAndGet();
828832
if (count == 0) {
829-
clusterRefs.remove(cluster);
833+
clusterRefs.remove(cluster).close();
830834
shouldUpdateResult = true;
831835
}
832836
}
@@ -879,7 +883,7 @@ private void cleanUpRoutes(Status error) {
879883
for (String cluster : existingClusters) {
880884
int count = clusterRefs.get(cluster).refCount.decrementAndGet();
881885
if (count == 0) {
882-
clusterRefs.remove(cluster);
886+
clusterRefs.remove(cluster).close();
883887
}
884888
}
885889
existingClusters = null;
@@ -965,15 +969,18 @@ private static class ClusterRefState {
965969
final String traditionalCluster;
966970
@Nullable
967971
final RlsPluginConfig rlsPluginConfig;
972+
@Nullable
973+
final XdsConfig.Subscription subscription;
968974

969975
private ClusterRefState(
970976
AtomicInteger refCount, @Nullable String traditionalCluster,
971-
@Nullable RlsPluginConfig rlsPluginConfig) {
977+
@Nullable RlsPluginConfig rlsPluginConfig, @Nullable XdsConfig.Subscription subscription) {
972978
this.refCount = refCount;
973979
checkArgument(traditionalCluster == null ^ rlsPluginConfig == null,
974980
"There must be exactly one non-null value in traditionalCluster and pluginConfig");
975981
this.traditionalCluster = traditionalCluster;
976982
this.rlsPluginConfig = rlsPluginConfig;
983+
this.subscription = subscription;
977984
}
978985

979986
private Map<String, ?> toLbPolicy() {
@@ -993,12 +1000,21 @@ private ClusterRefState(
9931000
}
9941001
}
9951002

996-
static ClusterRefState forCluster(AtomicInteger refCount, String name) {
997-
return new ClusterRefState(refCount, name, null);
1003+
private void close() {
1004+
if (subscription != null) {
1005+
subscription.close();
1006+
}
1007+
}
1008+
1009+
static ClusterRefState forCluster(
1010+
AtomicInteger refCount, String name, XdsConfig.Subscription subscription) {
1011+
return new ClusterRefState(refCount, name, null, checkNotNull(subscription, "subscription"));
9981012
}
9991013

1000-
static ClusterRefState forRlsPlugin(AtomicInteger refCount, RlsPluginConfig rlsPluginConfig) {
1001-
return new ClusterRefState(refCount, null, rlsPluginConfig);
1014+
static ClusterRefState forRlsPlugin(
1015+
AtomicInteger refCount,
1016+
RlsPluginConfig rlsPluginConfig) {
1017+
return new ClusterRefState(refCount, null, rlsPluginConfig, null);
10021018
}
10031019
}
10041020
}

0 commit comments

Comments
 (0)