Skip to content

Commit 1066c67

Browse files
committed
MULTIARCH-5369 Improve transparency for PreferredDuringSchedulingIgnoredDuringExecution configuration
1 parent 4a6c5e1 commit 1066c67

File tree

5 files changed

+74
-37
lines changed

5 files changed

+74
-37
lines changed

controllers/podplacement/events.go

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -11,16 +11,21 @@ const (
1111
ArchitectureAwareSchedulingGateRemovalFailure = "ArchAwareSchedGateRemovalFailed"
1212
ArchitectureAwareSchedulingGateRemovalSuccess = "ArchAwareSchedGateRemovalSuccess"
1313
NoSupportedArchitecturesFound = "NoSupportedArchitecturesFound"
14+
ArchitecturePreferredAffinityDuplicates = "ArchAwarePreferredAffinityDuplicates"
1415

15-
SchedulingGateAddedMsg = "Successfully gated with the " + utils.SchedulingGateName + " scheduling gate"
16-
SchedulingGateRemovalSuccessMsg = "Successfully removed the " + utils.SchedulingGateName + " scheduling gate"
17-
SchedulingGateRemovalFailureMsg = "Failed to remove the scheduling gate \"" + utils.SchedulingGateName + "\""
18-
ArchitecturePredicatesConflictMsg = "All the scheduling predicates already include architecture-specific constraints"
19-
ArchitecturePredicateSetupMsg = "Set the supported architectures to "
20-
ArchitecturePreferredPredicateSetupMsg = "Set the architecture preferences in the nodeAffinity"
21-
ArchitecturePreferredPredicateSkippedMsg = "The node affinity already includes architecture preferences"
22-
ImageArchitectureInspectionErrorMsg = "Failed to retrieve the supported architectures: "
23-
NoSupportedArchitecturesFoundMsg = "Pod cannot be scheduled due to incompatible image architectures; container images have no supported architectures in common"
24-
ArchitectureAwareGatedPodIgnoredMsg = "The gated pod has been modified and is no longer eligible for architecture-aware scheduling"
25-
ImageInspectionErrorMaxRetriesMsg = "Failed to retrieve the supported architectures after multiple retries"
16+
SchedulingGateAddedMsg = "Successfully gated with the " + utils.SchedulingGateName + " scheduling gate"
17+
SchedulingGateRemovalSuccessMsg = "Successfully removed the " + utils.SchedulingGateName + " scheduling gate"
18+
SchedulingGateRemovalFailureMsg = "Failed to remove the scheduling gate \"" + utils.SchedulingGateName + "\""
19+
ArchitecturePredicatesConflictMsg = "All the scheduling predicates already include architecture-specific constraints"
20+
ArchitecturePredicateSetupMsg = "Set the supported architectures to "
21+
22+
ArchitecturePreferredPredicateSetupMsg = "Applied all architecture preferences from configuration"
23+
ArchitecturePreferredAffinityWithDuplicatesMsg = "Applied some architecture preferences from configuration; others were already set"
24+
ArchitecturePreferredAffinityAllDuplicatesMsg = "Skipped all architecture preferences from configuration; all were already set"
25+
ArchitecturePreferredPredicateSkippedMsg = "Skipped configuration; no architecture preferences were provided"
26+
27+
ImageArchitectureInspectionErrorMsg = "Failed to retrieve the supported architectures: "
28+
NoSupportedArchitecturesFoundMsg = "Pod cannot be scheduled due to incompatible image architectures; container images have no supported architectures in common"
29+
ArchitectureAwareGatedPodIgnoredMsg = "The gated pod has been modified and is no longer eligible for architecture-aware scheduling"
30+
ImageInspectionErrorMaxRetriesMsg = "Failed to retrieve the supported architectures after multiple retries"
2631
)

controllers/podplacement/pod_model.go

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,8 @@ func (pod *Pod) setRequiredArchNodeAffinity(requirement corev1.NodeSelectorRequi
167167
}
168168

169169
// SetPreferredArchNodeAffinity appends the architecture preferences from the given NodeAffinityScoring configuration to the pod's preferred node affinity, skipping architectures that already have a preference set.
170-
func (pod *Pod) SetPreferredArchNodeAffinity(nodeAffinity *plugins.NodeAffinityScoring) {
170+
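// The configSource parameter identifies which configuration is setting the preferences: callers pass either the ClusterPodPlacementConfig kind, or the PodPlacementConfig resource name joined to the config's name with a hyphen. It is included in the published events and log entries.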
171+
func (pod *Pod) SetPreferredArchNodeAffinity(nodeAffinity *plugins.NodeAffinityScoring, configSource string) {
171172
log := ctrllog.FromContext(pod.Ctx())
172173
if pod.Spec.Affinity == nil {
173174
pod.Spec.Affinity = &corev1.Affinity{}
@@ -183,6 +184,7 @@ func (pod *Pod) SetPreferredArchNodeAffinity(nodeAffinity *plugins.NodeAffinityS
183184

184185
seenArchitectures := pod.getExistingPreferredArchitectures()
185186
var preferredSchedulingTerms []corev1.PreferredSchedulingTerm
187+
var skippedArchitectures []string
186188
for _, nodeAffinityScoringPlatformTerm := range nodeAffinity.Platforms {
187189
if !seenArchitectures[nodeAffinityScoringPlatformTerm.Architecture] {
188190
preferredSchedulingTerm := corev1.PreferredSchedulingTerm{
@@ -200,17 +202,37 @@ func (pod *Pod) SetPreferredArchNodeAffinity(nodeAffinity *plugins.NodeAffinityS
200202
preferredSchedulingTerms = append(preferredSchedulingTerms, preferredSchedulingTerm)
201203
seenArchitectures[nodeAffinityScoringPlatformTerm.Architecture] = true
202204
} else {
203-
log.Info("Preferred affinity for pod is already set", "Architecture", nodeAffinityScoringPlatformTerm.Architecture, "Weight", nodeAffinityScoringPlatformTerm.Weight, "Pod.Name", pod.Name, "Pod.Namespace", pod.Namespace)
205+
skippedArchitectures = append(skippedArchitectures, nodeAffinityScoringPlatformTerm.Architecture)
206+
log.Info("Preferred affinity for pod is already set", "Architecture", nodeAffinityScoringPlatformTerm.Architecture, "Weight", nodeAffinityScoringPlatformTerm.Weight, "Pod.Name", pod.Name, "Pod.Namespace", pod.Namespace, "ConfigSource", configSource)
204207
}
205208
}
206209

207-
// if the nodeSelectorTerms were patched at least once, we set the nodeAffinity label to the set value, to keep
208-
// track of the fact that the nodeAffinity was patched by the operator.
209210
if preferredSchedulingTerms != nil {
210211
pod.Spec.Affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution = append(
211212
pod.Spec.Affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution, preferredSchedulingTerms...)
212213
pod.EnsureLabel(utils.PreferredNodeAffinityLabel, utils.NodeAffinityLabelValueSet)
213-
pod.PublishEvent(corev1.EventTypeNormal, ArchitectureAwareNodeAffinitySet, ArchitecturePreferredPredicateSetupMsg)
214+
}
215+
switch {
216+
// Case 1: All architectures from this config were successfully added (no duplicates)
217+
case preferredSchedulingTerms != nil && skippedArchitectures == nil:
218+
pod.PublishEvent(corev1.EventTypeNormal, ArchitectureAwareNodeAffinitySet, fmt.Sprintf("%s source: %s", ArchitecturePreferredPredicateSetupMsg, configSource))
219+
log.V(2).Info("Applied all architecture preferences from configuration", "ConfigSource", configSource)
220+
221+
// Case 2: Some architectures were added, but some were skipped due to duplicates
222+
case preferredSchedulingTerms != nil && skippedArchitectures != nil:
223+
pod.EnsureLabel(utils.PreferredNodeAffinitySourceLabel, utils.LabelValueSetWithDuplicates)
224+
pod.PublishEvent(corev1.EventTypeNormal, ArchitectureAwareNodeAffinitySet, fmt.Sprintf("%s source: %s, skipped: %s", ArchitecturePreferredAffinityWithDuplicatesMsg, configSource, strings.Join(skippedArchitectures, ", ")))
225+
log.V(2).Info("Applied some architecture preferences from configuration", "ConfigSource", configSource, "SkippedArchitectures", skippedArchitectures)
226+
227+
// Case 3: All architectures from this config were already set
228+
case preferredSchedulingTerms == nil && skippedArchitectures != nil:
229+
pod.EnsureLabel(utils.PreferredNodeAffinitySourceLabel, utils.LabelValueSetWithDuplicates)
230+
pod.PublishEvent(corev1.EventTypeNormal, ArchitecturePreferredAffinityDuplicates, fmt.Sprintf("%s source: %s, architectures: %s", ArchitecturePreferredAffinityAllDuplicatesMsg, configSource, strings.Join(skippedArchitectures, ", ")))
231+
log.V(2).Info("All architectures from configuration were already set", "ConfigSource", configSource, "SkippedArchitectures", skippedArchitectures)
232+
233+
// Case 4: No architectures were provided in the config
234+
default:
235+
log.V(2).Info("No architecture preferences provided in configuration", "ConfigSource", configSource)
214236
}
215237
}
216238

controllers/podplacement/pod_reconciler.go

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ func (r *PodReconciler) processPod(ctx context.Context, pod *Pod) {
122122
r.applyPodPlacementConfigs(ctx, pod)
123123

124124
if cppc != nil && cppc.PluginsEnabled(common.NodeAffinityScoringPluginName) {
125-
pod.SetPreferredArchNodeAffinity(cppc.Spec.Plugins.NodeAffinityScoring)
125+
pod.SetPreferredArchNodeAffinity(cppc.Spec.Plugins.NodeAffinityScoring, multiarchv1beta1.ClusterPodPlacementConfigKind)
126126
}
127127

128128
// Prepare the requirement for the node affinity.
@@ -145,8 +145,15 @@ func (r *PodReconciler) processPod(ctx context.Context, pod *Pod) {
145145
// If the pod has been processed successfully or the max retries have been reached, remove the scheduling gate.
146146
if err == nil || pod.maxRetries() {
147147
if pod.Labels[utils.PreferredNodeAffinityLabel] == utils.LabelValueNotSet {
148-
pod.PublishEvent(corev1.EventTypeNormal, ArchitectureAwareNodeAffinitySet,
149-
ArchitecturePreferredPredicateSkippedMsg)
148+
if pod.Labels[utils.LabelValueSetWithDuplicates] == utils.LabelValueSetWithDuplicates {
149+
pod.PublishEvent(corev1.EventTypeNormal, ArchitectureAwareNodeAffinitySet,
150+
ArchitecturePreferredAffinityAllDuplicatesMsg)
151+
log.V(2).Info("All provided preferred node affinity was already set.")
152+
} else {
153+
pod.PublishEvent(corev1.EventTypeNormal, ArchitectureAwareNodeAffinitySet,
154+
ArchitecturePreferredPredicateSkippedMsg)
155+
log.V(2).Info("No preferred node affinity was set")
156+
}
150157
}
151158

152159
log.V(1).Info("Removing the scheduling gate from pod.")
@@ -188,7 +195,8 @@ func (r *PodReconciler) applyPodPlacementConfigs(ctx context.Context, pod *Pod)
188195
if selector == labels.Nothing() || selector.Matches(labels.Set(pod.Labels)) {
189196
log.Info("Applying namespace-scoped config", "PodPlacementConfig", ppc.Name)
190197
// Apply the configuration, checking for overlaps
191-
pod.SetPreferredArchNodeAffinity(ppc.Spec.Plugins.NodeAffinityScoring)
198+
configSource := fmt.Sprintf("%s-%s", multiarchv1beta1.PodPlacementConfigResource, ppc.Name)
199+
pod.SetPreferredArchNodeAffinity(ppc.Spec.Plugins.NodeAffinityScoring, configSource)
192200
}
193201
}
194202
}

controllers/podplacement/pod_reconciler_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -580,7 +580,7 @@ var _ = Describe("Controllers/Podplacement/PodReconciler", func() {
580580
g.Expect(pod.Labels).To(HaveKeyWithValue(utils.SchedulingGateLabel, utils.SchedulingGateLabelValueRemoved),
581581
"scheduling gate annotation not found")
582582
g.Expect(pod.Labels).To(HaveKeyWithValue(utils.ImageInspectionErrorCountLabel, strconv.Itoa(MaxRetryCount)), "image inspection error count not found")
583-
g.Expect(pod.Labels).To(HaveKeyWithValue(utils.PreferredNodeAffinityLabel, utils.NodeAffinityLabelValueSet),
583+
g.Expect(pod.Labels).To(HaveKeyWithValue(utils.PreferredNodeAffinityLabel, utils.LabelValueAllDuplicates),
584584
"preferred node affinity label not found")
585585
g.Expect(pod.Labels).To(HaveKeyWithValue(utils.NodeAffinityLabel, utils.LabelValueNotSet),
586586
"node affinity label not found")

pkg/utils/const.go

Lines changed: 18 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -21,22 +21,24 @@ const (
2121
)
2222

2323
const (
24-
ArchLabel = "kubernetes.io/arch"
25-
NodeAffinityLabel = "multiarch.openshift.io/node-affinity"
26-
PreferredNodeAffinityLabel = "multiarch.openshift.io/preferred-node-affinity"
27-
NodeAffinityLabelValueSet = "set"
28-
LabelValueNotSet = "not-set"
29-
HostnameLabel = "kubernetes.io/hostname"
30-
SchedulingGateLabel = "multiarch.openshift.io/scheduling-gate"
31-
SchedulingGateLabelValueGated = "gated"
32-
SchedulingGateLabelValueRemoved = "removed"
33-
PodPlacementFinalizerName = "finalizers.multiarch.openshift.io/pod-placement"
34-
SingleArchLabel = "multiarch.openshift.io/single-arch"
35-
MultiArchLabel = "multiarch.openshift.io/multi-arch"
36-
NoSupportedArchLabel = "multiarch.openshift.io/no-supported-arch"
37-
ImageInspectionErrorLabel = "multiarch.openshift.io/image-inspect-error"
38-
ImageInspectionErrorCountLabel = "multiarch.openshift.io/image-inspect-error-count"
39-
LabelGroup = "multiarch.openshift.io"
24+
ArchLabel = "kubernetes.io/arch"
25+
NodeAffinityLabel = "multiarch.openshift.io/node-affinity"
26+
PreferredNodeAffinityLabel = "multiarch.openshift.io/preferred-node-affinity"
27+
PreferredNodeAffinitySourceLabel = "multiarch.openshift.io/preferred-affinity-source"
28+
NodeAffinityLabelValueSet = "set"
29+
LabelValueNotSet = "not-set"
30+
LabelValueSetWithDuplicates = "has-duplicates"
31+
HostnameLabel = "kubernetes.io/hostname"
32+
SchedulingGateLabel = "multiarch.openshift.io/scheduling-gate"
33+
SchedulingGateLabelValueGated = "gated"
34+
SchedulingGateLabelValueRemoved = "removed"
35+
PodPlacementFinalizerName = "finalizers.multiarch.openshift.io/pod-placement"
36+
SingleArchLabel = "multiarch.openshift.io/single-arch"
37+
MultiArchLabel = "multiarch.openshift.io/multi-arch"
38+
NoSupportedArchLabel = "multiarch.openshift.io/no-supported-arch"
39+
ImageInspectionErrorLabel = "multiarch.openshift.io/image-inspect-error"
40+
ImageInspectionErrorCountLabel = "multiarch.openshift.io/image-inspect-error-count"
41+
LabelGroup = "multiarch.openshift.io"
4042
)
4143

4244
const (

0 commit comments

Comments
 (0)