Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Scheduler Framework Logic: Preview Only, Do Not Merge #1

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions apis/placement/v1beta1/binding_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,12 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

const (
// SchedulerCleanupFinalizer is a finalizer added by the scheduler to all bindings, to make sure
// that the scheduler can react to binding deletions if necessary.
// Its value is the fleetPrefix constant followed by "scheduler-cleanup".
SchedulerCleanupFinalizer = fleetPrefix + "scheduler-cleanup"
)

// +kubebuilder:object:root=true
// +kubebuilder:resource:scope=Cluster,categories={fleet},shortName=rb
// +kubebuilder:subresource:status
Expand All @@ -33,15 +39,42 @@ type ClusterResourceBinding struct {

// ResourceBindingSpec defines the desired state of ClusterResourceBinding.
type ResourceBindingSpec struct {
// State is the desired state of the binding. Possible values: Creating, Active, Deleting.
// +required
State BindingState `json:"state"`

// ResourceSnapshotName is the name of the resource snapshot that this resource binding points to.
// If the resources are divided into multiple snapshots because of the resource size limit,
// it points to the name of the leading snapshot of the index group.
ResourceSnapshotName string `json:"resourceSnapshotName"`

// PolicySnapshotName is the name of the scheduling policy snapshot that this resource binding
// points to; more specifically, the scheduler creates this binding in accordance with this
// scheduling policy snapshot.
PolicySnapshotName string `json:"policySnapshotName"`

// TargetCluster is the name of the cluster that the scheduler assigns the resources to.
TargetCluster string `json:"targetCluster"`

// ClusterDecision explains why the scheduler makes this binding.
ClusterDecision ClusterDecision `json:"clusterDecision"`
}

// BindingState is the state of a binding. It is a string-backed type; the values declared
// for it in this file are "Creating", "Active" and "Deleting".
type BindingState string

const (
// BindingStateCreating means the binding is ready but still needs to be rolled
// out to the target cluster.
BindingStateCreating BindingState = "Creating"

// BindingStateActive means the binding is in effect.
BindingStateActive BindingState = "Active"

// BindingStateDeleting means the binding is about to be deleted.
BindingStateDeleting BindingState = "Deleting"
)

// ResourceBindingStatus represents the current status of a ClusterResourceBinding.
type ResourceBindingStatus struct {
// +patchMergeKey=type
Expand Down
119 changes: 98 additions & 21 deletions apis/placement/v1beta1/clusterresourceplacement_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ package v1beta1
import (
"k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/intstr"
)

// +genclient
Expand All @@ -28,7 +29,7 @@ import (
// Note that you can't select the following resources:
// - reserved namespaces including: default, kube-* (reserved for Kubernetes system namespaces), fleet-* (reserved for fleet system namespaces).
// - reserved fleet resource types including: MemberCluster, InternalMemberCluster, ClusterResourcePlacement, MultiClusterService, ServiceImport, etc.
// The `ClusterResourceBinding` will be created and it represents a scheduling decision that binds a group of resources
// The `ClusterResourceBinding` will be created, and it represents a scheduling decision that binds a group of resources
// to a cluster.
type ClusterResourcePlacement struct {
metav1.TypeMeta `json:",inline"`
Expand All @@ -45,6 +46,12 @@ type ClusterResourcePlacement struct {

// ClusterResourcePlacementSpec defines the desired state of ClusterResourcePlacement.
type ClusterResourcePlacementSpec struct {
// The number of old snapshots to retain to allow rollback.
// This is a pointer to distinguish between explicit zero and not specified.
// Defaults to 10.
// +optional
RevisionHistoryLimit *int32 `json:"revisionHistoryLimit,omitempty"`

// +kubebuilder:validation:MinItems=1
// +kubebuilder:validation:MaxItems=100

Expand All @@ -57,6 +64,11 @@ type ClusterResourcePlacementSpec struct {
// If unspecified, all the joined member clusters are selected.
// +optional
Policy *PlacementPolicy `json:"policy,omitempty"`

// The rollout strategy to use to replace existing placements with new ones.
// +optional
// +patchStrategy=retainKeys
Strategy RolloutStrategy `json:"strategy,omitempty"`
}

// ClusterResourceSelector is used to select cluster scoped resources as the target resources to be placed.
Expand Down Expand Up @@ -207,6 +219,55 @@ type TopologySpreadConstraint struct {
WhenUnsatisfiable UnsatisfiableConstraintAction `json:"whenUnsatisfiable,omitempty"`
}

// RolloutStrategy describes how to update existing placed resources, either by placing them
// onto a different cluster or by upgrading them in place to the latest resources.
type RolloutStrategy struct {
// Type is the type of rollout. The only supported type is "RollingUpdate". Default is "RollingUpdate".
// +optional
Type RolloutStrategyType `json:"type,omitempty"`

// RollingUpdate holds the rolling update config params. Present only if RolloutStrategyType = RollingUpdate.
// +optional
RollingUpdate *RollingUpdateConfig `json:"rollingUpdate,omitempty"`
}

// RolloutStrategyType identifies the type of rollout strategy.
// +enum
type RolloutStrategyType string

const (
// RollingUpdateRolloutStrategyType replaces the old placed resources using a rolling update,
// i.e. gradually creates the new ones while replacing the old ones.
RollingUpdateRolloutStrategyType RolloutStrategyType = "RollingUpdate"
)

// RollingUpdateConfig contains the config to control the desired behavior of rolling update.
type RollingUpdateConfig struct {
// MaxUnavailable is the maximum number of clusters that can be unavailable during the rolling
// update, compared to the desired number of clusters.
// Absolute number is calculated from percentage by applying to the new target number of clusters.
// We consider a resource not available when we either remove it from a cluster or in-place
// upgrade the resources' content on the same cluster.
// This can not be 0 if MaxSurge is 0.
// Defaults to 25%.
// +optional
MaxUnavailable *intstr.IntOrString `json:"maxUnavailable,omitempty"`

// MaxSurge is the maximum number of clusters that can be scheduled above the desired number of clusters.
// Absolute number is calculated from percentage by applying to the new target number of clusters.
// This does not apply to the case that we do in-place upgrade of resources on the same cluster.
// This can not be 0 if MaxUnavailable is 0.
// Defaults to 25%.
// +optional
MaxSurge *intstr.IntOrString `json:"maxSurge,omitempty"`

// WaitBetweenRollingPeriodSeconds is used to config the time to wait between rolling out phases.
// A resource placement is considered available if `WaitBetweenRollingPeriodSeconds` seconds
// have passed after the resources are applied to the target cluster successfully.
// Default is 60.
// NOTE(review): Kubernetes API conventions recommend int32/int64 over int for API fields;
// confirm whether *int is intentional here before the API is published.
// +optional
WaitBetweenRollingPeriodSeconds *int `json:"waitBetweenRollingPeriodSeconds,omitempty"`
}

// UnsatisfiableConstraintAction defines the type of actions that can be taken if a constraint is not satisfied.
// +enum
type UnsatisfiableConstraintAction string
Expand Down Expand Up @@ -236,17 +297,11 @@ type ClusterResourcePlacementStatus struct {
// +optional
SelectedResources []ResourceIdentifier `json:"selectedResources,omitempty"`

// TargetClusters contains a list of names of member clusters selected by PlacementPolicy.
// PlacementStatus contains a list of placement status on the clusters that are selected by PlacementPolicy.
// Each selected cluster is guaranteed to have a corresponding placementStatuses.
// Note that the clusters must be both joined and meeting PlacementPolicy.
// +optional
TargetClusters []string `json:"targetClusters,omitempty"`

// +kubebuilder:validation:MaxItems=1000

// FailedResourcePlacements is a list of all the resources failed to be placed to the given clusters.
// Note that we only include 1000 failed resource placements even if there are more than 1000.
// +optional
FailedResourcePlacements []FailedResourcePlacement `json:"failedPlacements,omitempty"`
PlacementStatuses []ResourcePlacementStatus `json:"placementStatuses,omitempty"`
}

// ResourceIdentifier identifies one Kubernetes resource.
Expand All @@ -272,6 +327,27 @@ type ResourceIdentifier struct {
Namespace string `json:"namespace,omitempty"`
}

// ResourcePlacementStatus represents the status of a group of resources placed on a selected cluster.
// An empty ClusterDecision indicates it is not scheduled yet.
// NOTE(review): this struct declares no ClusterDecision field; the sentence above may be meant
// to refer to ClusterName — confirm and update.
type ResourcePlacementStatus struct {
// Conditions is an array of current observed conditions for ResourcePlacementStatus.
// +optional
Conditions []metav1.Condition `json:"conditions,omitempty"`

// ClusterName is the name of the ManagedCluster. If it is not empty, its value should be unique across all
// placement decisions for the Placement.
// +kubebuilder:validation:Required
// +required
ClusterName string `json:"clusterName"`

// +kubebuilder:validation:MaxItems=100

// FailedResourcePlacements is a list of all the resources that failed to be placed to the given cluster.
// Note that we only include 100 failed resource placements even if there are more than 100.
// +optional
FailedResourcePlacements []FailedResourcePlacement `json:"failedPlacements,omitempty"`
}

// FailedResourcePlacement contains the failure details of a failed resource placement.
type FailedResourcePlacement struct {
// The resource failed to be placed.
Expand All @@ -287,24 +363,25 @@ type FailedResourcePlacement struct {
Condition metav1.Condition `json:"condition"`
}

// ResourcePlacementConditionType defines a specific condition of a resource placement.
// ResourcePlacementConditionType defines a specific condition of the cluster resource placement.
// +enum
type ResourcePlacementConditionType string

const (
// ResourcePlacementConditionTypeScheduled indicates whether we have selected at least one resource to be placed to at least one member cluster and created work CRs under the corresponding per-cluster namespaces (i.e., fleet-member-<member-name>).
// ResourceScheduledConditionType indicates whether we have selected at least one resource to be placed to at least one
// member cluster and created work CRs under the corresponding per-cluster namespaces (i.e., fleet-member-<member-name>).
// Its condition status can be one of the following:
// - "True" means we have selected at least one resource, targeted at least one member cluster and created the work CRs.
// - "False" means we have selected zero resources, zero target clusters, or failed to create the work CRs.
// - "Unknown" otherwise.
ResourcePlacementConditionTypeScheduled ResourcePlacementConditionType = "Scheduled"
// - "True" means we have successfully scheduled the resources to fully satisfy the placement requirement.
// - "False" means we didn't fully satisfy the placement requirement. We will fill the Reason field.
// - "Unknown" means we don't have a scheduling decision yet.
ResourceScheduledConditionType ResourcePlacementConditionType = "Scheduled"

// ResourcePlacementStatusConditionTypeApplied indicates whether the selected member clusters have received the work CRs and applied the selected resources locally.
// ResourcesAppliedConditionType indicates whether the selected member clusters have applied the selected resources locally.
// Its condition status can be one of the following:
// - "True" means all the selected resources are successfully applied to all the target clusters.
// - "False" means some of them have failed.
// - "Unknown" otherwise.
ResourcePlacementStatusConditionTypeApplied ResourcePlacementConditionType = "Applied"
// - "True" means all the selected resources are successfully applied to the target cluster.
// - "False" means some of them have failed. We will place some of the detailed failure in the FailedResourcePlacement array.
// - "Unknown" means we haven't started the apply yet.
ResourcesAppliedConditionType ResourcePlacementConditionType = "Applied"
)

// PlacementType identifies the type of placement.
Expand Down
35 changes: 18 additions & 17 deletions apis/placement/v1beta1/policysnapshot_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,17 +42,17 @@ type ClusterPolicySnapshot struct {
metav1.TypeMeta `json:",inline"`
metav1.ObjectMeta `json:"metadata,omitempty"`

// The desired state of PolicySnapshot.
// The desired state of SchedulingPolicySnapshot.
// +required
Spec PolicySnapshotSpec `json:"spec"`
Spec SchedulingPolicySnapshotSpec `json:"spec"`

// The observed status of PolicySnapshot.
// The observed status of SchedulingPolicySnapshot.
// +optional
Status PolicySnapshotStatus `json:"status,omitempty"`
Status SchedulingPolicySnapshotStatus `json:"status,omitempty"`
}

// PolicySnapshotSpec defines the desired state of PolicySnapshot.
type PolicySnapshotSpec struct {
// SchedulingPolicySnapshotSpec defines the desired state of SchedulingPolicySnapshot.
type SchedulingPolicySnapshotSpec struct {
// Policy defines how to select member clusters to place the selected resources.
// If unspecified, all the joined member clusters are selected.
// +optional
Expand All @@ -63,35 +63,36 @@ type PolicySnapshotSpec struct {
PolicyHash []byte `json:"policyHash"`
}

// PolicySnapshotStatus defines the observed state of PolicySnapshot.
type PolicySnapshotStatus struct {
// SchedulingPolicySnapshotStatus defines the observed state of SchedulingPolicySnapshot.
type SchedulingPolicySnapshotStatus struct {
// +patchMergeKey=type
// +patchStrategy=merge
// +listType=map
// +listMapKey=type

// Conditions is an array of current observed conditions for PolicySnapshot.
// Conditions is an array of current observed conditions for SchedulingPolicySnapshot.
// +optional
Conditions []metav1.Condition `json:"conditions"`

// +kubebuilder:validation:MaxItems=100
// ClusterDecisions contains a list of names of member clusters considered by the scheduler.
// Note that all the selected clusters must be present in the list while not all the
// member clusters are guaranteed to be listed.
// member clusters are guaranteed to be listed due to the size limit. We will try to
// add the clusters that can provide the most insight to the list first.
// +optional
ClusterDecisions []ClusterDecision `json:"targetClusters,omitempty"`
}

// PolicySnapshotConditionType identifies a specific condition of the PolicySnapshot.
type PolicySnapshotConditionType string
// SchedulingPolicySnapshotConditionType identifies a specific condition of the SchedulingPolicySnapshot.
type SchedulingPolicySnapshotConditionType string

const (
// Scheduled indicates the scheduled condition of the given policySnapshot.
// Scheduled indicates the scheduled condition of the given SchedulingPolicySnapshot.
// Its condition status can be one of the following:
// - "True" means the corresponding policySnapshot is fully scheduled.
// - "False" means the corresponding policySnapshot is not scheduled yet.
// - "Unknown" means this policy does not have a full schedule yet.
PolicySnapshotScheduled PolicySnapshotConditionType = "Scheduled"
// - "True" means the corresponding SchedulingPolicySnapshot is scheduled.
// - "False" means the corresponding SchedulingPolicySnapshot is not scheduled yet.
// - "Unknown" means the status of the scheduling is unknown.
PolicySnapshotScheduled SchedulingPolicySnapshotConditionType = "Scheduled"
)

// ClusterDecision represents a decision from a placement
Expand Down
Loading