Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 24 additions & 12 deletions pkg/controller/common/helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -936,12 +936,25 @@ func CalculateConfigFileDiffs(oldIgnConfig, newIgnConfig *ign3types.Config) []st
return diffFileSet
}

// CalculateConfigUnitDiffs compares the units present in two ignition configurations and returns the list of units
// that are different between them
//
//nolint:dupl
func CalculateConfigUnitDiffs(oldIgnConfig, newIgnConfig *ign3types.Config) []string {
// Go through the units and see what is new or different
type UnitDiff struct {
Added []ign3types.Unit
Removed []ign3types.Unit
Updated []ign3types.Unit
}

// GetChangedConfigUnitsByType compares the units present in two ignition configurations, one
// old config and the other new, a struct with units mapped to the type of change:
// - New unit added: "Added"
// - Unit previously existing removed: "Removed"
// - Existing unit changed in some way: "Updated"
func GetChangedConfigUnitsByType(oldIgnConfig, newIgnConfig *ign3types.Config) (unitDiffs UnitDiff) {
diffUnit := UnitDiff{
Added: []ign3types.Unit{},
Removed: []ign3types.Unit{},
Updated: []ign3types.Unit{},
}

// Get the sets of the old and new units from the ignition configurations
oldUnitSet := make(map[string]ign3types.Unit)
for _, u := range oldIgnConfig.Systemd.Units {
oldUnitSet[u.Name] = u
Expand All @@ -950,26 +963,25 @@ func CalculateConfigUnitDiffs(oldIgnConfig, newIgnConfig *ign3types.Config) []st
for _, u := range newIgnConfig.Systemd.Units {
newUnitSet[u.Name] = u
}
diffUnitSet := []string{}

// First check if any units were removed
for unit := range oldUnitSet {
_, ok := newUnitSet[unit]
if !ok {
diffUnitSet = append(diffUnitSet, unit)
diffUnit.Removed = append(diffUnit.Removed, oldUnitSet[unit])
}
}

// Now check if any units were added/changed
// Now check if any units were added or updated
for name, newUnit := range newUnitSet {
oldUnit, ok := oldUnitSet[name]
if !ok {
diffUnitSet = append(diffUnitSet, name)
diffUnit.Added = append(diffUnit.Added, newUnitSet[name])
} else if !reflect.DeepEqual(oldUnit, newUnit) {
diffUnitSet = append(diffUnitSet, name)
diffUnit.Updated = append(diffUnit.Updated, newUnitSet[name])
}
}
return diffUnitSet
return diffUnit
}

// NewIgnFile returns a simple ignition3 file from just path and file contents.
Expand Down
31 changes: 30 additions & 1 deletion pkg/daemon/on_disk_validation.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"fmt"
"os"
"path/filepath"
"strings"

ign2types "github.com/coreos/ignition/config/v2_2/types"
ign3types "github.com/coreos/ignition/v2/config/v3_5/types"
Expand Down Expand Up @@ -101,7 +102,12 @@ func checkV3Unit(unit ign3types.Unit, systemdPath string) error {
return nil
}

return checkFileContentsAndMode(path, []byte(*unit.Contents), defaultFilePermissions)
err := checkFileContentsAndMode(path, []byte(*unit.Contents), defaultFilePermissions)
if err != nil {
return err
}

return checkUnitEnabled(unit.Name, unit.Enabled)
}

// checkV3Units validates the contents of all the units in the
Expand Down Expand Up @@ -231,6 +237,29 @@ func checkV2Files(files []ign2types.File) error {
return nil
}

// checkUnitEnabled checks whether a systemd unit is enabled as expected.
func checkUnitEnabled(name string, expectedEnabled *bool) error {
if expectedEnabled == nil {
return nil
}
outBytes, _ := runGetOut("systemctl", "is-enabled", name)
out := strings.TrimSpace(string(outBytes))

switch {
case *expectedEnabled:
// If expected to be enabled, reject known "not enabled" states
if out == "disabled" || out == "masked" || out == "masked-runtime" || out == "not-found" {
return fmt.Errorf("unit %q expected enabled=true, but systemd reports %q", name, out)
}
case !*expectedEnabled:
// If expected to be disabled, reject any of the enabled-like states
if out == "enabled" || out == "enabled-runtime" {
return fmt.Errorf("unit %q expected enabled=false, but systemd reports %q", name, out)
}
}
return nil
}

// checkFileContentsAndMode reads the file from the filepath and compares its
// contents and mode with the expectedContent and mode parameters. It logs an
// error in case of an error or mismatch and returns the status of the
Expand Down
27 changes: 19 additions & 8 deletions pkg/daemon/update.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"path/filepath"
"reflect"
goruntime "runtime"
"slices"
"strconv"
"strings"
"sync"
Expand Down Expand Up @@ -1013,13 +1014,20 @@ func (dn *Daemon) update(oldConfig, newConfig *mcfgv1.MachineConfig, skipCertifi
logSystem("Starting update from %s to %s: %+v", oldConfigName, newConfigName, diff)

diffFileSet := ctrlcommon.CalculateConfigFileDiffs(&oldIgnConfig, &newIgnConfig)
diffUnitSet := ctrlcommon.CalculateConfigUnitDiffs(&oldIgnConfig, &newIgnConfig)
// Get the added and updated units
unitDiff := ctrlcommon.GetChangedConfigUnitsByType(&oldIgnConfig, &newIgnConfig)
addedOrChangedUnits := slices.Concat(unitDiff.Added, unitDiff.Updated)
// Get the names of all units changed in some way (added, removed, or updated)
var allChangedUnitNames []string
for _, unit := range append(addedOrChangedUnits, unitDiff.Removed...) {
allChangedUnitNames = append(allChangedUnitNames, unit.Name)
}

var nodeDisruptionActions []opv1.NodeDisruptionPolicyStatusAction
var actions []string
// Node Disruption Policies cannot be used during firstboot as API is not accessible.
if !firstBoot {
nodeDisruptionActions, err = dn.calculatePostConfigChangeNodeDisruptionAction(diff, diffFileSet, diffUnitSet)
nodeDisruptionActions, err = dn.calculatePostConfigChangeNodeDisruptionAction(diff, diffFileSet, allChangedUnitNames)
} else {
actions, err = calculatePostConfigChangeAction(diff, diffFileSet)
klog.Infof("Skipping node disruption polciies as node is executing first boot.")
Expand Down Expand Up @@ -1125,13 +1133,13 @@ func (dn *Daemon) update(oldConfig, newConfig *mcfgv1.MachineConfig, skipCertifi
}

// update files on disk that need updating
if err := dn.updateFiles(oldIgnConfig, newIgnConfig, skipCertificateWrite); err != nil {
if err := dn.updateFiles(oldIgnConfig, newIgnConfig, addedOrChangedUnits, skipCertificateWrite); err != nil {
return err
}

defer func() {
if retErr != nil {
if err := dn.updateFiles(newIgnConfig, oldIgnConfig, skipCertificateWrite); err != nil {
if err := dn.updateFiles(newIgnConfig, oldIgnConfig, addedOrChangedUnits, skipCertificateWrite); err != nil {
errs := kubeErrs.NewAggregate([]error{err, retErr})
retErr = fmt.Errorf("error rolling back files writes: %w", errs)
return
Expand Down Expand Up @@ -1266,15 +1274,18 @@ func (dn *Daemon) updateHypershift(oldConfig, newConfig *mcfgv1.MachineConfig, d
return fmt.Errorf("parsing new Ignition config failed: %w", err)
}

unitDiff := ctrlcommon.GetChangedConfigUnitsByType(&oldIgnConfig, &newIgnConfig)
addedOrChangedUnits := slices.Concat(unitDiff.Added, unitDiff.Updated)

// update files on disk that need updating
// We should't skip the certificate write in HyperShift since it does not run the extra daemon process
if err := dn.updateFiles(oldIgnConfig, newIgnConfig, false); err != nil {
if err := dn.updateFiles(oldIgnConfig, newIgnConfig, addedOrChangedUnits, false); err != nil {
return err
}

defer func() {
if retErr != nil {
if err := dn.updateFiles(newIgnConfig, oldIgnConfig, false); err != nil {
if err := dn.updateFiles(newIgnConfig, oldIgnConfig, addedOrChangedUnits, false); err != nil {
errs := kubeErrs.NewAggregate([]error{err, retErr})
retErr = fmt.Errorf("error rolling back files writes: %w", errs)
return
Expand Down Expand Up @@ -1783,12 +1794,12 @@ func (dn *CoreOSDaemon) getKernelPackagesForRelease() releaseKernelPackages {
// whatever has been written is picked up by the appropriate daemons, if
// required. in particular, a daemon-reload and restart for any unit files
// touched.
func (dn *Daemon) updateFiles(oldIgnConfig, newIgnConfig ign3types.Config, skipCertificateWrite bool) error {
func (dn *Daemon) updateFiles(oldIgnConfig, newIgnConfig ign3types.Config, addedOrChangedUnits []ign3types.Unit, skipCertificateWrite bool) error {
klog.Info("Updating files")
if err := dn.writeFiles(newIgnConfig.Storage.Files, skipCertificateWrite); err != nil {
return err
}
if err := dn.writeUnits(newIgnConfig.Systemd.Units); err != nil {
if err := dn.writeUnits(addedOrChangedUnits); err != nil {
return err
}
return dn.deleteStaleData(oldIgnConfig, newIgnConfig)
Expand Down
32 changes: 15 additions & 17 deletions test/extended/machineconfignode.go
Original file line number Diff line number Diff line change
Expand Up @@ -142,8 +142,10 @@ func checkMCNConditionStatus(mcn *mcfgv1.MachineConfigNode, conditionType mcfgv1
return conditionStatus == status
}

// `waitForMCNConditionStatus` waits up to a specified timeout for the desired MCN
// condition to match the desired status (ex. wait until "Updated" is "False")
// `WaitForMCNConditionStatus` waits up to a specified timeout for the desired MCN condition to
// match the desired status (ex. wait until "Updated" is "False"). If the desired condition is
// "Unknown," the function will also return true if the condition is "True," which ensures that we
// do not fail when an update progresses quickly through the intermediary "Unknown" phase.
func waitForMCNConditionStatus(machineConfigClient *machineconfigclient.Clientset, mcnName string, conditionType mcfgv1.StateProgress, status metav1.ConditionStatus,
timeout time.Duration, interval time.Duration) (bool, error) {

Expand All @@ -162,6 +164,12 @@ func waitForMCNConditionStatus(machineConfigClient *machineconfigclient.Clientse

// Check if the MCN status is as desired
conditionMet = checkMCNConditionStatus(workerNodeMCN, conditionType, status)
// If the condition was not met and we are expecting it may have transitioned quickly
// trough the "Unknown" phase, check if the condition has flipped to `True`.
if !conditionMet && status == metav1.ConditionUnknown {
conditionMet = checkMCNConditionStatus(workerNodeMCN, conditionType, metav1.ConditionTrue)
logger.Infof("MCN '%v' %v condition was %v, missed transition through %v.", mcnName, conditionType, metav1.ConditionTrue, status)
}
return conditionMet, nil
}); err != nil {
logger.Infof("The desired MCN condition was never met: %v", err)
Expand Down Expand Up @@ -211,9 +219,7 @@ func ValidateTransitionThroughConditions(machineConfigClient *machineconfigclien
logger.Infof("Waiting for UpdateExecuted=Unknown")
conditionMet, err = waitForMCNConditionStatus(machineConfigClient, updatingNodeName, mcfgv1.MachineConfigNodeUpdateExecuted, metav1.ConditionUnknown, 30*time.Second, 1*time.Second)
o.Expect(err).NotTo(o.HaveOccurred(), fmt.Sprintf("Error occurred while waiting for UpdateExecuted=Unknown: %v", err))
if !conditionMet {
logger.Infof("Warning, could not detect UpdateExecuted=Unknown.")
}
o.Expect(conditionMet).To(o.BeTrue(), "Error, could not detect UpdateExecuted=Unknown.")

// On standard, non-rebootless, update, check that node transitions through "Cordoned" and "Drained" phases
if !isRebootless {
Expand All @@ -225,9 +231,7 @@ func ValidateTransitionThroughConditions(machineConfigClient *machineconfigclien
logger.Infof("Waiting for Drained=Unknown")
conditionMet, err = waitForMCNConditionStatus(machineConfigClient, updatingNodeName, mcfgv1.MachineConfigNodeUpdateDrained, metav1.ConditionUnknown, 15*time.Second, 1*time.Second)
o.Expect(err).NotTo(o.HaveOccurred(), fmt.Sprintf("Error occurred while waiting for Drained=Unknown: %v", err))
if !conditionMet {
logger.Infof("Warning, could not detect Drained=Unknown.")
}
o.Expect(conditionMet).To(o.BeTrue(), "Error, could not detect Drained=Unknown.")

logger.Infof("Waiting for Drained=True")
conditionMet, err = waitForMCNConditionStatus(machineConfigClient, updatingNodeName, mcfgv1.MachineConfigNodeUpdateDrained, metav1.ConditionTrue, 4*time.Minute, 1*time.Second)
Expand All @@ -238,18 +242,14 @@ func ValidateTransitionThroughConditions(machineConfigClient *machineconfigclien
logger.Infof("Waiting for AppliedFilesAndOS=Unknown")
conditionMet, err = waitForMCNConditionStatus(machineConfigClient, updatingNodeName, mcfgv1.MachineConfigNodeUpdateFilesAndOS, metav1.ConditionUnknown, 30*time.Second, 1*time.Second)
o.Expect(err).NotTo(o.HaveOccurred(), fmt.Sprintf("Error occurred while waiting for AppliedFilesAndOS=Unknown: %v", err))
if !conditionMet {
logger.Infof("Warning, could not detect AppliedFilesAndOS=Unknown.")
}
o.Expect(conditionMet).To(o.BeTrue(), "Error, could not detect AppliedFilesAndOS=Unknown.")

// On image mode update, check that node transitions through the "ImagePulledFromRegistry" phase
if isImageMode {
logger.Infof("Waiting for ImagePulledFromRegistry=Unknown")
conditionMet, err = waitForMCNConditionStatus(machineConfigClient, updatingNodeName, mcfgv1.MachineConfigNodeImagePulledFromRegistry, metav1.ConditionUnknown, 30*time.Second, 1*time.Second)
o.Expect(err).NotTo(o.HaveOccurred(), fmt.Sprintf("Error occurred while waiting for ImagePulledFromRegistry=Unknown: %v", err))
if !conditionMet {
logger.Infof("Warning, could not detect ImagePulledFromRegistry=Unknown.")
}
o.Expect(conditionMet).To(o.BeTrue(), "Error, could not detect ImagePulledFromRegistry=Unknown.")
}

logger.Infof("Waiting for AppliedFilesAndOS=True")
Expand Down Expand Up @@ -280,9 +280,7 @@ func ValidateTransitionThroughConditions(machineConfigClient *machineconfigclien
logger.Infof("Waiting for RebootedNode=Unknown")
conditionMet, err = waitForMCNConditionStatus(machineConfigClient, updatingNodeName, mcfgv1.MachineConfigNodeUpdateRebooted, metav1.ConditionUnknown, 15*time.Second, 1*time.Second)
o.Expect(err).NotTo(o.HaveOccurred(), fmt.Sprintf("Error occurred while waiting for RebootedNode=Unknown: %v", err))
if !conditionMet {
logger.Infof("Warning, could not detect RebootedNode=Unknown.")
}
o.Expect(conditionMet).To(o.BeTrue(), "Error, could not detect RebootedNode=Unknown.")

logger.Infof("Waiting for RebootedNode=True")
conditionMet, err = waitForMCNConditionStatus(machineConfigClient, updatingNodeName, mcfgv1.MachineConfigNodeUpdateRebooted, metav1.ConditionTrue, 10*time.Minute, 1*time.Second)
Expand Down