Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions api/v1beta1/nodeset_keys.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,11 @@ func (o *NodeSet) HeadlessServiceKey() types.NamespacedName {
Namespace: o.Namespace,
}
}

// SshHostKeys returns the namespaced name of the Secret that stores the SSH
// host keys for this NodeSet. The name is the NodeSet key's name suffixed
// with "-ssh-host-keys", placed in the NodeSet's own namespace.
func (o *NodeSet) SshHostKeys() types.NamespacedName {
	secretName := fmt.Sprintf("%s-ssh-host-keys", o.Key().Name)
	return types.NamespacedName{
		Namespace: o.Namespace,
		Name:      secretName,
	}
}
12 changes: 12 additions & 0 deletions api/v1beta1/nodeset_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ type NodeSetSpec struct {
// +optional
Slurmd ContainerWrapper `json:"slurmd,omitempty"`

// SSH configuration for worker pods.
// +optional
Ssh NodeSetSsh `json:"ssh,omitzero"`

// The logfile sidecar configuration.
// +optional
LogFile ContainerWrapper `json:"logfile,omitzero"`
Expand Down Expand Up @@ -112,6 +116,14 @@ type NodeSetPartition struct {
Config string `json:"config,omitzero"`
}

// NodeSetSsh defines SSH configuration for NodeSet worker pods.
//
// It is referenced from NodeSetSpec as the optional `ssh` field. The field
// comment and the +default marker below are mirrored into the generated CRD
// schema (description and `default: false`), and because the `enabled` JSON
// tag carries no omit option the schema lists `enabled` as required. Keep the
// generated CRD manifests in sync whenever this type is edited.
type NodeSetSsh struct {
// Enabled controls whether SSH access is enabled for this NodeSet.
// When enabled, SSH host keys will be created and mounted, and port 22 will be exposed.
// +default:=false
Enabled bool `json:"enabled"`
}

// NodeSetUpdateStrategy indicates the strategy that the NodeSet
// controller will be used to perform updates. It includes any additional
// parameters necessary to perform the update for the indicated strategy.
Expand Down
16 changes: 16 additions & 0 deletions api/v1beta1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 12 additions & 0 deletions config/crd/bases/slinky.slurm.net_nodesets.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,18 @@ spec:
Ref: https://github.com/kubernetes/api/blob/master/core/v1/types.go#L2885
type: object
x-kubernetes-preserve-unknown-fields: true
ssh:
description: SSH configuration for worker pods.
properties:
enabled:
default: false
description: |-
Enabled controls whether SSH access is enabled for this NodeSet.
When enabled, SSH host keys will be created and mounted, and port 22 will be exposed.
type: boolean
required:
- enabled
type: object
taintKubeNodes:
default: false
description: |-
Expand Down
12 changes: 12 additions & 0 deletions helm/slurm-operator-crds/templates/slinky.slurm.net_nodesets.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,18 @@ spec:
Ref: https://github.com/kubernetes/api/blob/master/core/v1/types.go#L2885
type: object
x-kubernetes-preserve-unknown-fields: true
ssh:
description: SSH configuration for worker pods.
properties:
enabled:
default: false
description: |-
Enabled controls whether SSH access is enabled for this NodeSet.
When enabled, SSH host keys will be created and mounted, and port 22 will be exposed.
type: boolean
required:
- enabled
type: object
taintKubeNodes:
default: false
description: |-
Expand Down
4 changes: 4 additions & 0 deletions helm/slurm/templates/nodeset/nodeset-cr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ spec:
config: {{ include "slurm.worker.partitionConfig" $nodeset.partition }}
{{- end }}{{- /* if (include "slurm.worker.partitionConfig" $nodeset.partition) */}}
{{- end }}{{- /* with $nodeset.partition */}}
{{- with $nodeset.ssh }}
ssh:
{{- toYaml . | nindent 4 }}
{{- end }}{{- /* with $nodeset.ssh */}}
replicas: {{ $nodeset.replicas }}
slurmd:
{{- $_ := set $nodeset.slurmd "imagePullPolicy" (default $.Values.imagePullPolicy $nodeset.slurmd.imagePullPolicy) -}}
Expand Down
5 changes: 5 additions & 0 deletions helm/slurm/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -643,6 +643,11 @@ nodesets:
configMap: {}
# State: UP
# MaxTime: UNLIMITED
# SSH configuration for this NodeSet.
# ssh:
# -- Enable SSH access to worker pods with pam_slurm_adopt.
# Ref: https://slurm.schedmd.com/pam_slurm_adopt.html
# enabled: false
# -- Enable propagation of container `resources.limits` into slurmd.
useResourceLimits: true
# Update strategy configuration.
Expand Down
8 changes: 4 additions & 4 deletions internal/builder/login_secret.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,12 @@ func (b *Builder) BuildLoginSshHostKeys(loginset *slinkyv1beta1.LoginSet) (*core
Key: loginset.SshHostKeys(),
Metadata: loginset.Spec.Template.PodMetadata,
Data: map[string][]byte{
sshHostEcdsaKeyFile: keyPairRsa.PrivateKey(),
sshHostEcdsaPubKeyFile: keyPairRsa.PublicKey(),
sshHostEcdsaKeyFile: keyPairEcdsa.PrivateKey(),
sshHostEcdsaPubKeyFile: keyPairEcdsa.PublicKey(),
sshHostEd25519KeyFile: keyPairEd25519.PrivateKey(),
sshHostEd25519PubKeyFile: keyPairEd25519.PublicKey(),
sshHostRsaKeyFile: keyPairEcdsa.PrivateKey(),
sshHostRsaPubKeyFile: keyPairEcdsa.PublicKey(),
sshHostRsaKeyFile: keyPairRsa.PrivateKey(),
sshHostRsaPubKeyFile: keyPairRsa.PublicKey(),
},
Immutable: true,
}
Expand Down
88 changes: 73 additions & 15 deletions internal/builder/worker_app.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (

const (
SlurmdPort = 6818
SshPort = 22

slurmdUser = "root"

Expand Down Expand Up @@ -67,7 +68,7 @@ func (b *Builder) BuildWorkerPodTemplate(nodeset *slinkyv1beta1.NodeSet, control
InitContainers: []corev1.Container{
b.logfileContainer(spec.LogFile, slurmdLogFilePath),
},
Volumes: nodesetVolumes(controller),
Volumes: nodesetVolumes(nodeset, controller),
Tolerations: []corev1.Toleration{
slurmtaints.TolerationWorkerNode,
},
Expand All @@ -78,7 +79,7 @@ func (b *Builder) BuildWorkerPodTemplate(nodeset *slinkyv1beta1.NodeSet, control
return b.buildPodTemplate(opts)
}

func nodesetVolumes(controller *slinkyv1beta1.Controller) []corev1.Volume {
func nodesetVolumes(nodeset *slinkyv1beta1.NodeSet, controller *slinkyv1beta1.Controller) []corev1.Volume {
out := []corev1.Volume{
{
Name: slurmEtcVolume,
Expand All @@ -102,23 +103,83 @@ func nodesetVolumes(controller *slinkyv1beta1.Controller) []corev1.Volume {
},
logFileVolume(),
}

// Add SSH host keys volume if SSH is enabled
if nodeset.Spec.Ssh.Enabled {
out = append(out, corev1.Volume{
Name: sshHostKeysVolume,
VolumeSource: corev1.VolumeSource{
Projected: &corev1.ProjectedVolumeSource{
DefaultMode: ptr.To[int32](0o600),
Sources: []corev1.VolumeProjection{
{
Secret: &corev1.SecretProjection{
LocalObjectReference: corev1.LocalObjectReference{
Name: nodeset.SshHostKeys().Name,
},
Items: []corev1.KeyToPath{
{Key: sshHostRsaKeyFile, Path: sshHostRsaKeyFile, Mode: ptr.To[int32](0o600)},
{Key: sshHostRsaPubKeyFile, Path: sshHostRsaPubKeyFile, Mode: ptr.To[int32](0o644)},
{Key: sshHostEd25519KeyFile, Path: sshHostEd25519KeyFile, Mode: ptr.To[int32](0o600)},
{Key: sshHostEd25519PubKeyFile, Path: sshHostEd25519PubKeyFile, Mode: ptr.To[int32](0o644)},
{Key: sshHostEcdsaKeyFile, Path: sshHostEcdsaKeyFile, Mode: ptr.To[int32](0o600)},
{Key: sshHostEcdsaPubKeyFile, Path: sshHostEcdsaPubKeyFile, Mode: ptr.To[int32](0o644)},
},
},
},
},
},
},
})
}

return out
}

func (b *Builder) slurmdContainer(nodeset *slinkyv1beta1.NodeSet, controller *slinkyv1beta1.Controller) corev1.Container {
merge := nodeset.Spec.Slurmd.Container

// Base ports always include slurmd
ports := []corev1.ContainerPort{
{
Name: labels.WorkerApp,
ContainerPort: SlurmdPort,
Protocol: corev1.ProtocolTCP,
},
}

// Add SSH port if enabled
if nodeset.Spec.Ssh.Enabled {
ports = append(ports, corev1.ContainerPort{
Name: "ssh",
ContainerPort: SshPort,
Protocol: corev1.ProtocolTCP,
})
}

// Base volume mounts
volumeMounts := []corev1.VolumeMount{
{Name: slurmEtcVolume, MountPath: slurmEtcDir, ReadOnly: true},
{Name: slurmLogFileVolume, MountPath: slurmLogFileDir},
}

// Add SSH host key mounts if enabled
if nodeset.Spec.Ssh.Enabled {
volumeMounts = append(volumeMounts,
corev1.VolumeMount{Name: sshHostKeysVolume, MountPath: sshHostRsaKeyFilePath, SubPath: sshHostRsaKeyFile, ReadOnly: true},
corev1.VolumeMount{Name: sshHostKeysVolume, MountPath: sshHostRsaKeyPubFilePath, SubPath: sshHostRsaPubKeyFile, ReadOnly: true},
corev1.VolumeMount{Name: sshHostKeysVolume, MountPath: sshHostEd25519KeyFilePath, SubPath: sshHostEd25519KeyFile, ReadOnly: true},
corev1.VolumeMount{Name: sshHostKeysVolume, MountPath: sshHostEd25519PubKeyFilePath, SubPath: sshHostEd25519PubKeyFile, ReadOnly: true},
corev1.VolumeMount{Name: sshHostKeysVolume, MountPath: sshHostEcdsaKeyFilePath, SubPath: sshHostEcdsaKeyFile, ReadOnly: true},
corev1.VolumeMount{Name: sshHostKeysVolume, MountPath: sshHostEcdsaPubKeyFilePath, SubPath: sshHostEcdsaPubKeyFile, ReadOnly: true},
)
}

opts := ContainerOpts{
base: corev1.Container{
Name: labels.WorkerApp,
Args: slurmdArgs(nodeset, controller),
Ports: []corev1.ContainerPort{
{
Name: labels.WorkerApp,
ContainerPort: SlurmdPort,
Protocol: corev1.ProtocolTCP,
},
},
Name: labels.WorkerApp,
Args: slurmdArgs(nodeset, controller),
Ports: ports,
StartupProbe: &corev1.Probe{
ProbeHandler: corev1.ProbeHandler{
HTTPGet: &corev1.HTTPGetAction{
Expand Down Expand Up @@ -169,10 +230,7 @@ func (b *Builder) slurmdContainer(nodeset *slinkyv1beta1.NodeSet, controller *sl
},
},
},
VolumeMounts: []corev1.VolumeMount{
{Name: slurmEtcVolume, MountPath: slurmEtcDir, ReadOnly: true},
{Name: slurmLogFileVolume, MountPath: slurmLogFileDir},
},
VolumeMounts: volumeMounts,
},
merge: merge,
}
Expand Down
48 changes: 48 additions & 0 deletions internal/builder/worker_secret.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
// SPDX-FileCopyrightText: Copyright (C) SchedMD LLC.
// SPDX-License-Identifier: Apache-2.0

package builder

import (
"fmt"

corev1 "k8s.io/api/core/v1"

slinkyv1beta1 "github.com/SlinkyProject/slurm-operator/api/v1beta1"
"github.com/SlinkyProject/slurm-operator/internal/builder/labels"
"github.com/SlinkyProject/slurm-operator/internal/utils/crypto"
"github.com/SlinkyProject/slurm-operator/internal/utils/structutils"
)

// BuildWorkerSshHostKeys builds the immutable Secret that carries the SSH
// host keys for the worker pods of the given NodeSet.
//
// A fresh RSA, ED25519 and ECDSA key pair is generated on every call; the
// private and public halves of each pair are stored under their respective
// host key file names. The Secret is named by nodeset.SshHostKeys(), inherits
// the NodeSet's pod template metadata, and is labelled with the worker labels.
//
// An error is returned if any key pair cannot be generated, or if building
// the Secret itself fails.
func (b *Builder) BuildWorkerSshHostKeys(nodeset *slinkyv1beta1.NodeSet) (*corev1.Secret, error) {
	// Generate one key pair per supported host key algorithm.
	rsaPair, err := crypto.NewKeyPair(crypto.WithType(crypto.KeyPairRsa))
	if err != nil {
		return nil, fmt.Errorf("failed to create RSA key pair: %w", err)
	}
	ed25519Pair, err := crypto.NewKeyPair(crypto.WithType(crypto.KeyPairEd25519))
	if err != nil {
		return nil, fmt.Errorf("failed to create ED25519 key pair: %w", err)
	}
	ecdsaPair, err := crypto.NewKeyPair(crypto.WithType(crypto.KeyPairEcdsa))
	if err != nil {
		return nil, fmt.Errorf("failed to create ECDSA key pair: %w", err)
	}

	secretKey := nodeset.SshHostKeys()

	// Each file name must map to the key material of the matching algorithm.
	hostKeys := map[string][]byte{
		sshHostEcdsaKeyFile:      ecdsaPair.PrivateKey(),
		sshHostEcdsaPubKeyFile:   ecdsaPair.PublicKey(),
		sshHostEd25519KeyFile:    ed25519Pair.PrivateKey(),
		sshHostEd25519PubKeyFile: ed25519Pair.PublicKey(),
		sshHostRsaKeyFile:        rsaPair.PrivateKey(),
		sshHostRsaPubKeyFile:     rsaPair.PublicKey(),
	}

	// Start from the pod template metadata and layer the worker labels on top.
	metadata := nodeset.Spec.Template.PodMetadata
	workerLabels := labels.NewBuilder().WithWorkerLabels(nodeset).Build()
	metadata.Labels = structutils.MergeMaps(metadata.Labels, workerLabels)

	return b.BuildSecret(SecretOpts{
		Key:       secretKey,
		Metadata:  metadata,
		Data:      hostKeys,
		Immutable: true,
	}, nodeset)
}
73 changes: 73 additions & 0 deletions internal/builder/worker_secret_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
// SPDX-FileCopyrightText: Copyright (C) SchedMD LLC.
// SPDX-License-Identifier: Apache-2.0

package builder

import (
"testing"

slinkyv1beta1 "github.com/SlinkyProject/slurm-operator/api/v1beta1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/fake"
)

// TestBuilder_BuildWorkerSshHostKeys verifies that the Secret built for a
// NodeSet's SSH host keys contains non-empty material for the private and
// public half of every host key type (ECDSA, ED25519, RSA).
func TestBuilder_BuildWorkerSshHostKeys(t *testing.T) {
	type fields struct {
		client client.Client
	}
	type args struct {
		nodeset *slinkyv1beta1.NodeSet
	}
	tests := []struct {
		name    string
		fields  fields
		args    args
		wantErr bool
	}{
		{
			name: "default",
			fields: fields{
				client: fake.NewFakeClient(),
			},
			args: args{
				nodeset: &slinkyv1beta1.NodeSet{
					ObjectMeta: metav1.ObjectMeta{
						Name: "slurm",
					},
				},
			},
		},
	}

	// Every entry the Secret is expected to carry.
	wantKeys := []string{
		sshHostEcdsaKeyFile,
		sshHostEcdsaPubKeyFile,
		sshHostEd25519KeyFile,
		sshHostEd25519PubKeyFile,
		sshHostRsaKeyFile,
		sshHostRsaPubKeyFile,
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			b := New(tt.fields.client)
			got, err := b.BuildWorkerSshHostKeys(tt.args.nodeset)
			if (err != nil) != tt.wantErr {
				t.Fatalf("Builder.BuildWorkerSshHostKeys() error = %v, wantErr %v", err, tt.wantErr)
			}
			if err != nil {
				return
			}
			// Fail cleanly instead of panicking on a nil dereference below.
			if got == nil {
				t.Fatal("Builder.BuildWorkerSshHostKeys() = nil, want a Secret")
			}

			// Check every key independently so that a single run reports all
			// missing entries rather than only the first one. A zero-length
			// value counts as missing just like an absent one.
			for _, key := range wantKeys {
				if len(got.Data[key]) == 0 && got.StringData[key] == "" {
					t.Errorf("got.Data[%s] = %v, got.StringData[%s] = %q, want non-empty key material",
						key, got.Data[key], key, got.StringData[key])
				}
			}
		})
	}
}
Loading