Skip to content

Add support for compute-cluster #1003

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions examples/workers/vars-workers-compute-clusters.tfvars
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Copyright (c) 2023 Oracle Corporation and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl

# Shared compute clusters, keyed by name; worker pools with
# mode = "compute-cluster" reference an entry here via 'compute_cluster'.
worker_compute_clusters = {
  "shared" = {
    placement_ad = 1 # availability domain number for cluster placement
  }
}

# Example worker pools placing self-managed bare-metal nodes into the shared
# compute cluster defined above for RDMA networking.
worker_pools = {
  oke-bm-rdma = {
    description      = "Self-managed nodes in a Compute Cluster with RDMA networking"
    mode             = "compute-cluster"
    compute_cluster  = "shared" # references worker_compute_clusters["shared"]
    placement_ad     = "1"
    instance_ids     = ["1", "2", "3"]
    shape            = "BM.HPC2.36"
    boot_volume_size = 50
  },

  oke-bm-gpu-rdma = {
    description = "Self-managed GPU nodes in a Compute Cluster with RDMA networking"
    # Mode must be 'compute-cluster' (not 'cluster-network') for this pool to
    # join the shared compute cluster; 'compute_cluster' and 'instance_ids'
    # are only consumed by compute-cluster pools.
    mode             = "compute-cluster"
    compute_cluster  = "shared"
    placement_ad     = "1"
    instance_ids     = ["1", "2"]
    shape            = "BM.GPU4.8"
    image_id         = "ocid1.image..." # required when image_type = "custom"
    image_type       = "custom"
    boot_volume_size = 50
  }
}
5 changes: 4 additions & 1 deletion module-workers.tf
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ module "workers" {
cluster_type = var.cluster_type
kubernetes_version = var.kubernetes_version

# Compute clusters
compute_clusters = var.worker_compute_clusters

# Worker pools
worker_pool_mode = var.worker_pool_mode
worker_pool_size = var.worker_pool_size
Expand Down Expand Up @@ -103,4 +106,4 @@ output "worker_pool_ids" {
output "worker_pool_ips" {
description = "Created worker instance private IPs by pool for available modes ('node-pool', 'instance')."
value = local.worker_count_expected > 0 ? try(one(module.workers[*].worker_pool_ips), null) : null
}
}
181 changes: 181 additions & 0 deletions modules/workers/computecluster.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
# Copyright (c) 2022, 2025 Oracle Corporation and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl

# Create the shared compute clusters defined in worker_compute_clusters

resource "oci_core_compute_cluster" "shared" {
  # One cluster per entry of var.compute_clusters (worker_compute_clusters at
  # the root module); pools reference these by key via 'compute_cluster'.
  for_each       = var.compute_clusters
  compartment_id = lookup(each.value, "compartment_id", var.compartment_id) # per-cluster override, else module default
  display_name   = each.key

  # Cluster-specific tags are merged over the module-wide defaults.
  defined_tags = merge(
    var.defined_tags,
    lookup(each.value, "defined_tags", {})
  )
  freeform_tags = merge(
    var.freeform_tags,
    lookup(each.value, "freeform_tags", {})
  )

  # Resolve the configured AD number (default 1) to its full AD name.
  availability_domain = lookup(var.ad_numbers_to_names, lookup(each.value, "placement_ad", 1))

  lifecycle {
    # Tolerate out-of-band renames and retagging without forcing updates.
    ignore_changes = [
      display_name, defined_tags, freeform_tags,
    ]
  }
}

# Per-pool compute clusters for 'compute-cluster' worker pools that declare
# their own instances but do not reference a shared cluster by name.
resource "oci_core_compute_cluster" "workers" {
  # Only pools that have instances to place AND no 'compute_cluster' reference
  # get a dedicated cluster; pools naming a shared cluster use that instead.
  for_each       = { for k, v in local.enabled_compute_clusters : k => v if length(lookup(v, "instance_ids", [])) > 0 && lookup(v, "compute_cluster", null) == null }
  compartment_id = each.value.compartment_id
  display_name   = each.key
  defined_tags   = each.value.defined_tags
  freeform_tags  = each.value.freeform_tags
  # Prefer an explicit AD number on the pool; otherwise fall back to the
  # pool's first configured availability domain.
  availability_domain = lookup(each.value, "placement_ad", null) != null ? lookup(var.ad_numbers_to_names, lookup(each.value, "placement_ad")) : element(each.value.availability_domains, 0)

  lifecycle {
    # Tolerate out-of-band renames and retagging without forcing updates.
    ignore_changes = [
      display_name, defined_tags, freeform_tags,
    ]
  }
}

# Self-managed worker instances placed into a Compute Cluster. One instance
# per entry of compute_cluster_instance_map, keyed "<pool name>###<instance id>".
resource "oci_core_instance" "compute_cluster_workers" {
  for_each = local.compute_cluster_instance_map

  # AD resolution: the referenced shared cluster's AD when one is named,
  # else the pool's explicit placement_ad, else its first configured AD.
  availability_domain = (lookup(oci_core_compute_cluster.shared, lookup(each.value, "compute_cluster", ""), null) != null ?
    oci_core_compute_cluster.shared[lookup(each.value, "compute_cluster", "")].availability_domain :
    lookup(each.value, "placement_ad", null) != null ? lookup(var.ad_numbers_to_names, lookup(each.value, "placement_ad")) : element(each.value.availability_domains, 0)
  )
  fault_domain   = try(each.value.placement_fds[0], null) # first configured fault domain, if any
  compartment_id = each.value.compartment_id
  # "<pool>-<id>" derived from the "<pool>###<id>" map key
  display_name         = format("%s-%s", element(split("###", each.key), 0), element(split("###", each.key), 1))
  preserve_boot_volume = false
  shape                = each.value.shape

  defined_tags            = each.value.defined_tags
  freeform_tags           = each.value.freeform_tags
  extended_metadata       = each.value.extended_metadata
  capacity_reservation_id = each.value.capacity_reservation_id
  # Cluster resolution order: shared cluster referenced by name ->
  # per-pool cluster created for this pool -> the raw 'compute_cluster' value
  # itself (presumably an existing cluster OCID -- NOTE(review): confirm).
  compute_cluster_id = (lookup(oci_core_compute_cluster.shared, lookup(each.value, "compute_cluster", ""), null) != null ?
    oci_core_compute_cluster.shared[lookup(each.value, "compute_cluster", "")].id :
    (lookup(oci_core_compute_cluster.workers, element(split("###", each.key), 0), null) != null ?
      oci_core_compute_cluster.workers[element(split("###", each.key), 0)].id :
      lookup(each.value, "compute_cluster", "")
    )
  )

  # Optional shape/platform tuning; emitted only when the pool defines one.
  dynamic "platform_config" {
    for_each = each.value.platform_config != null ? [1] : []
    content {
      type = lookup(
        # Attempt lookup against data source for the associated 'type' of configured worker shape
        lookup(local.platform_config_by_shape, each.value.shape, {}), "type",
        # Fall back to 'type' on pool with custom platform_config, or INTEL_VM default
        lookup(each.value.platform_config, "type", "INTEL_VM")
      )
      # Remaining parameters as configured, validated by instance/instance config resource
      are_virtual_instructions_enabled               = lookup(each.value.platform_config, "are_virtual_instructions_enabled", null)
      is_access_control_service_enabled              = lookup(each.value.platform_config, "is_access_control_service_enabled", null)
      is_input_output_memory_management_unit_enabled = lookup(each.value.platform_config, "is_input_output_memory_management_unit_enabled", null)
      is_measured_boot_enabled                       = lookup(each.value.platform_config, "is_measured_boot_enabled", null)
      is_memory_encryption_enabled                   = lookup(each.value.platform_config, "is_memory_encryption_enabled", null)
      is_secure_boot_enabled                         = lookup(each.value.platform_config, "is_secure_boot_enabled", null)
      is_symmetric_multi_threading_enabled           = lookup(each.value.platform_config, "is_symmetric_multi_threading_enabled", null)
      is_trusted_platform_module_enabled             = lookup(each.value.platform_config, "is_trusted_platform_module_enabled", null)
      numa_nodes_per_socket                          = lookup(each.value.platform_config, "numa_nodes_per_socket", null)
      percentage_of_cores_enabled                    = lookup(each.value.platform_config, "percentage_of_cores_enabled", null)
    }
  }

  agent_config {
    are_all_plugins_disabled = each.value.agent_config.are_all_plugins_disabled
    is_management_disabled   = each.value.agent_config.is_management_disabled
    is_monitoring_disabled   = each.value.agent_config.is_monitoring_disabled
    # The two HPC RDMA plugins are always enabled for compute-cluster workers;
    # pool-configured plugins are merged over them and may add or override.
    dynamic "plugins_config" {
      for_each = merge(
        {
          "Compute HPC RDMA Authentication" : "ENABLED",
          "Compute HPC RDMA Auto-Configuration" : "ENABLED"
        },
        each.value.agent_config.plugins_config
      )
      content {
        name          = plugins_config.key
        desired_state = plugins_config.value
      }
    }
  }

  create_vnic_details {
    assign_private_dns_record = var.assign_dns
    assign_public_ip          = each.value.assign_public_ip
    nsg_ids                   = each.value.nsg_ids
    subnet_id                 = each.value.subnet_id
    defined_tags              = each.value.defined_tags
    freeform_tags             = each.value.freeform_tags
  }

  instance_options {
    are_legacy_imds_endpoints_disabled = false
  }

  # Instance metadata consumed at boot (cloud-init / OKE node bootstrap).
  metadata = merge(
    {
      apiserver_host           = var.apiserver_private_host
      cluster_ca_cert          = var.cluster_ca_cert
      oke-k8version            = var.kubernetes_version
      oke-kubeproxy-proxy-mode = var.kubeproxy_mode
      oke-tenancy-id           = var.tenancy_id
      oke-initial-node-labels  = join(",", [for k, v in each.value.node_labels : format("%v=%v", k, v)])
      secondary_vnics          = jsonencode(lookup(each.value, "secondary_vnics", {}))
      ssh_authorized_keys      = var.ssh_public_key
      # cloud-init rendered per pool; keyed by the pool-name prefix of each.key
      user_data = lookup(lookup(data.cloudinit_config.workers, element(split("###", each.key), 0), {}), "rendered", "")
    },

    # Add labels required for NPN CNI.
    var.cni_type == "npn" ? {
      oke-native-pod-networking = true
      oke-max-pods              = var.max_pods_per_node
      pod-subnets               = coalesce(var.pod_subnet_id, var.worker_subnet_id, "none")
      pod-nsgids                = join(",", each.value.pod_nsg_ids)
    } : {},

    # Only provide cluster DNS service address if set explicitly; determined automatically in practice.
    coalesce(var.cluster_dns, "none") == "none" ? {} : { kubedns_svc_ip = var.cluster_dns },

    # Extra user-defined fields merged last
    var.node_metadata,                       # global
    lookup(each.value, "node_metadata", {}), # pool-specific
  )

  source_details {
    boot_volume_size_in_gbs = each.value.boot_volume_size
    boot_volume_vpus_per_gb = each.value.boot_volume_vpus_per_gb
    source_id               = each.value.image_id
    source_type             = "image"
  }

  lifecycle {
    # Fail early with pool context when no image could be resolved.
    precondition {
      condition     = coalesce(each.value.image_id, "none") != "none"
      error_message = <<-EOT
      Missing image_id; check provided value if image_type is 'custom', or image_os/image_os_version if image_type is 'oke' or 'platform'.
      pool: ${element(split("###", each.key), 0)}
      image_type: ${coalesce(each.value.image_type, "none")}
      image_id: ${coalesce(each.value.image_id, "none")}
      EOT
    }

    ignore_changes = [
      agent_config, # TODO Not updateable; remove when supported
      defined_tags, freeform_tags, display_name,
      metadata["cluster_ca_cert"], metadata["user_data"],
      create_vnic_details[0].defined_tags,
      create_vnic_details[0].freeform_tags,
    ]
  }
}
10 changes: 7 additions & 3 deletions modules/workers/instance.tf
Original file line number Diff line number Diff line change
Expand Up @@ -83,11 +83,15 @@ resource "oci_core_instance" "workers" {
secondary_vnics = jsonencode(lookup(each.value, "secondary_vnics", {}))
ssh_authorized_keys = var.ssh_public_key
user_data = lookup(lookup(data.cloudinit_config.workers, lookup(each.value, "key", ""), {}), "rendered", "")
oke-native-pod-networking = var.cni_type == "npn" ? true : false
},

# Add labels required for NPN CNI.
var.cni_type == "npn" ? {
oke-native-pod-networking = true
oke-max-pods = var.max_pods_per_node
pod-subnets = coalesce(var.pod_subnet_id, var.worker_subnet_id, "none")
pod-nsgids = var.cni_type == "npn" ? join(",", each.value.pod_nsg_ids) : null
},
pod-nsgids = join(",", each.value.pod_nsg_ids)
} : {},

# Only provide cluster DNS service address if set explicitly; determined automatically in practice.
coalesce(var.cluster_dns, "none") == "none" ? {} : { kubedns_svc_ip = var.cluster_dns },
Expand Down
10 changes: 7 additions & 3 deletions modules/workers/instanceconfig.tf
Original file line number Diff line number Diff line change
Expand Up @@ -61,11 +61,15 @@ resource "oci_core_instance_configuration" "workers" {
secondary_vnics = jsonencode(lookup(each.value, "secondary_vnics", {}))
ssh_authorized_keys = var.ssh_public_key
user_data = lookup(lookup(data.cloudinit_config.workers, each.key, {}), "rendered", "")
oke-native-pod-networking = var.cni_type == "npn" ? true : false
},

# Add labels required for NPN CNI.
var.cni_type == "npn" ? {
oke-native-pod-networking = true
oke-max-pods = var.max_pods_per_node
pod-subnets = coalesce(var.pod_subnet_id, var.worker_subnet_id, "none")
pod-nsgids = var.cni_type == "npn" ? join(",", each.value.pod_nsg_ids) : null
},
pod-nsgids = join(",", each.value.pod_nsg_ids)
} : {},

# Only provide cluster DNS service address if set explicitly; determined automatically in practice.
coalesce(var.cluster_dns, "none") == "none" ? {} : { kubedns_svc_ip = var.cluster_dns },
Expand Down
40 changes: 25 additions & 15 deletions modules/workers/locals.tf
Original file line number Diff line number Diff line change
Expand Up @@ -104,20 +104,20 @@ locals {

# Use provided image_id for 'custom' type, or first match for all shape + OS criteria
image_id = (
pool.image_type == "custom" ?
pool.image_id :
element(split("###", element(reverse(sort([for entry in tolist(setintersection([
pool.image_type == "oke" ?
setintersection(
lookup(var.image_ids, "oke", null),
lookup(var.image_ids, trimprefix(lower(pool.kubernetes_version), "v"), null)
) :
lookup(var.image_ids, "platform", null),
lookup(var.image_ids, pool.image_type, null),
length(regexall("GPU", pool.shape)) > 0 ? var.image_ids.gpu : var.image_ids.nongpu,
length(regexall("A[12]\\.", pool.shape)) > 0 ? var.image_ids.aarch64 : var.image_ids.x86_64,
lookup(var.image_ids, format("%v %v", pool.os, split(".", pool.os_version)[0]), null),
]...)): "${var.indexed_images[entry].sort_key}###${entry}"])), 0)), 1)
pool.image_type == "custom" ?
pool.image_id :
element(split("###", element(reverse(sort([for entry in tolist(setintersection([
pool.image_type == "oke" ?
setintersection(
lookup(var.image_ids, "oke", null),
lookup(var.image_ids, trimprefix(lower(pool.kubernetes_version), "v"), null)
) :
lookup(var.image_ids, "platform", null),
lookup(var.image_ids, pool.image_type, null),
length(regexall("GPU", pool.shape)) > 0 ? var.image_ids.gpu : var.image_ids.nongpu,
length(regexall("A[12]\\.", pool.shape)) > 0 ? var.image_ids.aarch64 : var.image_ids.x86_64,
lookup(var.image_ids, format("%v %v", pool.os, split(".", pool.os_version)[0]), null),
]...)): "${var.indexed_images[entry].sort_key}###${entry}"])), 0)), 1)
)

# Standard tags as defined if enabled for use
Expand Down Expand Up @@ -224,6 +224,16 @@ locals {
for k, v in local.enabled_worker_pools : k => v if lookup(v, "mode", "") == "cluster-network"
}

  # Enabled worker_pool map entries with mode = "compute-cluster"
  enabled_compute_clusters = {
    for k, v in local.enabled_worker_pools : k => v if lookup(v, "mode", "") == "compute-cluster"
  }

  # Flatten compute-cluster pools to one entry per worker instance,
  # keyed "<pool name>###<instance id>":
  # 1) pool name => set of its configured instance ids
  compute_cluster_instance_ids_map = { for k, v in local.enabled_compute_clusters : k => toset(lookup(v, "instance_ids", [])) }
  # 2) composite "<pool>###<id>" keys, one per (pool, instance) pair
  compute_cluster_instance_ids = toset(concat(flatten([for k, v in local.compute_cluster_instance_ids_map : [for id in v : format("%s###%s", k, id)]])))
  # 3) composite key => owning pool's attributes (drives oci_core_instance.compute_cluster_workers)
  compute_cluster_instance_map = { for id in local.compute_cluster_instance_ids : id => lookup(local.enabled_compute_clusters, element(split("###", id), 0), {}) }

# Sanitized worker_pools output; some conditionally-used defaults would be misleading
worker_pools_final = {
for pool_name, pool in local.enabled_worker_pools : pool_name => { for a, b in pool : a => b
Expand Down Expand Up @@ -270,4 +280,4 @@ locals {

# Yields {<pool name> = {<instance id> = <instance ip>}} for modes: 'node-pool', 'instance'
worker_pool_ips = merge(local.worker_instance_ips, local.worker_nodepool_ips)
}
}
10 changes: 10 additions & 0 deletions modules/workers/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -318,3 +318,13 @@ variable "agent_config" {
plugins_config = map(string),
})
}

#
# Workers: compute-cluster
#

# Shared compute clusters, keyed by cluster name; referenced from worker
# pools with mode 'compute-cluster' via their 'compute_cluster' attribute.
variable "compute_clusters" {
  default     = {}
  description = "Map of shared compute clusters to create, keyed by name, for use by nodes across multiple worker pools enabled for the 'compute-cluster' mode."
  type        = map(any)
}
2 changes: 1 addition & 1 deletion modules/workers/versions.tf
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ terraform {

oci = {
source = "oracle/oci"
version = ">= 4.119.0"
version = ">= 6.37.0"
}
}
}
Expand Down
10 changes: 10 additions & 0 deletions variables-workers.tf
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,16 @@ variable "worker_pool_size" {
type = number
}

#
# Workers: Compute clusters
#

# Shared compute clusters, keyed by cluster name; passed through to the
# workers module and referenced from pools with mode 'compute-cluster'.
variable "worker_compute_clusters" {
  default     = {}
  description = "Map of shared compute clusters to create, keyed by name, for use by nodes across multiple worker pools enabled for the 'compute-cluster' mode."
  type        = map(any)
}

#
# Workers: network
#
Expand Down