-
Notifications
You must be signed in to change notification settings - Fork 54
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Update existing module and support creating eks and vpc modules out of the box
- Loading branch information
1 parent
ee7093c
commit 299a1dd
Showing
48 changed files
with
1,231 additions
and
429 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,31 @@ | ||
.terraform | ||
# Local .terraform directories | ||
**/.terraform/* | ||
|
||
# .tfstate files | ||
*.tfstate | ||
*.tfstate.* | ||
|
||
# Crash log files | ||
crash.log | ||
|
||
# Ignore override files as they are usually used to override resources locally and should not be committed | ||
override.tf | ||
override.tf.json | ||
*_override.tf | ||
*_override.tf.json | ||
|
||
# Ignore CLI configuration files | ||
.terraformrc | ||
terraform.rc | ||
|
||
# Ignore sensitive variable files | ||
*.tfvars | ||
*.tfvars.json | ||
|
||
# Ignore plan output files | ||
*.tfplan | ||
|
||
# Ignore the dependency lock file and the default local state files | ||
.terraform.lock.hcl | ||
terraform.tfstate | ||
terraform.tfstate.backup | ||
|
||
*.metaflow* |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
# The metadata service module is count-gated below; this re-addresses the
# previously un-indexed module instance to index 0 in existing state.
moved {
  from = module.metaflow-metadata-service
  to   = module.metaflow-metadata-service[0]
}

# Metadata service, instantiated only when
# var.create_managed_metaflow_metadata_service is true.
module "metaflow-metadata-service" {
  count  = var.create_managed_metaflow_metadata_service ? 1 : 0
  source = "./modules/metadata-service"

  # Naming and tagging
  resource_prefix = local.resource_prefix
  resource_suffix = local.resource_suffix
  standard_tags   = var.tags

  # Networking
  metaflow_vpc_id         = local.vpc_id
  subnet_ids              = local.subnet_ids
  vpc_cidr_blocks         = local.vpc_cidr_block
  access_list_cidr_blocks = var.access_list_cidr_blocks
  with_public_ip          = local.with_public_ip

  # Database
  database_name                = local.database_name
  database_username            = local.database_username
  database_password            = local.database_password
  rds_master_instance_endpoint = local.rds_master_instance_endpoint
  db_migrate_lambda_zip_file   = var.db_migrate_lambda_zip_file

  # Datastore
  s3_bucket_arn                   = local.s3_bucket_arn
  datastore_s3_bucket_kms_key_arn = local.datastore_s3_bucket_kms_key_arn

  # Service configuration
  metadata_service_container_image = local.metadata_service_container_image
  enable_api_basic_auth            = var.metadata_service_enable_api_basic_auth
  enable_api_gateway               = var.metadata_service_enable_api_gateway

  # IAM
  iam_partition = var.iam_partition
  # NOTE(review): module.metaflow-computation is count-gated on
  # var.create_managed_compute, so this [0] index only resolves when that
  # flag is true — confirm the two flags are always enabled together.
  fargate_execution_role_arn = module.metaflow-computation[0].ecs_execution_role_arn
}
|
||
# Metaflow UI, instantiated only when var.create_managed_metaflow_ui is true.
module "metaflow-ui" {
  count  = var.create_managed_metaflow_ui ? 1 : 0
  source = "./modules/ui"

  # Naming and tagging
  resource_prefix = local.resource_prefix
  resource_suffix = local.resource_suffix
  standard_tags   = var.tags

  # Networking and load balancer
  metaflow_vpc_id = local.vpc_id
  subnet_ids      = local.subnet_ids
  alb_subnet_ids  = local.alb_subnet_ids
  alb_internal    = !var.metaflow_ui_is_public
  ui_allow_list   = var.ui_allow_list
  certificate_arn = var.ui_certificate_arn

  # Database
  database_name                = local.database_name
  database_username            = local.database_username
  database_password            = local.database_password
  rds_master_instance_endpoint = local.rds_master_instance_endpoint

  # Datastore
  s3_bucket_arn                   = local.s3_bucket_arn
  datastore_s3_bucket_kms_key_arn = local.datastore_s3_bucket_kms_key_arn
  METAFLOW_DATASTORE_SYSROOT_S3   = local.METAFLOW_DATASTORE_SYSROOT_S3

  # Containers; the UI backend runs the same image as the metadata service.
  ui_backend_container_image = local.metadata_service_container_image
  ui_static_container_image  = var.ui_static_container_image
  extra_ui_backend_env_vars  = var.extra_ui_backend_env_vars
  extra_ui_static_env_vars   = var.extra_ui_static_env_vars

  # IAM
  iam_partition = var.iam_partition

  # NOTE(review): both references below index [0] into other count-gated
  # modules (compute and metadata service), so they only resolve when those
  # modules are enabled as well — confirm the flags are always set together.
  fargate_execution_role_arn         = module.metaflow-computation[0].ecs_execution_role_arn
  metadata_service_security_group_id = module.metaflow-metadata-service[0].metadata_service_security_group_id
}
|
||
# The computation module is count-gated below; this re-addresses the
# previously un-indexed module instance to index 0 in existing state.
moved {
  from = module.metaflow-computation
  to   = module.metaflow-computation[0]
}

# Compute resources, instantiated only when var.create_managed_compute is true.
module "metaflow-computation" {
  count  = var.create_managed_compute ? 1 : 0
  source = "./modules/computation"

  # Naming and tagging
  resource_prefix = local.resource_prefix
  resource_suffix = local.resource_suffix
  standard_tags   = var.tags

  # Networking
  metaflow_vpc_id = local.vpc_id
  subnet_ids      = local.subnet_ids

  # IAM
  iam_partition = var.iam_partition

  # Compute environment
  batch_type                             = var.batch_type
  compute_environment_instance_types     = var.compute_environment_instance_types
  compute_environment_min_vcpus          = var.compute_environment_min_vcpus
  compute_environment_desired_vcpus      = var.compute_environment_desired_vcpus
  compute_environment_max_vcpus          = var.compute_environment_max_vcpus
  compute_environment_egress_cidr_blocks = var.compute_environment_egress_cidr_blocks

  # Launch template HTTP settings, passed through from the root variables
  launch_template_http_endpoint               = var.launch_template_http_endpoint
  launch_template_http_tokens                 = var.launch_template_http_tokens
  launch_template_http_put_response_hop_limit = var.launch_template_http_put_response_hop_limit
}
|
||
# The step functions module is count-gated below; this re-addresses the
# previously un-indexed module instance to index 0 in existing state.
# The address must match the module label exactly ("metaflow-step-functions",
# plural); with a different name the statement does not apply to this module.
moved {
  from = module.metaflow-step-functions
  to   = module.metaflow-step-functions[0]
}

# Step Functions resources, instantiated only when var.create_step_functions
# is true.
module "metaflow-step-functions" {
  source = "./modules/step-functions"
  count  = var.create_step_functions ? 1 : 0

  resource_prefix = local.resource_prefix
  resource_suffix = local.resource_suffix
  iam_partition   = var.iam_partition

  # NOTE(review): the [0] indexes below only resolve when the referenced
  # modules (metaflow-computation, metaflow-datastore) are instantiated.
  # The other modules in this file read the bucket and KMS key from
  # local.s3_bucket_arn / local.datastore_s3_bucket_kms_key_arn instead —
  # confirm whether this module should do the same.
  batch_job_queue_arn = module.metaflow-computation[0].METAFLOW_BATCH_JOB_QUEUE
  s3_bucket_arn       = module.metaflow-datastore[0].s3_bucket_arn
  s3_bucket_kms_arn   = module.metaflow-datastore[0].datastore_s3_bucket_kms_key_arn

  standard_tags = var.tags
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,168 @@ | ||
# EKS cluster with managed node groups, created only when
# var.create_eks_cluster is true.
module "eks" {
  source  = "terraform-aws-modules/eks/aws"
  version = "20.31.6"
  count   = var.create_eks_cluster ? 1 : 0

  cluster_version = "1.31" # Specify the desired EKS version
  cluster_name    = local.eks_name
  vpc_id          = local.vpc_id
  subnet_ids      = local.subnet_ids
  enable_irsa     = true

  cluster_endpoint_public_access  = true
  cluster_endpoint_private_access = true

  # Defaults applied to every managed node group; entries in
  # var.node_group_defaults override the values set here.
  eks_managed_node_group_defaults = merge({
    ami_type = "AL2023_x86_64_STANDARD"
    # NOTE(review): verify disk_size takes effect with the module's default
    # custom launch template; block_device_mappings may be required instead.
    disk_size = 50

    # Extra policies for the node group IAM role. These are set on the node
    # group defaults because the module's top-level
    # iam_role_additional_policies input targets the cluster IAM role.
    iam_role_additional_policies = length(var.node_group_iam_role_additional_policies) > 0 ? var.node_group_iam_role_additional_policies : {
      "default_node" = aws_iam_policy.default_node[0].arn,
      "autoscaler"   = aws_iam_policy.cluster_autoscaler[0].arn,
      # Allow SSM access to the machines in case direct access is needed
      "ssm" = "arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore",
    }
  }, var.node_group_defaults)

  # Default node group; entries in var.node_groups are merged on top.
  # The v20 module reads desired_size and instance_types (a list); the older
  # desired_capacity / instance_type keys are not recognised by it.
  eks_managed_node_groups = merge({
    metaflow_default = {
      desired_size   = 2
      max_size       = 2
      min_size       = 1
      instance_types = ["m5.large"]
    }
  }, var.node_groups)

  tags = var.tags
}
|
||
# Default node policy. Created only when the EKS cluster is created and the
# caller supplied no entries in var.node_group_iam_role_additional_policies.
resource "aws_iam_policy" "default_node" {
  count = (var.create_eks_cluster && length(var.node_group_iam_role_additional_policies) == 0) ? 1 : 0

  policy      = data.aws_iam_policy_document.default_node.json
  description = "Default policy for cluster ${local.resource_prefix}-eks${local.resource_suffix}"
  name_prefix = "${local.resource_prefix}-default-node-policy${local.resource_suffix}"
}
|
||
# Policy document behind aws_iam_policy.default_node.
# NOTE(review): despite the "S3" sid, this statement allows every S3 *and*
# KMS action on all resources — consider scoping it to the Metaflow bucket
# and key.
data "aws_iam_policy_document" "default_node" {
  statement {
    sid       = "S3"
    effect    = "Allow"
    resources = ["*"]
    actions   = ["s3:*", "kms:*"]
  }
}
|
||
# Looks up the IAM role named by the second "/"-separated segment of the
# caller identity ARN (presumably the role name of an assumed-role session —
# confirm for your credentials). Gated on var.create_eks_cluster so that the
# lookup is skipped, and cannot fail for non-role callers, when no cluster is
# being created.
data "aws_iam_role" "current_role" {
  count = var.create_eks_cluster ? 1 : 0

  name = element(split("/", data.aws_caller_identity.current.arn), 1)
}

# Registers the role running Terraform as a STANDARD access entry on the
# cluster.
resource "aws_eks_access_entry" "provider_cluster_admin" {
  count = var.create_eks_cluster ? 1 : 0

  cluster_name  = module.eks[0].cluster_name
  principal_arn = data.aws_iam_role.current_role[0].arn
  type          = "STANDARD"
}

# Associates the cluster admin access policy with that access entry, scoped
# to the whole cluster.
resource "aws_eks_access_policy_association" "provider_cluster_admin" {
  count = var.create_eks_cluster ? 1 : 0

  # No attribute of the access entry is referenced here, so the ordering has
  # to be declared explicitly.
  depends_on    = [aws_eks_access_entry.provider_cluster_admin]
  cluster_name  = module.eks[0].cluster_name
  policy_arn    = "arn:aws:eks::aws:cluster-access-policy/AmazonEKSClusterAdminPolicy"
  principal_arn = data.aws_iam_role.current_role[0].arn

  access_scope {
    type = "cluster"
  }
}
|
||
# Cluster-autoscaler policy. Created only when the EKS cluster is created and
# the caller supplied no entries in var.node_group_iam_role_additional_policies.
resource "aws_iam_policy" "cluster_autoscaler" {
  count = (var.create_eks_cluster && length(var.node_group_iam_role_additional_policies) == 0) ? 1 : 0

  policy      = data.aws_iam_policy_document.cluster_autoscaler[0].json
  description = "EKS cluster-autoscaler policy for cluster ${local.eks_name}"
  name_prefix = "${local.resource_prefix}-cluster-autoscaler${local.resource_suffix}"
}
|
||
# Policy document behind aws_iam_policy.cluster_autoscaler; rendered only
# when var.create_eks_cluster is true.
data "aws_iam_policy_document" "cluster_autoscaler" {
  count = var.create_eks_cluster ? 1 : 0

  # Describe-only actions, allowed on all resources.
  statement {
    sid       = "clusterAutoscalerAll"
    effect    = "Allow"
    resources = ["*"]

    actions = [
      "autoscaling:DescribeAutoScalingGroups",
      "autoscaling:DescribeAutoScalingInstances",
      "autoscaling:DescribeLaunchConfigurations",
      "autoscaling:DescribeTags",
      "ec2:DescribeLaunchTemplateVersions",
    ]
  }

  # Mutating actions, allowed only on Auto Scaling groups carrying both the
  # cluster ownership tag and the autoscaler "enabled" tag.
  statement {
    sid       = "clusterAutoscalerOwn"
    effect    = "Allow"
    resources = ["*"]

    actions = [
      "autoscaling:SetDesiredCapacity",
      "autoscaling:TerminateInstanceInAutoScalingGroup",
      "autoscaling:UpdateAutoScalingGroup",
    ]

    condition {
      test     = "StringEquals"
      variable = "autoscaling:ResourceTag/kubernetes.io/cluster/${local.eks_name}"
      values   = ["owned"]
    }

    condition {
      test     = "StringEquals"
      variable = "autoscaling:ResourceTag/k8s.io/cluster-autoscaler/enabled"
      values   = ["true"]
    }
  }
}
|
||
# Cluster details and an auth token for the cluster created by module.eks;
# both are read only when var.create_eks_cluster is true.
data "aws_eks_cluster" "cluster" {
  name  = module.eks[0].cluster_name
  count = var.create_eks_cluster ? 1 : 0
}

data "aws_eks_cluster_auth" "cluster" {
  name  = module.eks[0].cluster_name
  count = var.create_eks_cluster ? 1 : 0
}
|
||
# Services module. It is always instantiated; every cluster-derived input
# falls back to an empty string when var.create_eks_cluster is false.
module "metaflow_helm" {
  source = "./modules/services"

  # Cluster connection details
  kubernetes_cluster_host           = var.create_eks_cluster ? data.aws_eks_cluster.cluster[0].endpoint : ""
  kubernetes_cluster_ca_certificate = var.create_eks_cluster ? data.aws_eks_cluster.cluster[0].certificate_authority[0].data : ""
  kubernetes_token                  = var.create_eks_cluster ? data.aws_eks_cluster_auth.cluster[0].token : ""

  # Cluster identity
  cluster_name          = var.create_eks_cluster ? module.eks[0].cluster_name : ""
  cluster_oidc_provider = var.create_eks_cluster ? module.eks[0].oidc_provider : ""
  account_id            = data.aws_caller_identity.current.account_id
  region                = data.aws_region.current.name

  # Deployment switches and values
  resource_name_prefix      = local.resource_prefix
  deploy_metaflow_service   = var.deploy_metaflow_services_in_eks
  deploy_cluster_autoscaler = var.deploy_cluster_autoscaler
  metaflow_helm_values      = var.metaflow_helm_values

  # Database connection; host is the part of the RDS endpoint before the
  # first ":".
  metaflow_database = {
    host          = split(":", local.rds_master_instance_endpoint)[0]
    database_name = local.database_name
    user          = local.database_username
    password      = local.database_password
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
############################################################################### | ||
# An example that uses this module to set up a minimal Metaflow deployment | ||
# with AWS Batch support, without the UI. | ||
############################################################################### | ||
|
||
# Terraform and provider version constraints for this example.
terraform {
  required_version = ">= 1.10"

  required_providers {
    aws = {
      source  = "hashicorp/aws"
      version = ">= 5.82"
    }
    random = {
      source  = "hashicorp/random"
      version = ">= 3.6"
    }
    # Declared explicitly because this example defines a local_file resource.
    local = {
      source  = "hashicorp/local"
      version = ">= 2.0"
    }
  }
}
|
||
# Make sure to set the region to the one you want to deploy to.
provider "aws" {
  region = "us-west-2"
}
|
||
|
||
# Instantiate the root module from two directories up, asking it to create
# its own VPC.
module "metaflow" {
  source     = "../../"
  create_vpc = true

  tags = { managedBy = "terraform" }
}
|
||
# Write the module's metaflow_aws_managed_profile_json output (the Metaflow
# config in JSON format) to a file in the current directory.
resource "local_file" "metaflow_config" {
  filename = "./metaflow_profile.json"
  content  = module.metaflow.metaflow_aws_managed_profile_json
}
Oops, something went wrong.