Skip to content

Commit

Permalink
Update existing module and support creating eks and vpc modules out o…
Browse files Browse the repository at this point in the history
…f the box
  • Loading branch information
josephsirak committed Jan 2, 2025
1 parent ee7093c commit 299a1dd
Show file tree
Hide file tree
Showing 48 changed files with 1,231 additions and 429 deletions.
33 changes: 30 additions & 3 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,31 @@
.terraform
# Local .terraform directories
**/.terraform/*

# .tfstate files
*.tfstate
*.tfstate.*

# Crash log files
crash.log

# Ignore override files as they are usually used to override resources locally and should not be committed
override.tf
override.tf.json
*_override.tf
*_override.tf.json

# Ignore CLI configuration files
.terraformrc
terraform.rc

# Ignore sensitive variable files
*.tfvars
*.tfvars.json

# Ignore plan output files
*.tfplan

# Ignore lock files
.terraform.lock.hcl
terraform.tfstate
terraform.tfstate.backup

*.metaflow*
6 changes: 3 additions & 3 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v2.3.0
rev: v5.0.0
hooks:
- id: end-of-file-fixer
- id: trailing-whitespace
- repo: https://github.com/antonbabenko/pre-commit-terraform
rev: v1.62.0 # Get the latest from: https://github.com/antonbabenko/pre-commit-terraform/releases
rev: v1.96.3
hooks:
- id: terraform_fmt
- repo: https://github.com/terraform-docs/terraform-docs
rev: "v0.15.0"
rev: "v0.18.0"
hooks:
- id: terraform-docs-go
name: "Main terraform module docs"
Expand Down
56 changes: 41 additions & 15 deletions README.md

Large diffs are not rendered by default.

112 changes: 112 additions & 0 deletions aws_managed.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
# State migration: the module call below is now count-indexed, so the
# previously un-indexed instance is re-addressed to index 0.
moved {
  from = module.metaflow-metadata-service
  to   = module.metaflow-metadata-service[0]
}

# Metaflow metadata service. Instantiated only when
# var.create_managed_metaflow_metadata_service is true.
module "metaflow-metadata-service" {
  count  = var.create_managed_metaflow_metadata_service ? 1 : 0
  source = "./modules/metadata-service"

  # Naming and tagging
  resource_prefix = local.resource_prefix
  resource_suffix = local.resource_suffix
  standard_tags   = var.tags

  # Database connection
  database_name                = local.database_name
  database_username            = local.database_username
  database_password            = local.database_password
  rds_master_instance_endpoint = local.rds_master_instance_endpoint
  db_migrate_lambda_zip_file   = var.db_migrate_lambda_zip_file

  # Datastore
  s3_bucket_arn                   = local.s3_bucket_arn
  datastore_s3_bucket_kms_key_arn = local.datastore_s3_bucket_kms_key_arn

  # Networking
  metaflow_vpc_id         = local.vpc_id
  subnet_ids              = local.subnet_ids
  vpc_cidr_blocks         = local.vpc_cidr_block
  access_list_cidr_blocks = var.access_list_cidr_blocks
  with_public_ip          = local.with_public_ip

  # API exposure
  enable_api_basic_auth = var.metadata_service_enable_api_basic_auth
  enable_api_gateway    = var.metadata_service_enable_api_gateway

  # IAM and container image
  iam_partition                    = var.iam_partition
  metadata_service_container_image = local.metadata_service_container_image

  # NOTE(review): this indexes module.metaflow-computation[0], which has
  # count = 0 when var.create_managed_compute is false. Confirm the two
  # flags are always enabled together.
  fargate_execution_role_arn = module.metaflow-computation[0].ecs_execution_role_arn
}

# Metaflow UI (static frontend + backend behind an ALB). Instantiated only
# when var.create_managed_metaflow_ui is true.
module "metaflow-ui" {
  count  = var.create_managed_metaflow_ui ? 1 : 0
  source = "./modules/ui"

  # Naming and tagging
  resource_prefix = local.resource_prefix
  resource_suffix = local.resource_suffix
  standard_tags   = var.tags

  # Database connection
  database_name                = local.database_name
  database_username            = local.database_username
  database_password            = local.database_password
  rds_master_instance_endpoint = local.rds_master_instance_endpoint

  # Datastore
  s3_bucket_arn                   = local.s3_bucket_arn
  datastore_s3_bucket_kms_key_arn = local.datastore_s3_bucket_kms_key_arn
  METAFLOW_DATASTORE_SYSROOT_S3   = local.METAFLOW_DATASTORE_SYSROOT_S3

  # Networking and load balancer
  metaflow_vpc_id = local.vpc_id
  subnet_ids      = local.subnet_ids
  alb_subnet_ids  = local.alb_subnet_ids
  alb_internal    = !var.metaflow_ui_is_public # internal ALB unless the UI is public
  ui_allow_list   = var.ui_allow_list
  certificate_arn = var.ui_certificate_arn

  # Container images and extra environment
  ui_backend_container_image = local.metadata_service_container_image
  ui_static_container_image  = var.ui_static_container_image
  extra_ui_static_env_vars   = var.extra_ui_static_env_vars
  extra_ui_backend_env_vars  = var.extra_ui_backend_env_vars

  # IAM
  iam_partition = var.iam_partition

  # NOTE(review): the two references below index sibling modules at [0].
  # Those modules have count = 0 when var.create_managed_compute or
  # var.create_managed_metaflow_metadata_service is false. Confirm the UI
  # is never enabled without them.
  fargate_execution_role_arn         = module.metaflow-computation[0].ecs_execution_role_arn
  metadata_service_security_group_id = module.metaflow-metadata-service[0].metadata_service_security_group_id
}

# State migration: the module call below is now count-indexed, so the
# previously un-indexed instance is re-addressed to index 0.
moved {
  from = module.metaflow-computation
  to   = module.metaflow-computation[0]
}

# Compute resources for Metaflow. Instantiated only when
# var.create_managed_compute is true.
module "metaflow-computation" {
  count  = var.create_managed_compute ? 1 : 0
  source = "./modules/computation"

  # Naming and tagging
  resource_prefix = local.resource_prefix
  resource_suffix = local.resource_suffix
  standard_tags   = var.tags

  # Compute environment sizing
  batch_type                         = var.batch_type
  compute_environment_instance_types = var.compute_environment_instance_types
  compute_environment_min_vcpus      = var.compute_environment_min_vcpus
  compute_environment_desired_vcpus  = var.compute_environment_desired_vcpus
  compute_environment_max_vcpus      = var.compute_environment_max_vcpus

  # Networking
  metaflow_vpc_id                        = local.vpc_id
  subnet_ids                             = local.subnet_ids
  compute_environment_egress_cidr_blocks = var.compute_environment_egress_cidr_blocks

  # Launch template metadata options
  launch_template_http_endpoint               = var.launch_template_http_endpoint
  launch_template_http_tokens                 = var.launch_template_http_tokens
  launch_template_http_put_response_hop_limit = var.launch_template_http_put_response_hop_limit

  # IAM
  iam_partition = var.iam_partition
}

# State migration: the module call below is now count-indexed, so the
# previously un-indexed instance is re-addressed to index 0.
# The addresses must match the declared module name exactly
# ("metaflow-step-functions", plural); otherwise the move never applies to
# this module and Terraform plans a destroy/create of its resources instead.
moved {
  from = module.metaflow-step-functions
  to   = module.metaflow-step-functions[0]
}

# Step Functions integration for Metaflow. Instantiated only when
# var.create_step_functions is true.
module "metaflow-step-functions" {
  source = "./modules/step-functions"
  count  = var.create_step_functions ? 1 : 0

  resource_prefix = local.resource_prefix
  resource_suffix = local.resource_suffix
  iam_partition   = var.iam_partition

  # NOTE(review): these index sibling modules at [0]. module.metaflow-computation
  # has count = 0 when var.create_managed_compute is false; confirm Step
  # Functions is never enabled without managed compute and the datastore module.
  batch_job_queue_arn = module.metaflow-computation[0].METAFLOW_BATCH_JOB_QUEUE
  s3_bucket_arn       = module.metaflow-datastore[0].s3_bucket_arn
  s3_bucket_kms_arn   = module.metaflow-datastore[0].datastore_s3_bucket_kms_key_arn

  standard_tags = var.tags
}
3 changes: 0 additions & 3 deletions data.tf

This file was deleted.

7 changes: 0 additions & 7 deletions ecr.tf

This file was deleted.

168 changes: 168 additions & 0 deletions eks.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
# EKS cluster for running Metaflow workloads. Created only when
# var.create_eks_cluster is true.
module "eks" {
  source  = "terraform-aws-modules/eks/aws"
  version = "20.31.6"
  count   = var.create_eks_cluster ? 1 : 0

  cluster_version = "1.31" # Specify the desired EKS version
  cluster_name    = local.eks_name
  vpc_id          = local.vpc_id
  subnet_ids      = local.subnet_ids
  enable_irsa     = true

  # Defaults applied to every managed node group; caller-supplied
  # var.node_group_defaults win on key conflicts (later merge() argument).
  # NOTE(review): in v20 of the EKS module disk_size is presumably only honoured
  # when use_custom_launch_template = false — confirm it takes effect here.
  eks_managed_node_group_defaults = merge({
    ami_type  = "AL2023_x86_64_STANDARD"
    disk_size = 50
  }, var.node_group_defaults)

  # Built-in default node group; var.node_groups can add groups or override
  # "metaflow_default" entirely.
  # The v20 module reads `desired_size` and `instance_types` (a list). The
  # former `desired_capacity` / `instance_type` keys were not consumed, so the
  # group silently fell back to the module defaults.
  eks_managed_node_groups = merge({
    metaflow_default = {
      desired_size   = 2
      max_size       = 2
      min_size       = 1
      instance_types = ["m5.large"]
    }
  }, var.node_groups)

  cluster_endpoint_public_access  = true
  cluster_endpoint_private_access = true

  # Use the caller-provided policy map when non-empty; otherwise attach the
  # default node policy, the cluster-autoscaler policy and SSM access.
  # NOTE(review): at the top level of the v20 EKS module this input appears to
  # attach to the cluster IAM role, not to node-group roles — confirm, and move
  # it into eks_managed_node_group_defaults if node roles are the target.
  iam_role_additional_policies = length(var.node_group_iam_role_additional_policies) > 0 ? var.node_group_iam_role_additional_policies : {
    "default_node" = aws_iam_policy.default_node[0].arn,
    "autoscaler"   = aws_iam_policy.cluster_autoscaler[0].arn,
    # Allow SSM access to the machines in case direct access is needed
    "ssm" = "arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore",
  }

  tags = var.tags
}

# Default IAM policy for the EKS setup. Created only when the cluster is
# enabled and the caller supplied no var.node_group_iam_role_additional_policies.
resource "aws_iam_policy" "default_node" {
  count = var.create_eks_cluster && length(var.node_group_iam_role_additional_policies) == 0 ? 1 : 0

  policy      = data.aws_iam_policy_document.default_node.json
  description = "Default policy for cluster ${local.resource_prefix}-eks${local.resource_suffix}"
  name_prefix = "${local.resource_prefix}-default-node-policy${local.resource_suffix}"
}

# Policy document backing aws_iam_policy.default_node.
# Despite the "S3" sid, the single statement allows every S3 *and* KMS action
# on all resources.
data "aws_iam_policy_document" "default_node" {
  statement {
    sid       = "S3"
    effect    = "Allow"
    resources = ["*"]

    actions = [
      "s3:*",
      "kms:*",
    ]
  }
}

# Looks up the IAM role Terraform is currently running as, so it can be
# granted cluster-admin access below.
# Gated on var.create_eks_cluster: the lookup is only needed for the EKS access
# entry, and running it unconditionally could fail deployments that do not
# create a cluster at all.
# NOTE(review): the name is taken as the second "/"-separated segment of the
# caller ARN. This assumes an assumed-role ARN (".../RoleName/session"); for an
# IAM user ARN the segment is a user name and the role lookup would fail —
# confirm the expected caller type.
data "aws_iam_role" "current_role" {
  count = var.create_eks_cluster ? 1 : 0

  name = element(split("/", data.aws_caller_identity.current.arn), 1)
}

# Registers the current role as a principal on the cluster.
resource "aws_eks_access_entry" "provider_cluster_admin" {
  count = var.create_eks_cluster ? 1 : 0

  cluster_name  = module.eks[0].cluster_name
  principal_arn = data.aws_iam_role.current_role[0].arn
  type          = "STANDARD"
}

# Grants the registered principal the cluster-wide admin access policy.
resource "aws_eks_access_policy_association" "provider_cluster_admin" {
  count = var.create_eks_cluster ? 1 : 0

  # The access entry must exist before a policy can be associated with it.
  depends_on    = [aws_eks_access_entry.provider_cluster_admin]
  cluster_name  = module.eks[0].cluster_name
  policy_arn    = "arn:aws:eks::aws:cluster-access-policy/AmazonEKSClusterAdminPolicy"
  principal_arn = data.aws_iam_role.current_role[0].arn

  access_scope {
    type = "cluster"
  }
}

# IAM policy for the Kubernetes cluster-autoscaler. Created only when the
# cluster is enabled and the caller supplied no
# var.node_group_iam_role_additional_policies.
resource "aws_iam_policy" "cluster_autoscaler" {
  count = var.create_eks_cluster && length(var.node_group_iam_role_additional_policies) == 0 ? 1 : 0

  policy      = data.aws_iam_policy_document.cluster_autoscaler[0].json
  description = "EKS cluster-autoscaler policy for cluster ${local.eks_name}"
  name_prefix = "${local.resource_prefix}-cluster-autoscaler${local.resource_suffix}"
}

# Policy document backing aws_iam_policy.cluster_autoscaler. Rendered only
# when var.create_eks_cluster is true.
data "aws_iam_policy_document" "cluster_autoscaler" {
  count = var.create_eks_cluster ? 1 : 0

  # Read-only discovery actions, allowed on all resources.
  statement {
    sid       = "clusterAutoscalerAll"
    effect    = "Allow"
    resources = ["*"]

    actions = [
      "autoscaling:DescribeAutoScalingGroups",
      "autoscaling:DescribeAutoScalingInstances",
      "autoscaling:DescribeLaunchConfigurations",
      "autoscaling:DescribeTags",
      "ec2:DescribeLaunchTemplateVersions",
    ]
  }

  # Mutating actions, allowed only on resources that carry both tags checked
  # by the conditions below.
  statement {
    sid       = "clusterAutoscalerOwn"
    effect    = "Allow"
    resources = ["*"]

    actions = [
      "autoscaling:SetDesiredCapacity",
      "autoscaling:TerminateInstanceInAutoScalingGroup",
      "autoscaling:UpdateAutoScalingGroup",
    ]

    # Resource must be tagged as owned by this cluster.
    condition {
      variable = "autoscaling:ResourceTag/kubernetes.io/cluster/${local.eks_name}"
      test     = "StringEquals"
      values   = ["owned"]
    }

    # Resource must have the cluster-autoscaler "enabled" tag set to "true".
    condition {
      variable = "autoscaling:ResourceTag/k8s.io/cluster-autoscaler/enabled"
      test     = "StringEquals"
      values   = ["true"]
    }
  }
}

# Reads the created cluster back by name; its endpoint and CA certificate are
# passed to module.metaflow_helm. Present only when var.create_eks_cluster is true.
data "aws_eks_cluster" "cluster" {
  name  = module.eks[0].cluster_name
  count = var.create_eks_cluster ? 1 : 0
}

# Auth data for the same cluster; its token is passed to module.metaflow_helm.
data "aws_eks_cluster_auth" "cluster" {
  name  = module.eks[0].cluster_name
  count = var.create_eks_cluster ? 1 : 0
}

# In-cluster services module. It is always instantiated; every cluster-derived
# input falls back to an empty string when var.create_eks_cluster is false.
module "metaflow_helm" {
  source = "./modules/services"

  # Cluster identity
  cluster_name          = var.create_eks_cluster ? module.eks[0].cluster_name : ""
  cluster_oidc_provider = var.create_eks_cluster ? module.eks[0].oidc_provider : ""
  account_id            = data.aws_caller_identity.current.account_id
  region                = data.aws_region.current.name

  # Kubernetes connection details
  kubernetes_cluster_host           = var.create_eks_cluster ? data.aws_eks_cluster.cluster[0].endpoint : ""
  kubernetes_cluster_ca_certificate = var.create_eks_cluster ? data.aws_eks_cluster.cluster[0].certificate_authority[0].data : ""
  kubernetes_token                  = var.create_eks_cluster ? data.aws_eks_cluster_auth.cluster[0].token : ""

  # What to deploy
  resource_name_prefix      = local.resource_prefix
  deploy_metaflow_service   = var.deploy_metaflow_services_in_eks
  deploy_cluster_autoscaler = var.deploy_cluster_autoscaler
  metaflow_helm_values      = var.metaflow_helm_values

  # Database connection for the Metaflow services.
  metaflow_database = {
    database_name = local.database_name
    # Keep only the part of the endpoint before the first ":".
    host     = split(":", local.rds_master_instance_endpoint)[0]
    user     = local.database_username
    password = local.database_password
  }
}
34 changes: 34 additions & 0 deletions examples/basic-aws-managed/main.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
###############################################################################
# An example using this module to set up a minimal deployment of Metaflow
# with AWS Batch support, without the UI.
###############################################################################

# Terraform and provider version constraints for this example.
# Providers are declared with explicit source addresses instead of the legacy
# version-string shorthand. The `local` provider is declared because this
# example uses a `local_file` resource.
terraform {
  required_version = ">= 1.10"

  required_providers {
    aws = {
      source  = "hashicorp/aws"
      version = ">= 5.82"
    }
    random = {
      source  = "hashicorp/random"
      version = ">= 3.6"
    }
    local = {
      source  = "hashicorp/local"
      version = ">= 2.0"
    }
  }
}

# AWS provider configuration for this example.
provider "aws" {
  # make sure to set the region to the one you want to deploy to
  region = "us-west-2"
}


# Instantiates the root module of this repository (two directories up) and
# asks it to create its own VPC.
module "metaflow" {
  source = "../../"

  tags = {
    "managedBy" = "terraform"
  }

  create_vpc = true
}

# Writes the module's metaflow_aws_managed_profile_json output to a file
# in the working directory.
resource "local_file" "metaflow_config" {
  filename = "./metaflow_profile.json"
  content  = module.metaflow.metaflow_aws_managed_profile_json
}
Loading

0 comments on commit 299a1dd

Please sign in to comment.