# Create Exostellar's Karpenter (xkarpenter) namespace
# 
# Note: This is to let Terraform handle the namespace's lifecycle.
resource "kubernetes_namespace" "xkarpenter_namespace" {
  metadata {
    # Namespace name is supplied by the caller. The IRSA trust policies and both Helm releases in this file reference
    # the same variable, so they all target this namespace.
    name = var.namespace
  }
}

# Create IRSA for Exostellar's Karpenter (xkarpenter).
#
# This role will be assumed by the Karpenter service account in the EKS cluster via IRSA (IAM Roles for Service
# Accounts). The assume role trust policy allows the EKS OIDC provider to exchange the pod's service account token for
# IAM creds.
resource "aws_iam_role" "karpenter_role" {
  # Ensure Karpenter namespace exists before creating the IAM role.
  depends_on = [kubernetes_namespace.xkarpenter_namespace]

  # Name of the Karpenter IAM role (customized with prefix and cluster name).
  name = "${var.aws_resource_prefix}${var.eks_cluster}-karpenter-role"

  # Trust policy: defines who can assume this role. In this case, it's the Karpenter service account.
  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Effect = "Allow"

        # Trust the federated identity provider (EKS OIDC provider ARN).
        Principal = {
          Federated = var.eks_cluster_oidc_provider_arn
        }

        # Allow pods to use their Kubernetes service account token (OIDC web identity) to call AWS STS and assume this
        # IAM role. This enables IRSA: service account tokens are exchanged for short-lived AWS credentials instead of
        # using static keys or node IAM roles.
        Action = "sts:AssumeRoleWithWebIdentity"

        # Restrict assumption to ONLY the Karpenter service account in the given namespace, and only for tokens that
        # were issued for AWS STS.
        #
        # The condition keys are the OIDC issuer host/path (issuer URL without the "https://" scheme) followed by the
        # claim name. Example:
        #   oidc.eks.<region>.amazonaws.com/id/<OIDC_ID>:sub = system:serviceaccount:<namespace>:xkarpenter-exo
        #   oidc.eks.<region>.amazonaws.com/id/<OIDC_ID>:aud = sts.amazonaws.com
        Condition = {
          StringEquals = {
            # Subject claim: exactly one service account may assume this role.
            "${replace(var.eks_cluster_oidc_issuer, "https://", "")}:sub" = "system:serviceaccount:${var.namespace}:xkarpenter-exo"

            # Audience claim: reject tokens minted for any audience other than STS, as recommended by the AWS IRSA
            # trust policy template.
            "${replace(var.eks_cluster_oidc_issuer, "https://", "")}:aud" = "sts.amazonaws.com"
          }
        }
      }
    ]
  })
}

# Attach Karpenter policy to IAM role.
resource "aws_iam_role_policy" "karpenter_policy" {
  # Role that receives this inline policy.
  role = aws_iam_role.karpenter_role.id

  # Policy name (customized with prefix and cluster name, like the role itself).
  name = "${var.aws_resource_prefix}${var.eks_cluster}-karpenter-policy"

  # Policy document is read verbatim from a JSON file shipped inside this module.
  policy = file("${path.module}/policy/karpenter-policy.json")
}

# Create IRSA for Exostellar's xnode-controller.
#
# This role will be assumed by the exo-node-controller service account in the EKS cluster via IRSA (IAM Roles for
# Service Accounts). The assume role trust policy allows the EKS OIDC provider to exchange the pod's service account
# token for IAM creds.
resource "aws_iam_role" "xnode_controller_role" {
  # Ensure Karpenter namespace exists before creating the IAM role.
  depends_on = [kubernetes_namespace.xkarpenter_namespace]

  # Name of the exo-node-controller IAM role (customized with prefix and cluster name).
  name = "${var.aws_resource_prefix}${var.eks_cluster}-xnode-controller-role"

  # Trust policy: defines who can assume this role. In this case, it's the exo-node-controller service account.
  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Effect = "Allow"

        # Trust the federated identity provider (EKS OIDC provider ARN).
        Principal = {
          Federated = var.eks_cluster_oidc_provider_arn
        }

        # Allow pods to use their Kubernetes service account token (OIDC web identity) to call AWS STS and assume this
        # IAM role. This enables IRSA: service account tokens are exchanged for short-lived AWS credentials instead of
        # using static keys or node IAM roles.
        Action = "sts:AssumeRoleWithWebIdentity"

        # Restrict assumption to ONLY the exo-node-controller service account in the given namespace, and only for
        # tokens that were issued for AWS STS.
        #
        # The condition keys are the OIDC issuer host/path (issuer URL without the "https://" scheme) followed by the
        # claim name. Example:
        #   oidc.eks.<region>.amazonaws.com/id/<OIDC_ID>:sub = system:serviceaccount:<namespace>:exo-node-controller
        #   oidc.eks.<region>.amazonaws.com/id/<OIDC_ID>:aud = sts.amazonaws.com
        Condition = {
          StringEquals = {
            # Subject claim: exactly one service account may assume this role.
            "${replace(var.eks_cluster_oidc_issuer, "https://", "")}:sub" = "system:serviceaccount:${var.namespace}:exo-node-controller"

            # Audience claim: reject tokens minted for any audience other than STS, as recommended by the AWS IRSA
            # trust policy template.
            "${replace(var.eks_cluster_oidc_issuer, "https://", "")}:aud" = "sts.amazonaws.com"
          }
        }
      }
    ]
  })
}

# Attach exo-node-controller policy to the IAM role.
resource "aws_iam_role_policy" "xnode_controller_role_policy" {
  # Role that receives this inline policy.
  role = aws_iam_role.xnode_controller_role.id

  # Policy name (customized with prefix and cluster name, like the role itself).
  name = "${var.aws_resource_prefix}${var.eks_cluster}-xnode-controller-policy"

  # Policy document is read verbatim from a JSON file shipped inside this module.
  policy = file("${path.module}/policy/exo-node-controller-policy.json")
}

# Deploy Exostellar's Karpenter (xkarpenter) Helm chart.
resource "helm_release" "xkarpenter" {
  # Explicit ordering:
  #   - The namespace must exist, because create_namespace is disabled below.
  #   - The inline IAM policies must be attached before the controllers start. The release only references the role
  #     ARNs, which orders it after the roles but NOT after their policies. Without these entries the pods could start
  #     (and Helm could begin waiting on them) before the roles have any permissions, and on destroy the policies could
  #     be removed while the controllers are still running.
  depends_on = [
    kubernetes_namespace.xkarpenter_namespace,
    aws_iam_role_policy.karpenter_policy,
    aws_iam_role_policy.xnode_controller_role_policy,
  ]

  name      = "xkarpenter"
  chart     = var.xkarpenter_helm_chart_repository
  version   = var.xkarpenter_version
  namespace = var.namespace
  # Don't let the Helm chart attempt to create the xkarpenter's namespace, as it is handled by
  # kubernetes_namespace.xkarpenter_namespace now.
  create_namespace = false

  # Individual chart value overrides.
  set = [
    {
      name  = "settings.clusterName"
      value = var.eks_cluster
    },
    {
      name  = "controller.resources.requests.cpu"
      value = var.pod_resources.requests.cpu
    },
    {
      name  = "controller.resources.requests.memory"
      value = var.pod_resources.requests.memory
    },
    {
      name  = "controller.resources.limits.cpu"
      value = var.pod_resources.limits.cpu
    },
    {
      name  = "controller.resources.limits.memory"
      value = var.pod_resources.limits.memory
    },
    {
      # Management server endpoint, built from its private IP on port 5000 (plain HTTP).
      name  = "headnode"
      value = "http://${var.exostellar_management_server_private_ip}:5000"
    },
    {
      name  = "defaultControllerRole"
      value = var.xspot_controller_instance_profile_arn
    },
    {
      name  = "defaultWorkerRole"
      value = var.xspot_worker_instance_profile_arn
    },
    {
      # Set the xkarpenter role's ARN.
      #
      # When this is set, the xkarpenter Helm chart annotates the service account "xkarpenter-exo" with the IAM role ARN
      # so that Karpenter's pods can assume the role and make API calls to AWS.
      #
      # Annotation syntax:
      #
      #   eks.amazonaws.com/role-arn=arn:aws:iam::<aws-account-id>:role/<prefix><cluster-name>-karpenter-role
      name  = "exokarpenterRole"
      value = aws_iam_role.karpenter_role.arn
    },
    {
      # Set the exo-node-controller role's ARN.
      #
      # When this is set, the xkarpenter Helm chart annotates the service account "exo-node-controller" with the IAM
      # role ARN so that exo-node-controller's pods can assume the role and make API calls to AWS.
      #
      # Annotation syntax:
      #
      #   eks.amazonaws.com/role-arn=arn:aws:iam::<aws-account-id>:role/<prefix><cluster-name>-xnode-controller-role
      name  = "xnodeController.roleARN"
      value = aws_iam_role.xnode_controller_role.arn
    },
    {
      name  = "region"
      value = var.region
    }
  ]

  # Increase timeout from 300s to 600s as there might be a delay in pulling the image(s) and Helm chart from ECR,
  # Kubernetes API in EKS cluster, etc.
  timeout = 600
}

# Deploy the Helm chart for Exostellar's Karpenter (xkarpenter) resources (default ExoNodeClass and ExoNodePool).
resource "helm_release" "xkarpenter_resources" {
  # Install only after the main xkarpenter release is in place.
  depends_on = [helm_release.xkarpenter]

  # Release identity and chart source. The chart version tracks the xkarpenter version.
  name    = "xkarpenter-resources"
  chart   = var.xkarpenter_resources_helm_chart_repository
  version = var.xkarpenter_version

  # The namespace is managed by kubernetes_namespace.xkarpenter_namespace, so Helm must not try to create it.
  namespace        = var.namespace
  create_namespace = false

  # CRDs are left to the helm_release.xkarpenter block; this release never installs them.
  skip_crds = true

  # Allow up to 600s (instead of 300s) to absorb delays when pulling the image(s) and Helm chart from ECR, talking to
  # the Kubernetes API in the EKS cluster, etc.
  timeout = 600

  # Chart values, rendered to YAML.
  values = [
    yamlencode({
      # Default ExoNodeClass.
      # Note: the default ExoNodeClass and ExoNodePool are only created when both `defaultNodeClass.name` and
      # `defaultNodePool.name` are set.
      defaultNodeClass = {
        # The default ExoNodeClass is called "default-class".
        name = "default-class"

        # X-compute settings for the default ExoNodeClass.
        xcompute = {
          # Snapshot name of the Kubernetes node image, e.g. "k8s-132".
          # Note: this should match the Kubernetes version of the current EKS cluster.
          nodeImageName = var.k8s_node_image_name

          # Version of the xspot controller and worker, e.g. "xspot-3.4.0".
          version = var.xspot_version

          # Toggle ballooning for xspot. Default is true.
          enableBalloon = var.xspot_enable_balloon

          # Toggle hyperthreading for xspot. Default is true.
          enableHyperthreading = var.xspot_enable_hyperthreading

          # Version of the guest kernel. Default is "5.15.185".
          kernelVersion = var.guest_kernel_version
        }

        # Security groups attached to the x-compute nodes. Selector terms may match by tag or by ID; here each
        # configured security group ID becomes its own `id` term.
        securityGroupSelectorTerms = [
          for security_group in var.xcompute_node_security_groups : {
            id = security_group
          }
        ]

        # Private subnets the x-compute nodes may be launched in, one `id` term per subnet.
        subnetSelectorTerms = [
          for private_subnet in var.eks_cluster_private_subnet_ids : {
            id = private_subnet
          }
        ]
      }

      # Default ExoNodePool.
      # Note: the default ExoNodeClass and ExoNodePool are only created when both `defaultNodeClass.name` and
      # `defaultNodePool.name` are set.
      defaultNodePool = {
        # The default ExoNodePool is called "default-pool".
        name = "default-pool"

        # Whether the Infrastructure Optimizer (IO) is enabled for xspot. Defaults to true.
        infrastructureOptimizer = var.enable_infrastructure_optimizer

        # Whether the Workload Optimizer (WO) is enabled for xspot. Defaults to true.
        workloadOptimizer = var.enable_workload_optimizer

        # Constraints on the worker nodes launched in this pool: only the listed instance types are allowed.
        requirements = [
          {
            key      = "node.kubernetes.io/instance-type"
            operator = "In"
            values   = ["m5.xlarge", "m5.8xlarge"]
          }
        ]
      }
    })
  ]
}
