-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.tf
More file actions
175 lines (150 loc) · 4.89 KB
/
main.tf
File metadata and controls
175 lines (150 loc) · 4.89 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
# SIE GKE Cluster - Development Example (L4 Spot)
#
# Creates a GKE cluster with GPU nodes. K8s resources (KEDA, Prometheus,
# SIE application) are deployed via Helm after this terraform apply.
#
# See oci://ghcr.io/superlinked/charts/sie-cluster for the Helm chart.
#
# Features:
# - 1x L4 GPU spot pool (scale 0-5)
# - NAP enabled for automatic node provisioning
# - Workload Identity for GCS access
# - Artifact Registry for SIE images
#
# Prerequisites:
# 1. GCP project with billing enabled
# 2. GPU quota (check with: gcloud compute regions describe REGION --format='table(quotas.filter(metric:NVIDIA))')
# 3. APIs enabled: container.googleapis.com, compute.googleapis.com
#
# Usage:
# export TF_VAR_project_id="your-project-id"
# terraform init
# terraform plan
# terraform apply
#
# After apply, deploy K8s resources (batteries-included Helm chart):
# $(terraform output -raw kubectl_command)
# helm upgrade --install sie-cluster deploy/helm/sie-cluster \
# -f values-gke.yaml \
# --create-namespace -n sie \
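# --set serviceAccount.annotations."iam\.gke\.io/gcp-service-account"="<GSA_EMAIL>"
# (<GSA_EMAIL> is the GCP service account used for Workload Identity. This file does not
# define a `sie_workload_service_account` output; see `terraform output workload_identity_annotation`.)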
#
# Cleanup:
# helm uninstall sie-cluster -n sie
# terraform destroy
# Terraform CLI settings for this example.
terraform {
  # Minimum Terraform CLI version accepted by this configuration.
  required_version = ">= 1.14"

  # No backend is configured here. To keep state in a GCS bucket instead,
  # uncomment the block below and fill in the bucket name.
  # backend "gcs" {
  #   bucket = "your-terraform-state-bucket"
  #   prefix = "sie/gke"
  # }
}
# =============================================================================
# Variables
# =============================================================================
# Target GCP project. There is no default, so a value must be supplied
# (for example through the TF_VAR_project_id environment variable).
variable "project_id" {
  type        = string
  description = "GCP project ID"
}
# Region handed to the infra module as its `region` input.
variable "region" {
  type        = string
  default     = "us-central1"
  description = "GCP region"
}
# Name handed to the infra module as its `cluster_name` input.
variable "cluster_name" {
  type        = string
  default     = "sie-dev"
  description = "GKE cluster name"
}
# Toggle forwarded to the infra module's `create_artifact_registry` input.
variable "create_artifact_registry" {
  type        = bool
  default     = true
  description = "Whether to create an Artifact Registry for SIE images"
}
# Optional; defaults to the empty string and is forwarded unchanged to the
# infra module's `deployer_service_account` input.
variable "deployer_service_account" {
  type        = string
  default     = ""
  description = "Email of the service account running Terraform (optional, for CI/CD)"
}
# =============================================================================
# SIE GKE Infra Module
# =============================================================================
# Zones for the L4 GPU node pool. The default matches the default region
# ("us-central1"). When overriding `region`, override this too: the validation
# below rejects zones that do not belong to the selected region, so a mismatch
# is reported at plan time rather than during apply.
variable "gpu_zones" {
  description = "Zones for the L4 GPU node pool; every zone must belong to var.region"
  type        = list(string)
  default     = ["us-central1-a", "us-central1-b", "us-central1-c"]

  validation {
    # A zone name is its region name followed by "-<suffix>".
    condition     = alltrue([for zone in var.gpu_zones : startswith(zone, "${var.region}-")])
    error_message = "Every entry in gpu_zones must be a zone of var.region (for example \"us-central1-a\" for region \"us-central1\"). Set gpu_zones when overriding region."
  }
}

# Creates the GKE infrastructure for the SIE dev example through the
# superlinked/sie/google registry module, pinned to version 0.3.4.
# Kubernetes resources are NOT created here; they are installed with Helm
# after `terraform apply` (see the header of this file).
module "infra" {
  source  = "superlinked/sie/google"
  version = "0.3.4"

  project_id               = var.project_id
  region                   = var.region
  cluster_name             = var.cluster_name
  deployer_service_account = var.deployer_service_account
  deletion_protection      = false # Dev cluster - allow easy cleanup

  # Network
  create_network = true
  network        = "sie-network"
  subnetwork     = "sie-subnet"

  # Private cluster with NAT
  enable_private_nodes = true

  # Node Auto-Provisioning (NAP)
  enable_node_auto_provisioning = true
  nap_max_cpu                   = 100
  nap_max_memory_gb             = 400

  # CPU node pool for system workloads
  cpu_node_pool = {
    machine_type   = "e2-standard-4"
    min_node_count = 1
    max_node_count = 3
  }

  # GPU node pool - L4 for inference
  gpu_node_pools = [
    {
      name            = "l4-spot"
      machine_type    = "g2-standard-8" # 8 vCPU, 32GB RAM, 1x L4
      gpu_type        = "nvidia-l4"
      gpu_count       = 1
      min_node_count  = 0    # Scale to zero when idle
      max_node_count  = 5
      spot            = true # ~60% savings
      local_ssd_count = 1    # 375GB local SSD for model cache

      # Taken from var.gpu_zones (validated against var.region) instead of a
      # hard-coded us-central1 list, so changing the region cannot silently
      # leave the pool pointing at zones of another region.
      zones = var.gpu_zones

      # Taint on the GPU nodes; NOTE(review): pods presumably need a matching
      # toleration to schedule here - confirm against the Helm chart values.
      taints = [{
        key    = "nvidia.com/gpu"
        value  = "present"
        effect = "NO_SCHEDULE"
      }]

      labels = {
        "sie.superlinked.com/gpu-type" = "l4"
      }
    }
  ]

  # Workload Identity for GCS access
  enable_workload_identity = true
  sie_namespace            = "sie"
  sie_service_account_name = "sie-server"

  # Artifact Registry for SIE images
  create_artifact_registry = var.create_artifact_registry

  # GKE native logging
  enable_cloud_logging = true

  labels = {
    "environment" = "dev"
    "managed-by"  = "terraform"
  }
}
# =============================================================================
# Outputs
# =============================================================================
# Re-exports the infra module's `cluster_name` output.
output "cluster_name" {
  value       = module.infra.cluster_name
  description = "GKE cluster name"
}
# Re-exports the infra module's `kubectl_config_command` output under a
# shorter name; the file header runs it via `$(terraform output -raw kubectl_command)`.
output "kubectl_command" {
  value       = module.infra.kubectl_config_command
  description = "Command to configure kubectl"
}
# Re-exports the infra module's `artifact_registry_url` output.
# NOTE(review): its value when create_artifact_registry is false is not
# visible from this file - confirm against the module.
output "artifact_registry_url" {
  value       = module.infra.artifact_registry_url
  description = "Artifact Registry URL for pushing images"
}
# Re-exports the infra module's `workload_identity_annotation` output.
output "workload_identity_annotation" {
  value       = module.infra.workload_identity_annotation
  description = "Annotation for Kubernetes service accounts (Workload Identity)"
}