-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdeploy.env.example
More file actions
75 lines (68 loc) · 4.54 KB
/
Copy pathdeploy.env.example
File metadata and controls
75 lines (68 loc) · 4.54 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# Operator config for bring-up + provisioning. Copy to deploy.env (gitignored) and edit:
# cp deploy.env.example deploy.env
# deploy.sh sources deploy.env (exported) before everything, so these reach both the
# CloudFormation parameters AND Ansible (when you run with PROVISION=1). For a
# standalone Ansible run (cd ansible && ansible-playbook ...) source it yourself first:
# set -a; . ./deploy.env; set +a
#
# Precedence: every knob uses ${VAR:-default}, so a non-empty value exported in your
# shell (or given on the command line) WINS over this file; a variable that is unset
# OR set to the empty string falls back to the default. Edit the default after :- to
# change the baseline. This file configures bring-up + provisioning; the benchmark RUN
# itself takes only inline env knobs (no config file). Nothing here is committed except
# this example.
# --- AWS target -------------------------------------------------------------
# AWS_PROFILE: name of the AWS CLI profile for the account being deployed into
# (create one with `aws configure --profile <name>`). Blank means "use your default
# profile / ambient credentials". deploy.sh prints the resolved account id and asks
# for confirmation before doing anything — always verify it is the account you intend.
# Assigned first, then exported, so child processes inherit the resolved value.
AWS_PROFILE="${AWS_PROFILE:-}"
export AWS_PROFILE
# AWS region (default us-east-1).
REGION="${REGION:-us-east-1}"
# STACK is the base stack name. NET_STACK defaults to "<STACK>-net", so it follows
# whatever STACK resolved to on the line above unless NET_STACK is set explicitly.
STACK="${STACK:-gw-bench}"
NET_STACK="${NET_STACK:-${STACK}-net}"
# --- SSH key pair (EC2) -----------------------------------------------------
# KEY_NAME names the EC2 key pair; it is created if absent. KEY_FILE is the local
# private key Ansible/SSH use. Its default is $HOME/.ssh/<KEY_NAME>.pem, so overriding
# KEY_NAME alone also changes the default key path.
KEY_NAME="${KEY_NAME:-llm-bench-key}"
KEY_FILE="${KEY_FILE:-$HOME/.ssh/${KEY_NAME}.pem}"
# --- Network (blank = use/auto-pick the default VPC + a public subnet) -------
# Leave both blank to use/auto-pick the default VPC and a public subnet; set them to
# pin the deployment to a specific VPC and subnet.
VPC_ID="${VPC_ID:-}"
SUBNET_ID="${SUBNET_ID:-}"
# Security group: blank = the stack creates one with exactly the rig's ports. Set to
# an existing sg-... to reuse it (deploy.sh checks it for the required ports and warns
# about any gaps).
SECURITY_GROUP_ID="${SECURITY_GROUP_ID:-}"
# Who may reach SSH (:22), as a CIDR. Blank = deploy.sh auto-detects your public IP
# and uses it as a /32.
ADMIN_CIDR="${ADMIN_CIDR:-}"
# Who may reach gateway/UI/mock/metrics ports. The default 0.0.0.0/0 (any IPv4
# address) is deliberately public so others can reproduce; set a narrower CIDR to
# restrict access.
ACCESS_CIDR="${ACCESS_CIDR:-0.0.0.0/0}"
# --- Instance sizing --------------------------------------------------------
# Instance type for the gateway boxes.
GATEWAY_TYPE="${GATEWAY_TYPE:-c6i.4xlarge}"
# Instance type for the mock + loadtest boxes.
AUX_TYPE="${AUX_TYPE:-c6i.4xlarge}"
# Volume size in GiB (presumably the same root gp3 volume the two knobs below tune —
# confirm against the CloudFormation template).
VOLUME_GIB="${VOLUME_GIB:-120}"
# Root gp3 IOPS (>= 4x throughput MB/s; region allows up to 80000).
VOLUME_IOPS="${VOLUME_IOPS:-12000}"
# Root gp3 throughput in MB/s. It is applied ONLINE post-launch by
# scripts/set-ebs-throughput.sh (deploy.sh runs it for you), because EC2::Instance
# can't set gp3 Throughput inline. The stress test writes heavily (audit BYTEA +
# NATS jetstream + spill + PG WAL), so the volume is sized so it is never the
# bottleneck. The REAL cap is the instance's EBS bandwidth: ~593 MiB/s sustained /
# ~1250 burst on c6i.4xlarge. Need more sustained write than that? Bump GATEWAY_TYPE
# (bigger c6i = more EBS bw, or a NVMe-instance-store family) — not this volume knob.
# NOTE(review): AWS's EBS-optimized table appears to list c6i.4xlarge at 5000 Mbps
# baseline (625 MB/s); 593.75 MB/s is the 4750 Mbps c5.4xlarge figure — confirm.
VOLUME_THROUGHPUT="${VOLUME_THROUGHPUT:-1000}"
# --- Which boxes to deploy (the matrix) -------------------------------------
# One true/false switch per box. As shipped, MOCK, NEXUS, BIFROST and LOADTEST default
# to true; LITELLM, KONG, PORTKEY and TENSORZERO default to false. Because the knobs
# use :-, an empty value also falls back to the default shown here.
DEPLOY_MOCK="${DEPLOY_MOCK:-true}"
DEPLOY_NEXUS="${DEPLOY_NEXUS:-true}"
DEPLOY_BIFROST="${DEPLOY_BIFROST:-true}"
DEPLOY_LOADTEST="${DEPLOY_LOADTEST:-true}"
DEPLOY_LITELLM="${DEPLOY_LITELLM:-false}"
DEPLOY_KONG="${DEPLOY_KONG:-false}"
DEPLOY_PORTKEY="${DEPLOY_PORTKEY:-false}"
DEPLOY_TENSORZERO="${DEPLOY_TENSORZERO:-false}"
# --- Operator SSH access (passwordless login with YOUR key, on every box) ----
# Consumed by the Ansible `common` role (group_vars/all.yml → BENCH_SSH_PUBLIC_KEYS).
# Value: one or more PUBLIC keys, separated by ';'. Leaving it empty is a no-op — you
# can still use the stack key pair: ssh -i $KEY_FILE ec2-user@<ip>. Example:
# BENCH_SSH_PUBLIC_KEYS="ssh-ed25519 AAAA... me ; ssh-rsa AAAA... laptop"
# (ansible/authorized_keys, one key per line, is an equivalent file-based input.)
# Assigned first, then exported, so child processes inherit the value.
BENCH_SSH_PUBLIC_KEYS="${BENCH_SSH_PUBLIC_KEYS:-}"
export BENCH_SSH_PUBLIC_KEYS
# --- Flow -------------------------------------------------------------------
# ASSUME_YES: 1 = skip the deploy confirmation prompt (default 0).
ASSUME_YES="${ASSUME_YES:-0}"
# PROVISION: 1 = also run the Ansible install after the boxes are up (default 0).
PROVISION="${PROVISION:-0}"