Skip to content

Commit 34fc6dc

Browse files
authored
Observatory deployment (#952)
This PR is already deployed. It uses the backend and frontend images that Pasha built in #923. [Asana Task](https://app.asana.com/1/1209016784099267/project/1210348820405981/task/1210609656255216)
1 parent aa5da33 commit 34fc6dc

20 files changed

Lines changed: 505 additions & 17 deletions

File tree

devops/charts/README.md

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,24 @@ Install necessary tools with `./devops/macos/setup_machine.py --devops`.
88

99
## Updating charts
1010

11-
Edit some charts and/or `helmfile.yaml`.
11+
### Helmfile-powered charts
1212

13-
Then run `helmfile apply`.
13+
Most of the infrastructure is described by `helmfile.yaml`.
14+
15+
This file describes several Helm charts:
16+
17+
- core services such as `ingress-nginx` and `cert-manager`
18+
- the `skypilot` chart (which is our fork of the upstream SkyPilot Helm chart)
19+
20+
How to update infra charts:
21+
22+
1. Edit some charts and/or `helmfile.yaml`.
23+
2. Run `helmfile apply`.
1424

1525
Refer to [helmfile documentation](https://helmfile.readthedocs.io/en/latest/) for more details.
26+
27+
### Observatory
28+
29+
Observatory charts (`./observatory` and `./observatory-backend`) are deployed by CI/CD pipelines powered by GitHub Actions.
30+
31+
Image tags change on every build, so these charts can't be described statically in `helmfile.yaml` with up-to-date values.

devops/charts/helmfile.yaml

Lines changed: 45 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,10 @@ repositories:
33
url: https://kubernetes-sigs.github.io/external-dns/
44
- name: cert-manager
55
url: https://charts.jetstack.io
6+
- name: ingress-nginx
7+
url: https://kubernetes.github.io/ingress-nginx
8+
- name: metrics-server
9+
url: https://kubernetes-sigs.github.io/metrics-server/
610

711
releases:
812
########## Core infrastructure ##########
@@ -26,6 +30,46 @@ releases:
2630
values:
2731
- installCRDs: true
2832

33+
- name: ingress-nginx
34+
chart: ingress-nginx/ingress-nginx
35+
version: 4.11.3
36+
namespace: ingress-nginx
37+
values:
38+
# The initial version of these values is borrowed from the skypilot chart.
39+
# The comments below also come from the skypilot chart. (They could probably be simplified, since we're committed to AWS.)
40+
# Since we use ingress-nginx for more than just skypilot, we install it separately.
41+
- controller:
42+
service:
43+
type: LoadBalancer
44+
# Default annotations for the ingress controller service. We want an L4 loadbalancer by default for maximum compatibility,
45+
# especially for websocket SSH tunneling. Different cloud providers may require different annotations.
46+
# Annotations with no side effects are aggregated below to simplify the usage.
47+
annotations:
48+
# For AWS service reconciled by cloud-controller-manager, use NLB by default.
49+
# If you are using AWS Load Balancer Controller, refer to the following doc to configure annotations:
50+
# https://kubernetes-sigs.github.io/aws-load-balancer-controller/latest/guide/service/annotations/
51+
service.beta.kubernetes.io/aws-load-balancer-type: "nlb"
52+
# For GKE, use backend service-based external passthrough Network Load Balancer as per best practices.
53+
# Ref: https://cloud.google.com/kubernetes-engine/docs/concepts/service-load-balancer#load_balancer_types
54+
cloud.google.com/l4-rbs: "enabled"
55+
# For Azure, override the healthz check protocol to TCP probe to avoid HTTP auth issues.
56+
service.beta.kubernetes.io/port_443_health-probe_protocol: "TCP"
57+
service.beta.kubernetes.io/port_80_health-probe_protocol: "TCP"
58+
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
59+
config:
60+
# Necessary for observatory: the observatory-api ingress strips auth headers with a configuration-snippet annotation.
61+
allow-snippet-annotations: true
62+
http-snippet: |
63+
map $http_upgrade $connection_upgrade {
64+
default upgrade;
65+
'' close;
66+
}
67+
68+
- name: metrics-server
69+
chart: metrics-server/metrics-server
70+
version: 3.12.2
71+
namespace: metrics-server
72+
2973
########## Skypilot ##########
3074
- name: skypilot
3175
chart: ./skypilot
@@ -46,10 +90,7 @@ releases:
4690
enabled: true
4791
host: skypilot-api.softmax-research.net
4892
ingress-nginx:
49-
controller:
50-
service:
51-
annotations:
52-
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
93+
enabled: false
5394
lambdaAiCredentials:
5495
enabled: true
5596
# created by terraform
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
apiVersion: v2
2+
name: metta-observatory-backend
3+
version: "0.1.0"
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
apiVersion: apps/v1
2+
kind: Deployment
3+
metadata:
4+
name: {{ .Release.Name }}
5+
spec:
6+
selector:
7+
matchLabels:
8+
app: {{ .Release.Name }}
9+
template:
10+
metadata:
11+
labels:
12+
app: {{ .Release.Name }}
13+
spec:
14+
imagePullSecrets:
15+
- name: dockerconfig
16+
containers:
17+
- name: server
18+
image: "{{ .Values.image.registry }}/{{ .Values.image.name }}:{{ required "tag is required" .Values.image.tag }}"
19+
imagePullPolicy: Always
20+
envFrom:
21+
- secretRef:
22+
name: {{ .Values.secret_name }}
23+
resources:
24+
requests:
25+
memory: "1Gi"
26+
limits:
27+
memory: "4Gi"
28+
ports:
29+
- containerPort: 8000
30+
livenessProbe:
31+
httpGet:
32+
path: /whoami
33+
port: 8000
34+
timeoutSeconds: 30
35+
failureThreshold: 3
36+
readinessProbe:
37+
httpGet:
38+
path: /whoami
39+
port: 8000
40+
timeoutSeconds: 3
41+
periodSeconds: 1
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
apiVersion: networking.k8s.io/v1
2+
kind: Ingress
3+
metadata:
4+
name: {{ .Release.Name }}
5+
annotations:
6+
cert-manager.io/cluster-issuer: {{ .Values.cert_manager_issuer | required "cert_manager_issuer is required" }}
7+
nginx.ingress.kubernetes.io/configuration-snippet: |
8+
proxy_set_header X-Auth-Request-Email "";
9+
proxy_set_header X-Auth-Request-User "";
10+
11+
spec:
12+
ingressClassName: nginx
13+
rules:
14+
- host: "{{ .Values.host | required "host is required" }}"
15+
http:
16+
paths:
17+
- path: /
18+
pathType: Prefix
19+
backend:
20+
service:
21+
name: {{ .Release.Name }}
22+
port:
23+
number: 8000
24+
tls:
25+
- hosts:
26+
- {{ .Values.host | required "host is required" }}
27+
secretName: {{ .Release.Name }}-tls
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
apiVersion: v1
2+
kind: Service
3+
metadata:
4+
name: {{ .Release.Name }}
5+
spec:
6+
selector:
7+
app: {{ .Release.Name }}
8+
ports:
9+
- port: 8000
10+
targetPort: 8000
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# This host is used by scripts that authenticate with API tokens.
2+
# Backend server will be mounted under /
3+
#
4+
# Note that another chart, `observatory`, creates another endpoint for browser access, `https://observatory.softmax-research.net/api`.
5+
# That endpoint is protected by oauth2-proxy.
6+
host: api.observatory.softmax-research.net
7+
8+
image:
9+
registry: 751442549699.dkr.ecr.us-east-1.amazonaws.com
10+
name: metta-app-backend
11+
tag: "" # will be set by CI/CD pipeline
12+
13+
# created by terraform
14+
secret_name: observatory-backend-env
15+
16+
cert_manager_issuer: letsencrypt
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
apiVersion: v2
2+
name: metta-observatory
3+
version: "0.1.0"
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
apiVersion: apps/v1
2+
kind: Deployment
3+
metadata:
4+
name: {{ .Release.Name }}
5+
spec:
6+
selector:
7+
matchLabels:
8+
app: {{ .Release.Name }}
9+
template:
10+
metadata:
11+
labels:
12+
app: {{ .Release.Name }}
13+
spec:
14+
imagePullSecrets:
15+
- name: dockerconfig
16+
containers:
17+
- name: server
18+
image: "{{ .Values.image.registry }}/{{ .Values.image.name }}:{{ required "tag is required" .Values.image.tag }}"
19+
imagePullPolicy: Always
20+
resources:
21+
requests:
22+
memory: "128Mi"
23+
limits:
24+
memory: "1Gi"
25+
ports:
26+
- containerPort: 80
27+
livenessProbe:
28+
httpGet:
29+
path: /
30+
port: 80
31+
readinessProbe:
32+
httpGet:
33+
path: /
34+
port: 80
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
apiVersion: networking.k8s.io/v1
2+
kind: Ingress
3+
metadata:
4+
name: {{ .Release.Name }}-oauth2-proxy
5+
annotations:
6+
cert-manager.io/cluster-issuer: {{ .Values.cert_manager_issuer | required "cert_manager_issuer is required" }}
7+
spec:
8+
ingressClassName: nginx
9+
rules:
10+
- host: "{{ .Values.host | required "host is required" }}"
11+
http:
12+
paths:
13+
- path: /oauth2
14+
pathType: Prefix
15+
backend:
16+
service:
17+
name: "{{ .Release.Name }}-oauth2-proxy"
18+
port:
19+
number: 4180
20+
tls:
21+
- hosts:
22+
- {{ .Values.host | required "host is required" }}
23+
secretName: {{ .Release.Name }}-tls
24+
---
25+
26+
apiVersion: networking.k8s.io/v1
27+
kind: Ingress
28+
metadata:
29+
name: {{ .Release.Name }}
30+
annotations:
31+
nginx.ingress.kubernetes.io/auth-url: "https://$host/oauth2/auth"
32+
nginx.ingress.kubernetes.io/auth-signin: "https://$host/oauth2/start?rd=$escaped_request_uri"
33+
nginx.ingress.kubernetes.io/auth-response-headers: |
34+
X-Auth-Request-User, X-Auth-Request-Email
35+
cert-manager.io/cluster-issuer: {{ .Values.cert_manager_issuer | required "cert_manager_issuer is required" }}
36+
# needed for /api rewrites
37+
nginx.ingress.kubernetes.io/use-regex: "true"
38+
nginx.ingress.kubernetes.io/rewrite-target: "/$2"
39+
40+
spec:
41+
ingressClassName: nginx
42+
rules:
43+
- host: "{{ .Values.host | required "host is required" }}"
44+
http:
45+
paths:
46+
# /api -> backend, drop the prefix
47+
- path: /api(/|$)(.*)
48+
pathType: ImplementationSpecific
49+
backend:
50+
service:
51+
name: "{{ .Release.Name }}-backend"
52+
port:
53+
number: 8000
54+
# / -> frontend
55+
- path: /()(.*)
56+
pathType: ImplementationSpecific
57+
backend:
58+
service:
59+
name: {{ .Release.Name }}
60+
port:
61+
number: 80
62+
tls:
63+
- hosts:
64+
- {{ .Values.host | required "host is required" }}
65+
secretName: {{ .Release.Name }}-tls

0 commit comments

Comments
 (0)