Skip to content

Commit 05aa355

Browse files
committed
k8s service: connect with overlay IP address instead of pod IP
1 parent 5687c41 commit 05aa355

8 files changed

Lines changed: 133 additions & 25 deletions

File tree

src/deployment/__init__.py

Lines changed: 66 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@
4646
from ._util import deployment_namespace
4747
from .deployment import DeploymentParameters
4848
from .grafana import create_vela_grafana_obj, delete_vela_grafana_obj
49-
from .kubernetes import KubernetesService
49+
from .kubernetes import KubernetesService, get_neon_vm
5050
from .kubernetes._util import custom_api_client
5151
from .settings import get_settings
5252
from .simplyblock_api import create_simplyblock_api
@@ -75,6 +75,8 @@
7575
AUTOSCALER_PVC_SUFFIX = "-block-data"
7676
_LOAD_BALANCER_TIMEOUT_SECONDS = float(600)
7777
_LOAD_BALANCER_POLL_INTERVAL_SECONDS = float(2)
78+
_OVERLAY_IP_TIMEOUT_SECONDS = float(300)
79+
_OVERLAY_IP_POLL_INTERVAL_SECONDS = float(5)
7880
DNSRecordType = Literal["AAAA", "CNAME"]
7981
DATABASE_DNS_RECORD_TYPE: Literal["AAAA"] = "AAAA"
8082

@@ -157,6 +159,68 @@ def branch_service_name(component: str) -> str:
157159
return f"{_release_fullname()}-{component}"
158160

159161

162+
async def _wait_for_autoscaler_overlay_ip(namespace: str, vm_name: str) -> str:
    """Poll the autoscaler VM until it reports a non-blank overlay IP.

    The VM is fetched with ``get_neon_vm`` and ``status.extra_net_ip`` is
    inspected; between attempts the coroutine sleeps for
    ``_OVERLAY_IP_POLL_INTERVAL_SECONDS``.

    Args:
        namespace: Namespace passed to ``get_neon_vm``.
        vm_name: VM name passed to ``get_neon_vm``.

    Returns:
        The overlay IP with surrounding whitespace stripped.

    Raises:
        VelaDeploymentError: If no overlay IP was seen before
            ``_OVERLAY_IP_TIMEOUT_SECONDS`` elapsed. The most recent lookup
            failure, if any, is attached as the cause.
    """
    event_loop = asyncio.get_running_loop()
    give_up_at = event_loop.time() + _OVERLAY_IP_TIMEOUT_SECONDS
    most_recent_failure: Exception | None = None
    logger.info("Waiting for overlay IP for autoscaler VM %s/%s", namespace, vm_name)

    while True:
        neon_vm = None
        try:
            neon_vm = await get_neon_vm(namespace, vm_name)
        except (VelaKubernetesError, RuntimeError) as lookup_error:
            # Lookup failures are retried; the last one is kept for the timeout error.
            most_recent_failure = lookup_error

        address = (neon_vm.status.extra_net_ip or "").strip() if neon_vm else ""
        if address:
            logger.info(
                "Autoscaler VM %s/%s overlay network %s is ready",
                namespace,
                vm_name,
                address,
            )
            return address

        # The deadline is checked after the attempt, so at least one lookup always runs.
        if event_loop.time() >= give_up_at:
            timeout_error = VelaDeploymentError(
                f"Timed out waiting for overlay IP for autoscaler VM {vm_name} in namespace {namespace}"
            )
            if most_recent_failure is None:
                raise timeout_error
            raise timeout_error from most_recent_failure

        await asyncio.sleep(_OVERLAY_IP_POLL_INTERVAL_SECONDS)
193+
194+
195+
def _overlay_service_specs() -> list[tuple[str, int, str]]:
    """Return ``(service name, port, port name)`` for each overlay-backed service.

    Service names are produced by ``branch_service_name`` from the component
    identifiers listed below, in the same order.
    """
    components = (
        ("db", 5432, "postgres"),
        ("pgbouncer", 6432, "pgbouncer"),
        ("rest", 3000, "http"),
        ("storage", 5000, "http"),
        ("meta", 8080, "http"),
        ("pgexporter", 9187, "http"),
    )
    return [
        (branch_service_name(component), port, port_name)
        for component, port, port_name in components
    ]
204+
205+
206+
async def _ensure_autoscaler_overlay_endpoint_slices(namespace: str, overlay_ip: str) -> None:
    """Ensure one EndpointSlice per overlay service, all pointing at ``overlay_ip``.

    Each slice is named after the service it is created for. Services are
    processed one at a time in the order given by ``_overlay_service_specs``,
    so the first failure stops the remaining ones.
    """
    for spec in _overlay_service_specs():
        svc_name, svc_port, svc_port_name = spec
        await kube_service.ensure_endpoint_slice(
            namespace=namespace,
            service_name=svc_name,
            slice_name=svc_name,
            address=overlay_ip,
            port_name=svc_port_name,
            port=svc_port,
        )
216+
217+
218+
async def _initialize_autoscaler_overlay_endpoints(namespace: str) -> None:
    """Wait for the autoscaler VM's overlay IP, then ensure its endpoint slices.

    The VM name comes from ``_autoscaler_vm_name()``; any error raised while
    waiting or while ensuring the slices propagates to the caller.
    """
    overlay_ip = await _wait_for_autoscaler_overlay_ip(namespace, _autoscaler_vm_name())
    await _ensure_autoscaler_overlay_endpoint_slices(namespace, overlay_ip)
222+
223+
160224
def _build_storage_class_manifest(*, storage_class_name: str, iops: int, base_storage_class: Any) -> dict[str, Any]:
161225
provisioner = getattr(base_storage_class, "provisioner", None)
162226
if not provisioner:
@@ -486,6 +550,7 @@ async def create_vela_config(
486550
stderr=subprocess.PIPE,
487551
text=True,
488552
)
553+
await _initialize_autoscaler_overlay_endpoints(namespace)
489554
except subprocess.CalledProcessError as e:
490555
logger.exception(f"Failed to create deployment: {e.stderr}")
491556
release_name = _release_name()

src/deployment/charts/vela/templates/_helpers.tpl

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -44,13 +44,6 @@ Return the PgBouncer config map name that lives next to the autoscaler VM.
4444
{{- printf "%s-pgbouncer" (include "vela.autoscaler.name" .) | trunc 63 | trimSuffix "-" }}
4545
{{- end }}
4646

47-
{{/*
48-
Selector label used to identify the autoscaler VM pods.
49-
*/}}
50-
{{- define "vela.autoscaler.selectorLabel" -}}
51-
vm.neon.tech/name: {{ include "vela.autoscaler.name" . }}
52-
{{- end }}
53-
5447
{{/*
5548
Create chart name and version as used by the chart label.
5649
*/}}

src/deployment/charts/vela/templates/autoscaler/db-service.yaml

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,6 @@ spec:
2828
targetPort: 5432
2929
protocol: TCP
3030
name: postgres
31-
selector:
32-
{{ include "vela.autoscaler.selectorLabel" . | nindent 4 }}
3331
---
3432
{{- end }}
3533
apiVersion: v1
@@ -50,5 +48,3 @@ spec:
5048
targetPort: 5432
5149
protocol: TCP
5250
name: postgres
53-
selector:
54-
{{ include "vela.autoscaler.selectorLabel" . | nindent 4 }}

src/deployment/charts/vela/templates/autoscaler/pgbouncer-service.yaml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,4 @@ spec:
1313
targetPort: 6432
1414
protocol: TCP
1515
name: pgbouncer
16-
selector:
17-
{{ include "vela.autoscaler.selectorLabel" . | nindent 4 }}
1816
{{- end }}

src/deployment/charts/vela/templates/compose/services.yaml

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,6 @@ spec:
1313
targetPort: 3000
1414
protocol: TCP
1515
name: http
16-
selector:
17-
{{ include "vela.autoscaler.selectorLabel" . | nindent 4 }}
1816

1917
---
2018
{{- if .Values.storage.enabled }}
@@ -31,8 +29,6 @@ spec:
3129
targetPort: 5000
3230
protocol: TCP
3331
name: http
34-
selector:
35-
{{ include "vela.autoscaler.selectorLabel" . | nindent 4 }}
3632
---
3733
{{- end }}
3834
apiVersion: v1
@@ -48,8 +44,6 @@ spec:
4844
targetPort: 8080
4945
protocol: TCP
5046
name: http
51-
selector:
52-
{{ include "vela.autoscaler.selectorLabel" . | nindent 4 }}
5347
---
5448
apiVersion: v1
5549
kind: Service
@@ -65,6 +59,4 @@ spec:
6559
targetPort: 9187
6660
protocol: TCP
6761
name: http
68-
selector:
69-
{{ include "vela.autoscaler.selectorLabel" . | nindent 4 }}
7062
{{- end }}

src/deployment/charts/vela/values.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ autoscalerVm:
4646
fullnameOverride: ""
4747
powerState: Running
4848
extraNetwork:
49-
enable: false
49+
enable: true
5050
image:
5151
repository: docker.io/manoharbrm/vela-vm
5252
tag: latest

src/deployment/kubernetes/__init__.py

Lines changed: 59 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from kubernetes_asyncio import client
88

99
from ...exceptions import VelaKubernetesError
10-
from ._util import core_v1_client, custom_api_client, storage_v1_client
10+
from ._util import core_v1_client, custom_api_client, discovery_v1_client, storage_v1_client
1111
from .neonvm import NeonVM, get_neon_vm
1212

1313
logger = logging.getLogger(__name__)
@@ -143,6 +143,64 @@ async def apply_kong_consumer(self, namespace: str, consumer: dict[str, Any]) ->
143143
else:
144144
raise
145145

146+
async def ensure_endpoint_slice(
    self,
    namespace: str,
    slice_name: str,
    service_name: str,
    address: str,
    port: int,
    port_name: str,
) -> None:
    """Create the EndpointSlice for a service, or refresh it if it already exists.

    The slice carries the ``kubernetes.io/service-name`` label for
    ``service_name`` and a single ready endpoint at ``address`` on the given
    TCP port.

    If a slice with the same name already exists (HTTP 409 on create) it is
    replaced with the freshly built body, so that a changed ``address`` is
    picked up instead of leaving the old one in place.

    Args:
        namespace: Namespace in which the slice is created.
        slice_name: Name of the EndpointSlice object.
        service_name: Service the slice is labelled for.
        address: IP address of the single endpoint.
        port: Port number exposed by the endpoint.
        port_name: Name given to the port entry.

    Raises:
        VelaKubernetesError: If creating or replacing the slice fails.
    """
    # Function-scope import: nothing else visible in this module needs it.
    import ipaddress

    # The slice's addressType has to agree with the address family. An
    # unparsable value keeps the previous "IPv4" default and is left for the
    # API server to reject.
    try:
        address_type = f"IPv{ipaddress.ip_address(address).version}"
    except ValueError:
        address_type = "IPv4"

    body = {
        "apiVersion": "discovery.k8s.io/v1",
        "kind": "EndpointSlice",
        "metadata": {
            "name": slice_name,
            "namespace": namespace,
            "labels": {
                "kubernetes.io/service-name": service_name,
            },
        },
        "addressType": address_type,
        "endpoints": [
            {
                "addresses": [address],
                "conditions": {"ready": True},
            }
        ],
        "ports": [
            {
                "name": port_name,
                "port": port,
                "protocol": "TCP",
            }
        ],
    }

    async with discovery_v1_client() as discovery:
        try:
            await discovery.create_namespaced_endpoint_slice(namespace, body=body)
        except client.exceptions.ApiException as exc:
            if exc.status != 409:
                raise VelaKubernetesError(
                    f"Failed to create EndpointSlice {slice_name} for {service_name} in {namespace}: {exc.reason}"
                ) from exc
        else:
            logger.info(
                "Created EndpointSlice %s for service %s/%s via %s",
                slice_name,
                namespace,
                service_name,
                address,
            )
            return

        # 409: the slice is already there, possibly with an outdated address.
        # Overwrite it so the service keeps pointing at the current endpoint.
        try:
            await discovery.replace_namespaced_endpoint_slice(slice_name, namespace, body=body)
        except client.exceptions.ApiException as exc:
            raise VelaKubernetesError(
                f"Failed to update EndpointSlice {slice_name} for {service_name} in {namespace}: {exc.reason}"
            ) from exc
        logger.info(
            "Updated existing EndpointSlice %s for service %s/%s via %s",
            slice_name,
            namespace,
            service_name,
            address,
        )
203+
146204
async def apply_secret(self, namespace: str, secret: dict[str, Any]) -> None:
147205
name = secret["metadata"]["name"]
148206
async with core_v1_client() as core_v1:

src/deployment/kubernetes/_util.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from contextlib import asynccontextmanager
22

33
from aiohttp import ClientTimeout
4-
from kubernetes_asyncio.client import ApiClient, CoreV1Api, CustomObjectsApi, StorageV1Api
4+
from kubernetes_asyncio.client import ApiClient, CoreV1Api, CustomObjectsApi, DiscoveryV1Api, StorageV1Api
55
from kubernetes_asyncio.config import load_incluster_config, load_kube_config
66
from kubernetes_asyncio.config.config_exception import ConfigException
77

@@ -57,3 +57,9 @@ async def custom_api_client():
5757
async def storage_v1_client():
5858
async with api_client() as client:
5959
yield StorageV1Api(api_client=client)
60+
61+
62+
@asynccontextmanager
async def discovery_v1_client():
    """Yield a ``DiscoveryV1Api`` built on a client obtained from ``api_client()``.

    The underlying client stays open for the duration of the ``async with``
    block that wraps the yield.
    """
    async with api_client() as shared_client:
        discovery_api = DiscoveryV1Api(api_client=shared_client)
        yield discovery_api

0 commit comments

Comments
 (0)