|
46 | 46 | from ._util import deployment_namespace |
47 | 47 | from .deployment import DeploymentParameters |
48 | 48 | from .grafana import create_vela_grafana_obj, delete_vela_grafana_obj |
49 | | -from .kubernetes import KubernetesService |
| 49 | +from .kubernetes import KubernetesService, get_neon_vm |
50 | 50 | from .kubernetes._util import custom_api_client |
51 | 51 | from .settings import get_settings |
52 | 52 | from .simplyblock_api import create_simplyblock_api |
|
75 | 75 | AUTOSCALER_PVC_SUFFIX = "-block-data" |
76 | 76 | _LOAD_BALANCER_TIMEOUT_SECONDS = float(600) |
77 | 77 | _LOAD_BALANCER_POLL_INTERVAL_SECONDS = float(2) |
| 78 | +_OVERLAY_IP_TIMEOUT_SECONDS = float(300) |
| 79 | +_OVERLAY_IP_POLL_INTERVAL_SECONDS = float(5) |
78 | 80 | DNSRecordType = Literal["AAAA", "CNAME"] |
79 | 81 | DATABASE_DNS_RECORD_TYPE: Literal["AAAA"] = "AAAA" |
80 | 82 |
|
@@ -157,6 +159,68 @@ def branch_service_name(component: str) -> str: |
157 | 159 | return f"{_release_fullname()}-{component}" |
158 | 160 |
|
159 | 161 |
|
| 162 | +async def _wait_for_autoscaler_overlay_ip(namespace: str, vm_name: str) -> str: |
| 163 | + loop = asyncio.get_running_loop() |
| 164 | + deadline = loop.time() + _OVERLAY_IP_TIMEOUT_SECONDS |
| 165 | + last_error: Exception | None = None |
| 166 | + logger.info("Waiting for overlay IP for autoscaler VM %s/%s", namespace, vm_name) |
| 167 | + |
| 168 | + while True: |
| 169 | + try: |
| 170 | + vm = await get_neon_vm(namespace, vm_name) |
| 171 | + except (VelaKubernetesError, RuntimeError) as exc: |
| 172 | + last_error = exc |
| 173 | + vm = None |
| 174 | + |
| 175 | + if vm: |
| 176 | + overlay_ip = (vm.status.extra_net_ip or "").strip() |
| 177 | + if overlay_ip: |
| 178 | + logger.info( |
| 179 | + "Autoscaler VM %s/%s overlay network %s is ready", |
| 180 | + namespace, |
| 181 | + vm_name, |
| 182 | + overlay_ip, |
| 183 | + ) |
| 184 | + return overlay_ip |
| 185 | + |
| 186 | + if loop.time() >= deadline: |
| 187 | + message = f"Timed out waiting for overlay IP for autoscaler VM {vm_name} in namespace {namespace}" |
| 188 | + if last_error is not None: |
| 189 | + raise VelaDeploymentError(message) from last_error |
| 190 | + raise VelaDeploymentError(message) |
| 191 | + |
| 192 | + await asyncio.sleep(_OVERLAY_IP_POLL_INTERVAL_SECONDS) |
| 193 | + |
| 194 | + |
| 195 | +def _overlay_service_specs() -> list[tuple[str, int, str]]: |
| 196 | + return [ |
| 197 | + (branch_service_name("db"), 5432, "postgres"), |
| 198 | + (branch_service_name("pgbouncer"), 6432, "pgbouncer"), |
| 199 | + (branch_service_name("rest"), 3000, "http"), |
| 200 | + (branch_service_name("storage"), 5000, "http"), |
| 201 | + (branch_service_name("meta"), 8080, "http"), |
| 202 | + (branch_service_name("pgexporter"), 9187, "http"), |
| 203 | + ] |
| 204 | + |
| 205 | + |
| 206 | +async def _ensure_autoscaler_overlay_endpoint_slices(namespace: str, overlay_ip: str) -> None: |
| 207 | + for service_name, port, port_name in _overlay_service_specs(): |
| 208 | + await kube_service.ensure_endpoint_slice( |
| 209 | + namespace=namespace, |
| 210 | + slice_name=service_name, |
| 211 | + service_name=service_name, |
| 212 | + address=overlay_ip, |
| 213 | + port=port, |
| 214 | + port_name=port_name, |
| 215 | + ) |
| 216 | + |
| 217 | + |
| 218 | +async def _initialize_autoscaler_overlay_endpoints(namespace: str) -> None: |
| 219 | + vm_name = _autoscaler_vm_name() |
| 220 | + overlay_ip = await _wait_for_autoscaler_overlay_ip(namespace, vm_name) |
| 221 | + await _ensure_autoscaler_overlay_endpoint_slices(namespace, overlay_ip) |
| 222 | + |
| 223 | + |
160 | 224 | def _build_storage_class_manifest(*, storage_class_name: str, iops: int, base_storage_class: Any) -> dict[str, Any]: |
161 | 225 | provisioner = getattr(base_storage_class, "provisioner", None) |
162 | 226 | if not provisioner: |
@@ -486,6 +550,7 @@ async def create_vela_config( |
486 | 550 | stderr=subprocess.PIPE, |
487 | 551 | text=True, |
488 | 552 | ) |
| 553 | + await _initialize_autoscaler_overlay_endpoints(namespace) |
489 | 554 | except subprocess.CalledProcessError as e: |
490 | 555 | logger.exception(f"Failed to create deployment: {e.stderr}") |
491 | 556 | release_name = _release_name() |
|
0 commit comments