|
| 1 | +# SPDX-FileCopyrightText: 2026 Nextcloud GmbH and Nextcloud contributors |
| 2 | +# SPDX-License-Identifier: AGPL-3.0-or-later |
| 3 | +"""K8s ExApp routing integration test. |
| 4 | +
|
| 5 | +Verifies that HaRP keeps routing requests to a K8s-deployed ExApp correctly across the full enable/disable lifecycle. |
| 6 | +Catches the cache-overwrite bug where AppAPI's `harpExAppUpdate(true)` push (POST /exapp_storage/{appId}) |
| 7 | +ran at the end of `enableExApp` and replaced the K8s upstream that `/k8s/exapp/expose` had correctly |
| 8 | +resolved earlier with the logical Docker-style host (the appId). |
| 9 | +
|
| 10 | +Test flow: |
| 11 | + 1. Register `app-skeleton-python` on the K8s daemon |
| 12 | + (deploy -> expose -> init -> enable -> harpExAppUpdate(true) chain has fully completed by the time the call returns) |
| 13 | + 2. Hit /exapps/<appid>/public via HaRP. The /public route is declared PUBLIC in app-skeleton-python's info.xml, |
| 14 | + so no AppAPI auth headers are needed; we just need HaRP to route successfully |
| 15 | + 3. Disable, then re-enable. Re-enable also runs harpExAppUpdate(true) |
| 16 | + 4. Hit /public again — must still return 200 |
| 17 | +
|
| 18 | +Prerequisites (provided by the K8s test workflows): |
| 19 | + - Nextcloud + AppAPI installed, k3s running, HaRP container with K8s backend up, k8s_test daemon registered as default |
| 20 | + - app-skeleton-python image pre-pulled into k3s |
| 21 | + - NODE_IP env var pointing to the k3s node |
| 22 | +
|
| 23 | +Env: |
| 24 | + NODE_IP (required) k3s node IP exposing HaRP and NodePort Services |
| 25 | + HP_SHARED_KEY HaRP shared key (only used in failure messages) |
| 26 | + K8S_EXPOSE_TYPE informational; matches the workflow under test |
| 27 | + ROUTING_TEST_RETRIES override the retry count for routing checks |
| 28 | + ROUTING_TEST_RETRY_INTERVAL override the per-retry sleep (seconds) |
| 29 | +""" |
| 30 | + |
| 31 | +import json |
| 32 | +import os |
| 33 | +import sys |
| 34 | +import time |
| 35 | +from subprocess import run |
| 36 | + |
| 37 | +import requests |
| 38 | + |
| 39 | +EXPOSE_TYPE = os.environ.get("K8S_EXPOSE_TYPE", "nodeport") |
| 40 | +NODE_IP = os.environ.get("NODE_IP", "") |
| 41 | +HP_SHARED_KEY = os.environ.get("HP_SHARED_KEY", "test_shared_key_12345") |
| 42 | +MANUAL_CLUSTER_IP = os.environ.get("MANUAL_CLUSTER_IP", "10.43.200.200") |
| 43 | +ROUTING_RETRIES = int(os.environ.get("ROUTING_TEST_RETRIES", "15")) |
| 44 | +ROUTING_INTERVAL = float(os.environ.get("ROUTING_TEST_RETRY_INTERVAL", "1.0")) |
| 45 | + |
| 46 | +DAEMON = "k8s_test" |
| 47 | +APP_ID = "app-skeleton-python" |
| 48 | +NAMESPACE = "nextcloud-exapps" |
| 49 | +APP_PORT = 23000 |
| 50 | +SKELETON_XML_URL = "https://raw.githubusercontent.com/nextcloud/app-skeleton-python/main/appinfo/info.xml" |
| 51 | + |
| 52 | + |
| 53 | +def _harp_url() -> str: |
| 54 | + if not NODE_IP: |
| 55 | + sys.exit("NODE_IP env var required (export from kubectl get node ...)") |
| 56 | + return f"http://{NODE_IP}:8780" |
| 57 | + |
| 58 | + |
| 59 | +def occ(args: list[str], *, check: bool = False, timeout: int | None = None): |
| 60 | + cmd = ["php", "occ", "--no-warnings", *args] |
| 61 | + return run(cmd, capture_output=True, check=check, timeout=timeout) |
| 62 | + |
| 63 | + |
| 64 | +def http_get_via_harp(path: str, *, timeout: float = 10.0) -> requests.Response: |
| 65 | + """Hit HaRP's HTTP frontend (port 8780) directly, bypassing nginx.""" |
| 66 | + return requests.get(f"{_harp_url()}/exapps/{APP_ID}{path}", timeout=timeout) |
| 67 | + |
| 68 | + |
| 69 | +def assert_routing_works(label: str) -> None: |
| 70 | + """Make a routed request via HaRP and assert the ExApp pod responded. |
| 71 | +
|
| 72 | + `/public` is a PUBLIC route in app-skeleton-python's info.xml, so it needs no AppAPI auth |
| 73 | + """ |
| 74 | + last: str | None = None |
| 75 | + for attempt in range(ROUTING_RETRIES): |
| 76 | + try: |
| 77 | + r = http_get_via_harp("/public") |
| 78 | + except requests.RequestException as e: |
| 79 | + last = f"request error: {e}" |
| 80 | + else: |
| 81 | + if r.status_code == 200: |
| 82 | + print(f" [{label}] routing OK (attempt {attempt + 1})") |
| 83 | + return |
| 84 | + last = f"HTTP {r.status_code}: {r.text[:200]!r}" |
| 85 | + time.sleep(ROUTING_INTERVAL) |
| 86 | + |
| 87 | + raise AssertionError( |
| 88 | + f"[{label}] HaRP routing to '{APP_ID}' failed after {ROUTING_RETRIES} attempts: " |
| 89 | + f"{last}\n" |
| 90 | + f"A 404 here typically means HaRP's cached `host` for the ExApp is the appId rather " |
| 91 | + f"than a K8s upstream (NodePort/ClusterIP/LB), and the SPOA agent's resolve_ip() returned NXDOMAIN." |
| 92 | + f"See the comment on haproxy_agent.py:add_exapp." |
| 93 | + ) |
| 94 | + |
| 95 | + |
| 96 | +def ensure_manual_service() -> None: |
| 97 | + """Pre-create the operator-managed Service for manual expose tests. |
| 98 | +
|
| 99 | + For expose_type=manual, HaRP doesn't create a K8s Service — the operator is expected to. |
| 100 | + The k8s_test daemon is registered with a fixed upstream_host (MANUAL_CLUSTER_IP), |
| 101 | + so the Service we create here uses the same ClusterIP. No-op for other expose types. |
| 102 | + """ |
| 103 | + if EXPOSE_TYPE != "manual": |
| 104 | + return |
| 105 | + svc_name = f"nc-app-{APP_ID}" |
| 106 | + manifest = json.dumps({ |
| 107 | + "apiVersion": "v1", |
| 108 | + "kind": "Service", |
| 109 | + "metadata": {"name": svc_name, "namespace": NAMESPACE}, |
| 110 | + "spec": { |
| 111 | + "clusterIP": MANUAL_CLUSTER_IP, |
| 112 | + "selector": {"app": svc_name}, |
| 113 | + "ports": [{"name": "http", "port": APP_PORT, "targetPort": APP_PORT}], |
| 114 | + }, |
| 115 | + }) |
| 116 | + r = run( |
| 117 | + ["kubectl", "-n", NAMESPACE, "apply", "-f", "-"], |
| 118 | + input=manifest.encode(), capture_output=True, |
| 119 | + ) |
| 120 | + if r.returncode != 0: |
| 121 | + sys.exit( |
| 122 | + f"Failed to pre-create operator Service '{svc_name}': " |
| 123 | + f"{r.stderr.decode(errors='replace')}" |
| 124 | + ) |
| 125 | + |
| 126 | + |
| 127 | +def cleanup() -> None: |
| 128 | + occ( |
| 129 | + ["app_api:app:unregister", APP_ID, "--rm-data", "--silent", "--force"], |
| 130 | + timeout=180, |
| 131 | + ) |
| 132 | + if EXPOSE_TYPE == "manual": |
| 133 | + run( |
| 134 | + ["kubectl", "-n", NAMESPACE, "delete", "service", |
| 135 | + f"nc-app-{APP_ID}", "--ignore-not-found=true"], |
| 136 | + capture_output=True, |
| 137 | + ) |
| 138 | + |
| 139 | + |
| 140 | +def register_app() -> None: |
| 141 | + print(f"Registering {APP_ID} on daemon '{DAEMON}'...") |
| 142 | + r = occ( |
| 143 | + [ |
| 144 | + "app_api:app:register", APP_ID, DAEMON, |
| 145 | + "--info-xml", SKELETON_XML_URL, |
| 146 | + "--wait-finish", |
| 147 | + ], |
| 148 | + timeout=600, |
| 149 | + ) |
| 150 | + if r.returncode != 0: |
| 151 | + sys.exit( |
| 152 | + f"Register failed (exit {r.returncode}):\n" |
| 153 | + f" stdout: {r.stdout.decode(errors='replace')}\n" |
| 154 | + f" stderr: {r.stderr.decode(errors='replace')}" |
| 155 | + ) |
| 156 | + |
| 157 | + |
| 158 | +def disable_then_enable() -> None: |
| 159 | + print("Disabling...") |
| 160 | + r = occ(["app_api:app:disable", APP_ID], timeout=180) |
| 161 | + if r.returncode != 0: |
| 162 | + sys.exit(f"Disable failed: {r.stdout.decode(errors='replace')}") |
| 163 | + |
| 164 | + print("Re-enabling...") |
| 165 | + r = occ(["app_api:app:enable", APP_ID], timeout=300) |
| 166 | + if r.returncode != 0: |
| 167 | + sys.exit(f"Enable failed: {r.stdout.decode(errors='replace')}") |
| 168 | + |
| 169 | + |
| 170 | +def main() -> None: |
| 171 | + print(f"K8s routing test (expose_type={EXPOSE_TYPE}, harp={_harp_url()})") |
| 172 | + |
| 173 | + cleanup() # any leftover from a previous test run |
| 174 | + ensure_manual_service() |
| 175 | + register_app() |
| 176 | + |
| 177 | + try: |
| 178 | + # After register: |
| 179 | + # expose populated cache correctly, then init=100% triggered enableExApp -> harpExAppUpdate(true). |
| 180 | + assert_routing_works("after-register") |
| 181 | + |
| 182 | + # Re-enable goes through harpExAppUpdate(true) again, |
| 183 | + # exercising the same cache write path without a preceding expose call. |
| 184 | + disable_then_enable() |
| 185 | + assert_routing_works("after-re-enable") |
| 186 | + |
| 187 | + print("\nPASS: HaRP keeps routing correct across the enable lifecycle.") |
| 188 | + finally: |
| 189 | + cleanup() |
| 190 | + |
| 191 | + |
| 192 | +if __name__ == "__main__": |
| 193 | + main() |
0 commit comments