Skip to content

Commit 9b61ac6

Browse files
committed
fix: update demo notebooks for MCP strict mode and OAuth token
- Strip 'strict' field from tool definitions (Databricks model serving rejects it)
- Rename secret from 'pat' to 'oauth_token'
- Add 7-question parallel scenarios to MCP notebook (managed + gateway + cache)
- Fix markdown rendering in API notebook
- Replace Q4 with simpler query to avoid context overflow
- Add agent instructions for concise responses

Co-authored-by: Isaac
1 parent b489d86 commit 9b61ac6

File tree

2 files changed

+35
-155
lines changed

2 files changed

+35
-155
lines changed

notebooks/api_gateway_demo.ipynb

Lines changed: 10 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -4,21 +4,7 @@
44
"cell_type": "markdown",
55
"metadata": {},
66
"outputs": [],
7-
"source": [
8-
"# Genie Cache Gateway \u2014 API Gateway Demo",
9-
"",
10-
"The Databricks Genie API has a hard limit of **5 queries per minute per workspace**.",
11-
"",
12-
"This notebook fires 7 queries **in parallel** to show the problem and how the Gateway solves it.",
13-
"",
14-
"**The only change between \"Direct\" and \"Via Gateway\" is the host and the ID.**",
15-
"",
16-
"| Scenario | Host | ID | Result |",
17-
"|----------|------|----|--------|",
18-
"| Direct to Genie | `WORKSPACE_HOST` | `GENIE_SPACE_ID` | 429 errors (rate limited) |",
19-
"| Via Gateway (1st) | `APP_HOST` | `GATEWAY_ID` | All succeed (queue + retry) |",
20-
"| Via Gateway (2nd) | `APP_HOST` | `GATEWAY_ID` | Instant (semantic cache) |"
21-
]
7+
"source": "# Genie Cache Gateway — API Gateway Demo\n\nThe Databricks Genie API has a hard limit of **5 queries per minute per workspace**.\n\nThis notebook fires 7 queries **in parallel** to show the problem and how the Gateway solves it.\n\n**The only change between \"Direct\" and \"Via Gateway\" is the host and the ID.**\n\n| Scenario | Host | ID | Result |\n|----------|------|----|--------|\n| Direct to Genie | `WORKSPACE_HOST` | `GENIE_SPACE_ID` | 429 errors (rate limited) |\n| Via Gateway (1st) | `APP_HOST` | `GATEWAY_ID` | All succeed (queue + retry) |\n| Via Gateway (2nd) | `APP_HOST` | `GATEWAY_ID` | Instant (semantic cache) |"
228
},
239
{
2410
"cell_type": "code",
@@ -36,50 +22,14 @@
3622
"metadata": {},
3723
"outputs": [],
3824
"execution_count": null,
39-
"source": [
40-
"APP_HOST = dbutils.widgets.get(\"app_url\")\n",
41-
"GATEWAY_ID = dbutils.widgets.get(\"gateway_id\")\n",
42-
"GENIE_SPACE_ID = dbutils.widgets.get(\"space_id\")\n",
43-
"\n",
44-
"questions = [\n",
45-
" \"What are the top 3 nations by total revenue?\",\n",
46-
" \"How many orders were placed in January 1994?\",\n",
47-
" \"What is the average account balance of customers in the BUILDING segment?\",\n",
48-
" \"Which supplier has the most parts?\",\n",
49-
" \"Total number of lineitems with quantity greater than 40\",\n",
50-
" \"Revenue by year for the ASIA region\",\n",
51-
" \"How many distinct part types exist?\",\n",
52-
"]"
53-
]
25+
"source": "APP_HOST = dbutils.widgets.get(\"app_url\")\nGATEWAY_ID = dbutils.widgets.get(\"gateway_id\")\nGENIE_SPACE_ID = dbutils.widgets.get(\"space_id\")\n\nquestions = [\n \"What are the top 3 nations by total revenue?\",\n \"How many orders were placed in January 1994?\",\n \"What is the average account balance of customers in the BUILDING segment?\",\n \"What is the total number of suppliers?\",\n \"Total number of lineitems with quantity greater than 40\",\n \"Revenue by year for the ASIA region\",\n \"How many distinct part types exist?\",\n]"
5426
},
5527
{
5628
"cell_type": "code",
5729
"metadata": {},
5830
"outputs": [],
5931
"execution_count": null,
60-
"source": [
61-
"import requests, time, json\n",
62-
"from concurrent.futures import ThreadPoolExecutor, as_completed\n",
63-
"from databricks.sdk import WorkspaceClient\n",
64-
"\n",
65-
"w = WorkspaceClient()\n",
66-
"WORKSPACE_HOST = w.config.host\n",
67-
"TOKEN = dbutils.secrets.get(scope=\"genie-cache\", key=\"pat\")\n",
68-
"\n",
69-
"H = {\"Authorization\": f\"Bearer {TOKEN}\", \"Content-Type\": \"application/json\"}\n",
70-
"\n",
71-
"assert GENIE_SPACE_ID, \"Set space_id widget\"\n",
72-
"assert GATEWAY_ID, \"Set gateway_id widget (from app UI -> Gateways)\"\n",
73-
"\n",
74-
"r = requests.get(f\"{APP_HOST}/api/health\", headers=H, timeout=10)\n",
75-
"assert r.status_code == 200, f\"App unreachable: {r.status_code} {r.text[:200]}\"\n",
76-
"\n",
77-
"gw = requests.get(f\"{APP_HOST}/api/gateways/{GATEWAY_ID}\", headers=H, timeout=10)\n",
78-
"assert gw.status_code == 200, f\"Gateway not found: {gw.status_code} {gw.text[:200]}\"\n",
79-
"gw_data = gw.json()\n",
80-
"\n",
81-
"print(f\"OK | Space: {GENIE_SPACE_ID[:12]}... | Gateway: {gw_data['name']}\")"
82-
]
32+
"source": "import requests, time, json\nfrom concurrent.futures import ThreadPoolExecutor, as_completed\nfrom databricks.sdk import WorkspaceClient\n\nw = WorkspaceClient()\nWORKSPACE_HOST = w.config.host\nTOKEN = dbutils.secrets.get(scope=\"genie-cache\", key=\"oauth_token\")\n\nH = {\"Authorization\": f\"Bearer {TOKEN}\", \"Content-Type\": \"application/json\"}\n\nassert GENIE_SPACE_ID, \"Set space_id widget\"\nassert GATEWAY_ID, \"Set gateway_id widget (from app UI -> Gateways)\"\n\nr = requests.get(f\"{APP_HOST}/api/health\", headers=H, timeout=10)\nassert r.status_code == 200, f\"App unreachable: {r.status_code} {r.text[:200]}\"\n\ngw = requests.get(f\"{APP_HOST}/api/gateways/{GATEWAY_ID}\", headers=H, timeout=10)\nassert gw.status_code == 200, f\"Gateway not found: {gw.status_code} {gw.text[:200]}\"\ngw_data = gw.json()\n\nprint(f\"OK | Space: {GENIE_SPACE_ID[:12]}... | Gateway: {gw_data['name']}\")"
8333
},
8434
{
8535
"cell_type": "code",
@@ -98,7 +48,7 @@
9848
}
9949
},
10050
"outputs": [],
101-
"source": "# =============================================================================\n# ONE function \u2014 only host and space_id change between scenarios\n# =============================================================================\n\ndef ask_genie(host, space_id, question):\n    \"\"\"Send a question to the Genie API. Same function for direct and gateway.\"\"\"\n    r = requests.post(\n        f\"{host}/api/2.0/genie/spaces/{space_id}/start-conversation\",\n        headers=H, json={\"content\": question}, timeout=180,\n    )\n    if r.status_code == 429:\n        return {\"status\": \"429\", \"sql\": None, \"data\": None, \"from_cache\": False}\n    if r.status_code != 200:\n        return {\"status\": f\"HTTP {r.status_code} | {r.text[:200]}\", \"sql\": None, \"data\": None, \"from_cache\": False}\n\n    data = r.json()\n    conv_id = data.get(\"conversation_id\", \"\")\n    msg_id = data.get(\"message_id\", \"\")\n\n    # Poll until terminal status\n    if data.get(\"status\") not in (\"COMPLETED\", \"FAILED\", \"CANCELLED\"):\n        for _ in range(90):\n            time.sleep(2)\n            r2 = requests.get(\n                f\"{host}/api/2.0/genie/spaces/{space_id}/conversations/{conv_id}/messages/{msg_id}\",\n                headers=H, timeout=30,\n            )\n            if r2.status_code != 200: continue\n            data = r2.json()\n            if data.get(\"status\") in (\"COMPLETED\", \"FAILED\", \"CANCELLED\"): break\n\n    if data.get(\"status\") != \"COMPLETED\":\n        return {\"status\": data.get(\"status\", \"UNKNOWN\"), \"sql\": None, \"data\": None, \"from_cache\": False}\n\n    # Extract SQL and detect cache hit from attachments\n    sql = None\n    query_att_id = None\n    from_cache = False\n    for att in data.get(\"attachments\", []):\n        if not isinstance(att, dict):\n            continue\n        q = att.get(\"query\")\n        if q:\n            sql = q.get(\"query\") or q.get(\"sql\")\n            query_att_id = att.get(\"attachment_id\")\n            if \"cache\" in (q.get(\"description\") or \"\").lower():\n                from_cache = True\n        t = att.get(\"text\", {})\n        if \"cache\" in str(t.get(\"content\", \"\")).lower():\n            from_cache = True\n\n    # Fetch query result (full flow: start-conversation \u2192 poll \u2192 query-result)\n    result_data = None\n    if sql and query_att_id:\n        try:\n            r3 = requests.get(\n                f\"{host}/api/2.0/genie/spaces/{space_id}/conversations/{conv_id}\"\n                f\"/messages/{msg_id}/attachments/{query_att_id}/query-result\",\n                headers=H, timeout=60,\n            )\n            if r3.status_code == 200:\n                qr = r3.json()\n                stmt = qr.get(\"statement_response\", qr)\n                result = stmt.get(\"result\") or {}\n                rows = result.get(\"data_array\", [])\n                row_count = result.get(\"row_count\", len(rows))\n                result_data = {\"row_count\": row_count, \"rows\": rows[:5]}\n        except Exception:\n            pass\n\n    return {\"status\": \"COMPLETED\", \"sql\": sql, \"data\": result_data, \"from_cache\": from_cache}\n\n\ndef run_parallel(label, host, space_id):\n    \"\"\"Fire all questions in parallel against a given host + space_id.\"\"\"\n    print(f\"\\n{'='*80}\")\n    print(f\"  {label}\")\n    print(f\"  host={host}\")\n    print(f\"  id={space_id}\")\n    print(f\"{'='*80}\")\n\n    results = [None] * len(questions)\n    t0 = time.time()\n    with ThreadPoolExecutor(max_workers=len(questions)) as pool:\n        futs = {pool.submit(ask_genie, host, space_id, q): i for i, q in enumerate(questions)}\n        for f in as_completed(futs):\n            results[futs[f]] = f.result()\n    total = time.time() - t0\n\n    for i, q in enumerate(questions):\n        r = results[i]\n        tag = \"CACHE\" if r.get(\"from_cache\") else (\"429\" if r[\"status\"] == \"429\" else \"GENIE\")\n        print(f\"\\n  [{i+1}] {tag:>5s} | {r['status']}\")\n        print(f\"      Q: {q}\")\n        if r.get(\"sql\"):\n            print(f\"      SQL: {r['sql'][:120]}\")\n        if r.get(\"data\"):\n            print(f\"      Data: {r['data']['row_count']} rows\")\n            for row in r[\"data\"][\"rows\"][:3]:\n                print(f\"        {row}\")\n\n    print(f\"\\n  Total: {total:.1f}s\")\n    return results, total\n\nprint(\"Functions ready.\")"
51+
"source": "# =============================================================================\n# ONE function — only host and space_id change between scenarios\n# =============================================================================\n\ndef ask_genie(host, space_id, question):\n    \"\"\"Send a question to the Genie API. Same function for direct and gateway.\"\"\"\n    r = requests.post(\n        f\"{host}/api/2.0/genie/spaces/{space_id}/start-conversation\",\n        headers=H, json={\"content\": question}, timeout=180,\n    )\n    if r.status_code == 429:\n        return {\"status\": \"429\", \"sql\": None, \"data\": None, \"from_cache\": False}\n    if r.status_code != 200:\n        return {\"status\": f\"HTTP {r.status_code} | {r.text[:200]}\", \"sql\": None, \"data\": None, \"from_cache\": False}\n\n    data = r.json()\n    conv_id = data.get(\"conversation_id\", \"\")\n    msg_id = data.get(\"message_id\", \"\")\n\n    # Poll until terminal status\n    if data.get(\"status\") not in (\"COMPLETED\", \"FAILED\", \"CANCELLED\"):\n        for _ in range(90):\n            time.sleep(2)\n            r2 = requests.get(\n                f\"{host}/api/2.0/genie/spaces/{space_id}/conversations/{conv_id}/messages/{msg_id}\",\n                headers=H, timeout=30,\n            )\n            if r2.status_code != 200: continue\n            data = r2.json()\n            if data.get(\"status\") in (\"COMPLETED\", \"FAILED\", \"CANCELLED\"): break\n\n    if data.get(\"status\") != \"COMPLETED\":\n        return {\"status\": data.get(\"status\", \"UNKNOWN\"), \"sql\": None, \"data\": None, \"from_cache\": False}\n\n    # Extract SQL and detect cache hit from attachments\n    sql = None\n    query_att_id = None\n    from_cache = False\n    for att in data.get(\"attachments\", []):\n        if not isinstance(att, dict):\n            continue\n        q = att.get(\"query\")\n        if q:\n            sql = q.get(\"query\") or q.get(\"sql\")\n            query_att_id = att.get(\"attachment_id\")\n            if \"cache\" in (q.get(\"description\") or \"\").lower():\n                from_cache = True\n        t = att.get(\"text\", {})\n        if \"cache\" in str(t.get(\"content\", \"\")).lower():\n            from_cache = True\n\n    # Fetch query result (full flow: start-conversation → poll → query-result)\n    result_data = None\n    if sql and query_att_id:\n        try:\n            r3 = requests.get(\n                f\"{host}/api/2.0/genie/spaces/{space_id}/conversations/{conv_id}\"\n                f\"/messages/{msg_id}/attachments/{query_att_id}/query-result\",\n                headers=H, timeout=60,\n            )\n            if r3.status_code == 200:\n                qr = r3.json()\n                stmt = qr.get(\"statement_response\", qr)\n                result = stmt.get(\"result\") or {}\n                rows = result.get(\"data_array\", [])\n                row_count = result.get(\"row_count\", len(rows))\n                result_data = {\"row_count\": row_count, \"rows\": rows[:5]}\n        except Exception:\n            pass\n\n    return {\"status\": \"COMPLETED\", \"sql\": sql, \"data\": result_data, \"from_cache\": from_cache}\n\n\ndef run_parallel(label, host, space_id):\n    \"\"\"Fire all questions in parallel against a given host + space_id.\"\"\"\n    print(f\"\\n{'='*80}\")\n    print(f\"  {label}\")\n    print(f\"  host={host}\")\n    print(f\"  id={space_id}\")\n    print(f\"{'='*80}\")\n\n    results = [None] * len(questions)\n    t0 = time.time()\n    with ThreadPoolExecutor(max_workers=len(questions)) as pool:\n        futs = {pool.submit(ask_genie, host, space_id, q): i for i, q in enumerate(questions)}\n        for f in as_completed(futs):\n            results[futs[f]] = f.result()\n    total = time.time() - t0\n\n    for i, q in enumerate(questions):\n        r = results[i]\n        tag = \"CACHE\" if r.get(\"from_cache\") else (\"429\" if r[\"status\"] == \"429\" else \"GENIE\")\n        print(f\"\\n  [{i+1}] {tag:>5s} | {r['status']}\")\n        print(f\"      Q: {q}\")\n        if r.get(\"sql\"):\n            print(f\"      SQL: {r['sql'][:120]}\")\n        if r.get(\"data\"):\n            print(f\"      Data: {r['data']['row_count']} rows\")\n            for row in r[\"data\"][\"rows\"][:3]:\n                print(f\"        {row}\")\n\n    print(f\"\\n  Total: {total:.1f}s\")\n    return results, total\n\nprint(\"Functions ready.\")"
10252
},
10353
{
10454
"cell_type": "markdown",
@@ -112,7 +62,7 @@
11262
"title": ""
11363
}
11464
},
115-
"source": "## Scenario 1: Direct to Genie (7 in parallel)\n\nSame Genie Space, 7 simultaneous queries \u2192 **429 Too Many Requests** for some.\n\n```\nask_genie(host=WORKSPACE_HOST, space_id=GENIE_SPACE_ID, question=...)\n```"
65+
"source": "## Scenario 1: Direct to Genie (7 in parallel)\n\nSame Genie Space, 7 simultaneous queries → **429 Too Many Requests** for some.\n\n```\nask_genie(host=WORKSPACE_HOST, space_id=GENIE_SPACE_ID, question=...)\n```"
11666
},
11767
{
11868
"cell_type": "code",
@@ -131,7 +81,7 @@
13181
}
13282
},
13383
"outputs": [],
134-
"source": "# Scenario 1: Direct \u2192 WORKSPACE_HOST + GENIE_SPACE_ID\ndirect, d_time = run_parallel(\"Direct to Genie (7 in parallel)\", WORKSPACE_HOST, GENIE_SPACE_ID)\n\nok = sum(1 for r in direct if r.get(\"sql\"))\nblocked = sum(1 for r in direct if r[\"status\"] == \"429\")\nprint(f\"\\nResult: {ok} completed, {blocked} blocked (429)\")"
84+
"source": "# Scenario 1: Direct → WORKSPACE_HOST + GENIE_SPACE_ID\ndirect, d_time = run_parallel(\"Direct to Genie (7 in parallel)\", WORKSPACE_HOST, GENIE_SPACE_ID)\n\nok = sum(1 for r in direct if r.get(\"sql\"))\nblocked = sum(1 for r in direct if r[\"status\"] == \"429\")\nprint(f\"\\nResult: {ok} completed, {blocked} blocked (429)\")"
13585
},
13686
{
13787
"cell_type": "code",
@@ -150,7 +100,7 @@
150100
}
151101
},
152102
"outputs": [],
153-
"source": "# Scenario 2a: Gateway \u2192 APP_HOST + GATEWAY_ID (same function, only host + id change)\napp1, a1_time = run_parallel(\"Via Gateway, first round (7 in parallel)\", APP_HOST, GATEWAY_ID)\n\ngenie_ok = sum(1 for r in app1 if not r.get(\"from_cache\") and r.get(\"sql\"))\ncache_ok = sum(1 for r in app1 if r.get(\"from_cache\"))\nfailed = sum(1 for r in app1 if \"COMPLETED\" not in r[\"status\"])\nprint(f\"\\nResult: {genie_ok} via Genie, {cache_ok} from cache, {failed} failures\")"
103+
"source": "# Scenario 2a: Gateway → APP_HOST + GATEWAY_ID (same function, only host + id change)\napp1, a1_time = run_parallel(\"Via Gateway, first round (7 in parallel)\", APP_HOST, GATEWAY_ID)\n\ngenie_ok = sum(1 for r in app1 if not r.get(\"from_cache\") and r.get(\"sql\"))\ncache_ok = sum(1 for r in app1 if r.get(\"from_cache\"))\nfailed = sum(1 for r in app1 if \"COMPLETED\" not in r[\"status\"])\nprint(f\"\\nResult: {genie_ok} via Genie, {cache_ok} from cache, {failed} failures\")"
154104
},
155105
{
156106
"cell_type": "markdown",
@@ -164,7 +114,7 @@
164114
"title": ""
165115
}
166116
},
167-
"source": "## Scenario 2b: Via Gateway \u2014 second round (all from cache)\n\nSame queries again, same host + id. All should hit the **semantic cache** \u2014 instant response.\n\n```\nask_genie(host=APP_HOST, space_id=GATEWAY_ID, question=...) # same call, cached\n```"
117+
"source": "## Scenario 2b: Via Gateway — second round (all from cache)\n\nSame queries again, same host + id. All should hit the **semantic cache** — instant response.\n\n```\nask_genie(host=APP_HOST, space_id=GATEWAY_ID, question=...) # same call, cached\n```"
168118
},
169119
{
170120
"cell_type": "code",
@@ -183,7 +133,7 @@
183133
}
184134
},
185135
"outputs": [],
186-
"source": "# Scenario 2b: Gateway again \u2192 APP_HOST + GATEWAY_ID (expect all from cache)\napp2, a2_time = run_parallel(\"Via Gateway, second round (cache)\", APP_HOST, GATEWAY_ID)\n\ncache_count = sum(1 for r in app2 if r.get(\"from_cache\"))\nprint(f\"\\nResult: {cache_count}/{len(questions)} from cache | {a2_time:.1f}s vs {d_time:.1f}s direct\")"
136+
"source": "# Scenario 2b: Gateway again → APP_HOST + GATEWAY_ID (expect all from cache)\napp2, a2_time = run_parallel(\"Via Gateway, second round (cache)\", APP_HOST, GATEWAY_ID)\n\ncache_count = sum(1 for r in app2 if r.get(\"from_cache\"))\nprint(f\"\\nResult: {cache_count}/{len(questions)} from cache | {a2_time:.1f}s vs {d_time:.1f}s direct\")"
187137
},
188138
{
189139
"cell_type": "markdown",
@@ -216,7 +166,7 @@
216166
}
217167
},
218168
"outputs": [],
219-
"source": "def _tag(r):\n if r.get(\"from_cache\"): return \"CACHE\"\n if r[\"status\"] == \"429\": return \"429\"\n if r.get(\"sql\"): return \"OK\"\n return r[\"status\"][:8]\n\nprint(f\"\\n{'='*70}\")\nprint(f\" COMPARISON \u2014 Gateway: {gw_data['name']}\")\nprint(f\"{'='*70}\")\nprint(f\"\\n {'Question':32s} | {'Direct':>8s} | {'GW 1st':>8s} | {'GW 2nd':>8s}\")\nprint(f\" {'-'*64}\")\nfor i in range(len(questions)):\n print(f\" {questions[i][:30]:30s} | {_tag(direct[i]):>8s} | {_tag(app1[i]):>8s} | {_tag(app2[i]):>8s}\")\nprint(f\" {'TIME':30s} | {d_time:>7.1f}s | {a1_time:>7.1f}s | {a2_time:>7.1f}s\")\n\nprint(f\"\\n{'='*70}\")\nprint(f\" SUMMARY\")\nprint(f\"{'='*70}\")\nprint(f\" Direct: {sum(1 for r in direct if r['status']=='429')}/{len(questions)} blocked (429)\")\nprint(f\" GW 1st run: {sum(1 for r in app1 if r.get('sql'))}/{len(questions)} completed, zero 429s\")\nprint(f\" GW 2nd run: {sum(1 for r in app2 if r.get('from_cache'))}/{len(questions)} from cache ({a2_time:.1f}s)\")"
169+
"source": "def _tag(r):\n if r.get(\"from_cache\"): return \"CACHE\"\n if r[\"status\"] == \"429\": return \"429\"\n if r.get(\"sql\"): return \"OK\"\n return r[\"status\"][:8]\n\nprint(f\"\\n{'='*70}\")\nprint(f\" COMPARISON — Gateway: {gw_data['name']}\")\nprint(f\"{'='*70}\")\nprint(f\"\\n {'Question':32s} | {'Direct':>8s} | {'GW 1st':>8s} | {'GW 2nd':>8s}\")\nprint(f\" {'-'*64}\")\nfor i in range(len(questions)):\n print(f\" {questions[i][:30]:30s} | {_tag(direct[i]):>8s} | {_tag(app1[i]):>8s} | {_tag(app2[i]):>8s}\")\nprint(f\" {'TIME':30s} | {d_time:>7.1f}s | {a1_time:>7.1f}s | {a2_time:>7.1f}s\")\n\nprint(f\"\\n{'='*70}\")\nprint(f\" SUMMARY\")\nprint(f\"{'='*70}\")\nprint(f\" Direct: {sum(1 for r in direct if r['status']=='429')}/{len(questions)} blocked (429)\")\nprint(f\" GW 1st run: {sum(1 for r in app1 if r.get('sql'))}/{len(questions)} completed, zero 429s\")\nprint(f\" GW 2nd run: {sum(1 for r in app2 if r.get('from_cache'))}/{len(questions)} from cache ({a2_time:.1f}s)\")"
220170
}
221171
],
222172
"metadata": {

0 commit comments

Comments
 (0)