hotdata-dev · eddietejeda · May 17, 2026 · May 14, 2026 · May 15, 2026 · May 15, 2026
diff --git a/CONTRACT.md b/CONTRACT.md
@@ -0,0 +1,85 @@
+# hotdata-runtime Contract
+
+`hotdata-runtime` is the framework-agnostic runtime contract for Hotdata integrations.
+
+## Scope
+
+This package provides shared primitives for:
+
+- Environment and workspace resolution
+- Query execution and polling
+- Normalized tabular result handling
+- Basic workspace health checks
+
+## Public Runtime Contract
+
+The supported import surface is:
+
+- `HotdataClient`
+- `QueryResult`
+- `from_env`
+- `workspace_health_lines`
+- `default_api_key`
+- `default_host`
+- `default_session_id`
+- `explicit_workspace_id`
+- `list_workspaces`
+- `normalize_host`
+- `pick_workspace`
+- `resolve_workspace_selection`
+- `WorkspaceSelection`
+
+Adapters should import from `hotdata_runtime` and treat this surface as the stable API.
+
+## Semantic Guarantees
+
+### `HotdataClient`
+
+- Represents runtime context: API key, host, workspace, optional session.
+- `from_env()` resolves runtime context from env vars and selected workspace.
+- `execute_sql(sql)` returns `QueryResult` or raises `RuntimeError`/`TimeoutError`.
+- `get_result(result_id)` returns a ready `QueryResult` and waits for readiness when needed.
+- `connections()` returns the connections API wrapper for adapter UI/status features.
+- `query_runs()` returns the query-runs API wrapper for adapter history views.
+- `results()` returns the results API wrapper for adapter result pickers.
+- `list_qualified_table_names(...)` returns sorted fully qualified table names.
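+- `columns_for_qualified(qualified, connection_id=...)` resolves table columns. Adapters
+  should pass `connection_id` when known; otherwise the connection is looked up by name, which raises `RuntimeError` when connection names are duplicated.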
+
+### `QueryResult`
+
+- Canonical tabular result model with `columns`, `rows`, and `row_count`.
+- Carries server identifiers and execution metadata when available.
+- `to_pandas()` converts to a DataFrame with stable column ordering.
+
+### Env Resolution
+
+- `default_api_key()` reads `HOTDATA_API_KEY` then `HOTDATA_TOKEN`.
+- `default_host()` reads `HOTDATA_API_URL` (default: `https://api.hotdata.dev`) and normalizes it.
+- `default_session_id()` reads `HOTDATA_SANDBOX`.
+- `pick_workspace()` prefers the explicit env workspace, then the first active workspace, then the first listed workspace.
+- `resolve_workspace_selection()` is the canonical workspace selection algorithm. It returns a `WorkspaceSelection` with the selected workspace id, the selection source (`explicit_env`, `active`, or `first`), and the discovered workspaces when auto-selected (an empty list for `explicit_env`).
+
+## Adapter Responsibilities
+
+Framework packages (Jupyter, Marimo, LangChain, LangGraph, LlamaIndex, Streamlit) own:
+
+- Framework-native lifecycle and state management
+- Rendering/UI concerns
+- Tool/agent wrappers and callback integration
+
+They should not duplicate runtime env/workspace/query semantics.
+
+## Runtime Non-Goals
+
+`hotdata-runtime` does not define framework UI primitives and does not require framework dependencies.
+
+## Versioning Policy
+
+- Backward-incompatible contract changes require a major version bump.
+- Additive contract changes require a minor version bump.
+- Bug fixes that preserve contract semantics require a patch version bump.
+
+## Enforcement
+
+Contract stability is enforced by tests that verify the public export surface and key behavioral invariants.
diff --git a/README.md b/README.md
@@ -2,6 +2,8 @@
 
 Shared runtime primitives for Hotdata integrations: workspace/session semantics, execution context, query state, run history, and replayable result handles. Framework packages (Marimo, Jupyter, Streamlit, LangGraph) depend on this package.
 
+The runtime boundary and guarantees are defined in `CONTRACT.md`.
+
 Install:
 
 ```bash

diff --git a/hotdata_runtime/__init__.py b/hotdata_runtime/__init__.py
@@ -11,6 +11,8 @@
     list_workspaces,
     normalize_host,
     pick_workspace,
+    resolve_workspace_selection,
+    WorkspaceSelection,
 )
 from hotdata_runtime.health import workspace_health_lines
 from hotdata_runtime.result import QueryResult
@@ -33,4 +35,6 @@
     "list_workspaces",
     "normalize_host",
     "pick_workspace",
+    "resolve_workspace_selection",
+    "WorkspaceSelection",
 ]
diff --git a/hotdata_runtime/client.py b/hotdata_runtime/client.py
@@ -25,6 +25,7 @@
 from hotdata_runtime.result import QueryResult
 
 _TERMINAL = frozenset({"succeeded", "failed", "cancelled"})
+_RESULT_FAILURE = frozenset({"failed", "cancelled"})
 
 
 class HotdataClient:
@@ -143,9 +144,26 @@ def list_qualified_table_names(
 
     def connection_id_by_name(self) -> dict[str, str]:  # Map connection name -> id; raises RuntimeError on ambiguous names.
         listing = self.connections().list_connections()
-        return {c.name: c.id for c in listing.connections}
+        id_map: dict[str, str] = {}
+        duplicate_names: set[str] = set()
+        for c in listing.connections:
+            if c.name in id_map and id_map[c.name] != c.id:  # same name already seen with a different id
+                duplicate_names.add(c.name)
+            id_map[c.name] = c.id  # last id overwrites, but duplicates abort below before the map is returned
+        if duplicate_names:
+            names = ", ".join(sorted(duplicate_names))  # sorted so the message is deterministic
+            raise RuntimeError(
+                f"Duplicate connection names found: {names}. "
+                "Use an explicit connection_id."
+            )
+        return id_map
 
-    def columns_for_qualified(self, qualified: str) -> list[TableInfo]:
+    def columns_for_qualified(
+        self,
+        qualified: str,
+        *,
+        connection_id: str | None = None,
+    ) -> list[TableInfo]:
         parts = qualified.split(".")
         if len(parts) < 3:
             raise ValueError(
@@ -156,10 +174,12 @@ def columns_for_qualified(self, qualified: str) -> list[TableInfo]:
             parts[1],
             ".".join(parts[2:]),
         )
-        id_map = self.connection_id_by_name()
-        conn_id = id_map.get(conn_name)
-        if not conn_id:
-            raise KeyError(f"Unknown connection {conn_name!r}")
+        conn_id = connection_id
+        if conn_id is None:
+            id_map = self.connection_id_by_name()
+            conn_id = id_map.get(conn_name)
+            if not conn_id:
+                raise KeyError(f"Unknown connection {conn_name!r}")
         resp = self._information_schema().information_schema(
             connection_id=conn_id,
             var_schema=schema_name,
@@ -206,9 +226,9 @@ def _wait_result_ready(
             last = results.get_result(result_id)
             if last.status == "ready":
                 return last
-            if last.status == "failed":
+            if last.status in _RESULT_FAILURE:
                 raise RuntimeError(
-                    last.error_message or "Result persistence failed"
+                    last.error_message or f"Result {last.status}"
                 )
             time.sleep(interval_s)
         raise TimeoutError(

diff --git a/hotdata_runtime/env.py b/hotdata_runtime/env.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import os
+from dataclasses import dataclass
 from urllib.parse import urlparse
 
 from hotdata import ApiClient, Configuration
@@ -50,13 +51,35 @@ def list_workspaces(api_key: str, host: str, session_id: str | None):
     return listing.workspaces
 
 
-def pick_workspace(api_key: str, host: str, session_id: str | None) -> str:
+@dataclass(frozen=True)
+class WorkspaceSelection:  # Frozen record of which workspace was selected and how.
+    workspace_id: str  # id of the selected workspace
+    source: str  # how it was chosen: "explicit_env", "active", or "first"
+    workspaces: list  # workspaces listed during auto-selection; [] when source is "explicit_env"
+
+
+def resolve_workspace_selection(
+    api_key: str, host: str, session_id: str | None
+) -> WorkspaceSelection:  # Select a workspace and report how it was chosen; raises RuntimeError if none are listed.
     explicit = explicit_workspace_id()
     if explicit:  # an explicit id wins and returns before any workspace listing
-        return explicit
+        return WorkspaceSelection(
+            workspace_id=explicit,
+            source="explicit_env",
+            workspaces=[],  # nothing was listed on this path
+        )
     workspaces = list_workspaces(api_key, host, session_id)
     if not workspaces:
         raise RuntimeError("No Hotdata workspaces found for this API key.")
     active = [w for w in workspaces if w.active]
     chosen = active[0] if active else workspaces[0]  # first active workspace, else first listed
-    return chosen.public_id
+    return WorkspaceSelection(
+        workspace_id=chosen.public_id,
+        source="active" if active else "first",
+        workspaces=workspaces,
+    )
+
+
+def pick_workspace(api_key: str, host: str, session_id: str | None) -> str:  # Return only the selected workspace id.
+    selection = resolve_workspace_selection(api_key, host, session_id)  # selection logic lives in resolve_workspace_selection
+    return selection.workspace_id
diff --git a/tests/test_client.py b/tests/test_client.py
@@ -6,7 +6,7 @@
 
 import pytest
 
-from hotdata_runtime.env import normalize_host, pick_workspace
+from hotdata_runtime.env import normalize_host, pick_workspace, resolve_workspace_selection
 from hotdata_runtime.client import HotdataClient
 
 
@@ -30,6 +30,20 @@ def test_pick_workspace_prefers_env(monkeypatch: pytest.MonkeyPatch):
     assert pick_workspace("k", "https://api.hotdata.dev", None) == "ws_explicit"
 
 
+def test_resolve_workspace_selection_prefers_env_without_listing(
+    monkeypatch: pytest.MonkeyPatch,
+):  # An explicit env workspace must be returned without listing workspaces.
+    monkeypatch.setenv("HOTDATA_WORKSPACE", "ws_explicit")
+    with patch("hotdata_runtime.env.list_workspaces") as listing:  # mocked so any listing call is recorded
+        resolved = resolve_workspace_selection(
+            "k", "https://api.hotdata.dev", None
+        )
+    listing.assert_not_called()  # the explicit path must return before listing
+    assert resolved.workspace_id == "ws_explicit"
+    assert resolved.source == "explicit_env"
+    assert resolved.workspaces == []
+
+
 def test_pick_workspace_prefers_workspace_id_env(monkeypatch: pytest.MonkeyPatch):
     monkeypatch.delenv("HOTDATA_WORKSPACE", raising=False)
     monkeypatch.setenv("HOTDATA_WORKSPACE_ID", "ws_from_id")
@@ -67,9 +81,106 @@ def test_pick_workspace_falls_back_to_first(monkeypatch: pytest.MonkeyPatch):
         assert pick_workspace("k", "https://api.hotdata.dev", None) == "ws_1"
 
 
+def test_resolve_workspace_selection_source_first(monkeypatch: pytest.MonkeyPatch):  # No explicit id and no active workspace: the first listed one is selected.
+    monkeypatch.delenv("HOTDATA_WORKSPACE", raising=False)  # clear explicit ids so auto-selection runs
+    monkeypatch.delenv("HOTDATA_WORKSPACE_ID", raising=False)
+    items = [  # none of these is active
+        SimpleNamespace(public_id="ws_1", active=False),
+        SimpleNamespace(public_id="ws_2", active=False),
+    ]
+    listing = SimpleNamespace(workspaces=items)
+    with patch("hotdata_runtime.env.WorkspacesApi") as Api:
+        Api.return_value.list_workspaces.return_value = listing  # stubbed listing response
+        resolved = resolve_workspace_selection(
+            "k", "https://api.hotdata.dev", None
+        )
+    assert resolved.workspace_id == "ws_1"
+    assert resolved.source == "first"
+    assert resolved.workspaces == items
+
+
+def test_resolve_workspace_selection_returns_workspaces_and_source(
+    monkeypatch: pytest.MonkeyPatch,
+):  # An active workspace is preferred over an earlier inactive one.
+    monkeypatch.delenv("HOTDATA_WORKSPACE", raising=False)  # clear explicit ids so auto-selection runs
+    monkeypatch.delenv("HOTDATA_WORKSPACE_ID", raising=False)
+
+    items = [
+        SimpleNamespace(public_id="ws_1", active=False),
+        SimpleNamespace(public_id="ws_2", active=True),  # the only active workspace, listed second
+    ]
+    listing = SimpleNamespace(workspaces=items)
+
+    with patch("hotdata_runtime.env.WorkspacesApi") as Api:
+        Api.return_value.list_workspaces.return_value = listing  # stubbed listing response
+        resolved = resolve_workspace_selection(
+            "k", "https://api.hotdata.dev", None
+        )
+    assert resolved.workspace_id == "ws_2"
+    assert resolved.source == "active"
+    assert resolved.workspaces == items  # the full listing is returned, not only the active entries
+
+
 def test_list_qualified_table_names_passes_connection_id():
     client = HotdataClient("k", "ws", host="https://api.hotdata.dev")
     with patch.object(client, "iter_tables", return_value=iter([])) as it:
         client.list_qualified_table_names(limit=5, connection_id="conn_a")
     it.assert_called_once()
     assert it.call_args.kwargs["connection_id"] == "conn_a"
+
+
+def test_wait_result_ready_raises_on_cancelled():  # A cancelled result must raise RuntimeError rather than poll until timeout.
+    client = HotdataClient("k", "ws", host="https://api.hotdata.dev")
+
+    class FakeResultsApi:
+        def get_result(self, result_id: str):
+            return SimpleNamespace(status="cancelled", error_message=None)  # no error_message, so the status-based fallback text is used
+
+    with patch.object(client, "_results_api", return_value=FakeResultsApi()):
+        with pytest.raises(RuntimeError, match="cancelled"):  # matches the status embedded in the fallback message
+            client._wait_result_ready("res_1", timeout_s=0.1, interval_s=0)
+
+
+def test_connection_id_by_name_raises_on_duplicate_names():  # Two connections sharing a name but not an id must raise.
+    client = HotdataClient("k", "ws", host="https://api.hotdata.dev")
+    listing = SimpleNamespace(
+        connections=[
+            SimpleNamespace(name="warehouse", id="conn_1"),
+            SimpleNamespace(name="warehouse", id="conn_2"),  # same name, different id
+        ]
+    )
+
+    class FakeConnectionsApi:
+        def list_connections(self):
+            return listing
+
+    with patch.object(client, "connections", return_value=FakeConnectionsApi()):
+        with pytest.raises(RuntimeError, match="Duplicate connection names"):
+            client.connection_id_by_name()
+
+
+def test_columns_for_qualified_prefers_explicit_connection_id():  # An explicit connection_id must be used as-is, with no name lookup.
+    client = HotdataClient("k", "ws", host="https://api.hotdata.dev")
+    col = SimpleNamespace(name="a", data_type="INTEGER", nullable=True)
+    table = SimpleNamespace(columns=[col])
+    response = SimpleNamespace(tables=[table])
+
+    class FakeInformationSchemaApi:
+        def __init__(self):
+            self.kwargs = None  # keyword arguments of the most recent information_schema() call
+
+        def information_schema(self, **kwargs):
+            self.kwargs = kwargs
+            return response
+
+    fake_api = FakeInformationSchemaApi()
+    with patch.object(client, "_information_schema", return_value=fake_api), patch.object(
+        client, "connection_id_by_name"
+    ) as id_map:  # mocked so a name lookup would be detected
+        cols = client.columns_for_qualified(
+            "warehouse.public.orders",
+            connection_id="conn_explicit",
+        )
+    id_map.assert_not_called()  # explicit id bypasses the name lookup
+    assert cols == [col]
+    assert fake_api.kwargs["connection_id"] == "conn_explicit"
-Original file line number
+Diff line change
@@ Expand Up / @@ -2,6 +2,8 @@ @@
     Shared runtime primitives for Hotdata integrations: workspace/session semantics, execution context, query state, run history, and replayable result handles. Framework packages (Marimo, Jupyter, Streamlit, LangGraph) depend on this package.
+    Runtime boundary and guarantees are defined in `CONTRACT.md`.
     Install:
     ```bash
@@ Expand Down @@