Skip to content

Commit dcd4d31

Browse files
committed
Initial commit: hotdata-core-notebook package
Shared Hotdata client and domain types for notebook UIs.
0 parents  commit dcd4d31

8 files changed

Lines changed: 561 additions & 0 deletions

File tree

.gitignore

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
# Python
2+
__pycache__/
3+
*.py[cod]
4+
*$py.class
5+
*.so
6+
7+
# Distribution / packaging
8+
.Python
9+
build/
10+
develop-eggs/
11+
dist/
12+
downloads/
13+
eggs/
14+
.eggs/
15+
lib/
16+
lib64/
17+
parts/
18+
sdist/
19+
var/
20+
wheels/
21+
*.egg-info/
22+
.installed.cfg
23+
*.egg
24+
25+
# Virtual environments
26+
.env
27+
.venv
28+
env/
29+
venv/
30+
ENV/
31+
32+
# Testing / coverage
33+
.tox/
34+
.nox/
35+
.coverage
36+
.coverage.*
37+
.cache
38+
.pytest_cache/
39+
htmlcov/
40+
41+
# Type checkers
42+
.mypy_cache/
43+
.pyright/
44+
.ruff_cache/
45+
46+
# IDEs
47+
.idea/
48+
.vscode/
49+
*.swp
50+
.DS_Store

README.md

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# hotdata-core-notebook
2+
3+
Shared **Hotdata** client and domain types for notebook UIs (Marimo, Jupyter, etc.). UI frameworks depend on this package; they do not belong here.
4+
5+
Install:
6+
7+
```bash
8+
pip install hotdata-core-notebook
9+
```
10+
11+
Development:
12+
13+
```bash
14+
pip install -e ".[dev]"
15+
pytest
16+
```

hotdata_core_notebook/__init__.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
"""Hotdata client and shared models for notebook integrations."""
2+
3+
from hotdata_core_notebook.client import HotdataClient, from_env
4+
from hotdata_core_notebook.env import (
5+
default_api_key,
6+
default_host,
7+
default_session_id,
8+
explicit_workspace_id,
9+
list_workspaces,
10+
normalize_host,
11+
pick_workspace,
12+
)
13+
from hotdata_core_notebook.result import QueryResult
14+
15+
__all__ = [
16+
"HotdataClient",
17+
"QueryResult",
18+
"default_api_key",
19+
"default_host",
20+
"default_session_id",
21+
"explicit_workspace_id",
22+
"from_env",
23+
"list_workspaces",
24+
"normalize_host",
25+
"pick_workspace",
26+
]

hotdata_core_notebook/client.py

Lines changed: 250 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,250 @@
1+
from __future__ import annotations
2+
3+
import time
4+
from typing import Any, Iterator
5+
6+
from hotdata import ApiClient, Configuration
7+
from hotdata.api.connections_api import ConnectionsApi
8+
from hotdata.api.information_schema_api import InformationSchemaApi
9+
from hotdata.api.query_api import QueryApi
10+
from hotdata.api.query_runs_api import QueryRunsApi
11+
from hotdata.api.results_api import ResultsApi
12+
from hotdata.exceptions import ApiException
13+
from hotdata.models.async_query_response import AsyncQueryResponse
14+
from hotdata.models.query_request import QueryRequest
15+
from hotdata.models.query_response import QueryResponse
16+
from hotdata.models.table_info import TableInfo
17+
18+
from hotdata_core_notebook.env import (
19+
default_api_key,
20+
default_host,
21+
default_session_id,
22+
normalize_host,
23+
pick_workspace,
24+
)
25+
from hotdata_core_notebook.result import QueryResult
26+
27+
_TERMINAL = frozenset({"succeeded", "failed", "cancelled"})
28+
29+
30+
class HotdataClient:
31+
"""Thin wrapper around the Hotdata Python SDK with query polling helpers."""
32+
33+
def __init__(
34+
self,
35+
api_key: str,
36+
workspace_id: str,
37+
*,
38+
host: str | None = None,
39+
session_id: str | None = None,
40+
) -> None:
41+
self._host = normalize_host(host) if host else default_host()
42+
self._api_key = api_key
43+
self._workspace_id = workspace_id
44+
self._session_id = session_id
45+
self._config = Configuration(
46+
host=self._host,
47+
api_key=api_key,
48+
workspace_id=workspace_id,
49+
session_id=session_id,
50+
)
51+
self._api = ApiClient(self._config)
52+
53+
@classmethod
54+
def from_env(cls) -> HotdataClient:
55+
api_key = default_api_key()
56+
if not api_key:
57+
raise RuntimeError(
58+
"HOTDATA_API_KEY or HOTDATA_TOKEN must be set."
59+
)
60+
host = default_host()
61+
session = default_session_id()
62+
workspace_id = pick_workspace(api_key, host, session)
63+
return cls(api_key, workspace_id, host=host, session_id=session)
64+
65+
@property
66+
def workspace_id(self) -> str:
67+
return self._workspace_id
68+
69+
@property
70+
def host(self) -> str:
71+
return self._host
72+
73+
@property
74+
def session_id(self) -> str | None:
75+
return self._session_id
76+
77+
@property
78+
def api(self) -> ApiClient:
79+
return self._api
80+
81+
def close(self) -> None:
82+
self._api.close()
83+
84+
def __enter__(self) -> HotdataClient:
85+
return self
86+
87+
def __exit__(self, *args: object) -> None:
88+
self.close()
89+
90+
def connections(self) -> ConnectionsApi:
91+
return ConnectionsApi(self._api)
92+
93+
def _information_schema(self) -> InformationSchemaApi:
94+
return InformationSchemaApi(self._api)
95+
96+
def _query_api(self) -> QueryApi:
97+
return QueryApi(self._api)
98+
99+
def _query_runs_api(self) -> QueryRunsApi:
100+
return QueryRunsApi(self._api)
101+
102+
def _results_api(self) -> ResultsApi:
103+
return ResultsApi(self._api)
104+
105+
def query_runs(self) -> QueryRunsApi:
106+
return self._query_runs_api()
107+
108+
def results(self) -> ResultsApi:
109+
return self._results_api()
110+
111+
def iter_tables(
112+
self,
113+
*,
114+
connection_id: str | None = None,
115+
include_columns: bool = False,
116+
page_size: int = 200,
117+
) -> Iterator[TableInfo]:
118+
cursor: str | None = None
119+
while True:
120+
resp = self._information_schema().information_schema(
121+
connection_id=connection_id,
122+
include_columns=include_columns,
123+
limit=page_size,
124+
cursor=cursor,
125+
)
126+
yield from resp.tables
127+
if not resp.has_more or not resp.next_cursor:
128+
break
129+
cursor = resp.next_cursor
130+
131+
def qualified_table_name(self, t: TableInfo) -> str:
132+
return f"{t.connection}.{t.var_schema}.{t.table}"
133+
134+
def list_qualified_table_names(
135+
self, *, limit: int = 5000, connection_id: str | None = None
136+
) -> list[str]:
137+
out: list[str] = []
138+
for t in self.iter_tables(connection_id=connection_id):
139+
out.append(self.qualified_table_name(t))
140+
if len(out) >= limit:
141+
break
142+
return sorted(out)
143+
144+
def connection_id_by_name(self) -> dict[str, str]:
145+
listing = self.connections().list_connections()
146+
return {c.name: c.id for c in listing.connections}
147+
148+
def columns_for_qualified(self, qualified: str) -> list[TableInfo]:
149+
parts = qualified.split(".")
150+
if len(parts) < 3:
151+
raise ValueError(
152+
f"Expected connection.schema.table, got {qualified!r}"
153+
)
154+
conn_name, schema_name, table_name = (
155+
parts[0],
156+
parts[1],
157+
".".join(parts[2:]),
158+
)
159+
id_map = self.connection_id_by_name()
160+
conn_id = id_map.get(conn_name)
161+
if not conn_id:
162+
raise KeyError(f"Unknown connection {conn_name!r}")
163+
resp = self._information_schema().information_schema(
164+
connection_id=conn_id,
165+
var_schema=schema_name,
166+
table=table_name,
167+
include_columns=True,
168+
limit=10,
169+
)
170+
if not resp.tables:
171+
return []
172+
first = resp.tables[0]
173+
return first.columns or []
174+
175+
def _poll_query_run(
176+
self,
177+
query_run_id: str,
178+
*,
179+
timeout_s: float = 300.0,
180+
interval_s: float = 0.5,
181+
):
182+
runs = self._query_runs_api()
183+
deadline = time.monotonic() + timeout_s
184+
last = None
185+
while time.monotonic() < deadline:
186+
last = runs.get_query_run(query_run_id)
187+
if last.status in _TERMINAL:
188+
return last
189+
time.sleep(interval_s)
190+
raise TimeoutError(
191+
f"Query run {query_run_id} did not finish within {timeout_s}s "
192+
f"(last status: {getattr(last, 'status', None)})"
193+
)
194+
195+
def _wait_result_ready(
196+
self,
197+
result_id: str,
198+
*,
199+
timeout_s: float = 300.0,
200+
interval_s: float = 0.5,
201+
):
202+
results = self._results_api()
203+
deadline = time.monotonic() + timeout_s
204+
last = None
205+
while time.monotonic() < deadline:
206+
last = results.get_result(result_id)
207+
if last.status == "ready":
208+
return last
209+
if last.status == "failed":
210+
raise RuntimeError(
211+
last.error_message or "Result persistence failed"
212+
)
213+
time.sleep(interval_s)
214+
raise TimeoutError(
215+
f"Result {result_id} not ready within {timeout_s}s "
216+
f"(last status: {getattr(last, 'status', None)})"
217+
)
218+
219+
def execute_sql(self, sql: str) -> QueryResult:
220+
q = self._query_api()
221+
try:
222+
raw = q.query(QueryRequest(sql=sql))
223+
except ApiException as e:
224+
raise RuntimeError(e.reason or str(e)) from e
225+
226+
if isinstance(raw, AsyncQueryResponse):
227+
run = self._poll_query_run(raw.query_run_id)
228+
if run.status != "succeeded":
229+
raise RuntimeError(
230+
run.error_message or f"Query failed ({run.status})"
231+
)
232+
if run.result_id:
233+
persisted = self._wait_result_ready(run.result_id)
234+
return QueryResult.from_get_result(persisted)
235+
raise RuntimeError("Query succeeded but no result_id was returned.")
236+
237+
if isinstance(raw, QueryResponse):
238+
return QueryResult.from_query_response(raw)
239+
240+
raise RuntimeError(f"Unexpected query response type: {type(raw)!r}")
241+
242+
def get_result(self, result_id: str) -> QueryResult:
243+
r = self._results_api().get_result(result_id)
244+
if r.status != "ready":
245+
r = self._wait_result_ready(result_id)
246+
return QueryResult.from_get_result(r)
247+
248+
249+
def from_env() -> HotdataClient:
250+
return HotdataClient.from_env()

0 commit comments

Comments
 (0)