|
| 1 | +from __future__ import annotations |
| 2 | + |
| 3 | +import time |
| 4 | +from typing import Any, Iterator |
| 5 | + |
| 6 | +from hotdata import ApiClient, Configuration |
| 7 | +from hotdata.api.connections_api import ConnectionsApi |
| 8 | +from hotdata.api.information_schema_api import InformationSchemaApi |
| 9 | +from hotdata.api.query_api import QueryApi |
| 10 | +from hotdata.api.query_runs_api import QueryRunsApi |
| 11 | +from hotdata.api.results_api import ResultsApi |
| 12 | +from hotdata.exceptions import ApiException |
| 13 | +from hotdata.models.async_query_response import AsyncQueryResponse |
| 14 | +from hotdata.models.query_request import QueryRequest |
| 15 | +from hotdata.models.query_response import QueryResponse |
| 16 | +from hotdata.models.table_info import TableInfo |
| 17 | + |
| 18 | +from hotdata_core_notebook.env import ( |
| 19 | + default_api_key, |
| 20 | + default_host, |
| 21 | + default_session_id, |
| 22 | + normalize_host, |
| 23 | + pick_workspace, |
| 24 | +) |
| 25 | +from hotdata_core_notebook.result import QueryResult |
| 26 | + |
| 27 | +_TERMINAL = frozenset({"succeeded", "failed", "cancelled"}) |
| 28 | + |
| 29 | + |
| 30 | +class HotdataClient: |
| 31 | + """Thin wrapper around the Hotdata Python SDK with query polling helpers.""" |
| 32 | + |
| 33 | + def __init__( |
| 34 | + self, |
| 35 | + api_key: str, |
| 36 | + workspace_id: str, |
| 37 | + *, |
| 38 | + host: str | None = None, |
| 39 | + session_id: str | None = None, |
| 40 | + ) -> None: |
| 41 | + self._host = normalize_host(host) if host else default_host() |
| 42 | + self._api_key = api_key |
| 43 | + self._workspace_id = workspace_id |
| 44 | + self._session_id = session_id |
| 45 | + self._config = Configuration( |
| 46 | + host=self._host, |
| 47 | + api_key=api_key, |
| 48 | + workspace_id=workspace_id, |
| 49 | + session_id=session_id, |
| 50 | + ) |
| 51 | + self._api = ApiClient(self._config) |
| 52 | + |
| 53 | + @classmethod |
| 54 | + def from_env(cls) -> HotdataClient: |
| 55 | + api_key = default_api_key() |
| 56 | + if not api_key: |
| 57 | + raise RuntimeError( |
| 58 | + "HOTDATA_API_KEY or HOTDATA_TOKEN must be set." |
| 59 | + ) |
| 60 | + host = default_host() |
| 61 | + session = default_session_id() |
| 62 | + workspace_id = pick_workspace(api_key, host, session) |
| 63 | + return cls(api_key, workspace_id, host=host, session_id=session) |
| 64 | + |
| 65 | + @property |
| 66 | + def workspace_id(self) -> str: |
| 67 | + return self._workspace_id |
| 68 | + |
| 69 | + @property |
| 70 | + def host(self) -> str: |
| 71 | + return self._host |
| 72 | + |
| 73 | + @property |
| 74 | + def session_id(self) -> str | None: |
| 75 | + return self._session_id |
| 76 | + |
| 77 | + @property |
| 78 | + def api(self) -> ApiClient: |
| 79 | + return self._api |
| 80 | + |
| 81 | + def close(self) -> None: |
| 82 | + self._api.close() |
| 83 | + |
| 84 | + def __enter__(self) -> HotdataClient: |
| 85 | + return self |
| 86 | + |
| 87 | + def __exit__(self, *args: object) -> None: |
| 88 | + self.close() |
| 89 | + |
| 90 | + def connections(self) -> ConnectionsApi: |
| 91 | + return ConnectionsApi(self._api) |
| 92 | + |
| 93 | + def _information_schema(self) -> InformationSchemaApi: |
| 94 | + return InformationSchemaApi(self._api) |
| 95 | + |
| 96 | + def _query_api(self) -> QueryApi: |
| 97 | + return QueryApi(self._api) |
| 98 | + |
| 99 | + def _query_runs_api(self) -> QueryRunsApi: |
| 100 | + return QueryRunsApi(self._api) |
| 101 | + |
| 102 | + def _results_api(self) -> ResultsApi: |
| 103 | + return ResultsApi(self._api) |
| 104 | + |
| 105 | + def query_runs(self) -> QueryRunsApi: |
| 106 | + return self._query_runs_api() |
| 107 | + |
| 108 | + def results(self) -> ResultsApi: |
| 109 | + return self._results_api() |
| 110 | + |
| 111 | + def iter_tables( |
| 112 | + self, |
| 113 | + *, |
| 114 | + connection_id: str | None = None, |
| 115 | + include_columns: bool = False, |
| 116 | + page_size: int = 200, |
| 117 | + ) -> Iterator[TableInfo]: |
| 118 | + cursor: str | None = None |
| 119 | + while True: |
| 120 | + resp = self._information_schema().information_schema( |
| 121 | + connection_id=connection_id, |
| 122 | + include_columns=include_columns, |
| 123 | + limit=page_size, |
| 124 | + cursor=cursor, |
| 125 | + ) |
| 126 | + yield from resp.tables |
| 127 | + if not resp.has_more or not resp.next_cursor: |
| 128 | + break |
| 129 | + cursor = resp.next_cursor |
| 130 | + |
| 131 | + def qualified_table_name(self, t: TableInfo) -> str: |
| 132 | + return f"{t.connection}.{t.var_schema}.{t.table}" |
| 133 | + |
| 134 | + def list_qualified_table_names( |
| 135 | + self, *, limit: int = 5000, connection_id: str | None = None |
| 136 | + ) -> list[str]: |
| 137 | + out: list[str] = [] |
| 138 | + for t in self.iter_tables(connection_id=connection_id): |
| 139 | + out.append(self.qualified_table_name(t)) |
| 140 | + if len(out) >= limit: |
| 141 | + break |
| 142 | + return sorted(out) |
| 143 | + |
| 144 | + def connection_id_by_name(self) -> dict[str, str]: |
| 145 | + listing = self.connections().list_connections() |
| 146 | + return {c.name: c.id for c in listing.connections} |
| 147 | + |
| 148 | + def columns_for_qualified(self, qualified: str) -> list[TableInfo]: |
| 149 | + parts = qualified.split(".") |
| 150 | + if len(parts) < 3: |
| 151 | + raise ValueError( |
| 152 | + f"Expected connection.schema.table, got {qualified!r}" |
| 153 | + ) |
| 154 | + conn_name, schema_name, table_name = ( |
| 155 | + parts[0], |
| 156 | + parts[1], |
| 157 | + ".".join(parts[2:]), |
| 158 | + ) |
| 159 | + id_map = self.connection_id_by_name() |
| 160 | + conn_id = id_map.get(conn_name) |
| 161 | + if not conn_id: |
| 162 | + raise KeyError(f"Unknown connection {conn_name!r}") |
| 163 | + resp = self._information_schema().information_schema( |
| 164 | + connection_id=conn_id, |
| 165 | + var_schema=schema_name, |
| 166 | + table=table_name, |
| 167 | + include_columns=True, |
| 168 | + limit=10, |
| 169 | + ) |
| 170 | + if not resp.tables: |
| 171 | + return [] |
| 172 | + first = resp.tables[0] |
| 173 | + return first.columns or [] |
| 174 | + |
| 175 | + def _poll_query_run( |
| 176 | + self, |
| 177 | + query_run_id: str, |
| 178 | + *, |
| 179 | + timeout_s: float = 300.0, |
| 180 | + interval_s: float = 0.5, |
| 181 | + ): |
| 182 | + runs = self._query_runs_api() |
| 183 | + deadline = time.monotonic() + timeout_s |
| 184 | + last = None |
| 185 | + while time.monotonic() < deadline: |
| 186 | + last = runs.get_query_run(query_run_id) |
| 187 | + if last.status in _TERMINAL: |
| 188 | + return last |
| 189 | + time.sleep(interval_s) |
| 190 | + raise TimeoutError( |
| 191 | + f"Query run {query_run_id} did not finish within {timeout_s}s " |
| 192 | + f"(last status: {getattr(last, 'status', None)})" |
| 193 | + ) |
| 194 | + |
| 195 | + def _wait_result_ready( |
| 196 | + self, |
| 197 | + result_id: str, |
| 198 | + *, |
| 199 | + timeout_s: float = 300.0, |
| 200 | + interval_s: float = 0.5, |
| 201 | + ): |
| 202 | + results = self._results_api() |
| 203 | + deadline = time.monotonic() + timeout_s |
| 204 | + last = None |
| 205 | + while time.monotonic() < deadline: |
| 206 | + last = results.get_result(result_id) |
| 207 | + if last.status == "ready": |
| 208 | + return last |
| 209 | + if last.status == "failed": |
| 210 | + raise RuntimeError( |
| 211 | + last.error_message or "Result persistence failed" |
| 212 | + ) |
| 213 | + time.sleep(interval_s) |
| 214 | + raise TimeoutError( |
| 215 | + f"Result {result_id} not ready within {timeout_s}s " |
| 216 | + f"(last status: {getattr(last, 'status', None)})" |
| 217 | + ) |
| 218 | + |
| 219 | + def execute_sql(self, sql: str) -> QueryResult: |
| 220 | + q = self._query_api() |
| 221 | + try: |
| 222 | + raw = q.query(QueryRequest(sql=sql)) |
| 223 | + except ApiException as e: |
| 224 | + raise RuntimeError(e.reason or str(e)) from e |
| 225 | + |
| 226 | + if isinstance(raw, AsyncQueryResponse): |
| 227 | + run = self._poll_query_run(raw.query_run_id) |
| 228 | + if run.status != "succeeded": |
| 229 | + raise RuntimeError( |
| 230 | + run.error_message or f"Query failed ({run.status})" |
| 231 | + ) |
| 232 | + if run.result_id: |
| 233 | + persisted = self._wait_result_ready(run.result_id) |
| 234 | + return QueryResult.from_get_result(persisted) |
| 235 | + raise RuntimeError("Query succeeded but no result_id was returned.") |
| 236 | + |
| 237 | + if isinstance(raw, QueryResponse): |
| 238 | + return QueryResult.from_query_response(raw) |
| 239 | + |
| 240 | + raise RuntimeError(f"Unexpected query response type: {type(raw)!r}") |
| 241 | + |
| 242 | + def get_result(self, result_id: str) -> QueryResult: |
| 243 | + r = self._results_api().get_result(result_id) |
| 244 | + if r.status != "ready": |
| 245 | + r = self._wait_result_ready(result_id) |
| 246 | + return QueryResult.from_get_result(r) |
| 247 | + |
| 248 | + |
| 249 | +def from_env() -> HotdataClient: |
| 250 | + return HotdataClient.from_env() |
0 commit comments