Skip to content

Commit 985da29

Browse files
committed
feat: add LlamaIndex tools for Hotdata managed databases
0 parents  commit 985da29

12 files changed

Lines changed: 3099 additions & 0 deletions

File tree

.gitignore

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# Python
2+
__pycache__/
3+
*.py[cod]
4+
*$py.class
5+
*.so
6+
7+
# Virtual environments
8+
.env
9+
.venv
10+
env/
11+
venv/
12+
13+
# Testing
14+
.pytest_cache/
15+
.coverage
16+
htmlcov/
17+
18+
# Packaging
19+
*.egg-info/
20+
dist/
21+
build/
22+
23+
.DS_Store

README.md

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
# hotdata-llamaindex
2+
3+
LlamaIndex tools for [Hotdata](https://hotdata.dev), built on **hotdata-runtime**.
4+
5+
## Features
6+
7+
- **SQL tool** — run workspace SQL and return JSON rows for agents
8+
- **Managed database tools** — list and create Hotdata-owned catalogs, and load Parquet files into their tables (replaces legacy dataset uploads)
9+
10+
## Install
11+
12+
```bash
13+
pip install hotdata-llamaindex
14+
```
15+
16+
Requires the `HOTDATA_API_KEY` environment variable. Optionally set `HOTDATA_WORKSPACE`, `HOTDATA_API_URL`, or `HOTDATA_SANDBOX`.
17+
18+
## Usage
19+
20+
```python
21+
import hotdata_llamaindex as hli
22+
23+
client = hli.from_env()
24+
tools = hli.make_hotdata_tools(client)
25+
26+
for tool in tools:
27+
print(tool.metadata.name, tool.metadata.description)
28+
```
29+
30+
Managed database example:
31+
32+
```python
33+
tools = {tool.metadata.name: tool for tool in hli.make_hotdata_tools(client)}
34+
35+
tools["hotdata_create_managed_database"].call(
36+
name="sales",
37+
schema_name="public",
38+
tables="orders",
39+
)
40+
41+
tools["hotdata_load_managed_table"].call(
42+
database="sales",
43+
table="orders",
44+
file="/path/to/orders.parquet",
45+
)
46+
```
47+
48+
## Examples
49+
50+
```bash
51+
uv run python examples/llamaindex_basic.py
52+
uv run python examples/llamaindex_managed_db.py
53+
```
54+
55+
## Development
56+
57+
```bash
58+
uv sync --locked
59+
uv run pytest
60+
```

examples/llamaindex_basic.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
"""Minimal LlamaIndex tool usage with hotdata-llamaindex."""
2+
3+
import hotdata_llamaindex as hli
4+
5+
6+
def main() -> None:
    """Run a trivial SQL query and list managed databases through the tools.

    Builds a client from the environment, indexes the generated tools by
    their metadata name, prints the output of the SQL tool and of the
    list-databases tool, and closes the client afterwards.
    """
    client = hli.from_env()
    try:
        tools = hli.make_hotdata_tools(client)
        by_name = {tool.metadata.name: tool for tool in tools}

        sql_tool = by_name["hotdata_execute_sql"]
        print(sql_tool.call(sql="SELECT 1 AS ok"))

        list_tool = by_name["hotdata_list_managed_databases"]
        print(list_tool.call())
    finally:
        # Close the client even when a tool lookup or call raises, so the
        # example never leaves the connection open on the error path.
        client.close()


if __name__ == "__main__":
    main()

examples/llamaindex_managed_db.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
"""Managed database tools for LlamaIndex agents."""
2+
3+
import hotdata_llamaindex as hli
4+
5+
6+
def main() -> None:
    """Create a managed database and load one parquet file into it.

    Builds a client from the environment, indexes the generated tools by
    their metadata name, creates ``demo_sales`` with two declared tables
    (passed as a newline-separated string), loads ``orders`` from a parquet
    path, prints both tool outputs, and closes the client afterwards.
    """
    client = hli.from_env()
    try:
        tools = hli.make_hotdata_tools(client)
        by_name = {tool.metadata.name: tool for tool in tools}

        create = by_name["hotdata_create_managed_database"]
        print(
            create.call(
                name="demo_sales",
                schema_name="public",
                tables="orders\ncustomers",
            )
        )

        load = by_name["hotdata_load_managed_table"]
        print(
            load.call(
                database="demo_sales",
                table="orders",
                file="/path/to/orders.parquet",
                schema_name="public",
            )
        )
    finally:
        # Close the client even when creation or loading fails, so the
        # example never leaves the connection open on the error path.
        client.close()


if __name__ == "__main__":
    main()

hotdata_llamaindex/__init__.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
"""LlamaIndex tools for Hotdata runtime."""
2+
3+
from importlib.metadata import PackageNotFoundError, version
4+
5+
try:
6+
__version__ = version("hotdata-llamaindex")
7+
except PackageNotFoundError:
8+
__version__ = "0.0.0+unknown"
9+
10+
from hotdata_runtime import HotdataClient, QueryResult, from_env
11+
from hotdata_llamaindex.databases import (
12+
create_managed_database,
13+
list_managed_databases_json,
14+
load_managed_table,
15+
load_result_summary,
16+
managed_database_summary,
17+
)
18+
from hotdata_llamaindex.tools import (
19+
execute_sql_json,
20+
make_hotdata_tools,
21+
result_rows_for_llm,
22+
)
23+
24+
__all__ = [
25+
"__version__",
26+
"HotdataClient",
27+
"QueryResult",
28+
"create_managed_database",
29+
"execute_sql_json",
30+
"from_env",
31+
"list_managed_databases_json",
32+
"load_managed_table",
33+
"load_result_summary",
34+
"make_hotdata_tools",
35+
"managed_database_summary",
36+
"result_rows_for_llm",
37+
]

hotdata_llamaindex/databases.py

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
"""Managed database helpers for LlamaIndex agents."""
2+
3+
from __future__ import annotations
4+
5+
import json
6+
from typing import Any
7+
8+
from hotdata_runtime import (
9+
DEFAULT_SCHEMA,
10+
HotdataClient,
11+
LoadManagedTableResult,
12+
ManagedDatabase,
13+
)
14+
15+
16+
def list_managed_databases_json(client: HotdataClient) -> str:
    """Return the client's managed databases as an indented JSON array.

    Each element holds the database ``name`` and ``id`` plus a
    ``sql_prefix`` template of the form ``<name>.{schema}.{table}``. The
    ``{schema}`` and ``{table}`` parts are emitted literally as placeholders.
    """
    summaries = []
    for managed in client.list_managed_databases():
        entry = {
            "name": managed.name,
            "id": managed.id,
            # Doubled braces keep the placeholders literal in the output.
            "sql_prefix": "{}.{{schema}}.{{table}}".format(managed.name),
        }
        summaries.append(entry)
    return json.dumps(summaries, indent=2)
26+
27+
28+
def create_managed_database(
    client: HotdataClient,
    *,
    name: str,
    schema: str = DEFAULT_SCHEMA,
    tables: list[str] | None = None,
) -> ManagedDatabase:
    """Create a managed database through ``client`` and return it.

    Keyword-only wrapper around ``client.create_managed_database``: ``name``
    is forwarded positionally, ``schema`` and ``tables`` as keywords. The
    client's return value is passed back unchanged.
    """
    created = client.create_managed_database(
        name,
        schema=schema,
        tables=tables,
    )
    return created
36+
37+
38+
def load_managed_table(
    client: HotdataClient,
    *,
    database: str,
    table: str,
    file: str,
    schema: str = DEFAULT_SCHEMA,
) -> LoadManagedTableResult:
    """Load ``file`` into ``database``'s ``table`` through ``client``.

    Keyword-only wrapper around ``client.load_managed_table``: ``database``
    and ``table`` are forwarded positionally, ``schema`` and ``file`` as
    keywords. The client's result is returned unchanged.
    """
    outcome = client.load_managed_table(
        database,
        table,
        schema=schema,
        file=file,
    )
    return outcome
47+
48+
49+
def managed_database_summary(db: ManagedDatabase) -> dict[str, str]:
    """Project a managed database onto its ``id``, ``name`` and ``source_type``."""
    fields = ("id", "name", "source_type")
    return {field: getattr(db, field) for field in fields}
51+
52+
53+
def load_result_summary(result: LoadManagedTableResult) -> dict[str, Any]:
    """Project a load result onto a plain dict of its reporting fields.

    The returned keys, in order, are ``connection_id``, ``schema_name``,
    ``table_name``, ``row_count`` and ``full_name``; each value is read from
    the attribute of the same name on ``result``.
    """
    reported = (
        "connection_id",
        "schema_name",
        "table_name",
        "row_count",
        "full_name",
    )
    summary: dict[str, Any] = {}
    for attribute in reported:
        summary[attribute] = getattr(result, attribute)
    return summary

hotdata_llamaindex/tools.py

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
"""LlamaIndex tools built on hotdata-runtime."""
2+
3+
from __future__ import annotations
4+
5+
import json
6+
from typing import Any
7+
8+
from llama_index.core.tools import FunctionTool
9+
10+
from hotdata_runtime import DEFAULT_SCHEMA, HotdataClient, QueryResult
11+
12+
from hotdata_llamaindex.databases import (
13+
create_managed_database,
14+
list_managed_databases_json,
15+
load_managed_table,
16+
load_result_summary,
17+
managed_database_summary,
18+
)
19+
20+
21+
def result_rows_for_llm(
    result: QueryResult,
    *,
    max_rows: int = 20,
) -> list[dict[str, Any]]:
    """Return ``result.to_records(max_rows=max_rows)`` for use in LLM prompts.

    ``max_rows`` defaults to 20 and is handed to ``to_records`` unchanged.
    """
    records = result.to_records(max_rows=max_rows)
    return records
23+
24+
25+
def execute_sql_json(client: HotdataClient, sql: str, *, max_rows: int = 100) -> str:
    """Execute ``sql`` through ``client`` and return the outcome as JSON text.

    The returned document is an indented JSON object with two keys:
    ``metadata`` (from ``result.metadata_dict()``) and ``rows`` (from
    ``result.to_records(max_rows=max_rows)``).

    Values that ``json`` cannot encode natively are rendered with ``str()``
    instead of raising ``TypeError``.
    """
    result = client.execute_sql(sql)
    payload = {
        "metadata": result.metadata_dict(),
        "rows": result.to_records(max_rows=max_rows),
    }
    # NOTE(review): whether to_records() already normalises cell values is
    # not visible here. The str fallback is deliberate: this text is read by
    # an agent, so a readable rendering of a date or decimal beats a crash.
    return json.dumps(payload, indent=2, default=str)
32+
33+
34+
def make_hotdata_tools(
    client: HotdataClient,
    *,
    max_rows: int = 100,
) -> list[FunctionTool]:
    """Build the LlamaIndex tools bound to a single Hotdata client.

    Four tools are returned, in this order: ``hotdata_execute_sql``,
    ``hotdata_list_managed_databases``, ``hotdata_create_managed_database``
    and ``hotdata_load_managed_table``. Each is a closure over ``client``;
    the SQL tool also captures ``max_rows`` as its row cap. Every tool
    returns a JSON string.
    """
    # The inner docstrings are kept verbatim: they are presumably what the
    # agent sees as the tool descriptions.

    def hotdata_execute_sql(sql: str) -> str:
        """Run SQL against the Hotdata workspace and return JSON rows."""
        return execute_sql_json(client, sql, max_rows=max_rows)

    def hotdata_list_managed_databases() -> str:
        """List Hotdata-managed databases in the workspace."""
        return list_managed_databases_json(client)

    def hotdata_create_managed_database(
        name: str,
        schema_name: str = DEFAULT_SCHEMA,
        tables: str = "",
    ) -> str:
        """Create a Hotdata-managed database and optionally declare tables (one per line)."""
        # One table per line; surrounding whitespace and blank lines are dropped.
        stripped = (line.strip() for line in tables.splitlines())
        declared = [entry for entry in stripped if entry]
        created = create_managed_database(
            client,
            name=name,
            # An empty schema name falls back to the default schema.
            schema=schema_name or DEFAULT_SCHEMA,
            # No declared tables is passed on as None rather than [].
            tables=declared if declared else None,
        )
        summary = managed_database_summary(created)
        return json.dumps(summary, indent=2)

    def hotdata_load_managed_table(
        database: str,
        table: str,
        file: str,
        schema_name: str = DEFAULT_SCHEMA,
    ) -> str:
        """Load a local parquet file into a declared managed table."""
        outcome = load_managed_table(
            client,
            database=database,
            table=table,
            file=file,
            # An empty schema name falls back to the default schema.
            schema=schema_name or DEFAULT_SCHEMA,
        )
        summary = load_result_summary(outcome)
        return json.dumps(summary, indent=2)

    # Each tool is registered under the name of the function it wraps.
    tool_functions = (
        hotdata_execute_sql,
        hotdata_list_managed_databases,
        hotdata_create_managed_database,
        hotdata_load_managed_table,
    )
    return [
        FunctionTool.from_defaults(fn=function, name=function.__name__)
        for function in tool_functions
    ]

pyproject.toml

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
[build-system]
2+
requires = ["hatchling"]
3+
build-backend = "hatchling.build"
4+
5+
[project]
6+
name = "hotdata-llamaindex"
7+
version = "0.1.0"
8+
description = "LlamaIndex tools for Hotdata runtime"
9+
readme = "README.md"
10+
requires-python = ">=3.10"
11+
license = { text = "MIT" }
12+
dependencies = [
13+
"hotdata-runtime>=0.1.1",
14+
"llama-index-core>=0.12.0",
15+
]
16+
17+
[dependency-groups]
18+
dev = [
19+
"pytest>=8.0",
20+
]
21+
22+
[tool.uv]
23+
default-groups = ["dev"]
24+
25+
[tool.hatch.build.targets.wheel]
26+
packages = ["hotdata_llamaindex"]
27+
28+
[tool.pytest.ini_options]
29+
testpaths = ["tests"]

tests/conftest.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
from __future__ import annotations
2+
3+
from unittest.mock import MagicMock
4+
5+
import pytest
6+
7+
from hotdata_runtime import QueryResult
8+
9+
10+
@pytest.fixture
def sample_result() -> QueryResult:
    """Provide a two-row, single-column ``QueryResult`` with no warning or error."""
    fields = {
        "columns": ["n"],
        "rows": [[1], [2]],
        "row_count": 2,
        "result_id": "res_1",
        "query_run_id": "run_1",
        "execution_time_ms": 12,
        "warning": None,
        "error_message": None,
    }
    return QueryResult(**fields)
22+
23+
24+
@pytest.fixture
def mock_client(sample_result: QueryResult):
    """Provide a ``MagicMock`` standing in for a Hotdata client.

    ``workspace_id`` is ``"ws_test"``, ``execute_sql`` returns the
    ``sample_result`` fixture, and ``list_managed_databases`` returns an
    empty list.
    """
    canned_returns = {
        "execute_sql": sample_result,
        "list_managed_databases": [],
    }
    stub = MagicMock()
    stub.workspace_id = "ws_test"
    for method_name, canned in canned_returns.items():
        setattr(stub, method_name, MagicMock(return_value=canned))
    return stub

0 commit comments

Comments
 (0)