|
| 1 | +# Text-to-SQL query adapter |
| 2 | + |
| 3 | +## Install |
| 4 | + |
| 5 | +```shell |
| 6 | +uv pip install 'cratedb-toolkit[llm]' |
| 7 | +``` |
| 8 | + |
| 9 | +## Usage |
| 10 | + |
| 11 | +### CLI |
| 12 | + |
| 13 | +```shell |
| 14 | +export CRATEDB_CLUSTER_URL=crate://localhost/ |
| 15 | +export LLM_PROVIDER=openai |
| 16 | +export OPENAI_API_KEY=<YOUR_OPENAI_API_KEY> |
| 17 | +``` |
| 18 | + |
| 19 | +### API |
| 20 | + |
| 21 | +```python |
| 22 | +import sqlalchemy as sa |
| 23 | +from cratedb_toolkit.query.llm.api import DataQuery |
| 24 | +from cratedb_toolkit.query.llm.model import DatabaseInfo, ModelInfo, ModelProvider |
| 25 | + |
| 26 | +engine = sa.create_engine("crate://") |
| 27 | +schema = "doc" |
| 28 | + |
| 29 | +# Use OpenAI GPT-4.1. |
| 30 | +dq = DataQuery( |
| 31 | + db=DatabaseInfo(engine=engine, schema=schema), |
| 32 | + model=ModelInfo(provider=ModelProvider.OPENAI, completion="gpt-4.1", embedding="text-embedding-3-large"), |
| 33 | +) |
| 34 | + |
| 35 | +# Use Gemma3 via Ollama. |
| 36 | +dq = DataQuery( |
| 37 | + db=DatabaseInfo(engine=engine, schema=schema), |
| 38 | + model=ModelInfo(provider=ModelProvider.OLLAMA, completion="gemma3:1b", embedding="local"), |
| 39 | +) |
| 40 | +``` |
| 41 | + |
| 42 | +## Example |
| 43 | + |
| 44 | +```sql |
| 45 | +CREATE TABLE IF NOT EXISTS time_series_data ( |
| 46 | + timestamp TIMESTAMP, |
| 47 | + value DOUBLE, |
| 48 | + location STRING, |
| 49 | + sensor_id INT |
| 50 | +); |
| 51 | + |
| 52 | +INSERT INTO time_series_data (timestamp, value, location, sensor_id) |
| 53 | +VALUES |
| 54 | + ('2023-09-14T00:00:00', 10.5, 'Sensor A', 1), |
| 55 | + ('2023-09-14T01:00:00', 15.2, 'Sensor A', 1), |
| 56 | + ('2023-09-14T02:00:00', 18.9, 'Sensor A', 1), |
| 57 | + ('2023-09-14T03:00:00', 12.7, 'Sensor B', 2), |
| 58 | + ('2023-09-14T04:00:00', 17.3, 'Sensor B', 2), |
| 59 | + ('2023-09-14T05:00:00', 20.1, 'Sensor B', 2), |
| 60 | + ('2023-09-14T06:00:00', 22.5, 'Sensor A', 1), |
| 61 | + ('2023-09-14T07:00:00', 18.3, 'Sensor A', 1), |
| 62 | + ('2023-09-14T08:00:00', 16.8, 'Sensor A', 1), |
| 63 | + ('2023-09-14T09:00:00', 14.6, 'Sensor B', 2), |
| 64 | + ('2023-09-14T10:00:00', 13.2, 'Sensor B', 2), |
| 65 | + ('2023-09-14T11:00:00', 11.7, 'Sensor B', 2); |
| 66 | + |
| 67 | +REFRESH TABLE time_series_data; |
| 68 | +``` |
| 69 | + |
| 70 | +```shell |
| 71 | +ctk query llm "What is the average value for sensor 1?" |
| 72 | +``` |
| 73 | +```text |
| 74 | +Answer: The average value for sensor 1 is approximately 17.03. |
| 75 | +``` |
0 commit comments