|
3 | 3 |
|
4 | 4 | import pytest |
5 | 5 |
|
| 6 | +from agentic_data_contracts.adapters.base import Column, TableSchema |
6 | 7 | from agentic_data_contracts.adapters.duckdb import DuckDBAdapter |
7 | 8 | from agentic_data_contracts.core.contract import DataContract |
8 | 9 | from agentic_data_contracts.semantic.yaml_source import YamlSource |
@@ -94,6 +95,98 @@ async def test_describe_table_without_adapter( |
94 | 95 | assert "unavailable" in text.lower() or "no database" in text.lower() |
95 | 96 |
|
96 | 97 |
|
@pytest.mark.asyncio
async def test_describe_table_includes_semantic_descriptions(
    contract: DataContract, adapter: DuckDBAdapter, semantic: YamlSource
) -> None:
    """describe_table must expose the semantic source's column descriptions.

    Calls the ``describe_table`` tool for ``analytics.orders`` and checks the
    ``description`` field of the ``amount`` and ``tenant_id`` columns in the
    JSON payload.
    """
    expected_descriptions = {
        "amount": "Order total in USD",
        "tenant_id": "Tenant identifier for multi-tenancy",
    }

    available = create_tools(contract, adapter=adapter, semantic_source=semantic)
    describe = next(
        candidate for candidate in available if candidate.name == "describe_table"
    )
    response = await describe.callable({"schema": "analytics", "table": "orders"})

    # The tool answers with a JSON document in the first content item.
    body = json.loads(response["content"][0]["text"])
    columns = {entry["name"]: entry for entry in body["columns"]}

    for column_name, wanted in expected_descriptions.items():
        assert columns[column_name]["description"] == wanted
| 112 | + |
| 113 | + |
@pytest.mark.asyncio
async def test_describe_table_falls_back_to_adapter_description(
    contract: DataContract, semantic: YamlSource
) -> None:
    """Adapter descriptions surface when the semantic source has no entry.

    Models deployments (Denodo, for example) whose warehouse catalog already
    holds authored column comments that the adapter copies into
    Column.description.
    """
    described_table = ("analytics", "subscriptions")
    plan_description = "Subscription tier from billing system"

    class DescriptionAwareAdapter(DuckDBAdapter):
        def describe_table(self, schema: str, table: str) -> TableSchema:
            # Every other table keeps the stock DuckDB behaviour.
            if (schema, table) != described_table:
                return super().describe_table(schema, table)
            described_columns = [
                Column(name="id", type="INTEGER", description="Plan FK"),
                Column(name="plan", type="VARCHAR", description=plan_description),
                # Deliberately left without a description.
                Column(name="tenant_id", type="VARCHAR"),
            ]
            return TableSchema(columns=described_columns)

    ddl = (
        "CREATE SCHEMA analytics;"
        "CREATE TABLE analytics.subscriptions ("
        "id INTEGER, plan VARCHAR, tenant_id VARCHAR);"
    )
    desc_adapter = DescriptionAwareAdapter(":memory:")
    desc_adapter.connection.execute(ddl)

    available = create_tools(
        contract, adapter=desc_adapter, semantic_source=semantic
    )
    describe = next(
        candidate for candidate in available if candidate.name == "describe_table"
    )
    response = await describe.callable(
        {"schema": "analytics", "table": "subscriptions"}
    )
    body = json.loads(response["content"][0]["text"])
    columns = {entry["name"]: entry for entry in body["columns"]}

    assert columns["plan"]["description"] == plan_description
    # With no description from any source, the key is left out of the payload.
    assert "description" not in columns["tenant_id"]
| 156 | + |
| 157 | + |
@pytest.mark.asyncio
async def test_describe_table_semantic_overrides_adapter_description(
    contract: DataContract, semantic: YamlSource
) -> None:
    """A semantic-source description takes precedence over the adapter's.

    The adapter reports a stale catalog comment for ``status``; the payload
    must carry the semantic source's text instead.
    """

    class CompetingAdapter(DuckDBAdapter):
        def describe_table(self, schema: str, table: str) -> TableSchema:
            # Same answer for any table: one column with a catalog comment.
            stale_status = Column(
                name="status",
                type="VARCHAR",
                description="catalog-side stale description",
            )
            return TableSchema(columns=[stale_status])

    ddl = "CREATE SCHEMA analytics; CREATE TABLE analytics.orders (status VARCHAR);"
    competing = CompetingAdapter(":memory:")
    competing.connection.execute(ddl)

    available = create_tools(contract, adapter=competing, semantic_source=semantic)
    describe = next(
        candidate for candidate in available if candidate.name == "describe_table"
    )
    response = await describe.callable({"schema": "analytics", "table": "orders"})
    body = json.loads(response["content"][0]["text"])
    columns = {entry["name"]: entry for entry in body["columns"]}

    expected = "Order status: pending, completed, cancelled"
    assert columns["status"]["description"] == expected
| 188 | + |
| 189 | + |
97 | 190 | @pytest.mark.asyncio |
98 | 191 | async def test_run_query_valid( |
99 | 192 | contract: DataContract, adapter: DuckDBAdapter, semantic: YamlSource |
|
0 commit comments