@@ -610,12 +610,16 @@ async def execute_tool(
610610 if tool_name == "search_datasets" :
611611 query = arguments .get ("query" , "" )
612612 limit = arguments .get ("limit" , 20 )
613- datasets = await self .search_datasets (query , limit )
613+ datasets , total = await self ._search_datasets_with_count (
614+ query , limit
615+ )
614616 return ToolResult (
615617 content = [
616618 {
617619 "type" : "text" ,
618- "text" : self ._format_search_results (datasets ),
620+ "text" : self ._format_search_results (
621+ datasets , total = total , limit = limit
622+ ),
619623 }
620624 ],
621625 success = True ,
@@ -712,7 +716,10 @@ async def execute_tool(
712716 # Format SQL results
713717 records = result .get ("records" , [])
714718 fields = result .get ("fields" , [])
715- formatted_text = self ._format_sql_results (records , fields )
719+ effective_limit = result .get ("effective_limit" )
720+ formatted_text = self ._format_sql_results (
721+ records , fields , effective_limit = effective_limit
722+ )
716723 return ToolResult (
717724 content = [{"type" : "text" , "text" : formatted_text }],
718725 success = True ,
@@ -790,7 +797,9 @@ async def execute_tool(
790797 error_message = result .get ("message" , "Aggregation failed" ),
791798 )
792799 formatted = self ._format_sql_results (
793- result .get ("records" , []), result .get ("fields" , [])
800+ result .get ("records" , []),
801+ result .get ("fields" , []),
802+ effective_limit = result .get ("effective_limit" ),
794803 )
795804 return ToolResult (
796805 content = [{"type" : "text" , "text" : formatted }], success = True
@@ -821,12 +830,29 @@ async def search_datasets(
821830 limit: Maximum number of results
822831
823832 Returns:
824- List of dataset metadata dictionaries
833+ List of dataset metadata dictionaries (count-aware variant is
834+ ``_search_datasets_with_count``).
825835 """
836+ datasets , _count = await self ._search_datasets_with_count (query , limit )
837+ return datasets
838+
839+ async def _search_datasets_with_count (
840+ self , query : str , limit : int = 20
841+ ) -> Tuple [List [Dict [str , Any ]], Optional [int ]]:
842+ """Same as search_datasets but also returns CKAN's `count` — the
843+ true number of datasets matching the query, regardless of the row
844+ cap. Lets the formatter say "20 of 47 matching datasets returned"
845+ instead of just "Found 20"."""
826846 response = await self ._call_ckan_api (
827847 "package_search" , {"q" : query , "rows" : limit }
828848 )
829- return response .get ("result" , {}).get ("results" , [])
849+ result = response .get ("result" , {})
850+ count_val = result .get ("count" )
851+ try :
852+ count = int (count_val ) if count_val is not None else None
853+ except (TypeError , ValueError ):
854+ count = None
855+ return result .get ("results" , []), count
830856
831857 async def get_dataset (self , dataset_id : str ) -> Dict [str , Any ]:
832858 """Get detailed metadata for a specific dataset.
@@ -1045,7 +1071,8 @@ async def execute_sql(self, sql: str) -> Dict[str, Any]:
10451071 sql: PostgreSQL SELECT statement
10461072
10471073 Returns:
1048- Dictionary with success flag, records, fields, or error message
1074+ Dictionary with success flag, records, fields, effective_limit,
1075+ or error message
10491076 """
10501077 # Validate SQL
10511078 is_valid , error = SQLValidator .validate_query (sql )
@@ -1054,6 +1081,7 @@ async def execute_sql(self, sql: str) -> Dict[str, Any]:
10541081
10551082 # Bound upstream scan cost: append LIMIT if the caller didn't set one.
10561083 sql = SQLValidator .enforce_row_limit (sql )
1084+ effective_limit = SQLValidator .extract_top_level_limit (sql )
10571085
10581086 # Log SQL execution (truncated for security)
10591087 logger .info ("Executing SQL" , extra = {"sql" : sql [:500 ]})
@@ -1070,6 +1098,7 @@ async def execute_sql(self, sql: str) -> Dict[str, Any]:
10701098 "success" : True ,
10711099 "records" : result .get ("result" , {}).get ("records" , []),
10721100 "fields" : result .get ("result" , {}).get ("fields" , []),
1101+ "effective_limit" : effective_limit ,
10731102 }
10741103 except Exception as e :
10751104 logger .error (f"SQL execution failed: { e } " , exc_info = True )
@@ -1404,7 +1433,12 @@ async def health_check(self) -> bool:
14041433 logger .error (f"Health check failed: { e } " )
14051434 return False
14061435
1407- def _format_search_results (self , datasets : List [Dict [str , Any ]]) -> str :
1436+ def _format_search_results (
1437+ self ,
1438+ datasets : List [Dict [str , Any ]],
1439+ total : Optional [int ] = None ,
1440+ limit : int = 20 ,
1441+ ) -> str :
14081442 """Format search results for user display."""
14091443 if not datasets :
14101444 return f"No datasets found in { self .plugin_config .city_name } 's open data portal."
@@ -1448,9 +1482,25 @@ def _format_search_results(self, datasets: List[Dict[str, Any]]) -> str:
14481482 ]
14491483 )
14501484
1451- lines .append (
1452- f"Found { len (datasets )} dataset(s) in { self .plugin_config .city_name } 's open data portal:\n "
1453- )
1485+ # Lead with the X-of-Y framing so the model can't mistake the
1486+ # results-shown count for the count of matching datasets.
1487+ n_returned = len (datasets )
1488+ if total is not None and total > n_returned :
1489+ lines .append (
1490+ f"{ n_returned } of { total } matching dataset(s) shown "
1491+ f"(limit={ limit } ; raise limit to see more) in "
1492+ f"{ self .plugin_config .city_name } 's open data portal:\n "
1493+ )
1494+ elif total is not None :
1495+ lines .append (
1496+ f"{ total } matching dataset(s) (full result, limit={ limit } ) "
1497+ f"in { self .plugin_config .city_name } 's open data portal:\n "
1498+ )
1499+ else :
1500+ lines .append (
1501+ f"{ n_returned } dataset(s) in "
1502+ f"{ self .plugin_config .city_name } 's open data portal:\n "
1503+ )
14541504
14551505 for i , dataset in enumerate (datasets , 1 ):
14561506 title = dataset .get ("title" , "Untitled" )
@@ -1604,22 +1654,11 @@ def _format_query_results(
16041654 lines .append (truncated_warning )
16051655 lines .append ("" )
16061656
1607- # Header line: prefer "true total" wording when known, since the
1608- # model has consistently been mistaking len(records) for the total.
1609- if total is not None and total != n_returned :
1610- lines .append (
1611- f"total_matching_rows: { total } (returned { n_returned } , "
1612- f"limit={ limit } )\n "
1613- )
1614- elif total is not None :
1615- lines .append (
1616- f"total_matching_rows: { total } (limit={ limit } )\n "
1617- )
1618- else :
1619- lines .append (
1620- f"returned_rows: { n_returned } (limit={ limit } , "
1621- "total unknown — see warning above if any)\n "
1622- )
1657+ # Header line: lead with the X-of-Y framing so the model can't
1658+ # mistake the rows-returned count for the answer to a counting
1659+ # question.
1660+ lines .append (self ._format_count_header (n_returned , limit , total ))
1661+ lines .append ("" )
16231662
16241663 # Show first few records as examples
16251664 for i , record in enumerate (records [:5 ], 1 ):
@@ -1639,6 +1678,31 @@ def _format_query_results(
16391678
16401679 return "\n " .join (lines )
16411680
1681+ @staticmethod
1682+ def _format_count_header (
1683+ n_returned : int ,
1684+ limit : int ,
1685+ total : Optional [int ],
1686+ unit : str = "rows" ,
1687+ ) -> str :
1688+ """One-line "X of Y" summary used at the top of every row-returning
1689+ response. The model has been mistaking returned-rows for true count;
1690+ this phrasing makes the partial/total distinction unambiguous."""
1691+ if total is not None and total > n_returned :
1692+ return (
1693+ f"{ n_returned } of { total } { unit } returned "
1694+ f"(limit={ limit } ; raise limit or use ckan__aggregate_data "
1695+ "for full count)."
1696+ )
1697+ if total is not None :
1698+ # total == n_returned, all rows returned
1699+ return f"{ total } { unit } returned (full result, limit={ limit } )."
1700+ # total unknown
1701+ return (
1702+ f"{ n_returned } { unit } returned (limit={ limit } , "
1703+ "true total unknown — see TRUNCATED warning above if any)."
1704+ )
1705+
16421706 def _format_truncation_block (
16431707 self ,
16441708 n_returned : int ,
@@ -1747,20 +1811,7 @@ def _format_search_and_query(
17471811 resource_name = resource .get ("name" , "Unnamed" )
17481812 n_returned = len (records )
17491813
1750- # Total line: prefer "true total" so the model can't read the
1751- # returned-rows count as the answer to a counting question.
1752- if total is not None and total != n_returned :
1753- count_line = (
1754- f"total_matching_rows: { total } "
1755- f"(returned { n_returned } , limit={ limit } )"
1756- )
1757- elif total is not None :
1758- count_line = f"total_matching_rows: { total } (limit={ limit } )"
1759- else :
1760- count_line = (
1761- f"returned_rows: { n_returned } "
1762- f"(limit={ limit } , total unknown)"
1763- )
1814+ count_line = self ._format_count_header (n_returned , limit , total )
17641815
17651816 lines : List [str ] = []
17661817 truncated_warning = self ._format_truncation_block (
@@ -1789,11 +1840,13 @@ def _format_search_and_query(
17891840 "dataset/resource (see alternates below)."
17901841 )
17911842 else :
1792- preview_caption = (
1793- f"Showing first 5 of { n_returned } returned"
1794- + (f" (true total: { total } )" if total is not None else "" )
1795- + ":"
1796- )
1843+ if total is not None and total > n_returned :
1844+ preview_caption = (
1845+ f"Showing first 5 of { n_returned } returned "
1846+ f"(true total: { total } ):"
1847+ )
1848+ else :
1849+ preview_caption = f"Showing first 5 of { n_returned } returned:"
17971850 lines .append (preview_caption )
17981851 for i , record in enumerate (records [:5 ], 1 ):
17991852 lines .append (f"Record { i } :" )
@@ -1858,21 +1911,59 @@ def _format_search_and_query(
18581911 return "\n " .join (lines )
18591912
18601913 def _format_sql_results (
1861- self , records : List [Dict [str , Any ]], fields : List [Dict [str , Any ]]
1914+ self ,
1915+ records : List [Dict [str , Any ]],
1916+ fields : List [Dict [str , Any ]],
1917+ effective_limit : Optional [int ] = None ,
18621918 ) -> str :
18631919 """Format SQL query results for user display.
18641920
18651921 Args:
18661922 records: List of record dictionaries
18671923 fields: List of field metadata dictionaries
1924+ effective_limit: The LIMIT clause that was actually executed —
1925+ either user-supplied or the enforced default. Used to
1926+ detect truncation: if len(records) >= effective_limit, the
1927+ result was almost certainly capped.
18681928
18691929 Returns:
18701930 Formatted string representation of results
18711931 """
1932+ n_returned = len (records )
1933+
1934+ # Heuristic truncation detection — datastore_search_sql doesn't
1935+ # return a "total"; the only signal is "did we hit our LIMIT?"
1936+ truncation_block = ""
1937+ if effective_limit is not None and n_returned >= effective_limit :
1938+ truncation_block = (
1939+ "=== MAY BE TRUNCATED ===\n "
1940+ f"This SQL returned exactly the LIMIT ({ effective_limit } ) "
1941+ "rows. The true total could not be determined from "
1942+ "datastore_search_sql alone. For counting questions, do "
1943+ f"NOT report { n_returned } as the answer — instead run a "
1944+ "separate SELECT COUNT(*) with the same WHERE clause, or "
1945+ "use ckan__aggregate_data with metrics="
1946+ '{"count": "count(*)"}.\n '
1947+ "========================"
1948+ )
1949+
18721950 if not records :
1873- return "No records found matching the SQL query."
1951+ text = "No records found matching the SQL query."
1952+ return f"{ truncation_block } \n \n { text } " if truncation_block else text
1953+
1954+ lines : List [str ] = []
1955+ if truncation_block :
1956+ lines .append (truncation_block )
1957+ lines .append ("" )
18741958
1875- lines = [f"SQL Query Results: { len (records )} record(s)\n " ]
1959+ # Header — total is unknown for raw SQL, so show "X rows returned".
1960+ if effective_limit is not None :
1961+ lines .append (
1962+ f"{ n_returned } rows returned (limit={ effective_limit } , "
1963+ "true total unknown — see warning above if any).\n "
1964+ )
1965+ else :
1966+ lines .append (f"{ n_returned } rows returned.\n " )
18761967
18771968 # Show field names if available
18781969 if fields :
@@ -1887,7 +1978,7 @@ def _format_sql_results(
18871978 lines .append (f" { key } : { value } " )
18881979 lines .append ("" )
18891980
1890- if len ( records ) > 10 :
1891- lines .append (f"... and { len ( records ) - 10 } more record(s)" )
1981+ if n_returned > 10 :
1982+ lines .append (f"... and { n_returned - 10 } more record(s) returned " )
18921983
18931984 return "\n " .join (lines )
0 commit comments