Skip to content

Commit b401d59

Browse files
authored
Prioritize tables with mentioned columns in the suggestions (#6)
* Prioritize tables with mentioned columns in the suggestions * bump version
1 parent 9b8a549 commit b401d59

File tree

5 files changed

+301
-3
lines changed

5 files changed

+301
-3
lines changed

CHANGELOG.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,12 @@
11
# Changelog
22

33

4-
## 0.1.1 - 2026.02.23
4+
## 0.1.2 - 2026.02.25
5+
### Fixed
6+
- Prioritize tables with mentioned columns in the suggestions [#6](https://github.com/questdb/sql-parser/pull/6)
57

8+
9+
## 0.1.1 - 2026.02.23
610
### Fixed
711
- grammar-level table/column classification, join-specific suggestions [#2](https://github.com/questdb/sql-parser/pull/2)
812

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "@questdb/sql-parser",
3-
"version": "0.1.1",
3+
"version": "0.1.2",
44
"description": "SQL parser for QuestDB syntax using Chevrotain",
55
"type": "module",
66
"main": "dist/index.cjs",

src/autocomplete/content-assist.ts

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,11 @@ export interface ContentAssistResult {
8080
suggestColumns: boolean
8181
/** Whether the grammar context expects table names (tableName positions, or expression context) */
8282
suggestTables: boolean
83+
/**
84+
* Bare column names (lowercase) referenced before the cursor in expression
85+
* context. Used by the provider to boost tables containing all these columns.
86+
*/
87+
referencedColumns: Set<string>
8388
}
8489

8590
// =============================================================================
@@ -791,6 +796,81 @@ function inferTableFromQualifiedRef(
791796
return { table }
792797
}
793798

799+
/**
 * Collect the bare column names referenced in a token list.
 *
 * Every identifier-like token (`Identifier`, `QuotedIdentifier`, or a token
 * whose type name is in `IDENTIFIER_KEYWORD_TOKENS`) is collected unless one
 * of the following applies:
 * - It is followed by a `Dot` token: it is a table/alias qualifier or a
 *   non-final segment of a multi-part name ("t1" in "t1.col", "b" in "a.b.c").
 * - It is followed by an `LParen` token: it is a function call.
 * - Its lowercase name is in `tableAndAliasSet`: it is a table/alias reference.
 *
 * The trailing segment of a qualified name ("ecn" in "c.ecn") is not followed
 * by a `Dot`, so it IS collected and can be used for table ranking.
 *
 * NOTE(review): no other filtering happens here, so any remaining identifier
 * (for example a column alias) is collected as well; confirm that is intended.
 *
 * @param tokens - Tokens to scan
 * @param tableAndAliasSet - Lowercase table names and aliases already in scope
 *   (built from tablesInScope by the caller)
 * @returns A Set of lowercase column names
 */
export function extractReferencedColumns(
  tokens: IToken[],
  tableAndAliasSet: Set<string>,
): Set<string> {
  const result = new Set<string>()

  for (let i = 0; i < tokens.length; i++) {
    const token = tokens[i]
    const name = token.tokenType.name

    // Only consider identifier-like tokens
    if (
      name !== "Identifier" &&
      name !== "QuotedIdentifier" &&
      !IDENTIFIER_KEYWORD_TOKENS.has(name)
    ) {
      continue
    }

    // One-token look-ahead (undefined for the last token):
    // - "Dot" means this token is a qualifier or a non-final name segment
    // - "LParen" means this token is a function name
    const nextName = tokens[i + 1]?.tokenType.name
    if (nextName === "Dot" || nextName === "LParen") {
      continue
    }

    // Quoted identifiers carry their surrounding quote characters in `image`
    const image =
      name === "QuotedIdentifier" ? token.image.slice(1, -1) : token.image
    const lower = image.toLowerCase()

    // A known table name or alias is a table reference, not a column name
    if (tableAndAliasSet.has(lower)) {
      continue
    }

    result.add(lower)
  }

  return result
}
873+
794874
/**
795875
* Get content assist suggestions for a SQL string at a given cursor position
796876
*
@@ -825,6 +905,7 @@ export function getContentAssist(
825905
lexErrors: [],
826906
suggestColumns: false,
827907
suggestTables: false,
908+
referencedColumns: new Set(),
828909
}
829910
}
830911
}
@@ -918,6 +999,21 @@ export function getContentAssist(
918999
tablesInScope.push(qualifiedRef)
9191000
}
9201001

1002+
// Build a set of known table names and aliases so extractReferencedColumns
1003+
// can exclude them without a keyword whitelist.
1004+
const tableAndAliasSet = new Set<string>()
1005+
for (const t of tablesInScope) {
1006+
tableAndAliasSet.add(t.table.toLowerCase())
1007+
if (t.alias) tableAndAliasSet.add(t.alias.toLowerCase())
1008+
}
1009+
1010+
// Extract bare column references for table ranking (use tokensForAssist so
1011+
// a partial mid-word token isn't mistaken for a complete column name).
1012+
const referencedColumns = extractReferencedColumns(
1013+
tokensForAssist,
1014+
tableAndAliasSet,
1015+
)
1016+
9211017
return {
9221018
nextTokenTypes,
9231019
tablesInScope,
@@ -928,6 +1024,7 @@ export function getContentAssist(
9281024
qualifiedTableRef: qualifiedRef?.table,
9291025
suggestColumns,
9301026
suggestTables,
1027+
referencedColumns,
9311028
}
9321029
}
9331030

src/autocomplete/provider.ts

Lines changed: 75 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,71 @@ const TABLE_NAME_TOKENS = new Set([
3838
"View",
3939
])
4040

41+
/**
 * Build an index from lowercase table name to the Set of that table's
 * lowercase column names.
 *
 * The provider builds this once at creation time, so ranking a request only
 * needs Set lookups instead of re-scanning column arrays.
 *
 * Tables with no entry in `schema.columns` are left out of the index.
 *
 * @param schema - Schema whose `tables` and `columns` are indexed
 * @returns Map of lowercase table name → Set of lowercase column names
 */
function buildColumnIndex(
  schema: SchemaInfo,
): Map<string, Set<string>> {
  const index = new Map<string, Set<string>>()
  for (const table of schema.tables) {
    const key = table.name.toLowerCase()
    // Prefer the lowercase key. Fall back to the table's original casing so a
    // schema whose `columns` map was not lowercased is still indexed.
    const cols = schema.columns[key] ?? schema.columns[table.name]
    if (cols) {
      index.set(key, new Set(cols.map((c) => c.name.toLowerCase())))
    }
  }
  return index
}
59+
60+
/**
 * Boost the priority of table suggestions based on how many of the referenced
 * columns each table contains:
 *
 * - ALL referenced columns present  → SuggestionPriority.High (full match)
 * - SOME referenced columns present → SuggestionPriority.Medium (partial match)
 * - No referenced columns present   → priority unchanged
 *
 * Only suggestions of kind `SuggestionKind.Table` whose lowercase label is a
 * key of `columnIndex` are considered. If no table contains any referenced
 * column, or `referencedColumns` is empty, nothing is modified.
 *
 * Only the suggested tables are scored, so the work per call is proportional
 * to (table suggestions × referenced columns).
 *
 * @param suggestions - The suggestion array (mutated in place)
 * @param referencedColumns - Lowercase column names found in expression context
 * @param columnIndex - Pre-built map of lowercase table name → column name set
 */
function rankTableSuggestions(
  suggestions: Suggestion[],
  referencedColumns: Set<string>,
  columnIndex: Map<string, Set<string>>,
): void {
  if (referencedColumns.size === 0) return

  for (const s of suggestions) {
    if (s.kind !== SuggestionKind.Table) continue

    const colNames = columnIndex.get(s.label.toLowerCase())
    if (!colNames) continue

    // How many of the referenced columns does this table contain?
    let matched = 0
    for (const ref of referencedColumns) {
      if (colNames.has(ref)) matched++
    }

    // No overlap: leave the default priority untouched
    if (matched === 0) continue

    s.priority =
      matched === referencedColumns.size
        ? SuggestionPriority.High // full match
        : SuggestionPriority.Medium // partial match
  }
}
105+
41106
function getLastSignificantTokens(tokens: IToken[]): string[] {
42107
const result: string[] = []
43108
for (let i = tokens.length - 1; i >= 0; i--) {
@@ -84,6 +149,9 @@ export function createAutocompleteProvider(
84149
),
85150
}
86151

152+
// Pre-build column index once so per-request ranking is fast
153+
const columnIndex = buildColumnIndex(normalizedSchema)
154+
87155
return {
88156
getSuggestions(query: string, cursorOffset: number): Suggestion[] {
89157
// Get content assist from parser
@@ -96,6 +164,7 @@ export function createAutocompleteProvider(
96164
qualifiedTableRef,
97165
suggestColumns,
98166
suggestTables,
167+
referencedColumns,
99168
} = getContentAssist(query, cursorOffset)
100169

101170
// Merge CTE columns into the schema so getColumnsInScope() can find them
@@ -146,7 +215,7 @@ export function createAutocompleteProvider(
146215

147216
// If parser returned valid next tokens, use grammar-based classification
148217
if (nextTokenTypes.length > 0) {
149-
return buildSuggestions(
218+
const suggestions = buildSuggestions(
150219
nextTokenTypes,
151220
effectiveSchema,
152221
effectiveTablesInScope,
@@ -156,6 +225,10 @@ export function createAutocompleteProvider(
156225
isMidWord,
157226
},
158227
)
228+
if (suggestTables) {
229+
rankTableSuggestions(suggestions, referencedColumns, columnIndex)
230+
}
231+
return suggestions
159232
}
160233

161234
// Fallback: when Chevrotain returns no suggestions (malformed SQL like
@@ -194,6 +267,7 @@ export function createAutocompleteProvider(
194267
})
195268
}
196269
}
270+
rankTableSuggestions(suggestions, referencedColumns, columnIndex)
197271
return suggestions
198272
}
199273

tests/autocomplete.test.ts

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2902,4 +2902,127 @@ describe("CTE autocomplete", () => {
29022902
expect(columns.map((s) => s.label)).toContain("symbol")
29032903
})
29042904
})
2905+
2906+
// ===========================================================================
// Column-based table ranking
// ===========================================================================
describe("column-based table ranking", () => {
  /** Table suggestions offered with the cursor placed at the end of `sql`. */
  const tableSuggestionsFor = (sql: string) =>
    provider
      .getSuggestions(sql, sql.length)
      .filter((s) => s.kind === SuggestionKind.Table)

  /** Priority of the table suggestion labelled `label` (undefined if absent). */
  const priorityOf = (
    tables: ReturnType<typeof tableSuggestionsFor>,
    label: string,
  ) => tables.find((s) => s.label === label)?.priority

  /**
   * Assert that every table suggestion keeps the default priority.
   * The non-empty guard stops the loop from passing vacuously when no table
   * suggestions are returned at all.
   */
  const expectNoBoost = (sql: string) => {
    const tables = tableSuggestionsFor(sql)
    expect(tables.length).toBeGreaterThan(0)
    for (const t of tables) {
      expect(t.priority).toBe(SuggestionPriority.MediumLow)
    }
  }

  it("boosts tables that contain all referenced columns", () => {
    // "symbol" and "price" both exist in trades but not in orders or users
    const tables = tableSuggestionsFor("SELECT symbol, price FROM ")
    expect(priorityOf(tables, "trades")).toBe(SuggestionPriority.High)
    expect(priorityOf(tables, "orders")).toBe(SuggestionPriority.MediumLow)
    expect(priorityOf(tables, "users")).toBe(SuggestionPriority.MediumLow)
  })

  it("partially matching tables get Medium priority; no-match tables stay MediumLow", () => {
    // "symbol" is in trades; "id" is in orders — each table has one of the two
    const tables = tableSuggestionsFor("SELECT symbol, id FROM ")
    // partial match → Medium (boosted but not full match)
    expect(priorityOf(tables, "trades")).toBe(SuggestionPriority.Medium)
    expect(priorityOf(tables, "orders")).toBe(SuggestionPriority.Medium)
    // no match → default
    expect(priorityOf(tables, "users")).toBe(SuggestionPriority.MediumLow)
  })

  it("columns from two tables: both partially-matching tables get Medium", () => {
    // "symbol" and "price" only in trades; "status" only in orders
    // → trades and orders both partially match (2 and 1 out of 3); users has none
    const tables = tableSuggestionsFor("SELECT symbol, price, status FROM ")
    expect(priorityOf(tables, "trades")).toBe(SuggestionPriority.Medium)
    expect(priorityOf(tables, "orders")).toBe(SuggestionPriority.Medium)
    expect(priorityOf(tables, "users")).toBe(SuggestionPriority.MediumLow)
  })

  it("graceful fallback: no boost when no table has any referenced column", () => {
    // "nonexistent_col" doesn't exist in any table
    expectNoBoost("SELECT nonexistent_col FROM ")
  })

  it("qualified references: the alias/qualifier is excluded but the column name is used", () => {
    // "t1.symbol" → "symbol" is extracted; "t1" (alias qualifier) is not
    const tables = tableSuggestionsFor("SELECT t1.symbol FROM ")
    // trades has "symbol" → boosted; orders does not
    expect(priorityOf(tables, "trades")).toBe(SuggestionPriority.High)
    expect(priorityOf(tables, "orders")).toBe(SuggestionPriority.MediumLow)
  })

  it("qualified references from multiple aliases boost the correct tables", () => {
    // c.symbol → symbol in trades; o.id → id in orders
    const tables = tableSuggestionsFor("SELECT c.symbol, o.id FROM ")
    expect(priorityOf(tables, "trades")).toBe(SuggestionPriority.Medium) // partial: symbol but not id
    expect(priorityOf(tables, "orders")).toBe(SuggestionPriority.Medium) // partial: id but not symbol
    expect(priorityOf(tables, "users")).toBe(SuggestionPriority.MediumLow)
  })

  it("function calls are excluded from column inference", () => {
    // "count()" is a function call — should not influence ranking
    expectNoBoost("SELECT count() FROM ")
  })

  it("all tables remain in the suggestion list even when some are boosted", () => {
    const tableLabels = tableSuggestionsFor("SELECT symbol, price FROM ").map(
      (s) => s.label,
    )
    expect(tableLabels).toContain("trades")
    expect(tableLabels).toContain("orders")
    expect(tableLabels).toContain("users")
  })

  it("boosts a single-column match correctly", () => {
    // "status" only exists in orders
    const tables = tableSuggestionsFor("SELECT status FROM ")
    expect(priorityOf(tables, "orders")).toBe(SuggestionPriority.High)
    expect(priorityOf(tables, "trades")).toBe(SuggestionPriority.MediumLow)
  })

  it("SELECT * FROM does not boost any table (no referenced columns)", () => {
    expectNoBoost("SELECT * FROM ")
  })
})
29053028
})

0 commit comments

Comments
 (0)