Skip to content

Commit 81c84ad

Browse files
authored
Run SQL directly in the chat and load tables dynamically (#382)
1 parent 417545d commit 81c84ad

File tree

8 files changed

+165
-39
lines changed

8 files changed

+165
-39
lines changed

bin/chat.js

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
import readline from 'readline'
2+
import { parseSql } from 'squirreling'
3+
import { runSqlQuery } from './tools/parquetSql.js'
24
import { tools } from './tools/tools.js'
35

46
/** @type {'text' | 'tool'} */
@@ -277,11 +279,26 @@ export function chat() {
277279
rl.close()
278280
process.exit()
279281
} else if (input) {
282+
// If the input is valid SQL, run it directly without sending to the model
283+
let isSql = false
280284
try {
281-
write(colors.user, 'answer: ', colors.normal)
282-
outputMode = 'text' // switch to text output mode
283-
messages.push([{ role: 'user', content: input }])
284-
await sendMessages(messages)
285+
parseSql({ query: input })
286+
isSql = true
287+
} catch {
288+
// not SQL
289+
}
290+
291+
try {
292+
if (isSql) {
293+
write(colors.user, 'answer: ', colors.normal)
294+
const result = await runSqlQuery(input)
295+
write(result)
296+
} else {
297+
write(colors.user, 'answer: ', colors.normal)
298+
outputMode = 'text' // switch to text output mode
299+
messages.push([{ role: 'user', content: input }])
300+
await sendMessages(messages)
301+
}
285302
} catch (error) {
286303
console.error(colors.error, '\n' + error)
287304
} finally {

bin/cli.js

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,21 +8,18 @@ import { serve } from './serve.js'
88
const updateCheck = checkForUpdates()
99

1010
const arg = process.argv[2]
11-
if (arg === 'chat') {
12-
await updateCheck // wait for update check to finish before chat
13-
chat()
14-
} else if (arg === '--help' || arg === '-H' || arg === '-h') {
11+
if (arg === '--help' || arg === '-H' || arg === '-h') {
1512
console.log('Usage:')
13+
console.log(' hyperparam start chat client')
1614
console.log(' hyperparam [path] start hyperparam webapp. "path" is a directory or a URL.')
17-
console.log(' defaults to the current directory.')
18-
console.log(' hyperparam chat start chat client')
1915
console.log(' ')
2016
console.log(' hyperparam -h, --help, give this help list')
2117
console.log(' hyperparam -v, --version print program version')
2218
} else if (arg === '--version' || arg === '-V' || arg === '-v') {
2319
console.log(packageJson.version)
2420
} else if (!arg) {
25-
serve(process.cwd(), undefined) // current directory
21+
await updateCheck
22+
chat()
2623
} else if (/^https?:\/\//.exec(arg)) {
2724
serve(undefined, arg) // url
2825
} else {

bin/tools/parquetSql.js

Lines changed: 76 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,86 @@
11
import { asyncBufferFromFile, asyncBufferFromUrl, parquetMetadataAsync } from 'hyparquet'
22
import { compressors } from 'hyparquet-compressors'
3-
import { collect, executeSql } from 'squirreling'
3+
import { collect, executeSql, parseSql, planSql } from 'squirreling'
44
import { parquetDataSource } from 'hyperparam'
55
import { markdownTable } from './markdownTable.js'
66

77
const maxRows = 100
88

9+
/**
 * Gather the table name of every Scan and Count node reachable from a plan.
 *
 * Nodes are visited depth-first in the order child, left, right. A Scan or
 * Count node contributes its `table` and its `child` is not followed, but its
 * `left` and `right` (when present) still are.
 *
 * @param {import('squirreling').QueryPlan} plan root of the query plan
 * @returns {Set<string>} distinct table names in first-seen order
 */
function scanTables(plan) {
  /** @type {Set<string>} */
  const found = new Set()
  /** @type {import('squirreling').QueryPlan[]} */
  const pending = [plan]

  while (pending.length > 0) {
    const current = pending.pop()
    if (!current) continue

    const readsTable = current.type === 'Scan' || current.type === 'Count'
    if (readsTable) found.add(current.table)

    // Collect sub-plans in visiting order: child, then left, then right
    const next = []
    if (!readsTable && 'child' in current) next.push(current.child)
    if ('left' in current) next.push(current.left)
    if ('right' in current) next.push(current.right)

    // The stack is last-in-first-out, so push them reversed
    for (let i = next.length - 1; i >= 0; i--) {
      pending.push(next[i])
    }
  }

  return found
}
32+
33+
/**
 * Open a parquet file as an AsyncDataSource.
 *
 * Names starting with `http://` or `https://` are read with
 * asyncBufferFromUrl; anything else is read with asyncBufferFromFile.
 *
 * @param {string} file local path or http(s) URL of a parquet file
 * @returns {Promise<import('squirreling').AsyncDataSource>}
 */
async function fileToDataSource(file) {
  const isRemote = file.startsWith('http://') || file.startsWith('https://')

  let asyncBuffer
  if (isRemote) {
    asyncBuffer = await asyncBufferFromUrl({ url: file })
  } else {
    asyncBuffer = await asyncBufferFromFile(file)
  }

  // The parquet metadata is read before the data source is constructed
  const metadata = await parquetMetadataAsync(asyncBuffer)
  return parquetDataSource(asyncBuffer, metadata, compressors)
}
46+
47+
/**
 * Run a SQL query whose table names are parquet file paths or URLs.
 *
 * The query is parsed and planned first to find the tables it references,
 * each table is opened with fileToDataSource, and then the query is executed.
 * The result is a summary line followed by a markdown table of at most
 * `maxRows` rows.
 *
 * @param {string} query SQL text to execute
 * @param {boolean} [truncate] when true (default) markdownTable is given a
 *   maxChars of 1000, otherwise 10000
 * @returns {Promise<string>} formatted result message
 */
export async function runSqlQuery(query, truncate = true) {
  const startedAt = performance.now()

  // Plan the query only to learn which tables it reads
  const names = [...scanTables(planSql({ query: parseSql({ query }) }))]

  // Open all referenced tables concurrently, then index them by name
  const sources = await Promise.all(names.map(name => fileToDataSource(name)))
  /** @type {Record<string, import('squirreling').AsyncDataSource>} */
  const tables = {}
  names.forEach((name, i) => {
    tables[name] = sources[i]
  })

  const rows = await collect(executeSql({ tables, query }))
  // Elapsed time covers parsing, table loading and execution
  const seconds = ((performance.now() - startedAt) / 1000).toFixed(1)
  const total = rows.length

  if (total === 0) {
    return `Query executed successfully but returned no results in ${seconds} seconds.`
  }

  const header = `Query returned ${total} row${total === 1 ? '' : 's'} in ${seconds} seconds.\n\n`
  const table = markdownTable(rows.slice(0, maxRows), truncate ? 1000 : 10000)
  const shown = header + table

  const hidden = total - maxRows
  if (hidden <= 0) return shown
  return shown + `\n\n... and ${hidden} more row${hidden === 1 ? '' : 's'} (showing first ${maxRows} rows)`
}
83+
984
/**
1085
* @import { ToolHandler } from '../types.d.ts'
1186
* @type {ToolHandler}

package.json

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -57,39 +57,39 @@
5757
},
5858
"dependencies": {
5959
"hightable": "0.26.4",
60-
"hyparquet": "1.25.1",
60+
"hyparquet": "1.25.3",
6161
"hyparquet-compressors": "1.1.1",
6262
"icebird": "0.3.1",
63-
"squirreling": "0.10.3"
63+
"squirreling": "0.11.2"
6464
},
6565
"devDependencies": {
66-
"@storybook/react-vite": "10.2.19",
66+
"@storybook/react-vite": "10.3.3",
6767
"@testing-library/react": "16.3.2",
6868
"@types/node": "25.5.0",
6969
"@types/react": "19.2.14",
7070
"@types/react-dom": "19.2.3",
71-
"@vitejs/plugin-react": "5.1.4",
72-
"@vitest/coverage-v8": "4.1.0",
71+
"@vitejs/plugin-react": "6.0.1",
72+
"@vitest/coverage-v8": "4.1.2",
7373
"eslint": "9.39.2",
7474
"eslint-plugin-react": "7.37.5",
7575
"eslint-plugin-react-hooks": "7.0.1",
7676
"eslint-plugin-react-refresh": "0.5.2",
77-
"eslint-plugin-storybook": "10.2.19",
77+
"eslint-plugin-storybook": "10.3.3",
7878
"globals": "17.4.0",
79-
"jsdom": "29.0.0",
79+
"jsdom": "29.0.1",
8080
"nodemon": "3.1.14",
8181
"npm-run-all": "4.1.5",
8282
"react": "19.2.4",
8383
"react-dom": "19.2.4",
84-
"storybook": "10.2.19",
85-
"typescript": "5.9.3",
86-
"typescript-eslint": "8.57.0",
87-
"vite": "7.3.1",
88-
"vitest": "4.1.0"
84+
"storybook": "10.3.3",
85+
"typescript": "6.0.2",
86+
"typescript-eslint": "8.58.0",
87+
"vite": "8.0.3",
88+
"vitest": "4.1.2"
8989
},
9090
"peerDependencies": {
91-
"react": "^18.3.1 || ^19",
92-
"react-dom": "^18.3.1 || ^19"
91+
"react": "18.3.1 || ^19",
92+
"react-dom": "18.3.1 || ^19"
9393
},
9494
"eslintConfig": {
9595
"extends": [

src/components/Json/Json.tsx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ function ByteArray({ bytes, label, expandRoot }: { bytes: Uint8Array, label?: st
9696
}
9797

9898
function CollapsedArray({ array }: {array: unknown[]}): ReactNode {
99-
const { elementRef, width } = useWidth<HTMLSpanElement>()
99+
const { elementRef, width } = useWidth()
100100
const maxCharacterCount = Math.max(20, Math.floor(width / 8))
101101
const separator = ', '
102102

@@ -159,7 +159,7 @@ function JsonArray({ array, label, expandRoot, pageLimit = defaultPageLimit }: {
159159
}
160160

161161
function CollapsedObject({ obj }: { obj: object }): ReactNode {
162-
const { elementRef, width } = useWidth<HTMLSpanElement>()
162+
const { elementRef, width } = useWidth()
163163
const maxCharacterCount = Math.max(20, Math.floor(width / 8))
164164
const separator = ', '
165165
const kvSeparator = ': '

src/components/ProgressBar/ProgressBar.module.css

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,12 @@
1+
/* shimmer: moves the background position from -1000px to 1000px */
@keyframes shimmer {
  from {
    background-position: -1000px;
  }
  to {
    background-position: 1000px;
  }
}
9+
110
/* progress bar */
211
.progressBar {
312
position: fixed;
@@ -13,15 +22,6 @@
1322
background-size: 1000px;
1423
animation: shimmer 4s infinite linear;
1524

16-
@keyframes shimmer {
17-
0% {
18-
background-position: -1000px;
19-
}
20-
100% {
21-
background-position: 1000px;
22-
}
23-
}
24-
2525
& > [role="presentation"] {
2626
height: 100%;
2727
background-color: #3a4;

src/components/TableView/TableView.tsx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ interface Content extends ContentSize {
2525
* Table file viewer for parquet, CSV, and JSONL files
2626
*/
2727
export default function TableView({ source, setProgress, setError }: ViewerProps) {
28-
const [isLoading, setIsLoading] = useState<boolean>(true)
28+
const [isLoading, setIsLoading] = useState(true)
2929
const [content, setContent] = useState<Content>()
3030
const [cell, setCell] = useState<{ row: number, col: number } | undefined>()
3131
const { customClass, routes } = useConfig()

src/lib/parquet/parquetDataSource.ts

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
import { parquetReadObjects, parquetSchema } from 'hyparquet'
2-
import type { AsyncBuffer, Compressors, FileMetaData } from 'hyparquet'
2+
import { parquetReadAsync } from 'hyparquet/src/read.js'
3+
import { assembleAsync } from 'hyparquet/src/rowgroup.js'
4+
import type { AsyncBuffer, AsyncRowGroup, Compressors, FileMetaData } from 'hyparquet'
35
import { AsyncDataSource, ScanOptions, asyncRow } from 'squirreling'
46
import { whereToParquetFilter } from './parquetFilter.js'
57
import { extractSpatialFilter, rowGroupOverlaps } from './parquetSpatial.js'
@@ -87,5 +89,40 @@ export function parquetDataSource(file: AsyncBuffer, metadata: FileMetaData, com
8789
appliedLimitOffset,
8890
}
8991
},
92+
93+
/**
 * Stream the values of a single column, chunk by chunk, restricted to the
 * row window that starts at `offset` (default 0) and spans `limit` rows
 * (unbounded when `limit` is undefined).
 *
 * Throws an 'AbortError' DOMException if `signal` is aborted before or
 * after a row group's column data is awaited.
 */
async *scanColumn({ column, limit, offset, signal }) {
  const rowStart = offset ?? 0
  const rowEnd = limit !== undefined ? rowStart + limit : undefined
  // Upper bound used for slicing; the reader itself still receives rowEnd as-is
  const stop = rowEnd ?? Infinity

  const throwIfAborted = () => {
    if (signal?.aborted) throw new DOMException('Aborted', 'AbortError')
  }

  const schemaTree = parquetSchema(metadata)
  const rowGroups = parquetReadAsync({
    file,
    metadata,
    rowStart,
    rowEnd,
    columns: [column],
    compressors,
  }).map((arg: AsyncRowGroup) => assembleAsync(arg, schemaTree))

  for (const group of rowGroups) {
    throwIfAborted()
    // Only one column was requested, so only the first async column is read
    const [requested] = group.asyncColumns
    if (!requested) continue
    const { skipped, data } = await requested.data
    throwIfAborted()

    // Absolute row index of the first row in the chunk about to be visited
    let cursor = group.groupStart + skipped
    for (const page of data) {
      const size = page.length
      const from = Math.max(rowStart - cursor, 0)
      const to = Math.min(stop - cursor, size)
      cursor += size
      if (to <= from) continue // chunk lies entirely outside the window
      // Yield the chunk untouched when the window covers all of it
      yield from <= 0 && to >= size ? page : page.slice(from, to)
    }
  }
},
90127
}
91128
}

0 commit comments

Comments
 (0)