diff --git a/.cargo/config.toml b/.cargo/config.toml index ba377b35d8..33d3f35bf4 100644 --- a/.cargo/config.toml +++ b/.cargo/config.toml @@ -2,8 +2,13 @@ bundle = "run -p perspective-bundle" [build] -# rustflags = ["--cfg=web_sys_unstable_apis", "--cfg=pyo3_disable_reference_pool"] rustflags = ["--cfg=web_sys_unstable_apis", "-Csymbol-mangling-version=v0"] +# rustflags = [ +# "--cfg=web_sys_unstable_apis", +# "--cfg=pyo3_disable_reference_pool", +# "-Csymbol-mangling-version=legacy", +# "-Zunstable-options", +# ] target-dir = "rust/target" [target.wasm32-unknown-unknown] diff --git a/Cargo.lock b/Cargo.lock index 1d1bf3b09f..a8df5f54da 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,18 +2,6 @@ # It is not intended for manual editing. version = 4 -[[package]] -name = "ahash" -version = "0.8.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" -dependencies = [ - "cfg-if", - "once_cell", - "version_check", - "zerocopy", -] - [[package]] name = "aho-corasick" version = "1.1.4" @@ -97,6 +85,17 @@ dependencies = [ "event-listener", ] +[[package]] +name = "async-trait" +version = "0.1.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.111", +] + [[package]] name = "atomic-waker" version = "1.1.2" @@ -279,6 +278,12 @@ dependencies = [ "thiserror 2.0.17", ] +[[package]] +name = "cast" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" + [[package]] name = "cc" version = "1.2.48" @@ -1067,16 +1072,6 @@ version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" -[[package]] -name = "hashbrown" -version = "0.14.5" 
-source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" -dependencies = [ - "ahash", - "serde", -] - [[package]] name = "hashbrown" version = "0.15.5" @@ -1378,9 +1373,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.82" +version = "0.3.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b011eec8cc36da2aab2d5cff675ec18454fad408585853910a202391cf9f8e65" +checksum = "464a3709c7f55f1f721e5389aa6ea4e3bc6aba669353300af094b29ffbdde1d8" dependencies = [ "once_cell", "wasm-bindgen", @@ -1410,6 +1405,12 @@ version = "0.2.178" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37c93d8daa9d8a012fd8ab92f088405fb202ea0b6ab73ee2482ae66af4f42091" +[[package]] +name = "libm" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" + [[package]] name = "libredox" version = "0.1.10" @@ -1626,6 +1627,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ "autocfg", + "libm", ] [[package]] @@ -1650,6 +1652,12 @@ version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" +[[package]] +name = "oorandom" +version = "11.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" + [[package]] name = "option-ext" version = "0.2.0" @@ -3233,7 +3241,7 @@ dependencies = [ "rayon", "walrus-macro", "wasm-encoder", - "wasmparser 0.240.0", + "wasmparser", ] [[package]] @@ -3265,9 +3273,9 @@ dependencies = [ [[package]] name = "wasm-bindgen" -version = "0.2.105" +version = "0.2.106" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "da95793dfc411fbbd93f5be7715b0578ec61fe87cb1a42b12eb625caa5c5ea60" +checksum = "0d759f433fa64a2d763d1340820e46e111a7a5ab75f993d1852d70b03dbb80fd" dependencies = [ "cfg-if", "once_cell", @@ -3280,9 +3288,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-cli-support" -version = "0.2.105" +version = "0.2.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d3f73cd40cc5c6adf6a090e331f24595f87e7a5bd70ccf3db46be0e1d6e0d92" +checksum = "03794299fa80bda34aef2784a496c6440fbc75fb1977c4e05750ddcd617e5a09" dependencies = [ "anyhow", "base64 0.22.1", @@ -3293,7 +3301,7 @@ dependencies = [ "serde_json", "walrus", "wasm-bindgen-shared", - "wasmparser 0.214.0", + "wasmparser", ] [[package]] @@ -3320,9 +3328,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.55" +version = "0.4.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "551f88106c6d5e7ccc7cd9a16f312dd3b5d36ea8b4954304657d5dfba115d4a0" +checksum = "836d9622d604feee9e5de25ac10e3ea5f2d65b41eac0d9ce72eb5deae707ce7c" dependencies = [ "cfg-if", "js-sys", @@ -3333,9 +3341,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.105" +version = "0.2.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04264334509e04a7bf8690f2384ef5265f05143a4bff3889ab7a3269adab59c2" +checksum = "48cb0d2638f8baedbc542ed444afc0644a29166f1595371af4fecf8ce1e7eeb3" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -3343,9 +3351,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.105" +version = "0.2.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "420bc339d9f322e562942d52e115d57e950d12d88983a14c79b86859ee6c7ebc" +checksum = "cefb59d5cd5f92d9dcf80e4683949f15ca4b511f4ac0a6e14d4e1ac60c6ecd40" dependencies = [ "bumpalo", "proc-macro2", @@ -3356,21 +3364,29 @@ 
dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.105" +version = "0.2.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76f218a38c84bcb33c25ec7059b07847d465ce0e0a76b995e134a45adcb6af76" +checksum = "cbc538057e648b67f72a982e708d485b2efa771e1ac05fec311f9f63e5800db4" dependencies = [ "unicode-ident", ] [[package]] name = "wasm-bindgen-test" -version = "0.3.55" +version = "0.3.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfc379bfb624eb59050b509c13e77b4eb53150c350db69628141abce842f2373" +checksum = "25e90e66d265d3a1efc0e72a54809ab90b9c0c515915c67cdf658689d2c22c6c" dependencies = [ + "async-trait", + "cast", "js-sys", + "libm", "minicov", + "nu-ansi-term 0.50.3", + "num-traits", + "oorandom", + "serde", + "serde_json", "wasm-bindgen", "wasm-bindgen-futures", "wasm-bindgen-test-macro", @@ -3378,9 +3394,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-test-macro" -version = "0.3.55" +version = "0.3.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "085b2df989e1e6f9620c1311df6c996e83fe16f57792b272ce1e024ac16a90f1" +checksum = "7150335716dce6028bead2b848e72f47b45e7b9422f64cccdc23bedca89affc1" dependencies = [ "proc-macro2", "quote", @@ -3394,7 +3410,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06d642d8c5ecc083aafe9ceb32809276a304547a3a6eeecceb5d8152598bc71f" dependencies = [ "leb128fmt", - "wasmparser 0.240.0", + "wasmparser", ] [[package]] @@ -3437,20 +3453,6 @@ dependencies = [ "cxx-build", ] -[[package]] -name = "wasmparser" -version = "0.214.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5309c1090e3e84dad0d382f42064e9933fdaedb87e468cc239f0eabea73ddcb6" -dependencies = [ - "ahash", - "bitflags", - "hashbrown 0.14.5", - "indexmap 2.12.1", - "semver 1.0.27", - "serde", -] - [[package]] name = "wasmparser" version = "0.240.0" @@ -3466,9 +3468,9 @@ dependencies = [ 
[[package]] name = "web-sys" -version = "0.3.82" +version = "0.3.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a1f95c0d03a47f4ae1f7a64643a6bb97465d9b740f0fa8f90ea33915c99a9a1" +checksum = "9b32828d774c412041098d182a8b38b16ea816958e07cf40eec2bc080ae137ac" dependencies = [ "js-sys", "wasm-bindgen", diff --git a/examples/blocks/src/duckdb/index.js b/examples/blocks/src/duckdb/index.js index e35d56aa5c..8ceef9c0fb 100644 --- a/examples/blocks/src/duckdb/index.js +++ b/examples/blocks/src/duckdb/index.js @@ -90,7 +90,7 @@ await loadSampleData(db); const viewer = document.querySelector("#query"); viewer.load(client); viewer.restore({ - table: "data_source_one", + table: "memory.data_source_one", group_by: ["Region", "State", "City"], columns: ["Sales", "Profit", "Quantity", "Discount"], plugin: "Datagrid", diff --git a/examples/esbuild-clickhouse-virtual/build.js b/examples/esbuild-clickhouse-virtual/build.js new file mode 100644 index 0000000000..fe2aa0ba62 --- /dev/null +++ b/examples/esbuild-clickhouse-virtual/build.js @@ -0,0 +1,43 @@ +// ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ +// ┃ ██████ ██████ ██████ █ █ █ █ █ █▄ ▀███ █ ┃ +// ┃ ▄▄▄▄▄█ █▄▄▄▄▄ ▄▄▄▄▄█ ▀▀▀▀▀█▀▀▀▀▀ █ ▀▀▀▀▀█ ████████▌▐███ ███▄ ▀█ █ ▀▀▀▀▀ ┃ +// ┃ █▀▀▀▀▀ █▀▀▀▀▀ █▀██▀▀ ▄▄▄▄▄ █ ▄▄▄▄▄█ ▄▄▄▄▄█ ████████▌▐███ █████▄ █ ▄▄▄▄▄ ┃ +// ┃ █ ██████ █ ▀█▄ █ ██████ █ ███▌▐███ ███████▄ █ ┃ +// ┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫ +// ┃ Copyright (c) 2017, the Perspective Authors. ┃ +// ┃ ╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌ ┃ +// ┃ This file is part of the Perspective library, distributed under the terms ┃ +// ┃ of the [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0). 
┃ +// ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛ + +import esbuild from "esbuild"; +import fs from "fs"; +import path from "path"; +import { fileURLToPath } from "url"; +import { dirname } from "path"; +import "zx/globals"; + +const __dirname = dirname(fileURLToPath(import.meta.url)); + +async function build() { + await esbuild.build({ + entryPoints: ["src/index.ts"], + outdir: "dist", + format: "esm", + bundle: true, + sourcemap: "inline", + target: "es2022", + loader: { + ".ttf": "file", + ".wasm": "file", + }, + assetNames: "[name]", + }); + + fs.writeFileSync( + path.join(__dirname, "dist/index.html"), + fs.readFileSync(path.join(__dirname, "src/index.html")).toString(), + ); +} + +await build(); diff --git a/examples/esbuild-clickhouse-virtual/package.json b/examples/esbuild-clickhouse-virtual/package.json new file mode 100644 index 0000000000..22ff1426f2 --- /dev/null +++ b/examples/esbuild-clickhouse-virtual/package.json @@ -0,0 +1,25 @@ +{ + "name": "esbuild-clickhouse-virtual", + "private": true, + "version": "4.1.1", + "type": "module", + "description": "Example of a custom VirtualServer for ClickHouse running in a Web Worker", + "scripts": { + "build": "node build.js", + "start": "node build.js && node server.mjs" + }, + "keywords": [], + "license": "Apache-2.0", + "dependencies": { + "@perspective-dev/client": "workspace:^", + "@perspective-dev/server": "workspace:^", + "@perspective-dev/viewer": "workspace:^", + "@perspective-dev/viewer-d3fc": "workspace:^", + "@perspective-dev/viewer-datagrid": "workspace:^", + "@clickhouse/client-web": "catalog:" + }, + "devDependencies": { + "esbuild": "catalog:", + "zx": "catalog:" + } +} diff --git a/examples/esbuild-clickhouse-virtual/server.mjs b/examples/esbuild-clickhouse-virtual/server.mjs new file mode 100644 index 0000000000..6c30eb491c --- /dev/null +++ b/examples/esbuild-clickhouse-virtual/server.mjs @@ -0,0 +1,29 @@ +// 
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ +// ┃ ██████ ██████ ██████ █ █ █ █ █ █▄ ▀███ █ ┃ +// ┃ ▄▄▄▄▄█ █▄▄▄▄▄ ▄▄▄▄▄█ ▀▀▀▀▀█▀▀▀▀▀ █ ▀▀▀▀▀█ ████████▌▐███ ███▄ ▀█ █ ▀▀▀▀▀ ┃ +// ┃ █▀▀▀▀▀ █▀▀▀▀▀ █▀██▀▀ ▄▄▄▄▄ █ ▄▄▄▄▄█ ▄▄▄▄▄█ ████████▌▐███ █████▄ █ ▄▄▄▄▄ ┃ +// ┃ █ ██████ █ ▀█▄ █ ██████ █ ███▌▐███ ███████▄ █ ┃ +// ┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫ +// ┃ Copyright (c) 2017, the Perspective Authors. ┃ +// ┃ ╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌ ┃ +// ┃ This file is part of the Perspective library, distributed under the terms ┃ +// ┃ of the [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0). ┃ +// ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛ + +// This is just a file server, the implementation is in `src/index.ts`. + +import http from "http"; +import { fileURLToPath } from "url"; +import { dirname } from "path"; +import { cwd_static_file_handler } from "@perspective-dev/client"; + +const __dirname = dirname(fileURLToPath(import.meta.url)); + +// Create HTTP server for serving static files +const httpServer = http.createServer((req, res) => + cwd_static_file_handler(req, res, [`${__dirname}/dist`, __dirname]), +); + +httpServer.listen(8080, () => { + console.log("Server listening on http://localhost:8080"); +}); diff --git a/examples/esbuild-clickhouse-virtual/src/index.html b/examples/esbuild-clickhouse-virtual/src/index.html new file mode 100644 index 0000000000..db1840a6bc --- /dev/null +++ b/examples/esbuild-clickhouse-virtual/src/index.html @@ -0,0 +1,25 @@ + + + + + + Perspective + ClickHouse + + + + + + + diff --git a/examples/esbuild-clickhouse-virtual/src/index.ts b/examples/esbuild-clickhouse-virtual/src/index.ts new file mode 100644 index 0000000000..c2eced1f1f --- /dev/null +++ b/examples/esbuild-clickhouse-virtual/src/index.ts @@ -0,0 +1,47 @@ +// 
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ +// ┃ ██████ ██████ ██████ █ █ █ █ █ █▄ ▀███ █ ┃ +// ┃ ▄▄▄▄▄█ █▄▄▄▄▄ ▄▄▄▄▄█ ▀▀▀▀▀█▀▀▀▀▀ █ ▀▀▀▀▀█ ████████▌▐███ ███▄ ▀█ █ ▀▀▀▀▀ ┃ +// ┃ █▀▀▀▀▀ █▀▀▀▀▀ █▀██▀▀ ▄▄▄▄▄ █ ▄▄▄▄▄█ ▄▄▄▄▄█ ████████▌▐███ █████▄ █ ▄▄▄▄▄ ┃ +// ┃ █ ██████ █ ▀█▄ █ ██████ █ ███▌▐███ ███████▄ █ ┃ +// ┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫ +// ┃ Copyright (c) 2017, the Perspective Authors. ┃ +// ┃ ╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌ ┃ +// ┃ This file is part of the Perspective library, distributed under the terms ┃ +// ┃ of the [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0). ┃ +// ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛ + +import perspective from "@perspective-dev/client"; +import perspective_viewer from "@perspective-dev/viewer"; +import "@perspective-dev/viewer-datagrid"; +import "@perspective-dev/viewer-d3fc"; + +import "@perspective-dev/viewer/dist/css/dracula.css"; +import "@perspective-dev/viewer/dist/css/themes.css"; + +// @ts-ignore +import SERVER_WASM from "@perspective-dev/server/dist/wasm/perspective-server.wasm"; + +// @ts-ignore +import CLIENT_WASM from "@perspective-dev/viewer/dist/wasm/perspective-viewer.wasm"; + +import { ClickhouseHandler } from "@perspective-dev/client/src/ts/virtual_servers/clickhouse.ts"; +import { createClient } from "@clickhouse/client-web"; + +await Promise.all([ + perspective.init_server(fetch(SERVER_WASM)), + perspective_viewer.init_client(fetch(CLIENT_WASM)), +]); + +const db = createClient({ + url: "http://localhost:8123", + username: "default", + password: "", + database: "default", + session_id: Math.random() + "", +}); + +await perspective.init_client(fetch(CLIENT_WASM)); +const server = perspective.createMessageHandler(new ClickhouseHandler(db)); +const client = await perspective.worker(server); +const viewer = 
document.querySelector("perspective-viewer")!; +viewer.load(client); diff --git a/examples/python-clickhouse-virtual/index.html b/examples/python-clickhouse-virtual/index.html new file mode 100644 index 0000000000..947b8859eb --- /dev/null +++ b/examples/python-clickhouse-virtual/index.html @@ -0,0 +1,29 @@ + + + + + + + + + + + + diff --git a/examples/python-clickhouse-virtual/package.json b/examples/python-clickhouse-virtual/package.json new file mode 100644 index 0000000000..73df8fc4ae --- /dev/null +++ b/examples/python-clickhouse-virtual/package.json @@ -0,0 +1,22 @@ +{ + "name": "python-clickhouse-virtual", + "private": true, + "version": "4.1.1", + "description": "An example of streaming a ClickHouse-backed `perspective-python` server to the browser.", + "scripts": { + "start": "PYTHONPATH=../../python/perspective python3 server.py" + }, + "keywords": [], + "license": "Apache-2.0", + "dependencies": { + "@perspective-dev/client": "workspace:^", + "@perspective-dev/viewer": "workspace:^", + "@perspective-dev/viewer-d3fc": "workspace:^", + "@perspective-dev/viewer-datagrid": "workspace:^", + "@perspective-dev/workspace": "workspace:^", + "superstore-arrow": "catalog:" + }, + "devDependencies": { + "npm-run-all": "catalog:" + } +} diff --git a/examples/python-clickhouse-virtual/server.py b/examples/python-clickhouse-virtual/server.py new file mode 100644 index 0000000000..f5cfa0fb1a --- /dev/null +++ b/examples/python-clickhouse-virtual/server.py @@ -0,0 +1,98 @@ +# ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ +# ┃ ██████ ██████ ██████ █ █ █ █ █ █▄ ▀███ █ ┃ +# ┃ ▄▄▄▄▄█ █▄▄▄▄▄ ▄▄▄▄▄█ ▀▀▀▀▀█▀▀▀▀▀ █ ▀▀▀▀▀█ ████████▌▐███ ███▄ ▀█ █ ▀▀▀▀▀ ┃ +# ┃ █▀▀▀▀▀ █▀▀▀▀▀ █▀██▀▀ ▄▄▄▄▄ █ ▄▄▄▄▄█ ▄▄▄▄▄█ ████████▌▐███ █████▄ █ ▄▄▄▄▄ ┃ +# ┃ █ ██████ █ ▀█▄ █ ██████ █ ███▌▐███ ███████▄ █ ┃ +# ┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫ +# ┃ Copyright (c) 2017, the Perspective Authors. 
┃ +# ┃ ╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌ ┃ +# ┃ This file is part of the Perspective library, distributed under the terms ┃ +# ┃ of the [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0). ┃ +# ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛ + +from pathlib import Path + +import clickhouse_connect +import perspective +import perspective.handlers.tornado +import perspective.virtual_servers.clickhouse +import pyarrow.parquet as pq +import tornado.ioloop +import tornado.web + +from loguru import logger +from tornado.web import StaticFileHandler + + +INPUT_FILE = ( + Path(__file__).parent.resolve() + / "node_modules" + / "superstore-arrow" + / "superstore.parquet" +) + + +def arrow_type_to_clickhouse(arrow_type): + t = str(arrow_type) + if t.startswith("int") or t.startswith("uint"): + return "Int64" + + if t in ("float", "double", "halffloat"): + return "Float64" + + if t.startswith("timestamp"): + return "DateTime" + + if t.startswith("date"): + return "Date" + + return "String" + + +if __name__ == "__main__": + client = clickhouse_connect.get_client(host="localhost") + + # Load superstore parquet data into ClickHouse + arrow_table = pq.read_table(str(INPUT_FILE)) + client.command("DROP TABLE IF EXISTS data_source_one") + cols = [] + for field in arrow_table.schema: + ch_type = arrow_type_to_clickhouse(field.type) + if field.nullable: + ch_type = f"Nullable({ch_type})" + + cols.append(f"`{field.name}` {ch_type}") + + client.command( + f"CREATE TABLE data_source_one ({', '.join(cols)})" + " ENGINE = MergeTree() ORDER BY tuple()" + ) + + client.insert_arrow("data_source_one", arrow_table) + logger.info("Loaded superstore data into ClickHouse") + + virtual_server = perspective.virtual_servers.clickhouse.ClickhouseVirtualServer( + client + ) + + app = tornado.web.Application( + [ + ( + r"/websocket", + perspective.handlers.tornado.PerspectiveTornadoHandler, + {"perspective_server": 
virtual_server}, + ), + (r"/node_modules/(.*)", StaticFileHandler, {"path": "../../node_modules/"}), + ( + r"/(.*)", + StaticFileHandler, + {"path": "./", "default_filename": "index.html"}, + ), + ], + websocket_max_message_size=100 * 1024 * 1024, + ) + + app.listen(3000) + logger.info("Listening on http://localhost:3000") + loop = tornado.ioloop.IOLoop.current() + loop.start() diff --git a/examples/python-duckdb-virtual/index.html b/examples/python-duckdb-virtual/index.html index c173a35350..947b8859eb 100644 --- a/examples/python-duckdb-virtual/index.html +++ b/examples/python-duckdb-virtual/index.html @@ -22,10 +22,8 @@ // Create a client that expects a Perspective server to accept // Websocket connections at the specified URL. const websocket = await perspective.websocket("ws://localhost:3000/websocket"); - const table = await websocket.open_table("data_source_one"); - - // Load this in the ``. - viewer.load(table); + viewer.load(websocket); + viewer.restore({table: "data_source_one"}) diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 9c5f433b72..9fd5596a7c 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -6,6 +6,9 @@ settings: catalogs: default: + '@clickhouse/client-web': + specifier: ^1.12.0 + version: 1.17.0 '@d3fc/d3fc-chart': specifier: 5.1.9 version: 5.1.9 @@ -370,6 +373,34 @@ importers: specifier: 'catalog:' version: 0.25.11 + examples/esbuild-clickhouse-virtual: + dependencies: + '@clickhouse/client-web': + specifier: 'catalog:' + version: 1.17.0 + '@perspective-dev/client': + specifier: workspace:^ + version: link:../../rust/perspective-js + '@perspective-dev/server': + specifier: workspace:^ + version: link:../../rust/perspective-server + '@perspective-dev/viewer': + specifier: workspace:^ + version: link:../../rust/perspective-viewer + '@perspective-dev/viewer-d3fc': + specifier: workspace:^ + version: link:../../packages/viewer-d3fc + '@perspective-dev/viewer-datagrid': + specifier: workspace:^ + version: link:../../packages/viewer-datagrid + 
devDependencies: + esbuild: + specifier: 'catalog:' + version: 0.25.11 + zx: + specifier: 'catalog:' + version: 8.8.5 + examples/esbuild-duckdb-virtual: dependencies: '@duckdb/duckdb-wasm': @@ -447,6 +478,31 @@ importers: examples/python-aiohttp: {} + examples/python-clickhouse-virtual: + dependencies: + '@perspective-dev/client': + specifier: workspace:^ + version: link:../../rust/perspective-js + '@perspective-dev/viewer': + specifier: workspace:^ + version: link:../../rust/perspective-viewer + '@perspective-dev/viewer-d3fc': + specifier: workspace:^ + version: link:../../packages/viewer-d3fc + '@perspective-dev/viewer-datagrid': + specifier: workspace:^ + version: link:../../packages/viewer-datagrid + '@perspective-dev/workspace': + specifier: workspace:^ + version: link:../../packages/workspace + superstore-arrow: + specifier: 'catalog:' + version: 3.2.0 + devDependencies: + npm-run-all: + specifier: 'catalog:' + version: 4.1.5 + examples/python-duckdb-virtual: dependencies: '@perspective-dev/client': @@ -945,6 +1001,9 @@ importers: specifier: 'catalog:' version: 8.18.3 devDependencies: + '@clickhouse/client-web': + specifier: 'catalog:' + version: 1.17.0 '@duckdb/duckdb-wasm': specifier: 'catalog:' version: 1.32.0 @@ -1911,6 +1970,12 @@ packages: resolution: {integrity: sha512-qQ5m48eI/MFLQ5PxQj4PFaprjyCTLI37ElWMmNs0K8Lk3dVeOdNpB3ks8jc7yM5CDmVC73eMVk/trk3fgmrUpA==} engines: {node: '>=6.9.0'} + '@clickhouse/client-common@1.17.0': + resolution: {integrity: sha512-MiwwgXViFAQA2YZkN4ymF1ynzG0K49KeSX9/iOcmJetWkxqSekDdpyp1GjwATWa9R215uQ+hGzJtJujeQVZZIw==} + + '@clickhouse/client-web@1.17.0': + resolution: {integrity: sha512-rady6FKTSKfB4DL3dO1Y9R4XMykc0WxY8Yex4RWYgsNuYWkvP8WQqQjEgHQZqVUYvIA6jth6AMBSKAnZLo4p7A==} + '@codemirror/state@6.5.2': resolution: {integrity: sha512-FVqsPqtPWKVVL3dPSxy8wEF/ymIEuVzF1PK3VbUgrxXpJUSHQWWZz4JMToquRxnkw+36LTamCZG2iua2Ptq0fA==} @@ -9947,6 +10012,12 @@ snapshots: '@babel/helper-string-parser': 7.27.1 
'@babel/helper-validator-identifier': 7.28.5 + '@clickhouse/client-common@1.17.0': {} + + '@clickhouse/client-web@1.17.0': + dependencies: + '@clickhouse/client-common': 1.17.0 + '@codemirror/state@6.5.2': dependencies: '@marijn/find-cluster-break': 1.0.2 diff --git a/pnpm-workspace.yaml b/pnpm-workspace.yaml index 099630e602..d77b10ac90 100644 --- a/pnpm-workspace.yaml +++ b/pnpm-workspace.yaml @@ -44,7 +44,9 @@ catalog: "ws": "^8.17.0" # Dev Dependencies + "@clickhouse/client-web": "^1.12.0" "@duckdb/duckdb-wasm": "^1.30.0" + "@duckdb/duckdb-wasm-shell": "^1.30.0" "@fontsource/roboto-mono": "4.5.10" "@iarna/toml": "3.0.0" "@jupyterlab/builder": "^4" diff --git a/rust/bundle/Cargo.toml b/rust/bundle/Cargo.toml index e94f5bfdc6..7417839877 100644 --- a/rust/bundle/Cargo.toml +++ b/rust/bundle/Cargo.toml @@ -23,7 +23,7 @@ bench = false [dependencies] clap = { version = "4.4.8", features = ["derive"] } -wasm-bindgen-cli-support = "0.2.105" +wasm-bindgen-cli-support = "0.2.106" # https://github.com/brson/wasm-opt-rs/issues/154 wasm-opt = { version = "0.116.1", default-features = false } diff --git a/rust/perspective-client/Cargo.toml b/rust/perspective-client/Cargo.toml index ef8af6a021..a5254c5336 100644 --- a/rust/perspective-client/Cargo.toml +++ b/rust/perspective-client/Cargo.toml @@ -33,6 +33,10 @@ include = [ [features] default = [] +# Removes the `Send` restriction from core `Future` traits, allowing easier +# integration with wasm while respecting compat with rust-analyzer and metadata. +sendable = [] + # Should the project build the `proto.rs` via protoc from source or assume it # already exists? 
generate-proto = [] diff --git a/rust/perspective-client/build.rs b/rust/perspective-client/build.rs index 542410f473..b3bef4885a 100644 --- a/rust/perspective-client/build.rs +++ b/rust/perspective-client/build.rs @@ -60,6 +60,7 @@ fn prost_build() -> Result<()> { .field_attribute("ViewToArrowResp.arrow", "#[serde(skip)]") .field_attribute("from_arrow", "#[serde(skip)]") .type_attribute(".", "#[derive(serde::Serialize)]") + .type_attribute("ViewPort", "#[derive(serde::Deserialize)]") .type_attribute("ViewDimensionsResp", "#[derive(serde::Deserialize)]") .type_attribute("TableValidateExprResp", "#[derive(serde::Deserialize)]") .type_attribute( diff --git a/rust/perspective-client/src/rust/virtual_server/data.rs b/rust/perspective-client/src/rust/virtual_server/data.rs index 5a7cbf4beb..1d80c4f6c1 100644 --- a/rust/perspective-client/src/rust/virtual_server/data.rs +++ b/rust/perspective-client/src/rust/virtual_server/data.rs @@ -16,7 +16,7 @@ use std::ops::{Deref, DerefMut}; use indexmap::IndexMap; use serde::Serialize; -use crate::config::Scalar; +use crate::config::{Scalar, ViewConfig}; /// A column of data returned from a virtual server query. /// @@ -133,24 +133,32 @@ template_psp!(bool, Boolean, Bool, bool); /// /// This struct represents a rectangular slice of data from a view. It can be /// serialized to JSON in either column-oriented or row-oriented format. 
-#[derive(Debug, Default, Serialize)] -pub struct VirtualDataSlice(IndexMap); +#[derive(Debug, Serialize)] +#[serde(transparent)] +pub struct VirtualDataSlice( + #[serde(skip)] ViewConfig, + IndexMap, +); impl Deref for VirtualDataSlice { type Target = IndexMap; fn deref(&self) -> &Self::Target { - &self.0 + &self.1 } } impl DerefMut for VirtualDataSlice { fn deref_mut(&mut self) -> &mut Self::Target { - &mut self.0 + &mut self.1 } } impl VirtualDataSlice { + pub fn new(config: ViewConfig) -> Self { + VirtualDataSlice(config, IndexMap::default()) + } + pub(super) fn to_rows(&self) -> Vec> { let num_rows = self.values().next().map(|x| x.len()).unwrap_or(0); (0..num_rows) @@ -189,32 +197,38 @@ impl VirtualDataSlice { pub fn set_col( &mut self, name: &str, - group_by_index: Option, + grouping_id: Option, index: usize, value: T, ) -> Result<(), Box> { - if group_by_index.is_some() { - if !self.contains_key("__ROW_PATH__") { - self.insert( - "__ROW_PATH__".to_owned(), - VirtualDataColumn::RowPath(vec![]), - ); - } + if name.starts_with("__ROW_PATH_") { + let group_by_index: u32 = name[11..name.len() - 2].parse()?; + let max_grouping_id = 2_i32.pow((self.0.group_by.len() as u32) - group_by_index) - 1; + if grouping_id.map(|x| x as i32).unwrap_or(i32::MAX) < max_grouping_id + || !self.0.split_by.is_empty() + { + if !self.contains_key("__ROW_PATH__") { + self.insert( + "__ROW_PATH__".to_owned(), + VirtualDataColumn::RowPath(vec![]), + ); + } - let Some(VirtualDataColumn::RowPath(col)) = self.get_mut("__ROW_PATH__") else { - return Err("__ROW_PATH__ column has unexpected type".into()); - }; + let Some(VirtualDataColumn::RowPath(col)) = self.get_mut("__ROW_PATH__") else { + return Err("__ROW_PATH__ column has unexpected type".into()); + }; - if let Some(row) = col.get_mut(index) { - let scalar = value.to_scalar(); - row.push(scalar); - } else { - while col.len() < index { - col.push(vec![]) - } + if let Some(row) = col.get_mut(index) { + let scalar = value.to_scalar(); + 
row.push(scalar); + } else { + while col.len() < index { + col.push(vec![]) + } - let scalar = value.to_scalar(); - col.push(vec![scalar]); + let scalar = value.to_scalar(); + col.push(vec![scalar]); + } } Ok(()) diff --git a/rust/perspective-client/src/rust/virtual_server/features.rs b/rust/perspective-client/src/rust/virtual_server/features.rs index 902ff354fc..e17141299e 100644 --- a/rust/perspective-client/src/rust/virtual_server/features.rs +++ b/rust/perspective-client/src/rust/virtual_server/features.rs @@ -13,7 +13,7 @@ use std::borrow::Cow; use indexmap::IndexMap; -use serde::Deserialize; +use serde::{Deserialize, Serialize}; use crate::proto::get_features_resp::{AggregateArgs, AggregateOptions, ColumnTypeOptions}; use crate::proto::{ColumnType, GetFeaturesResp}; @@ -23,7 +23,7 @@ use crate::proto::{ColumnType, GetFeaturesResp}; /// This struct is returned by /// [`VirtualServerHandler::get_features`](super::VirtualServerHandler::get_features) /// to inform clients about which operations are available. -#[derive(Debug, Default, Deserialize)] +#[derive(Debug, Default, Deserialize, Serialize)] pub struct Features<'a> { /// Whether group-by aggregation is supported. #[serde(default)] @@ -58,7 +58,7 @@ pub struct Features<'a> { /// /// Aggregates can either take no additional arguments ([`AggSpec::Single`]) /// or require column type arguments ([`AggSpec::Multiple`]). -#[derive(Debug, Deserialize)] +#[derive(Clone, Debug, Deserialize, Serialize)] #[serde(untagged)] pub enum AggSpec<'a> { /// An aggregate function with no additional arguments. 
diff --git a/rust/perspective-client/src/rust/virtual_server/generic_sql_model.rs b/rust/perspective-client/src/rust/virtual_server/generic_sql_model.rs new file mode 100644 index 0000000000..f7103b6e67 --- /dev/null +++ b/rust/perspective-client/src/rust/virtual_server/generic_sql_model.rs @@ -0,0 +1,595 @@ +// ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ +// ┃ ██████ ██████ ██████ █ █ █ █ █ █▄ ▀███ █ ┃ +// ┃ ▄▄▄▄▄█ █▄▄▄▄▄ ▄▄▄▄▄█ ▀▀▀▀▀█▀▀▀▀▀ █ ▀▀▀▀▀█ ████████▌▐███ ███▄ ▀█ █ ▀▀▀▀▀ ┃ +// ┃ █▀▀▀▀▀ █▀▀▀▀▀ █▀██▀▀ ▄▄▄▄▄ █ ▄▄▄▄▄█ ▄▄▄▄▄█ ████████▌▐███ █████▄ █ ▄▄▄▄▄ ┃ +// ┃ █ ██████ █ ▀█▄ █ ██████ █ ███▌▐███ ███████▄ █ ┃ +// ┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫ +// ┃ Copyright (c) 2017, the Perspective Authors. ┃ +// ┃ ╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌ ┃ +// ┃ This file is part of the Perspective library, distributed under the terms ┃ +// ┃ of the [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0). ┃ +// ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛ + +//! SQL query builder for virtual server operations. +//! +//! This module provides a stateless SQL query generator that produces +//! generic SQL strings for perspective virtual server operations. + +// TODO(texodus): Missing these features +// +// - `min_max` API for value-coloring and value-sizing. +// +// - row expand/collapse in the datagrid needs datamodel support, this is likely +// a "collapsed" boolean column in the temp table we `UPDATE`. +// +// - `on_update` real-time support will be a method which takes a view name and a +// handler and calls the handler when the view needs to be recalculated. +// +// Nice to have: +// +// - Optional `view_change` method can be implemented for engine optimization, +// defaulting to just delete & recreate (as Perspective engine does now). +// +// - Would like to add a metadata API so that e.g. 
Viewer debug panel could show +// internal generated SQL. + +use std::fmt; + +use indexmap::IndexMap; +use serde::Deserialize; + +use crate::config::{Aggregate, FilterTerm, Scalar, Sort, SortDir, ViewConfig}; +use crate::proto::{ColumnType, ViewPort}; + +/// Error type for SQL generation operations. +#[derive(Debug, Clone)] +pub enum GenericSQLError { + /// A required column was not found in the schema. + ColumnNotFound(String), + /// An invalid configuration was provided. + InvalidConfig(String), + /// An unsupported operation was requested. + UnsupportedOperation(String), +} + +impl fmt::Display for GenericSQLError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::ColumnNotFound(col) => write!(f, "Column not found: {}", col), + Self::InvalidConfig(msg) => write!(f, "Invalid configuration: {}", msg), + Self::UnsupportedOperation(msg) => write!(f, "Unsupported operation: {}", msg), + } + } +} + +impl std::error::Error for GenericSQLError {} + +/// Result type alias for SQL operations. +pub type GenericSQLResult = Result; + +#[derive(Clone, Debug, Deserialize, Default)] +pub struct GenericSQLVirtualServerModelArgs { + create_entity: Option, + grouping_fn: Option, +} + +/// A stateless SQL query builder for virtual server operations. +/// +/// This struct generates SQL query strings without executing them, allowing +/// the caller to execute the queries against a SQL connection. +#[derive(Debug, Default, Clone)] +pub struct GenericSQLVirtualServerModel(GenericSQLVirtualServerModelArgs); + +impl GenericSQLVirtualServerModel { + /// Creates a new `GenericSQLVirtualServerModel` instance. + pub fn new(args: GenericSQLVirtualServerModelArgs) -> Self { + tracing::error!("{:?}", args); + Self(args) + } + + /// Returns the SQL query to list all hosted tables. 
+ /// + /// # Returns + /// SQL: `SHOW ALL TABLES` + pub fn get_hosted_tables(&self) -> GenericSQLResult { + Ok("SHOW ALL TABLES".to_string()) + } + + /// Returns the SQL query to describe a table's schema. + /// + /// # Arguments + /// * `table_id` - The identifier of the table to describe. + /// + /// # Returns + /// SQL: `DESCRIBE {table_id}` + pub fn table_schema(&self, table_id: &str) -> GenericSQLResult { + Ok(format!("DESCRIBE {}", table_id)) + } + + /// Returns the SQL query to get the row count of a table. + /// + /// # Arguments + /// * `table_id` - The identifier of the table. + /// + /// # Returns + /// SQL: `SELECT COUNT(*) FROM {table_id}` + pub fn table_size(&self, table_id: &str) -> GenericSQLResult { + Ok(format!("SELECT COUNT(*) FROM {}", table_id)) + } + + /// Returns the SQL query to get the column count of a view. + /// + /// # Arguments + /// * `view_id` - The identifier of the view. + /// + /// # Returns + /// SQL: `SELECT COUNT(*) FROM (DESCRIBE {view_id})` + pub fn view_column_size(&self, view_id: &str) -> GenericSQLResult { + Ok(format!("SELECT COUNT(*) FROM (DESCRIBE {})", view_id)) + } + + /// Returns the SQL query to validate an expression against a table. + /// + /// # Arguments + /// * `table_id` - The identifier of the table. + /// * `expression` - The SQL expression to validate. + /// + /// # Returns + /// SQL: `DESCRIBE (SELECT {expression} FROM {table_id})` + pub fn table_validate_expression( + &self, + table_id: &str, + expression: &str, + ) -> GenericSQLResult { + Ok(format!( + "DESCRIBE (SELECT {} FROM {})", + expression, table_id + )) + } + + /// Returns the SQL query to delete a view. + /// + /// # Arguments + /// * `view_id` - The identifier of the view to delete. 
+ /// + /// # Returns + /// SQL: `DROP TABLE IF EXISTS {view_id}` + pub fn view_delete(&self, view_id: &str) -> GenericSQLResult { + Ok(format!("DROP TABLE IF EXISTS {}", view_id)) + } + + /// Returns the SQL query to create a view from a table with the given + /// configuration. + /// + /// # Arguments + /// * `table_id` - The identifier of the source table. + /// * `view_id` - The identifier for the new view. + /// * `config` - The view configuration specifying columns, group_by, + /// split_by, etc. + /// + /// # Returns + /// SQL: `CREATE TABLE {view_id} AS (...)` + pub fn table_make_view( + &self, + table_id: &str, + view_id: &str, + config: &ViewConfig, + ) -> GenericSQLResult { + let columns = &config.columns; + let group_by = &config.group_by; + let split_by = &config.split_by; + let aggregates = &config.aggregates; + let sort = &config.sort; + let expressions = &config.expressions.0; + let filter = &config.filter; + + let col_name = |col: &str| -> String { + expressions + .get(col) + .cloned() + .unwrap_or_else(|| format!("\"{}\"", col)) + }; + + let get_aggregate = |col: &str| -> Option<&Aggregate> { aggregates.get(col) }; + let generate_select_clauses = || -> Vec { + let mut clauses = Vec::new(); + + if !group_by.is_empty() { + for col in columns.iter().flatten() { + let agg = get_aggregate(col) + .map(Self::aggregate_to_string) + .unwrap_or_else(|| "any_value".to_string()); + clauses.push(format!( + "{}({}) as \"{}\"", + agg, + col_name(col), + col.replace('"', "\"\"").replace("_", "-") + )); + } + + if split_by.is_empty() { + for (idx, gb_col) in group_by.iter().enumerate() { + clauses.push(format!("{} as __ROW_PATH_{}__", col_name(gb_col), idx)); + } + + let groups = group_by.iter().map(|c| col_name(c)).collect::>(); + let grouping_fn = self.0.grouping_fn.as_deref().unwrap_or("GROUPING_ID"); + clauses.push(format!( + "{}({}) AS __GROUPING_ID__", + grouping_fn, + groups.join(", ") + )); + } + } else if !columns.is_empty() { + for col in 
columns.iter().flatten() { + let escaped_col = col.replace('"', "\"\"").replace("_", "-"); + clauses.push(format!("{} as \"{}\"", col_name(col), escaped_col)); + } + } + + clauses + }; + + let mut order_by_clauses: Vec = Vec::new(); + let mut window_clauses: Vec = Vec::new(); + let mut where_clauses: Vec = Vec::new(); + + if !group_by.is_empty() { + for gidx in 0..group_by.len() { + let groups = group_by[..=gidx] + .iter() + .map(|c| col_name(c)) + .collect::>() + .join(", "); + + if split_by.is_empty() { + let grouping_fn = self.0.grouping_fn.as_deref().unwrap_or("GROUPING_ID"); + order_by_clauses.push(format!("{}({}) DESC", grouping_fn, groups)); + } + + for Sort(sort_col, sort_dir) in sort { + if *sort_dir != SortDir::None { + let agg = get_aggregate(sort_col) + .map(Self::aggregate_to_string) + .unwrap_or_else(|| "any_value".to_string()); + let dir_str = Self::sort_dir_to_string(sort_dir); + + if gidx >= group_by.len() - 1 { + order_by_clauses.push(format!( + "{}({}) {}", + agg, + col_name(sort_col), + dir_str + )); + } else { + order_by_clauses.push(format!( + "first({}({})) OVER __WINDOW_{}__ {}", + agg, + col_name(sort_col), + gidx, + dir_str + )); + } + } + } + + order_by_clauses.push(format!("__ROW_PATH_{}__ ASC", gidx)); + } + } else { + for Sort(sort_col, sort_dir) in sort { + if *sort_dir != SortDir::None { + let dir_str = Self::sort_dir_to_string(sort_dir); + order_by_clauses.push(format!("{} {}", col_name(sort_col), dir_str)); + } + } + } + + if !sort.is_empty() && group_by.len() > 1 { + for gidx in 0..(group_by.len() - 1) { + let partition = (0..=gidx) + .map(|i| format!("__ROW_PATH_{}__", i)) + .collect::>() + .join(", "); + + let sub_groups = group_by[..=gidx] + .iter() + .map(|c| col_name(c)) + .collect::>() + .join(", "); + + let groups = group_by.iter().map(|c| col_name(c)).collect::>(); + let grouping_fn = self.0.grouping_fn.as_deref().unwrap_or("GROUPING_ID"); + window_clauses.push(format!( + "__WINDOW_{}__ AS (PARTITION BY {}({}), {} ORDER BY 
{})", + gidx, + grouping_fn, + sub_groups, + partition, + groups.join(", ") + )); + } + } + + for flt in filter { + let term = Self::filter_term_to_sql(flt.term()); + if let Some(term_lit) = term { + where_clauses.push(format!( + "{} {} {}", + col_name(flt.column()), + flt.op(), + term_lit + )); + } + } + + let mut query = if !split_by.is_empty() { + format!("SELECT * FROM {}", table_id) + } else { + let select_clauses = generate_select_clauses(); + format!("SELECT {} FROM {}", select_clauses.join(", "), table_id) + }; + + if !where_clauses.is_empty() { + query = format!("{} WHERE {}", query, where_clauses.join(" AND ")); + } + + if !split_by.is_empty() { + let groups = group_by.iter().map(|c| col_name(c)).collect::>(); + let group_aliases = group_by + .iter() + .enumerate() + .map(|(i, c)| format!("{} AS __ROW_PATH_{}__", col_name(c), i)) + .collect::>() + .join(", "); + let pivot_on = split_by + .iter() + .map(|c| format!("\"{}\"", c)) + .collect::>() + .join(", "); + let pivot_using = generate_select_clauses().join(", "); + + query = format!( + "SELECT * EXCLUDE ({}) , {} FROM (PIVOT ({}) ON {} USING {} GROUP BY {})", + groups.join(", "), + group_aliases, + query, + pivot_on, + pivot_using, + groups.join(", ") + ); + } else if !group_by.is_empty() { + let groups = group_by.iter().map(|c| col_name(c)).collect::>(); + query = format!("{} GROUP BY ROLLUP({})", query, groups.join(", ")); + } + + if !window_clauses.is_empty() { + query = format!("{} WINDOW {}", query, window_clauses.join(", ")); + } + + if !order_by_clauses.is_empty() { + query = format!("{} ORDER BY {}", query, order_by_clauses.join(", ")); + } + + let template = self.0.create_entity.as_deref().unwrap_or("TABLE"); + Ok(format!("CREATE {} {} AS ({})", template, view_id, query)) + } + + /// Returns the SQL query to fetch data from a view with the given viewport. + /// + /// # Arguments + /// * `view_id` - The identifier of the view. + /// * `config` - The view configuration. 
+ /// * `viewport` - The viewport specifying row/column ranges. + /// * `schema` - The schema of the view (column names to types). + /// + /// # Returns + /// SQL: `SELECT ... FROM {view_id} LIMIT ... OFFSET ...` + pub fn view_get_data( + &self, + view_id: &str, + config: &ViewConfig, + viewport: &ViewPort, + schema: &IndexMap, + ) -> GenericSQLResult { + let group_by = &config.group_by; + let split_by = &config.split_by; + let start_col = viewport.start_col.unwrap_or(0) as usize; + let end_col = viewport.end_col.map(|x| x as usize); + let start_row = viewport.start_row.unwrap_or(0); + let end_row = viewport.end_row; + let limit_clause = if let Some(end) = end_row { + format!("LIMIT {} OFFSET {}", end - start_row, start_row) + } else { + String::new() + }; + + let data_columns: Vec<&String> = schema + .keys() + .filter(|col_name| !col_name.starts_with("__")) + .skip(start_col) + .take(end_col.map(|e| e - start_col).unwrap_or(usize::MAX)) + .collect(); + + let mut group_by_cols: Vec = Vec::new(); + if !group_by.is_empty() { + if split_by.is_empty() { + group_by_cols.push("\"__GROUPING_ID__\"".to_string()); + } + + for idx in 0..group_by.len() { + group_by_cols.push(format!("\"__ROW_PATH_{}__\"", idx)); + } + } + + let all_columns: Vec = group_by_cols + .into_iter() + .chain(data_columns.iter().map(|col| format!("\"{}\"", col))) + .collect(); + + Ok(format!( + "SELECT {} FROM {} {}", + all_columns.join(", "), + view_id, + limit_clause + ) + .trim() + .to_string()) + } + + /// Returns the SQL query to describe a view's schema. + /// + /// # Arguments + /// * `view_id` - The identifier of the view. + /// + /// # Returns + /// SQL: `DESCRIBE {view_id}` + pub fn view_schema(&self, view_id: &str) -> GenericSQLResult { + Ok(format!("DESCRIBE {}", view_id)) + } + + /// Returns the SQL query to get the row count of a view. + /// + /// # Arguments + /// * `view_id` - The identifier of the view. 
+ /// + /// # Returns + /// SQL: `SELECT COUNT(*) FROM {view_id}` + pub fn view_size(&self, view_id: &str) -> GenericSQLResult { + Ok(format!("SELECT COUNT(*) FROM {}", view_id)) + } + + fn aggregate_to_string(agg: &Aggregate) -> String { + match agg { + Aggregate::SingleAggregate(name) => name.clone(), + Aggregate::MultiAggregate(name, _args) => name.clone(), + } + } + + fn sort_dir_to_string(dir: &SortDir) -> &'static str { + match dir { + SortDir::None => "", + SortDir::Asc | SortDir::ColAsc | SortDir::AscAbs | SortDir::ColAscAbs => "ASC", + SortDir::Desc | SortDir::ColDesc | SortDir::DescAbs | SortDir::ColDescAbs => "DESC", + } + } + + fn filter_term_to_sql(term: &FilterTerm) -> Option { + match term { + FilterTerm::Scalar(scalar) => Self::scalar_to_sql(scalar), + FilterTerm::Array(scalars) => { + let values: Vec = scalars.iter().filter_map(Self::scalar_to_sql).collect(); + if values.is_empty() { + None + } else { + Some(format!("({})", values.join(", "))) + } + }, + } + } + + fn scalar_to_sql(scalar: &Scalar) -> Option { + match scalar { + Scalar::Null => None, + Scalar::Bool(b) => Some(if *b { "TRUE" } else { "FALSE" }.to_string()), + Scalar::Float(f) => Some(f.to_string()), + Scalar::String(s) => Some(format!("'{}'", s.replace('\'', "''"))), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_get_hosted_tables() { + let builder = + GenericSQLVirtualServerModel::new(GenericSQLVirtualServerModelArgs::default()); + assert_eq!(builder.get_hosted_tables().unwrap(), "SHOW ALL TABLES"); + } + + #[test] + fn test_table_schema() { + let builder = + GenericSQLVirtualServerModel::new(GenericSQLVirtualServerModelArgs::default()); + assert_eq!( + builder.table_schema("my_table").unwrap(), + "DESCRIBE my_table" + ); + } + + #[test] + fn test_table_size() { + let builder = + GenericSQLVirtualServerModel::new(GenericSQLVirtualServerModelArgs::default()); + assert_eq!( + builder.table_size("my_table").unwrap(), + "SELECT COUNT(*) FROM my_table" + ); 
+ } + + #[test] + fn test_view_delete() { + let builder = + GenericSQLVirtualServerModel::new(GenericSQLVirtualServerModelArgs::default()); + assert_eq!( + builder.view_delete("my_view").unwrap(), + "DROP TABLE IF EXISTS my_view" + ); + } + + #[test] + fn test_table_make_view_simple() { + let builder = + GenericSQLVirtualServerModel::new(GenericSQLVirtualServerModelArgs::default()); + let mut config = ViewConfig::default(); + config.columns = vec![Some("col1".to_string()), Some("col2".to_string())]; + let sql = builder + .table_make_view("source_table", "dest_view", &config) + .unwrap(); + + assert!(sql.starts_with("CREATE TABLE dest_view AS")); + assert!(sql.contains("\"col1\"")); + assert!(sql.contains("\"col2\"")); + } + + #[test] + fn test_table_make_view_with_group_by() { + let builder = + GenericSQLVirtualServerModel::new(GenericSQLVirtualServerModelArgs::default()); + let mut config = ViewConfig::default(); + config.columns = vec![Some("value".to_string())]; + config.group_by = vec!["category".to_string()]; + let sql = builder + .table_make_view("source_table", "dest_view", &config) + .unwrap(); + + assert!(sql.contains("GROUP BY ROLLUP")); + assert!(sql.contains("__ROW_PATH_0__")); + assert!(sql.contains("__GROUPING_ID__")); + } + + #[test] + fn test_view_get_data() { + let builder = + GenericSQLVirtualServerModel::new(GenericSQLVirtualServerModelArgs::default()); + let config = ViewConfig::default(); + let viewport = ViewPort { + start_row: Some(0), + end_row: Some(100), + start_col: Some(0), + end_col: Some(5), + }; + + let mut schema = IndexMap::new(); + schema.insert("col1".to_string(), ColumnType::String); + schema.insert("col2".to_string(), ColumnType::Integer); + let sql = builder + .view_get_data("my_view", &config, &viewport, &schema) + .unwrap(); + + assert!(sql.contains("SELECT")); + assert!(sql.contains("FROM my_view")); + assert!(sql.contains("LIMIT 100 OFFSET 0")); + } +} diff --git a/rust/perspective-client/src/rust/virtual_server/handler.rs 
b/rust/perspective-client/src/rust/virtual_server/handler.rs index fa30e85d4f..f30139cb47 100644 --- a/rust/perspective-client/src/rust/virtual_server/handler.rs +++ b/rust/perspective-client/src/rust/virtual_server/handler.rs @@ -20,7 +20,7 @@ use super::features::Features; use crate::config::{ViewConfig, ViewConfigUpdate}; use crate::proto::{ColumnType, HostedTable, TableMakePortReq, ViewPort}; -#[cfg(target_arch = "wasm32")] +#[cfg(feature = "sendable")] pub type VirtualServerFuture<'a, T> = Pin + 'a>>; /// A boxed future that conditionally implements `Send` based on the target @@ -28,7 +28,7 @@ pub type VirtualServerFuture<'a, T> = Pin + 'a>>; /// /// This only compiles on wasm, except for `rust-analyzer` and `metadata` /// generation, so this type exists to trick the compiler -#[cfg(not(target_arch = "wasm32"))] +#[cfg(not(feature = "sendable"))] pub type VirtualServerFuture<'a, T> = Pin + Send + 'a>>; /// Handler trait for implementing virtual server backends. @@ -41,8 +41,12 @@ pub trait VirtualServerHandler { // Required /// The error type returned by handler methods. + #[cfg(not(feature = "sendable"))] type Error: std::error::Error + Send + Sync + 'static; + #[cfg(feature = "sendable")] + type Error: std::error::Error + 'static; + /// Returns a list of all tables hosted by this handler. 
fn get_hosted_tables(&self) -> VirtualServerFuture<'_, Result, Self::Error>>; @@ -74,6 +78,7 @@ pub trait VirtualServerHandler { &self, view_id: &str, config: &ViewConfig, + schema: &IndexMap, viewport: &ViewPort, ) -> VirtualServerFuture<'_, Result>; diff --git a/rust/perspective-client/src/rust/virtual_server/mod.rs b/rust/perspective-client/src/rust/virtual_server/mod.rs index 2082c2c123..167856ed4d 100644 --- a/rust/perspective-client/src/rust/virtual_server/mod.rs +++ b/rust/perspective-client/src/rust/virtual_server/mod.rs @@ -18,11 +18,16 @@ mod data; mod error; mod features; +mod generic_sql_model; mod handler; mod server; pub use data::{SetVirtualDataColumn, VirtualDataCell, VirtualDataColumn, VirtualDataSlice}; pub use error::{ResultExt, VirtualServerError}; pub use features::{AggSpec, Features}; +pub use generic_sql_model::{ + GenericSQLError, GenericSQLResult, GenericSQLVirtualServerModel, + GenericSQLVirtualServerModelArgs, +}; pub use handler::{VirtualServerFuture, VirtualServerHandler}; pub use server::VirtualServer; diff --git a/rust/perspective-client/src/rust/virtual_server/server.rs b/rust/perspective-client/src/rust/virtual_server/server.rs index 99f721ba1d..0429be6976 100644 --- a/rust/perspective-client/src/rust/virtual_server/server.rs +++ b/rust/perspective-client/src/rust/virtual_server/server.rs @@ -22,8 +22,8 @@ use crate::config::{ViewConfig, ViewConfigUpdate}; use crate::proto::response::ClientResp; use crate::proto::table_validate_expr_resp::ExprValidationError; use crate::proto::{ - GetFeaturesResp, GetHostedTablesResp, MakeTableResp, Request, Response, ServerError, - TableMakePortResp, TableMakeViewResp, TableOnDeleteResp, TableRemoveDeleteResp, + ColumnType, GetFeaturesResp, GetHostedTablesResp, MakeTableResp, Request, Response, + ServerError, TableMakePortResp, TableMakeViewResp, TableOnDeleteResp, TableRemoveDeleteResp, TableSchemaResp, TableSizeResp, TableValidateExprResp, ViewColumnPathsResp, ViewDeleteResp, ViewDimensionsResp, 
ViewExpressionSchemaResp, ViewGetConfigResp, ViewOnDeleteResp, ViewOnUpdateResp, ViewRemoveDeleteResp, ViewRemoveOnUpdateResp, ViewSchemaResp, @@ -56,6 +56,7 @@ pub struct VirtualServer { handler: T, view_to_table: IndexMap, view_configs: IndexMap, + view_schemas: IndexMap>, } impl VirtualServer { @@ -65,6 +66,7 @@ impl VirtualServer { handler, view_configs: IndexMap::default(), view_to_table: IndexMap::default(), + view_schemas: IndexMap::default(), } } @@ -85,10 +87,45 @@ impl VirtualServer { match self.internal_handle_request(msg.clone()).await { Ok(resp) => Ok(resp), - Err(err) => Ok(respond!(msg, ServerError { - message: err.to_string(), - status_code: 1 - })), + Err(err) => { + tracing::error!("{}", err); + Ok(respond!(msg, ServerError { + message: err.to_string(), + status_code: 1 + })) + }, + } + } + + async fn get_cached_view_schema( + &mut self, + entity_id: &str, + to_psp_format: bool, + ) -> Result, VirtualServerError> { + if !self.view_schemas.contains_key(entity_id) { + self.view_schemas.insert( + entity_id.to_string(), + self.handler + .view_schema(entity_id, self.view_configs.get(entity_id).unwrap()) + .await?, + ); + } + + if to_psp_format { + Ok(self + .view_schemas + .get(entity_id) + .unwrap() + .iter() + .map(|(k, v)| { + ( + k.split("_").collect::>().last().unwrap().to_string(), + *v, + ) + }) + .collect()) + } else { + Ok(self.view_schemas.get(entity_id).cloned().unwrap()) } } @@ -109,20 +146,18 @@ impl VirtualServer { }, TableSchemaReq(_) => { respond!(msg, TableSchemaResp { - schema: self - .handler - .table_schema(msg.entity_id.as_str()) - .await - .ok() - .map(|value| crate::proto::Schema { - schema: value - .iter() - .map(|x| crate::proto::schema::KeyTypePair { - name: x.0.to_string(), - r#type: *x.1 as i32, - }) - .collect(), - }) + schema: Some(crate::proto::Schema { + schema: self + .handler + .table_schema(msg.entity_id.as_str()) + .await? 
+ .iter() + .map(|x| crate::proto::schema::KeyTypePair { + name: x.0.to_string(), + r#type: *x.1 as i32, + }) + .collect() + }) }) }, TableMakePortReq(req) => { @@ -183,14 +218,10 @@ impl VirtualServer { ViewSchemaReq(_) => { respond!(msg, ViewSchemaResp { schema: self - .handler - .view_schema( - msg.entity_id.as_str(), - self.view_configs.get(&msg.entity_id).unwrap() - ) + .get_cached_view_schema(&msg.entity_id, true) .await? .into_iter() - .map(|(x, y)| (x, y as i32)) + .map(|(x, y)| (x.to_string(), y as i32)) .collect() }) }, @@ -268,10 +299,11 @@ impl VirtualServer { }, ViewToRowsStringReq(view_to_rows_string_req) => { let viewport = view_to_rows_string_req.viewport.unwrap(); + let schema = self.get_cached_view_schema(&msg.entity_id, false).await?; let config = self.view_configs.get(&msg.entity_id).unwrap(); let cols = self .handler - .view_get_data(msg.entity_id.as_str(), config, &viewport) + .view_get_data(msg.entity_id.as_str(), config, &schema, &viewport) .await?; let rows = cols.to_rows(); @@ -282,10 +314,11 @@ impl VirtualServer { }, ViewToColumnsStringReq(view_to_columns_string_req) => { let viewport = view_to_columns_string_req.viewport.unwrap(); + let schema = self.get_cached_view_schema(&msg.entity_id, false).await?; let config = self.view_configs.get(&msg.entity_id).unwrap(); let cols = self .handler - .view_get_data(msg.entity_id.as_str(), config, &viewport) + .view_get_data(msg.entity_id.as_str(), config, &schema, &viewport) .await?; let json_string = serde_json::to_string(&cols) diff --git a/rust/perspective-js/Cargo.toml b/rust/perspective-js/Cargo.toml index 19240b4571..93ab9f7948 100644 --- a/rust/perspective-js/Cargo.toml +++ b/rust/perspective-js/Cargo.toml @@ -63,7 +63,7 @@ anyhow = "1.0.66" wasm-bindgen-test = "0.3.13" [dependencies] -perspective-client = { version = "4.1.1" } +perspective-client = { version = "4.1.1", features = ["sendable"] } bytes = "1.10.1" chrono = "0.4" derivative = "2.2.0" @@ -91,7 +91,7 @@ version = "11.0.1" 
features = ["serde-json-impl", "no-serde-warnings", "import-esm"] [dependencies.wasm-bindgen] -version = "=0.2.105" +version = "=0.2.106" features = ["serde-serialize", "enable-interning"] [dependencies.web-sys] diff --git a/rust/perspective-js/build.mjs b/rust/perspective-js/build.mjs index a8281e456a..256285d483 100644 --- a/rust/perspective-js/build.mjs +++ b/rust/perspective-js/build.mjs @@ -70,13 +70,6 @@ const BUILD = [ // "Load as binary": true, // "Bundler friendly": true, // }, - { - entryPoints: ["src/ts/virtual_servers/duckdb.ts"], - format: "esm", - target: "es2022", - plugins: [PerspectiveEsbuildPlugin()], - outfile: "dist/esm/virtual_servers/duckdb.js", - }, { entryPoints: ["src/ts/perspective.browser.ts"], format: "esm", @@ -101,6 +94,21 @@ const BUILD = [ loader: { ".wasm": "binary" }, outdir: "dist/esm", }, + + { + entryPoints: ["src/ts/virtual_servers/duckdb.ts"], + format: "esm", + target: "es2022", + plugins: [PerspectiveEsbuildPlugin()], + outfile: "dist/esm/virtual_servers/duckdb.js", + }, + { + entryPoints: ["src/ts/virtual_servers/clickhouse.ts"], + format: "esm", + target: "es2022", + plugins: [PerspectiveEsbuildPlugin()], + outfile: "dist/esm/virtual_servers/clickhouse.js", + }, ]; const INHERIT = { diff --git a/rust/perspective-js/package.json b/rust/perspective-js/package.json index 09fd9ff70f..0dedfa5544 100644 --- a/rust/perspective-js/package.json +++ b/rust/perspective-js/package.json @@ -53,6 +53,7 @@ "@perspective-dev/esbuild-plugin": "workspace:", "@perspective-dev/metadata": "workspace:", "@perspective-dev/test": "workspace:", + "@clickhouse/client-web": "catalog:", "@duckdb/duckdb-wasm": "catalog:", "@playwright/experimental-ct-react": "catalog:", "@playwright/test": "catalog:", diff --git a/rust/perspective-js/src/rust/generic_sql_model.rs b/rust/perspective-js/src/rust/generic_sql_model.rs new file mode 100644 index 0000000000..ee281310a7 --- /dev/null +++ b/rust/perspective-js/src/rust/generic_sql_model.rs @@ -0,0 +1,189 @@ 
+// ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ +// ┃ ██████ ██████ ██████ █ █ █ █ █ █▄ ▀███ █ ┃ +// ┃ ▄▄▄▄▄█ █▄▄▄▄▄ ▄▄▄▄▄█ ▀▀▀▀▀█▀▀▀▀▀ █ ▀▀▀▀▀█ ████████▌▐███ ███▄ ▀█ █ ▀▀▀▀▀ ┃ +// ┃ █▀▀▀▀▀ █▀▀▀▀▀ █▀██▀▀ ▄▄▄▄▄ █ ▄▄▄▄▄█ ▄▄▄▄▄█ ████████▌▐███ █████▄ █ ▄▄▄▄▄ ┃ +// ┃ █ ██████ █ ▀█▄ █ ██████ █ ███▌▐███ ███████▄ █ ┃ +// ┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫ +// ┃ Copyright (c) 2017, the Perspective Authors. ┃ +// ┃ ╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌ ┃ +// ┃ This file is part of the Perspective library, distributed under the terms ┃ +// ┃ of the [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0). ┃ +// ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛ + +//! WASM bindings for the generic SQL query builder. + +use std::str::FromStr; + +use indexmap::IndexMap; +use js_sys::Object; +use perspective_client::config::ViewConfig; +use perspective_client::proto::{ColumnType, ViewPort}; +use perspective_client::virtual_server; +use wasm_bindgen::prelude::*; + +use crate::utils::*; + +/// JavaScript-facing generic SQL query builder. +/// +/// This struct wraps the Rust `virtual_server::GenericSQLVirtualServerModel` +/// and exposes it to JavaScript via wasm_bindgen. +#[wasm_bindgen] +pub struct GenericSQLVirtualServerModel { + inner: virtual_server::GenericSQLVirtualServerModel, +} + +#[wasm_bindgen] +extern "C" { + pub type JsGenericSQLVirtualServerModelArgs; +} + +#[wasm_bindgen] +impl GenericSQLVirtualServerModel { + /// Creates a new `GenericSQLVirtualServerModel` instance. + #[wasm_bindgen(constructor)] + pub fn new(args: Option) -> Result { + Ok(Self { + inner: virtual_server::GenericSQLVirtualServerModel::new( + args.map(|x| x.into_serde_ext()) + .transpose()? + .unwrap_or_default(), + ), + }) + } + + /// Returns the SQL query to list all hosted tables. 
+ #[wasm_bindgen(js_name = "getHostedTables")] + pub fn get_hosted_tables(&self) -> Result { + self.inner + .get_hosted_tables() + .map_err(|e| JsValue::from_str(&e.to_string())) + } + + /// Returns the SQL query to describe a table's schema. + #[wasm_bindgen(js_name = "tableSchema")] + pub fn table_schema(&self, table_id: &str) -> Result { + self.inner + .table_schema(table_id) + .map_err(|e| JsValue::from_str(&e.to_string())) + } + + /// Returns the SQL query to get the row count of a table. + #[wasm_bindgen(js_name = "tableSize")] + pub fn table_size(&self, table_id: &str) -> Result { + self.inner + .table_size(table_id) + .map_err(|e| JsValue::from_str(&e.to_string())) + } + + /// Returns the SQL query to get the column count of a view. + #[wasm_bindgen(js_name = "viewColumnSize")] + pub fn view_column_size(&self, view_id: &str) -> Result { + self.inner + .view_column_size(view_id) + .map_err(|e| JsValue::from_str(&e.to_string())) + } + + /// Returns the SQL query to validate an expression against a table. + #[wasm_bindgen(js_name = "tableValidateExpression")] + pub fn table_validate_expression( + &self, + table_id: &str, + expression: &str, + ) -> Result { + self.inner + .table_validate_expression(table_id, expression) + .map_err(|e| JsValue::from_str(&e.to_string())) + } + + /// Returns the SQL query to delete a view. + #[wasm_bindgen(js_name = "viewDelete")] + pub fn view_delete(&self, view_id: &str) -> Result { + self.inner + .view_delete(view_id) + .map_err(|e| JsValue::from_str(&e.to_string())) + } + + /// Returns the SQL query to create a view from a table with the given + /// configuration. 
+ #[wasm_bindgen(js_name = "tableMakeView")] + pub fn table_make_view( + &self, + table_id: &str, + view_id: &str, + config: JsValue, + ) -> Result { + let config: ViewConfig = serde_wasm_bindgen::from_value(config) + .map_err(|e| JsValue::from_str(&e.to_string()))?; + + self.inner + .table_make_view(table_id, view_id, &config) + .map_err(|e| JsValue::from_str(&e.to_string())) + } + + /// Returns the SQL query to fetch data from a view with the given viewport. + #[wasm_bindgen(js_name = "viewGetData")] + pub fn view_get_data( + &self, + view_id: &str, + config: JsValue, + viewport: JsValue, + schema: JsValue, + ) -> Result { + let config: ViewConfig = serde_wasm_bindgen::from_value(config) + .map_err(|e| JsValue::from_str(&e.to_string()))?; + + let viewport: ViewPort = serde_wasm_bindgen::from_value(viewport) + .map_err(|e| JsValue::from_str(&e.to_string()))?; + + let schema = self.parse_schema(schema)?; + + self.inner + .view_get_data(view_id, &config, &viewport, &schema) + .map_err(|e| JsValue::from_str(&e.to_string())) + } + + /// Returns the SQL query to describe a view's schema. + #[wasm_bindgen(js_name = "viewSchema")] + pub fn view_schema(&self, view_id: &str) -> Result { + self.inner + .view_schema(view_id) + .map_err(|e| JsValue::from_str(&e.to_string())) + } + + /// Returns the SQL query to get the row count of a view. 
+ #[wasm_bindgen(js_name = "viewSize")] + pub fn view_size(&self, view_id: &str) -> Result { + self.inner + .view_size(view_id) + .map_err(|e| JsValue::from_str(&e.to_string())) + } +} + +impl GenericSQLVirtualServerModel { + fn parse_schema(&self, schema: JsValue) -> Result, JsValue> { + let obj = schema.dyn_ref::().ok_or_else(|| { + JsValue::from_str("Schema must be an object mapping column names to types") + })?; + + let mut result = IndexMap::new(); + let entries = Object::entries(obj); + for i in 0..entries.length() { + let entry = entries.get(i); + let entry_array = entry + .dyn_ref::() + .ok_or_else(|| JsValue::from_str("Invalid schema entry"))?; + let key = entry_array + .get(0) + .as_string() + .ok_or_else(|| JsValue::from_str("Column name must be a string"))?; + let value = entry_array + .get(1) + .as_string() + .ok_or_else(|| JsValue::from_str("Column type must be a string"))?; + let column_type = ColumnType::from_str(&value) + .map_err(|_| JsValue::from_str(&format!("Unknown column type: {}", value)))?; + result.insert(key, column_type); + } + Ok(result) + } +} diff --git a/rust/perspective-js/src/rust/lib.rs b/rust/perspective-js/src/rust/lib.rs index 891be28f82..b259466712 100644 --- a/rust/perspective-js/src/rust/lib.rs +++ b/rust/perspective-js/src/rust/lib.rs @@ -26,20 +26,20 @@ extern crate alloc; mod client; +mod generic_sql_model; mod table; mod table_data; pub mod utils; mod view; -#[cfg(target_arch = "wasm32")] mod virtual_server; #[cfg(feature = "export-init")] use wasm_bindgen::prelude::*; pub use crate::client::Client; +pub use crate::generic_sql_model::*; pub use crate::table::*; pub use crate::table_data::*; -#[cfg(target_arch = "wasm32")] pub use crate::virtual_server::*; #[cfg(feature = "export-init")] diff --git a/rust/perspective-js/src/rust/virtual_server.rs b/rust/perspective-js/src/rust/virtual_server.rs index 7f5bb78328..3da54ef1b7 100644 --- a/rust/perspective-js/src/rust/virtual_server.rs +++ 
b/rust/perspective-js/src/rust/virtual_server.rs @@ -20,22 +20,17 @@ use std::sync::{Arc, Mutex}; use indexmap::IndexMap; use js_sys::{Array, Date, Object, Reflect}; use perspective_client::proto::{ColumnType, HostedTable}; -use perspective_client::virtual_server::{ - Features, ResultExt, VirtualDataSlice, VirtualServer, VirtualServerHandler, -}; +use perspective_client::virtual_server; +use perspective_client::virtual_server::{Features, ResultExt, VirtualServerHandler}; use serde::Serialize; use wasm_bindgen::prelude::*; use wasm_bindgen_futures::JsFuture; +use crate::JsViewConfig; use crate::utils::{ApiError, ApiFuture, *}; -// Conditional type alias matching the trait definition -#[cfg(target_arch = "wasm32")] type HandlerFuture = Pin>>; -#[cfg(not(target_arch = "wasm32"))] -type HandlerFuture = Pin + Send>>; - #[derive(Debug)] pub struct JsError(JsValue); @@ -65,16 +60,8 @@ impl From for JsError { } } -// SAFETY: In WASM, we're always single-threaded, so JsError can safely be Send -// + Sync -unsafe impl Send for JsError {} -unsafe impl Sync for JsError {} - pub struct JsServerHandler(Object); -unsafe impl Send for JsServerHandler {} -unsafe impl Sync for JsServerHandler {} - impl JsServerHandler { fn call_method_js(&self, method: &str, args: &Array) -> Result { let func = Reflect::get(&self.0, &JsValue::from_str(method))?; @@ -90,7 +77,7 @@ impl JsServerHandler { // Check if result is a Promise if result.is_instance_of::() { let promise = js_sys::Promise::from(result); - JsFuture::from(promise).await.map_err(|e| JsError(e)) + JsFuture::from(promise).await.map_err(JsError) } else { Ok(result) } @@ -293,11 +280,7 @@ impl VirtualServerHandler for JsServerHandler { let handler = self.0.clone(); let view_id = view_id.to_string(); - let config_value = if has_view_schema { - serde_wasm_bindgen::to_value(config).ok() - } else { - None - }; + let config_value = JsValue::from_serde_ext(config).ok(); Box::pin(async move { let this = JsServerHandler(handler); @@ -446,7 
+429,6 @@ impl VirtualServerHandler for JsServerHandler { let handler = self.0.clone(); let table_id = table_id.to_string(); - use perspective_client::proto::make_table_data::Data; let data_value = match &data.data { Some(Data::FromCsv(csv)) => JsValue::from_str(csv), @@ -474,22 +456,25 @@ impl VirtualServerHandler for JsServerHandler { &self, view_id: &str, config: &perspective_client::config::ViewConfig, + schema: &IndexMap, viewport: &perspective_client::proto::ViewPort, - ) -> HandlerFuture> { + ) -> HandlerFuture> { let handler = self.0.clone(); let view_id = view_id.to_string(); let window: JsViewPort = viewport.clone().into(); let config_value = serde_wasm_bindgen::to_value(config).unwrap(); let window_value = serde_wasm_bindgen::to_value(&window).unwrap(); + let schema_value = JsValue::from_serde_ext(&schema).unwrap(); Box::pin(async move { let this = JsServerHandler(handler); - let data = JsVirtualDataSlice::default(); + let data = VirtualDataSlice::new(config_value.clone().unchecked_into()); { let args = Array::new(); args.push(&JsValue::from_str(&view_id)); args.push(&config_value); + args.push(&schema_value); args.push(&window_value); args.push(&JsValue::from(data.clone())); this.call_method_js_async("viewGetData", &args).await?; @@ -497,8 +482,8 @@ impl VirtualServerHandler for JsServerHandler { // Lock the mutex and take ownership of the inner data // We can't unwrap the Arc because the JsValue might still hold a reference - let JsVirtualDataSlice(_obj, arc) = data; - let slice = std::mem::take(&mut *arc.lock().unwrap()); + let VirtualDataSlice(_obj, arc) = data; + let slice = std::mem::take(&mut *arc.lock().unwrap()).unwrap(); Ok(slice) }) } @@ -530,24 +515,20 @@ impl From for JsViewPort { } } -#[wasm_bindgen] +#[wasm_bindgen(js_name = "VirtualDataSlice")] #[derive(Clone)] -pub struct JsVirtualDataSlice(Object, Arc>); - -impl Default for JsVirtualDataSlice { - fn default() -> Self { - JsVirtualDataSlice( - Object::new(), - 
Arc::new(Mutex::new(VirtualDataSlice::default())), - ) - } -} +pub struct VirtualDataSlice(Object, Arc>>); #[wasm_bindgen] -impl JsVirtualDataSlice { +impl VirtualDataSlice { #[wasm_bindgen(constructor)] - pub fn new() -> Self { - Self::default() + pub fn new(config: JsViewConfig) -> Self { + VirtualDataSlice( + Object::new(), + Arc::new(Mutex::new(Some(virtual_server::VirtualDataSlice::new( + config.into_serde_ext().unwrap(), + )))), + ) } #[wasm_bindgen(js_name = "setCol")] @@ -582,12 +563,16 @@ impl JsVirtualDataSlice { self.1 .lock() .unwrap() + .as_mut() + .unwrap() .set_col(name, group_by_index, index as usize, None as Option) .unwrap(); } else if let Some(s) = val.as_string() { self.1 .lock() .unwrap() + .as_mut() + .unwrap() .set_col(name, group_by_index, index as usize, Some(s)) .unwrap(); } else { @@ -608,12 +593,16 @@ impl JsVirtualDataSlice { self.1 .lock() .unwrap() + .as_mut() + .unwrap() .set_col(name, group_by_index, index as usize, None as Option) .unwrap(); } else if let Some(n) = val.as_f64() { self.1 .lock() .unwrap() + .as_mut() + .unwrap() .set_col(name, group_by_index, index as usize, Some(n as i32)) .unwrap(); } else { @@ -634,12 +623,16 @@ impl JsVirtualDataSlice { self.1 .lock() .unwrap() + .as_mut() + .unwrap() .set_col(name, group_by_index, index as usize, None as Option) .unwrap(); } else if let Some(n) = val.as_f64() { self.1 .lock() .unwrap() + .as_mut() + .unwrap() .set_col(name, group_by_index, index as usize, Some(n)) .unwrap(); } else { @@ -660,12 +653,16 @@ impl JsVirtualDataSlice { self.1 .lock() .unwrap() + .as_mut() + .unwrap() .set_col(name, group_by_index, index as usize, None as Option) .unwrap(); } else if let Some(b) = val.as_bool() { self.1 .lock() .unwrap() + .as_mut() + .unwrap() .set_col(name, group_by_index, index as usize, Some(b)) .unwrap(); } else { @@ -686,6 +683,8 @@ impl JsVirtualDataSlice { self.1 .lock() .unwrap() + .as_mut() + .unwrap() .set_col(name, group_by_index, index as usize, None as Option) 
.unwrap(); } else if let Some(date) = val.dyn_ref::() { @@ -693,30 +692,35 @@ impl JsVirtualDataSlice { self.1 .lock() .unwrap() + .as_mut() + .unwrap() .set_col(name, group_by_index, index as usize, Some(timestamp)) .unwrap(); } else if let Some(n) = val.as_f64() { self.1 .lock() .unwrap() + .as_mut() + .unwrap() .set_col(name, group_by_index, index as usize, Some(n as i64)) .unwrap(); } else { tracing::error!("Unhandled datetime value"); } + Ok(()) } } #[wasm_bindgen] -pub struct JsVirtualServer(Rc>>); +pub struct VirtualServer(Rc>>); #[wasm_bindgen] -impl JsVirtualServer { +impl VirtualServer { #[wasm_bindgen(constructor)] - pub fn new(handler: Object) -> Result { - Ok(JsVirtualServer(Rc::new(UnsafeCell::new( - VirtualServer::new(JsServerHandler(handler)), + pub fn new(handler: Object) -> Result { + Ok(VirtualServer(Rc::new(UnsafeCell::new( + virtual_server::VirtualServer::new(JsServerHandler(handler)), )))) } diff --git a/rust/perspective-js/src/ts/perspective.browser.ts b/rust/perspective-js/src/ts/perspective.browser.ts index 5278e96278..dc951ca1f2 100644 --- a/rust/perspective-js/src/ts/perspective.browser.ts +++ b/rust/perspective-js/src/ts/perspective.browser.ts @@ -13,13 +13,23 @@ export type * from "../../dist/wasm/perspective-js.d.ts"; import type * as psp from "../../dist/wasm/perspective-js.d.ts"; export type * from "./virtual_server.ts"; - import * as psp_virtual from "./virtual_server.ts"; - import * as wasm_module from "../../dist/wasm/perspective-js.js"; import * as api from "./wasm/browser.ts"; import { load_wasm_stage_0 } from "./wasm/decompress.ts"; +export { + GenericSQLVirtualServerModel, + VirtualDataSlice, + VirtualServer, +} from "../../dist/wasm/perspective-js.js"; + +import { + GenericSQLVirtualServerModel, + VirtualDataSlice, + VirtualServer, +} from "../../dist/wasm/perspective-js.js"; + let GLOBAL_SERVER_WASM: Promise; export async function createMessageHandler( @@ -144,4 +154,7 @@ export default { init_client, init_server, 
createMessageHandler, + GenericSQLVirtualServerModel, + VirtualDataSlice, + VirtualServer, }; diff --git a/rust/perspective-js/src/ts/perspective.node.ts b/rust/perspective-js/src/ts/perspective.node.ts index aa23f90c05..44aae2361b 100644 --- a/rust/perspective-js/src/ts/perspective.node.ts +++ b/rust/perspective-js/src/ts/perspective.node.ts @@ -29,9 +29,20 @@ import * as engine from "./wasm/engine.ts"; import { compile_perspective } from "./wasm/emscripten_api.ts"; import * as psp_websocket from "./websocket.ts"; import * as api from "./wasm/browser.ts"; - import * as virtual_server from "./virtual_server.ts"; +export { + GenericSQLVirtualServerModel, + VirtualDataSlice, + VirtualServer, +} from "../../dist/wasm/perspective-js.js"; + +import { + GenericSQLVirtualServerModel, + VirtualDataSlice, + VirtualServer, +} from "../../dist/wasm/perspective-js.js"; + const __dirname = path.dirname(url.fileURLToPath(import.meta.url)); const { resolve } = createRequire(import.meta.url); @@ -337,6 +348,12 @@ export function createMessageHandler( return virtual_server.createMessageHandler(perspective_client, handler); } +/** + * The initialized WASM module. Use this when you need to pass the module + * to components that require it, such as `DuckDBHandler`. 
+ */ +export { perspective_client as wasmModule }; + export default { table, websocket, @@ -347,4 +364,7 @@ export default { on_error, system_info, WebSocketServer, + GenericSQLVirtualServerModel, + VirtualDataSlice, + VirtualServer, }; diff --git a/rust/perspective-js/src/ts/virtual_server.ts b/rust/perspective-js/src/ts/virtual_server.ts index b33b300ae4..d6534823a0 100644 --- a/rust/perspective-js/src/ts/virtual_server.ts +++ b/rust/perspective-js/src/ts/virtual_server.ts @@ -55,8 +55,9 @@ export interface VirtualServerHandler { viewGetData( viewId: string, config: ViewConfig, + schema: Record, viewport: ViewWindow, - dataSlice: perspective.JsVirtualDataSlice, + dataSlice: perspective.VirtualDataSlice, ): void | Promise; viewSchema?( viewId: string, @@ -78,11 +79,11 @@ export function createMessageHandler( mod: typeof perspective, handler: VirtualServerHandler, ) { - let virtualServer: perspective.JsVirtualServer; + let virtualServer: perspective.VirtualServer; async function postMessage(port: MessagePort, msg: MessageEvent) { if (msg.data.cmd === "init") { try { - virtualServer = new mod.JsVirtualServer(handler); + virtualServer = new mod.VirtualServer(handler); if (msg.data.id !== undefined) { port.postMessage({ id: msg.data.id }); } else { @@ -113,14 +114,3 @@ export function createMessageHandler( return channel.port2; } - -/** - * Re-export the WASM VirtualServer and VirtualDataSlice classes with better names. 
- * - * VirtualServer: Handles Perspective protocol messages using your custom handler - * VirtualDataSlice: Used to fill data in viewGetData callbacks - */ -export { - JsVirtualServer as VirtualServer, - JsVirtualDataSlice as VirtualDataSlice, -} from "../../dist/wasm/perspective-js.js"; diff --git a/rust/perspective-js/src/ts/virtual_servers/clickhouse.ts b/rust/perspective-js/src/ts/virtual_servers/clickhouse.ts new file mode 100644 index 0000000000..57ebc0f368 --- /dev/null +++ b/rust/perspective-js/src/ts/virtual_servers/clickhouse.ts @@ -0,0 +1,362 @@ +// ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ +// ┃ ██████ ██████ ██████ █ █ █ █ █ █▄ ▀███ █ ┃ +// ┃ ▄▄▄▄▄█ █▄▄▄▄▄ ▄▄▄▄▄█ ▀▀▀▀▀█▀▀▀▀▀ █ ▀▀▀▀▀█ ████████▌▐███ ███▄ ▀█ █ ▀▀▀▀▀ ┃ +// ┃ █▀▀▀▀▀ █▀▀▀▀▀ █▀██▀▀ ▄▄▄▄▄ █ ▄▄▄▄▄█ ▄▄▄▄▄█ ████████▌▐███ █████▄ █ ▄▄▄▄▄ ┃ +// ┃ █ ██████ █ ▀█▄ █ ██████ █ ███▌▐███ ███████▄ █ ┃ +// ┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫ +// ┃ Copyright (c) 2017, the Perspective Authors. ┃ +// ┃ ╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌ ┃ +// ┃ This file is part of the Perspective library, distributed under the terms ┃ +// ┃ of the [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0). ┃ +// ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛ + +/** + * An implementation of a Perspective Virtual Server for ClickHouse.
+ * + * This import is optional, and so must be imported manually from either + * `@perspective-dev/client/dist/esm/virtual_servers/clickhouse.js` or + * `@perspective-dev/client/src/ts/virtual_servers/clickhouse.ts`; it is not + * exported from the package root `@perspective-dev/client`. + * + * @module + */ + +import type * as perspective from "@perspective-dev/client"; +import type { ColumnType } from "@perspective-dev/client/dist/esm/ts-rs/ColumnType.d.ts"; +import type { ViewConfig } from "@perspective-dev/client/dist/esm/ts-rs/ViewConfig.d.ts"; +import type { ViewWindow } from "@perspective-dev/client/dist/esm/ts-rs/ViewWindow.d.ts"; +import type * as clickhouse from "@clickhouse/client-web"; + +const NUMBER_AGGS = [ + "sum", + "count", + "any_value", + "arbitrary", + "array_agg", + "avg", + "bit_and", + "bit_or", + "bit_xor", + "bitstring_agg", + "bool_and", + "bool_or", + "countif", + "favg", + "fsum", + "geomean", + "kahan_sum", + "last", + "max", + "min", + "product", + "string_agg", + "sumkahan", +]; + +const STRING_AGGS = [ + "count", + "any_value", + "arbitrary", + "first", + "countif", + "last", + "string_agg", +]; + +const FILTER_OPS = [ + "==", + "!=", + "LIKE", + "IS DISTINCT FROM", + "IS NOT DISTINCT FROM", + ">=", + "<=", + ">", + "<", +]; + +function duckdbTypeToPsp(name: string): ColumnType { + if (name.startsWith("Nullable")) { + name = name.match(/Nullable\((.+?)\)/)![1]; + } + + if (name.startsWith("Array")) { + return "string"; + } + + if (name === "Int64" || name === "UInt64" || name === "Float64") { + return "float"; + } + + if (name === "String") { + return "string"; + } + + if (name === "DateTime") { + return "datetime"; + } + + if (name === "Date") { + return "date"; + } + + throw new Error(`Unknown type '${name}'`); +} + +function convertDecimalToNumber(value: any, dtypeString: string) { + if (!(value instanceof Uint32Array || value instanceof Int32Array)) { + return value; + } + + let bigIntValue = BigInt(0); + for (let i = 0; i <
+ value.length; i++) { + bigIntValue |= BigInt(value[i]) << BigInt(i * 32); + } + + const scaleMatch = dtypeString.match(/Decimal\[\d+e(\d+)\]/); + if (scaleMatch) { + const scale = parseInt(scaleMatch[1]); + return Number(bigIntValue) / Math.pow(10, scale); + } else { + return Number(bigIntValue); + } +} + +class Lock { + lockPromise: Promise; + constructor() { + this.lockPromise = Promise.resolve(); + } + + acquire() { + let releaseLock: (value: void) => void; + const newLockPromise: Promise = new Promise((resolve) => { + releaseLock = resolve; + }); + + const acquirePromise = this.lockPromise.then(() => releaseLock); + this.lockPromise = newLockPromise; + return acquirePromise; + } +} + +const LOCK = new Lock(); + +async function runQuery( + db: clickhouse.ClickHouseClient, + query: string, + options: { columns?: true; execute?: boolean }, +): Promise<{ + rows: any[]; + columns: string[]; + dtypes: string[]; +}>; + +async function runQuery( + db: clickhouse.ClickHouseClient, + query: string, + options?: { columns?: false; execute?: boolean }, +): Promise; + +async function runQuery( + db: clickhouse.ClickHouseClient, + query: string, + options: { columns?: boolean; execute?: boolean } = {}, +) { + query = query.replace(/\s+/g, " ").trim(); + const release = await LOCK.acquire(); + try { + const result = await db.query({ query }); + if (!options.execute) { + const { data, meta } = + (await result.json()) as clickhouse.ResponseJSON; + + if (options.columns) { + return { + rows: data, + columns: meta!.map((f) => f.name), + dtypes: meta!.map((f) => f.type), + }; + } + + return data; + } + } catch (error) { + console.error("Query error:", error); + console.error("Query:", query); + throw error; + } finally { + release(); + } +} + +/** + * An implementation of Perspective's Virtual Server for `@clickhouse/client-web`.
+ */ +export class ClickhouseHandler implements perspective.VirtualServerHandler { + private db: clickhouse.ClickHouseClient; + private sqlBuilder: perspective.GenericSQLVirtualServerModel; + constructor(db: clickhouse.ClickHouseClient, mod?: typeof perspective) { + if (!mod) { + if (customElements) { + const viewer_class: any = + customElements.get("perspective-viewer"); + if (viewer_class) { + mod = viewer_class.__wasm_module__; + } else { + throw new Error("Missing perspective-client.wasm"); + } + } else { + } + } + + this.db = db; + this.sqlBuilder = new mod!.GenericSQLVirtualServerModel({ + create_entity: "VIEW", + grouping_fn: "GROUPING", + }); + } + + getFeatures() { + return { + group_by: true, + split_by: false, + sort: true, + expressions: true, + filter_ops: { + integer: FILTER_OPS, + float: FILTER_OPS, + string: FILTER_OPS, + boolean: FILTER_OPS, + date: FILTER_OPS, + datetime: FILTER_OPS, + }, + aggregates: { + integer: NUMBER_AGGS, + float: NUMBER_AGGS, + string: STRING_AGGS, + boolean: STRING_AGGS, + date: STRING_AGGS, + datetime: STRING_AGGS, + }, + }; + } + + async getHostedTables() { + const query = "SHOW TABLES"; + const results = await runQuery(this.db, query); + return results.map((row) => { + return `${row.name}`; + }); + } + + async tableSchema(tableId: string, config?: ViewConfig) { + const query = this.sqlBuilder.tableSchema(tableId); + const results = await runQuery(this.db, query); + const schema = {} as Record; + for (const result of results) { + const colName = result.name; + if (!colName.startsWith("__")) { + schema[colName] = duckdbTypeToPsp(result.type) as ColumnType; + } + } + + return schema; + } + + async viewColumnSize(viewId: string, config: ViewConfig) { + const query = `SELECT COUNT() FROM system.columns WHERE table = '${viewId}'`; + const results = await runQuery(this.db, query); + const gs = config.group_by?.length || 0; + const count = Number(results[0]["COUNT()"]); + console.log(count); + return ( + count - + (gs === 0 ? 
0 : gs + (config.split_by?.length === 0 ? 1 : 0)) + ); + } + + async tableSize(tableId: string) { + const query = this.sqlBuilder.tableSize(tableId); + const results = await runQuery(this.db, query); + return Number(results[0]["COUNT()"]); + } + + async tableMakeView(tableId: string, viewId: string, config: ViewConfig) { + const query = this.sqlBuilder.tableMakeView(tableId, viewId, config); + await runQuery(this.db, query, { execute: true }); + } + + async tableValidateExpression(tableId: string, expression: string) { + const query = this.sqlBuilder.tableValidateExpression( + tableId, + expression, + ); + const results = await runQuery(this.db, query); + return duckdbTypeToPsp(results[0]["type"]) as ColumnType; + } + + async viewDelete(viewId: string) { + const query = this.sqlBuilder.viewDelete(viewId); + await runQuery(this.db, query, { execute: true }); + } + + async viewGetData( + viewId: string, + config: ViewConfig, + schema: Record, + viewport: ViewWindow, + dataSlice: perspective.VirtualDataSlice, + ) { + const is_group_by = config.group_by?.length > 0; + const is_split_by = config.split_by?.length > 0; + const query = this.sqlBuilder.viewGetData( + viewId, + config, + viewport, + schema, + ); + + const { rows, columns, dtypes } = await runQuery(this.db, query, { + columns: true, + }); + + for (let cidx = 0; cidx < columns.length; cidx++) { + if (cidx === 0 && is_group_by && !is_split_by) { + // This is the grouping_id column, skip it + continue; + } + + let col = columns[cidx]; + if (is_split_by && !col.startsWith("__ROW_PATH_")) { + col = col.replaceAll("_", "|"); + } + + const dtype = duckdbTypeToPsp(dtypes[cidx]) as ColumnType; + + const isDecimal = dtypes[cidx].startsWith("Decimal"); + for (let ridx = 0; ridx < rows.length; ridx++) { + const row = rows[ridx]; + const grouping_id = row["__GROUPING_ID__"]; + let value = row[columns[cidx]]; + if (isDecimal) { + value = convertDecimalToNumber(value, dtypes[cidx]); + } + + if (typeof value === "bigint") { 
+ value = Number(value); + } + + if (dtype === "datetime" && typeof value === "string") { + value = +new Date(value); + } + + if (dtype === "string" && typeof value !== "string") { + value = `${value}`; + } + + dataSlice.setCol(dtype, col, ridx, value, grouping_id); + } + } + } +} diff --git a/rust/perspective-js/src/ts/virtual_servers/duckdb.ts b/rust/perspective-js/src/ts/virtual_servers/duckdb.ts index b540672776..ae16abe66b 100644 --- a/rust/perspective-js/src/ts/virtual_servers/duckdb.ts +++ b/rust/perspective-js/src/ts/virtual_servers/duckdb.ts @@ -10,10 +10,18 @@ // ┃ of the [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0). ┃ // ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛ -import type { - VirtualDataSlice, - VirtualServerHandler, -} from "@perspective-dev/client"; +/** + * An implementation of a Perspective Virtual Server for DuckDB. + * + * This import is optional, and so must be imported manually from either + * `@perspective-dev/client/dist/esm/virtual_servers/duckdb.js` or + * `@perspective-dev/client/src/ts/virtual_servers/duckdb.ts`, it is not + * exported from the package root `@perspective-dev/client` + * + * @module + */ + +import type * as perspective from "@perspective-dev/client"; import type { ColumnType } from "@perspective-dev/client/dist/esm/ts-rs/ColumnType.d.ts"; import type { ViewConfig } from "@perspective-dev/client/dist/esm/ts-rs/ViewConfig.d.ts"; import type { ViewWindow } from "@perspective-dev/client/dist/esm/ts-rs/ViewWindow.d.ts"; @@ -68,32 +76,45 @@ const FILTER_OPS = [ ]; function duckdbTypeToPsp(name: string): ColumnType { - if (name === "VARCHAR") return "string"; + if (name === "VARCHAR" || name == "Utf8") { + return "string"; + } + if ( name === "DOUBLE" || name === "BIGINT" || name === "HUGEINT" || + name === "Float64" || name.startsWith("Decimal") - ) + ) { return "float"; - if (name.startsWith("Decimal")) return "float"; - if (name.startsWith("Int")) return "integer"; - if 
(name === "INTEGER") return "integer"; - if (name === "Utf8") return "string"; - if (name === "Date32") return "date"; - if (name === "Float64") return "float"; - if (name === "DATE") return "date"; - if (name === "BOOLEAN") return "boolean"; - if (name === "TIMESTAMP" || name.startsWith("Timestamp")) return "datetime"; + } + + if (name.startsWith("Int") || name == "INTEGER") { + return "integer"; + } + + if (name === "INTEGER") { + return "integer"; + } + + if (name === "DATE" || name.startsWith("Date")) { + return "date"; + } + + if (name === "BOOLEAN" || name === "Bool") { + return "boolean"; + } + + if (name === "TIMESTAMP" || name.startsWith("Timestamp")) { + return "datetime"; + } + throw new Error(`Unknown type '${name}'`); } function convertDecimalToNumber(value: any, dtypeString: string) { - if ( - value === null || - value === undefined || - !(value instanceof Uint32Array || value instanceof Int32Array) - ) { + if (!(value instanceof Uint32Array || value instanceof Int32Array)) { return value; } @@ -102,15 +123,9 @@ function convertDecimalToNumber(value: any, dtypeString: string) { bigIntValue |= BigInt(value[i]) << BigInt(i * 32); } - const maxInt128 = BigInt(2) ** BigInt(127); - if (bigIntValue >= maxInt128) { - bigIntValue -= BigInt(2) ** BigInt(128); - } - const scaleMatch = dtypeString.match(/Decimal\[\d+e(\d+)\]/); - const scale = scaleMatch ? 
parseInt(scaleMatch[1]) : 0; - - if (scale > 0) { + if (scaleMatch) { + const scale = parseInt(scaleMatch[1]); return Number(bigIntValue) / Math.pow(10, scale); } else { return Number(bigIntValue); @@ -130,7 +145,7 @@ async function runQuery( async function runQuery( db: duckdb.AsyncDuckDBConnection, query: string, - options?: { columns: boolean }, + options?: { columns: false }, ): Promise; async function runQuery( @@ -139,7 +154,6 @@ async function runQuery( options: { columns?: boolean } = {}, ) { query = query.replace(/\s+/g, " ").trim(); - // console.log("Query:", query); try { const result = await db.query(query); if (options.columns) { @@ -158,11 +172,28 @@ async function runQuery( } } -export class DuckDBHandler implements VirtualServerHandler { +/** + * An implementation of Perspective's Virtual Server for `@duckdb/duckdb-wasm`. + */ +export class DuckDBHandler implements perspective.VirtualServerHandler { private db: duckdb.AsyncDuckDBConnection; + private sqlBuilder: perspective.GenericSQLVirtualServerModel; + constructor(db: duckdb.AsyncDuckDBConnection, mod?: typeof perspective) { + if (!mod) { + if (customElements) { + const viewer_class: any = + customElements.get("perspective-viewer"); + if (viewer_class) { + mod = viewer_class.__wasm_module__; + } else { + throw new Error("Missing perspective-client.wasm"); + } + } else { + } + } - constructor(db: duckdb.AsyncDuckDBConnection) { this.db = db; + this.sqlBuilder = new mod!.GenericSQLVirtualServerModel(); } getFeatures() { @@ -191,20 +222,25 @@ export class DuckDBHandler implements VirtualServerHandler { } async getHostedTables() { - const results = await runQuery(this.db, "SHOW ALL TABLES"); - return results.map((row) => row.toJSON().name); + const query = this.sqlBuilder.getHostedTables(); + const results = await runQuery(this.db, query); + return results.map((row) => { + const json = row.toJSON(); + return `${json.database || "memory"}.${json.name}`; + }); } - async tableSchema(tableId: string) { - 
const query = `DESCRIBE ${tableId}`; + async tableSchema(tableId: string, config?: ViewConfig) { + const query = this.sqlBuilder.tableSchema(tableId); const results = await runQuery(this.db, query); const schema = {} as Record; for (const result of results) { const res = result.toJSON(); const colName = res.column_name; - if (!colName.startsWith("__") || !colName.endsWith("__")) { - const cleanName = colName.split("_").slice(-1)[0] as string; - schema[cleanName] = duckdbTypeToPsp(res.column_type); + if (!colName.startsWith("__")) { + schema[colName] = duckdbTypeToPsp( + res.column_type, + ) as ColumnType; } } @@ -212,7 +248,7 @@ export class DuckDBHandler implements VirtualServerHandler { } async viewColumnSize(viewId: string, config: ViewConfig) { - const query = `SELECT COUNT(*) FROM (DESCRIBE ${viewId})`; + const query = this.sqlBuilder.viewColumnSize(viewId); const results = await runQuery(this.db, query); const gs = config.group_by?.length || 0; const count = Number(Object.values(results[0].toJSON())[0]); @@ -223,293 +259,79 @@ export class DuckDBHandler implements VirtualServerHandler { } async tableSize(tableId: string) { - const query = `SELECT COUNT(*) FROM ${tableId}`; + const query = this.sqlBuilder.tableSize(tableId); const results = await runQuery(this.db, query); return Number(results[0].toJSON()["count_star()"]); } - // async viewSchema(viewId: string, config: ViewConfig) { - // return this.tableSchema(viewId); - // } - - // async viewSize(viewId: string) { - // return this.tableSize(viewId); - // } - async tableMakeView(tableId: string, viewId: string, config: ViewConfig) { - const columns = config.columns || []; - const group_by = config.group_by || []; - const split_by = config.split_by || []; - const aggregates = config.aggregates || {}; - const sort = config.sort || []; - const expressions = config.expressions || {}; - const filter = config.filter || []; - - const colName = (col: string) => { - const expr = expressions[col]; - return expr || 
`"${col}"`; - }; - - const getAggregate = (col: string) => aggregates[col] || null; - - const generateSelectClauses = () => { - const clauses = []; - if (group_by.length > 0) { - for (const col of columns) { - if (col !== null) { - // TODO texodus - const agg = getAggregate(col) || "any_value"; - clauses.push(`${agg}(${colName(col)}) as "${col}"`); - } - } - - if (split_by.length === 0) { - for (let idx = 0; idx < group_by.length; idx++) { - clauses.push( - `${colName(group_by[idx])} as __ROW_PATH_${idx}__`, - ); - } - - const groups = group_by.map(colName).join(", "); - clauses.push(`GROUPING_ID(${groups}) AS __GROUPING_ID__`); - } - } else if (columns.length > 0) { - for (const col of columns) { - if (col !== null) { - // TODO texodus - clauses.push( - `${colName(col)} as "${col.replace(/"/g, '""')}"`, - ); - } - } - } - - return clauses; - }; - - const orderByClauses = []; - const windowClauses = []; - const whereClauses = []; - - if (group_by.length > 0) { - for (let gidx = 0; gidx < group_by.length; gidx++) { - const groups = group_by - .slice(0, gidx + 1) - .map(colName) - .join(", "); - if (split_by.length === 0) { - orderByClauses.push(`GROUPING_ID(${groups}) DESC`); - } - - for (const [sort_col, sort_dir] of sort) { - if (sort_dir !== "none") { - const agg = getAggregate(sort_col) || "any_value"; - if (gidx >= group_by.length - 1) { - orderByClauses.push( - `${agg}(${colName(sort_col)}) ${sort_dir}`, - ); - } else { - orderByClauses.push( - `first(${agg}(${colName(sort_col)})) OVER __WINDOW_${gidx}__ ${sort_dir}`, - ); - } - } - } - - orderByClauses.push(`__ROW_PATH_${gidx}__ ASC`); - } - } else { - for (const [sort_col, sort_dir] of sort) { - if (sort_dir) { - orderByClauses.push(`${colName(sort_col)} ${sort_dir}`); - } - } - } - - if (sort.length > 0 && group_by.length > 1) { - for (let gidx = 0; gidx < group_by.length - 1; gidx++) { - const partition = Array.from( - { length: gidx + 1 }, - (_, i) => `__ROW_PATH_${i}__`, - ).join(", "); - const 
sub_groups = group_by - .slice(0, gidx + 1) - .map(colName) - .join(", "); - const groups = group_by.map(colName).join(", "); - windowClauses.push( - `__WINDOW_${gidx}__ AS (PARTITION BY GROUPING_ID(${sub_groups}), ${partition} ORDER BY ${groups})`, - ); - } - } - - for (const [name, op, value] of filter) { - if (value !== null && value !== undefined) { - const term_lit = - typeof value === "string" ? `'${value}'` : String(value); - whereClauses.push(`${colName(name)} ${op} ${term_lit}`); - } - } - - let query; - if (split_by.length > 0) { - query = `SELECT * FROM ${tableId}`; - } else { - const selectClauses = generateSelectClauses(); - query = `SELECT ${selectClauses.join(", ")} FROM ${tableId}`; - } - - if (whereClauses.length > 0) { - query = `${query} WHERE ${whereClauses.join(" AND ")}`; - } - - if (split_by.length > 0) { - const groups = group_by.map(colName).join(", "); - const group_aliases = group_by - .map((x, i) => `${colName(x)} AS __ROW_PATH_${i}__`) - .join(", "); - const pivotOn = split_by.map((c) => `"${c}"`).join(", "); - const pivotUsing = generateSelectClauses().join(", "); - - query = ` - SELECT * EXCLUDE (${groups}), ${group_aliases} FROM ( - PIVOT (${query}) - ON ${pivotOn} - USING ${pivotUsing} - GROUP BY ${groups} - ) - `; - } else if (group_by.length > 0) { - const groups = group_by.map(colName).join(", "); - query = `${query} GROUP BY ROLLUP(${groups})`; - } - - if (windowClauses.length > 0) { - query = `${query} WINDOW ${windowClauses.join(", ")}`; - } - - if (orderByClauses.length > 0) { - query = `${query} ORDER BY ${orderByClauses.join(", ")}`; - } - - query = `CREATE TABLE ${viewId} AS (${query})`; + const query = this.sqlBuilder.tableMakeView(tableId, viewId, config); await runQuery(this.db, query); } async tableValidateExpression(tableId: string, expression: string) { - const query = `DESCRIBE (select ${expression} from ${tableId})`; + const query = this.sqlBuilder.tableValidateExpression( + tableId, + expression, + ); const 
results = await runQuery(this.db, query); - return duckdbTypeToPsp(results[0].toJSON()["column_type"]); + return duckdbTypeToPsp( + results[0].toJSON()["column_type"], + ) as ColumnType; } async viewDelete(viewId: string) { - const query = `DROP TABLE IF EXISTS ${viewId}`; + const query = this.sqlBuilder.viewDelete(viewId); await runQuery(this.db, query); } async viewGetData( viewId: string, config: ViewConfig, + schema: Record, viewport: ViewWindow, - dataSlice: VirtualDataSlice, + dataSlice: perspective.VirtualDataSlice, ) { - const group_by = config.group_by || []; - const split_by = config.split_by || []; - const start_col = viewport.start_col; - const end_col = viewport.end_col; - const start_row = viewport.start_row || 0; - const end_row = viewport.end_row; - - let limit = ""; - if (end_row !== null && end_row !== undefined) { - limit = `LIMIT ${end_row - start_row} OFFSET ${start_row}`; - } - - const schemaQuery = `DESCRIBE ${viewId}`; - const schemaResults = await runQuery(this.db, schemaQuery); - const columnTypes = new Map(); - for (const result of schemaResults) { - const res = result.toJSON(); - columnTypes.set(res.column_name, res.column_type); - } - - const dataColumns = Array.from(columnTypes.entries()) - .filter(([colName]) => !colName.startsWith("__")) - .slice(start_col, end_col); - - const groupByColsList = []; - if (group_by.length > 0) { - if (split_by.length === 0) { - groupByColsList.push("__GROUPING_ID__"); - } - for (let idx = 0; idx < group_by.length; idx++) { - groupByColsList.push(`__ROW_PATH_${idx}__`); - } - } - - const allColumns = [ - ...groupByColsList.map((col) => `"${col}"`), - ...dataColumns.map(([colName]) => `"${colName}"`), - ]; - - const query = ` - SELECT ${allColumns.join(", ")} - FROM ${viewId} ${limit} - `; + const is_group_by = config.group_by?.length > 0; + const is_split_by = config.split_by?.length > 0; + const query = this.sqlBuilder.viewGetData( + viewId, + config, + viewport, + schema, + ); const { rows, columns, 
dtypes } = await runQuery(this.db, query, { columns: true, }); for (let cidx = 0; cidx < columns.length; cidx++) { - const col = columns[cidx]; - - if (cidx === 0 && group_by.length > 0 && split_by.length === 0) { + if (cidx === 0 && is_group_by && !is_split_by) { + // This is the grouping_id column, skip it continue; } - let group_by_index = null; - let max_grouping_id = null; - const row_path_match = col.match(/__ROW_PATH_(\d+)__/); - if (row_path_match) { - group_by_index = parseInt(row_path_match[1]); - max_grouping_id = 2 ** (group_by.length - group_by_index) - 1; + let col = columns[cidx]; + if (is_split_by && !col.startsWith("__ROW_PATH_")) { + col = col.replaceAll("_", "|"); } - const dtype = duckdbTypeToPsp(dtypes[cidx]); - const isDecimal = dtypes[cidx].startsWith("Decimal"); - const colName = - group_by_index !== null - ? "__ROW_PATH__" - : col.replace(/_/g, "|"); + const dtype = duckdbTypeToPsp(dtypes[cidx]) as ColumnType; + const isDecimal = dtypes[cidx].startsWith("Decimal"); for (let ridx = 0; ridx < rows.length; ridx++) { - const row = rows[ridx]; - const rowArray = row.toArray(); - const shouldSet = - split_by.length > 0 || - max_grouping_id === null || - rowArray[0] < max_grouping_id; - - if (shouldSet) { - let value = rowArray[cidx]; - - if (isDecimal) { - value = convertDecimalToNumber(value, dtypes[cidx]); - } - - if (typeof value === "bigint") { - value = Number(value); - } - - dataSlice.setCol( - dtype, - colName, - ridx, - value, - group_by_index, - ); + const rowArray = rows[ridx].toArray(); + const grouping_id = Number(rowArray[0]); + let value = rowArray[cidx]; + if (isDecimal) { + value = convertDecimalToNumber(value, dtypes[cidx]); } + + if (typeof value === "bigint") { + value = Number(value); + } + + dataSlice.setCol(dtype, col, ridx, value, grouping_id); } } } diff --git a/rust/perspective-js/test/js/duckdb.spec.js b/rust/perspective-js/test/js/duckdb.spec.js index c447a3cb6f..77a0875b81 100644 --- 
a/rust/perspective-js/test/js/duckdb.spec.js +++ b/rust/perspective-js/test/js/duckdb.spec.js @@ -20,8 +20,9 @@ import { test, expect } from "@perspective-dev/test"; import { default as perspective, createMessageHandler, + wasmModule, } from "@perspective-dev/client"; -import { DuckDBHandler } from "@perspective-dev/client/dist/esm/virtual_servers/duckdb.js"; +import { DuckDBHandler } from "@perspective-dev/client/src/ts/virtual_servers/duckdb.ts"; const require = createRequire(import.meta.url); const DUCKDB_DIST = path.dirname(require.resolve("@duckdb/duckdb-wasm")); @@ -76,7 +77,7 @@ test.describe("DuckDB Virtual Server", function () { test.beforeAll(async () => { db = await initializeDuckDB(); - const server = createMessageHandler(new DuckDBHandler(db)); + const server = createMessageHandler(new DuckDBHandler(db, wasmModule)); client = await perspective.worker(server); await loadSuperstoreData(db); }); @@ -84,13 +85,13 @@ test.describe("DuckDB Virtual Server", function () { test.describe("client", () => { test("get_hosted_table_names()", async function () { const tables = await client.get_hosted_table_names(); - expect(tables).toContain("superstore"); + expect(tables).toContain("memory.superstore"); }); }); test.describe("table", () => { test("schema()", async function () { - const table = await client.open_table("superstore"); + const table = await client.open_table("memory.superstore"); const schema = await table.schema(); expect(schema).toHaveProperty("Sales"); expect(schema).toHaveProperty("Profit"); @@ -100,7 +101,7 @@ test.describe("DuckDB Virtual Server", function () { }); test("schema() returns correct types", async function () { - const table = await client.open_table("superstore"); + const table = await client.open_table("memory.superstore"); const schema = await table.schema(); expect(schema["Sales"]).toBe("float"); expect(schema["Profit"]).toBe("float"); @@ -110,7 +111,7 @@ test.describe("DuckDB Virtual Server", function () { }); test("columns()", 
async function () { - const table = await client.open_table("superstore"); + const table = await client.open_table("memory.superstore"); const columns = await table.columns(); expect(columns).toContain("Sales"); expect(columns).toContain("Profit"); @@ -120,7 +121,7 @@ test.describe("DuckDB Virtual Server", function () { }); test("size()", async function () { - const table = await client.open_table("superstore"); + const table = await client.open_table("memory.superstore"); const size = await table.size(); expect(size).toBe(9994); }); @@ -128,7 +129,7 @@ test.describe("DuckDB Virtual Server", function () { test.describe("view", () => { test("num_rows()", async function () { - const table = await client.open_table("superstore"); + const table = await client.open_table("memory.superstore"); const view = await table.view({ columns: ["Sales", "Profit"] }); const numRows = await view.num_rows(); expect(numRows).toBe(9994); @@ -136,7 +137,7 @@ test.describe("DuckDB Virtual Server", function () { }); test("num_columns()", async function () { - const table = await client.open_table("superstore"); + const table = await client.open_table("memory.superstore"); const view = await table.view({ columns: ["Sales", "Profit", "State"], }); @@ -147,7 +148,7 @@ test.describe("DuckDB Virtual Server", function () { }); test("schema()", async function () { - const table = await client.open_table("superstore"); + const table = await client.open_table("memory.superstore"); const view = await table.view({ columns: ["Sales", "Profit", "State"], }); @@ -161,7 +162,7 @@ test.describe("DuckDB Virtual Server", function () { }); test("to_json()", async function () { - const table = await client.open_table("superstore"); + const table = await client.open_table("memory.superstore"); const view = await table.view({ columns: ["Sales", "Quantity"], }); @@ -173,7 +174,7 @@ test.describe("DuckDB Virtual Server", function () { }); test("to_columns()", async function () { - const table = await 
client.open_table("superstore"); + const table = await client.open_table("memory.superstore"); const view = await table.view({ columns: ["Sales", "Quantity"], }); @@ -189,7 +190,7 @@ test.describe("DuckDB Virtual Server", function () { }); test("column_paths()", async function () { - const table = await client.open_table("superstore"); + const table = await client.open_table("memory.superstore"); const view = await table.view({ columns: ["Sales", "Profit", "State"], }); @@ -201,7 +202,7 @@ test.describe("DuckDB Virtual Server", function () { test.describe("group_by", () => { test("single group_by", async function () { - const table = await client.open_table("superstore"); + const table = await client.open_table("memory.superstore"); const view = await table.view({ columns: ["Sales"], group_by: ["Region"], @@ -216,7 +217,7 @@ test.describe("DuckDB Virtual Server", function () { }); test("multi-level group_by", async function () { - const table = await client.open_table("superstore"); + const table = await client.open_table("memory.superstore"); const view = await table.view({ columns: ["Sales"], group_by: ["Region", "Category"], @@ -234,7 +235,7 @@ test.describe("DuckDB Virtual Server", function () { }); test("group_by with count aggregate", async function () { - const table = await client.open_table("superstore"); + const table = await client.open_table("memory.superstore"); const view = await table.view({ columns: ["Sales"], group_by: ["Region"], @@ -247,7 +248,7 @@ test.describe("DuckDB Virtual Server", function () { }); test("group_by with avg aggregate", async function () { - const table = await client.open_table("superstore"); + const table = await client.open_table("memory.superstore"); const view = await table.view({ columns: ["Sales"], group_by: ["Category"], @@ -263,7 +264,7 @@ test.describe("DuckDB Virtual Server", function () { }); test("group_by with min aggregate", async function () { - const table = await client.open_table("superstore"); + const table 
= await client.open_table("memory.superstore"); const view = await table.view({ columns: ["Quantity"], group_by: ["Region"], @@ -277,7 +278,7 @@ test.describe("DuckDB Virtual Server", function () { }); test("group_by with max aggregate", async function () { - const table = await client.open_table("superstore"); + const table = await client.open_table("memory.superstore"); const view = await table.view({ columns: ["Quantity"], group_by: ["Region"], @@ -291,9 +292,9 @@ test.describe("DuckDB Virtual Server", function () { }); }); - test.describe.skip("split_by", () => { + test.describe("split_by", () => { test("single split_by", async function () { - const table = await client.open_table("superstore"); + const table = await client.open_table("memory.superstore"); const view = await table.view({ columns: ["Sales"], split_by: ["Region"], @@ -310,8 +311,8 @@ test.describe("DuckDB Virtual Server", function () { await view.delete(); }); - test("split_by without group_by", async function () { - const table = await client.open_table("superstore"); + test.skip("split_by without group_by", async function () { + const table = await client.open_table("memory.superstore"); const view = await table.view({ columns: ["Sales"], split_by: ["Category"], @@ -326,7 +327,7 @@ test.describe("DuckDB Virtual Server", function () { test.describe("filter", () => { test("filter with equals", async function () { - const table = await client.open_table("superstore"); + const table = await client.open_table("memory.superstore"); const view = await table.view({ columns: ["Sales", "Region"], filter: [["Region", "==", "West"]], @@ -339,7 +340,7 @@ test.describe("DuckDB Virtual Server", function () { }); test("filter with not equals", async function () { - const table = await client.open_table("superstore"); + const table = await client.open_table("memory.superstore"); const view = await table.view({ columns: ["Sales", "Region"], filter: [["Region", "!=", "West"]], @@ -352,7 +353,7 @@ 
test.describe("DuckDB Virtual Server", function () { }); test("filter with greater than", async function () { - const table = await client.open_table("superstore"); + const table = await client.open_table("memory.superstore"); const view = await table.view({ columns: ["Sales", "Quantity"], filter: [["Quantity", ">", 5]], @@ -365,7 +366,7 @@ test.describe("DuckDB Virtual Server", function () { }); test("filter with less than", async function () { - const table = await client.open_table("superstore"); + const table = await client.open_table("memory.superstore"); const view = await table.view({ columns: ["Sales", "Quantity"], filter: [["Quantity", "<", 3]], @@ -378,7 +379,7 @@ test.describe("DuckDB Virtual Server", function () { }); test("filter with greater than or equal", async function () { - const table = await client.open_table("superstore"); + const table = await client.open_table("memory.superstore"); const view = await table.view({ columns: ["Sales", "Quantity"], filter: [["Quantity", ">=", 10]], @@ -391,7 +392,7 @@ test.describe("DuckDB Virtual Server", function () { }); test("filter with less than or equal", async function () { - const table = await client.open_table("superstore"); + const table = await client.open_table("memory.superstore"); const view = await table.view({ columns: ["Sales", "Quantity"], filter: [["Quantity", "<=", 2]], @@ -404,7 +405,7 @@ test.describe("DuckDB Virtual Server", function () { }); test("filter with LIKE", async function () { - const table = await client.open_table("superstore"); + const table = await client.open_table("memory.superstore"); const view = await table.view({ columns: ["Sales", "State"], filter: [["State", "LIKE", "Cal%"]], @@ -417,7 +418,7 @@ test.describe("DuckDB Virtual Server", function () { }); test("multiple filters", async function () { - const table = await client.open_table("superstore"); + const table = await client.open_table("memory.superstore"); const view = await table.view({ columns: ["Sales", 
"Region", "Quantity"], filter: [ @@ -434,7 +435,7 @@ test.describe("DuckDB Virtual Server", function () { }); test("filter with group_by", async function () { - const table = await client.open_table("superstore"); + const table = await client.open_table("memory.superstore"); const view = await table.view({ columns: ["Sales"], group_by: ["Category"], @@ -449,7 +450,7 @@ test.describe("DuckDB Virtual Server", function () { test.describe("sort", () => { test("sort ascending", async function () { - const table = await client.open_table("superstore"); + const table = await client.open_table("memory.superstore"); const view = await table.view({ columns: ["Sales", "Quantity"], sort: [["Sales", "asc"]], @@ -464,7 +465,7 @@ test.describe("DuckDB Virtual Server", function () { }); test("sort descending", async function () { - const table = await client.open_table("superstore"); + const table = await client.open_table("memory.superstore"); const view = await table.view({ columns: ["Sales", "Quantity"], sort: [["Sales", "desc"]], @@ -479,7 +480,7 @@ test.describe("DuckDB Virtual Server", function () { }); test("sort with group_by", async function () { - const table = await client.open_table("superstore"); + const table = await client.open_table("memory.superstore"); const view = await table.view({ columns: ["Sales"], group_by: ["Region"], @@ -498,7 +499,7 @@ test.describe("DuckDB Virtual Server", function () { }); test("multi-column sort", async function () { - const table = await client.open_table("superstore"); + const table = await client.open_table("memory.superstore"); const view = await table.view({ columns: ["Region", "Sales", "Quantity"], sort: [ @@ -524,7 +525,7 @@ test.describe("DuckDB Virtual Server", function () { test.describe("expressions", () => { test("simple expression", async function () { - const table = await client.open_table("superstore"); + const table = await client.open_table("memory.superstore"); const view = await table.view({ columns: ["Sales", 
"doublesales"], expressions: { doublesales: '"Sales" * 2' }, @@ -540,7 +541,7 @@ test.describe("DuckDB Virtual Server", function () { }); test("expression with multiple columns", async function () { - const table = await client.open_table("superstore"); + const table = await client.open_table("memory.superstore"); const view = await table.view({ columns: ["Sales", "Profit", "margin"], expressions: { margin: '"Profit" / "Sales"' }, @@ -560,7 +561,7 @@ test.describe("DuckDB Virtual Server", function () { }); test("expression with group_by", async function () { - const table = await client.open_table("superstore"); + const table = await client.open_table("memory.superstore"); const view = await table.view({ columns: ["total"], group_by: ["Region"], @@ -580,7 +581,7 @@ test.describe("DuckDB Virtual Server", function () { test.describe("viewport", () => { test("start_row and end_row", async function () { - const table = await client.open_table("superstore"); + const table = await client.open_table("memory.superstore"); const view = await table.view({ columns: ["Sales", "Profit"], }); @@ -590,7 +591,7 @@ test.describe("DuckDB Virtual Server", function () { }); test("start_col and end_col", async function () { - const table = await client.open_table("superstore"); + const table = await client.open_table("memory.superstore"); const view = await table.view({ columns: ["Sales", "Profit", "Quantity", "Discount"], }); @@ -609,7 +610,7 @@ test.describe("DuckDB Virtual Server", function () { }); test("large viewport", async function () { - const table = await client.open_table("superstore"); + const table = await client.open_table("memory.superstore"); const view = await table.view({ columns: ["Sales"], }); @@ -621,7 +622,7 @@ test.describe("DuckDB Virtual Server", function () { test.describe("data types", () => { test("integer columns", async function () { - const table = await client.open_table("superstore"); + const table = await client.open_table("memory.superstore"); const 
view = await table.view({ columns: ["Quantity"], }); @@ -633,7 +634,7 @@ test.describe("DuckDB Virtual Server", function () { }); test("float columns", async function () { - const table = await client.open_table("superstore"); + const table = await client.open_table("memory.superstore"); const view = await table.view({ columns: ["Sales", "Profit"], }); @@ -646,7 +647,7 @@ test.describe("DuckDB Virtual Server", function () { }); test("string columns", async function () { - const table = await client.open_table("superstore"); + const table = await client.open_table("memory.superstore"); const view = await table.view({ columns: ["Region", "State", "City"], }); @@ -660,7 +661,7 @@ test.describe("DuckDB Virtual Server", function () { }); test("date columns", async function () { - const table = await client.open_table("superstore"); + const table = await client.open_table("memory.superstore"); const view = await table.view({ columns: ["Order Date"], }); @@ -675,7 +676,7 @@ test.describe("DuckDB Virtual Server", function () { test.describe("combined operations", () => { test("group_by + filter + sort", async function () { - const table = await client.open_table("superstore"); + const table = await client.open_table("memory.superstore"); const view = await table.view({ columns: ["Sales"], group_by: ["Category"], @@ -695,8 +696,8 @@ test.describe("DuckDB Virtual Server", function () { await view.delete(); }); - test.skip("split_by + group_by + filter", async function () { - const table = await client.open_table("superstore"); + test("split_by + group_by + filter", async function () { + const table = await client.open_table("memory.superstore"); const view = await table.view({ columns: ["Sales"], group_by: ["Category"], @@ -707,12 +708,12 @@ test.describe("DuckDB Virtual Server", function () { const paths = await view.column_paths(); expect(paths.length).toBeGreaterThan(0); const numRows = await view.num_rows(); - expect(numRows).toBe(4); // 3 categories + total + 
expect(numRows).toBe(3); // 3 categories + total await view.delete(); }); test("expressions + group_by + sort", async function () { - const table = await client.open_table("superstore"); + const table = await client.open_table("memory.superstore"); const view = await table.view({ columns: ["profitmargin"], group_by: ["Region"], diff --git a/rust/perspective-python/Cargo.toml b/rust/perspective-python/Cargo.toml index 69389f00f2..92772c4028 100644 --- a/rust/perspective-python/Cargo.toml +++ b/rust/perspective-python/Cargo.toml @@ -61,7 +61,7 @@ perspective-client = { version = "4.1.1" } perspective-server = { version = "4.1.1" } bytes = "1.10.1" chrono = "0.4" -macro_rules_attribute = "0.2.0" +macro_rules_attribute = "0.2.2" async-lock = "2.5.0" pollster = "0.3.0" extend = "1.1.2" diff --git a/rust/perspective-python/perspective/__init__.py b/rust/perspective-python/perspective/__init__.py index bdd22ed8d1..e2c43b6d4b 100644 --- a/rust/perspective-python/perspective/__init__.py +++ b/rust/perspective-python/perspective/__init__.py @@ -21,6 +21,7 @@ "ProxySession", "AsyncClient", "AsyncServer", + "GenericSQLVirtualServerModel", "VirtualServer", "num_cpus", "set_num_cpus", @@ -353,6 +354,7 @@ def delete_callback(): AsyncServer, AsyncClient, VirtualServer, + GenericSQLVirtualServerModel, # NOTE: these are classes without constructors, # so we import them just for type hinting Table, # noqa: F401 diff --git a/rust/perspective-python/perspective/virtual_servers/clickhouse.py b/rust/perspective-python/perspective/virtual_servers/clickhouse.py new file mode 100644 index 0000000000..53ef74cd63 --- /dev/null +++ b/rust/perspective-python/perspective/virtual_servers/clickhouse.py @@ -0,0 +1,245 @@ +# ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ +# ┃ ██████ ██████ ██████ █ █ █ █ █ █▄ ▀███ █ ┃ +# ┃ ▄▄▄▄▄█ █▄▄▄▄▄ ▄▄▄▄▄█ ▀▀▀▀▀█▀▀▀▀▀ █ ▀▀▀▀▀█ ████████▌▐███ ███▄ ▀█ █ ▀▀▀▀▀ ┃ +# ┃ █▀▀▀▀▀ █▀▀▀▀▀ █▀██▀▀ ▄▄▄▄▄ █ ▄▄▄▄▄█ ▄▄▄▄▄█ ████████▌▐███ █████▄ █ 
▄▄▄▄▄ ┃ +# ┃ █ ██████ █ ▀█▄ █ ██████ █ ███▌▐███ ███████▄ █ ┃ +# ┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫ +# ┃ Copyright (c) 2017, the Perspective Authors. ┃ +# ┃ ╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌ ┃ +# ┃ This file is part of the Perspective library, distributed under the terms ┃ +# ┃ of the [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0). ┃ +# ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛ + +import perspective + +from datetime import datetime +from loguru import logger + +from perspective.virtual_servers import VirtualSessionModel + + +NUMBER_AGGS = [ + "sum", + "count", + "any_value", + "arbitrary", + "array_agg", + "avg", + "bit_and", + "bit_or", + "bit_xor", + "bitstring_agg", + "bool_and", + "bool_or", + "countif", + "favg", + "fsum", + "geomean", + "kahan_sum", + "last", + "max", + "min", + "product", + "string_agg", + "sumkahan", +] + +STRING_AGGS = [ + "count", + "any_value", + "arbitrary", + "first", + "countif", + "last", + "string_agg", +] + +FILTER_OPS = [ + "==", + "!=", + "LIKE", + "IS DISTINCT FROM", + "IS NOT DISTINCT FROM", + ">=", + "<=", + ">", + "<", +] + + +class ClickhouseVirtualSession: + def __init__(self, callback, db): + self.session = perspective.VirtualServer(ClickhouseVirtualSessionModel(db)) + self.callback = callback + + def handle_request(self, msg): + self.callback(self.session.handle_request(msg)) + + +class ClickhouseVirtualServer: + def __init__(self, db): + self.db = db + + def new_session(self, callback): + return ClickhouseVirtualSession(callback, self.db) + + +class ClickhouseVirtualSessionModel(VirtualSessionModel): + """ + An implementation of a `perspective.VirtualSessionModel` for ClickHouse. 
+ """ + + def __init__(self, db): + self.db = db + self.sql_builder = perspective.GenericSQLVirtualServerModel( + {"create_entity": "VIEW", "grouping_fn": "GROUPING"} + ) + + def get_features(self): + return { + "group_by": True, + "split_by": False, + "sort": True, + "expressions": True, + "filter_ops": { + "integer": FILTER_OPS, + "float": FILTER_OPS, + "string": FILTER_OPS, + "boolean": FILTER_OPS, + "date": FILTER_OPS, + "datetime": FILTER_OPS, + }, + "aggregates": { + "integer": NUMBER_AGGS, + "float": NUMBER_AGGS, + "string": STRING_AGGS, + "boolean": STRING_AGGS, + "date": STRING_AGGS, + "datetime": STRING_AGGS, + }, + } + + def get_hosted_tables(self): + query = "SHOW TABLES" + results = run_query(self.db, query) + return [result[0] for result in results] + + def table_schema(self, table_name, config=None): + query = self.sql_builder.table_schema(table_name) + results = run_query(self.db, query) + schema = {} + for result in results: + col_name = result[0] + if not col_name.startswith("__"): + schema[col_name] = clickhouse_type_to_psp(result[1]) + + return schema + + def view_column_size(self, view_name, config): + query = f"SELECT COUNT() FROM system.columns WHERE table = '{view_name}'" + results = run_query(self.db, query) + gs = len(config["group_by"]) + return results[0][0] - ( + 0 if gs == 0 else gs + (1 if len(config["split_by"]) == 0 else 0) + ) + + def table_size(self, table_name): + query = self.sql_builder.table_size(table_name) + results = run_query(self.db, query) + return results[0][0] + + def table_make_view(self, table_name, view_name, config): + query = self.sql_builder.table_make_view(table_name, view_name, config) + run_query(self.db, query, execute=True) + + def table_validate_expression(self, view_name, expression): + query = self.sql_builder.table_validate_expression(view_name, expression) + results = run_query(self.db, query) + return clickhouse_type_to_psp(results[0][1]) + + def view_delete(self, view_name): + query = 
self.sql_builder.view_delete(view_name) + run_query(self.db, query, execute=True) + + def view_get_data(self, view_name, config, schema, viewport, data): + group_by = config["group_by"] + split_by = config["split_by"] + query = self.sql_builder.view_get_data(view_name, config, viewport, schema) + results, columns, dtypes = run_query(self.db, query, columns=True) + for cidx, col in enumerate(columns): + if cidx == 0 and len(group_by) > 0 and len(split_by) == 0: + continue + + if len(split_by) > 0 and not col.startswith("__ROW_PATH_"): + col = col.replace("_", "|") + + # print( + # dtypes[cidx], type(dtypes[cidx]), dir(dtypes[cidx]), dtypes[cidx].name + # ) + + dtype = clickhouse_type_to_psp(str(dtypes[cidx])) + for ridx, row in enumerate(results): + grouping_id = ( + row[0] if len(group_by) > 0 and len(split_by) == 0 else None + ) + + value = row[cidx] + if dtype == "string" and not isinstance(value, str): + value = str(value) + + data.set_col(dtype, col, ridx, value, grouping_id) + + +################################################################################ +# +# ClickHouse Utils + + +def clickhouse_type_to_psp(name): + """Convert a ClickHouse `dtype` to a Perspective `ColumnType`.""" + if name.startswith("Nullable(") and name.endswith(")"): + name = name[9:-1] + + if name.startswith("Array"): + return "string" + + if name in ("Int64", "UInt64", "Float64"): + return "float" + + if name == "String": + return "string" + + if name == "DateTime": + return "datetime" + + if name == "Date": + return "date" + + msg = f"Unknown type '{name}'" + raise ValueError(msg) + + +def run_query(db, query, execute=False, columns=False): + query = " ".join(query.split()) + start = datetime.now() + result = None + try: + if execute: + db.command(query) + else: + req = db.query(query) + result = req.result_rows + except Exception as e: + logger.error(e) + logger.error(f"{query}") + raise e + else: + logger.debug(f"{datetime.now() - start} {query}") + if columns: + return ( + 
result, + req.column_names, + [(x.name if hasattr(x, "name") else str(x)) for x in req.column_types], + ) + else: + return result diff --git a/rust/perspective-python/perspective/virtual_servers/duckdb.py b/rust/perspective-python/perspective/virtual_servers/duckdb.py index 98dae80219..f1b2567e6b 100644 --- a/rust/perspective-python/perspective/virtual_servers/duckdb.py +++ b/rust/perspective-python/perspective/virtual_servers/duckdb.py @@ -18,24 +18,6 @@ from perspective.virtual_servers import VirtualSessionModel -# TODO(texodus): Missing these features -# -# - `min_max` API for value-coloring and value-sizing. -# -# - row expand/collapse in the datagrid needs datamodel support, this is -# likely a "collapsed" boolean column in the temp table we `UPDATE`. -# -# - `on_update` real-time support will be method which takes sa view name and -# a handler and calls the handler when the view needs to be recalculated. -# -# Nice to have: -# -# - Optional `view_change` method can be implemented for engine optimization, -# defaulting to just delete & recreate (as Perspective engine does now). -# -# - Would like to add a metadata API so that e.g. Viewer debug panel could -# show internal generated SQL. 
- NUMBER_AGGS = [ "sum", @@ -120,6 +102,7 @@ class DuckDBVirtualSessionModel(VirtualSessionModel): def __init__(self, db): self.db = db + self.sql_builder = perspective.GenericSQLVirtualServerModel() def get_features(self): return { @@ -146,22 +129,23 @@ def get_features(self): } def get_hosted_tables(self): - logger.info("SHOW ALL TABLES") - results = self.db.sql("SHOW ALL TABLES").fetchall() + query = self.sql_builder.get_hosted_tables() + results = run_query(self.db, query) return [result[2] for result in results] - def table_schema(self, table_name): - query = f"DESCRIBE {table_name}" + def table_schema(self, table_name, config=None): + query = self.sql_builder.table_schema(table_name) results = run_query(self.db, query) - return { - result[0].split("_")[-1]: duckdb_type_to_psp(result[1]) - for result in results - if not (result[0].startswith("__") and result[0].endswith("__")) - } + schema = {} + for result in results: + col_name = result[0] + if not col_name.startswith("__"): + schema[col_name] = duckdb_type_to_psp(result[1]) + + return schema def view_column_size(self, table_name, config): - # TODO split this into 2 methods - query = f"SELECT COUNT(*) FROM (DESCRIBE {table_name})" + query = self.sql_builder.view_column_size(table_name) results = run_query(self.db, query) gs = len(config["group_by"]) return results[0][0] - ( @@ -169,207 +153,41 @@ def view_column_size(self, table_name, config): ) def table_size(self, table_name): - query = f"SELECT COUNT(*) FROM {table_name}" + query = self.sql_builder.table_size(table_name) results = run_query(self.db, query) return results[0][0] def table_make_view(self, table_name, view_name, config): - columns = config["columns"] - group_by = config["group_by"] - split_by = config["split_by"] - aggregates = config["aggregates"] - sort = config["sort"] - - def col_name(col): - return expr if (expr := config["expressions"].get(col)) else f'"{col}"' - - def select_clause(): - if len(group_by) > 0: - for col in columns: - 
yield f'{aggregates.get(col)}({col_name(col)}) as "{col}"' - - if len(split_by) == 0: - for idx, group in enumerate(group_by): - yield f"{col_name(group)} as __ROW_PATH_{idx}__" - - groups = ", ".join(col_name(g) for g in group_by) - yield f"GROUPING_ID({groups}) AS __GROUPING_ID__" - elif len(columns) > 0: - for col in columns: - yield f'''{col_name(col)} as "{col.replace('"', '""')}"''' - - def order_by_clause(): - if len(group_by) > 0: - for gidx in range(len(group_by)): - groups = ", ".join(col_name(g) for g in group_by[: (gidx + 1)]) - if len(split_by) == 0: - yield f"""GROUPING_ID({groups}) DESC""" - - for sort_col, sort_dir in sort: - if sort_dir != "none": - agg = aggregates.get(sort_col) - if gidx >= len(group_by) - 1: - yield f"{agg}({col_name(sort_col)}) {sort_dir}" - else: - yield f""" - first({agg}({col_name(sort_col)})) - OVER __WINDOW_{gidx}__ {sort_dir} - """ - - yield f"__ROW_PATH_{gidx}__ ASC" - else: - for sort_col, sort_dir in sort: - if sort_dir is not None: - yield f"{col_name(sort_col)} {sort_dir}" - - def window_clause(): - if len(config["sort"]) == 0: - return - - for gidx in range(len(group_by) - 1): - partition = ", ".join(f"__ROW_PATH_{i}__" for i in range(gidx + 1)) - sub_groups = ", ".join(col_name(g) for g in group_by[: (gidx + 1)]) - groups = ", ".join(col_name(g) for g in group_by) - yield f""" - __WINDOW_{gidx}__ AS ( - PARTITION BY - GROUPING_ID({sub_groups}), - {partition} - ORDER BY - {groups} - )""" - - def where_clause(): - for name, op, value in config["filter"]: - if value is not None: - term_lit = f"'{value}'" if isinstance(value, str) else str(value) - yield f"{col_name(name)} {op} {term_lit}" - - if len(split_by) > 0: - query = "SELECT * FROM {}".format(table_name) - else: - query = "SELECT {} FROM {}".format(", ".join(select_clause()), table_name) - - # else: - # for split in split_by: - # extra_cols_query = f""" - # SELECT DISTINCT {f'"{split}"'} - # FROM {table_name} - # """ - # results = 
self.db.sql(extra_cols_query).fetchall() - # real_columns = [] - # for result in results: - # for idx, col in enumerate(columns): - # real_columns.append( - # f'"{result[0]}_{col}" AS "{result[0]}|{col}"' - # ) - - if len(where := list(where_clause())) > 0: - query = "{} WHERE {}".format(query, " AND ".join(where)) - - if len(split_by) > 0: - groups = ", ".join(col_name(x) for x in group_by) - group_aliases = ", ".join( - f"{col_name(x)} AS __ROW_PATH_{i}__" for i, x in enumerate(group_by) - ) - - query = f""" - SELECT * EXCLUDE ({groups}), {group_aliases} FROM ( - PIVOT ({query}) - ON {", ".join(f'"{c}"' for c in split_by)} - USING {", ".join(select_clause())} - GROUP BY {groups} - ) - """ - - elif len(group_by) > 0: - groups = ", ".join(col_name(x) for x in group_by) - query = f"{query} GROUP BY ROLLUP({groups})" - - if len(window := list(window_clause())) > 0: - query = f"{query} WINDOW {', '.join(window)}" - - if len(order_by := list(order_by_clause())) > 0: - query = f"{query} ORDER BY {', '.join(order_by)}" - - query = f"CREATE TEMPORARY TABLE {view_name} AS ({query})" + query = self.sql_builder.table_make_view(table_name, view_name, config) run_query(self.db, query, execute=True) def table_validate_expression(self, view_name, expression): - query = f"DESCRIBE (select {expression} from {view_name})" + query = self.sql_builder.table_validate_expression(view_name, expression) results = run_query(self.db, query) return duckdb_type_to_psp(results[0][1]) def view_delete(self, view_name): - query = f"DROP TABLE {view_name}" + query = self.sql_builder.view_delete(view_name) run_query(self.db, query, execute=True) - def view_get_data(self, view_name, config, viewport, data): + def view_get_data(self, view_name, config, schema, viewport, data): group_by = config["group_by"] split_by = config["split_by"] - start_col = viewport.get("start_col") - end_col = viewport.get("end_col") - - limit = "" - if (end_row := viewport.get("end_row")) is not None: - start_row = 
viewport.get("start_row", 0) - limit = f"LIMIT {end_row - start_row} OFFSET {start_row}" - - col_limit = "" - if end_col is not None: - col_limit = f"LIMIT {end_col - start_col} OFFSET {start_col}" - - group_by_columns = "" - if len(group_by) > 0: - if len(split_by) == 0: - row_paths = ["__GROUPING_ID__"] - else: - row_paths = [] - - row_paths.extend(f"__ROW_PATH_{idx}__" for idx in range(len(group_by))) - group_by_columns = f"{', '.join(row_paths)}," - - query = f""" - SET VARIABLE col_names = ( - SELECT list(column_name) FROM ( - SELECT column_name - FROM (DESCRIBE {view_name}) - WHERE not(starts_with(column_name, '__')) - {col_limit} - ) - ); - - SELECT - {group_by_columns} - COLUMNS(c -> list_contains(getvariable('col_names'), c)) - FROM {view_name} {limit} - """ - + query = self.sql_builder.view_get_data(view_name, config, viewport, schema) results, columns, dtypes = run_query(self.db, query, columns=True) for cidx, col in enumerate(columns): if cidx == 0 and len(group_by) > 0 and len(split_by) == 0: continue - group_by_index = None - max_grouping_id = None - if len(prefix := col.split("__ROW_PATH_")) > 1: - group_by_index = int(prefix[1].split("__")[0]) - max_grouping_id = 2 ** (len(group_by) - group_by_index) - 1 + if len(split_by) > 0 and not col.startswith("__ROW_PATH_"): + col = col.replace("_", "|") + dtype = duckdb_type_to_psp(str(dtypes[cidx])) for ridx, row in enumerate(results): - dtype = duckdb_type_to_psp(dtypes[cidx]) - if ( - len(split_by) > 0 - or max_grouping_id is None - or row[0] < max_grouping_id - ): - data.set_col( - dtype, - col.replace("_", "|"), - ridx, - row[cidx], - group_by_index=group_by_index, - ) + grouping_id = ( + row[0] if len(group_by) > 0 and len(split_by) == 0 else None + ) + data.set_col(dtype, col, ridx, row[cidx], grouping_id) ################################################################################ @@ -377,24 +195,6 @@ def view_get_data(self, view_name, config, viewport, data): # DuckDB Utils -def 
val_to_duckdb_lit(value): - """ - Convert a Python value to a string representation of this values suitable - for SQL injecting. - """ - if isinstance(value, str): - return f"'{value}'" - return str(value) - - -def sort_to_duckdb_sort(sortdir): - if sortdir == "asc": - return "ASC" - if sortdir == "desc": - return "DESC" - return "DESC" - - def duckdb_type_to_psp(name): """Convert a DuckDB `dtype` to a Perspective `ColumnType`.""" if name == "VARCHAR": diff --git a/rust/perspective-python/requirements.txt b/rust/perspective-python/requirements.txt index 1ae58748da..e1e4262195 100644 --- a/rust/perspective-python/requirements.txt +++ b/rust/perspective-python/requirements.txt @@ -1,4 +1,5 @@ pytest>=7.4.3 +clickhouse_connect==0.11.0 Faker==26.0.0 ipywidgets==8.1.3 jupyterlab==4.2.3 diff --git a/rust/perspective-python/src/lib.rs b/rust/perspective-python/src/lib.rs index 0b77950122..1505122135 100644 --- a/rust/perspective-python/src/lib.rs +++ b/rust/perspective-python/src/lib.rs @@ -72,6 +72,7 @@ fn perspective(py: Python, m: &Bound) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; + m.add_class::()?; m.add("PerspectiveError", py.get_type::())?; m.add_function(wrap_pyfunction!(num_cpus, m)?)?; m.add_function(wrap_pyfunction!(set_num_cpus, m)?)?; diff --git a/rust/perspective-python/src/server/generic_sql_model.rs b/rust/perspective-python/src/server/generic_sql_model.rs new file mode 100644 index 0000000000..680ae5e0ab --- /dev/null +++ b/rust/perspective-python/src/server/generic_sql_model.rs @@ -0,0 +1,167 @@ +// ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ +// ┃ ██████ ██████ ██████ █ █ █ █ █ █▄ ▀███ █ ┃ +// ┃ ▄▄▄▄▄█ █▄▄▄▄▄ ▄▄▄▄▄█ ▀▀▀▀▀█▀▀▀▀▀ █ ▀▀▀▀▀█ ████████▌▐███ ███▄ ▀█ █ ▀▀▀▀▀ ┃ +// ┃ █▀▀▀▀▀ █▀▀▀▀▀ █▀██▀▀ ▄▄▄▄▄ █ ▄▄▄▄▄█ ▄▄▄▄▄█ ████████▌▐███ █████▄ █ ▄▄▄▄▄ ┃ +// ┃ █ ██████ █ ▀█▄ █ ██████ █ ███▌▐███ ███████▄ █ ┃ +// ┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫ +// ┃ 
Copyright (c) 2017, the Perspective Authors. ┃ +// ┃ ╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌ ┃ +// ┃ This file is part of the Perspective library, distributed under the terms ┃ +// ┃ of the [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0). ┃ +// ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛ + +use std::str::FromStr; + +use indexmap::IndexMap; +use perspective_client::config::ViewConfig; +use perspective_client::proto::{ColumnType, ViewPort}; +use perspective_client::virtual_server::GenericSQLVirtualServerModel; +use pyo3::exceptions::PyValueError; +use pyo3::types::{PyAnyMethods, PyDict, PyDictMethods}; +use pyo3::{Py, PyAny, PyResult, Python, pyclass, pymethods}; + +#[pyclass(name = "GenericSQLVirtualServerModel")] +pub struct PyGenericSQLVirtualServerModel { + inner: GenericSQLVirtualServerModel, +} + +#[pymethods] +impl PyGenericSQLVirtualServerModel { + #[new] + pub fn new(py: Python<'_>, config: Option>) -> Self { + Self { + inner: GenericSQLVirtualServerModel::new( + config + .map(|x| pythonize::depythonize(x.bind(py)).unwrap()) + .unwrap_or_default(), + ), + } + } + + pub fn get_hosted_tables(&self) -> PyResult { + self.inner + .get_hosted_tables() + .map_err(|e| PyValueError::new_err(e.to_string())) + } + + pub fn table_schema(&self, table_id: &str) -> PyResult { + self.inner + .table_schema(table_id) + .map_err(|e| PyValueError::new_err(e.to_string())) + } + + pub fn table_size(&self, table_id: &str) -> PyResult { + self.inner + .table_size(table_id) + .map_err(|e| PyValueError::new_err(e.to_string())) + } + + pub fn view_column_size(&self, view_id: &str) -> PyResult { + self.inner + .view_column_size(view_id) + .map_err(|e| PyValueError::new_err(e.to_string())) + } + + pub fn table_validate_expression(&self, table_id: &str, expression: &str) -> PyResult { + self.inner + .table_validate_expression(table_id, expression) + .map_err(|e| PyValueError::new_err(e.to_string())) + } 
+ + pub fn view_delete(&self, view_id: &str) -> PyResult { + self.inner + .view_delete(view_id) + .map_err(|e| PyValueError::new_err(e.to_string())) + } + + pub fn table_make_view( + &self, + table_id: &str, + view_id: &str, + config: Py, + ) -> PyResult { + let config: ViewConfig = Python::with_gil(|py| { + pythonize::depythonize(config.bind(py)) + .map_err(|e| PyValueError::new_err(e.to_string())) + })?; + + self.inner + .table_make_view(table_id, view_id, &config) + .map_err(|e| PyValueError::new_err(e.to_string())) + } + + pub fn view_get_data( + &self, + view_id: &str, + config: Py, + viewport: Py, + schema: Py, + ) -> PyResult { + Python::with_gil(|py| { + let config: ViewConfig = pythonize::depythonize(config.bind(py)) + .map_err(|e| PyValueError::new_err(e.to_string()))?; + + let viewport: PyViewPort = pythonize::depythonize(viewport.bind(py)) + .map_err(|e| PyValueError::new_err(e.to_string()))?; + + let schema = + self.parse_schema(schema.downcast_bound::(py).map_err(|_| { + PyValueError::new_err("Schema must be a dict mapping column names to types") + })?)?; + + self.inner + .view_get_data(view_id, &config, &viewport.into(), &schema) + .map_err(|e| PyValueError::new_err(e.to_string())) + }) + } + + pub fn view_schema(&self, view_id: &str) -> PyResult { + self.inner + .view_schema(view_id) + .map_err(|e| PyValueError::new_err(e.to_string())) + } + + pub fn view_size(&self, view_id: &str) -> PyResult { + self.inner + .view_size(view_id) + .map_err(|e| PyValueError::new_err(e.to_string())) + } +} + +impl PyGenericSQLVirtualServerModel { + fn parse_schema( + &self, + schema: &pyo3::Bound<'_, PyDict>, + ) -> PyResult> { + let mut result = IndexMap::new(); + for (key, value) in schema.iter() { + let key: String = key.extract()?; + let value: String = value.extract()?; + let column_type = ColumnType::from_str(&value) + .map_err(|_| PyValueError::new_err(format!("Unknown column type: {}", value)))?; + + result.insert(key, column_type); + } + + Ok(result) + } +} 
+ +#[derive(serde::Deserialize)] +struct PyViewPort { + start_row: Option, + start_col: Option, + end_row: Option, + end_col: Option, +} + +impl From for ViewPort { + fn from(value: PyViewPort) -> Self { + ViewPort { + start_row: value.start_row, + start_col: value.start_col, + end_row: value.end_row, + end_col: value.end_col, + } + } +} diff --git a/rust/perspective-python/src/server/mod.rs b/rust/perspective-python/src/server/mod.rs index 6ae102bf99..033b2d5521 100644 --- a/rust/perspective-python/src/server/mod.rs +++ b/rust/perspective-python/src/server/mod.rs @@ -10,6 +10,7 @@ // ┃ of the [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0). ┃ // ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛ +pub(crate) mod generic_sql_model; mod server_async; mod server_sync; pub(crate) mod session_async; diff --git a/rust/perspective-python/src/server/virtual_server_sync.rs b/rust/perspective-python/src/server/virtual_server_sync.rs index 86cbde1693..55ad12aeda 100644 --- a/rust/perspective-python/src/server/virtual_server_sync.rs +++ b/rust/perspective-python/src/server/virtual_server_sync.rs @@ -272,21 +272,25 @@ impl VirtualServerHandler for PyServerHandler { &self, view_id: &str, config: &perspective_client::config::ViewConfig, + schema: &IndexMap, viewport: &perspective_client::proto::ViewPort, ) -> VirtualServerFuture<'_, Result> { let handler = Python::with_gil(|py| self.0.clone_ref(py)); let view_id = view_id.to_string(); let config = config.clone(); + let schema = schema.clone(); let window: PyViewPort = viewport.clone().into(); Box::pin(async move { Python::with_gil(|py| { - let data = PyVirtualDataSlice::default(); + let data = + PyVirtualDataSlice(Arc::new(Mutex::new(VirtualDataSlice::new(config.clone())))); let _ = handler.call_method1( py, pyo3::intern!(py, "view_get_data"), ( &view_id, pythonize::pythonize(py, &config)?, + pythonize::pythonize(py, &schema)?, pythonize::pythonize(py, &window)?, data.clone(), ), @@ 
-324,46 +328,46 @@ impl From for PyViewPort { } } -#[derive(Clone, Default)] +#[derive(Clone)] #[pyclass(name = "VirtualDataSlice")] pub struct PyVirtualDataSlice(Arc>); #[pymethods] impl PyVirtualDataSlice { - #[pyo3(signature=(dtype, name, index, val, group_by_index = None))] + #[pyo3(signature=(dtype, name, index, val, grouping_id = None))] pub fn set_col( &self, dtype: &str, name: &str, index: u32, val: Py, - group_by_index: Option, + grouping_id: Option, ) -> PyResult<()> { match dtype { - "string" => self.set_string_col(name, index, val, group_by_index), - "integer" => self.set_integer_col(name, index, val, group_by_index), - "float" => self.set_float_col(name, index, val, group_by_index), - "date" => self.set_datetime_col(name, index, val, group_by_index), - "datetime" => self.set_datetime_col(name, index, val, group_by_index), - "boolean" => self.set_boolean_col(name, index, val, group_by_index), + "string" => self.set_string_col(name, index, val, grouping_id), + "integer" => self.set_integer_col(name, index, val, grouping_id), + "float" => self.set_float_col(name, index, val, grouping_id), + "date" => self.set_datetime_col(name, index, val, grouping_id), + "datetime" => self.set_datetime_col(name, index, val, grouping_id), + "boolean" => self.set_boolean_col(name, index, val, grouping_id), _ => Err(PyValueError::new_err("Unknown type")), } } - #[pyo3(signature=(name, index, val, group_by_index = None))] + #[pyo3(signature=(name, index, val, grouping_id = None))] pub fn set_string_col( &self, name: &str, index: u32, val: Py, - group_by_index: Option, + grouping_id: Option, ) -> PyResult<()> { Python::with_gil(|py| { if val.is_none(py) { self.0 .lock() .unwrap() - .set_col(name, group_by_index, index as usize, None as Option) + .set_col(name, grouping_id, index as usize, None as Option) .unwrap(); } else if let Ok(val) = val.downcast_bound::(py) { self.0 @@ -371,7 +375,7 @@ impl PyVirtualDataSlice { .unwrap() .set_col( name, - group_by_index, + grouping_id, 
index as usize, val.extract::().ok(), ) @@ -384,26 +388,26 @@ impl PyVirtualDataSlice { }) } - #[pyo3(signature=(name, index, val, group_by_index = None))] + #[pyo3(signature=(name, index, val, grouping_id = None))] pub fn set_integer_col( &self, name: &str, index: u32, val: Py, - group_by_index: Option, + grouping_id: Option, ) -> PyResult<()> { Python::with_gil(|py| { if val.is_none(py) { self.0 .lock() .unwrap() - .set_col(name, group_by_index, index as usize, None as Option) + .set_col(name, grouping_id, index as usize, None as Option) .unwrap(); } else if let Ok(val) = val.extract::(py) { self.0 .lock() .unwrap() - .set_col(name, group_by_index, index as usize, Some(val)) + .set_col(name, grouping_id, index as usize, Some(val)) .unwrap(); } else { tracing::error!("Unhandled") @@ -413,26 +417,26 @@ impl PyVirtualDataSlice { }) } - #[pyo3(signature=(name, index, val, group_by_index = None))] + #[pyo3(signature=(name, index, val, grouping_id = None))] pub fn set_float_col( &self, name: &str, index: u32, val: Py, - group_by_index: Option, + grouping_id: Option, ) -> PyResult<()> { Python::with_gil(|py| { if val.is_none(py) { self.0 .lock() .unwrap() - .set_col(name, group_by_index, index as usize, None as Option) + .set_col(name, grouping_id, index as usize, None as Option) .unwrap(); } else if let Ok(val) = val.extract::(py) { self.0 .lock() .unwrap() - .set_col(name, group_by_index, index as usize, Some(val)) + .set_col(name, grouping_id, index as usize, Some(val)) .unwrap(); } else { tracing::error!("Unhandled") @@ -442,26 +446,26 @@ impl PyVirtualDataSlice { }) } - #[pyo3(signature=(name, index, val, group_by_index = None))] + #[pyo3(signature=(name, index, val, grouping_id = None))] pub fn set_boolean_col( &self, name: &str, index: u32, val: Py, - group_by_index: Option, + grouping_id: Option, ) -> PyResult<()> { Python::with_gil(|py| { if val.is_none(py) { self.0 .lock() .unwrap() - .set_col(name, group_by_index, index as usize, None as Option) + 
.set_col(name, grouping_id, index as usize, None as Option) .unwrap(); } else if let Ok(val) = val.extract::(py) { self.0 .lock() .unwrap() - .set_col(name, group_by_index, index as usize, Some(val)) + .set_col(name, grouping_id, index as usize, Some(val)) .unwrap(); } else { tracing::error!("Unhandled") @@ -471,20 +475,20 @@ impl PyVirtualDataSlice { }) } - #[pyo3(signature=(name, index, val, group_by_index = None))] + #[pyo3(signature=(name, index, val, grouping_id = None))] pub fn set_datetime_col( &self, name: &str, index: u32, val: Py, - group_by_index: Option, + grouping_id: Option, ) -> PyResult<()> { Python::with_gil(|py| { if val.is_none(py) { self.0 .lock() .unwrap() - .set_col(name, group_by_index, index as usize, None as Option) + .set_col(name, grouping_id, index as usize, None as Option) .unwrap(); } else if let Ok(val) = val.downcast_bound::(py) { let dt: DateTime = Utc @@ -501,13 +505,13 @@ impl PyVirtualDataSlice { self.0 .lock() .unwrap() - .set_col(name, group_by_index, index as usize, Some(timestamp)) + .set_col(name, grouping_id, index as usize, Some(timestamp)) .unwrap(); } else if let Ok(val) = val.extract::(py) { self.0 .lock() .unwrap() - .set_col(name, group_by_index, index as usize, Some(val)) + .set_col(name, grouping_id, index as usize, Some(val)) .unwrap(); } else { tracing::error!("Unhandled") diff --git a/rust/perspective-viewer/Cargo.toml b/rust/perspective-viewer/Cargo.toml index 2577f98d2c..2b15f89bb0 100644 --- a/rust/perspective-viewer/Cargo.toml +++ b/rust/perspective-viewer/Cargo.toml @@ -106,7 +106,7 @@ tracing = { version = ">=0.1.36" } tracing-subscriber = "0.3.15" # Browser API bindings -wasm-bindgen = { version = "=0.2.105", features = [ +wasm-bindgen = { version = "=0.2.106", features = [ "serde-serialize", "enable-interning", ] } diff --git a/rust/perspective-viewer/src/rust/custom_elements/viewer.rs b/rust/perspective-viewer/src/rust/custom_elements/viewer.rs index 1c6bdb020e..f821dba09a 100644 --- 
a/rust/perspective-viewer/src/rust/custom_elements/viewer.rs +++ b/rust/perspective-viewer/src/rust/custom_elements/viewer.rs @@ -77,9 +77,6 @@ pub struct PerspectiveViewerElement { _subscriptions: Rc<[Subscription; 2]>, } -// derive_model!( Renderer, Root, Session, Presentation for -// PerspectiveViewerElement); - impl CustomElementMetadata for PerspectiveViewerElement { const CUSTOM_ELEMENT_NAME: &'static str = "perspective-viewer"; const STATICS: &'static [&'static str] = ["registerPlugin", "getExprTKCommands"].as_slice(); diff --git a/rust/perspective-viewer/src/rust/presentation/sheets.rs b/rust/perspective-viewer/src/rust/presentation/sheets.rs index 88e2b229d8..0f7fb1cb8b 100644 --- a/rust/perspective-viewer/src/rust/presentation/sheets.rs +++ b/rust/perspective-viewer/src/rust/presentation/sheets.rs @@ -46,7 +46,10 @@ fn fill_rule_theme_names( if property == "--theme-name" { let name = style.get_property_value("--theme-name")?; let trimmed = name.trim(); - themes.push(trimmed[1..trimmed.len() - 1].to_owned()); + let theme = &trimmed[1..trimmed.len() - 1]; + if themes.iter().find(|x| x == &theme).is_none() { + themes.push(theme.to_owned()); + } } } }