From c6b38ca756b492b729e876d13a8808786b4ee213 Mon Sep 17 00:00:00 2001 From: Nico Ritschel Date: Sun, 31 May 2026 10:52:12 -0700 Subject: [PATCH 1/3] Harden CLI validation and release workflows --- .github/workflows/ci.yml | 61 +++++++- .../workflows/duckdb-extension-release.yml | 9 +- .github/workflows/publish.yml | 25 ++-- .github/workflows/pyodide-test.yml | 27 +++- .github/workflows/release-rust-binaries.yml | 34 +++-- README.md | 12 +- docs/duckdb-extension.md | 21 ++- docs/pyodide.md | 20 +++ docs/rust-native-runtime-packaging.md | 6 +- docs/rust-native-runtime-roadmap.md | 4 +- docs/rust-parity-matrix.json | 29 ++++ examples/ecommerce/README.md | 34 ++--- examples/hive_parquet/README.md | 4 +- .../hive_parquet_preagg_example.py | 8 +- examples/integrations/chart_example.py | 2 +- examples/motherduck/README.md | 4 +- examples/motherduck/query_examples.py | 4 +- examples/pre_aggregations/README.md | 14 +- examples/pre_aggregations/demo.sh | 8 +- examples/pre_aggregations/sidemantic.yaml | 4 +- pyproject.toml | 2 + sidemantic-duckdb/.gitmodules | 8 -- sidemantic-duckdb/Makefile | 20 +++ sidemantic/charts.py | 2 +- sidemantic/cli.py | 44 +++++- sidemantic/loaders.py | 135 ++++++++++++++++-- sidemantic/validation_runner.py | 77 ++++++++++ sidemantic/workbench/validation_app.py | 73 +--------- tests/core/test_rust_parity.py | 9 ++ tests/test_charts.py | 13 ++ tests/test_cli_commands.py | 94 ++++++++++-- uv.lock | 4 + 32 files changed, 613 insertions(+), 198 deletions(-) create mode 100644 docs/pyodide.md create mode 100644 docs/rust-parity-matrix.json delete mode 100644 sidemantic-duckdb/.gitmodules create mode 100644 sidemantic/validation_runner.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e13a7c0f..aedf00d4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -46,6 +46,58 @@ jobs: - name: Run tests run: uv run pytest -v + base-install-cli: + name: Base Install CLI + runs-on: ubuntu-latest + steps: + - uses: 
actions/checkout@v4 + + - name: Install uv + uses: astral-sh/setup-uv@v5 + with: + enable-cache: true + + - name: Set up Python + run: uv python install 3.12 + + - name: Smoke base CLI install + run: | + set -euo pipefail + tmpdir="$(mktemp -d)" + mkdir -p "$tmpdir/models" + cat > "$tmpdir/models/models.yml" <<'YAML' + models: + - name: orders + table: orders + primary_key: id + dimensions: + - name: status + type: categorical + metrics: + - name: order_count + agg: count + YAML + + uv run --no-project --with . sidemantic --version + uv run --no-project --with . sidemantic validate "$tmpdir/models" + uv run --no-project --with . sidemantic query "SELECT order_count, status FROM orders" --models "$tmpdir/models" --dry-run + + set +e + timeout 10s uv run --no-project --with . sidemantic serve "$tmpdir/models" >"$tmpdir/serve.out" 2>"$tmpdir/serve.err" + serve_status=$? + set -e + if [ "$serve_status" -eq 0 ]; then + echo "base install unexpectedly ran sidemantic serve without the serve extra" + exit 1 + fi + if [ "$serve_status" -eq 124 ]; then + echo "base install unexpectedly started sidemantic serve without the serve extra" + cat "$tmpdir/serve.out" + cat "$tmpdir/serve.err" >&2 + exit 1 + fi + grep -q "sidemantic\\[serve\\]" "$tmpdir/serve.err" + update-schema: name: Update JSON Schema needs: python @@ -267,14 +319,9 @@ jobs: steps: - uses: actions/checkout@v4 - - name: Clone DuckDB dependencies + - name: Fetch DuckDB dependencies working-directory: sidemantic-duckdb - run: | - # These are listed as submodules in .gitmodules but aren't tracked properly - # Clone them directly at the commits the extension was built against - rm -rf duckdb extension-ci-tools - git clone --depth 1 --branch v1.4.2 https://github.com/duckdb/duckdb.git duckdb - git clone --depth 1 --branch v1.4.2 https://github.com/duckdb/extension-ci-tools.git extension-ci-tools + run: make deps DUCKDB_VERSION=v1.4.2 - name: Install Rust uses: dtolnay/rust-toolchain@stable diff --git 
a/.github/workflows/duckdb-extension-release.yml b/.github/workflows/duckdb-extension-release.yml index 50ea4eaf..3afa9dd1 100644 --- a/.github/workflows/duckdb-extension-release.yml +++ b/.github/workflows/duckdb-extension-release.yml @@ -4,7 +4,7 @@ on: workflow_dispatch: inputs: duckdb_version: - description: "DuckDB tag used for extension build and tests" + description: "DuckDB tag used for extension build and tests; currently v1.4.2 only" required: true default: "v1.4.2" type: string @@ -42,12 +42,9 @@ jobs: echo "version=$VERSION" >> "$GITHUB_OUTPUT" - - name: Clone DuckDB dependencies + - name: Fetch DuckDB dependencies working-directory: sidemantic-duckdb - run: | - rm -rf duckdb extension-ci-tools - git clone --depth 1 --branch "${{ steps.duckdb.outputs.version }}" https://github.com/duckdb/duckdb.git duckdb - git clone --depth 1 --branch "${{ steps.duckdb.outputs.version }}" https://github.com/duckdb/extension-ci-tools.git extension-ci-tools + run: make deps DUCKDB_VERSION="${{ steps.duckdb.outputs.version }}" - name: Install Rust uses: dtolnay/rust-toolchain@stable diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 5c0db87c..31194da5 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -73,14 +73,15 @@ jobs: - name: Update lock file run: uv lock + - name: Validate release tree + run: | + uv run ruff check . --exclude docs/_extensions --exclude sidemantic-duckdb/extension-ci-tools --exclude sidemantic-duckdb/scripts --exclude sidemantic-duckdb/duckdb --exclude sidemantic/adapters/malloy_grammar --exclude sidemantic/adapters/holistics_grammar + uv run ruff format --check . 
--exclude docs/_extensions --exclude sidemantic-duckdb/extension-ci-tools --exclude sidemantic-duckdb/scripts --exclude sidemantic-duckdb/duckdb --exclude sidemantic/adapters/malloy_grammar --exclude sidemantic/adapters/holistics_grammar + uv run pytest -v + - name: Build package run: uv build - - name: Publish to PyPI - env: - UV_PUBLISH_TOKEN: ${{ secrets.PYPI_TOKEN }} - run: uv publish --token $UV_PUBLISH_TOKEN - - name: Commit version bump and create tag run: | git config user.name "github-actions[bot]" @@ -91,6 +92,11 @@ jobs: git push origin main git push origin "v${{ steps.version.outputs.new_version }}" + - name: Publish to PyPI + env: + UV_PUBLISH_TOKEN: ${{ secrets.PYPI_TOKEN }} + run: uv publish --token $UV_PUBLISH_TOKEN + - name: Create GitHub Release env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} @@ -98,12 +104,3 @@ jobs: gh release create "v${{ steps.version.outputs.new_version }}" \ --title "v${{ steps.version.outputs.new_version }}" \ --generate-notes - - rust-binaries: - name: Release Rust binaries - needs: publish - permissions: - contents: write - uses: ./.github/workflows/release-rust-binaries.yml - with: - tag: ${{ needs.publish.outputs.tag }} diff --git a/.github/workflows/pyodide-test.yml b/.github/workflows/pyodide-test.yml index f76725b8..71545457 100644 --- a/.github/workflows/pyodide-test.yml +++ b/.github/workflows/pyodide-test.yml @@ -22,6 +22,31 @@ jobs: - name: Build sidemantic wheel run: uv build + - name: Inspect wheel metadata + run: | + python - <<'PY' + import re + from email.parser import Parser + from pathlib import Path + from zipfile import ZipFile + + wheel = next(Path("dist").glob("*.whl")) + with ZipFile(wheel) as archive: + metadata_name = next(name for name in archive.namelist() if name.endswith(".dist-info/METADATA")) + metadata = Parser().parsestr(archive.read(metadata_name).decode()) + + print(metadata) + requires = metadata.get_all("Requires-Dist", []) + extras = set(metadata.get_all("Provides-Extra", [])) + 
heavy_optional = ("textual", "pyarrow", "mcp", "riffq", "altair", "vl-convert-python") + for requirement in requires: + name = re.split(r"[<>=!~;\[ ]", requirement.strip(), maxsplit=1)[0] + if name in heavy_optional: + assert "extra ==" in requirement, requirement + assert "serve" in extras + assert "workbench" in extras + PY + - name: Install Pyodide and dependencies run: npm install pyodide glob @@ -51,7 +76,7 @@ jobs: const wheelName = wheelPath.split('/').pop(); pyodide.FS.writeFile(`/tmp/${wheelName}`, wheelData); - console.log('Installing missing deps and sidemantic...'); + console.log('Installing sidemantic with the documented Pyodide no-deps path...'); await pyodide.runPythonAsync(` import micropip # Install missing pure-Python deps diff --git a/.github/workflows/release-rust-binaries.yml b/.github/workflows/release-rust-binaries.yml index f3cac51b..b4255629 100644 --- a/.github/workflows/release-rust-binaries.yml +++ b/.github/workflows/release-rust-binaries.yml @@ -10,7 +10,7 @@ on: workflow_dispatch: inputs: tag: - description: Git tag for the GitHub Release to attach binaries to, for example v0.1.0. + description: Git tag for the GitHub Release to attach binaries to, for example sidemantic-rs-v0.1.0. 
required: true type: string @@ -72,6 +72,27 @@ jobs: with: ref: ${{ needs.verify-release.outputs.tag }} + - name: Resolve Rust crate version + id: rust_version + shell: bash + run: | + set -euo pipefail + VERSION=$(grep '^version = ' sidemantic-rs/Cargo.toml | sed 's/version = "\(.*\)"/\1/') + echo "version=$VERSION" >> "$GITHUB_OUTPUT" + + - name: Verify release tag matches crate version + shell: bash + env: + RELEASE_TAG: ${{ needs.verify-release.outputs.tag }} + RUST_VERSION: ${{ steps.rust_version.outputs.version }} + run: | + set -euo pipefail + expected_tag="sidemantic-rs-v${RUST_VERSION}" + if [ "$RELEASE_TAG" != "$expected_tag" ]; then + echo "Release tag '$RELEASE_TAG' does not match Rust crate version tag '$expected_tag'" >&2 + exit 1 + fi + - name: Install Rust uses: dtolnay/rust-toolchain@stable with: @@ -115,11 +136,11 @@ jobs: if: runner.os != 'Windows' env: ASSET_SUFFIX: ${{ matrix.asset_suffix }} - TAG: ${{ needs.verify-release.outputs.tag }} + RUST_VERSION: ${{ steps.rust_version.outputs.version }} TARGET: ${{ matrix.target }} run: | set -euo pipefail - version="${TAG#v}" + version="$RUST_VERSION" asset_name="sidemantic-rs-${version}-${ASSET_SUFFIX}" archive_path="dist/${asset_name}.tar.gz" checksum_path="${archive_path}.sha256" @@ -147,14 +168,11 @@ jobs: if: runner.os == 'Windows' env: ASSET_SUFFIX: ${{ matrix.asset_suffix }} - TAG: ${{ needs.verify-release.outputs.tag }} + RUST_VERSION: ${{ steps.rust_version.outputs.version }} TARGET: ${{ matrix.target }} shell: pwsh run: | - $version = $env:TAG - if ($version.StartsWith("v")) { - $version = $version.Substring(1) - } + $version = $env:RUST_VERSION $assetName = "sidemantic-rs-$version-$env:ASSET_SUFFIX" $archivePath = "dist/$assetName.zip" $checksumPath = "$archivePath.sha256" diff --git a/README.md b/README.md index 21d28ab6..c156cf15 100644 --- a/README.md +++ b/README.md @@ -129,13 +129,13 @@ result = layer.sql("SELECT revenue, status FROM orders") sidemantic query "SELECT revenue FROM 
orders" --db data.duckdb # Interactive workbench (TUI with SQL editor + charts) -sidemantic workbench models/ --db data.duckdb +uvx --from "sidemantic[workbench]" sidemantic workbench models/ --db data.duckdb # PostgreSQL server (connect Tableau, DBeaver, etc.) -sidemantic serve models/ --port 5433 +uvx --from "sidemantic[serve]" sidemantic serve models/ --port 5433 # HTTP API server (JSON or Arrow) -sidemantic api-serve models/ --port 4400 --auth-token secret +uvx --from "sidemantic[api]" sidemantic api-serve models/ --port 4400 --auth-token secret # Validate definitions sidemantic validate models/ @@ -159,7 +159,7 @@ uvx --from "sidemantic[workbench]" sidemantic workbench --demo **PostgreSQL server** (connect Tableau, DBeaver, etc.): ```bash -uvx sidemantic serve --demo --port 5433 +uvx --from "sidemantic[serve]" sidemantic serve --demo --port 5433 ``` **HTTP API server** (JSON or Arrow): @@ -227,7 +227,7 @@ See `examples/` for more. - Multi-format adapters (Cube, MetricFlow, LookML, Hex, Rill, Superset, Omni, BSL, GoodData LDM, OSI, AtScale SML, ThoughtSpot TML, Graphene GSQL) - SQLGlot-based SQL generation and transpilation - Pydantic validation and type safety -- Pre-aggregations with automatic routing +- Pre-aggregations with explicit routing - Predicate pushdown for faster queries - Segments and metric-level filters - Jinja2 templating for dynamic SQL @@ -285,7 +285,7 @@ For Cloudflare Worker + Container deployment, see [`examples/cloudflare_containe Start the API server: ```bash -sidemantic api-serve models/ --db data.duckdb --port 4400 --auth-token secret +uvx --from "sidemantic[api]" sidemantic api-serve models/ --db data.duckdb --port 4400 --auth-token secret ``` Compile a structured semantic query: diff --git a/docs/duckdb-extension.md b/docs/duckdb-extension.md index d43ddad3..028ac70d 100644 --- a/docs/duckdb-extension.md +++ b/docs/duckdb-extension.md @@ -14,6 +14,11 @@ LOAD sidemantic; ``` Until community publication is complete, use a local 
extension artifact. +Local artifacts are unsigned, so start the DuckDB CLI with unsigned-extension loading enabled: + +```bash +duckdb -unsigned +``` ## Build From Source @@ -21,13 +26,15 @@ The extension build needs Rust, DuckDB extension build tooling, and Ninja. ```bash cd sidemantic-duckdb -rm -rf duckdb extension-ci-tools -git clone --depth 1 --branch v1.4.2 https://github.com/duckdb/duckdb.git duckdb -git clone --depth 1 --branch v1.4.2 https://github.com/duckdb/extension-ci-tools.git extension-ci-tools +make deps DUCKDB_VERSION=v1.4.2 make make test ``` +`DUCKDB_VERSION` is intentionally guarded to `v1.4.2` because the repository +vendors a matching `extension-ci-tools` checkout. Update both together before +building against a different DuckDB tag. + The local loadable extension is produced at: ```text @@ -36,10 +43,16 @@ sidemantic-duckdb/build/release/extension/sidemantic/sidemantic.duckdb_extension Load it in the DuckDB shell built by the extension workflow: +```bash +./build/release/duckdb -unsigned +``` + ```sql LOAD 'build/release/extension/sidemantic/sidemantic.duckdb_extension'; ``` +For embedded clients, set DuckDB's `allow_unsigned_extensions` database configuration before opening the connection. + ## Runtime API Load native YAML: @@ -88,7 +101,7 @@ Use `.github/workflows/duckdb-extension-release.yml`. The workflow: -- clones DuckDB and `extension-ci-tools` for the selected DuckDB tag, +- fetches DuckDB with `make deps` and verifies the vendored `extension-ci-tools`, - builds the Rust-backed extension, - runs the DuckDB sqllogictests, including native YAML load, native SQL definition file load, relationship rewrite, semantic select, persistence, and invalid-version coverage, - uploads a Linux extension artifact, diff --git a/docs/pyodide.md b/docs/pyodide.md new file mode 100644 index 00000000..fee56e5c --- /dev/null +++ b/docs/pyodide.md @@ -0,0 +1,20 @@ +# Pyodide Runtime + +Sidemantic's browser/WASM path is a no-dependency wheel install. 
Install the Pyodide-compatible runtime packages first, then install the Sidemantic wheel with dependency resolution disabled: + +```python +import micropip + +await pyodide.loadPackage(["micropip", "pydantic", "pyyaml", "jinja2"]) +await micropip.install(["sqlglot", "lkml", "inflect"], deps=False) +await micropip.install("emfs:/tmp/sidemantic-X.Y.Z-py3-none-any.whl", deps=False) +``` + +This is intentional. The published Python package includes CLI/database dependencies that are not part of the Pyodide runtime contract. The supported Pyodide import surface is the core semantic model API, for example: + +```python +from sidemantic import Model, Dimension, Metric, Relationship +from sidemantic.core.semantic_graph import SemanticGraph +``` + +Optional server, workbench, chart, and database execution paths are not Pyodide targets. diff --git a/docs/rust-native-runtime-packaging.md b/docs/rust-native-runtime-packaging.md index 4751675a..e8ed2fb1 100644 --- a/docs/rust-native-runtime-packaging.md +++ b/docs/rust-native-runtime-packaging.md @@ -6,7 +6,7 @@ The Rust native runtime is packaged separately from the main `sidemantic` Python | Artifact | Package name | Current version | Release path | |---|---|---:|---| -| Main Python package | `sidemantic` | `0.9.6` | `.github/workflows/publish.yml` | +| Main Python package | `sidemantic` | `0.10.0` | `.github/workflows/publish.yml` | | Rust runtime crate and CLI | `sidemantic` crate, `sidemantic` binary | `0.1.0` | `.github/workflows/rust-runtime-release.yml` | | Python extension wheel | `sidemantic-rs`, module `sidemantic_rs` | `0.1.0` | `.github/workflows/sidemantic-rs-wheels.yml` | | DuckDB extension | `sidemantic.duckdb_extension` | `0.1.0` source package | `.github/workflows/duckdb-extension-release.yml` | @@ -93,7 +93,7 @@ The DuckDB extension is currently documented as a source-build path. Do not docu See `docs/duckdb-extension.md` for build and load commands. 
-Use `.github/workflows/duckdb-extension-release.yml` to build a Linux extension artifact against a selected DuckDB tag and run the sqllogictests. The default DuckDB tag is `v1.4.2`, matching current CI. GitHub release upload is optional and controlled by `create_github_release`. +Use `.github/workflows/duckdb-extension-release.yml` to build a Linux extension artifact and run the sqllogictests. The repository currently supports DuckDB `v1.4.2` only, matching the vendored `extension-ci-tools` checkout. GitHub release upload is optional and controlled by `create_github_release`. Community extension publication remains a separate release step until repository signing, platform matrix, and DuckDB community registry metadata are finalized. @@ -101,7 +101,7 @@ Community extension publication remains a separate release step until repository | Python package | Rust runtime crate | `sidemantic-rs` wheel | Native format | DuckDB extension | DuckDB build target | |---|---:|---:|---:|---:|---:| -| `0.9.6` | `0.1.0` | `0.1.0` | `1` | `0.1.0` source package | `1.4.2` | +| `0.10.0` | `0.1.0` | `0.1.0` | `1` | `0.1.0` source package | `1.4.2` | Compatibility rules: diff --git a/docs/rust-native-runtime-roadmap.md b/docs/rust-native-runtime-roadmap.md index 4cf177a5..3481b5cc 100644 --- a/docs/rust-native-runtime-roadmap.md +++ b/docs/rust-native-runtime-roadmap.md @@ -1225,7 +1225,7 @@ Until then, docs should clearly say "build from source." Track: - Native format version: `1`. -- Python package version: `0.9.x`. +- Python package version: `0.10.x`. - Rust runtime version: `0.1.x`. - DuckDB extension version: `0.1.x`. 
@@ -1233,7 +1233,7 @@ Add compatibility docs: | Python package | Rust runtime | Native format | DuckDB extension | |---|---|---|---| -| `0.9.6` | `0.1.0` | `1` | `0.1.0` | +| `0.10.0` | `0.1.0` | `1` | `0.1.0` | ## Phase 13: Documentation Plan diff --git a/docs/rust-parity-matrix.json b/docs/rust-parity-matrix.json new file mode 100644 index 00000000..2afc9c13 --- /dev/null +++ b/docs/rust-parity-matrix.json @@ -0,0 +1,29 @@ +{ + "version": 1, + "description": "Strict-mode source of truth for Python surfaces that can require Rust-backed execution.", + "status_legend": { + "rust_backed": "The subsystem is backed by sidemantic-rs and may be required by strict mode.", + "rust_backed_opt_in": "A Rust path exists but is not complete enough for strict all-subsystem enforcement.", + "python_only": "The subsystem is intentionally Python-only." + }, + "subsystems": { + "sql_generator_entrypoint": { + "status": "rust_backed", + "python_surface": "SemanticLayer.compile and CLI structured query compilation", + "rust_surface": "sidemantic-rs native query compiler", + "notes": "Covers structured metric/dimension query compilation through the Rust entrypoint." + }, + "semantic_core_query_validation": { + "status": "rust_backed", + "python_surface": "SemanticLayer query reference validation", + "rust_surface": "sidemantic-rs validate_query_references", + "notes": "Covers metric and dimension reference validation before query compilation." + }, + "semantic_sql_rewriter": { + "status": "rust_backed_opt_in", + "python_surface": "QueryRewriter and CLI semantic SQL rewrite", + "rust_surface": "sidemantic-rs semantic SQL rewriter", + "notes": "Rust supports the native semantic SQL subset, but Python remains the full rewrite implementation." 
+ } + } +} diff --git a/examples/ecommerce/README.md b/examples/ecommerce/README.md index aabf628f..e6918a7f 100644 --- a/examples/ecommerce/README.md +++ b/examples/ecommerce/README.md @@ -40,44 +40,44 @@ sidemantic info examples/ecommerce/models ### Interactive workbench ```bash -sidemantic workbench examples/ecommerce/models --db examples/ecommerce/data/ecommerce.db +uvx --from "sidemantic[workbench]" sidemantic workbench examples/ecommerce/models --db examples/ecommerce/data/ecommerce.db ``` ### Query from command line Total revenue: ```bash -sidemantic query examples/ecommerce/models \ - --db examples/ecommerce/data/ecommerce.db \ - --sql "SELECT total_revenue FROM orders" +sidemantic query "SELECT total_revenue FROM orders" \ + --models examples/ecommerce/models \ + --db examples/ecommerce/data/ecommerce.db ``` Revenue by country: ```bash -sidemantic query examples/ecommerce/models \ - --db examples/ecommerce/data/ecommerce.db \ - --sql "SELECT orders.revenue, customers.country FROM orders ORDER BY orders.revenue DESC" +sidemantic query "SELECT orders.revenue, customers.country FROM orders ORDER BY orders.revenue DESC" \ + --models examples/ecommerce/models \ + --db examples/ecommerce/data/ecommerce.db ``` Orders by status: ```bash -sidemantic query examples/ecommerce/models \ - --db examples/ecommerce/data/ecommerce.db \ - --sql "SELECT orders.order_count, orders.revenue, orders.status FROM orders" +sidemantic query "SELECT orders.order_count, orders.revenue, orders.status FROM orders" \ + --models examples/ecommerce/models \ + --db examples/ecommerce/data/ecommerce.db ``` Customer lifetime value by tier: ```bash -sidemantic query examples/ecommerce/models \ - --db examples/ecommerce/data/ecommerce.db \ - --sql "SELECT customer_lifetime_value, customers.tier FROM customers" +sidemantic query "SELECT customer_lifetime_value, customers.tier FROM customers" \ + --models examples/ecommerce/models \ + --db examples/ecommerce/data/ecommerce.db ``` Product 
performance: ```bash -sidemantic query examples/ecommerce/models \ - --db examples/ecommerce/data/ecommerce.db \ - --sql "SELECT order_items.net_revenue, products.category FROM order_items ORDER BY order_items.net_revenue DESC LIMIT 10" +sidemantic query "SELECT order_items.net_revenue, products.category FROM order_items ORDER BY order_items.net_revenue DESC LIMIT 10" \ + --models examples/ecommerce/models \ + --db examples/ecommerce/data/ecommerce.db ``` ### PostgreSQL-compatible server @@ -85,7 +85,7 @@ sidemantic query examples/ecommerce/models \ Start a server that BI tools can connect to: ```bash -sidemantic serve examples/ecommerce/models \ +uvx --from "sidemantic[serve]" sidemantic serve examples/ecommerce/models \ --db examples/ecommerce/data/ecommerce.db \ --port 5433 ``` diff --git a/examples/hive_parquet/README.md b/examples/hive_parquet/README.md index bcc801e9..417ff31f 100644 --- a/examples/hive_parquet/README.md +++ b/examples/hive_parquet/README.md @@ -9,7 +9,7 @@ DuckDB natively supports [Hive-partitioned parquet](https://duckdb.org/docs/data uv run python examples/hive_parquet/hive_parquet_example.py ``` -**Pre-aggregations**: Materialize rollup tables in DuckDB on top of parquet, with automatic query routing. +**Pre-aggregations**: Materialize rollup tables in DuckDB on top of parquet, with explicit query routing. ```bash uv run python examples/hive_parquet/hive_parquet_preagg_example.py ``` @@ -114,7 +114,7 @@ for preagg in events.pre_aggregations: ) ``` -Queries that match a pre-aggregation are automatically routed to the materialized table. The generated SQL includes `used_preagg=true` in the instrumentation comment so you can verify routing is working. +Queries that match a pre-aggregation route to the materialized table when pre-aggregation routing is enabled. The generated SQL includes `used_preagg=true` in the instrumentation comment so you can verify routing is working. See `hive_parquet_preagg_example.py` for the full working example. 
diff --git a/examples/hive_parquet/hive_parquet_preagg_example.py b/examples/hive_parquet/hive_parquet_preagg_example.py index 3dcaebc7..4246ea91 100644 --- a/examples/hive_parquet/hive_parquet_preagg_example.py +++ b/examples/hive_parquet/hive_parquet_preagg_example.py @@ -1,14 +1,14 @@ """Example: DuckDB pre-aggregation layer on top of Hive-partitioned parquet. This builds on the basic hive_parquet_example.py by adding a pre-aggregation -layer that materializes rollup tables in DuckDB. Queries are automatically -routed to the pre-aggregated tables when they match. +layer that materializes rollup tables in DuckDB. Queries route to the +pre-aggregated tables when pre-aggregation routing is enabled and they match. The flow: 1. Create Hive-partitioned parquet data (raw event logs) 2. Define a semantic model with pre-aggregation definitions 3. Materialize the pre-aggregations into DuckDB tables -4. Query -- Sidemantic automatically routes to preagg tables when possible +4. Query with pre-aggregation routing enabled This pattern is useful for: - Large parquet lakes where full scans are expensive @@ -172,7 +172,7 @@ def main(): # Step 4: Query WITH pre-aggregation routing print("=" * 70) - print("Step 4: Query with automatic preagg routing") + print("Step 4: Query with explicit preagg routing") print("=" * 70) print() diff --git a/examples/integrations/chart_example.py b/examples/integrations/chart_example.py index 0473de7d..470eed05 100644 --- a/examples/integrations/chart_example.py +++ b/examples/integrations/chart_example.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # /// script -# dependencies = ["sidemantic[serve]", "duckdb"] +# dependencies = ["sidemantic[charts]", "duckdb"] # /// """Example demonstrating chart generation from semantic layer queries. 
diff --git a/examples/motherduck/README.md b/examples/motherduck/README.md index 9f966a3e..831f8c84 100644 --- a/examples/motherduck/README.md +++ b/examples/motherduck/README.md @@ -7,7 +7,7 @@ This example demonstrates using Sidemantic with MotherDuck, a cloud-based DuckDB - Connect to MotherDuck cloud database - Define semantic models using YAML configuration - Create and refresh pre-aggregations for fast queries -- Query with automatic pre-aggregation routing +- Query with explicit pre-aggregation routing - Data persists in the cloud ## Prerequisites @@ -53,7 +53,7 @@ Two pre-aggregations speed up common queries: - **daily_status**: Orders aggregated by day and status - **monthly_summary**: Orders aggregated by month and status -Pre-aggregations are automatically used when queries match their definition. +Pre-aggregations are used when query routing is enabled and a query matches their definition. ## Queries diff --git a/examples/motherduck/query_examples.py b/examples/motherduck/query_examples.py index 5dfec7bc..b5c01a4c 100644 --- a/examples/motherduck/query_examples.py +++ b/examples/motherduck/query_examples.py @@ -110,9 +110,9 @@ print("\n" + "=" * 80) print("Query Examples Complete!") print("=" * 80) -print("\n✓ All queries used pre-aggregations when available") +print("\n✓ Queries used pre-aggregations when routing was enabled and a match was available") print("✓ Queries are much faster than scanning 10k raw rows") print("\nBenefits:") -print(" - Automatic pre-aggregation routing") +print(" - Explicit pre-aggregation routing") print(" - Fast queries on large datasets") print(" - Refresh periodically to keep data fresh") diff --git a/examples/pre_aggregations/README.md b/examples/pre_aggregations/README.md index 62b7f4b1..66315793 100644 --- a/examples/pre_aggregations/README.md +++ b/examples/pre_aggregations/README.md @@ -120,19 +120,19 @@ This creates tables like: - `preagg.orders_preagg_daily_region` - Daily metrics by region - 
`preagg.orders_preagg_monthly_summary` - Monthly rollup -### 5. Query with Automatic Routing +### 5. Query with Explicit Routing -Queries automatically use pre-aggregations when available: +Queries use pre-aggregations when routing is enabled: ```bash -# Query from CLI - automatically uses pre-aggregations -uvx sidemantic query "SELECT status, revenue FROM orders" +# Query from CLI with pre-aggregation routing enabled +uvx sidemantic query "SELECT status, revenue FROM orders" --models models --use-preaggregations # Or use the interactive workbench -uvx sidemantic workbench +uvx --from "sidemantic[workbench]" sidemantic workbench ``` -The config in `sidemantic.yaml` enables pre-aggregations and sets the schema. +The config in `sidemantic.yaml` sets the pre-aggregation schema. CLI query routing is explicit, so use `--use-preaggregations`. ## How It Works @@ -265,7 +265,7 @@ refresh_preaggs = BashOperator( Query from workbench or check the compiled SQL to see if pre-agg was used: ```bash -uvx sidemantic workbench +uvx --from "sidemantic[workbench]" sidemantic workbench # Run a query and look for "used_preagg=true" in the generated SQL ``` diff --git a/examples/pre_aggregations/demo.sh b/examples/pre_aggregations/demo.sh index 20adac6e..507d81a7 100755 --- a/examples/pre_aggregations/demo.sh +++ b/examples/pre_aggregations/demo.sh @@ -6,7 +6,7 @@ # 2. Discover pre-aggregation opportunities from query patterns # 3. Apply recommendations to model files # 4. Materialize pre-aggregations -# 5. Query with automatic routing +# 5. 
Query with explicit routing set -e @@ -51,10 +51,10 @@ echo "=====================================================================" echo "" echo "Try querying with pre-aggregations enabled:" echo "" -echo " uvx sidemantic workbench" +echo " uvx --from \"sidemantic[workbench]\" sidemantic workbench" echo "" echo "Or test from command line:" echo "" -echo " # This query will automatically use pre-aggregations when available" -echo " uvx sidemantic query \"SELECT status, revenue FROM orders\"" +echo " # Enable pre-aggregation routing explicitly" +echo " uvx sidemantic query \"SELECT status, revenue FROM orders\" --models models --use-preaggregations" echo "" diff --git a/examples/pre_aggregations/sidemantic.yaml b/examples/pre_aggregations/sidemantic.yaml index cd685def..1a90892b 100644 --- a/examples/pre_aggregations/sidemantic.yaml +++ b/examples/pre_aggregations/sidemantic.yaml @@ -7,5 +7,5 @@ connection: # Pre-aggregation configuration preagg_schema: preagg # Store in 'preagg' schema -# Note: Pre-aggregations are automatically used when querying via CLI -# The schema above tells the system where to find/create pre-agg tables +# Note: CLI query routing is explicit; pass --use-preaggregations to opt in. +# The schema above tells the system where to find/create pre-agg tables. 
diff --git a/pyproject.toml b/pyproject.toml index 3d4c96d3..341b1409 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,6 +44,8 @@ mcp = [ ] apps = [ "mcp[cli]>=1.25.0,<2", + "altair>=5.0.0", + "vl-convert-python>=1.0.0", ] charts = [ "altair>=5.0.0", diff --git a/sidemantic-duckdb/.gitmodules b/sidemantic-duckdb/.gitmodules deleted file mode 100644 index 01b02cb4..00000000 --- a/sidemantic-duckdb/.gitmodules +++ /dev/null @@ -1,8 +0,0 @@ -[submodule "duckdb"] - path = duckdb - url = https://github.com/duckdb/duckdb - branch = main -[submodule "extension-ci-tools"] - path = extension-ci-tools - url = https://github.com/duckdb/extension-ci-tools - branch = main \ No newline at end of file diff --git a/sidemantic-duckdb/Makefile b/sidemantic-duckdb/Makefile index 645c52db..3d2705f1 100644 --- a/sidemantic-duckdb/Makefile +++ b/sidemantic-duckdb/Makefile @@ -1,8 +1,28 @@ PROJ_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) +.DEFAULT_GOAL := all # Configuration of extension EXT_NAME=sidemantic EXT_CONFIG=${PROJ_DIR}extension_config.cmake +SUPPORTED_DUCKDB_VERSION := v1.4.2 +DUCKDB_VERSION ?= $(SUPPORTED_DUCKDB_VERSION) + +.PHONY: deps +deps: + @if [ "$(DUCKDB_VERSION)" != "$(SUPPORTED_DUCKDB_VERSION)" ]; then \ + echo "DUCKDB_VERSION=$(DUCKDB_VERSION) is unsupported by the vendored extension-ci-tools checkout."; \ + echo "Use DUCKDB_VERSION=$(SUPPORTED_DUCKDB_VERSION), or update extension-ci-tools and this guard together."; \ + exit 1; \ + fi + @if [ ! 
-d duckdb/.git ]; then \ + rm -rf duckdb; \ + git clone --depth 1 --branch $(DUCKDB_VERSION) https://github.com/duckdb/duckdb.git duckdb; \ + else \ + git -C duckdb fetch --depth 1 origin $(DUCKDB_VERSION); \ + git -C duckdb checkout FETCH_HEAD; \ + fi + @test -f extension-ci-tools/makefiles/duckdb_extension.Makefile || \ + (echo "extension-ci-tools is missing from this checkout" && exit 1) # Include the Makefile from extension-ci-tools include extension-ci-tools/makefiles/duckdb_extension.Makefile diff --git a/sidemantic/charts.py b/sidemantic/charts.py index dd1a599a..aa012f0b 100644 --- a/sidemantic/charts.py +++ b/sidemantic/charts.py @@ -45,7 +45,7 @@ def check_altair_available() -> None: if alt is None or vl_convert is None: raise ImportError( "Altair and vl-convert-python are required for chart generation. " - "Install with: uv add altair vl-convert-python --optional serve" + "Install with: pip install 'sidemantic[charts]' or `uvx --from 'sidemantic[charts]' sidemantic ...`" ) diff --git a/sidemantic/cli.py b/sidemantic/cli.py index 4a605007..6460af77 100644 --- a/sidemantic/cli.py +++ b/sidemantic/cli.py @@ -690,7 +690,16 @@ def serve( """ import logging - from sidemantic.server.server import start_server + try: + from sidemantic.server.server import start_server + except ImportError as exc: + typer.echo( + "Error: `sidemantic serve` requires the optional serve dependencies. " + "Install with `pip install 'sidemantic[serve]'` or run with " + "`uvx --from 'sidemantic[serve]' sidemantic serve ...`.", + err=True, + ) + raise typer.Exit(1) from exc logging.basicConfig(level=logging.INFO) @@ -964,7 +973,7 @@ def validate( """ Validate semantic layer definitions. - Shows errors, warnings, and optionally detailed info in an interactive view. + Shows errors, warnings, and optionally detailed info. 
Examples: sidemantic validate @@ -992,14 +1001,37 @@ def validate( typer.echo(f"Error: Rust validation failed: {e}", err=True) raise typer.Exit(1) - from sidemantic.workbench import WorkbenchDependencyError, run_validation - try: - run_validation(directory, verbose=verbose) - except WorkbenchDependencyError as e: + from sidemantic.validation_runner import validate_directory + + report = validate_directory(directory) + except Exception as e: typer.echo(f"Error: {e}", err=True) raise typer.Exit(1) + typer.echo(f"Validation Results: {directory}") + + if report.errors: + typer.echo("Errors:") + for error in report.errors: + typer.echo(f" - {error}") + + if report.warnings: + typer.echo("Warnings:") + for warning in report.warnings: + typer.echo(f" - {warning}") + + if verbose or not (report.errors or report.warnings): + typer.echo("Info:") + for item in report.info: + typer.echo(f" - {item}") + + if report.errors: + typer.echo("Validation Failed", err=True) + raise typer.Exit(1) + + typer.echo("Validation Passed") + @app.command() def workbench( diff --git a/sidemantic/loaders.py b/sidemantic/loaders.py index 9c9169e7..27fc311f 100644 --- a/sidemantic/loaders.py +++ b/sidemantic/loaders.py @@ -12,7 +12,7 @@ from sidemantic.core.semantic_layer import SemanticLayer -def load_from_directory(layer: "SemanticLayer", directory: str | Path) -> None: +def load_from_directory(layer: "SemanticLayer", directory: str | Path, *, strict: bool = True) -> None: """Load all semantic layer definitions from a directory. Automatically detects and parses Cube, Hex, LookML, and other formats. @@ -21,6 +21,8 @@ def load_from_directory(layer: "SemanticLayer", directory: str | Path) -> None: Args: layer: SemanticLayer to add models to directory: Directory containing semantic layer files + strict: If True, fail on parse errors in detected semantic files. If + False, log parse errors and continue loading other files. 
Example: >>> layer = SemanticLayer() @@ -62,7 +64,7 @@ def load_from_directory(layer: "SemanticLayer", directory: str | Path) -> None: if not file_path.is_file(): continue - if _try_load_python_file(file_path, directory, all_models): + if _try_load_python_file(file_path, directory, all_models, strict=strict): continue # Detect format and parse @@ -107,7 +109,12 @@ def load_from_directory(layer: "SemanticLayer", directory: str | Path) -> None: elif suffix in (".yml", ".yaml"): # Try to detect which format by reading the file content = file_path.read_text() - yaml_data = _load_yaml_mapping(content) + try: + yaml_data = _load_yaml_mapping(content) + except Exception as e: + if _looks_like_semantic_yaml_text(content): + _handle_parse_error(file_path, e, strict=strict) + continue # Check for MetricFlow before Sidemantic native since # "semantic_models:" contains "models:" as a substring if _yaml_has_top_level_key(yaml_data, "semantic_models"): @@ -162,7 +169,7 @@ def load_from_directory(layer: "SemanticLayer", directory: str | Path) -> None: if adapter: try: - graph = adapter.parse(str(file_path)) + graph = _parse_adapter_without_auto_registration(adapter, file_path) # Track source format for each model adapter_name = adapter.__class__.__name__.replace("Adapter", "") for model in graph.models.values(): @@ -174,8 +181,9 @@ def load_from_directory(layer: "SemanticLayer", directory: str | Path) -> None: all_metrics.update(graph.metrics) all_parameters.update(graph.parameters) except Exception as e: - # Skip files that fail to parse - logging.warning("Could not parse %s: %s", file_path, e) + _handle_parse_error(file_path, e, strict=strict) + + _resolve_native_model_inheritance(all_models, strict=strict) # BSL files are parsed one at a time during auto-discovery. 
Finalize join # aliases after all files have been loaded so aliases can target models @@ -305,14 +313,32 @@ def _looks_like_python_semantic_definition(file_path: Path) -> bool: def _load_yaml_mapping(content: str) -> dict: - """Parse YAML content and return a mapping, or an empty mapping on failure.""" - try: - data = yaml.safe_load(content) - except Exception: - return {} + """Parse YAML content and return a mapping, or an empty mapping for scalar/list YAML.""" + data = yaml.safe_load(content) return data if isinstance(data, dict) else {} +def _looks_like_semantic_yaml_text(content: str) -> bool: + """Return True when malformed YAML text contains a known semantic-layer key.""" + semantic_keys = ( + "base_sql_table", + "cubes", + "datasets", + "dimensions", + "measures", + "metrics", + "models", + "semantic_model", + "semantic_models", + "table_name", + "tables", + "views", + "worksheet", + ) + prefixes = tuple(f"{key}:" for key in semantic_keys) + return any(line.lstrip().startswith(prefixes) for line in content.splitlines()) + + def _yaml_has_top_level_key(data: dict, key: str) -> bool: """Return True when a YAML mapping has an exact top-level key.""" return isinstance(data, dict) and key in data @@ -407,7 +433,90 @@ def collect(candidate: object) -> None: return extracted -def _try_load_python_file(file_path: Path, directory: Path, all_models: dict) -> bool: +def _handle_parse_error(file_path: Path, error: Exception, *, strict: bool) -> None: + if strict: + raise ValueError(f"Could not parse {file_path}: {error}") from error + logging.warning("Could not parse %s: %s", file_path, error) + + +def _parse_adapter_without_auto_registration(adapter, file_path: Path): + from sidemantic.core.registry import get_current_layer, set_current_layer + + previous_layer = get_current_layer() + set_current_layer(None) + try: + return adapter.parse(str(file_path)) + finally: + set_current_layer(previous_layer) + + +def _copy_model_source_attrs(source, target) -> None: + for attr 
in ("_source_format", "_source_file"): + if hasattr(source, attr): + setattr(target, attr, getattr(source, attr)) + + +def _resolve_native_model_inheritance(all_models: dict, *, strict: bool) -> None: + """Resolve Sidemantic-native model inheritance after directory-wide parsing.""" + native_children = { + name: model + for name, model in all_models.items() + if getattr(model, "_source_format", None) == "Sidemantic" and model.extends + } + if not native_children: + return + + from sidemantic.core.inheritance import merge_model + + resolved = {} + resolving = set() + + def fail(message: str): + if strict: + raise ValueError(message) + logging.warning(message) + return None + + def resolve(name: str): + if name in resolved: + return resolved[name] + + model = all_models.get(name) + if model is None: + return fail(f"Native model '{name}' not found") + + if name in resolving: + return fail(f"Circular native model inheritance detected for model '{name}'") + + if not model.extends: + resolved[name] = model + return model + + parent = all_models.get(model.extends) + if parent is None: + return fail(f"Native model '{name}' extends unknown model '{model.extends}'") + + resolving.add(name) + try: + if getattr(parent, "_source_format", None) == "Sidemantic" and parent.extends: + parent = resolve(model.extends) + finally: + resolving.remove(name) + + if parent is None: + return None + + merged = merge_model(model, parent) + _copy_model_source_attrs(model, merged) + resolved[name] = merged + all_models[name] = merged + return merged + + for name in native_children: + resolve(name) + + +def _try_load_python_file(file_path: Path, directory: Path, all_models: dict, *, strict: bool) -> bool: """Load semantic definitions from a Python file if it looks like Sidemantic code.""" if not _looks_like_python_semantic_definition(file_path): return False @@ -423,7 +532,7 @@ def _try_load_python_file(file_path: Path, directory: Path, all_models: dict) -> with captured_layer: namespace = 
runpy.run_path(str(file_path)) except Exception as e: - logging.warning("Could not parse %s: %s", file_path, e) + _handle_parse_error(file_path, e, strict=strict) return False finally: if sys.path and sys.path[0] == script_dir: diff --git a/sidemantic/validation_runner.py b/sidemantic/validation_runner.py new file mode 100644 index 00000000..27a54193 --- /dev/null +++ b/sidemantic/validation_runner.py @@ -0,0 +1,77 @@ +"""Noninteractive semantic layer validation.""" + +from dataclasses import dataclass, field +from pathlib import Path + +from sidemantic import SemanticLayer, load_from_directory +from sidemantic.validation import validate_metric, validate_model + + +@dataclass +class ValidationReport: + directory: Path + errors: list[str] = field(default_factory=list) + warnings: list[str] = field(default_factory=list) + info: list[str] = field(default_factory=list) + + @property + def passed(self) -> bool: + return not self.errors + + +def validate_directory(directory: str | Path) -> ValidationReport: + """Load and validate semantic layer definitions from a directory.""" + directory = Path(directory) + report = ValidationReport(directory=directory) + + layer = SemanticLayer() + load_from_directory(layer, str(directory)) + + if not layer.graph.models: + report.errors.append("No models found in directory") + return report + + report.info.append(f"Loaded {len(layer.graph.models)} models") + + for model_name, model in layer.graph.models.items(): + report.errors.extend(validate_model(model)) + + if not model.dimensions: + report.warnings.append(f"Model '{model_name}' has no dimensions") + if not model.metrics: + report.warnings.append(f"Model '{model_name}' has no metrics") + + for metric in model.metrics: + report.errors.extend(validate_metric(metric, layer.graph)) + + for rel in model.relationships: + if rel.name not in layer.graph.models: + report.errors.append(f"Model '{model_name}' has relationship to '{rel.name}' which doesn't exist") + + for metric in 
layer.graph.metrics.values(): + report.errors.extend(validate_metric(metric, layer.graph)) + + if len(layer.graph.models) > 1: + orphaned = [] + for model_name, model in layer.graph.models.items(): + has_outgoing = bool(model.relationships) + has_incoming = any( + any(rel.name == model_name for rel in other.relationships) + for other_name, other in layer.graph.models.items() + if other_name != model_name + ) + if not has_outgoing and not has_incoming: + orphaned.append(model_name) + + if orphaned: + report.warnings.append(f"Orphaned models (no relationships): {', '.join(orphaned)}") + + total_dims = sum(len(model.dimensions) for model in layer.graph.models.values()) + total_metrics = sum(len(model.metrics) for model in layer.graph.models.values()) + total_rels = sum(len(model.relationships) for model in layer.graph.models.values()) + + report.info.append(f"Total dimensions: {total_dims}") + report.info.append(f"Total metrics: {total_metrics}") + report.info.append(f"Total relationships: {total_rels}") + + return report diff --git a/sidemantic/workbench/validation_app.py b/sidemantic/workbench/validation_app.py index 0cb448c7..ba15cae7 100644 --- a/sidemantic/workbench/validation_app.py +++ b/sidemantic/workbench/validation_app.py @@ -7,7 +7,7 @@ from textual.containers import VerticalScroll from textual.widgets import Footer, Header, Static -from sidemantic import SemanticLayer, load_from_directory +from sidemantic.validation_runner import validate_directory class ValidationApp(App): @@ -68,74 +68,11 @@ def compose(self) -> ComposeResult: def on_mount(self) -> None: """Run validation.""" try: - layer = SemanticLayer() - load_from_directory(layer, str(self.directory)) - - if not layer.graph.models: - self.exit(message="No models found in directory") - return - - self.info.append(f"Loaded {len(layer.graph.models)} models") - - # Validate each model - for model_name, model in layer.graph.models.items(): - # Check primary key - if not model.primary_key: - 
self.warnings.append(f"Model '{model_name}' has no primary key defined") - - # Check for dimensions - if not model.dimensions: - self.warnings.append(f"Model '{model_name}' has no dimensions") - - # Check for metrics - if not model.metrics: - self.warnings.append(f"Model '{model_name}' has no metrics") - - # Validate relationships - for rel in model.relationships: - if rel.name not in layer.graph.models: - self.errors.append(f"Model '{model_name}' has relationship to '{rel.name}' which doesn't exist") - - # Check for duplicate dimension names - dim_names = [d.name for d in model.dimensions] - duplicates = [name for name in set(dim_names) if dim_names.count(name) > 1] - if duplicates: - self.errors.append(f"Model '{model_name}' has duplicate dimensions: {', '.join(duplicates)}") - - # Check for duplicate metric names - metric_names = [m.name for m in model.metrics] - duplicates = [name for name in set(metric_names) if metric_names.count(name) > 1] - if duplicates: - self.errors.append(f"Model '{model_name}' has duplicate metrics: {', '.join(duplicates)}") - - # Check for orphaned models - if len(layer.graph.models) > 1: - orphaned = [] - for model_name, model in layer.graph.models.items(): - has_outgoing = len(model.relationships) > 0 - has_incoming = any( - any(r.name == model_name for r in m.relationships) - for name, m in layer.graph.models.items() - if name != model_name - ) - if not has_outgoing and not has_incoming: - orphaned.append(model_name) - - if orphaned: - self.warnings.append(f"Orphaned models (no relationships): {', '.join(orphaned)}") - - # Add summary stats - total_dims = sum(len(m.dimensions) for m in layer.graph.models.values()) - total_metrics = sum(len(m.metrics) for m in layer.graph.models.values()) - total_rels = sum(len(m.relationships) for m in layer.graph.models.values()) - - self.info.append(f"Total dimensions: {total_dims}") - self.info.append(f"Total metrics: {total_metrics}") - self.info.append(f"Total relationships: {total_rels}") - - 
# Display results + report = validate_directory(self.directory) + self.errors = report.errors + self.warnings = report.warnings + self.info = report.info self._update_display() - except Exception as e: self.exit(message=f"Error during validation: {e}") diff --git a/tests/core/test_rust_parity.py b/tests/core/test_rust_parity.py index c561fdde..a95a5cc7 100644 --- a/tests/core/test_rust_parity.py +++ b/tests/core/test_rust_parity.py @@ -39,6 +39,15 @@ def test_require_rust_subsystem_passes_for_rust_backed_target(monkeypatch, tmp_p rust_parity.require_rust_subsystem("sql_generator_entrypoint", "compile") +def test_repo_matrix_declares_rust_backed_strict_subsystems(): + matrix = rust_parity._load_parity_matrix() + subsystems = matrix["subsystems"] + + assert subsystems["sql_generator_entrypoint"]["status"] == "rust_backed" + assert subsystems["semantic_core_query_validation"]["status"] == "rust_backed" + assert subsystems["semantic_sql_rewriter"]["status"] == "rust_backed_opt_in" + + def test_require_rust_subsystem_ignores_non_strict_targets(monkeypatch, tmp_path): _write_matrix( tmp_path, diff --git a/tests/test_charts.py b/tests/test_charts.py index 4abe2d9f..8c4879b6 100644 --- a/tests/test_charts.py +++ b/tests/test_charts.py @@ -39,5 +39,18 @@ def test_color_palette(): pytest.skip("altair not installed") +def test_missing_chart_dependencies_hint_names_charts_extra(monkeypatch): + from sidemantic import charts + + monkeypatch.setattr(charts, "alt", None) + monkeypatch.setattr(charts, "vl_convert", None) + + with pytest.raises(ImportError) as exc_info: + charts.check_altair_available() + + assert "sidemantic[charts]" in str(exc_info.value) + assert "optional serve" not in str(exc_info.value) + + if __name__ == "__main__": pytest.main([__file__, "-v"]) diff --git a/tests/test_cli_commands.py b/tests/test_cli_commands.py index 20afc4e4..3fae5e8b 100644 --- a/tests/test_cli_commands.py +++ b/tests/test_cli_commands.py @@ -1,7 +1,9 @@ """Tests for CLI command 
wiring.""" +import builtins import json import os +import sys from pathlib import Path import duckdb @@ -76,6 +78,23 @@ def test_info_prints_model_summary(tmp_path): assert "Metrics: 1" in result.stdout +def test_info_fails_on_detected_parse_error(tmp_path): + _write_min_model(tmp_path) + (tmp_path / "bad.yml").write_text( + """ +models: + - name: broken + table: [ +""" + ) + + result = runner.invoke(app, ["info", str(tmp_path)]) + + assert result.exit_code == 1 + assert "Could not parse" in result.output + assert "bad.yml" in result.output + + def test_query_dry_run_emits_sql(tmp_path): _write_min_model(tmp_path) @@ -336,22 +355,55 @@ def fake_run_workbench(directory, demo_mode=False, connection=None): assert "uvx --from 'sidemantic[workbench]' sidemantic workbench --demo" in result.output -def test_validate_calls_runner(monkeypatch, tmp_path): - pytest.importorskip("textual") - called = {} +def test_validate_python_runs_without_workbench_extra(monkeypatch, tmp_path): + for module_name in list(sys.modules): + if module_name == "sidemantic.workbench" or module_name.startswith("sidemantic.workbench."): + monkeypatch.delitem(sys.modules, module_name, raising=False) - def fake_run_validation(directory, verbose=False): - called["directory"] = directory - called["verbose"] = verbose + real_import = builtins.__import__ - monkeypatch.setattr("sidemantic.workbench.run_validation", fake_run_validation) + def blocked_workbench_import(name, *args, **kwargs): + if name == "sidemantic.workbench" or name.startswith("sidemantic.workbench."): + raise ImportError("simulated missing workbench extra") + return real_import(name, *args, **kwargs) + + monkeypatch.setattr(builtins, "__import__", blocked_workbench_import) _write_min_model(tmp_path) - result = runner.invoke(app, ["validate", str(tmp_path), "--verbose"]) + result = runner.invoke(app, ["validate", str(tmp_path), "--engine", "python", "--verbose"]) assert result.exit_code == 0 - assert called["directory"] == tmp_path - assert 
called["verbose"] is True + assert "Validation Results:" in result.output + assert "Loaded 1 models" in result.output + assert "Validation Passed" in result.output + + +def test_validate_python_fails_on_validation_errors(tmp_path): + (tmp_path / "models.yml").write_text( + """ +models: + - name: orders + table: orders + primary_key: id + dimensions: + - name: status + sql: status + type: categorical + metrics: + - name: order_count + agg: count + relationships: + - name: customers + type: many_to_one + foreign_key: customer_id +""" + ) + + result = runner.invoke(app, ["validate", str(tmp_path), "--engine", "python"]) + + assert result.exit_code == 1 + assert "relationship to 'customers' which doesn't exist" in result.output + assert "Validation Failed" in result.output def test_validate_engine_rust_uses_rust_loader(monkeypatch, tmp_path): @@ -414,6 +466,28 @@ def fake_start_server(layer, host, port, username, password): assert called["password"] == "p" +def test_serve_missing_extra_prints_install_hint(monkeypatch, tmp_path): + for module_name in list(sys.modules): + if module_name == "sidemantic.server.server" or module_name.startswith("sidemantic.server."): + monkeypatch.delitem(sys.modules, module_name, raising=False) + + real_import = builtins.__import__ + + def blocked_server_import(name, *args, **kwargs): + if name == "sidemantic.server.server": + raise ImportError("simulated missing serve extra") + return real_import(name, *args, **kwargs) + + monkeypatch.setattr(builtins, "__import__", blocked_server_import) + + _write_min_model(tmp_path) + result = runner.invoke(app, ["serve", str(tmp_path)]) + + assert result.exit_code == 1 + assert "requires the optional serve dependencies" in result.output + assert "sidemantic[serve]" in result.output + + def test_serve_rejects_partial_auth(monkeypatch, tmp_path): ensure_fake_riffq() diff --git a/uv.lock b/uv.lock index 15615186..122a52b7 100644 --- a/uv.lock +++ b/uv.lock @@ -3356,7 +3356,9 @@ api = [ { name = "uvicorn" 
}, ] apps = [ + { name = "altair" }, { name = "mcp", extra = ["cli"] }, + { name = "vl-convert-python" }, ] bigquery = [ { name = "google-cloud-bigquery" }, @@ -3475,6 +3477,7 @@ dev = [ [package.metadata] requires-dist = [ { name = "adbc-driver-manager", marker = "extra == 'adbc'", specifier = ">=1.0.0" }, + { name = "altair", marker = "extra == 'apps'", specifier = ">=5.0.0" }, { name = "altair", marker = "extra == 'charts'", specifier = ">=5.0.0" }, { name = "antlr4-python3-runtime", specifier = ">=4.13.2" }, { name = "antlr4-python3-runtime", marker = "extra == 'dev'", specifier = ">=4.13.2" }, @@ -3531,6 +3534,7 @@ requires-dist = [ { name = "typer", specifier = ">=0.9.0" }, { name = "uvicorn", marker = "extra == 'api'", specifier = ">=0.34.0" }, { name = "uvicorn", marker = "extra == 'dev'", specifier = ">=0.34.0" }, + { name = "vl-convert-python", marker = "extra == 'apps'", specifier = ">=1.0.0" }, { name = "vl-convert-python", marker = "extra == 'charts'", specifier = ">=1.0.0" }, ] provides-extras = ["dev", "workbench", "mcp", "apps", "charts", "serve", "api", "postgres", "bigquery", "snowflake", "clickhouse", "databricks", "spark", "adbc", "lsp", "lookml", "malloy", "metricflow", "widget", "all-databases", "full"] From 4c25d80cefa926d59abb5feee5c20e9f16b9aa48 Mon Sep 17 00:00:00 2001 From: Nico Ritschel Date: Sun, 31 May 2026 15:13:52 -0700 Subject: [PATCH 2/3] Suppress auto registration during native inheritance --- sidemantic/loaders.py | 8 ++++++-- tests/test_loaders.py | 27 +++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/sidemantic/loaders.py b/sidemantic/loaders.py index 27fc311f..23143d31 100644 --- a/sidemantic/loaders.py +++ b/sidemantic/loaders.py @@ -440,12 +440,16 @@ def _handle_parse_error(file_path: Path, error: Exception, *, strict: bool) -> N def _parse_adapter_without_auto_registration(adapter, file_path: Path): + return _run_without_auto_registration(adapter.parse, str(file_path)) + + +def 
_run_without_auto_registration(callback, *args): from sidemantic.core.registry import get_current_layer, set_current_layer previous_layer = get_current_layer() set_current_layer(None) try: - return adapter.parse(str(file_path)) + return callback(*args) finally: set_current_layer(previous_layer) @@ -506,7 +510,7 @@ def resolve(name: str): if parent is None: return None - merged = merge_model(model, parent) + merged = _run_without_auto_registration(merge_model, model, parent) _copy_model_source_attrs(model, merged) resolved[name] = merged all_models[name] = merged diff --git a/tests/test_loaders.py b/tests/test_loaders.py index fa13f9f8..491ea407 100644 --- a/tests/test_loaders.py +++ b/tests/test_loaders.py @@ -31,3 +31,30 @@ def blocked_antlr4_import(name, *args, **kwargs): load_from_directory(layer, tmp_path) assert "orders" in layer.graph.models + + +def test_native_inheritance_does_not_register_model_metrics_globally(tmp_path): + (tmp_path / "models.yml").write_text( + """ +models: + - name: base_orders + table: orders + primary_key: order_id + metrics: + - name: margin_label + type: derived + sql: "'margin'" + + - name: orders + extends: base_orders + table: orders + primary_key: order_id +""" + ) + + layer = SemanticLayer(auto_register=True) + load_from_directory(layer, tmp_path) + + assert "orders" in layer.graph.models + assert layer.graph.models["orders"].get_metric("margin_label") is not None + assert "margin_label" not in layer.graph.metrics From 2b9c7897f6fb593d43decb5d0a2b46a34020e304 Mon Sep 17 00:00:00 2001 From: Nico Ritschel Date: Sun, 31 May 2026 15:40:43 -0700 Subject: [PATCH 3/3] Apply strict loading to Graphene projects --- sidemantic/loaders.py | 6 ++-- tests/adapters/graphene/test_parsing.py | 43 +++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 2 deletions(-) diff --git a/sidemantic/loaders.py b/sidemantic/loaders.py index 23143d31..eddbbd9c 100644 --- a/sidemantic/loaders.py +++ b/sidemantic/loaders.py @@ -57,7 +57,7 @@ def 
load_from_directory(layer: "SemanticLayer", directory: str | Path, *, strict if _try_load_sml(layer, directory, all_models): return - _load_graphene_project(directory, all_models, all_metrics, all_parameters) + _load_graphene_project(directory, all_models, all_metrics, all_parameters, strict=strict) # Find and parse all files for file_path in directory.rglob("*"): @@ -216,6 +216,8 @@ def _load_graphene_project( all_models: dict, all_metrics: dict, all_parameters: dict, + *, + strict: bool, ) -> None: """Parse Graphene `.gsql` files together so project-level links resolve.""" from sidemantic.adapters.graphene import GrapheneAdapter @@ -227,7 +229,7 @@ def _load_graphene_project( try: graph = adapter.parse(str(directory)) except Exception as e: - logging.warning("Could not parse Graphene project %s: %s", directory, e) + _handle_parse_error(directory, e, strict=strict) return adapter_name = adapter.__class__.__name__.replace("Adapter", "") diff --git a/tests/adapters/graphene/test_parsing.py b/tests/adapters/graphene/test_parsing.py index 3fa4f0d9..95afd031 100644 --- a/tests/adapters/graphene/test_parsing.py +++ b/tests/adapters/graphene/test_parsing.py @@ -1,3 +1,5 @@ +import pytest + from sidemantic import SemanticLayer, load_from_directory from sidemantic.adapters.graphene import GrapheneAdapter from tests.utils import df_rows @@ -680,6 +682,47 @@ def test_load_from_directory_detects_graphene_gsql(tmp_path): assert layer.graph.models["orders"].get_metric("revenue") is not None +def test_load_from_directory_strict_raises_on_graphene_parse_error(tmp_path): + (tmp_path / "broken.gsql").write_text( + """ +table broken ( + id INT64 +""" + ) + (tmp_path / "orders.yml").write_text( + """ +models: + - name: orders + table: orders +""" + ) + + layer = SemanticLayer() + with pytest.raises(ValueError, match="Could not parse .*Unclosed table body"): + load_from_directory(layer, tmp_path) + + +def test_load_from_directory_lenient_skips_graphene_parse_error(tmp_path): + 
(tmp_path / "broken.gsql").write_text( + """ +table broken ( + id INT64 +""" + ) + (tmp_path / "orders.yml").write_text( + """ +models: + - name: orders + table: orders +""" + ) + + layer = SemanticLayer() + load_from_directory(layer, tmp_path, strict=False) + + assert set(layer.graph.models) == {"orders"} + + def test_load_from_directory_accepts_graphene_percentile_aggregate(tmp_path): (tmp_path / "events.gsql").write_text( """