hotdata-dev
diff --git a/Cargo.toml b/Cargo.toml
Lines changed: 2 additions & 0 deletions
diff --git a/scripts/release.sh b/scripts/release.sh
Lines changed: 43 additions & 11 deletions
diff --git a/skills/hotdata-analytics/SKILL.md b/skills/hotdata-analytics/SKILL.md
Lines changed: 124 additions & 0 deletions
diff --git a/skills/hotdata-analytics/references/WORKFLOWS.md b/skills/hotdata-analytics/references/WORKFLOWS.md
Lines changed: 116 additions & 0 deletions
diff --git a/skills/hotdata-geospatial/SKILL.md b/skills/hotdata-geospatial/SKILL.md
Lines changed: 2 additions & 0 deletions
@@ -60,6 +60,8 @@ pre-release-hook = ["git-cliff", "-o", "CHANGELOG.md", "--tag", "v{{version}}" ]
 publish = false
 pre-release-replacements = [
   { file = "skills/hotdata/SKILL.md", search = "^version: .+", replace = "version: {{version}}", exactly = 1 },
+  { file = "skills/hotdata-search/SKILL.md", search = "^version: .+", replace = "version: {{version}}", exactly = 1 },
+  { file = "skills/hotdata-analytics/SKILL.md", search = "^version: .+", replace = "version: {{version}}", exactly = 1 },
   { file = "skills/hotdata-geospatial/SKILL.md", search = "^version: .+", replace = "version: {{version}}", exactly = 1 },
   { file = "README.md", search = "version-[0-9.]+-blue", replace = "version-{{version}}-blue", exactly = 1 },
 ]
 
@@ -2,8 +2,8 @@
 # release.sh — two-phase release wrapper around cargo-release
 #
 # Usage:
-#   scripts/release.sh prepare <version>   # steps 0-2: branch, bump, push PR
-#   scripts/release.sh finish              # step 4: tag, publish, trigger dist
+#   scripts/release.sh prepare <version>   # branch, bump, changelog PR
+#   scripts/release.sh finish              # tag only (main is branch-protected)
 
 set -euo pipefail
 
@@ -13,7 +13,7 @@ VERSION="${2:-}"
 usage() {
     echo "Usage:"
     echo "  scripts/release.sh prepare <version>   # create release branch and open PR"
-    echo "  scripts/release.sh finish               # tag and publish from main"
+    echo "  scripts/release.sh finish               # push v<version> tag from main (no main push)"
     exit 1
 }
 
@@ -24,6 +24,16 @@ require_clean_tree() {
     fi
 }
 
+read_crate_version() {
+    local ver
+    ver="$(grep -E '^version = ' Cargo.toml | head -1 | sed -E 's/^version = "([^"]+)".*/\1/')"
+    if [ -z "$ver" ]; then
+        echo "error: could not read version from Cargo.toml" >&2
+        exit 1
+    fi
+    printf '%s' "$ver"
+}
+
 case "$COMMAND" in
     prepare)
         if [ -z "$VERSION" ]; then
@@ -35,17 +45,21 @@ case "$COMMAND" in
 
         require_clean_tree
 
-        # step 0: create release branch
         echo "→ Creating branch $BRANCH"
         git checkout -b "$BRANCH"
 
-        # step 2: bump versions, commit, push branch
         echo ""
         echo "→ Running cargo release (no publish, no tag)..."
-        # git-cliff (pre-release hook) is often installed via cargo install
         export PATH="${HOME}/.cargo/bin:${PATH}"
         cargo release --no-publish --no-tag --no-confirm --allow-branch="$BRANCH" --execute "$VERSION"
 
+        if [ -f scripts/validate-changelog.py ]; then
+            echo ""
+            echo "→ Validating CHANGELOG.md against origin/main..."
+            git fetch origin main 2>/dev/null || true
+            python3 scripts/validate-changelog.py origin/main
+        fi
+
         echo ""
         echo "→ Opening pull request..."
         PR_URL=$(gh pr create \
@@ -77,15 +91,33 @@ case "$COMMAND" in
         fi
 
         echo "→ Pulling latest main..."
-        git pull
+        git pull origin main
+
+        VERSION="$(read_crate_version)"
+        TAG="v${VERSION}"
 
         echo ""
-        echo "→ Running cargo release (tagging release)..."
-        export PATH="${HOME}/.cargo/bin:${PATH}"
-        cargo release --no-confirm --execute
+        echo "→ Release version from Cargo.toml: $VERSION (tag $TAG)"
+
+        if git rev-parse "$TAG" >/dev/null 2>&1; then
+            echo "error: tag $TAG already exists locally. Delete it or pick a new version." >&2
+            exit 1
+        fi
+
+        if git ls-remote --exit-code --tags origin "refs/tags/${TAG}" >/dev/null 2>&1; then
+            echo "error: tag $TAG already exists on origin." >&2
+            exit 1
+        fi
+
+        echo "→ Creating annotated tag $TAG (no commit to main)..."
+        git tag -a "$TAG" -m "Release hotdata-cli version $VERSION"
+
+        echo "→ Pushing tag to origin..."
+        git push origin "$TAG"
 
         echo ""
-        echo "✓ Release complete. Tag pushed and dist workflow triggered."
+        echo "✓ Tag $TAG pushed. Dist/release workflow should run on GitHub."
+        echo "  (main was not pushed — version bump must already be merged via release PR.)"
         ;;
 
     *)
 
@@ -0,0 +1,124 @@
+---
+name: hotdata-analytics
+description: Use this skill when the user wants OLAP-style SQL analytics in Hotdata — aggregations, GROUP BY, JOINs, reporting, exploratory queries, query run history, stored results, or materialized follow-up tables (Chain via datasets or managed databases). Activate for "analyze", "aggregate", "rollup", "pivot", "report", "metrics", "GROUP BY", "query history", "past queries", "query runs", "stored results", "materialize", "chain", "intermediate table", or sorted indexes for filters/range scans. Do not load for BM25/vector search or geospatial SQL — use hotdata-search or hotdata-geospatial. Requires the core hotdata skill for connections, tables, datasets, and auth.
+version: 0.2.3
+---
+
+# Hotdata Analytics Skill
+
+**OLAP-style analytics** in Hotdata: PostgreSQL-dialect SQL, query execution, run history, stored results, **Chain** materializations, and **sorted** indexes for filters and joins.
+
+**Prerequisites:** Authentication, workspace selection, and catalog discovery are handled by the **`hotdata`** skill (`connections`, `tables`, `datasets`, `databases`).
+
+**Related skills:** **`hotdata-search`** (BM25, vector, retrieval indexes), **`hotdata-geospatial`** (spatial SQL).
+
+---
+
+## Execute SQL
+
+```bash
+hotdata query "<sql>" [--workspace-id <workspace_id>] [--connection <connection_id>] [--output table|json|csv]
+hotdata query status <query_run_id> [--output table|json|csv]
+```
+
+- **PostgreSQL dialect.** Quote mixed-case identifiers: `"CustomerName"`.
+- Use **`hotdata tables list`** for schema discovery — not `information_schema` via `query`.
+- Fully qualified names: `<connection>.<schema>.<table>`, `datasets.<schema>.<table>`, `<database>.<schema>.<table>`.
+- Long-running queries may return `query_run_id` → poll with **`query status`** (exit `2` = still running). Do not re-run identical heavy SQL while polling.
+- For **workspace-wide** joins and naming, load **context:DATAMODEL** when listed (`hotdata context list` → `show DATAMODEL`) — see **`hotdata`** skill.
+
+### OLAP patterns
+
+Typical analytics SQL (all via `hotdata query`):
+
+- **Aggregations:** `COUNT`, `SUM`, `AVG`, `MIN`, `MAX` with `GROUP BY`
+- **Joins:** `INNER` / `LEFT JOIN` across `<connection>.<schema>.<table>` names
+- **Filtering:** `WHERE` on partition-friendly columns (consider **sorted** indexes below)
+- **Ordering:** `ORDER BY` on metrics or dimensions
+- **Bounded exploration:** always `LIMIT` while iterating; widen once validated
+
+Column names from CSV uploads may be case-sensitive — use double quotes when not all-lowercase.
+
+---
+
+## Query run history
+
+The `queries` commands use the **active workspace only**: they take no `--workspace-id` flag, so set the workspace first with `hotdata workspaces set`.
+
+```bash
+hotdata queries list [--limit <int>] [--cursor <token>] [--status <csv>] [--output table|json|yaml]
+hotdata queries <query_run_id> [--output table|json|yaml]
+```
+
+- `list` — status, duration, row count, SQL preview (default limit 20). Filter: `--status running,failed`.
+- `<query_run_id>` — full metadata, formatted SQL, `result_id` when present.
+- Use history to find recurring `WHERE` / `JOIN` / `GROUP BY` patterns before adding indexes (sorted indexes below; BM25/vector indexes in **`hotdata-search`**) or new Chain tables.
+
+---
+
+## Stored results
+
+```bash
+hotdata results list [--workspace-id <workspace_id>] [--limit <int>] [--offset <int>] [--output table|json|yaml]
+hotdata results <result_id> [--workspace-id <workspace_id>] [--output table|json|csv]
+```
+
+- Prefer **`results <id>`** over re-running identical heavy queries.
+- Query footers may include `[result-id: rslt...]`; the same `result_id` is also shown by `hotdata queries <query_run_id>` when present.
+
+---
+
+## Chain (materialized follow-ups)
+
+**Pattern:** run SQL → materialize a smaller table → query the materialized name.
+
+1. **Base query**
+
+   ```bash
+   hotdata query "SELECT ..."
+   hotdata query status <query_run_id>   # if async
+   ```
+
+2. **Materialize** (pick one)
+
+   ```bash
+   hotdata datasets create --label "chain slice" --sql "SELECT ..." [--table-name chain_slice]
+   hotdata datasets create --label "from saved" --query-id <query_id> [--table-name ...]
+   ```
+
+   Or load a parquet file into a managed database:
+
+   ```bash
+   hotdata databases create --name analytics --table slice
+   hotdata databases tables load analytics slice --file ./slice.parquet
+   ```
+
+3. **Chain query** — use printed **`full_name`** or `datasets list` **FULL NAME** column:
+
+   ```bash
+   hotdata query "SELECT * FROM datasets.main.chain_slice WHERE ..."
+   hotdata query "SELECT * FROM analytics.public.slice WHERE ..."
+   ```
+
+Document stable chains in **context:DATAMODEL → Derived tables (Chain)**.
+
+Full procedure: [references/WORKFLOWS.md](references/WORKFLOWS.md).
+
+---
+
+## Sorted indexes (filters and range scans)
+
+For equality, range, and sort-heavy OLAP — not full-text or vector (see **`hotdata-search`**):
+
+```bash
+hotdata indexes create --connection-id <id> --schema <schema> --table <table> \
+  --name idx_orders_created --columns created_at --type sorted [--async]
+```
+
+List and delete use the same `hotdata indexes` commands as in the search skill; only **`--type sorted`** is the analytics focus here.
+
+---
+
+## Sandboxes and chains
+
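+Sandbox datasets use **`datasets.<sandbox_id>.<table>`**, not `datasets.main`. Run queries with an active sandbox set in config (`hotdata sandbox set`) or inside `hotdata sandbox <id> run hotdata query "..."`; without sandbox context, sandbox-only tables may return **access denied**. See the **`hotdata`** skill (**Sandboxes**).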
@@ -0,0 +1,116 @@
+# Analytics workflows
+
+OLAP-style SQL, **History** (query runs and stored results), and **Chain** (materialized follow-ups). Requires **`hotdata`** for auth, workspaces, and catalog commands.
+
+**Related:** **`hotdata-search`** for BM25/vector indexes and `hotdata search`; **`hotdata`** [WORKFLOWS.md](../../hotdata/references/WORKFLOWS.md) for datasets vs managed databases.
+
+---
+
+## History
+
+**Goal:** Find prior work: query runs (execution history) and stored result rows.
+
+### Query runs
+
+Uses the **active workspace only** — no `--workspace-id` on `queries`. Set default workspace with `hotdata workspaces set` first.
+
+```bash
+hotdata queries list [--limit N] [--cursor <token>] [--status <csv>]
+hotdata queries <query_run_id>
+```
+
+- `list` — status, creation time, duration, row count, truncated SQL preview (default limit 20).
+- `--status` — filter comma-separated values, e.g. `--status running,failed`.
+- `<query_run_id>` — full metadata (timings, `result_id`, snapshot, hashes) and formatted SQL.
+- If a run has a `result_id`, fetch rows with `hotdata results <result_id>` below.
+
+Use history to spot recurring `WHERE`, `JOIN`, `GROUP BY`, or search-style SQL before adding indexes (**`hotdata-search`**) or new Chain tables.
+
+### Stored results
+
+```bash
+hotdata results list [--workspace-id <workspace_id>] [--limit N] [--offset N]
+hotdata results <result_id> [--workspace-id <workspace_id>] [--output table|json|csv]
+```
+
+- Query footers may include `[result-id: rslt...]` — record it for later.
+- Pick up `result_id` from `queries <query_run_id>` when present.
+- **Prefer `hotdata results <result_id>` over re-running identical heavy SQL.** Re-runs waste resources and may return different data.
+
+Results are paginated; the CLI hints the next `--offset` when more rows exist.
+
+---
+
+## Chain
+
+**Goal:** Follow-up analysis on a **bounded** intermediate without rescanning huge base tables.
+
+**Pattern:** run SQL → materialize → query the materialized **qualified name**.
+
+### 1. Base query
+
+```bash
+hotdata query "SELECT ..."
+```
+
+- Quote mixed-case columns with double quotes (PostgreSQL dialect).
+- If the CLI returns a `query_run_id`, poll instead of re-running:
+
+  ```bash
+  hotdata query status <query_run_id>
+  ```
+
+  Exit codes: `0` succeeded, `1` failed, `2` still running.
+
+### 2. Materialize
+
+Land a smaller table — pick one:
+
+**Datasets** (CSV/JSON/URL/SQL snapshot → `datasets.<schema>.<table>`):
+
+```bash
+hotdata datasets create --label "chain revenue slice" --sql "SELECT ..." [--table-name chain_revenue_slice]
+hotdata datasets create --label "from saved" --query-id <query_id> [--table-name ...]
+```
+
+**Managed database** (parquet → `<database>.<schema>.<table>`):
+
+```bash
+hotdata databases create --name chain_db --table revenue_slice
+hotdata databases tables load chain_db revenue_slice --file ./revenue_slice.parquet
+```
+
+Note the printed **`full_name`** (e.g. `datasets.main.chain_revenue_slice` or `chain_db.public.revenue_slice`). For datasets, **`FULL NAME`** from `datasets list` is authoritative.
+
+### 3. Chain query
+
+Query using that name — do not hardcode `datasets.main` if the schema segment is a sandbox id:
+
+```bash
+hotdata datasets list
+hotdata query "SELECT * FROM datasets.main.chain_revenue_slice WHERE ..."
+# Sandbox example (use actual full_name from create or list):
+# hotdata query "SELECT * FROM datasets.s_ufmblmvq.chain_revenue_slice WHERE ..."
+# Managed database:
+# hotdata query "SELECT * FROM chain_db.public.revenue_slice WHERE ..."
+```
+
+### Sandbox context
+
+For **sandbox-scoped** chain tables:
+
+- Qualified name is **`datasets.<sandbox_id>.<table>`**, not `datasets.main`.
+- Run queries with **active sandbox** in config (`hotdata sandbox set`) **or** inside **`hotdata sandbox <sandbox_id> run hotdata query "…"`**.
+- Without sandbox context, you may get **access denied** on sandbox-only tables.
+
+### Naming and documentation
+
+- Prefer predictable `--table-name` values: `chain_<topic>_<YYYYMMDD>`.
+- Record long-lived chains in **context:DATAMODEL → Derived tables (Chain)** with the **full** SQL name you use (`datasets.…` or `database.schema.table`).
+- Promote join/grain findings to **context:DATAMODEL** when they should outlive the sandbox (**`hotdata`** skill).
+
+### Guardrails
+
+- Materialize when the base scan is large and the follow-up runs many times.
+- Keep Chain tables focused; avoid wide `SELECT *` materializations when a narrow projection suffices.
+- For upload format choice (datasets vs databases), see **`hotdata`** WORKFLOWS — [Datasets vs managed databases](../../hotdata/references/WORKFLOWS.md#datasets-vs-managed-databases).
@@ -8,6 +8,8 @@ version: 0.2.3
 
 Use this skill when working with geospatial data in Hotdata. Hotdata supports a subset of PostGIS-style functions using **PostgreSQL dialect SQL**. This reference is dataset-agnostic — apply it to any table with geometry columns.
 
+**Related skills:** **`hotdata`** (core CLI), **`hotdata-search`** (BM25/vector), **`hotdata-analytics`** (OLAP SQL).
+
 ---
 
 ## Geometry Columns
Original file line number	Diff line number	Diff line change
`@@ -60,6 +60,8 @@ pre-release-hook = ["git-cliff", "-o", "CHANGELOG.md", "--tag", "v{{version}}" ]`
`60`	`60`	`publish = false`
`61`	`61`	`pre-release-replacements = [`
`62`	`62`	`{ file = "skills/hotdata/SKILL.md", search = "^version: .+", replace = "version: {{version}}", exactly = 1 },`
	`63`	`+ { file = "skills/hotdata-search/SKILL.md", search = "^version: .+", replace = "version: {{version}}", exactly = 1 },`
	`64`	`+ { file = "skills/hotdata-analytics/SKILL.md", search = "^version: .+", replace = "version: {{version}}", exactly = 1 },`
`63`	`65`	`{ file = "skills/hotdata-geospatial/SKILL.md", search = "^version: .+", replace = "version: {{version}}", exactly = 1 },`
`64`	`66`	`{ file = "README.md", search = "version-[0-9.]+-blue", replace = "version-{{version}}-blue", exactly = 1 },`
`65`	`67`	`]`