From 5f16fa208de15638e04d3212d1244286a50a21ce Mon Sep 17 00:00:00 2001 From: Anand Pant Date: Tue, 21 Apr 2026 03:58:26 -0500 Subject: [PATCH 1/2] docs: genericize getting started and fix apache license --- Cargo.toml | 2 +- README.md | 45 +++++++++++++++++++++++++++++++++++++-------- sources/README.md | 6 +++--- 3 files changed, 41 insertions(+), 12 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 68c1a19..cd4c197 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,7 +9,7 @@ resolver = "2" [workspace.package] edition = "2021" -license = "MIT" +license = "Apache-2.0" publish = false version = "0.0.3" diff --git a/README.md b/README.md index 61311f1..4e63e79 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ Recurring Convex export pipelines for local analytics, Databricks, and downstrea [![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/shpitdev/convex-sync-kit) [![Release](https://img.shields.io/github/v/release/shpitdev/convex-sync-kit?display_name=tag)](https://github.com/shpitdev/convex-sync-kit/releases) -[![License: MIT](https://img.shields.io/badge/license-MIT-2ea44f)](LICENSE) +[![License: Apache-2.0](https://img.shields.io/github/license/shpitdev/convex-sync-kit)](LICENSE) ![Rust](https://img.shields.io/badge/Rust-000000?logo=rust&logoColor=white) ![Convex](https://img.shields.io/badge/Convex-EE342F?logo=convex&logoColor=white) @@ -12,6 +12,25 @@ Recurring Convex export pipelines for local analytics, Databricks, and downstrea ![Databricks](https://img.shields.io/badge/Databricks-FF3621?logo=databricks&logoColor=white) ![Palantir Foundry](https://img.shields.io/badge/Palantir%20Foundry-virtual%20tables-101828) +## Required Inputs + +These are the minimum inputs almost everyone needs before a recurring sync will work: + +```bash +export CONVEX_DEPLOYMENT_URL=https://your-deployment.convex.cloud +export CONVEX_DEPLOY_KEY=your-convex-deploy-key +``` + +Target-specific requirements: + +| Target | Also required | +|---|---| +| Local 
recurring analysis | writable output paths | +| S3/export | AWS credentials, `--bucket`, optional `--prefix` | +| Databricks Delta | Databricks profile plus a SQL warehouse ID for bootstrap | +| Databricks over S3 | Databricks profile, SQL warehouse ID, and Unity Catalog external-location coverage | +| Palantir Foundry | either Databricks/Unity Catalog or an S3 path to connect Foundry to | + ## Choose Your Path ```mermaid @@ -34,6 +53,8 @@ flowchart TD If you only need a one-time export or ad hoc backfill, use the official Convex tooling directly. This repo is aimed at recurring pipelines, not the simplest possible one-shot export. +See: + - [Convex streaming import/export](https://docs.convex.dev/production/integrations/streaming-import-export) - [Convex streaming export API](https://docs.convex.dev/streaming-export-api) @@ -68,21 +89,29 @@ There are two supported Databricks paths: Recommended Databricks Delta flow: ```bash -export CONVEX_SYNC_SOURCE=meshix-api +export CONVEX_SYNC_SOURCE=<your-source-name> -just databricks-delta-bootstrap 63d28889f3eb3c4b +just databricks-delta-bootstrap <your-warehouse-id> just databricks-delta-sync-secret DEFAULT just databricks-delta-deploy DEFAULT prod just databricks-delta-run DEFAULT prod ``` +The Delta path creates and updates: + +- `convex_sync_kit__delta_control` +- `convex_sync_kit__delta_bronze` +- `convex_sync_kit__delta_silver` + +The silver schema is expected to stay empty until you stand up a Lakeflow `AUTO CDC` pipeline for the tables you actually want to materialize there. + Reference Databricks over S3 flow: ```bash -export CONVEX_SYNC_SOURCE=meshix-api +export CONVEX_SYNC_SOURCE=<your-source-name> -just run --bucket your-bucket --prefix prod -just databricks-sync-staging-views --warehouse-id 63d28889f3eb3c4b --bucket your-bucket --prefix prod +just run --bucket <your-bucket> --prefix prod +just databricks-sync-staging-views --warehouse-id <your-warehouse-id> --bucket <your-bucket> --prefix prod ``` ### 4. 
Using Palantir Foundry @@ -118,7 +147,7 @@ Relevant Foundry docs: | Databricks over S3 | Unity Catalog views over published parquet snapshots | `convex_sync_kit__s3` | | Databricks Delta | checkpoint table, bronze CDC tables, silver current-state tables | `convex_sync_kit__delta_{control,bronze,silver}` | -The current checked-in source profile is [`sources/meshix-api/env.sh`](sources/meshix-api/env.sh). That is only one source profile, not a repo identity. Add more source directories as you onboard more Convex projects. +The checked-in [`sources/meshix-api/env.sh`](sources/meshix-api/env.sh) file is only an example source profile, not a repo identity. Add more source directories as you onboard more Convex projects. ## Output Paths And Defaults @@ -191,4 +220,4 @@ There is a more detailed capture list in [docs/demo-storyboard.md](docs/demo-sto ## License -[MIT](LICENSE) +[Apache License 2.0](LICENSE) diff --git a/sources/README.md b/sources/README.md index af79d7a..84d63b5 100644 --- a/sources/README.md +++ b/sources/README.md @@ -22,6 +22,6 @@ Recommended contents: - `DATABRICKS_DELTA_SILVER_SCHEMA` - `DATABRICKS_DELTA_CHECKPOINT_TABLE` -Scripts load `sources/${CONVEX_SYNC_SOURCE:-meshix-api}/env.sh` automatically. -Explicit environment variables still win because the source files only set -defaults. +Scripts load `sources/$CONVEX_SYNC_SOURCE/env.sh` automatically when +`CONVEX_SYNC_SOURCE` is set. Explicit environment variables still win because +the source files only set defaults. 
From 3a3feb52023b849ddf841298b04d33ef48c60e70 Mon Sep 17 00:00:00 2001 From: Anand Pant Date: Tue, 21 Apr 2026 05:19:25 -0500 Subject: [PATCH 2/2] fix: require explicit source config selection --- Cargo.lock | 8 ++++---- scripts/load-source-config.sh | 13 +++++++------ 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 08ba30c..7d82ab3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -867,7 +867,7 @@ dependencies = [ [[package]] name = "convex-export-s3" -version = "0.0.1" +version = "0.0.3" dependencies = [ "arrow-array", "arrow-schema", @@ -887,7 +887,7 @@ dependencies = [ [[package]] name = "convex-inspect" -version = "0.0.2" +version = "0.0.3" dependencies = [ "clap", "convex-sync-core", @@ -898,7 +898,7 @@ dependencies = [ [[package]] name = "convex-sync" -version = "0.0.2" +version = "0.0.3" dependencies = [ "clap", "convex-export-s3", @@ -910,7 +910,7 @@ dependencies = [ [[package]] name = "convex-sync-core" -version = "0.0.1" +version = "0.0.3" dependencies = [ "clap", "hex", diff --git a/scripts/load-source-config.sh b/scripts/load-source-config.sh index 3a69e74..4ff49c6 100755 --- a/scripts/load-source-config.sh +++ b/scripts/load-source-config.sh @@ -3,15 +3,16 @@ set -euo pipefail load_convex_sync_source_config() { local repo_root="$1" - local source_name="${CONVEX_SYNC_SOURCE:-meshix-api}" + local source_name="${CONVEX_SYNC_SOURCE:-}" + + if [[ -z "$source_name" ]]; then + return 0 + fi local source_file="$repo_root/sources/$source_name/env.sh" if [[ ! -f "$source_file" ]]; then - if [[ -n "${CONVEX_SYNC_SOURCE:-}" ]]; then - echo "unknown Convex source config: $source_name" >&2 - return 1 - fi - return 0 + echo "unknown Convex source config: $source_name" >&2 + return 1 fi # shellcheck source=/dev/null